Skip to content
Snippets Groups Projects
Commit 4b08c648 authored by Christophe Benz
Browse files

Optimize lxml iterparse memory consumption

parent d331439a
No related branches found
No related tags found
No related merge requests found
......@@ -240,6 +240,14 @@ def iter_git_objects_in_sdmx_file(sdmx_file_path, category_tree, validate_json):
yield series_tree
dataset_tree.add(series_code.encode('utf-8'), git_tree_filemode, series_tree.id)
# From https://stackoverflow.com/questions/12160418/why-is-lxml-etree-iterparse-eating-up-all-my-memory
# It's safe to call clear() here because no descendants will be accessed
series_element.clear()
# Also eliminate now-empty references from the root node to series_element
for ancestor in series_element.xpath('ancestor-or-self::*'):
while ancestor.getprevious() is not None:
del ancestor.getparent()[0]
# Write dataset.json
if validate_json:
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment