Skip to content
Snippets Groups Projects
Commit 03c751a5 authored by Christophe Benz
Browse files

Log datasets counter

parent db1f1996
No related branches found
No related tags found
No related merge requests found
Pipeline #1310 failed
in 1 minute and 24 seconds
......@@ -281,14 +281,17 @@ def iter_git_objects_in_sdmx_series_element(series_element, sdmx_blob_name, dsd_
def toc_xml_element_to_json(source_repo, source_tree, repo, dataset_pair_by_dataset_code, data_package_tree, xml_element,
processed_datasets_codes):
processed_datasets_codes, leaf_index):
"""
Note: leaf_index is a singleton list because the function parameter must be modified between recursive calls.
"""
xml_element_tag = xml_element.tag[len(toc_nsmap["nt"]) + 2:]
if xml_element_tag == "tree":
return list(filter(
None,
(
toc_xml_element_to_json(source_repo, source_tree, repo, dataset_pair_by_dataset_code, data_package_tree,
child_element, processed_datasets_codes)
child_element, processed_datasets_codes, leaf_index)
for child_element in xml_element
)
))
......@@ -297,7 +300,7 @@ def toc_xml_element_to_json(source_repo, source_tree, repo, dataset_pair_by_data
None,
(
toc_xml_element_to_json(source_repo, source_tree, repo, dataset_pair_by_dataset_code, data_package_tree,
child_element, processed_datasets_codes)
child_element, processed_datasets_codes, leaf_index)
for child_element in xml_element.iterfind("nt:children/*", namespaces=toc_nsmap)
)
))
......@@ -326,6 +329,7 @@ def toc_xml_element_to_json(source_repo, source_tree, repo, dataset_pair_by_data
data_package_tree.add(dataset_code.encode('utf-8'), git_tree_filemode, dataset_pair[0].encode('utf-8'))
return categories_tree_dataset_json
else:
leaf_index[0] += 1
if (args.datasets is None or dataset_code in args.datasets) and \
(args.exclude_datasets is None or dataset_code not in args.exclude_datasets) and \
(args.start_from is None or dataset_code == args.start_from):
......@@ -342,7 +346,8 @@ def toc_xml_element_to_json(source_repo, source_tree, repo, dataset_pair_by_data
if sdmx_blob is not None:
if dataset_code not in processed_datasets_codes:
sdmx_blob_data = sdmx_blob.data
log.info("Converting SDMX source file %s (size: %d)", sdmx_blob_name, len(sdmx_blob_data))
log.info("Converting SDMX source file %s (nb %d, size: %d)",
sdmx_blob_name, leaf_index[0], len(sdmx_blob_data))
pack_start_time = time.time()
write_pack(
pack_file_path,
......@@ -459,7 +464,7 @@ def main():
processed_datasets_codes = set()
categories_tree_json = toc_xml_element_to_json(source_repo, source_tree, repo, dataset_pair_by_dataset_code,
data_package_tree, xml_element=toc_element,
processed_datasets_codes=processed_datasets_codes)
processed_datasets_codes=processed_datasets_codes, leaf_index=[0])
# Write datasets index in Git repository, which was modified above by a side-effect.
# TODO Write during iteration in case script crashes.
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment