From 03c751a5f055e7ff90cd324c03cede6ea22a72c3 Mon Sep 17 00:00:00 2001
From: Christophe Benz <christophe.benz@cepremap.org>
Date: Fri, 1 Dec 2017 10:07:28 +0100
Subject: [PATCH] Log datasets counter

---
 eurostat_to_dbnomics.py | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/eurostat_to_dbnomics.py b/eurostat_to_dbnomics.py
index 9b69ca6..893cfeb 100755
--- a/eurostat_to_dbnomics.py
+++ b/eurostat_to_dbnomics.py
@@ -281,14 +281,17 @@ def iter_git_objects_in_sdmx_series_element(series_element, sdmx_blob_name, dsd_
 
 
 def toc_xml_element_to_json(source_repo, source_tree, repo, dataset_pair_by_dataset_code, data_package_tree, xml_element,
-                            processed_datasets_codes):
+                            processed_datasets_codes, leaf_index):
+    """
+    Note: leaf_index is a one-element list used as a mutable counter shared across recursive calls.
+    """
     xml_element_tag = xml_element.tag[len(toc_nsmap["nt"]) + 2:]
     if xml_element_tag == "tree":
         return list(filter(
             None,
             (
                 toc_xml_element_to_json(source_repo, source_tree, repo, dataset_pair_by_dataset_code, data_package_tree,
-                                        child_element, processed_datasets_codes)
+                                        child_element, processed_datasets_codes, leaf_index)
                 for child_element in xml_element
             )
         ))
@@ -297,7 +300,7 @@ def toc_xml_element_to_json(source_repo, source_tree, repo, dataset_pair_by_data
             None,
             (
                 toc_xml_element_to_json(source_repo, source_tree, repo, dataset_pair_by_dataset_code, data_package_tree,
-                                        child_element, processed_datasets_codes)
+                                        child_element, processed_datasets_codes, leaf_index)
                 for child_element in xml_element.iterfind("nt:children/*", namespaces=toc_nsmap)
             )
         ))
@@ -326,6 +329,7 @@ def toc_xml_element_to_json(source_repo, source_tree, repo, dataset_pair_by_data
             data_package_tree.add(dataset_code.encode('utf-8'), git_tree_filemode, dataset_pair[0].encode('utf-8'))
             return categories_tree_dataset_json
         else:
+            leaf_index[0] += 1
             if (args.datasets is None or dataset_code in args.datasets) and \
                     (args.exclude_datasets is None or dataset_code not in args.exclude_datasets) and \
                     (args.start_from is None or dataset_code == args.start_from):
@@ -342,7 +346,8 @@ def toc_xml_element_to_json(source_repo, source_tree, repo, dataset_pair_by_data
                 if sdmx_blob is not None:
                     if dataset_code not in processed_datasets_codes:
                         sdmx_blob_data = sdmx_blob.data
-                        log.info("Converting SDMX source file %s (size: %d)", sdmx_blob_name, len(sdmx_blob_data))
+                        log.info("Converting SDMX source file %s (nb %d, size: %d)",
+                                 sdmx_blob_name, leaf_index[0], len(sdmx_blob_data))
                         pack_start_time = time.time()
                         write_pack(
                             pack_file_path,
@@ -459,7 +464,7 @@ def main():
     processed_datasets_codes = set()
     categories_tree_json = toc_xml_element_to_json(source_repo, source_tree, repo, dataset_pair_by_dataset_code,
                                                    data_package_tree, xml_element=toc_element,
-                                                   processed_datasets_codes=processed_datasets_codes)
+                                                   processed_datasets_codes=processed_datasets_codes, leaf_index=[0])
 
     # Write datasets index in Git repository, which was modified above by a side-effect.
     # TODO Write during iteration in case script crashes.
-- 
GitLab