From a37dc46eda6f3f7e83dba4ad41ec2a81f474e018 Mon Sep 17 00:00:00 2001
From: Christophe Benz <christophe.benz@cepremap.org>
Date: Thu, 16 Nov 2017 14:57:11 +0100
Subject: [PATCH] Add file size and time indicators

---
 eurostat_to_dbnomics.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/eurostat_to_dbnomics.py b/eurostat_to_dbnomics.py
index 9110593..5ffc055 100755
--- a/eurostat_to_dbnomics.py
+++ b/eurostat_to_dbnomics.py
@@ -66,7 +66,7 @@ sdmx_file_extension = ".sdmx.xml"
 
 
 def iter_git_objects_in_sdmx_file(sdmx_file_path, category_tree, validate_json):
-    log.info("Converting SDMX source file %s", sdmx_file_path)
+    log.info("Converting SDMX source file %s (size: %d)", sdmx_file_path, os.path.getsize(sdmx_file_path))
     dsd_file_path = "{}.dsd.xml".format(sdmx_file_path[:-len(sdmx_file_extension)])
     with open(dsd_file_path) as dsd_file:
         dsd_tree = etree.parse(dsd_file)
@@ -355,11 +355,13 @@ def write_dataset_pack(sdmx_file_path, git_objects, target_dir):
                 yielded_ids.add(git_object_id)
                 yield git_object
 
+    pack_start_time = time.time()
     dataset_code = os.path.basename(sdmx_file_path[:-len(sdmx_file_extension)])
     pack_file_name = "pack-{}".format(dataset_code)
     pack_file_path = os.path.abspath(os.path.join(target_dir, "objects", "pack", pack_file_name))
-    write_pack(pack_file_path, iter_object_path_pairs(iter_deduped(git_objects)))
-    log.info("Git pack file %s written", pack_file_path)
+    write_pack(pack_file_path, iter_deduped(git_objects))
+    pack_time = time.time() - pack_start_time
+    log.info("Git pack file %s written, took %s seconds", pack_file_path, pack_time)
 
 
 # Dulwich functions sightly modified not to require passing objects length.
-- 
GitLab