From a37dc46eda6f3f7e83dba4ad41ec2a81f474e018 Mon Sep 17 00:00:00 2001 From: Christophe Benz <christophe.benz@cepremap.org> Date: Thu, 16 Nov 2017 14:57:11 +0100 Subject: [PATCH] Add file size and time indicators --- eurostat_to_dbnomics.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/eurostat_to_dbnomics.py b/eurostat_to_dbnomics.py index 9110593..5ffc055 100755 --- a/eurostat_to_dbnomics.py +++ b/eurostat_to_dbnomics.py @@ -66,7 +66,7 @@ sdmx_file_extension = ".sdmx.xml" def iter_git_objects_in_sdmx_file(sdmx_file_path, category_tree, validate_json): - log.info("Converting SDMX source file %s", sdmx_file_path) + log.info("Converting SDMX source file %s (size: %d)", sdmx_file_path, os.path.getsize(sdmx_file_path)) dsd_file_path = "{}.dsd.xml".format(sdmx_file_path[:-len(sdmx_file_extension)]) with open(dsd_file_path) as dsd_file: dsd_tree = etree.parse(dsd_file) @@ -355,11 +355,13 @@ def write_dataset_pack(sdmx_file_path, git_objects, target_dir): yielded_ids.add(git_object_id) yield git_object + pack_start_time = time.time() dataset_code = os.path.basename(sdmx_file_path[:-len(sdmx_file_extension)]) pack_file_name = "pack-{}".format(dataset_code) pack_file_path = os.path.abspath(os.path.join(target_dir, "objects", "pack", pack_file_name)) - write_pack(pack_file_path, iter_object_path_pairs(iter_deduped(git_objects))) - log.info("Git pack file %s written", pack_file_path) + write_pack(pack_file_path, iter_deduped(git_objects)) + pack_time = time.time() - pack_start_time + log.info("Git pack file %s written, took %s seconds", pack_file_path, pack_time) # Dulwich functions sightly modified not to require passing objects length. -- GitLab