diff --git a/eurostat_to_dbnomics.py b/eurostat_to_dbnomics.py index 91105930f000b892c6f4337f423c440e7f6236f9..5ffc055d33395a354f5774c5fb5e253fdfc970ac 100755 --- a/eurostat_to_dbnomics.py +++ b/eurostat_to_dbnomics.py @@ -66,7 +66,7 @@ sdmx_file_extension = ".sdmx.xml" def iter_git_objects_in_sdmx_file(sdmx_file_path, category_tree, validate_json): - log.info("Converting SDMX source file %s", sdmx_file_path) + log.info("Converting SDMX source file %s (size: %d)", sdmx_file_path, os.path.getsize(sdmx_file_path)) dsd_file_path = "{}.dsd.xml".format(sdmx_file_path[:-len(sdmx_file_extension)]) with open(dsd_file_path) as dsd_file: dsd_tree = etree.parse(dsd_file) @@ -355,11 +355,13 @@ def write_dataset_pack(sdmx_file_path, git_objects, target_dir): yielded_ids.add(git_object_id) yield git_object + pack_start_time = time.time() dataset_code = os.path.basename(sdmx_file_path[:-len(sdmx_file_extension)]) pack_file_name = "pack-{}".format(dataset_code) pack_file_path = os.path.abspath(os.path.join(target_dir, "objects", "pack", pack_file_name)) - write_pack(pack_file_path, iter_object_path_pairs(iter_deduped(git_objects))) - log.info("Git pack file %s written", pack_file_path) + write_pack(pack_file_path, iter_deduped(git_objects)) + pack_time = time.time() - pack_start_time + log.info("Git pack file %s written, took %s seconds", pack_file_path, pack_time) # Dulwich functions sightly modified not to require passing objects length.