Commit c3815640 authored by Bruno Duyé's avatar Bruno Duyé

WIP

parent d149b997
......@@ -32,12 +32,13 @@ target_dir: path of target directory to write datasets & series in DBnomics form
Options:
--debug show debug output and make some additional tests during the process
--only <dataset_code> only convert given dataset_code
--only <appendix_code> only convert given appendix_code
"""
import logging
import os
# import subprocess
import sys
from collections import defaultdict
......@@ -171,6 +172,9 @@ def treat_dataset(dataset_dict, appendix_path):
'name': observation_dict['LineDescription'],
'dimensions': dimensions
})
# # If this file is a duplicate of another (two series with the same code), check that they are equal
# if debug_mode and current_observations_file.name.endswith('-dup.tsv'):
# assert_same_files_content(current_observations_file.name, current_observations_file.name.replace('-dup', ''))
current_series_code = series_code
current_observations_filename = find_available_name_for_series_file(current_series_code, target_dataset_path)
current_observations_filepath = os.path.join(target_dataset_path, current_observations_filename)
......@@ -196,6 +200,32 @@ def treat_dataset(dataset_dict, appendix_path):
return dataset_code, dataset_name
def find_available_name_for_series_file(series_code, target_path):
    """Return a TSV file name for `series_code` that does not exist yet in `target_path`.

    The first candidate is "<series_code>.tsv". Each time the candidate already
    exists in `target_path`, one more "-dup" suffix is added before the
    extension ("<series_code>-dup.tsv", "<series_code>-dup-dup.tsv", ...).
    Only the file name is returned, not the full path; nothing is created on disk.
    """
    duplicates_count = 0
    candidate = "{}{}.tsv".format(series_code, '')
    # Keep appending "-dup" until the candidate name is free in the target directory
    while os.path.exists(os.path.join(target_path, candidate)):
        duplicates_count += 1
        candidate = "{}{}.tsv".format(series_code, '-dup' * duplicates_count)
    return candidate
def assert_same_files_content(filepath1, filepath2):
    """Check that the two files have identical contents
    Raise an exception if not

    The comparison is done in-process instead of running the system diff tool
    through a shell: no command line is built from the file paths, so paths
    containing quotes or shell metacharacters are safe, and the check does not
    depend on a "diff" executable being installed.

    filepath1, filepath2: paths of the two files to compare
    Raises Exception if the contents differ; an OSError is raised if one of the
    files can't be read.
    """
    # Imported locally so this function does not depend on a module-level import
    import filecmp

    # shallow=False forces a comparison of the contents rather than trusting
    # identical os.stat() signatures (type, size, modification time)
    if not filecmp.cmp(filepath1, filepath2, shallow=False):
        raise Exception("Error: the two series files {!r} and {!r} have the same series code but different contents!".format(filepath1, filepath2))
    log.debug("-> the two files are identical.")
def write_json_file(file_path, data):
    """Serialize `data` as JSON into `file_path`, overwriting any existing file.

    The file is written as UTF-8 with non-ASCII characters kept as-is, a
    2-space indentation and keys sorted.
    """
    dump_options = {'ensure_ascii': False, 'indent': 2, 'sort_keys': True}
    with open(file_path, mode='w', encoding='utf-8') as json_file:
        json.dump(data, json_file, **dump_options)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment