Commit c1abe354 authored by Bruno Duyé

Convert: add first version of dataset.json

parent 5cfc3ba0
@@ -89,6 +89,9 @@ def main():
log.info("* Appendix: {!r}".format(appendix_code))
# Iterate through datasets
appendix_path = os.path.join(source_dir, appendix_code)
# dataset_json_series_list = [] # to be written into "series" key of dataset.json
# dimensions_values_labels = defaultdict(dict)
# dimension_values_codes_by_labels = {} # cache for labels slugifications to make dimensions_codes
# open datasets.json and read datasets information
with open(os.path.join(appendix_path, "datasets.json")) as datasets_json_file:
datasets_information = json.load(datasets_json_file)
@@ -99,12 +102,15 @@
def treat_dataset(dataset_dict, appendix_path):
"""Create dataset dir, series files and dataset.json file
"""
dataset_code = dataset_dict["dataset_code"]
log.debug('- Dataset {!r}'.format(dataset_code))
# Create dataset dir
target_dataset_path = os.path.join(target_dir, dataset_code)
os.mkdir(target_dataset_path)
# Iterate through dataset source files
dataset_json_series_list = [] # to be written in dataset.json
for dataset_source_file_info in dataset_dict['files']:
dataset_source_filename = dataset_source_file_info['filename']
log.debug(dataset_source_filename)
@@ -114,27 +120,43 @@ def treat_dataset(dataset_dict, appendix_path):
# Read each observation and create series files
current_series_code = None
current_observations_file = None
for observation_json in observations_source_json["Data"]:
series_code = observation_json["SeriesCode"] + '-' + dataset_source_file_info['dimension_code']
for observation_dict in observations_source_json["Data"]:
series_code = observation_dict["SeriesCode"] + '-' + dataset_source_file_info["dimension_value_code"]
if current_series_code is None or series_code != current_series_code:
# This is the beginning of a new series
log.debug(" - series {!r}".format(series_code))
if current_observations_file:
# This is also the end of the previous series (i.e. not the beginning of the first one)
current_observations_file.close()
dimensions = toolz.keyfilter(
lambda key: not key in ["DataValue", "LineDescription", "LineNumber",
"SeriesCode", "TableName", "TimePeriod", "UNIT_MULT", "NoteRef"],
observation_json
)
dimensions = {
dataset_dict["dimension_code"]: dataset_source_file_info["dimension_value_code"], # Ex: 'FREQ': 'Q'
"concept": slugify(observation_dict["LineDescription"]),
}
dataset_json_series_list.append({
'code': series_code,
'name': observation_dict['LineDescription'],
'dimensions': dimensions
})
current_series_code = series_code
current_observations_filename = find_available_name_for_series_file(current_series_code, target_dataset_path)
current_observations_filepath = os.path.join(target_dataset_path, current_observations_filename)
current_observations_file = open(current_observations_filepath, "w")
current_observations_file.write("PERIOD\tVALUE\n")
# Prepare value
observation_value_str = str(numbers.parse_decimal(observation_json["DataValue"], locale='en_us'))
current_observations_file.write(observation_json["TimePeriod"] + "\t" + observation_value_str + "\n")
observation_value_str = str(numbers.parse_decimal(observation_dict["DataValue"], locale='en_us'))
current_observations_file.write(observation_dict["TimePeriod"] + "\t" + observation_value_str + "\n")
# Write dataset.json
write_json_file(os.path.join(target_dataset_path, "dataset.json"), {
'code': dataset_code,
# 'name': , # TODO
'series': dataset_json_series_list,
# 'dimensions_codes_order': ,# TODO
# 'dimensions_labels': ,# TODO
# 'dimensions_values_labels': dimensions_values_labels, # TODO: manually add frequencies labels
})
def write_json_file(file_path, data):
with open(file_path, 'w', encoding='utf-8') as file_:
......
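The diff is truncated at write_json_file. As a minimal sketch, assuming the helper simply dumps its argument as UTF-8 JSON (the json.dump options and every example code, label and dimension value below are illustrative assumptions, not taken from the commit or from BEA data), the resulting dataset.json would look roughly like this:

import json

def write_json_file(file_path, data):
    # Assumed body: serialize `data` as UTF-8 JSON (the exact options are a guess).
    with open(file_path, 'w', encoding='utf-8') as file_:
        json.dump(data, file_, ensure_ascii=False, indent=2, sort_keys=True)

# Illustrative shape of the dataset.json written at the end of treat_dataset,
# with made-up series code, line description and dimension values:
example_dataset_json = {
    'code': 'T10101',                                     # dataset_code
    'series': [
        {
            'code': 'A191RL-Q',                           # SeriesCode + '-' + dimension_value_code
            'name': 'Gross domestic product',             # LineDescription
            'dimensions': {
                'FREQ': 'Q',                              # dataset_dict["dimension_code"]: dimension value code
                'concept': 'gross-domestic-product',      # slugify(LineDescription)
            },
        },
    ],
    # 'name', 'dimensions_codes_order', 'dimensions_labels' and
    # 'dimensions_values_labels' are still TODOs in this first version.
}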