Skip to content
Snippets Groups Projects

WIP: #45 - WTO: Write datasets in JSON repo for "annually" category

Closed Bruno Duyé requested to merge dev into master
All threads resolved!
+ 23
21
@@ -366,29 +366,14 @@ def create_dataset_and_series_from_csv(dataset, dataset_path):
dimensions_keys_set.add(dimensions_key)
if not dimensions_key in observations.keys():
observations[dimensions_key] = {'unit': row['Unit'], 'observations': []}
assert row['Unit'].strip() == observations[dimensions_key]['unit'], \
"Unhandled situation: the file {} contains more than one value for 'Unit' !".format(dataset['filename'])
# Store observation
observations[dimensions_key]['observations'].append([row['Year'], row['Value']])
# prepare data to be written in dataset.json
# Create series directories, each including series.json and observations.tsv files.
dimensions_labels = dataset['dimensions_names_and_codes_colnames'].keys()
dimensions_codes = list(map(cached_slugify, dimensions_labels))
dataset_json_data = {
'name': dataset['name'],
'dataset_code': dataset['code'],
'codelists': {
dimension_code: {
dimension_value_code: dimensions_values_labels[dimension_code][dimension_value_code]
for dimension_value_code in dimensions_values_labels_codes
}
for dimension_code, dimensions_values_labels_codes in found_dimensions_values_codes.items()
},
'concepts': {
cached_slugify(dimension_label): dimension_label
for dimension_label in dimensions_labels
},
'dimension_keys': dimensions_codes,
}
# Create series directories, each including series.json and observations.tsv files.
series_directories_names = set()
for dimensions_key in dimensions_keys_set:
dimensions_key_labels_by_dimension_code = OrderedDict(zip(dimensions_codes, dimensions_key))
@@ -418,9 +403,26 @@ def create_dataset_and_series_from_csv(dataset, dataset_path):
series = verified_value(validate_series(series, format='json'))
write_json_file(os.path.join(series_dir_path, 'series.json'), series)
# prepare data to be written in dataset.json
dataset_json_data = {
'name': dataset['name'],
'dataset_code': dataset['code'],
'codelists': {
dimension_code: {
dimension_value_code: dimensions_values_labels[dimension_code][dimension_value_code]
for dimension_value_code in dimensions_values_labels_codes
}
for dimension_code, dimensions_values_labels_codes in found_dimensions_values_codes.items()
},
'concepts': {
cached_slugify(dimension_label): dimension_label
for dimension_label in dimensions_labels
},
'dimension_keys': dimensions_codes,
}
dataset_json_data["series"] = list(sorted(series_directories_names))
# NOTE: series duplicates checking is skipped for performance (#45); validate_dataset() now supports this directly.
dataset_json_data = verified_value(validate_dataset(dataset_json_data, format='json',
skip_series_duplicates_checking=True))
write_json_file(os.path.join(dataset_path, 'dataset.json'), dataset_json_data)