Skip to content
Snippets Groups Projects

WIP: #45 - WTO: Write datasets in JSON repo for "annually" category

Closed Bruno Duyé requested to merge dev into master
All threads resolved!
1 file
+18 −19
Compare changes
  • Side-by-side
  • Inline
+18 −19
@@ -368,27 +368,9 @@ def create_dataset_and_series_from_csv(dataset, dataset_path):
observations[dimensions_key] = {'unit': row['Unit'], 'observations': []}
observations[dimensions_key]['observations'].append([row['Year'], row['Value']])
# prepare data to be written in dataset.json
# Create series directories, each including series.json and observations.tsv files.
dimensions_labels = dataset['dimensions_names_and_codes_colnames'].keys()
dimensions_codes = list(map(cached_slugify, dimensions_labels))
dataset_json_data = {
'name': dataset['name'],
'dataset_code': dataset['code'],
'codelists': {
dimension_code: {
dimension_value_code: dimensions_values_labels[dimension_code][dimension_value_code]
for dimension_value_code in dimensions_values_labels_codes
}
for dimension_code, dimensions_values_labels_codes in found_dimensions_values_codes.items()
},
'concepts': {
cached_slugify(dimension_label): dimension_label
for dimension_label in dimensions_labels
},
'dimension_keys': dimensions_codes,
}
# Create series directories, each including series.json and observations.tsv files.
series_directories_names = set()
for dimensions_key in dimensions_keys_set:
dimensions_key_labels_by_dimension_code = OrderedDict(zip(dimensions_codes, dimensions_key))
@@ -418,6 +400,23 @@ def create_dataset_and_series_from_csv(dataset, dataset_path):
series = verified_value(validate_series(series, format='json'))
write_json_file(os.path.join(series_dir_path, 'series.json'), series)
# prepare data to be written in dataset.json
dataset_json_data = {
'name': dataset['name'],
'dataset_code': dataset['code'],
'codelists': {
dimension_code: {
dimension_value_code: dimensions_values_labels[dimension_code][dimension_value_code]
for dimension_value_code in dimensions_values_labels_codes
}
for dimension_code, dimensions_values_labels_codes in found_dimensions_values_codes.items()
},
'concepts': {
cached_slugify(dimension_label): dimension_label
for dimension_label in dimensions_labels
},
'dimension_keys': dimensions_codes,
}
dataset_json_data["series"] = list(sorted(series_directories_names))
# TODO: uncomment once skip_series_duplicates_checking has been added to validate_dataset() (#45); validation is disabled for now for performance reasons
# dataset_json_data = verified_value(validate_dataset(dataset_json_data, format='json'))