
WIP: #45 - WTO: Write datasets in JSON repo for "annually" category

Closed · Bruno Duyé requested to merge dev into master
All threads resolved!
+19 −19
@@ -44,7 +44,7 @@ import xlrd
 from docopt import docopt
 from slugify import slugify
-from dbnomics_converters.base import verified_value, to_float
+from dbnomics_converters.base import assert_no_error, to_float
 from dbnomics_converters.datasets import validate_dataset
 from dbnomics_converters.providers import validate_provider
 from dbnomics_converters.series import validate_series
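For context on the helper swap: verified_value becomes assert_no_error across the whole script. A minimal sketch of the assumed contract, purely illustrative (the (value, error) tuple convention and the toy validator are assumptions, not the documented dbnomics_converters API):

    # Hypothetical sketch: validators are assumed to return a (value, error)
    # pair; assert_no_error unwraps the value and fails loudly on any error,
    # presumably what verified_value did under its old name.
    def assert_no_error(value_and_error):
        value, error = value_and_error
        assert error is None, 'validation failed: {}'.format(error)
        return value

    def toy_validator(data, format='json'):
        # Stand-in for validate_provider / validate_series / validate_dataset.
        return data, (None if isinstance(data, dict) else 'expected a dict')

    provider = assert_no_error(toy_validator({'code': 'WTO'}, format='json'))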
@@ -55,8 +55,8 @@ log = logging.getLogger(__name__)
 PROVIDER = dict(
-    name='WTO',
-    long_name='World Trade Organization',
+    code='WTO',
+    name='World Trade Organization',
     region='World',
     website='https://www.wto.org/',
     terms_of_use='https://www.wto.org/english/tratop_e/trips_e/intel2_e.htm'
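After the rename, 'code' carries the short identifier and 'name' the human-readable label, so the provider metadata reads (values copied from the diff):

    PROVIDER = dict(
        code='WTO',
        name='World Trade Organization',
        region='World',
        website='https://www.wto.org/',
        terms_of_use='https://www.wto.org/english/tratop_e/trips_e/intel2_e.htm',
    )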
@@ -213,7 +213,7 @@ def main():
     # Create provider.json
     provider_json_data = PROVIDER
     provider_json_data['categories'] = [category['name'] for category in CATEGORIES]
-    provider_json_data = verified_value(validate_provider(provider_json_data, format='json'))
+    provider_json_data = assert_no_error(validate_provider(provider_json_data, format='json'))
     write_json_file(os.path.join(target_dir, 'provider.json'), provider_json_data)
     for category in CATEGORIES:
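write_json_file itself is not part of this diff; a plausible sketch of what the helper does, offered only as an assumption:

    import json

    # Hypothetical implementation; the real helper lives outside this MR.
    def write_json_file(file_path, data):
        with open(file_path, 'w', encoding='utf-8') as f:
            json.dump(data, f, ensure_ascii=False, indent=2, sort_keys=True)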
@@ -242,7 +242,7 @@ def create_directories_subtree(category_or_dataset, parent_category_path):
         ],
         'category_code': category['code']
     }
-    # category_json_data = verified_value(validate_category(category_json_data, format='json', used_categories_code=used_categories_code))
+    # category_json_data = assert_no_error(validate_category(category_json_data, format='json', used_categories_code=used_categories_code))
     write_json_file(os.path.join(category_path, element_dirname, 'category.json'), category_json_data)
     element_type = category_or_dataset['type']
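The key that opens the list closed by the `],` above is truncated out of the hunk. An illustrative payload, with a hypothetical 'children' placeholder standing in for that truncated key and values invented for the "annually" category named in the MR title:

    category_json_data = {
        'name': 'annually',        # illustrative
        'children': [],            # hypothetical placeholder for the truncated key
        'category_code': 'annually',
    }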
@@ -382,7 +382,7 @@ def create_dataset_and_series_from_csv(dataset, dataset_path):
         for dimension_code, label in dimensions_key_labels_by_dimension_code.items()
     ]
     series = {
-        'key': '-'.join(dimensions_key_values_codes),
+        'code': '-'.join(dimensions_key_values_codes),
         'dimensions': dict(zip(dimensions_codes, dimensions_key_values_codes)),
     }
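Renaming 'key' to 'code' leaves the identifier construction unchanged; a worked example with illustrative dimension values:

    dimensions_codes = ['flow', 'region']               # illustrative
    dimensions_key_values_codes = ['exports', 'world']  # illustrative

    series = {
        'code': '-'.join(dimensions_key_values_codes),
        'dimensions': dict(zip(dimensions_codes, dimensions_key_values_codes)),
    }
    assert series == {
        'code': 'exports-world',
        'dimensions': {'flow': 'exports', 'region': 'world'},
    }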
@@ -400,28 +400,28 @@ def create_dataset_and_series_from_csv(dataset, dataset_path):
         )
         # Create series.json
-        series = verified_value(validate_series(series, format='json'))
+        series = assert_no_error(validate_series(series, format='json'))
         write_json_file(os.path.join(series_dir_path, 'series.json'), series)
     # prepare data to be written in dataset.json
     dataset_json_data = {
         'name': dataset['name'],
-        'dataset_code': dataset['code'],
-        'codelists': {
+        'code': dataset['code'],
+        'dimensions_values_labels': {
             dimension_code: {
                 dimension_value_code: dimensions_values_labels[dimension_code][dimension_value_code]
                 for dimension_value_code in dimensions_values_labels_codes
             }
             for dimension_code, dimensions_values_labels_codes in found_dimensions_values_codes.items()
         },
-        'concepts': {
+        'dimensions_labels': {
             cached_slugify(dimension_label): dimension_label
             for dimension_label in dimensions_labels
         },
-        'dimension_keys': dimensions_codes,
+        'dimensions_codes_order': dimensions_codes,
     }
     dataset_json_data["series"] = list(sorted(series_directories_names))
-    dataset_json_data = verified_value(validate_dataset(dataset_json_data, format='json',
+    dataset_json_data = assert_no_error(validate_dataset(dataset_json_data, format='json',
                                                         skip_series_duplicates_check=True))
     write_json_file(os.path.join(dataset_path, 'dataset.json'), dataset_json_data)
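Putting the renames together, dataset.json for the CSV path now has the shape below; the keys come from the diff, the values are invented for illustration:

    dataset_json_data = {
        'name': 'Merchandise trade',    # hypothetical
        'code': 'merchandise-trade',    # hypothetical
        'dimensions_values_labels': {
            'flow': {'exports': 'Exports', 'imports': 'Imports'},
            'region': {'world': 'World'},
        },
        'dimensions_labels': {'flow': 'Flow', 'region': 'Region'},
        'dimensions_codes_order': ['flow', 'region'],
        'series': ['exports-world', 'imports-world'],
    }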
@@ -464,7 +464,7 @@ def create_dataset_and_series_from_xls(dataset, dataset_path):
         series_dir_path = os.path.join(dataset_path, series_directory_name)
         os.mkdir(series_dir_path)
         # Create series.json
-        series = verified_value(validate_series(series, format='json'))
+        series = assert_no_error(validate_series(series, format='json'))
         write_json_file(os.path.join(series_dir_path, 'series.json'), series)
         # Write series observations
         write_series_tsv_file(series_dir_path, observations, unit)
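write_series_tsv_file is also outside the diff; a sketch under the assumption that observations iterate as (period, value) pairs, one row each (the file name and column layout are guesses):

    import csv
    import os

    # Hypothetical sketch only; the real helper is not shown in this MR.
    def write_series_tsv_file(series_dir_path, observations, unit):
        tsv_path = os.path.join(series_dir_path, 'observations.tsv')
        with open(tsv_path, 'w', newline='', encoding='utf-8') as f:
            writer = csv.writer(f, delimiter='\t')
            writer.writerow(['PERIOD', 'VALUE', 'UNIT'])
            for period, value in observations:
                writer.writerow([period, value, unit])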
@@ -520,7 +520,7 @@ def create_dataset_and_series_from_xls(dataset, dataset_path):
         generated_region_code = region_code or slugify(region_label)
         regions_codes.add((region_label, generated_region_code))
         series = dict(
-            key=series_directory_name,
+            code=series_directory_name,
             dimensions=dict(
                 Flow=flow_code,
                 Region=generated_region_code
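The fallback at the top of this hunk slugifies the label whenever the source sheet provides no explicit region code; for example:

    from slugify import slugify

    region_label = 'European Union'  # illustrative
    region_code = None               # no explicit code in the source sheet
    generated_region_code = region_code or slugify(region_label)
    assert generated_region_code == 'european-union'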
@@ -535,9 +535,9 @@ def create_dataset_and_series_from_xls(dataset, dataset_path):
     dimension_codes = [slugify(label) for label in xls_constants['dimensions_labels']]
     dataset_json_data = {
         'name': dataset['name'],
-        'dataset_code': dataset['code'],
-        'dimension_keys': dimension_codes,
-        'codelists': {
+        'code': dataset['code'],
+        'dimensions_codes_order': dimension_codes,
+        'dimensions_values_labels': {
             'flow': {
                 label_and_code[1]: label_and_code[0]
                 for label_and_code in xls_constants['flow_codes_and_names_by_sheet_names'].values()
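The comprehension above inverts (label, code) pairs into the code-to-label mapping expected under 'dimensions_values_labels'; a worked example with made-up sheet names and pairs:

    # Hypothetical values; only the comprehension itself comes from the diff.
    flow_codes_and_names_by_sheet_names = {
        'Exports sheet': ('Exports', 'exports'),
        'Imports sheet': ('Imports', 'imports'),
    }
    flow_codelist = {
        label_and_code[1]: label_and_code[0]
        for label_and_code in flow_codes_and_names_by_sheet_names.values()
    }
    assert flow_codelist == {'exports': 'Exports', 'imports': 'Imports'}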
@@ -547,14 +547,14 @@ def create_dataset_and_series_from_xls(dataset, dataset_path):
                 for region in regions_codes
             }
         },
-        'concepts': dict(zip(
+        'dimensions_labels': dict(zip(
             dimension_codes,
             xls_constants['dimensions_labels'],
         )),
     }
     # Finally, write dataset.json
     dataset_json_data["series"] = list(sorted(series_directories_names))
-    dataset_json_data = verified_value(validate_dataset(dataset_json_data, format='json'))
+    dataset_json_data = assert_no_error(validate_dataset(dataset_json_data, format='json'))
     write_json_file(os.path.join(dataset_path, 'dataset.json'), dataset_json_data)
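One small nitpick that applies to both code paths: sorted() already returns a list, so the list() wrapper around it is redundant:

    series_directories_names = {'exports-world', 'imports-world'}  # illustrative
    assert list(sorted(series_directories_names)) == sorted(series_directories_names)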