...
 
Commits (3)
......@@ -75,7 +75,7 @@ DIMENSIONS_VALUES_LABELS = {
},
}
# Dimension codes, in order. 'metric' and 'unit' are the two per-series
# dimensions filled in treat_dataset() from the observation's METRIC_NAME and
# CL_UNIT fields.
DIMENSIONS_ORDER = ['industry', 'sub-industry', 'concept', 'FREQ', 'metric', 'unit']
log = logging.getLogger(__name__)
......@@ -119,7 +119,11 @@ def main():
for dataset_dict in datasets_information:
if args['--limit_nb_datasets'] and nb_datasets_treated >= int(args['--limit_nb_datasets']):
break
dataset_code, dataset_name = treat_dataset(dataset_dict, appendix_path, appendix_dict)
try:
dataset_code, dataset_name = treat_dataset(dataset_dict, appendix_path, appendix_dict)
except:
logging.error("Exception while convert {!r} dataset".format(dataset_dict['dataset_code']))
raise
# Add dataset information to category_tree
category_tree_appendix_datasets.append(toolz.valfilter(lambda e: e, { # filter not provided names
"code": dataset_code,
......@@ -182,25 +186,46 @@ def treat_dataset(dataset_dict, appendix_path, appendix_dict):
else:
return None
def compute_metric_name(observation_dict):
    """Return a human-readable metric label, or None if not applicable.

    The label combines the observation's ``METRIC_NAME`` with the order of
    magnitude given by ``UNIT_MULT`` (a power of ten, as a string).
    See https://git.nomics.world/dbnomics-fetchers/management/issues/491 for details.

    Args:
        observation_dict: mapping read for its 'METRIC_NAME' and (optional)
            'UNIT_MULT' keys. A missing 'UNIT_MULT' is treated as '0'.

    Returns:
        The label as a str, or None when 'METRIC_NAME' is missing/empty or
        when 'UNIT_MULT' is neither '0' nor a known magnitude (a warning is
        logged in the latter case).

    Note that the label is normalised with ``str.capitalize()``, which
    lowercases everything after the first character; only the word "Dollars"
    gets its capital back.

    >>> compute_metric_name({"METRIC_NAME": "Chained Dollars (Period Rate)", "UNIT_MULT": "9"})
    'Billions of chained Dollars (period rate)'
    >>> compute_metric_name({"METRIC_NAME": "Historical Cost", "UNIT_MULT": "3"})
    'Thousands of Dollars at historical cost'
    >>> compute_metric_name({"METRIC_NAME": "Physical Quantity", "UNIT_MULT": "3"})
    'Physical quantity in thousands'
    >>> compute_metric_name({"METRIC_NAME": "Current Dollars", "UNIT_MULT": "0"})
    'Current Dollars'
    """
    magnitudes = {'3': 'thousands', '6': 'millions', '9': 'billions', '12': 'trillions'}

    # Honour the "None if not applicable" contract instead of raising KeyError.
    metric = observation_dict.get('METRIC_NAME')
    if not metric:
        return None

    order_of_magnitude = observation_dict.get('UNIT_MULT', '0')
    if order_of_magnitude != '0' and order_of_magnitude not in magnitudes:
        log.warning("Can't find metric prefix for {!r}. Know values are {!r}".format(
            order_of_magnitude, magnitudes.keys()))
        return None

    # Two metric names get a dedicated wording; everything else is rendered
    # as "<magnitude> of <metric>".
    template = "{magnitude} of {metric}"
    lowered_metric = metric.lower()
    if lowered_metric == 'historical cost':
        metric = "Dollars at historical cost"
    elif lowered_metric == 'physical quantity':
        template = "{metric} in {magnitude}"

    if order_of_magnitude == '0':
        label = metric
    else:
        label = template.format(magnitude=magnitudes[order_of_magnitude], metric=metric)

    # capitalize() lowercases "Dollars"; restore it on every path so that
    # scaled and unscaled labels are spelled consistently.
    return label.capitalize().replace('dollars', 'Dollars')
# Prefix dataset_code with the appendix code because dataset names collide between appendices
dataset_code = appendix_dict['code'] + '-' + dataset_dict["dataset_code"]
......@@ -282,8 +307,17 @@ def treat_dataset(dataset_dict, appendix_path, appendix_dict):
dataset_dimension_value_code = dataset_source_file_info["dimension_value_code"]
series_dimensions[dataset_dimension_code] = dataset_dimension_value_code # Ex: 'FREQ': 'Q'
dimensions_values_labels[dataset_dimension_code][dataset_dimension_value_code] = DIMENSIONS_VALUES_LABELS[dataset_dimension_code][dataset_dimension_value_code]
# Add unit dimension (https://git.nomics.world/dbnomics-fetchers/management/issues/491)
series_unit_label = get_series_unit(observation_dict)
# Add 'metric' and 'unit' dimensions values (https://git.nomics.world/dbnomics-fetchers/management/issues/491)
if observation_dict.get('METRIC_NAME'):
series_metric_name = compute_metric_name(observation_dict)
if not series_metric_name:
log.warning("Unexpected case: couldn't compute metric name for {}/{} ({!r})".format(
dataset_code, series_code, observation_dict['METRIC_NAME']))
if series_metric_name:
series_metric_code = slugify(series_metric_name)
series_dimensions['metric'] = series_metric_code
dimensions_values_labels['metric'][series_metric_code] = series_metric_name
series_unit_label = observation_dict.get('CL_UNIT')
if series_unit_label:
series_unit_code = slugify(series_unit_label)
series_dimensions['unit'] = series_unit_code
......@@ -352,7 +386,7 @@ def treat_dataset(dataset_dict, appendix_path, appendix_dict):
'industry': 'Industry',
'sub-industry': 'Industry precision',
'FREQ': 'Frequency',
'unit': 'Unit',
'metric': 'Metric',
},
'dimensions_values_labels': dimensions_values_labels,
# We take the last series's dimensions to get dataset's dimensions (assuming that all series in each dataset have the same dimensions)
......