Commit 7ca83d37 authored by Bruno Duyé

Add "unit" dimension + better computation of "metric" labels

parent fef728e7
......@@ -75,7 +75,7 @@ DIMENSIONS_VALUES_LABELS = {
},
}
DIMENSIONS_ORDER = ['industry', 'sub-industry', 'concept', 'FREQ', 'metric']
DIMENSIONS_ORDER = ['industry', 'sub-industry', 'concept', 'FREQ', 'metric', 'unit']
log = logging.getLogger(__name__)
......@@ -186,25 +186,46 @@ def treat_dataset(dataset_dict, appendix_path, appendix_dict):
else:
return None
def compute_metric_name(observation_dict):
    """Build the human-readable metric label of an observation.

    The label combines the ``METRIC_NAME`` value with the order of magnitude
    given by ``UNIT_MULT`` (a power of ten, stored as a string).
    See https://git.nomics.world/dbnomics-fetchers/management/issues/491 for details.

    Args:
        observation_dict: mapping holding ``METRIC_NAME`` and, optionally,
            ``UNIT_MULT``. A missing ``UNIT_MULT`` is treated as ``'0'``.

    Returns:
        The label as a string, or None when ``METRIC_NAME`` is missing or
        empty, or when ``UNIT_MULT`` is neither ``'0'`` nor a known order of
        magnitude (a warning is logged in that last case).

    Note that ``str.capitalize()`` lowercases everything after the first
    character; only the word "Dollars" is re-capitalized afterwards, and only
    when a magnitude prefix is applied.

    >>> compute_metric_name({"METRIC_NAME": "Chained Dollars (Period Rate)", "UNIT_MULT": "9"})
    'Billions of chained Dollars (period rate)'
    >>> compute_metric_name({"METRIC_NAME": "Historical Cost", "UNIT_MULT": "3"})
    'Thousands of Dollars at historical cost'
    >>> compute_metric_name({"METRIC_NAME": "Physical Quantity", "UNIT_MULT": "3"})
    'Physical quantity in thousands'
    >>> compute_metric_name({"METRIC_NAME": "Persons"})
    'Persons'
    """
    magnitudes = {'3': 'thousands', '6': 'millions', '9': 'billions', '12': 'trillions'}
    metric = observation_dict.get('METRIC_NAME')
    if not metric:
        # Nothing to label: honour the documented "None if not applicable" contract
        # instead of raising KeyError.
        return None
    order_of_magnitude = observation_dict.get('UNIT_MULT', '0')
    if order_of_magnitude != '0' and order_of_magnitude not in magnitudes:
        log.warning("Can't find metric prefix for {!r}. Know values are {!r}".format(
            order_of_magnitude, magnitudes.keys()))
        return None
    template = "{magnitude} of {metric}"
    if metric.lower() == "historical cost":
        # "Historical Cost" is reworded so that the magnitude prefix reads naturally.
        metric = "Dollars at historical cost"
    if metric.lower() == 'physical quantity':
        # Physical quantities read better with the magnitude as a suffix.
        template = "{metric} in {magnitude}"
    if order_of_magnitude == '0':
        return metric.capitalize()
    label = template.format(magnitude=magnitudes[order_of_magnitude], metric=metric)
    # capitalize() lowercases "Dollars"; restore its capital letter.
    return label.capitalize().replace('dollars', 'Dollars')
# Prefix dataset_code with appendix_code because dataset names collide across appendices
dataset_code = appendix_dict['code'] + '-' + dataset_dict["dataset_code"]
......@@ -286,12 +307,21 @@ def treat_dataset(dataset_dict, appendix_path, appendix_dict):
dataset_dimension_value_code = dataset_source_file_info["dimension_value_code"]
series_dimensions[dataset_dimension_code] = dataset_dimension_value_code # Ex: 'FREQ': 'Q'
dimensions_values_labels[dataset_dimension_code][dataset_dimension_value_code] = DIMENSIONS_VALUES_LABELS[dataset_dimension_code][dataset_dimension_value_code]
# Add 'metric' dimension values (https://git.nomics.world/dbnomics-fetchers/management/issues/491)
series_metric_label = get_series_metric(observation_dict)
if series_metric_label:
series_metric_code = slugify(series_metric_label)
series_dimensions['metric'] = series_metric_code
dimensions_values_labels['metric'][series_metric_code] = series_metric_label
# Add 'metric' and 'unit' dimensions values (https://git.nomics.world/dbnomics-fetchers/management/issues/491)
if observation_dict.get('METRIC_NAME'):
series_metric_name = compute_metric_name(observation_dict)
if not series_metric_name:
log.warning("Unexpected case: couldn't compute metric name for {}/{} ({!r})".format(
dataset_code, series_code, observation_dict['METRIC_NAME']))
if series_metric_name:
series_metric_code = slugify(series_metric_name)
series_dimensions['metric'] = series_metric_code
dimensions_values_labels['metric'][series_metric_code] = series_metric_name
series_unit_label = observation_dict.get('CL_UNIT')
if series_unit_label:
series_unit_code = slugify(series_unit_label)
series_dimensions['unit'] = series_unit_code
dimensions_values_labels['unit'][series_unit_code] = series_unit_label
# Add series information to future series list of dataset.json file
dataset_json_series_list.append(filter_none_and_empty({
'code': series_code,
......@@ -356,7 +386,7 @@ def treat_dataset(dataset_dict, appendix_path, appendix_dict):
'industry': 'Industry',
'sub-industry': 'Industry precision',
'FREQ': 'Frequency',
'metric': 'metric',
'metric': 'Metric',
},
'dimensions_values_labels': dimensions_values_labels,
# We take the last series's dimensions to get dataset's dimensions (assuming that all series in each dataset have the same dimensions)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment