...
 
Commits (6)
......@@ -43,19 +43,13 @@ import sys
from collections import defaultdict
import humanize
from docopt import docopt
import toolz
from docopt import docopt
from slugify import slugify
import ine_fetcher_common
import ujson as json
# Content serialised to the target directory's datapackage.json file.
DATAPACKAGE_JSON = dict(
    dbnomics=dict(
        data_model_version="0.7.6",
    ),
)
PROVIDER_JSON = dict(
code='INE-SPAIN',
name='Instituto Nacional de Estadistica',
......@@ -80,9 +74,6 @@ def main():
# Did the user ask for a limit on the number of series per dataset?
limit_nb_dataset_series = int(args['--limit_nb_dataset_series']) if args['--limit_nb_dataset_series'] else None
# Write datapackage.json
write_json_file(os.path.join(target_dir, 'datapackage.json'), DATAPACKAGE_JSON)
# Write provider.json
write_json_file(os.path.join(target_dir, 'provider.json'), PROVIDER_JSON)
......@@ -103,9 +94,11 @@ def main():
dimensions_labels = {}
dimensions_values_labels = defaultdict(dict)
nb_series_converted = 1
dataset_series_list.clear()
series_iterator = iter_series(source_filepath)
for series_dict in series_iterator:
series_code = generate_series_code({k: list(v.keys())[0] for k, v in series_dict['dimensions_values'].items()})
series_code = generate_series_code({k: list(v.keys())[0]
for k, v in series_dict['dimensions_values'].items()})
# Create series observations file
with open(os.path.join(dataset_dir, series_code + '.tsv'), 'w', encoding='utf-8') as observations_file:
observations_file.write("PERIOD\tVALUE\n")
......@@ -188,7 +181,7 @@ def iter_series(table_filepath):
assert "T3_Periodo" in observation_dict.keys()
if observation_dict["T3_Periodo"].startswith("M"):
# Monthly
return "{}{}".format(observation_dict["Anyo"], observation_dict["T3_Periodo"])
return "{}-{}".format(observation_dict["Anyo"], observation_dict["T3_Periodo"][1:])
elif observation_dict["T3_Periodo"].startswith("QI"):
# Quarterly
quarter_id = parse_roman_quarters(observation_dict["T3_Periodo"])
......@@ -196,9 +189,13 @@ def iter_series(table_filepath):
else:
raise Exception("Non supported period notation: {!r}".format(observation_dict["T3_Periodo"]))
def norm_value(value):
    """Return `value` as a string, replacing missing values with 'NA'.

    The value is converted with str() first. An empty string, or the text
    'None' (which is what str(None) produces), is reported as 'NA'; any
    other value is returned as its string form.
    """
    text = str(value)
    if text in ('', 'None'):
        return 'NA'
    return text
def iter_observations(observations_list):
    """Yield one (period, value) tuple for each observation.

    The period comes from get_period() and the value is the observation's
    "Valor" entry passed through norm_value(), so missing values are
    emitted as 'NA' rather than as an empty string or 'None'.
    """
    for observation_dict in observations_list:
        # Exactly one tuple per observation: the raw str() form is superseded
        # by the normalised one and must not be emitted alongside it.
        yield (get_period(observation_dict), norm_value(observation_dict["Valor"]))
def get_dimensions_values_and_labels(series_dict):
def get_in_with_assert(metadata_dict, metadata_path, assertions=True):
......
......@@ -86,7 +86,7 @@ def download_table(table_code):
try:
json_response = json.loads(response.content)
except ValueError as e:
print('{}\n-> Response is not JSON, ignoring table {}!'.format(url, table_code))
print('ERROR: response is not JSON, ignoring table {}!'.format(table_code))
raise e
if isinstance(json_response, list):
# Series list
......@@ -101,7 +101,7 @@ def download_table(table_code):
assert table_code
url = DOWNLOAD_TABLE_URL.format(table_code)
print('URL =', url)
print('Downloading {}'.format(url))
# Try to get table, waiting a little each time to let the server process request
log.debug("Get {!r}".format(url))
waited_time = 0
......
......@@ -98,7 +98,7 @@ CATEGORY_TREE = [
]
},
{
'name': 'Industry, energy and constrution/Export and import price of industrial products',
'name': 'Industry, energy and construction/Export and import price of industrial products',
'code': '30071',
'children': [
{
......