Commit 2063f407 authored by Bruno Duyé's avatar Bruno Duyé

Externalize the observations API call into an iterator, to prepare for other appendices

parent a64346bb
......@@ -93,43 +93,36 @@ def main():
global api_user_id
api_user_id = get_api_user_id_from_env()
for appendix_code in APPENDIX_TO_DOWNLOAD:
for appendix_dict in bea_common.APPENDIX_TO_DOWNLOAD:
if args["--only"] and not appendix_code in args["--only"]:
log.info('-> ignoring {!r} (due to --only option)'.format(appendix_code))
continue
appendix_code = appendix_dict['code']
appendix_path = os.path.join(target_dir, appendix_code)
os.mkdir(appendix_path)
log.info("** {}".format(appendix_code))
datasets_list_url = API_URLs['datasets_list'].format(api_user_id=api_user_id, appendix_code=appendix_code)
datasets_list = get_from_api(datasets_list_url)
# Get available frequencies for this appendix
dataset_frequencies = get_from_api(API_URLs['series_frequencies'].format(api_user_id=api_user_id, appendix_code=appendix_code))['ParamValue']
nb_tables_downloaded = 0
datasets_json = [] # Data to be written to datasets.json
# For each dataset
for dataset_info in datasets_list["ParamValue"]:
dataset_code = dataset_info.get('TableName') or dataset_info.get('TableNumber')
assert dataset_code, "Error: can't find dataset code in dataset_info (dataset {!r}):\n{!r}".format(appendix_code, dataset_info)
# log.debug("* {}".format(table_name))
log.debug("* {}".format(dataset_code))
# Download series data and write series file
if args['--limit_nb_datasets'] and nb_tables_downloaded >= int(args['--limit_nb_datasets']):
break
nb_tables_downloaded += 1
dataset_json = [] # dataset information to be written in datasets.json
for frequency_dict in dataset_frequencies:
frequency_code = frequency_dict['FrequencyID']
url = API_URLs['series_data'].format(api_user_id=api_user_id, appendix_code=appendix_code,
dataset_code=dataset_code, frequency_code=frequency_code)
dataset_filename = slugify(dataset_code) + '-' + frequency_code + '.json'
for dimension_value_code, dataset_observations_json in iter_observations(dataset_code, appendix_code):
dataset_filename = slugify(dataset_code) + '-' + dimension_value_code + '.json'
dataset_filepath = os.path.join(appendix_path, dataset_filename)
dataset_observations_json = get_from_api(url, raise_on_errors=False)
if dataset_observations_json:
write_json_file(dataset_filepath, dataset_observations_json)
dataset_json.append({
'dimension_value_code': frequency_code,
'filename': dataset_filename,
})
write_json_file(dataset_filepath, dataset_observations_json)
dataset_json.append({
'dimension_value_code': dimension_value_code,
'filename': dataset_filename,
})
datasets_json.append({
'dataset_code': dataset_code,
'dimension_label': 'Frequency',
......@@ -142,6 +135,20 @@ def main():
log.info('END')
def iter_observations(dataset_code, appendix_code):
    """Yield (frequency_code, dataset_observations_json) tuples for a dataset.

    First asks the API which frequencies are available for *appendix_code*,
    then fetches the observations of *dataset_code* at each frequency.
    Frequencies for which the API returns no data are skipped silently
    (the data request is made with raise_on_errors=False).
    """
    # Fetch the list of frequencies available for this appendix.
    frequencies_url = API_URLs['series_frequencies'].format(
        api_user_id=api_user_id, appendix_code=appendix_code)
    frequencies = get_from_api(frequencies_url)['ParamValue']
    for frequency in frequencies:
        code = frequency['FrequencyID']
        data_url = API_URLs['series_data'].format(
            api_user_id=api_user_id, appendix_code=appendix_code,
            dataset_code=dataset_code, frequency_code=code)
        observations = get_from_api(data_url, raise_on_errors=False)
        # Empty/falsy responses mean no data at this frequency — skip them.
        if observations:
            yield code, observations
def get_from_api(url, raise_on_errors=True):
"""Get data from API and return it.
Params:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment