Commit e8c279c1 authored by Bruno Duyé

Code refactor: variable renaming

parent 4ed0595f
@@ -28,8 +28,8 @@ Usage:
 Options:
   --debug                          show debug output, and log each API request in 'last_api_result.json' file
-  --only <datasets_codes>          only convert given dataset(s)_code(s). Ex: "--only ABA,BGG"
-  -l --limit_nb_tables <number>    limit the number of tables to download per dataset
+  --only <appendix_codes>          only convert given appendix(es)_code(s). Ex: "--only NIPA,MNE"
+  -l --limit_nb_datasets <number>  limit the number of datasets to download per appendix
 """
 import logging
@@ -48,7 +48,7 @@ import ujson as json
 log = logging.getLogger(__name__)
-DATASETS_TO_DOWNLOAD = [
+APPENDIX_TO_DOWNLOAD = [
     'NIPA', # Appendix B – NIPA (National Income and Product Accounts)
     'NIUnderlyingDetail', # Appendix C – NIUnderlyingDetail (National Income and Product Accounts)
     # 'FixedAssets', # Appendix D – Fixed Assets
@@ -61,9 +61,9 @@ DATASETS_TO_DOWNLOAD = [
 ]
 API_URLs = {
-    'tables_list': 'https://bea.gov/api/data/?&UserID={api_user_id}&method=GetParameterValues&DataSetName={dataset_code}&ParameterName=TableName',
-    'series_frequencies': 'https://bea.gov/api/data/?&UserID={api_user_id}&method=GetParameterValues&DataSetName={dataset_code}&ParameterName=Frequency',
-    'series_data': 'https://www.bea.gov/api/data/?&UserID={api_user_id}&method=GetData&DataSetName={dataset_code}&TableName={table_code}&Frequency={frequency_code}&Year=ALL',
+    'datasets_list': 'https://bea.gov/api/data/?&UserID={api_user_id}&method=GetParameterValues&DataSetName={appendix_code}&ParameterName=TableName',
+    'series_frequencies': 'https://bea.gov/api/data/?&UserID={api_user_id}&method=GetParameterValues&DataSetName={appendix_code}&ParameterName=Frequency',
+    'series_data': 'https://www.bea.gov/api/data/?&UserID={api_user_id}&method=GetData&DataSetName={appendix_code}&TableName={dataset_code}&Frequency={frequency_code}&Year=ALL',
 }
 API_ERRORS_WHITELIST = [
@@ -71,6 +71,7 @@ API_ERRORS_WHITELIST = [
     'No data exists for the Year/Frequencies passed.',
 ]
 class APIException(Exception):
     pass
@@ -92,45 +93,46 @@ def main():
     global api_user_id
     api_user_id = get_api_user_id_from_env()
-    for dataset_code in DATASETS_TO_DOWNLOAD:
-        if args["--only"] and not dataset_code in args["--only"]:
-            log.info('-> ignoring {!r} (due to --only option)'.format(dataset_code))
+    for appendix_code in APPENDIX_TO_DOWNLOAD:
+        if args["--only"] and not appendix_code in args["--only"]:
+            log.info('-> ignoring {!r} (due to --only option)'.format(appendix_code))
             continue
-        dataset_path = os.path.join(target_dir, dataset_code)
-        os.mkdir(dataset_path)
-        log.info("** {}".format(dataset_code))
-        tables_list_url = API_URLs['tables_list'].format(api_user_id=api_user_id, dataset_code=dataset_code)
-        tables_list = get_from_api(tables_list_url)
-        # Get available frequencies for this dataset
-        dataset_frequencies = get_from_api(API_URLs['series_frequencies'].format(api_user_id=api_user_id, dataset_code=dataset_code))['ParamValue']
+        appendix_path = os.path.join(target_dir, appendix_code)
+        os.mkdir(appendix_path)
+        log.info("** {}".format(appendix_code))
+        datasets_list_url = API_URLs['datasets_list'].format(api_user_id=api_user_id, appendix_code=appendix_code)
+        datasets_list = get_from_api(datasets_list_url)
+        # Get available frequencies for this appendix
+        dataset_frequencies = get_from_api(API_URLs['series_frequencies'].format(api_user_id=api_user_id, appendix_code=appendix_code))['ParamValue']
         nb_tables_downloaded = 0
-        dataset_json = [] # Data to be written to dataset.json
-        # For each "table" (set of series)
-        for table_info in tables_list["ParamValue"]:
-            table_code = table_info.get('TableName') or table_info.get('TableNumber')
-            assert table_code, "Error: can't find table code in table_info (dataset {!r}):\n{!r}".format(dataset_code, table_info)
-            log.debug("* {}".format(table_name))
+        datasets_json = [] # Data to be written to datasets.json
+        # For each dataset
+        for dataset_info in datasets_list["ParamValue"]:
+            dataset_code = dataset_info.get('TableName') or dataset_info.get('TableNumber')
+            assert dataset_code, "Error: can't find dataset code in dataset_info (dataset {!r}):\n{!r}".format(appendix_code, dataset_info)
+            # log.debug("* {}".format(table_name))
+            log.debug("* {}".format(dataset_code))
             # Download series data and write series file
-            if args['--limit_nb_tables'] and nb_tables_downloaded >= int(args['--limit_nb_tables']):
+            if args['--limit_nb_datasets'] and nb_tables_downloaded >= int(args['--limit_nb_datasets']):
                 break
             for frequency_dict in dataset_frequencies:
                 frequency_code = frequency_dict['FrequencyID']
-                url = API_URLs['series_data'].format(api_user_id=api_user_id, dataset_code=dataset_code,
-                                                     table_code=table_code, frequency_code=frequency_code)
-                table_filename = slugify(table_code) + '-' + frequency_code + '.json'
-                table_filepath = os.path.join(dataset_path, table_filename)
-                json = get_from_api(url, raise_on_errors=False)
-                if json:
-                    write_json_file(table_filepath, json)
+                url = API_URLs['series_data'].format(api_user_id=api_user_id, appendix_code=appendix_code,
+                                                     dataset_code=dataset_code, frequency_code=frequency_code)
+                dataset_filename = slugify(dataset_code) + '-' + frequency_code + '.json'
+                dataset_filepath = os.path.join(appendix_path, dataset_filename)
+                dataset_json = get_from_api(url, raise_on_errors=False)
+                if dataset_json:
+                    write_json_file(dataset_filepath, dataset_json)
                     nb_tables_downloaded += 1
-                    dataset_json.append({
-                        'table_code': table_code,
+                    datasets_json.append({
+                        'dataset_code': dataset_code,
                         'dimension_label': 'FREQ',
                         'dimension_code': frequency_code,
-                        'filename': table_filename,
+                        'filename': dataset_filename,
                     })
         # Write dataset.json
-        write_json_file(os.path.join(dataset_path, "dataset.json"), dataset_json)
+        write_json_file(os.path.join(appendix_path, "dataset.json"), datasets_json)
     log.info('END')
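
Note (not part of the commit): after this rename, the 'series_data' template is filled with an appendix code as the BEA DataSetName and a dataset (table) code as the TableName. A minimal standalone sketch of how such a URL is built is shown below; the UserID value and the table code are placeholders chosen for illustration, not values from the repository.

# Sketch only: the URL template is copied from the diff above; the values
# filled in below are placeholders for illustration.
SERIES_DATA_URL = ('https://www.bea.gov/api/data/?&UserID={api_user_id}&method=GetData'
                   '&DataSetName={appendix_code}&TableName={dataset_code}'
                   '&Frequency={frequency_code}&Year=ALL')

url = SERIES_DATA_URL.format(
    api_user_id='YOUR_BEA_USER_ID',  # placeholder API key (the script reads the real one from the environment)
    appendix_code='NIPA',            # appendix code, as listed in APPENDIX_TO_DOWNLOAD
    dataset_code='T10101',           # hypothetical table code, as returned by the 'datasets_list' request
    frequency_code='Q',              # a FrequencyID value, e.g. quarterly
)
print(url)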