Commit 41f73fa9 authored by Bruno Duyé

Download: make code generic

parent 2398f017
......@@ -5,13 +5,30 @@ Common code between download and convert
APPENDIX_TO_DOWNLOAD = [
{
'code': 'NIPA',
'name': 'Appendix B – NIPA (National Income and Product Accounts)'
'name': 'Appendix B – NIPA (National Income and Product Accounts)',
'cut_on_dimension': True,
'dimension_code': 'FREQ',
'dimension_label': 'Frequency',
'api': {
'datasets_list_url': 'https://bea.gov/api/data/?&UserID={api_user_id}&method=GetParameterValues&DataSetName={appendix_code}&ParameterName=TableName',
'dimensions_values_list_url': 'https://bea.gov/api/data/?&UserID={api_user_id}&method=GetParameterValues&DataSetName={appendix_code}&ParameterName=Frequency',
'dimensions_values_keyname': 'FrequencyID',
'series_data_url': 'https://www.bea.gov/api/data/?&UserID={api_user_id}&method=GetData&DataSetName={appendix_code}&TableName={dataset_code}&Frequency={dimension_value_code}&Year=ALL',
}
},
{
'code': 'NIUnderlyingDetail',
'name': 'Appendix C – NIUnderlyingDetail (National Income and Product Accounts)'
}
# 'FixedAssets', # Appendix D – Fixed Assets
'name': 'Appendix C – NIUnderlyingDetail (National Income and Product Accounts)',
'cut_on_dimension': True,
'dimension_code': 'FREQ',
'dimension_label': 'Frequency',
'api': {
'datasets_list_url': 'https://bea.gov/api/data/?&UserID={api_user_id}&method=GetParameterValues&DataSetName={appendix_code}&ParameterName=TableName',
'dimensions_values_list_url': 'https://bea.gov/api/data/?&UserID={api_user_id}&method=GetParameterValues&DataSetName={appendix_code}&ParameterName=Frequency',
'dimensions_values_keyname': 'FrequencyID',
'series_data_url': 'https://www.bea.gov/api/data/?&UserID={api_user_id}&method=GetData&DataSetName={appendix_code}&TableName={dataset_code}&Frequency={dimension_value_code}&Year=ALL',
}
},
# no frequencies
# 'datasets_list': 'https://bea.gov/api/data/?&UserID={api_user_id}&method=GetParameterValues&DataSetName={appendix_code}&ParameterName=TableID',
# 'series_data': 'https://www.bea.gov/api/data/?&UserID={api_user_id}&method=GetData&DataSetName={appendix_code}&TableID={dataset_code}&Year=X',
......
......@@ -44,28 +44,11 @@ from requests.packages.urllib3.util import Retry
from slugify import slugify
from toolz import dicttoolz
import bea_common
import ujson as json
log = logging.getLogger(__name__)
APPENDIX_TO_DOWNLOAD = [
'NIPA', # Appendix B – NIPA (National Income and Product Accounts)
'NIUnderlyingDetail', # Appendix C – NIUnderlyingDetail (National Income and Product Accounts)
# 'FixedAssets', # Appendix D – Fixed Assets
# 'MNE', # Appendix E – Direct Investment and Multinational Enterprises (MNEs)
# 'GDPbyIndustry', # Appendix F – Gross Domestic Product by Industry (GDPbyIndustry)
# 'ITA', # Appendix G – ITA (International Transactions)
# 'IIP', # Appendix H – IIP (International Investment Position)
# 'UnderlyingGDPbyIndustry', # Appendix L – Underlying Gross Domestic Product by Industry (UnderlyingGDPbyIndustry)
# 'IntlServTrade', # Appendix M - IntlServTrade (International Services Trade)
]
API_URLs = {
'datasets_list': 'https://bea.gov/api/data/?&UserID={api_user_id}&method=GetParameterValues&DataSetName={appendix_code}&ParameterName=TableName',
'series_frequencies': 'https://bea.gov/api/data/?&UserID={api_user_id}&method=GetParameterValues&DataSetName={appendix_code}&ParameterName=Frequency',
'series_data': 'https://www.bea.gov/api/data/?&UserID={api_user_id}&method=GetData&DataSetName={appendix_code}&TableName={dataset_code}&Frequency={frequency_code}&Year=ALL',
}
API_ERRORS_WHITELIST = [
# List of API errors descriptions that are not to be logged at info level, but debug
'No data exists for the Year/Frequencies passed.',
......@@ -94,14 +77,14 @@ def main():
api_user_id = get_api_user_id_from_env()
for appendix_dict in bea_common.APPENDIX_TO_DOWNLOAD:
appendix_code = appendix_dict['code']
if args["--only"] and not appendix_code in args["--only"]:
log.info('-> ignoring {!r} (due to --only option)'.format(appendix_code))
continue
appendix_code = appendix_dict['code']
appendix_path = os.path.join(target_dir, appendix_code)
os.mkdir(appendix_path)
log.info("** {}".format(appendix_code))
datasets_list_url = API_URLs['datasets_list'].format(api_user_id=api_user_id, appendix_code=appendix_code)
datasets_list_url = appendix_dict['api']['datasets_list_url'].format(api_user_id=api_user_id, appendix_code=appendix_code)
datasets_list = get_from_api(datasets_list_url)
nb_tables_downloaded = 0
datasets_json = [] # Data to be written to datasets.json
......@@ -115,8 +98,9 @@ def main():
break
nb_tables_downloaded += 1
dataset_json = [] # dataset information to be written in datasets.json
for dimension_value_code, dataset_observations_json in iter_observations(dataset_code, appendix_code):
dataset_filename = slugify(dataset_code) + '-' + dimension_value_code + '.json'
for dimension_value_code, dataset_observations_json in iter_data(dataset_code, appendix_dict):
dimension_value_code_str = '-' + dimension_value_code if dimension_value_code else ''
dataset_filename = slugify(dataset_code) + dimension_value_code_str + '.json'
dataset_filepath = os.path.join(appendix_path, dataset_filename)
write_json_file(dataset_filepath, dataset_observations_json)
dataset_json.append({
......@@ -125,8 +109,8 @@ def main():
})
datasets_json.append({
'dataset_code': dataset_code,
'dimension_label': 'Frequency',
'dimension_code': 'FREQ',
'dimension_label': appendix_dict['dimension_label'],
'dimension_code': appendix_dict['dimension_code'],
'files': dataset_json,
})
# Write datasets.json
......@@ -135,18 +119,23 @@ def main():
log.info('END')
def iter_observations(dataset_code, appendix_code):
"""Yields frequency_code, dataset_observations_json tuples
def iter_data(dataset_code, appendix_dict):
"""Yields dimension_value_code, dataset_observations_json tuples
=> in case of no "cut" on any dimension, dimension_value_code is None
"""
# Get available frequencies for this appendix
dataset_frequencies = get_from_api(API_URLs['series_frequencies'].format(api_user_id=api_user_id, appendix_code=appendix_code))['ParamValue']
for frequency_dict in dataset_frequencies:
frequency_code = frequency_dict['FrequencyID']
url = API_URLs['series_data'].format(api_user_id=api_user_id, appendix_code=appendix_code,
dataset_code=dataset_code, frequency_code=frequency_code)
dataset_observations_json = get_from_api(url, raise_on_errors=False)
if dataset_observations_json:
yield frequency_code, dataset_observations_json
appendix_code = appendix_dict['code']
if appendix_dict['cut_on_dimension']:
# Get list of dimensions_values to "cut" on
dimensions_values = get_from_api(appendix_dict['api']['dimensions_values_list_url'].format(
api_user_id=api_user_id, appendix_code=appendix_code))['ParamValue']
# For each dimension value
for dimensions_value_dict in dimensions_values:
dimension_value_code = dimensions_value_dict[appendix_dict['api']['dimensions_values_keyname']]
url = appendix_dict['api']['series_data_url'].format(api_user_id=api_user_id, appendix_code=appendix_code,
dataset_code=dataset_code, dimension_value_code=dimension_value_code)
dataset_observations_json = get_from_api(url, raise_on_errors=False)
if dataset_observations_json:
yield dimension_value_code, dataset_observations_json
def get_from_api(url, raise_on_errors=True):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment