Commit 745e035d authored by Bruno Duyé's avatar Bruno Duyé

API use - NIPA and NIUnderlyingDetail download

parent 9d487a44
......@@ -27,10 +27,10 @@ Usage:
{self_filename} <target_dir> [options]
Options:
--debug show debug output
--debug show debug output, and log each API request in 'last_api_result.json' file
-l --limit_nb_series <number> limit the number of series to download per dataset
"""
import json
import logging
import os
import sys
......@@ -41,11 +41,36 @@ from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util import Retry
from slugify import slugify
import ujson as json
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Codes of the datasets to download, in download order.
# Entries that are commented out are known dataset codes that are not downloaded yet.
DATASETS_TO_DOWNLOAD = [
    # Appendix B – NIPA (National Income and Product Accounts)
    'NIPA',
    # Appendix C – NIUnderlyingDetail (National Income and Product Accounts)
    'NIUnderlyingDetail',
    # Appendix D – Fixed Assets
    # 'FixedAssets',
    # Appendix E – Direct Investment and Multinational Enterprises (MNEs)
    # 'MNE',
    # Appendix F – Gross Domestic Product by Industry (GDPbyIndustry)
    # 'GDPbyIndustry',
    # Appendix G – ITA (International Transactions)
    # 'ITA',
    # Appendix H – IIP (International Investment Position)
    # 'IIP',
    # Appendix L – Underlying Gross Domestic Product by Industry (UnderlyingGDPbyIndustry)
    # 'UnderlyingGDPbyIndustry',
    # Appendix M - IntlServTrade (International Services Trade)
    # 'IntlServTrade',
]
# URL templates of the API requests, to be filled in with str.format():
# - 'series_list' expects `api_user_id` and `dataset_code`
# - 'series_data' expects `api_user_id`, `dataset_code` and `series_name`
# NOTE(review): the two templates use different hosts (bea.gov vs www.bea.gov) — confirm this is intended.
API_URLs = dict(
    series_list=(
        'https://bea.gov/api/data/?&UserID={api_user_id}'
        '&method=GetParameterValues'
        '&DataSetName={dataset_code}'
        '&ParameterName=TableName'
    ),
    series_data=(
        'https://www.bea.gov/api/data/?&UserID={api_user_id}'
        '&method=GetData'
        '&DataSetName={dataset_code}'
        '&TableName={series_name}'
        '&Frequency=A,Q'
        '&Year=ALL'
    ),
)
class APIException(Exception):
    """Error reported by the API inside the body of a response."""
def main():
global log
global requests_session
global debug_mode
requests_session = Session()
# http://www.coglib.com/~icordasc/blog/2014/12/retries-in-requests.html
# backoff_factor=2 will make sleep for 2 * (2 ^ (retry_number - 1)), ie 0, 2, 4, 8, 16, 32 ...
......@@ -56,20 +81,81 @@ def main():
debug_mode = args['--debug']
logging.basicConfig(level=(logging.DEBUG if debug_mode else logging.INFO), format='%(message)s')
category_tree = json.load(open('category_tree.json'))
for category in category_tree:
category_name = category['name']
print("* {}".format(category_name))
category_dir = os.path.join(target_dir, slugify(category_name))
os.mkdir(category_dir)
for link in category['children']:
url = link['url']
print(url)
response = requests_session.get(url)
content = response.content
filename = os.path.basename(url)
with open(os.path.join(category_dir, filename), 'wb') as _f:
_f.write(content)
global api_user_id
api_user_id = get_api_user_id_from_env()
for dataset_code in DATASETS_TO_DOWNLOAD:
dataset_path = os.path.join(target_dir, dataset_code)
os.mkdir(dataset_path)
log.info("** {}".format(dataset_code))
series_list_url = API_URLs['series_list'].format(api_user_id=api_user_id, dataset_code=dataset_code)
series_list = get_from_api(series_list_url)
nb_series_donwloaded = 0
for series_info in series_list["ParamValue"]:
series_name = series_info['TableName']
log.debug("* {}".format(series_name))
series_filepath = os.path.join(dataset_path, slugify(series_name) + '.json')
url = API_URLs['series_data'].format(api_user_id=api_user_id, dataset_code=dataset_code, series_name=series_name)
# Download series data and write series file
json = get_from_api(url, raise_on_errors=False)
if json:
write_json_file(series_filepath, json)
nb_series_donwloaded += 1
if args['--limit_nb_series'] and nb_series_donwloaded == int(args['--limit_nb_series']):
break
log.info('END')
def get_from_api(url, raise_on_errors=True):
    """Get data from API and return the 'Results' part of the response.

    The decoded response is searched for an 'Error' key in two places:
    directly under 'BEAAPI', then under 'BEAAPI' -> 'Results'.

    In debug mode, the raw body of the response is also written to the
    'last_api_result.json' file (overwritten at each request).

    Params:
    - url: URL of the API request (must not be empty)
    - raise_on_errors: if an error is returned from API, raise an APIException
      (True) or only log the error and return None (False)

    Return the content of response['BEAAPI']['Results'], or None if the API
    returned an error and raise_on_errors is False.
    """
    def raise_or_log_if_error(payload):
        """Tell whether `payload` carries an API error.

        Return False if there is no 'Error' entry in `payload`. Otherwise log
        the error, then raise an APIException if raise_on_errors is True,
        else return True.
        """
        assert payload
        if 'Error' not in payload:
            return False
        log.error('API error while getting {!r}'.format(url))
        error_str = payload['Error']
        if raise_on_errors:
            raise APIException(error_str)
        log.error(error_str)
        return True

    assert url
    log.debug(url)
    response = requests_session.get(url)
    content_str = response.content
    if debug_mode:
        # Keep a copy of the last raw response to ease debugging
        with open('last_api_result.json', 'wb') as _f:
            _f.write(content_str)
    content_dict = json.loads(content_str)

    api_root = content_dict['BEAAPI']
    if raise_or_log_if_error(api_root):
        return None
    results = api_root['Results']
    # The outcome of this second check must be used too: otherwise a payload
    # carrying an error would be returned to the caller as if it were valid data.
    if raise_or_log_if_error(results):
        return None
    return results
def get_api_user_id_from_env():
    """Return the API user ID read from the BEA_API_USER_ID environment variable.

    If the variable is not defined, write an explanatory message to stderr and
    terminate the program with exit status -1.
    """
    environment_variable_name = 'BEA_API_USER_ID'
    api_user_id = os.environ.get(environment_variable_name)
    if api_user_id is None:
        # Tell the user why the program stops instead of exiting silently
        sys.stderr.write(
            "Error: environment variable {!r} is not set; it must contain the API user ID\n".format(
                environment_variable_name
            )
        )
        # sys.exit() is always available, unlike the exit() helper which is
        # only injected by the site module
        sys.exit(-1)
    return api_user_id
def write_json_file(file_path, data):
    """Serialize `data` as JSON into the UTF-8 text file `file_path`.

    The file is created or overwritten. Output is indented by 2 spaces, keys
    are sorted and non-ASCII characters are written as is (not escaped).
    """
    dump_options = {
        'sort_keys': True,
        'indent': 2,
        'ensure_ascii': False,
    }
    with open(file_path, mode='w', encoding='utf-8') as json_file:
        json.dump(data, json_file, **dump_options)
if __name__ == '__main__':
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment