Skip to content
Snippets Groups Projects
Commit cb944149 authored by Enzo Buthiot
Browse files

Hard code topics_dict to fix the problem of missing topic codes from...

Hard code topics_dict to fix the problem of missing topic codes from help_on_time_series_databases.html
parent 6c3e0e1c
No related branches found
No related tags found
2 merge requests: !4 "Fix download", !3 "Hard code topics_dict to fix the problem of download.py"
......@@ -36,7 +36,7 @@ import requests
import buba_common as bc
HELP_ON_TIMESERIES_URL = "https://www.bundesbank.de/en/statistics/time-series-databases/-/help-on-the-time-series-databases-750894" # noqa
HELP_ON_TIMESERIES_URL = "https://www.bundesbank.de/en/statistics/time-series-databases/help-on-the-time-series-databases" # noqa
DATASETS_URL = "https://www.bundesbank.de/cae/servlet/StatisticDownload?its_fileFormat=Archive&mode=its" # noqa
TOPICS_URL = "https://www.bundesbank.de/cae/servlet/StatisticDownload?its_fileFormat=Archive&mode=its&tree=" # noqa
STRUCTURE_URL = "https://www.bundesbank.de/cae/servlet/StatisticDownload?metaDSI="
......@@ -97,6 +97,7 @@ def download_topics(topics_url, topics_codes, target_dir: Path):
for code in sorted(topics_codes):
log.info("Downloading topic %r", code)
topic_dir = topics_main_dir / code
print(topics_url + code)
r = requests.get(topics_url + code)
if not r.ok:
log.error("A download error occurred, skipping topic %r...", code)
......@@ -161,14 +162,24 @@ def main():
parser.error("Target dir %r not found", target_dir)
# Download main zip to get all the datasets
download_main_zip(DATASETS_URL, target_dir)
# download_main_zip(DATASETS_URL, target_dir)
# Downloads HTML page that contains topics reference table
topics_html_filepath = target_dir / bc.TOPICS_HTML_PAGE_NAME
download_html(HELP_ON_TIMESERIES_URL, topics_html_filepath)
# topics_html_filepath = target_dir / bc.TOPICS_HTML_PAGE_NAME
# download_html(HELP_ON_TIMESERIES_URL, topics_html_filepath)
# Extract topics_dict
topics_dict = bc.extract_topics_dict_from_html_help_page(topics_html_filepath)
# topics_dict = bc.extract_topics_dict_from_html_help_page(topics_html_filepath)
topics_dict = {'BANKEN': 'Banks and other financial corporations',
'KONJUNKTUR': 'Economic activity and price',
'UNTERNEHMEN': 'Enterprises and households',
'WECHSELKURSE': 'Exchange rates',
'AUSSENWIRTSCHAFT': 'External sector',
'FINANZSTAB': 'Financial stability',
'GESAMT': 'Macroeconomic accounting systems',
'GELD': 'Money and capital markets',
'FINANZEN': 'Public finances',
'INDIKATOR': 'Sets of indicators'}
# Downloads by topics to be able to sort datasets by topic
# Note: some datasets appear in more than one topic
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment