Hard code topics_dict to fix the problem of missing topic codes from help_on_time_series_databases.html

Hard code topics_dict to fix the problem of missing topic codes from help_on_time_series_databases.html

Hard code topics_dict to fix the problem of missing topic codes from...
Hard code topics_dict to fix the problem of missing topic codes from help_on_time_series_databases.html
cb944149 · Enzo Buthiot · 6c3e0e1c · cb944149
Commit cb944149 authored 4 years ago by Enzo Buthiot
--- a/download.py
+++ b/download.py
@@ -36,7 +36,7 @@ import requests

 import buba_common as bc

-HELP_ON_TIMESERIES_URL = "https://www.bundesbank.de/en/statistics/time-series-databases/-/help-on-the-time-series-databases-750894"  # noqa
+HELP_ON_TIMESERIES_URL = "https://www.bundesbank.de/en/statistics/time-series-databases/help-on-the-time-series-databases"  # noqa
 DATASETS_URL = "https://www.bundesbank.de/cae/servlet/StatisticDownload?its_fileFormat=Archive&mode=its"  # noqa
 TOPICS_URL = "https://www.bundesbank.de/cae/servlet/StatisticDownload?its_fileFormat=Archive&mode=its&tree="  # noqa
 STRUCTURE_URL = "https://www.bundesbank.de/cae/servlet/StatisticDownload?metaDSI="
@@ -97,6 +97,7 @@ def download_topics(topics_url, topics_codes, target_dir: Path):
    for code in sorted(topics_codes):
        log.info("Downloading topic %r", code)
        topic_dir = topics_main_dir / code
+        print(topics_url + code)
        r = requests.get(topics_url + code)
        if not r.ok:
            log.error("A download error occurred, skipping topic %r...", code)
@@ -161,14 +162,24 @@ def main():
        parser.error("Target dir %r not found", target_dir)

    # Download main zip to get all the datasets
-    download_main_zip(DATASETS_URL, target_dir)
+    # download_main_zip(DATASETS_URL, target_dir)

    # Downloads HTML page that contains topics reference table
-    topics_html_filepath = target_dir / bc.TOPICS_HTML_PAGE_NAME
-    download_html(HELP_ON_TIMESERIES_URL, topics_html_filepath)
+    # topics_html_filepath = target_dir / bc.TOPICS_HTML_PAGE_NAME
+    # download_html(HELP_ON_TIMESERIES_URL, topics_html_filepath)

    # Extract topics_dict
-    topics_dict = bc.extract_topics_dict_from_html_help_page(topics_html_filepath)
+    # topics_dict = bc.extract_topics_dict_from_html_help_page(topics_html_filepath)
+    topics_dict = {'BANKEN': 'Banks and other financial corporations',
+                   'KONJUNKTUR': 'Economic activity and price',
+                   'UNTERNEHMEN': 'Enterprises and households',
+                   'WECHSELKURSE': 'Exchange rates',
+                   'AUSSENWIRTSCHAFT': 'External sector',
+                   'FINANZSTAB': 'Financial stability',
+                   'GESAMT': 'Macroeconomic accounting systems',
+                   'GELD': 'Money and capital markets',
+                   'FINANZEN': 'Public finances',
+                   'INDIKATOR': 'Sets of indicators'}

    # Downloads by topics to be able to sort datasets by topic
    # Note: some datasets appear in more than one topic