diff --git a/download.py b/download.py index a0dc904919b89fd1d605771053c711831333e337..fa4aa560221266bc29239b627b5236c8956e0e35 100755 --- a/download.py +++ b/download.py @@ -27,12 +27,14 @@ User and password are given through environnement variables USER_NAME and PASSWO The documentation of the API of GENESIS is available as a PDF here: https://www-genesis.destatis.de/genesis/online?language=en&operation=previous&levelindex=0&step=0&titel=Webservice&levelid=1651243696442&acceptscookies=false """ + import argparse import datetime import io import logging import os import re +from email.utils import parsedate_to_datetime from pathlib import Path from typing import Any, Dict, Optional, cast @@ -50,7 +52,7 @@ XML_STD_HEADER = '<?xml version="1.0" encoding="UTF-8"?>' # Categories to take into consideration are defined in destatis_util CATEGORIES = du.CATEGORY_LABELS.keys() -DESTATIS_RSS_URL = "https://www-genesis.destatis.de/genesis/online/news?language=en" +DESTATIS_RSS_URL = "https://www-genesis.destatis.de/genesisGONLINE/api/rest/information/newsfeed/rss/en" STATISTIKS_URL_TPL = ( "https://www-genesis.destatis.de/genesisWS/web/RechercheService_2010?" @@ -177,7 +179,7 @@ def fetch_updated_categories_from_rss(observed_categories, ref_date: datetime.da continue if elt.tag == PUBDATE_TAG and in_entry and evt == "end": - entry_info["pub_date"] = datetime.datetime.strptime(elt.text.strip(), "%a, %d %b %Y %H:%M:%S %z") + entry_info["pub_date"] = parsedate_to_datetime(elt.text.strip()) if elt.tag == TITLE_TAG and in_entry and evt == "end": # Do we have to consider this entry title?