Commit ffe2e682 authored by Pierre Dittgen

Warn if empty RSS

parent d7225f01
Pipeline #88863 passed with stage in 47 seconds
......@@ -129,11 +129,14 @@ def check_updated_categories(rss_xml_content, observed_categories, ref_date):
buff = io.BytesIO(rss_xml_content)
item_nb = 0
codes = set()
in_entry = False
for evt, elt in etree.iterparse(buff, tag=(ITEM_TAG, TITLE_TAG), events=('start', 'end'), huge_tree=True):
if elt.tag == ITEM_TAG:
in_entry = evt == 'start'
if in_entry:
item_nb += 1
continue
if elt.tag == TITLE_TAG and in_entry and evt == 'end':
......@@ -147,6 +150,10 @@ def check_updated_categories(rss_xml_content, observed_categories, ref_date):
entry_theme_code = m.group('theme_code')
if entry_theme_code in observed_categories and entry_date >= ref_date:
codes.add(entry_theme_code)
if item_nb == 0:
log.warning("New datasets RSS contains no entries (?)")
return codes
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment