Commit c808631a authored by Pierre Dittgen

Adapt to SDMX_Change

parent b8e40b34
Pipeline #88910 passed with stage
in 10 minutes and 50 seconds
......@@ -247,6 +247,13 @@ def load_structure_xml_file(ds_code, structure_file: Path):
tags = (CODELIST_TAG, KEYFAMILY_TAG, DIMENSION_TAG, CODE_TAG, DESCRIPTION_TAG, NAME_TAG)
LANG_ATT = '{http://www.w3.org/XML/1998/namespace}lang'
def choose_name(lang_dict, code):
    """Return the preferred label found in *lang_dict*, else *code*.

    Languages are tried in order of preference: English ('en') first,
    then German ('de'). A language whose entry is missing, empty or None
    is skipped, so an unusable label never shadows a usable one.

    Args:
        lang_dict: mapping of language code to label text.
        code: fallback value returned when no usable label exists.

    Returns:
        The first non-empty label in preference order, or *code*.
    """
    for lang in ('en', 'de'):
        name = lang_dict.get(lang)
        # Truthiness check rather than `!= ''`: a stored None (presumably
        # possible for an element without text -- confirm against the
        # caller) must fall through instead of being returned as a label.
        if name:
            return name
    return code
# I want!
# - dataset_name
# - dimensions_labels
......@@ -256,8 +263,9 @@ def load_structure_xml_file(ds_code, structure_file: Path):
info = {'dataset': {}}
codelists = []
concepts = []
current_code_list = None
in_keyfamily, in_code_list = False, False
current_codelist, current_code = None, None
current_codelist_name_dict, current_code_name_dict = {}, {}
in_keyfamily, in_codelist, in_code = False, False, False
with structure_file.open('rb') as fd:
for evt, elt in etree.iterparse(fd, tag=tags, events=('start', 'end')):
......@@ -265,24 +273,21 @@ def load_structure_xml_file(ds_code, structure_file: Path):
if elt.tag == KEYFAMILY_TAG:
in_keyfamily = (evt == 'start')
if in_keyfamily:
info['dataset']['namedict'] = {}
# </structure:KeyFamily>: Choose label: english first, then german else code
if not in_keyfamily:
else:
name_dict = info['dataset']['namedict']
info['dataset']['name'] = name_dict['en'] if 'en' in name_dict \
else name_dict['de'] if 'de' in name_dict else ds_code
info['dataset']['name'] = choose_name(name_dict, ds_code)
del info['dataset']['namedict']
continue
if in_keyfamily:
# Dataset name
if elt.tag == NAME_TAG and evt == 'start':
if not info['dataset'].get('namedict'):
info['dataset']['namedict'] = {}
lg_att = elt.attrib.get(LANG_ATT)
name_val = norm_space(elt.text)
if lg_att in ('de', 'en') and name_val:
info['dataset']['namedict'][lg_att] = name_val
lang, name_val = elt.attrib.get(LANG_ATT), norm_space(elt.text)
info['dataset']['namedict'][lang] = name_val
# concepts
if elt.tag == DIMENSION_TAG and evt == 'start':
concepts.append(elt.attrib.get('conceptRef'))
......@@ -291,34 +296,36 @@ def load_structure_xml_file(ds_code, structure_file: Path):
# In CodeList context?
if elt.tag == CODELIST_TAG:
if evt == 'start':
current_code_list = {'id': elt.attrib['id'], 'values': {}}
current_code, current_german_name = None, None
current_codelist, current_codelist_name_dict = {'id': elt.attrib['id'], 'values': {}}, {}
current_code, current_code_name_dict = None, {}
else: # evt == 'end'
codelists.append(current_code_list)
current_code_list, current_code, current_german_name = None, None, None
in_code_list = evt == 'start'
current_codelist['name'] = choose_name(current_codelist_name_dict, current_codelist)
codelists.append(current_codelist)
current_codelist, current_code, current_codelist_name_dict, current_code_name_dict = None, None, None, None
in_codelist = evt == 'start'
continue
if in_code_list:
# Code list name
if elt.tag == NAME_TAG and evt == 'start':
if elt.attrib.get(LANG_ATT) == 'en':
current_code_list['name'] = norm_space(elt.text)
if in_codelist:
# Code id
if elt.tag == CODE_TAG:
current_code = elt.attrib.get('value') if evt == 'start' else None
# Description
if elt.tag == DESCRIPTION_TAG and evt == 'start':
if current_code:
# Some english code description are void, use german one instead (if exists...)
if elt.attrib.get(LANG_ATT) == 'de':
current_german_name = norm_space(elt.text) if elt.text else ''
if elt.attrib.get(LANG_ATT) == 'en':
current_name = norm_space(elt.text) if elt.text else current_german_name
current_code_list['values'][current_code] = current_name
if evt == 'start':
current_code = elt.attrib.get('value')
else:
code_name = choose_name(current_code_name_dict, current_code)
current_codelist['values'][current_code] = code_name
in_code = (evt == 'start')
# Name
if elt.tag == NAME_TAG:
# Code name
if in_code:
if evt == 'start':
current_code_name_dict[elt.attrib.get(LANG_ATT)] = norm_space(elt.text)
# Code list name
else:
if evt == 'start':
current_codelist_name_dict[elt.attrib.get(LANG_ATT)] = norm_space(elt.text)
# Use conceptRef as the dimension code instead of the codelist id, which can contain duplicates.
codelists = [{'id': concept, 'name': cl['name'], 'values':cl['values']} for concept, cl in zip(concepts, codelists)]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment