Skip to content
Snippets Groups Projects
Commit 29d433d2 authored by Pierre Dittgen
Browse files

Use topics extracted from BUBA website

parent 053e5aac
No related branches found
No related tags found
No related merge requests found
......@@ -51,6 +51,7 @@ DAY_RE = re.compile(r'^([12]\d{3})-([01]\d)-([0-3]\d)$')
# Helps normalize space
NORM_SPACES_RE = re.compile(r'\s+')
def compute_series_observations(periods, observations, obs_addon_cols, obs_addon_values):
"""Computes series observations"""
......@@ -383,10 +384,11 @@ def generate_dataset(ds_code, source_dir: Path, structure_file: Path, ds_dir: Pa
write_json_file(ds_dir / 'dataset.json', dataset_info)
def browse_topics(topics_dir: Path, dataset_info_dict):
def browse_topics(topics_dir: Path, dataset_info_dict, topics_dict):
""" Yields all topics along with datasets ids """
for topic_id in sorted(bc.TOPICS):
# Sort by topics label
for topic_id, _ in sorted(topics_dict.items(), key=itemgetter(1)):
topic_dir = topics_dir / topic_id
if not topic_dir.exists():
log.warning('Topic directory [{}] not found!'.format(str(topic_dir)))
......@@ -396,7 +398,7 @@ def browse_topics(topics_dir: Path, dataset_info_dict):
yield topic_dir.name, sorted(datasets, key=lambda elt: elt['name'])
def compute_category_data(topics_dir: Path, dataset_info_dict):
def compute_category_data(topics_dir: Path, dataset_info_dict, topics_dict):
""" Compute category_tree.json data ready to be serialized in category_tree.json """
categ_data = []
......@@ -406,9 +408,9 @@ def compute_category_data(topics_dir: Path, dataset_info_dict):
# Then distribute datasets along topics
# Some datasets belong to more than a topic
for topic_id, datasets in browse_topics(topics_dir, dataset_info_dict):
for topic_id, datasets in browse_topics(topics_dir, dataset_info_dict, topics_dict):
categ_data.append({
'name': bc.TOPICS[topic_id],
'name': topics_dict.get(topic_id, topic_id),
'children': datasets
})
# Removes dataset ids that belong to the category
......@@ -454,6 +456,10 @@ def main():
if not target_dir.exists():
parser.error("Target dir {!r} not found".format(str(target_dir)))
# Read topics dict from 'help on time series' html page
help_html_filepath = source_dir / bc.TOPICS_HTML_PAGE_NAME
topics_dict = bc.extract_topics_dict_from_html_help_page(help_html_filepath)
# dataset structure info
structures_dir = source_dir / 'structures'
......@@ -476,7 +482,9 @@ def main():
# Category_tree.json
if not args.skip_category_tree:
write_json_file(target_dir / 'category_tree.json', compute_category_data(source_dir / 'topics', dataset_info_dict))
category_json_data = compute_category_data(source_dir / 'topics',
dataset_info_dict, topics_dict)
write_json_file(target_dir / 'category_tree.json', category_json_data)
def write_json_file(file_path: Path, data):
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment