dbnomics-fetchers / buba-fetcher / Commits

Commit 29d433d2
Authored 5 years ago by Pierre Dittgen
Parent: 053e5aac

Use topics extracted from BUBA website
Showing 1 changed file: convert.py (14 additions, 6 deletions)
@@ -51,6 +51,7 @@ DAY_RE = re.compile(r'^([12]\d{3})-([01]\d)-([0-3]\d)$')
 # Helps normalize space
 NORM_SPACES_RE = re.compile(r'\s+')


 def compute_series_observations(periods, observations, obs_addon_cols, obs_addon_values):
     """
     Computes series observations
     """
@@ -383,10 +384,11 @@ def generate_dataset(ds_code, source_dir: Path, structure_file: Path, ds_dir: Pa
     write_json_file(ds_dir / 'dataset.json', dataset_info)


-def browse_topics(topics_dir: Path, dataset_info_dict):
+def browse_topics(topics_dir: Path, dataset_info_dict, topics_dict):
     """
     Yields all topics along with datasets ids
     """
-    for topic_id in sorted(bc.TOPICS):
+    # Sort by topics label
+    for topic_id, _ in sorted(topics_dict.items(), key=itemgetter(1)):
         topic_dir = topics_dir / topic_id
         if not topic_dir.exists():
             log.warning('Topic directory [{}] not found!'.format(str(topic_dir)))
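The rewritten loop orders topics by their human-readable label instead of their raw id: sorted() runs over the (id, label) pairs of topics_dict with operator.itemgetter(1) as the key, so comparison happens on the label. A self-contained sketch of the pattern (the topic ids and labels are made up):

from operator import itemgetter

# Hypothetical stand-in for topics_dict: topic id -> label
topics_dict = {'t3': 'Banks', 't1': 'Exchange rates', 't2': 'Business surveys'}

# itemgetter(1) picks the label out of each (id, label) pair,
# so topics come out sorted alphabetically by label
for topic_id, _ in sorted(topics_dict.items(), key=itemgetter(1)):
    print(topic_id)
# -> t3, t2, t1

Note that browse_topics keeps only the id (the label is bound to _), since the directory lookup that follows only needs topic_id.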
@@ -396,7 +398,7 @@ def browse_topics(topics_dir: Path, dataset_info_dict):
         yield topic_dir.name, sorted(datasets, key=lambda elt: elt['name'])


-def compute_category_data(topics_dir: Path, dataset_info_dict):
+def compute_category_data(topics_dir: Path, dataset_info_dict, topics_dict):
     """
     Compute category_tree.json data ready to be serialized in category_tree.json
     """
     categ_data = []
@@ -406,9 +408,9 @@ def compute_category_data(topics_dir: Path, dataset_info_dict):
     # Then distribute datasets along topics
     # Some datasets belong to more than a topic
-    for topic_id, datasets in browse_topics(topics_dir, dataset_info_dict):
+    for topic_id, datasets in browse_topics(topics_dir, dataset_info_dict, topics_dict):
         categ_data.append({
-            'name': bc.TOPICS[topic_id],
+            'name': topics_dict.get(topic_id, topic_id),
             'children': datasets
         })

     # Removes dataset ids that belong to the category
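Switching from bc.TOPICS[topic_id] to topics_dict.get(topic_id, topic_id) also changes the failure mode: a topic id missing from the scraped help page now falls back to the raw id instead of raising a KeyError. A small sketch of the category nodes this loop builds (the topic ids, labels and dataset entries are hypothetical):

topics_dict = {'banks': 'Banks and other financial corporations'}

categ_data = []
for topic_id, datasets in [
    ('banks', [{'code': 'DS1', 'name': 'Some dataset'}]),  # known topic
    ('misc', []),                                          # not in topics_dict
]:
    categ_data.append({
        'name': topics_dict.get(topic_id, topic_id),  # falls back to the id
        'children': datasets,
    })

# categ_data[0]['name'] == 'Banks and other financial corporations'
# categ_data[1]['name'] == 'misc'  (fallback, no KeyError)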
@@ -454,6 +456,10 @@ def main():
     if not target_dir.exists():
         parser.error("Target dir {!r} not found".format(str(target_dir)))

+    # Read topics dict from 'help on time series' html page
+    help_html_filepath = source_dir / bc.TOPICS_HTML_PAGE_NAME
+    topics_dict = bc.extract_topics_dict_from_html_help_page(help_html_filepath)
+
     # dataset structure info
     structures_dir = source_dir / 'structures'
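The bc.extract_topics_dict_from_html_help_page helper is not part of this diff, so its implementation is unknown here. A plausible standalone sketch, assuming the help page lists each topic as a link whose href fragment carries the topic id (both the page structure and this reimplementation are assumptions, not the fetcher's actual code):

from html.parser import HTMLParser
from pathlib import Path


class _TopicsParser(HTMLParser):
    """Collects (topic_id, label) pairs from <a href="...#<topic_id>">label</a> links."""

    def __init__(self):
        super().__init__()
        self.topics = {}
        self._current_id = None
        self._buf = []

    def handle_starttag(self, tag, attrs):
        if tag == 'a':
            href = dict(attrs).get('href', '')
            if '#' in href:
                self._current_id = href.rsplit('#', 1)[1]
                self._buf = []

    def handle_data(self, data):
        if self._current_id is not None:
            self._buf.append(data)

    def handle_endtag(self, tag):
        if tag == 'a' and self._current_id is not None:
            # Normalize whitespace in the label text
            label = ' '.join(''.join(self._buf).split())
            if label:
                self.topics[self._current_id] = label
            self._current_id = None


def extract_topics_dict_from_html_help_page(filepath: Path) -> dict:
    parser = _TopicsParser()
    parser.feed(filepath.read_text(encoding='utf-8'))
    return parser.topics

Whatever the real implementation, in main() the result plays the role of topics_dict: a plain {topic_id: label} mapping read once from source_dir and threaded through compute_category_data into browse_topics.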
@@ -476,7 +482,9 @@ def main():
     # Category_tree.json
     if not args.skip_category_tree:
-        write_json_file(target_dir / 'category_tree.json', compute_category_data(source_dir / 'topics', dataset_info_dict))
+        category_json_data = compute_category_data(source_dir / 'topics', dataset_info_dict,
+                                                   topics_dict)
+        write_json_file(target_dir / 'category_tree.json', category_json_data)


 def write_json_file(file_path: Path, data):
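write_json_file's body lies outside the hunk; only its signature is visible as context. A minimal sketch consistent with that signature (the json.dump options are assumptions, not taken from the repository):

import json
from pathlib import Path


def write_json_file(file_path: Path, data):
    # Serialize data as pretty-printed, UTF-8 JSON
    with file_path.open('w', encoding='utf-8') as fd:
        json.dump(data, fd, ensure_ascii=False, indent=2, sort_keys=True)

Materializing category_json_data before the call, as the new code does, keeps the write_json_file line short and makes the intermediate value easy to inspect when debugging.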