dbnomics-fetchers / dares-fetcher · Commits
Commit e8d397fa, authored 7 years ago by Constance de Quatrebarbes
Build full tree from root_cat >(cat?)> dataset
Parent: 8f4f2dd4
1 merge request: !1 Implement download script and more
Showing 1 changed file: dares_to_dbnomics.py (14 additions, 10 deletions)
@@ -421,23 +421,28 @@ def extract_dataset(source_dir, category, dataset):
     #For theses categories
     if category["code"] in [1, 10, 13]:
         # skip first page 'To Read' or 'Synthèse' as mentionned in Analysis
-        return sorted([slugify(n) for n in book.sheet_names()[1:]])
+        return sorted(set([slugify(n) for n in book.sheet_names()[1:]]))
     else:
         # each sheet is a dataset
         datasets = sorted([slugify(n) for n in book.sheet_names()])
         # ugly skip lisez-moi sheet althought **NOT** mentionned
-        return sorted([n for n in datasets if not n.startswith("lisez-moi")])
+        return sorted(set([n for n in datasets if not n.startswith("lisez-moi")]))


 def build_datasets(dest_dir, category, datasets):
     '''write category and create dir for datasets'''
-    cat_dir_name = "%i %s" % (category["code"], category["slug"])
     #write category.json into category
-    category_dir_path = os.path.join(dest_dir, cat_dir_name)
+    if "dir_path" in category.keys():
+        category_dir_path = category["dir_path"]
+    else:
+        cat_dir_name = "%s %s" % (str(category["code"]), category["slug"])
+        category_dir_path = os.path.join(dest_dir, cat_dir_name)
+    print(category_dir_path)
     category_json = {
         "name": category["slug"],
         "category_code": str(category["code"]),
         "datasets": datasets,
     }
     print(category_json)
     write_category_to_json(category_json, category_dir_path)
     write_category_to_md(category_json, category_dir_path)
     datasets_dir_path = set([os.path.join(category_dir_path, d) for d in datasets])
@@ -481,15 +486,14 @@ def build_tree(source_dir, dest_dir):
         else:
             subcats = build_categories(dest_dir, category, category["sub-categories"])
             print(subcats)
             # datasets = extract_dataset(source_dir, category, category["sub-category"][0])
             # datasets_path = build_datasets(dest_dir, category, datasets)
             # for subcat in category["sub-categories"]:
             # print(subcat, category)
             for subcat_d, subcat in zip(subcats, category["sub-categories"]):
                 subcat["dir_path"] = subcat_d
                 # subcats_path = build_datasets(dest_dir, category, subcat)
-                # datasets = extract_datasets(category, subcat, subcat["datasets"][0])
-                # datasets_path = build_datasets(dest_dir, category, datasets)
+                datasets = extract_dataset(source_dir, subcat, subcat["datasets"][0])
+                datasets_path = build_datasets(category, subcat, datasets)


 def main():
     args = docopt(__doc__)
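For context on the two behavioural changes in this diff: sheet-name slugs are now deduplicated with sorted(set(...)), and build_datasets reuses a sub-category's pre-computed dir_path (set in build_tree) instead of always rebuilding the directory name from the code and slug. The sketch below illustrates that logic in isolation; the naive slugify helper and the sample sheet names are assumptions for illustration only and are not taken from the repository.

# Minimal sketch of the commit's two changes (assumed helper and sample data)
import os
import re


def slugify(name):
    """Naive slugify: lowercase, non-alphanumerics collapsed to hyphens."""
    return re.sub(r"[^a-z0-9]+", "-", name.lower()).strip("-")


def dataset_slugs(sheet_names, skip_first=False):
    """Deduplicate and sort slugified sheet names, as sorted(set(...)) now does."""
    names = sheet_names[1:] if skip_first else sheet_names
    return sorted(set(slugify(n) for n in names))


def category_dir(dest_dir, category):
    """Reuse a pre-set dir_path if present, otherwise build '<code> <slug>'."""
    if "dir_path" in category:
        return category["dir_path"]
    return os.path.join(dest_dir, "%s %s" % (str(category["code"]), category["slug"]))


if __name__ == "__main__":
    sheets = ["Lisez-moi", "Serie A", "Serie A", "Serie B"]
    print(dataset_slugs(sheets, skip_first=True))  # ['serie-a', 'serie-b']
    subcat = {"code": 10, "slug": "emploi", "dir_path": "data/1 root/10 emploi"}
    print(category_dir("data", subcat))            # data/1 root/10 emploi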