Skip to content
Snippets Groups Projects
Commit e8d397fa authored by Constance de Quatrebarbes's avatar Constance de Quatrebarbes
Browse files

Build full tree from root_cat >(cat?)> dataset

parent 8f4f2dd4
No related branches found
No related tags found
1 merge request: !1 Implement download script and more
......@@ -421,23 +421,28 @@ def extract_dataset(source_dir, category, dataset):
# For these categories
if category["code"] in [1, 10, 13]:
# skip first page 'To Read' or 'Synthèse' as mentioned in Analysis
return sorted([slugify(n) for n in book.sheet_names()[1:]])
return sorted(set([slugify(n) for n in book.sheet_names()[1:]]))
else:
# each sheet is a dataset
datasets = sorted([slugify(n) for n in book.sheet_names()])
# ugly skip of the lisez-moi sheet, although **NOT** mentioned
return sorted([n for n in datasets if not n.startswith("lisez-moi")])
return sorted(set([n for n in datasets if not n.startswith("lisez-moi")]))
def build_datasets(dest_dir, category, datasets):
'''write category and create dir for datasets'''
cat_dir_name = "%i %s" %(category["code"], category["slug"])
#write category.json into category
category_dir_path = os.path.join(dest_dir, cat_dir_name)
if "dir_path" in category.keys():
category_dir_path = category["dir_path"]
else:
cat_dir_name = "%s %s" %(str(category["code"]), category["slug"])
category_dir_path = os.path.join(dest_dir, cat_dir_name)
print(category_dir_path)
category_json = {
"name": category["slug"],
"category_code": str(category["code"]),
"datasets": datasets,
}
print(category_json)
write_category_to_json(category_json, category_dir_path)
write_category_to_md(category_json, category_dir_path)
datasets_dir_path = set([os.path.join(category_dir_path, d) for d in datasets])
......@@ -481,15 +486,14 @@ def build_tree(source_dir, dest_dir):
else:
subcats = build_categories(dest_dir, category, category["sub-categories"])
print(subcats)
# datasets = extract_dataset(source_dir, category, category["sub-category"][0])
# datasets_path = build_datasets(dest_dir, category, datasets)
# for subcat in category["sub-categories"]:
# print(subcat, category)
for subcat_d,subcat in zip(subcats, category["sub-categories"]):
subcat["dir_path"] = subcat_d
# subcats_path = build_datasets(dest_dir, category, subcat)
# datasets = extract_datasets(category, subcat, subcat["datasets"][0])
# datasets_path = build_datasets(dest_dir, category, datasets)
datasets = extract_dataset(source_dir, subcat, subcat["datasets"][0])
datasets_path = build_datasets(category, subcat, datasets)
def main():
args = docopt(__doc__)
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment