Skip to content
Snippets Groups Projects
Commit 31e756e7 authored by Constance de Quatrebarbes's avatar Constance de Quatrebarbes
Browse files

V2

* SOURCE HTML DOWNLOAD
* Settings with CONSTANTS
* Acceptance test
* Folder architecture root > datasets
* Metadata automatic generation (README.json + md.json)

TO DO:
* Clear up special cases
* Folder architecture datasets > series
parent 04e54ad2
No related branches found
No related tags found
1 merge request!1Implement download script and more
......@@ -102,15 +102,17 @@ def build_datasets(dest_dir, parent):
else:
# each sheet is a dataset
datasets_name = sorted([slugify(n) for n in book.sheet_names()])
#!warning ugly skip lisez-moi sheet although **NOT** mentioned
datasets_name = sorted(set([n for n in datasets_name if not n.startswith("lisez-moi")]))
datasets_path = [os.path.join(parent["path"], dataset_name) for dataset_name in datasets_name]
# for dataset in datasets_name:
# try:
# os.makedirs(dataset)
# except Exception as e:
# print(e)
for dataset in datasets_path:
try:
os.makedirs(dataset)
except Exception as e:
print("DUPLICATE dataset", dataset)
#TODO: return more than name
return datasets_name
......@@ -120,11 +122,10 @@ def build_series(parent, datasets):
def write_json_f(node):
filename = "%s.json" % node["type"]
filepath = os.path.join(node["path"], filename)
print(node["type"])
if node["type"] == "provider":
provider_json_data = PROVIDER
provider_json_data[str(node["children"])] = [n["dir_name"] for n in node[node["children"]]]
provider_json_data = verified_value(validate_provider(provider_json_data, format='json'))
with open(filepath, "w") as f:
......@@ -154,8 +155,7 @@ def write_json_f(node):
else:
print(node)
raise NotImplementedError
# print(node["children"], ":")
# print([(n["dir_name"], n["path"]) for n in node[node['children']]])
def write_md_f(node, json_data):
filename = "README.md"
filepath = os.path.join(node["path"], filename)
......@@ -232,72 +232,6 @@ def build_file_tree(source_dir, dest_dir):
build_md(root)
return root_categories
def get_root_categories():
    '''
    Build the LEVEL 0 names from the module-level TREE constant.

    Every TREE entry contributes its "code" and its "slug"; when several
    entries share the same code, the last one wins. The result is a list
    of "<code> <slug>" strings ordered by ascending code.
    '''
    slug_by_code = {}
    for entry in TREE:
        slug_by_code[entry["code"]] = entry["slug"]
    # Codes are unique dict keys, so sorting the keys alone gives the same
    # order as sorting the (code, slug) pairs on their first element.
    return ["%i %s" % (code, slug_by_code[code]) for code in sorted(slug_by_code)]
def build_root_categories(dest_dir):
    '''
    Create the first level of the tree under dest_dir.

    One directory is created per name returned by get_root_categories();
    os.makedirs raises if a directory already exists. Afterwards the work
    is delegated to create_provider_json(dest_dir) and
    create_provider_md(dest_dir).

    Returns None.
    '''
    for root_name in get_root_categories():
        category_dir = os.path.join(dest_dir, root_name)
        os.makedirs(category_dir)
    create_provider_json(dest_dir)
    create_provider_md(dest_dir)
def create_provider_md(dest_dir):
    '''
    Write the provider-level README.md into dest_dir.

    The file contains a title built from PROVIDER["name"], a link to
    provider.json, a "Categories" heading and one Markdown link per name
    returned by get_root_categories(). Dashes in a name are replaced by
    spaces in the link label only; the link target keeps the raw name.

    Returns None.
    '''
    header = [
        '# Provider ' + PROVIDER["name"] + '\n',
        # Every header entry is a separate list item (note the trailing
        # commas) so the blank-line layout matches the category READMEs.
        'Metadata: [provider.json](provider.json)\n\n',
        '## Categories\n',
    ]
    links = [
        "- [%s](%s)" % (name.replace("-", " "), name)
        for name in get_root_categories()
    ]
    # TODO (kept from the original): "categories not sorted" -- the links
    # follow whatever order get_root_categories() returns; confirm whether
    # this note is still relevant.
    content = "\n".join(header) + "\n" + "\n".join(links)
    # Explicit UTF-8 so non-ASCII text is written identically on every
    # platform instead of depending on the locale's default encoding.
    with open(os.path.join(dest_dir, "README.md"), "w", encoding="utf-8") as f:
        f.write(content)
def write_category_to_json(category_json_data, dest_dir):
    '''
    Validate category_json_data and write it to dest_dir/category.json.

    The data is passed through validate_category(..., format='json') and
    verified_value() before being serialised with sorted keys, a 4-space
    indent and non-ASCII characters kept as-is (ensure_ascii=False).

    Returns None.
    '''
    category_json_data = verified_value(validate_category(category_json_data, format='json'))
    # Serialise before opening the file: opening in "w" mode truncates it,
    # so a serialisation error would otherwise leave an empty category.json.
    jdata = json.dumps(category_json_data, sort_keys=True,
                       indent=4, separators=(',', ': '), ensure_ascii=False)
    # ensure_ascii=False can emit non-ASCII characters, so the encoding is
    # pinned to UTF-8 rather than left to the locale default.
    with open(os.path.join(dest_dir, "category.json"), "w", encoding="utf-8") as f:
        f.write(jdata)
def write_category_to_md(category_json_data, dest_dir):
    '''
    Write the README.md of a category that lists datasets.

    Reads the "category_code", "name" and "datasets" keys of
    category_json_data. The file holds a title, a link to category.json,
    a "Datasets" heading and one Markdown link per dataset. Dashes in a
    dataset name are replaced by spaces in the link label only; the link
    target keeps the raw name.

    Returns None.
    '''
    lines = [
        '# Category %s %s\n' % (category_json_data["category_code"], category_json_data["name"]),
        'Metadata: [category.json](category.json)\n\n',
        '## Datasets\n',
    ]
    lines.extend(
        "- [%s](%s)" % (name.replace("-", " "), name)
        for name in category_json_data["datasets"]
    )
    # Explicit UTF-8 so non-ASCII names are written identically on every
    # platform instead of depending on the locale's default encoding.
    with open(os.path.join(dest_dir, "README.md"), "w", encoding="utf-8") as f:
        f.write("\n".join(lines))
def write_subcategory_to_md(category_json_data, dest_dir):
    '''
    Write the README.md of a category that lists sub-categories.

    Reads the "category_code", "name" and "categories" keys of
    category_json_data. The file holds a title, a link to category.json,
    a "Categories" heading and one Markdown link per sub-category. Dashes
    in a sub-category name are replaced by spaces in the link label only;
    the link target keeps the raw name.

    Returns None.
    '''
    lines = [
        '# Category %s %s\n' % (category_json_data["category_code"], category_json_data["name"]),
        'Metadata: [category.json](category.json)\n\n',
        '## Categories\n',
    ]
    lines.extend(
        "- [%s](%s)" % (name.replace("-", " "), name)
        for name in category_json_data["categories"]
    )
    # Explicit UTF-8 so non-ASCII names are written identically on every
    # platform instead of depending on the locale's default encoding.
    with open(os.path.join(dest_dir, "README.md"), "w", encoding="utf-8") as f:
        f.write("\n".join(lines))
def main():
args = docopt(__doc__)
source_dir = os.path.abspath(args["<source_dir>"])
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment