Skip to content
Snippets Groups Projects
Commit 16d48830 authored by Michel Juillard
Browse files

refactoring category_tree

parent 3935d068
No related branches found
No related tags found
No related merge requests found
......@@ -76,14 +76,20 @@ def download_datasets(target_dir):
_file.write(resp.text)
headers = resp.headers
soup = bs(resp.text, "lxml")
categories = {}
categories = []
for ul in soup.find_all("ul", class_="extend"):
cat = ul.parent.a.text.encode("utf-8").decode("utf-8", errors="ignore")
categories[cat] = []
# ugly fix of broken unicode char
cat_name = clean_special_char(ul.parent.a.text)
category = {
"name": cat_name,
"code": slugify.slugify(clean_special_char(ul.parent.a.text))
}
category["datasets"] = []
for li in ul.find_all("li"):
series = list(ast.literal_eval(li.a.get("onclick").replace("soumettreTab", "")))
categories[cat].append(series)
category["datasets"].append(series)
categories.append(category)
# no need to have country names
# encoding problems: broken utf-8 from website app: countries and categories é and ô same
countries = {n.text.strip().encode('utf-8').decode("utf-8", errors="ignore"): n.find("input")
......@@ -122,16 +128,13 @@ def download_datasets(target_dir):
post_data.update(country)
categories_tree = []
for cat, series_l in categories.items():
cat_name = clean_special_char(ul.parent.a.text)
for cat in categories:
category = {
# ugly fix of broken unicode char
"name": clean_special_char(ul.parent.a.text),
"code": slugify.slugify(clean_special_char(ul.parent.a.text))
"code": cat["code"],
"name": cat["name"]
}
category["children"] = []
for series in series_l:
freq, id_tab, s_name_id = series
for freq, id_tab, s_name_id in cat["datasets"]:
post_data["idTab"] = id_tab
data = [(k, v) for k, v in post_data.items()]
# html_download
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment