refactoring category_tree

16d48830 · Michel Juillard · 3935d068 · 16d48830
Commit 16d48830 authored 5 years ago by Michel Juillard
--- a/download.py
+++ b/download.py
@@ -76,14 +76,20 @@ def download_datasets(target_dir):
        _file.write(resp.text)
    headers = resp.headers
    soup = bs(resp.text, "lxml")
-    categories = {}
+    categories = []
    for ul in soup.find_all("ul", class_="extend"):
        cat = ul.parent.a.text.encode("utf-8").decode("utf-8", errors="ignore")
-        categories[cat] = []
+        # workaround: category names from the website contain broken Unicode characters
+        cat_name = clean_special_char(ul.parent.a.text)
+        category = {
+            "name": cat_name,
+            "code": slugify.slugify(clean_special_char(ul.parent.a.text))
+        }
+        category["datasets"] = []
        for li in ul.find_all("li"):
            series = list(ast.literal_eval(li.a.get("onclick").replace("soumettreTab", "")))
-            categories[cat].append(series)
-
+            category["datasets"].append(series)
+        categories.append(category)
    # no need to have country names
    # encoding problems: broken utf-8 from website app: countries and categories é and ô same
    countries = {n.text.strip().encode('utf-8').decode("utf-8", errors="ignore"): n.find("input")
@@ -122,16 +128,13 @@ def download_datasets(target_dir):
    post_data.update(country)

    categories_tree = []
-    for cat, series_l in categories.items():
-        cat_name = clean_special_char(ul.parent.a.text)
+    for cat in categories:
        category = {
-            # ugly fix of broken unicode char
-            "name": clean_special_char(ul.parent.a.text),
-            "code": slugify.slugify(clean_special_char(ul.parent.a.text))
+            "code": cat["code"],
+            "name": cat["name"]
        }
        category["children"] = []
-        for series in series_l:
-            freq, id_tab, s_name_id = series
+        for freq, id_tab, s_name_id in cat["datasets"]:
            post_data["idTab"] = id_tab
            data = [(k, v) for k, v in post_data.items()]
            # html_download