Skip to content
Snippets Groups Projects
Commit 68dcde78 authored by Constance de Quatrebarbes's avatar Constance de Quatrebarbes
Browse files

insert headers to_source_data

parent 91e324f2
No related branches found
No related tags found
1 merge request: !1 Implement download script and more
#!/usr/bin/env python3
# dares-fetcher -- Fetch series from DRESS
#By Constance de Quatrebarbes <constance.24barbes@jailbreak.paris>
# dares-fetcher -- Fetch series from DARES
# By Constance de Quatrebarbes <constance.de.quatrebarbes@cepremap.org>
# Copyright (C) 2017 Cepremap
# https://git.nomics.world/dbnomics-fetchers/dares-fetcher
#
# This is free software; you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This software is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""DARES Fetcher.
Usage:
......@@ -79,13 +96,16 @@ def fetch(dataset):
doc_list = sidebar.find("ul", {"class":"docs-joints__liste"})
target_files = []
for doc in doc_list.findAll("li"):
ext, title = [n.text for n in doc.findAll("span")[0:2]]
if ext in ["xls", "xlsx"]:
raw_url = doc.find('a',{"class":ext}).get("href")
f_url = os.path.join(ROOT_PROVIDER_URL, raw_url)
f_name = f_url.split("/")[-1]
target_files.append({"f_name":f_name, "f_url":f_url, "f_title": title, "f_ext": ext})
target_files.append({
"f_name":f_name,
"f_url":f_url,
"f_title": title,
"f_ext": ext})
assert len(target_files) == dataset["file_nb"], \
"Fetcher Error: url %s should retrieve %i xls docs instead of %i" %(url, dataset["file_nb"], len(target_files))
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment