Skip to content
Snippets Groups Projects
Commit b7cbe3e0 authored by Michel Juillard's avatar Michel Juillard
Browse files

replaced assert with logging.warning if number of files

is different from number of expected files
parent c22ef2bb
No related branches found
No related tags found
1 merge request: !4 fix for download.py problem
......@@ -19,13 +19,16 @@
# along with this program. If not, see <https://www.gnu.org/licenses/>.
"""Download DARES source data.
"""Download DARES series from http://dares.travail-emploi.gouv.fr/
Usage:
download.py <target_dir>
download.py (-h | --help)
{self_filename} <target_dir> [options]
Options:
--debug show debug output
"""
import logging
import os
import subprocess
import sys
......@@ -36,13 +39,17 @@ from docopt import docopt
from dares_settings import PROVIDER_URL, SOURCE_FILES_URL, TOP_CATEGORIES
log = logging.getLogger(__name__)
def main():
args = docopt(__doc__)
target_dir = os.path.abspath(args['<target_dir>'])
debug_mode = args['--debug']
logging.basicConfig(format="%(levelname)s: %(message)s", level=(logging.DEBUG if debug_mode else logging.INFO))
for category in TOP_CATEGORIES:
url = SOURCE_FILES_URL + category["slug"]
log.info('* Download {}'.format(url))
response = requests.get(url)
response.raise_for_status()
soup = bs(response.text, "lxml")
......@@ -64,10 +71,13 @@ def main():
f_url = PROVIDER_URL + a_href
f_name = f_url.split("/")[-1]
targeted_files.append({"f_name": f_name, "f_url": f_url, "f_title": title})
assert len(targeted_files) == category["expected_nb_files"], \
(url, len(targeted_files), category["expected_nb_files"])
if len(targeted_files) != category["expected_nb_files"]:
log.warning("** Number of available files ({}) is different from number of expected files ({})".
format(len(targeted_files), category["expected_nb_files"]))
for targeted_file in targeted_files:
for targeted_file in targeted_files[:category["expected_nb_files"]]:
log.info('** Download {}'.format(targeted_file["f_url"]))
response = requests.get(targeted_file["f_url"])
# f_name = f_url.split("/")[-1] reverting back to url filepath
filepath = targeted_file["f_name"].split("/")[-1]
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment