Commit 25e45486 authored by Christophe Benz
Browse files

Fail if zip is bad, do not code a retry routine

parent 31604dbe
......@@ -33,8 +33,6 @@ from pathlib import Path
import requests
# Upper bound on the number of download attempts made for one zip archive.
MAX_RETRY = 5
# Loggers must come from logging.getLogger(): a directly instantiated
# logging.Logger is not registered in the logger hierarchy, so handlers and
# levels configured by the application would never apply to it.
log = logging.getLogger(__name__)
......@@ -53,61 +51,22 @@ def download_binary_file(url, file_path: Path, cache=False):
log.debug('-> done.')
def die(errmsg):
    """Log the error message, echo it on stderr, then exit with status 1."""
    import sys

    log.error(errmsg)
    stderr_line = 'Error: {}\n'.format(errmsg)
    sys.stderr.write(stderr_line)
    # Equivalent to sys.exit(1): terminates by raising SystemExit.
    raise SystemExit(1)
def get_verified_zip_archive(zip_filepath: Path):
    """Return the opened zip archive, or None if it is not a usable zip.

    The file is opened with ``zipfile.ZipFile`` and every member is checked
    with ``ZipFile.testzip()``.

    Returns:
        The open ``zipfile.ZipFile`` when the file is a zip archive and all
        members pass the check (the caller is responsible for closing it);
        ``None`` when the file is not a zip archive or a member is corrupt.
    """
    try:
        zip_archive = zipfile.ZipFile(str(zip_filepath))
    except zipfile.BadZipFile:
        return None
    try:
        # testzip() returns the name of the first bad member, or None.
        first_bad_member = zip_archive.testzip()
    except Exception:
        # Do not leak the open file handle if the check itself blows up.
        zip_archive.close()
        raise
    if first_bad_member is not None:
        # The archive is rejected, so release its file handle before
        # reporting the failure; nobody else holds a reference to close it.
        zip_archive.close()
        return None
    return zip_archive
def download_and_extract(url, target_dir: Path, dir_name, cache_dir: Path = None):
    """Download a zip archive and extract it into ``target_dir / dir_name``.

    The archive is written as ``<dir_name>.zip`` inside ``cache_dir`` when one
    is given, otherwise inside ``target_dir``. No retry is attempted: if the
    downloaded file is not a valid zip archive, the ``zipfile.BadZipFile``
    raised while opening it propagates to the caller.

    Args:
        url: Location of the zip archive, passed to ``download_binary_file``.
        target_dir: Directory under which the extraction folder is created.
        dir_name: Name of the extraction folder and stem of the zip file name.
        cache_dir: Optional directory in which the downloaded archive is kept.
            When omitted, the archive is deleted once it has been extracted.
    """
    zip_filepath = (cache_dir or target_dir) / '{}.zip'.format(dir_name)
    # NOTE(review): the meaning of `cache` is defined by download_binary_file,
    # which is outside this view; it is simply enabled when a cache_dir is set.
    download_binary_file(url, zip_filepath, cache=cache_dir is not None)

    # Extracts all CSV
    csv_dir = target_dir / dir_name
    csv_dir.mkdir(exist_ok=True)
    # The context manager closes the archive even if extraction fails.
    with zipfile.ZipFile(str(zip_filepath)) as zip_archive:
        zip_archive.extractall(str(csv_dir))
    log.info('Zip [%s] extracted.', str(zip_filepath))

    # And removes zip archive
    if cache_dir is None:
        zip_filepath.unlink()
def main():
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment