Commit b1026489 authored by Pierre Dittgen's avatar Pierre Dittgen
Browse files

Download works

parents
#!/usr/bin/env python3
# meti-fetcher -- Fetch series from Meti Japan macro economic database
# By: Pierre Dittgen <pierre.dittgen@cepremap.org>
#
# Copyright (C) 2018 Cepremap
# https://git.nomics.world/dbnomics-fetchers/meti-fetcher
#
# meti-fetcher is free software; you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# meti-fetcher is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
"""
Downloader for Meti Japan provider
http://www.meti.go.jp
"""
import argparse
import logging
import os
import shutil
import zipfile
import requests
# Module-level logger. It is obtained through logging.getLogger() rather than
# by instantiating logging.Logger directly, so that it is registered in the
# logging hierarchy and honours handlers/levels configured on the root logger
# (e.g. via logging.basicConfig).
LOG = logging.getLogger('meti download')
def download_binary_file(url, file_path, cache=False, timeout=60):
    """Download the resource at `url` into the binary file `file_path`.

    Args:
        url: address of the resource to fetch.
        file_path: destination path; created or overwritten on success.
        cache: when true and `file_path` already exists, nothing is
            downloaded and the existing file is kept.
        timeout: number of seconds passed to `requests.get` as its
            `timeout` argument.

    Raises:
        requests.RequestException: on network failure, timeout, or when the
            server answers with an HTTP error status.
        OSError: when the destination cannot be written.
    """
    LOG.debug("Downloading %s... ", os.path.basename(file_path))
    if cache and os.path.exists(file_path):
        LOG.debug('-> cached.')
        return

    # Write to a sibling ".part" file and rename it only once the transfer
    # has completed: an interrupted download must never leave a truncated
    # file at `file_path`, where a later cache=True call would trust it.
    partial_path = file_path + '.part'
    resp = requests.get(url, stream=True, timeout=timeout)
    try:
        # Fail loudly on 4xx/5xx instead of saving the error page as data.
        resp.raise_for_status()
        # Let urllib3 undo any gzip/deflate transfer encoding on the raw stream.
        resp.raw.decode_content = True
        with open(partial_path, mode='wb') as fout:
            shutil.copyfileobj(resp.raw, fout)
    except BaseException:
        # Clean up the incomplete file, then let the error propagate.
        if os.path.exists(partial_path):
            os.remove(partial_path)
        raise
    finally:
        # Always release the streamed connection.
        resp.close()
    os.replace(partial_path, file_path)
    LOG.debug('-> done.')
def die(errmsg):
    """Report a fatal error and terminate the program.

    The message is logged at error level on the module logger, written to
    standard error prefixed with 'Error: ', and the process then exits
    with status 1.
    """
    import sys  # kept local to this function, as in the original

    LOG.error(errmsg)
    stderr_line = 'Error: {}\n'.format(errmsg)
    sys.stderr.write(stderr_line)
    # Equivalent to sys.exit(1).
    raise SystemExit(1)
def download_and_extract(url, target_dir, dir_name, cache=False):
    """Download a zip archive and extract it into a sub-folder.

    The archive is saved as `<target_dir>/<dir_name>.zip` and its content
    is extracted into `<target_dir>/<dir_name>/`.

    Args:
        url: address of the zip archive.
        target_dir: existing directory receiving the archive and the folder.
        dir_name: base name used for both the zip file and the folder.
        cache: when true, an already downloaded zip is reused and the zip
            file is kept after extraction; otherwise it is removed.

    The program is stopped through `die` when the archive is missing,
    is not a zip file, or contains a corrupted member.
    """
    # Download the zip
    zip_filepath = os.path.join(target_dir, '{}.zip'.format(dir_name))
    download_binary_file(url, zip_filepath, cache=cache)
    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if not os.path.exists(zip_filepath):
        die('Zip file not found: [{}]'.format(zip_filepath))

    csv_dir = os.path.join(target_dir, dir_name)
    try:
        # The context manager closes the archive even when die() exits.
        with zipfile.ZipFile(zip_filepath) as zip_archive:
            # testzip() returns the name of the first corrupted member, if any.
            if zip_archive.testzip() is not None:
                die('Bad zip file: [{}]'.format(zip_filepath))
            # Extract all CSV files; tolerate a folder left by a previous run.
            os.makedirs(csv_dir, exist_ok=True)
            zip_archive.extractall(csv_dir)
    except zipfile.BadZipFile:
        # Raised when the downloaded payload is not a zip archive at all.
        die('Bad zip file: [{}]'.format(zip_filepath))

    # Remove the zip archive unless it is kept as a cache
    if not cache:
        os.remove(zip_filepath)
def main():
    """Download the Meti zip archives and extract each one into a folder.

    Expects one command-line argument: the path of an existing, writable
    target directory. Invalid arguments are reported through argparse,
    which prints a usage message and exits with status 2.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        'target_dir',
        help='path of target directory for downloaded data',
    )
    args = parser.parse_args()
    target_dir = args.target_dir

    # Validate user input explicitly: `assert` is stripped under `python -O`
    # and would otherwise show a bare traceback to the user.
    if not os.path.isdir(target_dir):
        parser.error('target directory not found: {}'.format(target_dir))
    if not os.access(target_dir, os.W_OK):
        parser.error('target directory is not writable: {}'.format(target_dir))

    LOG.info('Downloading meti data')

    # (archive url, name of the folder receiving its content)
    archives = (
        ('http://www.meti.go.jp/english/statistics/tyo/iip/csv/b2010_g1e.zip',
         'b2010_g1e'),
        ('http://www.meti.go.jp/english/statistics/tyo/sanzi/csv/b2010_ke.zip',
         'b2010_ke'),
    )
    for url, dir_name in archives:
        download_and_extract(url, target_dir, dir_name)


if __name__ == '__main__':
    main()
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment