Commit 89e7da6d authored by Bruno Duyé's avatar Bruno Duyé

#639 - download: disable SSL check

parent d3062cd2
Pipeline #117641 passed with stage
in 56 minutes and 46 seconds
......@@ -41,13 +41,13 @@ import sys
from pathlib import Path
import lxml.html
import ujson as json
import urllib3
from docopt import docopt
from requests import Session
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util import Retry
import ujson as json
log = logging.getLogger(__name__)
DATASETS_AND_TREE_URL = "https://www.cso.ie/webserviceclient/DatasetListing.aspx"
......@@ -62,6 +62,7 @@ def main():
requests_retry = Retry(total=12, backoff_factor=2, status_forcelist=[500, 501, 502, 503, 504]) # retry when server return one of those statuses
requests_session.mount('http://', HTTPAdapter(max_retries=requests_retry))
requests_session.mount('https://', HTTPAdapter(max_retries=requests_retry))
urllib3.disable_warnings() # Temporary disable warnings (#639)
# Parse command line arguments
args = docopt(__doc__.format(self_filename=os.path.basename(__file__)))
target_dir = str(Path(args['<target_dir>']).resolve())
......@@ -119,7 +120,7 @@ def get_category_tree(html_tree):
'name': 'Preliminary Actual and Percentage Change in Population 20112016 by Sex, Province County or City, CensusYear and Statistic',
}, ... ]
"""
html = try_to_download(page_url)
html = try_to_download(page_url, ssl_check=False)
xml_tree = get_xml_tree(html)
links_dicts = []
# Iterate through main table's trs
......@@ -217,12 +218,12 @@ def get_xml_tree(html_tree):
return xml_tree
def try_to_download(url, filepath=None):
def try_to_download(url, filepath=None, ssl_check=True):
""" Try to download file, return False if not downloaded
if filepath is None; return html content, or False if not downloaded
"""
try:
content = download_file(url, filepath)
content = download_file(url, filepath, ssl_check)
except ConnectionError as e:
log.warning(e)
return False
......@@ -232,14 +233,14 @@ def try_to_download(url, filepath=None):
return content
def download_file(url, filepath=None):
def download_file(url, filepath=None, ssh_check=True):
""" Download given url and save content into given filepath, if given
- raise a ConnectionError when download failed
"""
global requests_session
log.debug("downloading {!r}".format(url))
try:
response = requests_session.get(url)
response = requests_session.get(url, verify=ssh_check)
except Exception as e:
log.exception(e)
raise ConnectionError
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment