Commit 491737a2 authored by Christophe Benz's avatar Christophe Benz
Browse files

Fix flake8 errors

parent 92170282
......@@ -21,12 +21,11 @@
"""Access DBnomics time series from Python."""
from collections import defaultdict
import itertools
import json
import logging
import os
import urllib.parse
from collections import defaultdict
from urllib.parse import urljoin
import pandas as pd
......@@ -90,7 +89,8 @@ def fetch_series(
If `max_nb_series` is `None`, a default value of 50 series will be used.
If `filters` is not `None`, apply those filters using the Time Series Editor API (Cf https://editor.nomics.world/filters)
If `filters` is not `None`, apply those filters using the Time Series Editor API
(Cf https://editor.nomics.world/filters)
Return a Python Pandas `DataFrame`.
......@@ -114,7 +114,10 @@ def fetch_series(
fetch_series("IMF", "CPI", series_code="M..PCPIEC_IX+PCPIA_IX")
- fetch one series and apply interpolation filter:
fetch_series('AMECO/ZUTN/EA19.1.0.0.0.ZUTN', filters=[{"code": "interpolate", "parameters": {"frequency": "monthly", "method": "spline"}}])
fetch_series(
'AMECO/ZUTN/EA19.1.0.0.0.ZUTN',
filters=[{"code": "interpolate", "parameters": {"frequency": "monthly", "method": "spline"}}],
)
"""
# Parameters validation
global default_api_base_url
......@@ -207,12 +210,14 @@ def fetch_series_by_api_link(
"API link" URLs can be found on DBnomics web site (https://db.nomics.world/) on dataset or series pages
using "Download" buttons.
If `filters` is not `None`, apply those filters using the Time Series Editor API (Cf https://editor.nomics.world/filters)
If `filters` is not `None`, apply those filters using the Time Series Editor API
(Cf https://editor.nomics.world/filters)
Example:
fetch_series(api_link="https://api.db.nomics.world/v22/series?provider_code=AMECO&dataset_code=ZUTN")
"""
# Call API via `iter_series_infos`, add dimensions labels and store result in `series_list`. Fill `datasets_dimensions`
# Call API via `iter_series_infos`, add dimensions labels and store result in `series_list`.
# Fill `datasets_dimensions`
datasets_dimensions = None
series_dims_by_dataset_code = defaultdict(dict)
# series_dims_by_dataset_code example:
......@@ -261,7 +266,8 @@ def fetch_series_by_api_link(
"value",
]
# Flatten series received from the API to prepare Dataframe creation (rename some keys of JSON result to match DataFrame organization)
# Flatten series received from the API to prepare Dataframe creation
# (rename some keys of JSON result to match DataFrame organization)
flat_series_list = []
for series in series_list:
flat_series = flatten_dbnomics_series(series)
......@@ -313,7 +319,8 @@ def fetch_series_by_api_link(
)
else:
if "dimensions_values_labels" in dataset_dimensions:
# No dimensions labels but dimensions_values_labels -> we add " (label)" to the end of dimension code
# No dimensions labels but dimensions_values_labels -> we add " (label)" to the end
# of dimension code
dimensions_labels_columns_names.append("{} (label)".format(dimension_code))
# In the case there's no dimension_label nor dimensions_values_labels, we do not add any column
......
......@@ -24,4 +24,3 @@ exclude=.git,dist,build,.eggs,__pycache__,*.egg-info,.venv
extend-ignore =
# See https://github.com/PyCQA/pycodestyle/issues/373
E203,
......@@ -30,7 +30,13 @@ from setuptools import find_packages, setup
HERE = path.abspath(path.dirname(__file__))
# Get the long description from the README file
with codecs.open(path.join(HERE, "README.md"), encoding="utf-8") as f:
with codecs.open(
path.join(
HERE,
"README.md",
),
encoding="utf-8",
) as f:
LONG_DESCRIPTION = f.read()
setup(
......
......@@ -19,37 +19,76 @@
import logging
import pytest
import pytest_vcr # This import is not strictly needed, but the goal is to raise when pytest-vcr is not installed
# This import is not strictly needed, but the goal is to raise when pytest-vcr is not installed
import pytest_vcr # noqa
from dbnomics import default_api_base_url, fetch_series, fetch_series_by_api_link
def test_fetch_series_with_filter_on_one_series_with_filter_parameter_error(caplog):
filters = [{"code": "interpolate", "parameters": {"foo": "bar"}}]
def test_fetch_series_with_filter_on_one_series_with_filter_parameter_error(
caplog,
):
filters = [
{
"code": "interpolate",
"parameters": {"foo": "bar"},
}
]
with caplog.at_level(logging.INFO):
df = fetch_series("AMECO", "ZUTN", "DEU.1.0.0.0.ZUTN", filters=filters)
assert all(df.filtered == False)
df = fetch_series(
"AMECO",
"ZUTN",
"DEU.1.0.0.0.ZUTN",
filters=filters,
)
assert all(df.filtered == False) # noqa: == is a Pandas operator
assert len(caplog.records) == 1
assert caplog.records[0].levelname == "ERROR"
assert "Error with filter parameters" in caplog.records[0].message
def test_fetch_series_with_filter_on_one_series_with_wrong_frequency(caplog):
filters = [{"code": "aggregate", "parameters": {"frequency": "annual"}}]
def test_fetch_series_with_filter_on_one_series_with_wrong_frequency(
caplog,
):
filters = [
{
"code": "aggregate",
"parameters": {"frequency": "annual"},
}
]
with caplog.at_level(logging.INFO):
df = fetch_series("AMECO", "ZUTN", "DEU.1.0.0.0.ZUTN", filters=filters)
assert all(df.filtered == False)
df = fetch_series(
"AMECO",
"ZUTN",
"DEU.1.0.0.0.ZUTN",
filters=filters,
)
assert all(df.filtered == False) # noqa: == is a Pandas operator
assert len(caplog.records) == 1
assert caplog.records[0].levelname == "ERROR"
assert "Annual is already the lowest frequency" in caplog.records[0].message
def test_fetch_series_with_filter_on_one_series_with_filter_error(caplog):
filters = [{"code": "foo", "parameters": {}}]
def test_fetch_series_with_filter_on_one_series_with_filter_error(
caplog,
):
filters = [
{
"code": "foo",
"parameters": {},
}
]
with caplog.at_level(logging.INFO):
df = fetch_series("AMECO", "ZUTN", "DEU.1.0.0.0.ZUTN", filters=filters)
assert all(df.filtered == False)
df = fetch_series(
"AMECO",
"ZUTN",
"DEU.1.0.0.0.ZUTN",
filters=filters,
)
assert all(df.filtered == False) # noqa: == is a Pandas operator
assert len(caplog.records) == 1
assert caplog.records[0].levelname == "ERROR"
assert "Filter not found" in caplog.records[0].message
......@@ -60,8 +99,12 @@ def test_fetch_series_with_filter_on_one_series_with_filter_error(caplog):
@pytest.mark.vcr(decode_compressed_response=True)
def test_fetch_series_by_code():
# Thanks to @pytest.mark.vcr decorator, this request result will be read from cassettes yaml file (the one that match the test function name)
df = fetch_series("AMECO", "ZUTN", "EA19.1.0.0.0.ZUTN")
# Thanks to @pytest.mark.vcr decorator, this response will be read from cassettes yaml file matching the test name
df = fetch_series(
"AMECO",
"ZUTN",
"EA19.1.0.0.0.ZUTN",
)
provider_codes = df["provider_code"].unique()
assert len(provider_codes) == 1
......@@ -78,8 +121,12 @@ def test_fetch_series_by_code():
@pytest.mark.vcr(decode_compressed_response=True)
def test_fetch_series_by_code_mask():
# Thanks to @pytest.mark.vcr decorator, this request result will be read from cassettes yaml file (the one that match the test function name)
df = fetch_series("IMF", "CPI", "M.FR+DE.PCPIEC_IX+PCPIA_IX")
# Thanks to @pytest.mark.vcr decorator, this response will be read from cassettes yaml file matching the test name
df = fetch_series(
"IMF",
"CPI",
"M.FR+DE.PCPIEC_IX+PCPIA_IX",
)
provider_codes = df["provider_code"].unique()
assert len(provider_codes) == 1
......@@ -95,8 +142,12 @@ def test_fetch_series_by_code_mask():
@pytest.mark.vcr(decode_compressed_response=True)
def test_fetch_series_by_code_mask_with_plus_in_dimension_code():
# Thanks to @pytest.mark.vcr decorator, this request result will be read from cassettes yaml file (the one that match the test function name)
df = fetch_series("SCB", "AKIAM", '"J+K"+"G+H".AM0301C1')
# Thanks to @pytest.mark.vcr decorator, this response will be read from cassettes yaml file matching the test name
df = fetch_series(
"SCB",
"AKIAM",
'"J+K"+"G+H".AM0301C1',
)
provider_codes = df["provider_code"].unique()
assert len(provider_codes) == 1
......@@ -107,12 +158,15 @@ def test_fetch_series_by_code_mask_with_plus_in_dimension_code():
assert dataset_codes[0] == "AKIAM"
series_codes = df["series_code"].unique()
assert set(series_codes) == {"J+K.AM0301C1", "G+H.AM0301C1"}, series_codes
assert set(series_codes) == {
"J+K.AM0301C1",
"G+H.AM0301C1",
}, series_codes
@pytest.mark.vcr(decode_compressed_response=True)
def test_fetch_series_by_id():
# Thanks to @pytest.mark.vcr decorator, this request result will be read from cassettes yaml file (the one that match the test function name)
# Thanks to @pytest.mark.vcr decorator, this response will be read from cassettes yaml file matching the test name
df = fetch_series("AMECO/ZUTN/EA19.1.0.0.0.ZUTN")
provider_codes = df["provider_code"].unique()
......@@ -130,7 +184,7 @@ def test_fetch_series_by_id():
@pytest.mark.vcr(decode_compressed_response=True)
def test_fetch_series_by_ids_in_same_dataset():
# Thanks to @pytest.mark.vcr decorator, this request result will be read from cassettes yaml file (the one that match the test function name)
# Thanks to @pytest.mark.vcr decorator, this response will be read from cassettes yaml file matching the test name
df = fetch_series(
[
"AMECO/ZUTN/EA19.1.0.0.0.ZUTN",
......@@ -154,7 +208,7 @@ def test_fetch_series_by_ids_in_same_dataset():
@pytest.mark.vcr(decode_compressed_response=True)
def test_fetch_series_by_ids_in_different_datasets():
# Thanks to @pytest.mark.vcr decorator, this request result will be read from cassettes yaml file (the one that match the test function name)
# Thanks to @pytest.mark.vcr decorator, this response will be read from cassettes yaml file matching the test name
df = fetch_series(
[
"AMECO/ZUTN/EA19.1.0.0.0.ZUTN",
......@@ -180,12 +234,16 @@ def test_fetch_series_by_ids_in_different_datasets():
@pytest.mark.vcr(decode_compressed_response=True)
def test_fetch_series_by_dimension():
# Thanks to @pytest.mark.vcr decorator, this request result will be read from cassettes yaml file (the one that match the test function name)
# Thanks to @pytest.mark.vcr decorator, this response will be read from cassettes yaml file matching the test name
df = fetch_series(
"WB",
"DB",
dimensions={
"country": ["FR", "IT", "ES"],
"country": [
"FR",
"IT",
"ES",
],
"indicator": ["IC.REG.COST.PC.FE.ZS.DRFN"],
},
)
......@@ -204,8 +262,11 @@ def test_fetch_series_by_dimension():
@pytest.mark.vcr(decode_compressed_response=True)
def test_fetch_series_of_dataset():
# Thanks to @pytest.mark.vcr decorator, this request result will be read from cassettes yaml file (the one that match the test function name)
df = fetch_series("AMECO", "ZUTN")
# Thanks to @pytest.mark.vcr decorator, this response will be read from cassettes yaml file matching the test name
df = fetch_series(
"AMECO",
"ZUTN",
)
provider_codes = df["provider_code"].unique()
assert len(provider_codes) == 1
......@@ -221,8 +282,12 @@ def test_fetch_series_of_dataset():
@pytest.mark.vcr(decode_compressed_response=True)
def test_fetch_series_with_na_values():
# Thanks to @pytest.mark.vcr decorator, this request result will be read from cassettes yaml file (the one that match the test function name)
df = fetch_series("AMECO", "ZUTN", "DEU.1.0.0.0.ZUTN")
# Thanks to @pytest.mark.vcr decorator, this response will be read from cassettes yaml file matching the test name
df = fetch_series(
"AMECO",
"ZUTN",
"DEU.1.0.0.0.ZUTN",
)
assert "NA" in list(df.original_value)
assert not any(df.original_value.isna())
assert "NA" not in list(df.value)
......@@ -231,8 +296,12 @@ def test_fetch_series_with_na_values():
@pytest.mark.vcr(decode_compressed_response=True)
def test_fetch_series_with_max_nb_series():
# Thanks to @pytest.mark.vcr decorator, this request result will be read from cassettes yaml file (the one that match the test function name)
df = fetch_series("AMECO", "ZUTN", max_nb_series=20)
# Thanks to @pytest.mark.vcr decorator, this response will be read from cassettes yaml file matching the test name
df = fetch_series(
"AMECO",
"ZUTN",
max_nb_series=20,
)
assert len(df.series_code.unique()) == 20
......@@ -241,22 +310,33 @@ def test_fetch_series_with_filter_on_one_series():
filters = [
{
"code": "interpolate",
"parameters": {"frequency": "monthly", "method": "spline"},
"parameters": {
"frequency": "monthly",
"method": "spline",
},
}
]
# Thanks to @pytest.mark.vcr decorator, this request result will be read from cassettes yaml file (the one that match the test function name)
df = fetch_series("AMECO", "ZUTN", "DEU.1.0.0.0.ZUTN", filters=filters)
# Thanks to @pytest.mark.vcr decorator, this response will be read from cassettes yaml file matching the test name
df = fetch_series(
"AMECO",
"ZUTN",
"DEU.1.0.0.0.ZUTN",
filters=filters,
)
assert "filtered" in df
assert all(series_code.endswith("_filtered") for series_code in df[df.filtered == True]["series_code"])
assert all(df[df.filtered == True]["@frequency"] == "monthly")
assert all(
series_code.endswith("_filtered")
for series_code in df[df.filtered == True]["series_code"] # noqa: == is a Pandas operator
)
assert all(df[df.filtered == True]["@frequency"] == "monthly") # noqa: == is a Pandas operator
@pytest.mark.vcr(decode_compressed_response=True)
def test_fetch_series_by_api_link():
# Thanks to @pytest.mark.vcr decorator, this request result will be read from cassettes yaml file (the one that match the test function name)
# Thanks to @pytest.mark.vcr decorator, this response will be read from cassettes yaml file matching the test name
df = fetch_series_by_api_link(
default_api_base_url
+ "/series/BIS/xru_current?dimensions=%7B%22FREQ%22%3A%5B%22M%22%5D%2C%22REF_AREA%22%3A%5B%22AL%22%5D%2C%22COLLECTION%22%3A%5B%22E%22%5D%7D&observations=1"
+ "/series/BIS/xru_current?dimensions=%7B%22FREQ%22%3A%5B%22M%22%5D%2C%22REF_AREA%22%3A%5B%22AL%22%5D%2C%22COLLECTION%22%3A%5B%22E%22%5D%7D&observations=1" # noqa
)
provider_codes = df["provider_code"].unique()
......@@ -273,18 +353,27 @@ def test_fetch_series_by_api_link():
@pytest.mark.vcr(decode_compressed_response=True)
def test_fetch_series_without_dimensions_labels():
# Thanks to @pytest.mark.vcr decorator, this request result will be read from cassettes yaml file (the one that match the test function name)
# Thanks to @pytest.mark.vcr decorator, this response will be read from cassettes yaml file matching the test name
df = fetch_series(
"WB",
"DB",
dimensions={
"country": ["FR", "IT", "ES"],
"country": [
"FR",
"IT",
"ES",
],
"indicator": ["IC.REG.COST.PC.FE.ZS.DRFN"],
},
)
# Check that all expected columns are present
expected_columns = {"indicator", "country", "indicator (label)", "country (label)"}
expected_columns = {
"indicator",
"country",
"indicator (label)",
"country (label)",
}
assert expected_columns & set(df.columns) == expected_columns, set(df.columns)
# Check dimensions and dimensions_values_labels
......@@ -295,26 +384,26 @@ def test_fetch_series_without_dimensions_labels():
@pytest.mark.vcr(decode_compressed_response=True)
def test_fetch_series_without_dimensions_values_labels():
# Thanks to @pytest.mark.vcr decorator, this request result will be read from cassettes yaml file (the one that match the test function name)
# Thanks to @pytest.mark.vcr decorator, this response will be read from cassettes yaml file matching the test name
df = fetch_series("simu/lated/series1")
# In the case of any dimensions_values_labels, we do not want dimensions_labels column to be added
assert not "Data Type" in list(df.columns)
assert "Data Type" not in list(df.columns)
@pytest.mark.vcr(decode_compressed_response=True)
def test_fetch_series_without_dimensions_labels_nor_dimensions_values_labels():
# Thanks to @pytest.mark.vcr decorator, this request result will be read from cassettes yaml file (the one that match the test function name)
# Thanks to @pytest.mark.vcr decorator, this response will be read from cassettes yaml file matching the test name
df = fetch_series("simu/lated/series2")
# dimensions_labels column shouldn't exist
assert not "Data Type" in list(df.columns)
assert "Data Type" not in list(df.columns)
@pytest.mark.vcr(decode_compressed_response=True)
def test_fetch_series_that_dont_use_all_defined_dimensions_in_dataset():
"""https://git.nomics.world/dbnomics-fetchers/management/-/issues/660"""
# Thanks to @pytest.mark.vcr decorator, this request result will be read from cassettes yaml file (the one that match the test function name)
# Thanks to @pytest.mark.vcr decorator, this response will be read from cassettes yaml file matching the test name
df = fetch_series("BEA/NIPA-T11706/A191RX-Q")
# Check that all expected columns are present
......
Markdown is supported
Attach a file by drag &amp; drop or click to upload.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment