Commit b08c6132 authored by Christophe Benz's avatar Christophe Benz

Implement filtering series

parent 4d97415d
## 1.1.0
- Implement filtering series by passing a `filters` argument to the `fetch_series` and `fetch_series_by_api_link` functions.
- Improve the order of columns in the DataFrame to make it more convenient to read: frequency, provider, dataset, series and dimensions.
- Enhance error reporting if a series can't be fetched: error is now displayed in red before the DataFrame.
## 1.0.2
The DataFrame returned by `fetch_series_by_api_link` and `fetch_series` now uses `numpy.NaN` to represent "NA" (not available) values in its `value` column. A new column named `original_value` is added, in the same spirit as the `original_period` column, to give access to the data as stored by DBnomics, although users will usually prefer the `value` column.
......
This diff is collapsed.
......@@ -35,7 +35,7 @@ with codecs.open(path.join(HERE, 'README.md'), encoding='utf-8') as f:
setup(
name='DBnomics',
version='1.0.2',
version='1.1.0',
description='DBnomics Python Client',
long_description=LONG_DESCRIPTION,
......
......@@ -18,54 +18,54 @@
# along with this program. If not, see <https://www.gnu.org/licenses/>.
import logging
import pandas as pd
from dbnomics import default_api_base_url, fetch_series, fetch_series_by_api_link
# `pandas.np` was deprecated in pandas 1.0 and removed in pandas 2.0,
# so bind `np` by importing NumPy directly instead of going through pandas.
import numpy as np
def test_fetch_series_by_code():
    """Fetch a single series by provider code, dataset code and series code.

    The returned DataFrame must contain exactly one provider ("AMECO"),
    one dataset ("ZUTN") and one series ("EA19.1.0.0.0.ZUTN").
    """
    df = fetch_series('AMECO', 'ZUTN', 'EA19.1.0.0.0.ZUTN')

    provider_codes = df["provider_code"].unique()
    assert len(provider_codes) == 1
    assert provider_codes[0] == "AMECO"

    dataset_codes = df["dataset_code"].unique()
    assert len(dataset_codes) == 1
    assert dataset_codes[0] == "ZUTN"

    series_codes = df["series_code"].unique()
    assert len(series_codes) == 1
    assert series_codes[0] == "EA19.1.0.0.0.ZUTN"
def test_fetch_series_by_code_mask():
    """Fetch several series of one dataset with a `+`-separated series code mask.

    The mask "M.FR+DE.PCPIEC_IX+PCPIA_IX" is expected to yield four distinct
    series codes, all belonging to provider "IMF" and dataset "CPI".
    """
    df = fetch_series("IMF", "CPI", "M.FR+DE.PCPIEC_IX+PCPIA_IX")

    provider_codes = df["provider_code"].unique()
    assert len(provider_codes) == 1
    assert provider_codes[0] == "IMF"

    dataset_codes = df["dataset_code"].unique()
    assert len(dataset_codes) == 1
    assert dataset_codes[0] == "CPI"

    series_codes = df["series_code"].unique()
    assert len(series_codes) == 4
def test_fetch_series_by_code_mask_with_plus_in_dimension_code():
    """Fetch series with a mask whose dimension codes themselves contain `+`.

    Dimension codes containing a literal `+` are double-quoted in the mask
    ('"J+K"+"G+H".AM0301C1'); the result must hold exactly the two series
    'J+K.AM0301C1' and 'G+H.AM0301C1' of provider "SCB", dataset "AKIAM".
    """
    df = fetch_series('SCB', 'AKIAM', '"J+K"+"G+H".AM0301C1')

    provider_codes = df["provider_code"].unique()
    assert len(provider_codes) == 1
    assert provider_codes[0] == "SCB"

    dataset_codes = df["dataset_code"].unique()
    assert len(dataset_codes) == 1
    assert dataset_codes[0] == "AKIAM"

    series_codes = df["series_code"].unique()
    # Order is not asserted here, so compare as a set and show the codes on failure.
    assert set(series_codes) == {'J+K.AM0301C1', 'G+H.AM0301C1'}, series_codes
......@@ -75,16 +75,16 @@ def test_fetch_series_by_id():
df = fetch_series('AMECO/ZUTN/EA19.1.0.0.0.ZUTN')
provider_codes = df["provider_code"].unique()
assert len(provider_codes) == 1, df
assert provider_codes[0] == "AMECO", df
assert len(provider_codes) == 1
assert provider_codes[0] == "AMECO"
dataset_codes = df["dataset_code"].unique()
assert len(dataset_codes) == 1, df
assert dataset_codes[0] == "ZUTN", df
assert len(dataset_codes) == 1
assert dataset_codes[0] == "ZUTN"
series_codes = df["series_code"].unique()
assert len(series_codes) == 1, df
assert series_codes[0] == "EA19.1.0.0.0.ZUTN", df
assert len(series_codes) == 1
assert series_codes[0] == "EA19.1.0.0.0.ZUTN"
def test_fetch_series_by_ids_in_same_dataset():
......@@ -94,17 +94,17 @@ def test_fetch_series_by_ids_in_same_dataset():
])
provider_codes = df["provider_code"].unique()
assert len(provider_codes) == 1, df
assert provider_codes[0] == "AMECO", df
assert len(provider_codes) == 1
assert provider_codes[0] == "AMECO"
dataset_codes = df["dataset_code"].unique()
assert len(dataset_codes) == 1, df
assert dataset_codes[0] == "ZUTN", df
assert len(dataset_codes) == 1
assert dataset_codes[0] == "ZUTN"
series_codes = df["series_code"].unique()
assert len(series_codes) == 2, df
assert series_codes[0] == "EA19.1.0.0.0.ZUTN", df
assert series_codes[1] == "DNK.1.0.0.0.ZUTN", df
assert len(series_codes) == 2
assert series_codes[0] == "EA19.1.0.0.0.ZUTN"
assert series_codes[1] == "DNK.1.0.0.0.ZUTN"
def test_fetch_series_by_ids_in_different_datasets():
......@@ -114,19 +114,19 @@ def test_fetch_series_by_ids_in_different_datasets():
])
provider_codes = df["provider_code"].unique()
assert len(provider_codes) == 2, df
assert provider_codes[0] == "AMECO", df
assert provider_codes[1] == "BIS", df
assert len(provider_codes) == 2
assert provider_codes[0] == "AMECO"
assert provider_codes[1] == "BIS"
dataset_codes = df["dataset_code"].unique()
assert len(dataset_codes) == 2, df
assert dataset_codes[0] == "ZUTN", df
assert dataset_codes[1] == "PP-SS", df
assert len(dataset_codes) == 2
assert dataset_codes[0] == "ZUTN"
assert dataset_codes[1] == "PP-SS"
series_codes = df["series_code"].unique()
assert len(series_codes) == 2, df
assert series_codes[0] == "EA19.1.0.0.0.ZUTN", df
assert series_codes[1] == "Q.AU.N.628", df
assert len(series_codes) == 2
assert series_codes[0] == "EA19.1.0.0.0.ZUTN"
assert series_codes[1] == "Q.AU.N.628"
def test_fetch_series_by_dimension():
......@@ -136,30 +136,30 @@ def test_fetch_series_by_dimension():
})
provider_codes = df["provider_code"].unique()
assert len(provider_codes) == 1, df
assert provider_codes[0] == "WB", df
assert len(provider_codes) == 1
assert provider_codes[0] == "WB"
dataset_codes = df["dataset_code"].unique()
assert len(dataset_codes) == 1, df
assert dataset_codes[0] == "DB", df
assert len(dataset_codes) == 1
assert dataset_codes[0] == "DB"
series_codes = df["series_code"].unique()
assert len(series_codes), df
assert len(series_codes)
def test_fetch_series_of_dataset():
    """Fetch series of a dataset by giving only provider and dataset codes.

    The result must be limited to provider "AMECO" and dataset "ZUTN" and
    contain more than one distinct series code.
    """
    df = fetch_series('AMECO', 'ZUTN')

    provider_codes = df["provider_code"].unique()
    assert len(provider_codes) == 1
    assert provider_codes[0] == "AMECO"

    dataset_codes = df["dataset_code"].unique()
    assert len(dataset_codes) == 1
    assert dataset_codes[0] == "ZUTN"

    series_codes = df["series_code"].unique()
    assert len(series_codes) > 1
def test_fetch_series_by_api_link():
......@@ -168,15 +168,15 @@ def test_fetch_series_by_api_link():
"/series/BIS/PP-SS?dimensions=%7B%22FREQ%22%3A%5B%22Q%22%5D%2C%22REF_AREA%22%3A%5B%22AU%22%5D%7D&observations=1")
provider_codes = df["provider_code"].unique()
assert len(provider_codes) == 1, df
assert provider_codes[0] == "BIS", df
assert len(provider_codes) == 1
assert provider_codes[0] == "BIS"
dataset_codes = df["dataset_code"].unique()
assert len(dataset_codes) == 1, df
assert dataset_codes[0] == "PP-SS", df
assert len(dataset_codes) == 1
assert dataset_codes[0] == "PP-SS"
series_codes = df["series_code"].unique()
assert len(series_codes), df
assert len(series_codes)
def test_fetch_series_with_na_values():
......@@ -185,3 +185,46 @@ def test_fetch_series_with_na_values():
assert not any(df.original_value.isna())
assert "NA" not in list(df.value)
assert any(df.value.isna())
def test_fetch_series_with_max_nb_series():
    """Check that `max_nb_series=20` yields exactly 20 distinct series codes."""
    df = fetch_series('AMECO', 'ZUTN', max_nb_series=20)
    assert len(df.series_code.unique()) == 20
def test_fetch_series_with_filter_on_one_series():
    """Apply an "interpolate" filter to one series and check the filtered rows.

    The DataFrame must expose the `filtered` and `source_series_code` columns,
    and every row flagged as filtered must have "monthly" as `@frequency`.
    """
    filters = [{"code": "interpolate", "parameters": {"frequency": "monthly", "method": "spline"}}]
    df = fetch_series('AMECO', 'ZUTN', 'DEU.1.0.0.0.ZUTN', filters=filters)

    assert "filtered" in df
    assert "source_series_code" in df

    # Element-wise comparison on a pandas column: `== True` is intentional.
    filtered_rows = df[df.filtered == True]  # noqa: E712
    # `all()` over an empty selection is vacuously true, so make sure the
    # filter actually produced rows before checking their frequency.
    assert len(filtered_rows) > 0
    assert all(filtered_rows["@frequency"] == "monthly")
def test_fetch_series_with_filter_on_one_series_with_filter_error(caplog):
    """Request an unknown filter code and check the error is logged.

    With filter code "foo", no row may be flagged as filtered and exactly one
    log record, of level ERROR and mentioning "Filter not found", is expected.

    :param caplog: pytest fixture capturing log records.
    """
    filters = [{"code": "foo", "parameters": {}}]
    with caplog.at_level(logging.INFO):
        df = fetch_series('AMECO', 'ZUTN', 'DEU.1.0.0.0.ZUTN', filters=filters)

    # Element-wise comparison on a pandas column: `== False` is intentional.
    assert all(df.filtered == False)  # noqa: E712
    assert len(caplog.records) == 1
    assert caplog.records[0].levelname == 'ERROR'
    assert "Filter not found" in caplog.records[0].message
def test_fetch_series_with_filter_on_one_series_with_filter_parameter_error(caplog):
    """Pass an invalid parameter to the "interpolate" filter and check the log.

    With parameters {"foo": "bar"}, no row may be flagged as filtered and
    exactly one log record, of level ERROR and mentioning
    "Error with filter parameters", is expected.

    :param caplog: pytest fixture capturing log records.
    """
    filters = [{"code": "interpolate", "parameters": {"foo": "bar"}}]
    with caplog.at_level(logging.INFO):
        df = fetch_series('AMECO', 'ZUTN', 'DEU.1.0.0.0.ZUTN', filters=filters)

    # Element-wise comparison on a pandas column: `== False` is intentional.
    assert all(df.filtered == False)  # noqa: E712
    assert len(caplog.records) == 1
    assert caplog.records[0].levelname == 'ERROR'
    assert "Error with filter parameters" in caplog.records[0].message
def test_fetch_series_with_filter_on_one_series_with_wrong_frequency(caplog):
    """Ask the "aggregate" filter for an "annual" frequency and check the log.

    No row may be flagged as filtered and exactly one log record, of level
    ERROR and mentioning "Annual is already the lowest frequency", is expected.

    :param caplog: pytest fixture capturing log records.
    """
    filters = [{"code": "aggregate", "parameters": {"frequency": "annual"}}]
    with caplog.at_level(logging.INFO):
        df = fetch_series('AMECO', 'ZUTN', 'DEU.1.0.0.0.ZUTN', filters=filters)

    # Element-wise comparison on a pandas column: `== False` is intentional.
    assert all(df.filtered == False)  # noqa: E712
    assert len(caplog.records) == 1
    assert caplog.records[0].levelname == 'ERROR'
    assert "Annual is already the lowest frequency" in caplog.records[0].message
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment