Commit ef0557a6 authored by Christophe Benz

Replace NA by np.NaN

parent 80d465b0
## next
The DataFrames returned by `fetch_series_by_api_link` and `fetch_series` now use `numpy.NaN` to represent "NA" (not available) values in their `value` column. A new column named `original_value` is added, in the same spirit as the `original_period` column, to give access to the data as stored by DBnomics, although users may prefer to use the `value` column.
## 1.0.0 -> 1.0.1
Fix fetching all the series of a dataset.
......
......@@ -67,13 +67,13 @@ def fetch_series(provider_code=None, dataset_code=None, series_code=None, dimens
If the rightmost dimension value code is removed, then the final '.' can be removed too: `A.FR.` = `A.FR`.
If not None, `series_ids` parameter must be a non-empty `list` of series IDs.
If not `None`, `series_ids` parameter must be a non-empty `list` of series IDs.
A series ID is a string formatted like `provider_code/dataset_code/series_code`.
Return a Python Pandas `DataFrame`.
If `max_nb_series` is `None`, a default value of 50 series will be used.
Return a Python Pandas `DataFrame`.
Examples:
- fetch one series:
......@@ -174,12 +174,21 @@ def fetch_series_by_api_link(api_link, max_nb_series=None):
series_json_list = series_json_page['docs']
for series_json in series_json_list:
# Rename keys and convert str to datetime.
# Keep original period and convert str to datetime.
period = series_json.pop("period", [])
period_start_day = series_json.pop("period_start_day", [])
series_json["original_period"] = period
series_json["period"] = list(map(pd.to_datetime, period_start_day))
# Keep original value and convert "NA" to None (or user specified value).
value = series_json.pop("value", [])
series_json["original_value"] = value
series_json['value'] = [
# None will be replaced by np.NaN in DataFrame construction.
None if v == 'NA' else v
for v in value
]
# Flatten dimensions.
dimensions = series_json.pop("dimensions", {})
for dimension_code, dimension_value_code in dimensions.items():
......
This diff is collapsed.
......@@ -18,8 +18,12 @@
# along with this program. If not, see <https://www.gnu.org/licenses/>.
import pandas as pd
from dbnomics import default_api_base_url, fetch_series, fetch_series_by_api_link
# Import numpy directly: the `pandas.np` alias was deprecated in pandas 1.0
# and removed in pandas 2.0, so `pd.np` raises AttributeError there.
import numpy as np
def test_fetch_series_by_code():
df = fetch_series('AMECO', 'ZUTN', 'EA19.1.0.0.0.ZUTN')
......@@ -173,3 +177,11 @@ def test_fetch_series_by_api_link():
series_codes = df["series_code"].unique()
assert len(series_codes), df
def test_fetch_series_with_na_values():
    """Check how "NA" observations are exposed by `fetch_series`.

    The `original_value` column must keep the raw "NA" strings (and hold no
    null entries), while the `value` column must contain no "NA" string and
    at least one null entry instead.
    """
    frame = fetch_series('AMECO', 'ZUTN', 'DEU.1.0.0.0.ZUTN')

    # Raw values: "NA" markers are kept verbatim, nothing is null.
    raw_values = frame.original_value
    assert "NA" in list(raw_values)
    assert not raw_values.isna().any()

    # Converted values: "NA" markers have been turned into nulls.
    converted_values = frame.value
    assert "NA" not in list(converted_values)
    assert converted_values.isna().any()
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment