Commit a7efb003 authored by Christophe Benz
Browse files

Add comments about cryptic code

parent f6537444
Pipeline #83226 passed with stage
in 42 seconds
......@@ -180,6 +180,7 @@ def fetch_series_by_api_link(api_link, max_nb_series=None,
# Call API via `iter_series`, store result in `series_list`.
series_list = list(iter_series(api_link, max_nb_series=max_nb_series))
if len(series_list) == 0:
......@@ -188,8 +189,10 @@ def fetch_series_by_api_link(api_link, max_nb_series=None,
common_columns = ["@frequency", "provider_code", "dataset_code", "dataset_name", "series_code", "series_name",
"original_period", "period", "original_value", "value"]
# Normalize series received from the API (rename some keys of JSON result to match DataFrame organization).
normalized_series_list = list(map(normalize_dbnomics_series, series_list))
# Only applies if filters are used.
if filters:
common_columns.insert(common_columns.index("period") + 1, "period_middle_day")
......@@ -203,9 +206,22 @@ def fetch_series_by_api_link(api_link, max_nb_series=None,
for series in normalized_series_list
] + filtered_series_list
all_columns = set.union(*[set(series.keys()) for series in normalized_series_list])
# `normalized_series_list` is a list of dicts like [{"code": "A.B.C", "a_key": 9}, {"code": "X.Y.Z", "other_key": 42}]
# Each series can have different keys, so we compute the union of the keys of all the series, e.g. {"code", "a_key", "other_key"}.
# In the DataFrame the different columns will be sparse (there will be `NaN` values when a series does not have a specific key).
# code | a_key | other_key
# ----- | ----- | ---------
# A.B.C | 9 | NaN
# X.Y.Z | NaN | 42
def union_sets(sets):
    """Return a new set containing every element found in any of `sets`.

    `sets` is an iterable of sets. An empty iterable yields an empty set
    instead of raising `TypeError`, which is what the unbound
    `set.union(*sets)` call does when it receives no argument at all.
    """
    # Start from an empty set so the call stays valid when `sets` is empty.
    return set().union(*sets)
all_columns = union_sets([set(series.keys()) for series in normalized_series_list])
dimension_columns = sorted(all_columns - set(common_columns))
# In the DataFrame we want to display the dimension columns at the right so we reorder them.
ordered_columns = common_columns + dimension_columns
dataframes = (
pd.DataFrame(data=series, columns=ordered_columns)
for series in normalized_series_list
......@@ -308,7 +324,11 @@ def iter_series(api_link, max_nb_series=None):
def normalize_dbnomics_series(series):
"""Adapt DBnomics series attributes to ease DataFrame construction."""
"""Adapt DBnomics series attributes to ease DataFrame construction.
Rename some dict attributes, flatten other ones
(the `series` dict is nested but we want a flat dict to build a DataFrame).
series = normalize_period(series)
series = normalize_value(series)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment