Commit 9a826cd2 authored by Christophe Benz
Browse files

Relax validation checks in `BaseDatasetJson`

parent 172003e9
Pipeline #339214 passed with stage
in 1 minute and 10 seconds
......@@ -2,6 +2,8 @@
## next
- Relax checks in `BaseDatasetJson`: only raise a `ValidationError` if `dimensions_labels` or `dimensions_values_labels` define extra keys that are not listed in `dimensions_codes_order`. Do not fail in the inverse situation (a code listed in `dimensions_codes_order` without a label), because labels are optional, and skip the check entirely if `dimensions_codes_order` is not defined.
## 0.13.25
- Add a `warn_when_scanning` option (default is `True`) to `FileSystemStorage.iter_series_jsonl_variant` to disable the warning that is logged when a JSON Lines file is scanned sequentially (rather than accessed by seeking). This is useful, for example, for the indexing script, which scans the file precisely in order to index the offsets of the different series and thereby enable seeking.
......
......@@ -59,37 +59,33 @@ class BaseDatasetJson(BaseModel):
@root_validator(pre=False)
def check_dimensions_labels_keys(cls, values):
"""Check that dimensions_codes_order and dimensions_labels keys match.
"""Validate the keys of the `dimensions_labels` attribute.
This check is done only if dimensions_codes_order is defined.
Check that every key of `dimensions_labels` is defined in `dimensions_codes_order`, if it's defined.
"""
dimensions_codes_order = values.get("dimensions_codes_order")
if dimensions_codes_order is None:
return values
dimensions_codes_order = set(dimensions_codes_order)
dimensions_labels_keys = set(values["dimensions_labels"].keys())
if dimensions_codes_order != dimensions_labels_keys:
raise ValueError(
f"{dimensions_codes_order=} "
f"and the keys of dimensions_labels {dimensions_labels_keys!r} do not match"
)
cls._check_extra_keys_versus_dimensions_codes_order(values, attribute_name="dimensions_labels")
return values
@root_validator(pre=False)
def check_dimensions_values_labels_keys(cls, values):
"""Check that dimensions_codes_order and dimensions_values_labels keys match.
"""Validate the keys of the `dimensions_values_labels` attribute.
This check is done only if dimensions_codes_order is defined.
Check that every key of `dimensions_values_labels` is defined in `dimensions_codes_order`, if it's defined.
"""
cls._check_extra_keys_versus_dimensions_codes_order(values, attribute_name="dimensions_values_labels")
return values
@classmethod
def _check_extra_keys_versus_dimensions_codes_order(cls, values, *, attribute_name: str):
dimensions_codes_order = values.get("dimensions_codes_order")
if dimensions_codes_order is None:
return values
dimensions_codes_order = set(dimensions_codes_order)
dimensions_values_labels_keys = set(values["dimensions_values_labels"].keys())
if dimensions_codes_order != dimensions_values_labels_keys:
keys = set(values[attribute_name].keys())
extra_keys = keys - dimensions_codes_order
if extra_keys:
raise ValueError(
f"dimensions_codes_order {dimensions_codes_order!r} "
f"and dimensions_values_labels keys {dimensions_values_labels_keys!r} do not match"
f"The keys {extra_keys!r} are defined in {attribute_name!r} but not in {dimensions_codes_order=}"
)
return values
......
......@@ -18,23 +18,72 @@ def test_instance_is_valid():
)
def test_check_dimensions_labels_keys_invalid():
def test_check_dimensions_labels_keys_valid_when_missing():
    """Constructing a dataset without `dimensions_labels` must not raise."""
    codes_order = ["FREQ", "SUBJECT"]
    # No assertion needed: the test passes as long as construction succeeds.
    TsvDatasetJson(code="d1", dimensions_codes_order=codes_order)
def test_check_dimensions_labels_keys_valid_when_empty():
    """Passing an empty `dimensions_labels` mapping must not raise."""
    codes_order = ["FREQ", "SUBJECT"]
    no_labels = {}
    # No assertion needed: the test passes as long as construction succeeds.
    TsvDatasetJson(code="d1", dimensions_codes_order=codes_order, dimensions_labels=no_labels)
def test_check_dimensions_labels_keys_valid_when_partial():
    """Labelling only some of the declared dimensions must not raise."""
    codes_order = ["FREQ", "SUBJECT"]
    labels = {"FREQ": "Frequency"}  # "SUBJECT" is intentionally left unlabelled
    values_labels = {"FREQ": {"A": "Annual"}, "SUBJECT": {"S1": "Subject 1"}}
    # No assertion needed: the test passes as long as construction succeeds.
    TsvDatasetJson(
        code="d1",
        dimensions_codes_order=codes_order,
        dimensions_labels=labels,
        dimensions_values_labels=values_labels,
    )
def test_check_dimensions_labels_keys_invalid_when_extra():
with pytest.raises(ValidationError):
TsvDatasetJson(
code="d1",
dimensions_codes_order=["FREQ", "SUBJECT"],
dimensions_labels={"FREQ": "Frequency"},
dimensions_values_labels={"FREQ": {"A": "Annual"}, "SUBJECT": {"S1": "Subject 1"}},
dimensions_labels={"COUNTRY": "France"},
)
def test_check_dimensions_values_labels_keys_invalid():
def test_check_dimensions_values_labels_keys_valid_when_missing():
    """Constructing a dataset without `dimensions_values_labels` must not raise.

    The attribute is deliberately omitted rather than passed as `{}`: the
    empty-mapping case is already covered by
    `test_check_dimensions_values_labels_keys_valid_when_empty`, and passing
    `{}` here would leave the "missing" case untested.
    """
    # Just test that no exception is raised.
    TsvDatasetJson(
        code="d1",
        dimensions_codes_order=["FREQ", "SUBJECT"],
    )
def test_check_dimensions_values_labels_keys_valid_when_empty():
    """Passing an empty `dimensions_values_labels` mapping must not raise."""
    codes_order = ["FREQ", "SUBJECT"]
    no_values_labels = {}
    # No assertion needed: the test passes as long as construction succeeds.
    TsvDatasetJson(
        code="d1",
        dimensions_codes_order=codes_order,
        dimensions_values_labels=no_values_labels,
    )
def test_check_dimensions_values_labels_keys_valid_when_partial():
    """Providing value labels for only some declared dimensions must not raise."""
    codes_order = ["FREQ", "SUBJECT"]
    values_labels = {"FREQ": {"A": "Annual"}}  # "SUBJECT" is intentionally absent
    # No assertion needed: the test passes as long as construction succeeds.
    TsvDatasetJson(
        code="d1",
        dimensions_codes_order=codes_order,
        dimensions_values_labels=values_labels,
    )
def test_check_dimensions_values_labels_keys_invalid_when_extra():
with pytest.raises(ValidationError):
TsvDatasetJson(
code="d1",
dimensions_codes_order=["FREQ", "SUBJECT"],
dimensions_labels={"FREQ": "Frequency", "SUBJECT": "Subject"},
dimensions_values_labels={"FREQ": {"A": "Annual"}},
dimensions_values_labels={"COUNTRY": {"FR": "France"}},
)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment