Commit 7a2a492e authored by Bruno Duyé's avatar Bruno Duyé

Model 0.9.1: add notes to model + doc

parent 0aacc3db
......@@ -15,6 +15,10 @@ Breaking changes
Breaking changes
- remove `notes` property; use `description` instead
### 0.9.0 -> 0.9.1
Non-breaking changes
- re-add `notes` property; it complements the `description` property
## series.json
......@@ -25,3 +29,7 @@ Breaking changes
Non-breaking changes
- add `attributes`, `description`, `doc_href`, `next_release_at`, `updated_at` properties
- example: see [here](./tests/fixtures/provider3-json-data/dataset1/dataset.json)
### 0.9.0 -> 0.9.1
Non-breaking changes
- re-add `notes` property; it complements the `description` property
......@@ -171,6 +171,14 @@ Examples:
- [this dataset](./tests/fixtures/provider2-json-data/dataset1) stores observations in TSV files
- [this dataset](./tests/fixtures/provider2-json-data/dataset2) stores observations in `series.jsonl`
## Adding documentation to data (description and notes fields)
Datasets and series can be documented using `description` and `notes` fields.
- `description` explains what the data means
- `notes` presents some remarks about the data. Example: "Before March 2002, exposures were netted across the banking and trading books. This has necessitated a break in the series."
=> see [this example](tests/fixtures/provider3-json-data/dataset1/dataset.json)
## Data validation
DBnomics-data-model comes with a validation script.
......
{
"$schema": "http://json-schema.org/draft-04/schema#",
"title": "Dataset",
"description": "A dataset of a provider, containing time series",
"type": "object",
"additionalProperties": false,
"properties": {
"attributes_labels": {
"$ref": "definitions.json#/objectOfLabels"
},
"attributes_values_labels": {
"$ref": "definitions.json#/objectOfValuesLabels"
},
"code": {
"$ref": "definitions.json#/datasetCodeString"
},
"description": {
"description": "Description of the data given by this dataset (different from notes field; see README for more details)",
"$ref": "definitions.json#/nonEmptyString"
},
"notes": {
"type": "array",
"description": "List of notes on the data (this is different from description field; see README for more details)",
"items": {
"$ref": "definitions.json#/nonEmptyString"
}
},
"dimensions_codes_order": {
"type": "array",
"items": {
"$ref": "definitions.json#/nonEmptyString"
}
},
"dimensions_labels": {
"$ref": "definitions.json#/objectOfLabels"
},
"dimensions_values_labels": {
"type": "object",
"patternProperties": {
"^.*$": {
"anyOf": [
{
"$ref": "definitions.json#/objectOfLabels"
},
{
"type": "array",
"items": {
"type": "array",
"items": {
"type": "string"
},
"minItems": 2,
"maxItems": 2
}
}
],
"description": "Labels of the values of the dimensions (e.g. `Monthly` for `M`)."
}
}
},
"doc_href": {
"description": "Resource representing the dataset on the provider's website",
"type": "string",
"format": "uri"
},
"name": {
"$ref": "definitions.json#/nonEmptyString"
},
"next_release_at": {
"description": "Instant when the dataset will be released, as given by the provider, if known",
"type": "string",
"format": "date-time"
},
"source_href": {
"description": "Resource containing series values of the dataset",
"type": "string",
"format": "uri"
},
"series": {
"description": "Series of the dataset. Can be stored in series.jsonl also.",
"type": "array",
"items": {
"$ref": "series.json#/"
}
},
"updated_at": {
"description": "Instant when the dataset was last updated, as given by the provider, if known",
"type": "string",
"format": "date-time"
}
},
"required": [
"code"
]
}
\ No newline at end of file
{
"$schema": "http://json-schema.org/draft-04/schema#",
"title": "Series",
"description": "A time series of a dataset",
"type": "object",
"additionalProperties": false,
"properties": {
"attributes": {
"description": "Attributes of the time series. Attributes are informative and don't partition the dataset in groups of time series.",
"type": "object",
"patternProperties": {
"^.*$": {
"$ref": "definitions.json#/nonEmptyString"
}
}
},
"code": {
"$ref": "definitions.json#/seriesCodeString"
},
"description": {
"description": "Description of the data given by this series (different from notes field; see README for more details)",
"$ref": "definitions.json#/nonEmptyString"
},
"notes": {
"type": "array",
"description": "List of notes on the data (this is different from description field; see README for more details)",
"items": {
"$ref": "definitions.json#/nonEmptyString"
}
},
"dimensions": {
"description": "Dimensions of the time series. Dimensions partition the dataset in groups of time series.",
"type": "object",
"patternProperties": {
"^.*$": {
"$ref": "definitions.json#/nonEmptyString"
}
}
},
"doc_href": {
"description": "Resource representing the time series on the provider's website",
"type": "string",
"format": "uri"
},
"name": {
"$ref": "definitions.json#/nonEmptyString"
},
"next_release_at": {
"description": "Instant when the time series will be released, as given by the provider, if known",
"type": "string",
"format": "date-time"
},
"observations": {
"description": "Observations of the time series in a row-oriented way",
"type": "array",
"items": [
{
"description": "Column names (the header)",
"type": "array",
"items": {
"type": "string"
}
}
],
"additionalItems": {
"description": "Cells of the observations (period, value, other attributes...)",
"type": "array",
"items": [
{
"description": "Period",
"type": "string"
},
{
"description": "Value",
"oneOf": [
{
"description": "Floating-point value",
"type": "number"
},
{
"description": "NA value",
"type": "string",
"enum": [
"NA"
]
}
]
}
],
"additionalItems": {
"description": "Observation attributes",
"type": "string"
}
}
},
"updated_at": {
"description": "Instant when the time series was last updated, as given by the provider, if known",
"type": "string",
"format": "date-time"
}
},
"required": [
"code"
]
}
\ No newline at end of file
{
"$schema": "https://git.nomics.world/dbnomics/dbnomics-data-model/raw/master/dbnomics_data_model/schemas/v0.9.1/dataset.json",
"code": "dataset1",
"name": "Goods import",
"attributes_labels": {
......@@ -52,6 +53,10 @@
]
]
},
"description": "This dataset present interesting data about statistical things",
"notes": [
"Data from May in Statistical Tables C1 to C6 incorporate changes to the coverage and methodology of the Retail Payments Statistics survey. These changes introduced data breaks for some series in May. More information are available in ‘Notes’ and ‘Series Breaks’ sections of the data files."
],
"doc_href": "https://en.wikipedia.org/wiki/Kilogram#SI_multiples",
"series": [
{
......@@ -63,13 +68,21 @@
"unit": "KG"
},
"attributes": {
"UNIT_MULT":"9"
}
"UNIT_MULT": "9"
},
"description": "Banks' consolidated group off-balance sheet business: Direct credit substitutes; For series breaks see Series breaks",
"notes": [
"These data are derived from returns submitted to the Australian Prudential Regulation Authority (APRA) by banks authorised under the Banking Act 1959. APRA assumed responsibility for the supervision and regulation of banks on 1 July 1998. Data prior to that date were submitted to the RBA.",
"Prior to March 2002, banks reported quarterly to APRA on the Off-balance Sheet Business Return. From that date until the end of 2007, banks reported quarterly on ARF 112.2: Capital Adequacy – Off-balance Sheet Business. Following the introduction of a new capital framework (Basel II) on 1 January 2008, the data between March 2008 and March 2011 were reported on either ARF 112.2: Capital Adequacy – Off-balance Sheet Business, ARF 112.2A: Standardised Credit Risk – Off-balance Sheet Exposures, or ARF 118.0: Off-balance Sheet Business, depending on whether the bank had been approved by APRA to use a Basel II advanced approach to credit risk. Following the revocation of Australian Prudential Standard APS150 on 30 June 2011, banks using the advanced approach to credit risk have been required to report data with reference to the Basel II framework. From June 2011, data are reported on ARF 112.2A: Standardised Credit Risk – Off-balance Sheet Exposures, ARF 118.0: Off-balance Sheet Business, or ARF 118.1: Other Off-balance Sheet Exposures, depending on whether the bank has been approved by APRA to use a Basel II advanced approach to credit risk."
]
},
{
"code": "A.SP.GG",
"name": "Annual Spain Gigagram",
"description": "This series is cool",
"notes": [
"Data after january 2018 is estimated"
],
"doc_href": "http://example.com",
"dimensions": {
"freq": "A",
......@@ -77,7 +90,7 @@
"unit": "GG"
},
"attributes": {
"UNIT_MULT":"9"
"UNIT_MULT": "9"
}
},
{
......@@ -98,7 +111,7 @@
"unit": "KG"
},
"attributes": {
"UNIT_MULT":"9"
"UNIT_MULT": "9"
}
}
]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment