M changing datasetNAME w/ parent_code code

parent a95cd642
......@@ -211,7 +211,7 @@ def load_dataset_category_code_2_file_0_sheet_1(node):
'''specific dataset'''
sheet = node["_sheet"]
name = sheet.cell(0, 1).value.strip()
code = "{}-{}".format(sheet.cell(1, 1).value.strip(), slugify(node["name"]))
code = "{}-{}-{}".format(node["_parent"]["code"], sheet.cell(1, 1).value.strip(), slugify(node["name"]))
unite = sheet.cell(2, 1).value.strip()
# default and unique dimensions for observations
if name is None:
......@@ -291,6 +291,89 @@ def load_dataset_category_code_2_file_0_sheet_1(node):
return dataset
def load_dataset_category_code_2_file_0_sheet_2(node):
    '''Build the dataset dict for one monthly sheet of the workbook.

    The dataset name, code prefix and unit are read from cells (0, 1),
    (1, 1) and (2, 1) of ``node["_sheet"]``. One series key is derived per
    data column from header rows 8 and 9, and observations are collected
    from row 10 down to the last row of the sheet.

    Args:
        node: mapping providing ``_sheet`` (the sheet to read), ``_book``
            (read for its ``datemode``), ``name`` and ``_parent`` (a mapping
            with a ``_path`` entry).

    Returns:
        The dataset dict, including ``"series"`` and ``"_observations"`` as
        returned by ``build_series`` and ``build_observations``.
    '''
    sheet = node["_sheet"]
    # A blank title cell strips down to "" (never None), so the fallback to
    # the node name has to be triggered by emptiness, not by an identity
    # test against None.
    name = sheet.cell(0, 1).value.strip() or node["name"]
    code = "{}-{}".format(sheet.cell(1, 1).value.strip(), slugify(node["name"]))
    unite = sheet.cell(2, 1).value.strip()
    path = os.path.join(node["_parent"]["_path"], code)
    dataset = {
        "code": code,
        "name": name,
        "_type": "dataset",
        "_path": path,
        "dimensions_labels": {
            "frequency": "Frequency",
            "region": "Region",
            "category": "Category",
            "unit": "Unit"
        },
        "dimensions_values_labels": {
            "frequency": {"M": "Mensuel"},
            "unit": {unite.lower(): unite.capitalize()}
        },
        "dimensions_codes_order": [
            "region",
            "category",
            "unit",
            "frequency"
        ],
        "_children": "series",
    }
    start_line = 10
    end_line = sheet.nrows

    # Dimension values discovered from the two header rows; `dimensions`
    # is a list so that series keys stay in column order.
    categories = {}
    regions = {}
    dimensions = []
    headers = zip(sheet.row_values(8)[1:], sheet.row_values(9)[1:])
    for top_label, sub_label in headers:
        parts = sub_label.split("(France)")
        if len(parts) == 1:
            # No "(France)" marker in the sub-header: metropolitan France
            # unless stated otherwise.
            region_code, region_label = "fx", "France métropolitaine"
        else:
            region_code, region_label = "fr", "France"
        # Only the text before the first "(France)" is kept, line breaks
        # removed.
        category_label = parts[0].replace("\n", "")
        category_code = slugify(top_label)
        categories[category_code] = "{} – {}".format(top_label, category_label)
        regions[region_code] = region_label
        dimensions.append("_".join([region_code, category_code, "milliers", "M"]))

    # Fill in the dimension value labels that depend on the headers.
    dataset["dimensions_values_labels"]["category"] = categories
    dataset["dimensions_values_labels"]["region"] = regions

    # One {period: value} mapping per series key.
    observations_d = {series_key: {} for series_key in dimensions}
    for line_nb in range(start_line, end_line):
        row = sheet.row_values(line_nb)
        # The first column holds the spreadsheet date of the observation;
        # it is reduced to a "YYYY-MM" period.
        period = datetime.datetime(
            *xlrd.xldate.xldate_as_tuple(row[0], node["_book"].datemode)
        ).strftime("%Y-%m")
        for series_key, value in zip(dimensions, row[1:]):
            observations_d[series_key][period] = value

    dataset["series"] = build_series(dataset, observations_d)
    dataset["_observations"] = build_observations(dataset, observations_d)
    return dataset
def build_series(dataset, observations):
'''given the dataset parent and the dimension name build the corresponding series_l'''
series_l = []
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment