entsoe-fetcher · Commit 3c6297e6
Authored 4 years ago by Pierre Dittgen

1st convert

Parent: aa9049a3

Showing 1 changed file: convert.py (+50 additions, −17 deletions)
@@ -40,10 +40,11 @@ import sys
 from operator import itemgetter
 from pathlib import Path
 from collections import defaultdict
-from typing import List, Dict
+from typing import List, Dict, Iterator

 import pycountry
 import pandas as pd
 from slugify import slugify

 PROVIDER_DATA = {
     "code": "ENTSOE",
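The added `Iterator` import supports the generator return annotation used in the refactor below. A minimal sketch of the annotation pattern (illustrative names only, not part of the commit):

    from typing import Dict, Iterator

    def iter_items() -> Iterator[Dict]:
        # A generator function: each yield lazily produces one dict
        yield {"code": "example"}

    items = list(iter_items())  # materialize only when a list is needed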
@@ -56,28 +57,54 @@ PROVIDER_DATA = {
 log = logging.getLogger(__name__)


-def compute_series_info(
+def iter_series_info(
     country_code: str, csv_files: List[Path], dim_acc: Dict[str, Dict]
-):
-    country = pycountry.countries.get(alpha_2=country_code)
+) -> Iterator[Dict]:
+    """Iterate series information from given CSV files.
+
+    Update dimensions accumulator (dim_acc) as a side effect
+    """
+    # Set country name for series
+    country = pycountry.countries.get(alpha_2=country_code)
     dim_acc["country"][country_code] = country.name

-    df_total = None
-    for idx, csv_file in enumerate(csv_files):
-        df = pd.read_csv(csv_file, index_col=1, header=[0, 1])
-        df_total = df if df_total is None else df_total.append(df)
+    # aggregate dataframe along years
+    frames = [pd.read_csv(csv_file, index_col=0, header=[0, 1]) for csv_file in csv_files]
+    df_total = pd.concat(frames)

     period_list = df_total.index.to_list()
     for col_name in df_total.columns[1:]:
         type_label, indicator_label = col_name
         type_code = slugify(type_label, separator="_")
         indicator_code = slugify(indicator_label, separator="_")
-        # TODO: generate series_info and update dim_acc
-        breakpoint()
+        dim_acc["type"][type_code] = type_label
+        dim_acc["indicator"][indicator_code] = indicator_label
+        series_info = {
+            "code": ".".join([country_code, type_code, indicator_code, "D"]),
+            "dimensions": {
+                "country": country_code,
+                "type": type_code,
+                "indicator": indicator_code,
+                "frequency": "D",
+            },
+            "observations": [("PERIOD", "VALUE")]
+            + list(zip(period_list, df_total[col_name].fillna("NA").values.tolist())),
+        }
+        yield series_info


 def convert_agpt_dataset(source_dir: Path, target_dir: Path):
     target_dir.mkdir(exist_ok=True)
-    dimension_code_list = ["country", "type", "indicator"]
-    dim_acc = {dim: {} for dim in dimension_code_list}
+    dimension_code_list = ["country", "type", "indicator", "frequency"]
+    dim_acc: Dict[str, Dict[str, str]] = {dim: {} for dim in dimension_code_list}
+    dim_acc["frequency"] = {"D": "daily"}

     dataset_json_data = {
         "code": "AGPT",
         "name": "Actual generation per type, realised, daily",
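The rewritten aggregation reads every yearly CSV into a list of frames and concatenates once, replacing the removed incremental loop. `DataFrame.append` copied the accumulated frame on every iteration and was deprecated in pandas 1.4 (removed in 2.0), so a single `pd.concat` is both the supported and the faster form. A runnable sketch of the same pattern on synthetic two-level-header data (labels are illustrative):

    import pandas as pd

    # Two synthetic "yearly" frames sharing a two-level column header,
    # mimicking the header=[0, 1] CSV layout
    cols = pd.MultiIndex.from_tuples([("Fossil Gas", "Actual Aggregated")])
    df_2019 = pd.DataFrame([[10.0], [None]],
                           index=["2019-01-01", "2019-01-02"], columns=cols)
    df_2020 = pd.DataFrame([[12.5]], index=["2020-01-01"], columns=cols)

    # Single concat replaces the removed df_total.append(df) accumulation
    df_total = pd.concat([df_2019, df_2020])

    # Same missing-value handling as the new "observations" construction
    print(df_total[("Fossil Gas", "Actual Aggregated")].fillna("NA").tolist())
    # [10.0, 'NA', 12.5]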
@@ -95,11 +122,10 @@ def convert_agpt_dataset(source_dir: Path, target_dir: Path):
             continue
         csv_files_by_country_code[m.group(1)].append(csv_file)

-    breakpoint()
     series_info_list = []
     for country_code, csv_files in sorted(csv_files_by_country_code.items()):
-        series_info_list.extend(compute_series_info(country_code, csv_files, dim_acc))
+        for series_info in iter_series_info(country_code, csv_files, dim_acc):
+            series_info_list.append(series_info)

     # dataset.json
     write_json_file(target_dir / "dataset.json", dataset_json_data)
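Since `iter_series_info` is now a generator, the explicit append loop is equivalent to passing the iterator straight to `extend`, which accepts any iterable. A sketch of the more compact form:

    # Equivalent to the for/append loop above
    series_info_list.extend(iter_series_info(country_code, csv_files, dim_acc))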
@@ -110,6 +136,9 @@ def convert_agpt_dataset(source_dir: Path, target_dir: Path):
             json.dump(series_info, fd, sort_keys=True, ensure_ascii=False)
             fd.write("\n")

+    # return dataset info for category_tree
+    return {"code": dataset_json_data["code"], "name": dataset_json_data["name"]}
+

 def main():
     parser = argparse.ArgumentParser(
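`write_json_file` is a repo helper whose body lies outside this diff; only the call shape `write_json_file(path, data)` is visible. A plausible implementation sketch, assuming it mirrors the `json.dump` options used for the series files above (this body is an assumption, not the repo's code):

    import json
    from pathlib import Path

    def write_json_file(path: Path, data) -> None:
        # Assumed behavior: UTF-8 JSON with the same dump options as above
        with path.open("w", encoding="utf-8") as fd:
            json.dump(data, fd, sort_keys=True, ensure_ascii=False)
            fd.write("\n")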
@@ -137,11 +166,15 @@ def main():
     if not target_dir.exists():
         parser.error("Target dir %r not found", target_dir)

-    convert_agpt_dataset(source_dir / "AGPT", target_dir / "AGPT")
+    # provider.json
+    write_json_file(target_dir / "provider.json", PROVIDER_DATA)
+
+    # AGPT dataset
+    dataset_info = convert_agpt_dataset(source_dir / "AGPT", target_dir / "AGPT")
+
+    # category_tree.json
+    write_json_file(target_dir / "category_tree.json", [dataset_info])

     return 0
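One pre-existing issue is visible in the context lines: `argparse.ArgumentParser.error` takes a single pre-formatted message, so `parser.error("Target dir %r not found", target_dir)` would raise a TypeError if that branch were ever hit. A minimal fix sketch:

    parser.error(f"Target dir {target_dir!r} not found")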