-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathenertrag_data.py
138 lines (120 loc) · 4.87 KB
/
enertrag_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
"""
The ``enertrag_data`` module contains functions to read and dump measured
feed-in time series from an Enertrag wind farm.
The following data is available (year 2016) for the 17 turbines:
- meter (Zählerstand) in kWh
- power output in kW
- wind speed in m/s
- wind direction (nacelle position, 'Gondelposition') in °
ATTENTION: the nacelle position is not correct!!
Additionally the sum of the power output of all wind turbines is available in
column 'wf_9_power_output'.
DateTimeIndex in 'Europe/Berlin' time zone.
"""
# Other imports
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
import os
import pickle
def read_data(filename):
    r"""
    Loads one csv file from the Enertrag data folder into a data frame.

    The file is looked up in the 'data/Enertrag' folder located next to this
    module. It is parsed with ',' as separator and '.' as decimal mark; the
    first column of the file becomes the index of the data frame.

    Parameters
    ----------
    filename : string
        Name of data file.

    Returns
    -------
    pandas.DataFrame
        Content of the csv file, indexed by its first column.

    """
    data_folder = os.path.join(os.path.dirname(__file__), 'data/Enertrag')
    csv_path = os.path.join(data_folder, filename)
    return pd.read_csv(csv_path, sep=',', decimal='.', index_col=0)
def get_enertrag_data(pickle_load=False, filename='enertrag_dump.p',
                      resample=True, plot=False, x_limit=None,
                      frequency='30T', curtailment=True):
    r"""
    Fetches Enertrag data.

    Either loads a previously dumped data frame from a pickle file or builds
    the data frame from the csv files listed in
    'helper_files/filenames_enertrag.txt' (one file name per line, read with
    :py:func:`read_data`). In the latter case the columns of each file are
    renamed to 'wf_9_{turbine}_meter', 'wf_9_{turbine}_wind_speed',
    'wf_9_{turbine}_power_output' and 'wf_9_{turbine}_wind_dir', the index is
    interpreted as UTC and converted to 'Europe/Berlin', the sum of all
    'power_output' columns is added as 'wf_9_power_output' and the result is
    dumped to `filename`.

    Parameters
    ----------
    pickle_load : Boolean
        If True the data frame is loaded from the pickle dump `filename` and
        returned unchanged. If False the data is loaded from the original
        csv files and dumped to `filename` afterwards. Default: False.
    filename : String
        Filename including path of pickle dump. Default: 'enertrag_dump.p'.
    resample : Boolean
        If True the data will be resampled to the `frequency`. (mean power)
        Only applied when the data is built from the csv files.
        Default: True.
    plot : Boolean
        Currently not used by this function (see TODO below).
        Default: False.
    x_limit : list of floats or integers
        Currently not used by this function. Default: None.
    frequency : String
        Target frequency handed to ``pandas.DataFrame.resample`` if
        `resample` is True. Default: '30T'.
        NOTE(review): recent pandas versions deprecate the 'T' alias in
        favour of 'min' - confirm against the pandas version in use.
    curtailment : Boolean
        Currently not used by this function. Default: True.

    Returns
    -------
    enertrag_df : pandas.DataFrame
        Enertrag wind farm data.

    """
    # TODO: add plots to check data
    if pickle_load:
        # NOTE: unpickling can execute arbitrary code - only load dumps that
        # were created by this module / come from a trusted source.
        with open(filename, 'rb') as dump_file:
            return pickle.load(dump_file)

    filename_files = os.path.join(os.path.dirname(__file__),
                                  'helper_files/filenames_enertrag.txt')
    # Seeding with an empty frame keeps the result a (possibly empty)
    # DataFrame even if no file name is listed.
    turbine_frames = [pd.DataFrame()]
    with open(filename_files) as file:
        for line in file:
            name = line.strip()
            if not name:
                # Blank lines (e.g. a trailing newline) carry no file name
                continue
            df_part = read_data(name)
            # File names look like '<prefix>_<turbine>.<ext>'
            turbine_name = name.split('_')[1].split('.')[0]
            # Rename columns
            df_part.rename(columns={
                'Zählerstand[kWh]': 'wf_9_{0}_meter'.format(turbine_name),
                'Windgeschwindigkeit[m/s]': 'wf_9_{0}_wind_speed'.format(
                    turbine_name),
                'Leistung[kW]': 'wf_9_{0}_power_output'.format(
                    turbine_name),
                'Gondelposition': 'wf_9_{0}_wind_dir'.format(
                    turbine_name)}, inplace=True)
            turbine_frames.append(df_part)
    # Join all turbines column-wise in a single step
    enertrag_df = pd.concat(turbine_frames, axis=1)
    # Convert index to DatetimeIndex and make time zone aware
    enertrag_df.index = pd.to_datetime(enertrag_df.index).tz_localize(
        'UTC').tz_convert('Europe/Berlin')
    if resample:
        enertrag_df = enertrag_df.resample(frequency).mean()
    # Add frequency attribute
    freq = pd.infer_freq(enertrag_df.index)
    enertrag_df.index.freq = pd.tseries.frequencies.to_offset(freq)
    # Get wind farm power output (sum over all turbine power columns)
    power_columns = [column for column in list(enertrag_df) if
                     'power_output' in column]
    enertrag_df['wf_9_power_output'] = enertrag_df.loc[
        :, power_columns].sum(skipna=True, axis=1)
    with open(filename, 'wb') as dump_file:
        pickle.dump(enertrag_df, dump_file)
    return enertrag_df
def get_enertrag_curtailment_data(frequency):
    r"""
    Fetches the production and curtailment data of the wind farm.

    Reads 'windpark_nechlin_production_and_curtailment_2016_15min.csv' with
    :py:func:`read_data`, interprets its index as UTC time stamps and
    converts them to 'Europe/Berlin'. The columns 'power_rel' and 'wind_mean'
    are dropped and the remaining columns are resampled to `frequency` using
    the mean.

    Parameters
    ----------
    frequency : String
        Target frequency handed to ``pandas.DataFrame.resample``.

    Returns
    -------
    curtailment_data : pandas.DataFrame
        Resampled data without the columns 'power_rel' and 'wind_mean'.

    """
    raw_data = read_data(
        'windpark_nechlin_production_and_curtailment_2016_15min.csv')
    # Make the index time zone aware (UTC) before converting to local time
    utc_index = pd.to_datetime(raw_data.index).tz_localize('UTC')
    raw_data.index = utc_index.tz_convert('Europe/Berlin')
    remaining = raw_data.drop(['power_rel', 'wind_mean'], axis=1)
    return remaining.resample(frequency).mean()
if __name__ == "__main__":
    # Script entry point: build the Enertrag data frame from the csv files
    # and dump it as pickle.
    # Decide whether to resample to a certain frequency
    resample = True
    frequency = '30T'
    # Filename for pickle dump
    filename = os.path.join(os.path.dirname(__file__), 'dumps/validation_data',
                            'enertrag_data_2016.p')
    # Pass `frequency` explicitly so the setting above actually takes effect
    df = get_enertrag_data(resample=resample, frequency=frequency,
                           filename=filename)