-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathEM1PythonClasses.py
128 lines (116 loc) · 4.99 KB
/
EM1PythonClasses.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
import os
import glob
import re
import scipy.io as sio
import pandas as pd
from pandas import DataFrame
from EM1PythonDictionaries import variables_list
class DataProcessor:
    """Collect a numbered series of ``.mat`` files and load each into a DataFrame.

    ``file_name_template`` is a ``str.format`` template with one replacement
    field, for example ``"run_{}.mat"``. The field stands for a non-negative
    integer or decimal number embedded in each file name. That number is used
    to order the files and as the value stored under ``primary_x_parameter``.

    Attributes set by ``__init__``:
        base_path: directory that is searched for files.
        file_name_template: the template as passed in.
        glob_file_name_template: the template with ``*`` in place of the field.
        primary_x_parameter: key used in the ``x_parameter_list`` dictionaries.
        plot_raw, subsets: stored flags; no method of this class reads them.
        list_of_files_via_glob: matching paths, ordered by embedded number.
        x_parameter_list: one ``{primary_x_parameter: value}`` dict per file
            whose name carries a number.
        list_of_dataframes: one DataFrame per entry of
            ``list_of_files_via_glob``.

    Note:
        A file that the glob finds but whose name carries no parsable number
        stays in ``list_of_files_via_glob`` (sorted as if its number were 0)
        and gets no entry in ``x_parameter_list``, so the two lists can then
        differ in length.
    """

    # Not referenced by any method of this class.
    start = 50
    end = 100

    # Captures an unsigned integer or decimal number; group 1 is the full number.
    _NUMBER_GROUP = r"(\d+(\.\d+)?)"
    # Stand-in for the template field while the literal text is regex-escaped.
    # NUL cannot occur in a file name and is left untouched by ``re.escape``.
    _FIELD_PLACEHOLDER = "\0"

    def __init__(
        self,
        base_path: str,
        file_name_template: str,
        primary_x_parameter: str,
        plot_raw: bool = False,
        subsets: bool = False,
    ) -> None:
        """Find the matching files and load every one of them immediately.

        Args:
            base_path: directory containing the data files.
            file_name_template: ``str.format`` template of the file names.
            primary_x_parameter: name given to the number found in each name.
            plot_raw: stored on the instance as ``plot_raw``.
            subsets: stored on the instance as ``subsets``.
        """
        self.base_path = base_path
        self.file_name_template = file_name_template
        self.glob_file_name_template = file_name_template.format("*")
        self.primary_x_parameter = primary_x_parameter
        self.plot_raw = plot_raw
        self.subsets = subsets
        # Scan the directory once and derive everything from that single
        # listing, so the x-parameters and the loaded frames describe the
        # same set of files.
        self.list_of_files_via_glob: list[str] = self.get_files_list()
        self.x_parameter_list: list[dict[str, float]] = self._x_parameters_for(
            self.list_of_files_via_glob
        )
        self.list_of_dataframes: list[DataFrame] = self.generate_dataframes_list()

    def _x_value_pattern(self) -> "re.Pattern[str]":
        """Compile the regex that extracts the number from a file path.

        The literal parts of the template are escaped, so characters such as
        ``.``, ``(`` or ``+`` in a file name match themselves only.
        """
        escaped_template = re.escape(
            self.file_name_template.format(self._FIELD_PLACEHOLDER)
        )
        return re.compile(
            escaped_template.replace(self._FIELD_PLACEHOLDER, self._NUMBER_GROUP)
        )

    def _matched_x_values(self, files: list[str]) -> list[float]:
        """Return the embedded number of every path in *files* that has one."""
        pattern = self._x_value_pattern()
        return [
            float(match.group(1))
            for file in files
            if (match := pattern.search(file))
        ]

    def _x_parameters_for(self, files: list[str]) -> list[dict[str, float]]:
        """Wrap each extracted number in a ``{primary_x_parameter: value}`` dict."""
        return [
            {f"{self.primary_x_parameter}": value}
            for value in self._matched_x_values(files)
        ]

    def get_files_list(self) -> list[str]:
        """Return the paths matching the template, ordered by embedded number.

        Paths without a parsable number are kept and sorted with key 0.
        """
        glob_file_path = os.path.join(self.base_path, self.glob_file_name_template)
        pattern = self._x_value_pattern()

        def embedded_number(file: str) -> float:
            match = pattern.search(file)
            return float(match.group(1)) if match else 0

        return sorted(glob.glob(glob_file_path), key=embedded_number)

    def get_matched_elements(self) -> tuple[list[float], list[int]]:
        """Return the embedded numbers of the stored files as floats and ints.

        The second list holds the same values passed through ``int()``, which
        discards any fractional part.
        """
        matched_elements = self._matched_x_values(self.list_of_files_via_glob)
        matched_elements_int = [int(element) for element in matched_elements]
        return matched_elements, matched_elements_int

    def generate_x_parameter_list(self) -> list[dict[str, float]]:
        """Scan the directory again and return one x-parameter dict per file."""
        return self._x_parameters_for(self.get_files_list())

    def mat_to_DataFrame(
        self,
        file_path,
        chosen_structure="post",
        chosen_substructure="zerod",
        chosen_subsubstructure=None,
    ) -> DataFrame:
        """Turn the array-valued fields of one MATLAB struct into a DataFrame.

        Args:
            file_path: path of the ``.mat`` file.
            chosen_structure: name of the top-level variable in the file.
            chosen_substructure: field of that variable to read.
            chosen_subsubstructure: optional field one level deeper; when
                given, its fields are used instead of the substructure's.

        Returns:
            A DataFrame with one column per field holding more than one
            element (or none). Fields holding exactly one element are skipped.
        """
        record = self.get_substructure(
            file_path, chosen_structure, chosen_substructure
        )
        if chosen_subsubstructure is not None:
            record = record[chosen_subsubstructure][0, 0]

        array_data_dict = {}
        for field_name in record.dtype.names:
            field_data = record[field_name][0, 0]
            # Single-element fields are scalars, not series; leave them out.
            if field_data.size != 1:
                array_data_dict[field_name] = field_data.squeeze()
        return pd.DataFrame(array_data_dict)

    def get_substructure(self, file_path, chosen_structure, chosen_substructure):
        """Load *file_path* and return ``chosen_structure.chosen_substructure``."""
        mat_data = sio.loadmat(file_path)
        structure = mat_data[chosen_structure]
        # loadmat wraps structs in 2-D arrays; [0, 0] unwraps the single entry.
        return structure[chosen_substructure][0, 0]

    def load_data_into_dataframe(self, file_path) -> DataFrame:
        """Load one file and add the derived columns.

        The ``post.zerod`` data is reduced to the columns named in
        ``variables_list``, then these columns are added:

        * ``tim`` = ``tite`` * ``tem``
        * ``nimtimtaue`` = ``nim`` * ``tim`` * ``taue``
        * ``ne0te0taue`` = ``ne0`` * ``te0`` * ``taue``
        * ``nTtau`` = copy of ``ne0te0taue``
        * ``b0`` = the ``b0`` column of ``post.z0dinput.geo``

        Raises:
            KeyError: if a column used above is missing after filtering.
        """
        file_dataframe: DataFrame = self.mat_to_DataFrame(file_path)
        file_dataframe = file_dataframe.filter(items=variables_list)
        file_dataframe["tim"] = file_dataframe["tite"] * file_dataframe["tem"]
        file_dataframe["nimtimtaue"] = (
            file_dataframe["nim"] * file_dataframe["tim"] * file_dataframe["taue"]
        )
        file_dataframe["ne0te0taue"] = (
            file_dataframe["ne0"] * file_dataframe["te0"] * file_dataframe["taue"]
        )
        file_dataframe["nTtau"] = file_dataframe["ne0te0taue"]
        dataframe_for_b0: DataFrame = self.mat_to_DataFrame(
            file_path,
            chosen_structure="post",
            chosen_substructure="z0dinput",
            chosen_subsubstructure="geo",
        )
        dataframe_for_b0 = dataframe_for_b0.filter(items=["b0"])
        file_dataframe["b0"] = dataframe_for_b0["b0"]
        return file_dataframe

    def generate_dataframes_list(self) -> list[DataFrame]:
        """Load every path in ``list_of_files_via_glob`` into a DataFrame."""
        return [
            self.load_data_into_dataframe(file_path)
            for file_path in self.list_of_files_via_glob
        ]