diff --git a/overviews/_baseline/main.ipynb b/overviews/_baseline/main.ipynb
index 4255230b..d981de7e 100644
--- a/overviews/_baseline/main.ipynb
+++ b/overviews/_baseline/main.ipynb
@@ -27,7 +27,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "This Jupyter Notebook presents the 3W Dataset in a general way. For this, some tables, graphs, and statistics are presented."
+ "This Jupyter Notebook presents the 3W Dataset 2.0.0 in a general way. For this, some tables, graphs, and statistics are presented."
]
},
{
@@ -96,15 +96,24 @@
"
\n",
+ ""
+ ],
+ "text/plain": [
+ " label well id ABER-CKGL ABER-CKP \\\n",
+ "timestamp \n",
+ "2018-10-06 03:57:02 1 SIMULATED 00001 NaN NaN \n",
+ "2018-10-06 03:57:03 1 SIMULATED 00001 NaN NaN \n",
+ "2018-10-06 03:57:04 1 SIMULATED 00001 NaN NaN \n",
+ "2018-10-06 03:57:05 1 SIMULATED 00001 NaN NaN \n",
+ "2018-10-06 03:57:06 1 SIMULATED 00001 NaN NaN \n",
+ "... ... ... ... ... ... \n",
+ "2018-10-06 16:36:56 1 SIMULATED 00001 NaN NaN \n",
+ "2018-10-06 16:36:57 1 SIMULATED 00001 NaN NaN \n",
+ "2018-10-06 16:36:58 1 SIMULATED 00001 NaN NaN \n",
+ "2018-10-06 16:36:59 1 SIMULATED 00001 NaN NaN \n",
+ "2018-10-06 16:37:00 1 SIMULATED 00001 NaN NaN \n",
+ "\n",
+ " ESTADO-DHSV ESTADO-M1 ESTADO-M2 ESTADO-PXO \\\n",
+ "timestamp \n",
+ "2018-10-06 03:57:02 NaN NaN NaN NaN \n",
+ "2018-10-06 03:57:03 NaN NaN NaN NaN \n",
+ "2018-10-06 03:57:04 NaN NaN NaN NaN \n",
+ "2018-10-06 03:57:05 NaN NaN NaN NaN \n",
+ "2018-10-06 03:57:06 NaN NaN NaN NaN \n",
+ "... ... ... ... ... \n",
+ "2018-10-06 16:36:56 NaN NaN NaN NaN \n",
+ "2018-10-06 16:36:57 NaN NaN NaN NaN \n",
+ "2018-10-06 16:36:58 NaN NaN NaN NaN \n",
+ "2018-10-06 16:36:59 NaN NaN NaN NaN \n",
+ "2018-10-06 16:37:00 NaN NaN NaN NaN \n",
"\n",
- " QGL class \n",
- "timestamp \n",
- "2018-10-06 03:57:02 NaN 0 \n",
- "2018-10-06 03:57:03 NaN 0 \n",
- "2018-10-06 03:57:04 NaN 0 \n",
- "2018-10-06 03:57:05 NaN 0 \n",
- "2018-10-06 03:57:06 NaN 0 \n",
- "... ... ... \n",
- "2018-10-06 16:36:56 NaN 1 \n",
- "2018-10-06 16:36:57 NaN 1 \n",
- "2018-10-06 16:36:58 NaN 1 \n",
- "2018-10-06 16:36:59 NaN 1 \n",
- "2018-10-06 16:37:00 NaN 1 \n",
+ " ESTADO-SDV-GL ... PT-P P-TPT QBS QGL \\\n",
+ "timestamp ... \n",
+ "2018-10-06 03:57:02 NaN ... NaN 1.306863e+07 NaN NaN \n",
+ "2018-10-06 03:57:03 NaN ... NaN 1.306865e+07 NaN NaN \n",
+ "2018-10-06 03:57:04 NaN ... NaN 1.306864e+07 NaN NaN \n",
+ "2018-10-06 03:57:05 NaN ... NaN 1.306860e+07 NaN NaN \n",
+ "2018-10-06 03:57:06 NaN ... NaN 1.306856e+07 NaN NaN \n",
+ "... ... ... ... ... ... ... \n",
+ "2018-10-06 16:36:56 NaN ... NaN 1.605884e+07 NaN NaN \n",
+ "2018-10-06 16:36:57 NaN ... NaN 1.606439e+07 NaN NaN \n",
+ "2018-10-06 16:36:58 NaN ... NaN 1.606811e+07 NaN NaN \n",
+ "2018-10-06 16:36:59 NaN ... NaN 1.607405e+07 NaN NaN \n",
+ "2018-10-06 16:37:00 NaN ... NaN 1.607853e+07 NaN NaN \n",
"\n",
- "[45599 rows x 12 columns]"
+ " T-JUS-CKP T-MON-CKP T-PDG T-TPT class state \n",
+ "timestamp \n",
+ "2018-10-06 03:57:02 70.334018 NaN NaN 96.932750 0 0 \n",
+ "2018-10-06 03:57:03 70.334248 NaN NaN 96.932790 0 0 \n",
+ "2018-10-06 03:57:04 70.334488 NaN NaN 96.932830 0 0 \n",
+ "2018-10-06 03:57:05 70.334728 NaN NaN 96.932870 0 0 \n",
+ "2018-10-06 03:57:06 70.334958 NaN NaN 96.932900 0 0 \n",
+ "... ... ... ... ... ... ... \n",
+ "2018-10-06 16:36:56 39.772071 NaN NaN 83.103355 1 0 \n",
+ "2018-10-06 16:36:57 39.866035 NaN NaN 83.103220 1 0 \n",
+ "2018-10-06 16:36:58 39.970728 NaN NaN 83.102733 1 0 \n",
+ "2018-10-06 16:36:59 40.076864 NaN NaN 83.102536 1 0 \n",
+ "2018-10-06 16:37:00 40.174778 NaN NaN 83.101983 1 0 \n",
+ "\n",
+ "[45599 rows x 32 columns]"
]
},
"execution_count": 3,
@@ -659,15 +921,24 @@
"
label
\n",
"
well
\n",
"
id
\n",
- "
P-PDG
\n",
+ "
ABER-CKGL
\n",
+ "
ABER-CKP
\n",
+ "
ESTADO-DHSV
\n",
+ "
ESTADO-M1
\n",
+ "
ESTADO-M2
\n",
+ "
ESTADO-PXO
\n",
+ "
ESTADO-SDV-GL
\n",
+ "
...
\n",
+ "
PT-P
\n",
"
P-TPT
\n",
- "
T-TPT
\n",
- "
P-MON-CKP
\n",
- "
T-JUS-CKP
\n",
- "
P-JUS-CKGL
\n",
- "
T-JUS-CKGL
\n",
+ "
QBS
\n",
"
QGL
\n",
+ "
T-JUS-CKP
\n",
+ "
T-MON-CKP
\n",
+ "
T-PDG
\n",
+ "
T-TPT
\n",
"
class
\n",
+ "
state
\n",
" \n",
"
\n",
"
timestamp
\n",
@@ -683,6 +954,15 @@
"
\n",
"
\n",
"
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
"
\n",
" \n",
" \n",
@@ -691,14 +971,23 @@
"
1
\n",
"
DRAWN
\n",
"
00001
\n",
- "
420.2314
\n",
- "
175.34060
\n",
- "
114.890700
\n",
- "
96.64524
\n",
- "
66.75450
\n",
"
NaN
\n",
"
NaN
\n",
"
NaN
\n",
+ "
NaN
\n",
+ "
NaN
\n",
+ "
NaN
\n",
+ "
NaN
\n",
+ "
...
\n",
+ "
NaN
\n",
+ "
175.340600
\n",
+ "
NaN
\n",
+ "
NaN
\n",
+ "
66.75450
\n",
+ "
NaN
\n",
+ "
NaN
\n",
+ "
114.890700
\n",
+ "
0
\n",
"
0
\n",
" \n",
"
\n",
@@ -706,14 +995,23 @@
"
1
\n",
"
DRAWN
\n",
"
00001
\n",
- "
420.2314
\n",
- "
175.34060
\n",
- "
114.890700
\n",
- "
96.64524
\n",
- "
66.75450
\n",
"
NaN
\n",
"
NaN
\n",
"
NaN
\n",
+ "
NaN
\n",
+ "
NaN
\n",
+ "
NaN
\n",
+ "
NaN
\n",
+ "
...
\n",
+ "
NaN
\n",
+ "
175.340600
\n",
+ "
NaN
\n",
+ "
NaN
\n",
+ "
66.75450
\n",
+ "
NaN
\n",
+ "
NaN
\n",
+ "
114.890700
\n",
+ "
0
\n",
"
0
\n",
"
\n",
"
\n",
@@ -721,14 +1019,23 @@
"
1
\n",
"
DRAWN
\n",
"
00001
\n",
- "
420.2314
\n",
- "
175.34060
\n",
- "
114.890700
\n",
- "
96.64524
\n",
- "
66.75450
\n",
"
NaN
\n",
"
NaN
\n",
"
NaN
\n",
+ "
NaN
\n",
+ "
NaN
\n",
+ "
NaN
\n",
+ "
NaN
\n",
+ "
...
\n",
+ "
NaN
\n",
+ "
175.340600
\n",
+ "
NaN
\n",
+ "
NaN
\n",
+ "
66.75450
\n",
+ "
NaN
\n",
+ "
NaN
\n",
+ "
114.890700
\n",
+ "
0
\n",
"
0
\n",
"
\n",
"
\n",
@@ -736,14 +1043,23 @@
"
1
\n",
"
DRAWN
\n",
"
00001
\n",
- "
420.2314
\n",
- "
175.34060
\n",
- "
114.890700
\n",
- "
96.64524
\n",
- "
66.75450
\n",
"
NaN
\n",
"
NaN
\n",
"
NaN
\n",
+ "
NaN
\n",
+ "
NaN
\n",
+ "
NaN
\n",
+ "
NaN
\n",
+ "
...
\n",
+ "
NaN
\n",
+ "
175.340600
\n",
+ "
NaN
\n",
+ "
NaN
\n",
+ "
66.75450
\n",
+ "
NaN
\n",
+ "
NaN
\n",
+ "
114.890700
\n",
+ "
0
\n",
"
0
\n",
"
\n",
"
\n",
@@ -751,14 +1067,23 @@
"
1
\n",
"
DRAWN
\n",
"
00001
\n",
- "
420.2314
\n",
- "
175.34060
\n",
- "
114.890700
\n",
- "
96.64524
\n",
- "
66.75450
\n",
"
NaN
\n",
"
NaN
\n",
"
NaN
\n",
+ "
NaN
\n",
+ "
NaN
\n",
+ "
NaN
\n",
+ "
NaN
\n",
+ "
...
\n",
+ "
NaN
\n",
+ "
175.340600
\n",
+ "
NaN
\n",
+ "
NaN
\n",
+ "
66.75450
\n",
+ "
NaN
\n",
+ "
NaN
\n",
+ "
114.890700
\n",
+ "
0
\n",
"
0
\n",
"
\n",
"
\n",
@@ -775,117 +1100,199 @@
"
...
\n",
"
...
\n",
"
...
\n",
+ "
...
\n",
+ "
...
\n",
+ "
...
\n",
+ "
...
\n",
+ "
...
\n",
+ "
...
\n",
+ "
...
\n",
+ "
...
\n",
+ "
...
\n",
"
\n",
"
\n",
"
2018-09-07 20:44:31
\n",
"
1
\n",
"
DRAWN
\n",
"
00001
\n",
- "
429.8329
\n",
- "
60.13675
\n",
- "
8.354756
\n",
- "
12.89203
\n",
- "
31.27121
\n",
"
NaN
\n",
"
NaN
\n",
"
NaN
\n",
+ "
NaN
\n",
+ "
NaN
\n",
+ "
NaN
\n",
+ "
NaN
\n",
+ "
...
\n",
+ "
NaN
\n",
+ "
60.136751
\n",
+ "
NaN
\n",
+ "
NaN
\n",
+ "
31.27121
\n",
+ "
NaN
\n",
+ "
NaN
\n",
+ "
8.354756
\n",
"
1
\n",
+ "
0
\n",
"
\n",
"
\n",
"
2018-09-07 20:44:32
\n",
"
1
\n",
"
DRAWN
\n",
"
00001
\n",
- "
429.8329
\n",
- "
60.13789
\n",
- "
8.354756
\n",
- "
12.89203
\n",
- "
31.27121
\n",
"
NaN
\n",
"
NaN
\n",
"
NaN
\n",
+ "
NaN
\n",
+ "
NaN
\n",
+ "
NaN
\n",
+ "
NaN
\n",
+ "
...
\n",
+ "
NaN
\n",
+ "
60.137893
\n",
+ "
NaN
\n",
+ "
NaN
\n",
+ "
31.27121
\n",
+ "
NaN
\n",
+ "
NaN
\n",
+ "
8.354756
\n",
"
1
\n",
+ "
0
\n",
"
\n",
"
\n",
"
2018-09-07 20:44:33
\n",
"
1
\n",
"
DRAWN
\n",
"
00001
\n",
- "
429.8329
\n",
- "
60.13904
\n",
- "
8.354756
\n",
- "
12.89203
\n",
- "
31.27121
\n",
"
NaN
\n",
"
NaN
\n",
"
NaN
\n",
+ "
NaN
\n",
+ "
NaN
\n",
+ "
NaN
\n",
+ "
NaN
\n",
+ "
...
\n",
+ "
NaN
\n",
+ "
60.139036
\n",
+ "
NaN
\n",
+ "
NaN
\n",
+ "
31.27121
\n",
+ "
NaN
\n",
+ "
NaN
\n",
+ "
8.354756
\n",
"
1
\n",
+ "
0
\n",
"
\n",
"
\n",
"
2018-09-07 20:44:34
\n",
"
1
\n",
"
DRAWN
\n",
"
00001
\n",
- "
429.8329
\n",
- "
60.14018
\n",
- "
8.354756
\n",
- "
12.89203
\n",
- "
31.27121
\n",
"
NaN
\n",
"
NaN
\n",
"
NaN
\n",
+ "
NaN
\n",
+ "
NaN
\n",
+ "
NaN
\n",
+ "
NaN
\n",
+ "
...
\n",
+ "
NaN
\n",
+ "
60.140179
\n",
+ "
NaN
\n",
+ "
NaN
\n",
+ "
31.27121
\n",
+ "
NaN
\n",
+ "
NaN
\n",
+ "
8.354756
\n",
"
1
\n",
+ "
0
\n",
"
\n",
"
\n",
"
2018-09-07 20:44:35
\n",
"
1
\n",
"
DRAWN
\n",
"
00001
\n",
- "
429.8329
\n",
- "
60.14132
\n",
- "
8.354756
\n",
- "
12.89203
\n",
- "
31.27121
\n",
"
NaN
\n",
"
NaN
\n",
"
NaN
\n",
+ "
NaN
\n",
+ "
NaN
\n",
+ "
NaN
\n",
+ "
NaN
\n",
+ "
...
\n",
+ "
NaN
\n",
+ "
60.141321
\n",
+ "
NaN
\n",
+ "
NaN
\n",
+ "
31.27121
\n",
+ "
NaN
\n",
+ "
NaN
\n",
+ "
8.354756
\n",
"
1
\n",
+ "
0
\n",
"
\n",
" \n",
"\n",
- "
172800 rows × 12 columns
\n",
+ "
172800 rows × 32 columns
\n",
""
],
"text/plain": [
- " label well id P-PDG P-TPT T-TPT \\\n",
- "timestamp \n",
- "2018-09-05 20:44:36 1 DRAWN 00001 420.2314 175.34060 114.890700 \n",
- "2018-09-05 20:44:37 1 DRAWN 00001 420.2314 175.34060 114.890700 \n",
- "2018-09-05 20:44:38 1 DRAWN 00001 420.2314 175.34060 114.890700 \n",
- "2018-09-05 20:44:39 1 DRAWN 00001 420.2314 175.34060 114.890700 \n",
- "2018-09-05 20:44:40 1 DRAWN 00001 420.2314 175.34060 114.890700 \n",
- "... ... ... ... ... ... ... \n",
- "2018-09-07 20:44:31 1 DRAWN 00001 429.8329 60.13675 8.354756 \n",
- "2018-09-07 20:44:32 1 DRAWN 00001 429.8329 60.13789 8.354756 \n",
- "2018-09-07 20:44:33 1 DRAWN 00001 429.8329 60.13904 8.354756 \n",
- "2018-09-07 20:44:34 1 DRAWN 00001 429.8329 60.14018 8.354756 \n",
- "2018-09-07 20:44:35 1 DRAWN 00001 429.8329 60.14132 8.354756 \n",
+ " label well id ABER-CKGL ABER-CKP ESTADO-DHSV \\\n",
+ "timestamp \n",
+ "2018-09-05 20:44:36 1 DRAWN 00001 NaN NaN NaN \n",
+ "2018-09-05 20:44:37 1 DRAWN 00001 NaN NaN NaN \n",
+ "2018-09-05 20:44:38 1 DRAWN 00001 NaN NaN NaN \n",
+ "2018-09-05 20:44:39 1 DRAWN 00001 NaN NaN NaN \n",
+ "2018-09-05 20:44:40 1 DRAWN 00001 NaN NaN NaN \n",
+ "... ... ... ... ... ... ... \n",
+ "2018-09-07 20:44:31 1 DRAWN 00001 NaN NaN NaN \n",
+ "2018-09-07 20:44:32 1 DRAWN 00001 NaN NaN NaN \n",
+ "2018-09-07 20:44:33 1 DRAWN 00001 NaN NaN NaN \n",
+ "2018-09-07 20:44:34 1 DRAWN 00001 NaN NaN NaN \n",
+ "2018-09-07 20:44:35 1 DRAWN 00001 NaN NaN NaN \n",
"\n",
- " P-MON-CKP T-JUS-CKP P-JUS-CKGL T-JUS-CKGL QGL class \n",
+ " ESTADO-M1 ESTADO-M2 ESTADO-PXO ESTADO-SDV-GL ... \\\n",
+ "timestamp ... \n",
+ "2018-09-05 20:44:36 NaN NaN NaN NaN ... \n",
+ "2018-09-05 20:44:37 NaN NaN NaN NaN ... \n",
+ "2018-09-05 20:44:38 NaN NaN NaN NaN ... \n",
+ "2018-09-05 20:44:39 NaN NaN NaN NaN ... \n",
+ "2018-09-05 20:44:40 NaN NaN NaN NaN ... \n",
+ "... ... ... ... ... ... \n",
+ "2018-09-07 20:44:31 NaN NaN NaN NaN ... \n",
+ "2018-09-07 20:44:32 NaN NaN NaN NaN ... \n",
+ "2018-09-07 20:44:33 NaN NaN NaN NaN ... \n",
+ "2018-09-07 20:44:34 NaN NaN NaN NaN ... \n",
+ "2018-09-07 20:44:35 NaN NaN NaN NaN ... \n",
+ "\n",
+ " PT-P P-TPT QBS QGL T-JUS-CKP T-MON-CKP T-PDG \\\n",
"timestamp \n",
- "2018-09-05 20:44:36 96.64524 66.75450 NaN NaN NaN 0 \n",
- "2018-09-05 20:44:37 96.64524 66.75450 NaN NaN NaN 0 \n",
- "2018-09-05 20:44:38 96.64524 66.75450 NaN NaN NaN 0 \n",
- "2018-09-05 20:44:39 96.64524 66.75450 NaN NaN NaN 0 \n",
- "2018-09-05 20:44:40 96.64524 66.75450 NaN NaN NaN 0 \n",
- "... ... ... ... ... ... ... \n",
- "2018-09-07 20:44:31 12.89203 31.27121 NaN NaN NaN 1 \n",
- "2018-09-07 20:44:32 12.89203 31.27121 NaN NaN NaN 1 \n",
- "2018-09-07 20:44:33 12.89203 31.27121 NaN NaN NaN 1 \n",
- "2018-09-07 20:44:34 12.89203 31.27121 NaN NaN NaN 1 \n",
- "2018-09-07 20:44:35 12.89203 31.27121 NaN NaN NaN 1 \n",
+ "2018-09-05 20:44:36 NaN 175.340600 NaN NaN 66.75450 NaN NaN \n",
+ "2018-09-05 20:44:37 NaN 175.340600 NaN NaN 66.75450 NaN NaN \n",
+ "2018-09-05 20:44:38 NaN 175.340600 NaN NaN 66.75450 NaN NaN \n",
+ "2018-09-05 20:44:39 NaN 175.340600 NaN NaN 66.75450 NaN NaN \n",
+ "2018-09-05 20:44:40 NaN 175.340600 NaN NaN 66.75450 NaN NaN \n",
+ "... ... ... ... ... ... ... ... \n",
+ "2018-09-07 20:44:31 NaN 60.136751 NaN NaN 31.27121 NaN NaN \n",
+ "2018-09-07 20:44:32 NaN 60.137893 NaN NaN 31.27121 NaN NaN \n",
+ "2018-09-07 20:44:33 NaN 60.139036 NaN NaN 31.27121 NaN NaN \n",
+ "2018-09-07 20:44:34 NaN 60.140179 NaN NaN 31.27121 NaN NaN \n",
+ "2018-09-07 20:44:35 NaN 60.141321 NaN NaN 31.27121 NaN NaN \n",
+ "\n",
+ " T-TPT class state \n",
+ "timestamp \n",
+ "2018-09-05 20:44:36 114.890700 0 0 \n",
+ "2018-09-05 20:44:37 114.890700 0 0 \n",
+ "2018-09-05 20:44:38 114.890700 0 0 \n",
+ "2018-09-05 20:44:39 114.890700 0 0 \n",
+ "2018-09-05 20:44:40 114.890700 0 0 \n",
+ "... ... ... ... \n",
+ "2018-09-07 20:44:31 8.354756 1 0 \n",
+ "2018-09-07 20:44:32 8.354756 1 0 \n",
+ "2018-09-07 20:44:33 8.354756 1 0 \n",
+ "2018-09-07 20:44:34 8.354756 1 0 \n",
+ "2018-09-07 20:44:35 8.354756 1 0 \n",
"\n",
- "[172800 rows x 12 columns]"
+ "[172800 rows x 32 columns]"
]
},
"execution_count": 4,
@@ -901,20 +1308,47 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "Each instance is stored in a CSV file and loaded into a pandas DataFrame. Each observation is stored in a line in the CSV file and loaded as a line in the pandas DataFrame. The first line of each CSV file contains a header with column identifiers. Each column of CSV files stores the following type of information:\n",
+ "Each instance is stored in a [Parquet file](https://parquet.apache.org/docs/) and loaded into a pandas DataFrame as follows:\n",
"\n",
- "* **timestamp**: observations timestamps loaded into pandas DataFrame as its index;\n",
- "* **P-PDG**: pressure variable at the Permanent Downhole Gauge (PDG);\n",
- "* **P-TPT**: pressure variable at the Temperature and Pressure Transducer (TPT);\n",
- "* **T-TPT**: temperature variable at the Temperature and Pressure Transducer (TPT);\n",
- "* **P-MON-CKP**: pressure variable upstream of the production choke (CKP);\n",
- "* **T-JUS-CKP**: temperature variable downstream of the production choke (CKP);\n",
- "* **P-JUS-CKGL**: pressure variable upstream of the gas lift choke (CKGL);\n",
- "* **T-JUS-CKGL**: temperature variable upstream of the gas lift choke (CKGL);\n",
- "* **QGL**: gas lift flow rate;\n",
- "* **class**: observations labels associated with three types of periods (normal, fault transient, and faulty steady state).\n",
+ "* All Parquet files are created and read with pandas functions, `pyarrow` engine and `brotli` compression;\n",
+ "* For each instance, the timestamps of its observations are stored as the index of the Parquet file and loaded as the index of the pandas DataFrame;\n",
+ "* Each observation is stored in a row of a Parquet file and loaded as a row of a pandas DataFrame;\n",
+ "* All variables are stored as float in columns of Parquet files and loaded as float in columns of pandas DataFrame;\n",
+ "* All labels are stored as `Int64` (not `int64`) in columns of Parquet files and loaded as `Int64` (not `int64`) in columns of pandas DataFrame.\n",
"\n",
- "Other information are also loaded into each pandas Dataframe:\n",
+ "The variables and labels are as follows:\n",
+ "\n",
+ "* **ABER-CKGL**: Opening of the GLCK (gas lift choke) [%];\n",
+ "* **ABER-CKP**: Opening of the PCK (production choke) [%];\n",
+ "* **ESTADO-DHSV**: State of the DHSV (downhole safety valve) [0, 0.5, or 1];\n",
+ "* **ESTADO-M1**: State of the PMV (production master valve) [0, 0.5, or 1];\n",
+ "* **ESTADO-M2**: State of the AMV (annulus master valve) [0, 0.5, or 1];\n",
+ "* **ESTADO-PXO**: State of the PXO (pig-crossover) valve [0, 0.5, or 1];\n",
+ "* **ESTADO-SDV-GL**: State of the gas lift SDV (shutdown valve) [0, 0.5, or 1];\n",
+ "* **ESTADO-SDV-P**: State of the production SDV (shutdown valve) [0, 0.5, or 1];\n",
+ "* **ESTADO-W1**: State of the PWV (production wing valve) [0, 0.5, or 1];\n",
+ "* **ESTADO-W2**: State of the AWV (annulus wing valve) [0, 0.5, or 1];\n",
+ "* **ESTADO-XO**: State of the XO (crossover) valve [0, 0.5, or 1];\n",
+ "* **P-ANULAR**: Pressure in the well annulus [Pa];\n",
+ "* **P-JUS-BS**: Downstream pressure of the SP (service pump) [Pa];\n",
+ "* **P-JUS-CKGL**: Downstream pressure of the GLCK (gas lift choke) [Pa];\n",
+ "* **P-JUS-CKP**: Downstream pressure of the PCK (production choke) [Pa];\n",
+ "* **P-MON-CKGL**: Upstream pressure of the GLCK (gas lift choke) [Pa];\n",
+ "* **P-MON-CKP**: Upstream pressure of the PCK (production choke) [Pa];\n",
+ "* **P-MON-SDV-P**: Upstream pressure of the production SDV (shutdown valve) [Pa];\n",
+ "* **P-PDG**: Pressure at the PDG (permanent downhole gauge) [Pa];\n",
+ "* **PT-P**: Downstream pressure of the PWV (production wing valve) in the production tube [Pa];\n",
+ "* **P-TPT**: Pressure at the TPT (temperature and pressure transducer) [Pa];\n",
+ "* **QBS**: Flow rate at the SP (service pump) [m3/s];\n",
+ "* **QGL**: Gas lift flow rate [m3/s];\n",
+ "* **T-JUS-CKP**: Downstream temperature of the PCK (production choke) [°C];\n",
+ "* **T-MON-CKP**: Upstream temperature of the PCK (production choke) [°C];\n",
+ "* **T-PDG**: Temperature at the PDG (permanent downhole gauge) [°C];\n",
+ "* **T-TPT**: Temperature at the TPT (temperature and pressure transducer) [°C];\n",
+ "* **class**: Label of the observation;\n",
+ "* **state**: Well operational status.\n",
+ "\n",
+ "Other information is also loaded into each pandas DataFrame:\n",
"\n",
"* **label**: instance label (event type);\n",
"* **well**: well name. Hand-drawn and simulated instances have fixed names. Real instances have names masked with incremental id;\n",
@@ -991,10 +1425,10 @@
" \n",
"
\n",
"
1 - Abrupt Increase of BSW
\n",
- "
5
\n",
+ "
4
\n",
"
114
\n",
"
10
\n",
- "
129
\n",
+ "
128
\n",
"
\n",
"
\n",
"
2 - Spurious Closure of DHSV
\n",
@@ -1012,10 +1446,10 @@
"
\n",
"
\n",
"
4 - Flow Instability
\n",
- "
344
\n",
+ "
343
\n",
"
0
\n",
"
0
\n",
- "
344
\n",
+ "
343
\n",
"
\n",
"
\n",
"
5 - Rapid Productivity Loss
\n",
@@ -1033,24 +1467,31 @@
"
\n",
"
\n",
"
7 - Scaling in PCK
\n",
- "
5
\n",
+ "
36
\n",
"
0
\n",
"
10
\n",
- "
15
\n",
+ "
46
\n",
"
\n",
"
\n",
"
8 - Hydrate in Production Line
\n",
- "
0
\n",
+ "
14
\n",
"
81
\n",
"
0
\n",
- "
81
\n",
+ "
95
\n",
+ "
\n",
+ "
\n",
+ "
9 - Hydrate in Service Line
\n",
+ "
57
\n",
+ "
150
\n",
+ "
0
\n",
+ "
207
\n",
"
\n",
"
\n",
"
TOTAL
\n",
- "
1019
\n",
- "
939
\n",
+ "
1119
\n",
+ "
1089
\n",
"
20
\n",
- "
1978
\n",
+ "
2228
\n",
"
\n",
" \n",
"\n",
@@ -1060,15 +1501,16 @@
"SOURCE REAL SIMULATED HAND-DRAWN TOTAL\n",
"INSTANCE LABEL \n",
"0 - Normal Operation 594 0 0 594\n",
- "1 - Abrupt Increase of BSW 5 114 10 129\n",
+ "1 - Abrupt Increase of BSW 4 114 10 128\n",
"2 - Spurious Closure of DHSV 22 16 0 38\n",
"3 - Severe Slugging 32 74 0 106\n",
- "4 - Flow Instability 344 0 0 344\n",
+ "4 - Flow Instability 343 0 0 343\n",
"5 - Rapid Productivity Loss 11 439 0 450\n",
"6 - Quick Restriction in PCK 6 215 0 221\n",
- "7 - Scaling in PCK 5 0 10 15\n",
- "8 - Hydrate in Production Line 0 81 0 81\n",
- "TOTAL 1019 939 20 1978"
+ "7 - Scaling in PCK 36 0 10 46\n",
+ "8 - Hydrate in Production Line 14 81 0 95\n",
+ "9 - Hydrate in Service Line 57 150 0 207\n",
+ "TOTAL 1119 1089 20 2228"
]
},
"execution_count": 5,
@@ -1092,7 +1534,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "Considering only **real instances** and **threshold of 1%**, the 3W Dataset has the following amount of instances."
+ "Considering only **real instances** and a **threshold of 1%**, the 3W Dataset has the following number of instances with rare events."
]
},
{
@@ -1137,10 +1579,17 @@
" \n",
"
\n",
"
1 - Abrupt Increase of BSW
\n",
- "
5
\n",
+ "
4
\n",
"
114
\n",
"
10
\n",
- "
129
\n",
+ "
128
\n",
+ "
\n",
+ "
\n",
+ "
5 - Rapid Productivity Loss
\n",
+ "
11
\n",
+ "
439
\n",
+ "
0
\n",
+ "
450
\n",
"
\n",
"
\n",
"
6 - Quick Restriction in PCK
\n",
@@ -1150,38 +1599,23 @@
"
221
\n",
"
\n",
"
\n",
- "
7 - Scaling in PCK
\n",
- "
5
\n",
- "
0
\n",
- "
10
\n",
- "
15
\n",
- "
\n",
- "
\n",
- "
8 - Hydrate in Production Line
\n",
- "
0
\n",
- "
81
\n",
- "
0
\n",
- "
81
\n",
- "
\n",
- "
\n",
"
TOTAL
\n",
- "
16
\n",
- "
410
\n",
- "
20
\n",
- "
446
\n",
+ "
21
\n",
+ "
768
\n",
+ "
10
\n",
+ "
799
\n",
"
\n",
" \n",
"\n",
""
],
"text/plain": [
- "SOURCE REAL SIMULATED HAND-DRAWN TOTAL\n",
- "INSTANCE LABEL \n",
- "1 - Abrupt Increase of BSW 5 114 10 129\n",
- "6 - Quick Restriction in PCK 6 215 0 221\n",
- "7 - Scaling in PCK 5 0 10 15\n",
- "8 - Hydrate in Production Line 0 81 0 81\n",
- "TOTAL 16 410 20 446"
+ "SOURCE REAL SIMULATED HAND-DRAWN TOTAL\n",
+ "INSTANCE LABEL \n",
+ "1 - Abrupt Increase of BSW 4 114 10 128\n",
+ "5 - Rapid Productivity Loss 11 439 0 450\n",
+ "6 - Quick Restriction in PCK 6 215 0 221\n",
+ "TOTAL 21 768 10 799"
]
},
"execution_count": 6,
@@ -1199,7 +1633,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "If **simulated instances** are also considered, the amount of instances in 3W Dataset become the one listed below."
+ "If **simulated instances** are also considered, the number of instances with rare events in the 3W Dataset becomes the one listed below."
]
},
{
@@ -1243,28 +1677,20 @@
" \n",
" \n",
"
\n",
- "
7 - Scaling in PCK
\n",
- "
5
\n",
- "
0
\n",
- "
10
\n",
- "
15
\n",
- "
\n",
- "
\n",
"
TOTAL
\n",
- "
5
\n",
- "
0
\n",
- "
10
\n",
- "
15
\n",
+ "
0.0
\n",
+ "
0.0
\n",
+ "
0.0
\n",
+ "
0.0
\n",
"
\n",
" \n",
"\n",
""
],
"text/plain": [
- "SOURCE REAL SIMULATED HAND-DRAWN TOTAL\n",
- "INSTANCE LABEL \n",
- "7 - Scaling in PCK 5 0 10 15\n",
- "TOTAL 5 0 10 15"
+ "SOURCE REAL SIMULATED HAND-DRAWN TOTAL\n",
+ "INSTANCE LABEL \n",
+ "TOTAL 0.0 0.0 0.0 0.0"
]
},
"execution_count": 7,
@@ -1281,7 +1707,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "After also considering the **hand-drawn instances**, we get the final amount of instances in 3W Dataset."
+ "After also considering the **hand-drawn instances**, we get the final number of instances with rare events in the 3W Dataset."
]
},
{
@@ -1325,28 +1751,20 @@
" \n",
" \n",
"
\n",
- "
7 - Scaling in PCK
\n",
- "
5
\n",
- "
0
\n",
- "
10
\n",
- "
15
\n",
- "
\n",
- "
\n",
"
TOTAL
\n",
- "
5
\n",
- "
0
\n",
- "
10
\n",
- "
15
\n",
+ "
0.0
\n",
+ "
0.0
\n",
+ "
0.0
\n",
+ "
0.0
\n",
"
\n",
" \n",
"\n",
""
],
"text/plain": [
- "SOURCE REAL SIMULATED HAND-DRAWN TOTAL\n",
- "INSTANCE LABEL \n",
- "7 - Scaling in PCK 5 0 10 15\n",
- "TOTAL 5 0 10 15"
+ "SOURCE REAL SIMULATED HAND-DRAWN TOTAL\n",
+ "INSTANCE LABEL \n",
+ "TOTAL 0.0 0.0 0.0 0.0"
]
},
"execution_count": 8,
@@ -1370,7 +1788,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "A scatter map with all the **real instances** is shown below. The oldest one occurred in the middle of 2012 and the most recent one in the middle of 2018. In addition to the total number of considered wells, this map provides an overview of the occurrences distributions of each instance over time and between wells."
+ "A scatter map with all the **real instances** is shown below. The oldest one occurred in the middle of 2011 and the most recent one in the middle of 2023. In addition to the total number of considered wells, this map provides an overview of how the instances are distributed over time and across wells."
]
},
{
@@ -1383,7 +1801,7 @@
{
"data": {
"text/plain": [
- "(2013, 2019)"
+ "(2011, 2023)"
]
},
"execution_count": 9,
@@ -1396,12 +1814,12 @@
"\n",
"\n",
- "