-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathfunctions.py
63 lines (48 loc) · 1.83 KB
/
functions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import io
import pandas as pd
import streamlit as st
def df_info(df):
    """Return a per-column summary of *df* as a DataFrame.

    The result has one row per column of ``df`` and three string-valued
    columns: ``'Column'`` (the column name), ``'Non-Null Count'`` (the
    number of non-null entries) and ``'Data Type'`` (the column dtype).

    Side effect: spaces in the column names of ``df`` are replaced with
    underscores *in place*, so the caller's DataFrame is renamed too.
    This is kept for backward compatibility with existing callers.

    The values are read straight from ``df.count()`` and ``df.dtypes``
    instead of parsing the text printed by ``df.info()``. Parsing that
    text silently produced an empty table for wide frames (where pandas
    prints a summary without per-column rows) and split names or dtypes
    that contain whitespace.
    """
    df.columns = df.columns.str.replace(' ', '_')

    # Everything is stringified to match the text-derived values that
    # earlier versions of this function returned.
    return pd.DataFrame(
        data={
            'Column': [str(name) for name in df.columns],
            'Non-Null Count': [str(count) for count in df.count()],
            'Data Type': [str(dtype) for dtype in df.dtypes],
        }
    )
def df_isnull(df):
    """Summarise the null values in each column of *df*.

    Returns a DataFrame with one row per column of ``df`` and the
    columns ``'Column'``, ``'Number of null values'`` and
    ``'Percentage'`` (share of rows that are null, rounded to two
    decimals and formatted as a string ending in ``'%'``).
    """
    null_counts = df.isnull().sum()
    summary = null_counts.to_frame().reset_index()

    total_rows = df.shape[0]
    share = (summary[0] / total_rows * 100).round(2)
    summary['Percentage'] = share.astype(str) + '%'

    new_names = {'index': 'Column', 0: 'Number of null values'}
    return summary.rename(columns=new_names)
def number_of_outliers(df):
    """Count IQR-rule outliers in every non-object column of *df*.

    A value counts as an outlier when it lies below ``Q1 - 1.5 * IQR``
    or above ``Q3 + 1.5 * IQR`` for its column. Returns a DataFrame with
    the columns ``'column'`` and ``'count_of_outliers'``.
    """
    candidates = df.select_dtypes(exclude='object')

    lower_quartile = candidates.quantile(0.25)
    upper_quartile = candidates.quantile(0.75)
    spread = upper_quartile - lower_quartile

    low_fence = lower_quartile - 1.5 * spread
    high_fence = upper_quartile + 1.5 * spread

    below = candidates < low_fence
    above = candidates > high_fence
    per_column = (below | above).sum()

    result = pd.DataFrame(per_column).reset_index()
    return result.rename(columns={'index': 'column', 0: 'count_of_outliers'})
def space(num_lines=1):
    """Write *num_lines* empty strings to the Streamlit page via ``st.write``."""
    blank = ""
    for _line in range(num_lines):
        st.write(blank)
def sidebar_space(num_lines=1):
    """Write *num_lines* empty strings to the sidebar via ``st.sidebar.write``."""
    blank = ""
    for _line in range(num_lines):
        st.sidebar.write(blank)
def sidebar_multiselect_container(massage, arr, key):
    """Render a sidebar multiselect with a "select all" checkbox under it.

    Args:
        massage: Label shown on the multiselect widget.
        arr: The selectable options.
        key: Text inserted into the checkbox label
            ("Select all for <key> plots").

    Returns:
        Whatever ``multiselect`` returns for the current selection.

    When the checkbox is ticked every option is pre-selected; otherwise
    only the first option is. An empty ``arr`` yields an empty default
    instead of raising on ``arr[0]``.
    """
    # The container is created first so the multiselect is drawn above the
    # checkbox even though the checkbox value has to be read first.
    container = st.sidebar.container()
    select_all = st.sidebar.checkbox("Select all for " + key + " plots")

    options = list(arr)
    # Positional slice: safe for empty input and for pandas objects whose
    # labels do not start at 0.
    default = options if select_all else options[:1]
    return container.multiselect(massage, arr, default=default)