forked from ASR373/diabetes-risk-prediction-app
-
Notifications
You must be signed in to change notification settings - Fork 0
/
eda_app.py
69 lines (51 loc) · 1.75 KB
/
eda_app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
def _expander(label):
    """Return a collapsible Streamlit container titled *label*.

    Uses ``st.expander`` when the installed Streamlit provides it and falls
    back to the older ``st.beta_expander`` name otherwise, so the page works
    on either side of the rename.
    """
    factory = getattr(st, "expander", None) or st.beta_expander
    return factory(label)


def _render_descriptive(df, freq):
    """Render the "Descriptive" submenu: raw data plus summary tables.

    Args:
        df: Data frame read from ``diabetes_data_upload.csv``; the code
            accesses its ``Gender`` and ``class`` columns.
        freq: Data frame read from ``freqdist_of_age_data.csv``; shown as-is.
    """
    st.dataframe(df)

    with _expander("Data Types"):
        # Show dtype names as plain strings: a Series of raw dtype objects
        # may not be serialisable by the table widget on newer Streamlit.
        st.dataframe(df.dtypes.astype(str))

    with _expander("Summary"):
        st.dataframe(df.describe())

    with _expander("Age"):
        st.dataframe(freq)

    with _expander("Gender Distribution"):
        st.dataframe(df["Gender"].value_counts())

    with _expander("Class Distribution"):
        st.dataframe(df["class"].value_counts())


def _render_plots(df, freq):
    """Render the "Plots" submenu: gender, class, age and outlier charts.

    Args:
        df: Data frame read from ``diabetes_data_upload.csv``; the code
            accesses its ``Gender``, ``class`` and ``Age`` columns.
        freq: Data frame read from ``freqdist_of_age_data.csv``; plotted
            using its ``s`` and ``count`` columns.
    """
    st.subheader("Visualization Plots")

    with _expander("Plots based on Gender"):
        gender_df = df["Gender"].value_counts().reset_index()
        # Rename explicitly: the column names produced by reset_index()
        # are not relied upon.
        gender_df.columns = ["Gender", "Count"]
        st.plotly_chart(px.pie(gender_df, names="Gender", values="Count"))

    with _expander("Plots based on Class"):
        fig, ax = plt.subplots()
        # Pass the series by keyword so it is always interpreted as ``x``
        # rather than as the positional ``data`` argument.
        sns.countplot(x=df["class"], ax=ax)
        st.pyplot(fig)
        # Close the figure once rendered so figures do not pile up in
        # pyplot's global registry across script reruns.
        plt.close(fig)

    with _expander("Plots based on age"):
        st.plotly_chart(px.bar(freq, x="s", y="count"))

    with _expander("Outlier Detection"):
        st.plotly_chart(px.box(df, x="Age"))


def run_eda_app():
    """Render the Exploratory Data Analysis page.

    Reads ``diabetes_data_upload.csv`` and ``freqdist_of_age_data.csv`` from
    the current working directory, then shows either the descriptive tables
    or the plots depending on the sidebar "Submenu" selection.

    Raises:
        FileNotFoundError: Propagated from ``pd.read_csv`` if either CSV file
            is missing.
    """
    st.subheader("Exploratory Data Analysis")
    submenu = st.sidebar.selectbox("Submenu", ["Descriptive", "Plots"])

    df = pd.read_csv("diabetes_data_upload.csv")
    freq = pd.read_csv("freqdist_of_age_data.csv")

    if submenu == "Descriptive":
        _render_descriptive(df, freq)
    elif submenu == "Plots":
        _render_plots(df, freq)