-
Notifications
You must be signed in to change notification settings - Fork 36
/
visualize.py
156 lines (128 loc) · 6.64 KB
/
visualize.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Configure the page title and icon for this Streamlit page. This call sits
# ahead of every other Streamlit command in the script.
st.set_page_config(
    page_title="Mining Site Visualization",
    page_icon="🔍",
)
def load_data(path: str = "space_mining_dataset.csv") -> pd.DataFrame:
    """Load the space-mining dataset from a CSV file.

    Args:
        path: Location of the CSV file. Defaults to
            ``"space_mining_dataset.csv"``, resolved against the current
            working directory, which is what the page has always read.

    Returns:
        The parsed CSV as a DataFrame, exactly as ``pandas.read_csv``
        produces it (no cleaning or type coercion is applied here).

    Raises:
        FileNotFoundError: If *path* does not exist.
    """
    return pd.read_csv(path)
def _weighted_score(frame, weights):
    """Return the weighted sum of the columns of *frame* named in *weights*.

    Args:
        frame: DataFrame containing one column per key of *weights*.
        weights: Mapping of column name -> multiplier. Terms are added in
            the mapping's iteration order.

    Returns:
        A Series aligned with ``frame.index``.

    Raises:
        KeyError: If a weighted column is missing from *frame*.
    """
    return sum(weight * frame[column] for column, weight in weights.items())


def _figure_to_png(fig):
    """Render a Matplotlib figure to PNG bytes without touching the disk."""
    import io  # local import keeps this block self-contained

    buffer = io.BytesIO()
    fig.savefig(buffer, format="png")
    return buffer.getvalue()


def _render_and_close(fig):
    """Show *fig* on the page, then release it from pyplot's figure registry."""
    st.pyplot(fig)
    # pyplot keeps every figure it created alive until it is closed; without
    # this, each Streamlit rerun would leave another set of figures behind.
    plt.close(fig)


def show_visualize_page():
    """Render the "Mining Site Visualization" Streamlit page.

    The page loads the dataset via ``load_data()``, adds a ``final_score``
    column when the file does not provide one, and then shows, in order:
    a histogram of one selected column, an optional pairplot, a correlation
    heatmap of the numeric columns, a boxplot against ``final_score``, a
    barplot per ``Celestial Body``, an interactive re-weighting of the score
    with a table of the top sites, and download buttons for the barplot and
    the data.

    Returns:
        None. All output goes to the Streamlit page.

    Raises:
        KeyError: If the dataset lacks one of the columns used for scoring
            (``iron``, ``nickel``, ``water_ice``, ``other_minerals``,
            ``sustainability_index``, ``distance_from_earth``) or the
            ``Celestial Body`` column.
    """
    # (column, slider label, slider min, slider max, default weight)
    weight_controls = (
        ("iron", "Iron Weight", 0.0, 1.0, 0.3),
        ("nickel", "Nickel Weight", 0.0, 1.0, 0.2),
        ("water_ice", "Water Ice Weight", 0.0, 1.0, 0.2),
        ("other_minerals", "Other Minerals Weight", 0.0, 1.0, 0.1),
        ("sustainability_index", "Sustainability Weight", 0.0, 1.0, 0.1),
        ("distance_from_earth", "Distance Weight", -1.0, 0.0, -0.1),
    )
    default_weights = {
        column: default for column, _, _, _, default in weight_controls
    }

    st.title("Mining Site Visualization")
    st.write("Explore different visualizations to understand the dataset and the impact of user preferences.")

    data = load_data()

    # Show which columns the dataset provides.
    st.write("Available Columns:", data.columns)

    # Derive 'final_score' from the default weights when the file lacks it.
    if 'final_score' not in data.columns:
        st.write("The 'final_score' column does not exist, calculating it based on weights.")
        data['final_score'] = _weighted_score(data, default_weights)

    # Show the columns again so the presence of 'final_score' is visible.
    st.write("Updated Columns:", data.columns)

    # NOTE(review): every selector below offers data.columns[1:], i.e. all
    # columns but the first; presumably the first column is a non-numeric
    # identifier -- confirm against the dataset schema.
    selectable_columns = data.columns[1:]

    # Visualization 1: distribution of a single feature.
    st.subheader("Distribution of Features")
    feature = st.selectbox("Select Feature to Visualize", selectable_columns)
    fig, ax = plt.subplots()
    sns.histplot(data[feature], bins=20, kde=True, ax=ax, color='teal')
    ax.set_xlabel(feature)
    _render_and_close(fig)

    # Visualization 2: pairplot of the selected features.
    st.subheader("Pairplot of Selected Features")
    features = st.multiselect("Select Features for Pairplot", selectable_columns)
    if len(features) > 1:
        # 'final_score' is always appended as the hue column; drop it from
        # the user's picks first so the column is never selected twice.
        pair_columns = [name for name in features if name != 'final_score']
        pair_columns.append('final_score')
        # pairplot builds its own figure, so none is created here.
        grid = sns.pairplot(data[pair_columns], diag_kind='kde', hue='final_score', palette="coolwarm")
        # NOTE(review): newer seaborn releases expose this as `grid.figure`
        # and deprecate `.fig`; switch once the minimum version is confirmed.
        _render_and_close(grid.fig)
    else:
        st.write("Please select more than one feature.")

    # Visualization 3: correlation heatmap over the numeric columns only.
    st.subheader("Correlation Heatmap")
    corr_matrix = data.select_dtypes(include='number').corr()
    fig, ax = plt.subplots()
    sns.heatmap(corr_matrix, annot=True, fmt=".2f", cmap="coolwarm", linewidths=0.5, ax=ax)
    ax.set_title("Correlation Heatmap")
    _render_and_close(fig)

    # Visualization 4: boxplot of a feature grouped by 'final_score'.
    st.subheader("Boxplot of Feature Distribution by Final Score")
    box_feature = st.selectbox("Select Feature for Boxplot", selectable_columns)
    fig, ax = plt.subplots()
    sns.boxplot(x='final_score', y=box_feature, data=data, palette="Set2", ax=ax)
    ax.set_title(f"Boxplot of {box_feature} by Final Score")
    _render_and_close(fig)

    # Visualization 5: barplot of a feature per celestial body.
    st.subheader("Barplot for Aggregate Insights by Celestial Body")
    aggregate_feature = st.selectbox("Select Feature for Aggregate Barplot", selectable_columns)
    bar_fig, ax = plt.subplots()
    sns.barplot(x='Celestial Body', y=aggregate_feature, data=data, palette="coolwarm", ax=ax)
    ax.set_title(f"Average {aggregate_feature} by Celestial Body")
    ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha='right')  # Rotate x-axis labels for clarity
    # Kept open (not closed yet): this is the chart offered for download below.
    st.pyplot(bar_fig)

    # Visualization 6: impact of user-chosen weights on the ranking.
    st.subheader("Impact of Weights on Recommendations")
    st.write("Adjust the weights to see how the recommendations change.")
    adjusted_weights = {}
    for column, label, low, high, default in weight_controls:
        adjusted_weights[column] = st.slider(label, low, high, default)

    # Score a copy so the loaded data (and the CSV download) stay unchanged.
    adjusted_scores = data.copy()
    adjusted_scores['adjusted_score'] = _weighted_score(adjusted_scores, adjusted_weights)

    # Keep the N best-scoring sites.
    top_n = st.slider("Number of Top Sites to Display", 1, 10, 5)
    top_sites = adjusted_scores.sort_values(by='adjusted_score', ascending=False).head(top_n)

    st.subheader(f"Top {top_n} Sites Based on Adjusted Scores")
    top_sites_display = top_sites[['Celestial Body', 'iron', 'nickel', 'water_ice', 'distance_from_earth', 'adjusted_score']]
    # Colour-grade the score column to make the ranking easier to read.
    st.write(top_sites_display.style.background_gradient(subset=['adjusted_score'], cmap='coolwarm'))

    # Downloadable visualization (the barplot rendered above). The PNG is
    # built in memory: writing a fixed file name into the working directory
    # would be shared between concurrent sessions and fails on read-only
    # deployments.
    if st.button("Download Visualization"):
        st.download_button(
            label="Download Chart",
            data=_figure_to_png(bar_fig),
            file_name="visualization.png",
            mime="image/png",
        )
    plt.close(bar_fig)

    # Downloadable data (includes 'final_score' if it was computed above).
    if st.button("Download Data"):
        st.download_button(
            label="Download CSV",
            data=data.to_csv(index=False),
            file_name="space_mining_data.csv",
            mime="text/csv",
        )
# Module-level call with no __main__ guard: the page is rendered every time
# this script is executed or imported.
show_visualize_page()