Skip to content

Commit

Permalink
Update app.py
Browse files Browse the repository at this point in the history
  • Loading branch information
EJmpa authored Dec 14, 2023
1 parent 130bbfb commit 44b3d35
Showing 1 changed file with 53 additions and 26 deletions.
79 changes: 53 additions & 26 deletions app.py
Original file line number Diff line number Diff line change
@@ -1,40 +1,41 @@
# Library importation
import streamlit as st
import pytz
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Streamlit's cache mechanism to load datasets efficiently
# @st.cache_resource
@st.cache_data
def load_data(filename):
    """Load the dataset from a CSV file and index it by year.

    Cached with ``st.cache_data`` because the result is a DataFrame: each
    caller receives its own copy, so a mutation made during one rerun cannot
    leak into the cached value seen by later reruns.

    Args:
        filename: Path of the CSV file; it must contain a ``year`` column.

    Returns:
        The rows sorted by ``year``, with the ``year`` values parsed using
        the ``%Y`` format and installed as the index.
    """
    df = pd.read_csv(filename)
    df = df.sort_values(by='year').set_index('year')
    # Turn the year labels into timestamps.
    df.index = pd.to_datetime(df.index, format='%Y')
    return df

# Load the dataset
data = load_data("./data/cleaned_dataset_with_incomegroup.csv")
print(data.head())

# Keep every country whose income group is not 'High income'
developing_countries = data[data['IncomeGroup'] != 'High income']

# Sidebar for ARIMA model configuration
st.sidebar.header('ARIMA Model Configuration')
order_p = st.sidebar.slider('Order (p) for ARIMA', min_value=0, max_value=5, value=1)
order_d = st.sidebar.slider('Order (d) for ARIMA', min_value=0, max_value=5, value=1)
order_q = st.sidebar.slider('Order (q) for ARIMA', min_value=0, max_value=5, value=1)

# Sidebar for selecting the country and the electrification level
selected_country = st.sidebar.selectbox(
    'Select a Country',
    developing_countries['countryname'].unique(),
)
# The slider spans the observed range of elecrate_total and starts at its median.
selected_elecrate_total = st.sidebar.slider(
    'Electrification Level',
    min_value=developing_countries['elecrate_total'].min(),
    max_value=developing_countries['elecrate_total'].max(),
    value=developing_countries['elecrate_total'].median(),
)
# Function to fit ARIMA model
def fit_arima_model(data, order=(16, 0, 3)):
    """Fit an ARIMA model to a series.

    Args:
        data: Observations handed to ``ARIMA`` as the series to model.
        order: The ``(p, d, q)`` order passed to ``ARIMA``. Defaults to
            ``(16, 0, 3)``, the order that used to be hard-coded here, so
            calls that omit ``order`` behave as before while callers may
            still supply their own.

    Returns:
        The fitted results object returned by ``model.fit()``.
    """
    model = ARIMA(data, order=order)
    return model.fit()

def make_arima_predictions(model, start, end):
    """Return the fitted model's predictions for ``start`` through ``end``.

    Args:
        model: Fitted results object exposing ``predict(start=..., end=...)``.
        start: First position to predict.
        end: Last position to predict.

    Returns:
        Whatever ``model.predict`` returns for that range.
    """
    # The legacy ``typ='levels'`` argument is not forwarded: it is not a
    # parameter of the ``predict`` method of the ARIMA results class this
    # file imports.
    return model.predict(start=start, end=end)

# Function to make ARIMA forecast
def make_arima_forecast(model, steps):
    """Ask the fitted model for a forecast of ``steps`` steps.

    Args:
        model: Fitted results object exposing ``forecast(steps=...)``.
        steps: Number of steps to forecast.

    Returns:
        Whatever ``model.forecast`` returns.
    """
    return model.forecast(steps=steps)

# Function to evaluate ARIMA model
def evaluate_arima_model(actual, predicted):
    """Score predictions against the actual values.

    Args:
        actual: Observed values.
        predicted: Predicted values, aligned one-to-one with ``actual``.

    Returns:
        A ``(mae, rmse)`` tuple: the mean absolute error, and the square
        root of the mean squared error.
    """
    mae = mean_absolute_error(actual, predicted)
    rmse = np.sqrt(mean_squared_error(actual, predicted))
    return mae, rmse

# ARIMA Model Building
# Name of the dataset column that the ARIMA model is fitted on and scored against.
target_column = "economicgap (GDP difference)"
Expand All @@ -58,34 +65,54 @@ def evaluate_arima_model(actual, predicted):


# Split the data into train and test: the first 80% of the rows train the model
model_frame = developing_countries[[target_column, 'elecrate_total']]
train_size = int(len(model_frame) * 0.8)
train, test = model_frame[:train_size], model_frame[train_size:]

# Nothing can be fitted on an empty training set: report it and end this
# script run so the steps below never touch undefined results.
if train.empty:
    st.error('Training data is empty.')
    st.stop()

# Fit ARIMA model
arima_model = fit_arima_model(train[target_column])

# Make predictions over the positions covered by the test set
predictions = make_arima_predictions(arima_model, start=len(train), end=len(train) + len(test) - 1)

# Evaluate the model once and unpack both metrics
mae, rmse = evaluate_arima_model(test[target_column], predictions)

# Streamlit app
st.title('Forecasting Economic Gap of Developing Countries')
st.write(f'Mean Absolute Error (MAE): {mae}')
st.write(f'Root Mean Squared Error (RMSE): {rmse}')

# Extend the time index for forecasting
# NOTE(review): the index is built at monthly frequency while load_data parses
# the labels with a year-only format; confirm the intended forecast step.
forecast_years = 20
forecast_steps = forecast_years * 12
# The first generated date is the last test date itself, so it is dropped.
forecast_index = pd.date_range(test.index[-1], periods=forecast_steps + 1, freq='M')[1:]

# Make predictions for the forecast period
forecast = make_arima_forecast(arima_model, steps=forecast_steps)

# Function to format y-axis values
def billions(x, pos):
    """Format a tick value as a whole number of billions.

    Args:
        x: The tick value.
        pos: The tick position; it is not used.

    Returns:
        The value scaled by 1e-9, rounded to no decimals, followed by
        ``' Billion'``.
    """
    value_in_billions = x * 1e-9
    return '%1.0f Billion' % value_in_billions

formatter = FuncFormatter(billions)

# Plot the forecast, with the y-axis ticks rendered in billions
fig, ax = plt.subplots(figsize=(10, 6))
ax.yaxis.set_major_formatter(formatter)
ax.plot(forecast_index, forecast, label='Forecast', linestyle='dashed', color='orange')
ax.set_title('Economic Gap Prediction')
ax.set_xlabel('Year')
ax.set_ylabel('Economic Gap (GDP Difference) $')
ax.legend()
st.pyplot(fig)

0 comments on commit 44b3d35

Please sign in to comment.