.Rhistory

# ---- Console history: attempts at drawing a 5-element sample of movie columns ----
# The statements below are successive retries of the same idea; later lines
# supersede earlier ones.
# NOTE(review): the next line is a truncated fragment (one closing parenthesis
# too many) and cannot be parsed on its own.
select(df, budget, revenue, rating))
my_table
# df <- select(df, popularity, budget, revenue, runtime, rating)
#df <- sample(df, 5)
#df <- melt(df)
#df <- data.table(df)
#df[,value_rescaled:=value/mean(value),by=.(var)]
# NOTE(review): here 5 is passed first and the selected data frame second; if
# `sample` is base::sample(x, size), the arguments look swapped. The order is
# reversed in the later attempts further down.
my_table <- as.data.frame(sample(5,
select(df, budget, revenue, rating)))
# df <- select(df, popularity, budget, revenue, runtime, rating)
#df <- sample(df, 5)
# Data frame first, size second.
# NOTE(review): assuming base::sample(), a data frame is sampled by column, not
# by row, so asking for 5 out of 3 selected columns presumably fails - confirm.
sample(select(df, budget, revenue, rating), 5)
# df <- select(df, popularity, budget, revenue, runtime, rating)
#df <- sample(df, 5)
# Same call with five columns selected, matching the requested size of 5.
sample(select(df, budget, revenue, rating, popularity, vote_count), 5)
#df <- melt(df)
#df <- data.table(df)
#df[,value_rescaled:=value/mean(value),by=.(var)]
#my_table <- as.data.frame(sample(5,
# select(df, budget, revenue, rating)))
#my_table
# df <- select(df, popularity, budget, revenue, runtime, rating)
#df <- sample(df, 5)
# Re-run of the five-column call above.
sample(select(df, budget, revenue, rating, popularity, vote_count), 5)
#df <- melt(df)
#df <- data.table(df)
#df[,value_rescaled:=value/mean(value),by=.(var)]
#my_table <- as.data.frame(sample(5,
# select(df, budget, revenue, rating)))
#my_table
# Two more attempts with 5 as the first argument of sample().
my_table <- as.data.frame(sample(5,
select(df, budget, revenue, rating, popularity, vote_count)))
my_table <- as.data.frame(sample(5,
select(df, budget, revenue, rating, popularity, vote_count)))
# Final form in this stretch: selected columns first, size 5 second; the result
# is stored as a data frame and printed.
my_table <- as.data.frame(sample(
select(df, budget, revenue, rating, popularity, vote_count), 5))
my_table
# ---- Console history: attempts at plotting `my_table` ----
# Each attempt rebuilds `my_table` with the same sample() call and then tries a
# different ggplot specification.

# Attempt 1: bar chart of revenue.
# NOTE(review): `budget` is passed to geom_bar() outside aes(), so it is looked
# up as an ordinary R variable rather than as a column of my_table.
my_table <- as.data.frame(sample(
select(df, budget, revenue, rating, popularity, vote_count), 5))
ggplot(my_table, aes(x=revenue))+
geom_bar(y = budget)
# Attempt 2: same plot with y = Freq (also outside aes(); no `Freq` object is
# created anywhere in the visible history before this point).
my_table <- as.data.frame(sample(
select(df, budget, revenue, rating, popularity, vote_count), 5))
ggplot(my_table, aes(x=revenue))+
geom_bar(y = Freq)
# Attempt 3: same plot with y = popularity (again outside aes()).
my_table <- as.data.frame(sample(
select(df, budget, revenue, rating, popularity, vote_count), 5))
ggplot(my_table, aes(x=revenue))+
geom_bar(y = popularity)
# Attempt 4: heatmap via geom_tile() filled by `value`.
# NOTE(review): neither `column.names` nor a `value` column is defined in the
# visible history - confirm where they were expected to come from.
my_table <- as.data.frame(sample(
select(df, budget, revenue, rating, popularity, vote_count), 5))
ggplot(data = my_table, aes(x = factor(column.names), y = factor(rownames(my_table)))) +
geom_tile(aes(fill = value), color = "white") +
scale_fill_gradient(low = "white", high = "blue") +
labs(title = "Heatmap of my_table Data") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Attempt 5: same specification with the geom_tile() layer removed, so no geom
# layer is added to the plot.
my_table <- as.data.frame(sample(
select(df, budget, revenue, rating, popularity, vote_count), 5))
ggplot(data = my_table, aes(x = factor(column.names), y = factor(rownames(my_table)))) +
scale_fill_gradient(low = "white", high = "blue") +
labs(title = "Heatmap of my_table Data") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Attempt 6: geom_tile() restored, this time without a fill mapping.
my_table <- as.data.frame(sample(
select(df, budget, revenue, rating, popularity, vote_count), 5))
ggplot(data = my_table, aes(x = factor(column.names), y = factor(rownames(my_table)))) +
geom_tile() +
scale_fill_gradient(low = "white", high = "blue") +
labs(title = "Heatmap of my_table Data") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Attempt 7: melt the table to long form first and map Var2 / Var1 / value.
# NOTE(review): reshape2::melt() on a data frame presumably yields `variable`
# and `value` columns (Var1 / Var2 come from melting a matrix) - verify.
my_table <- as.data.frame(sample(
select(df, budget, revenue, rating, popularity, vote_count), 5))
ggplot(data = reshape2::melt(my_table), aes(x = Var2, y = Var1, fill = value)) +
geom_tile() +
scale_fill_gradient(low = "white", high = "blue") +
labs(title = "Heatmap of my_table Data") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Same sample stored under the name `table` and printed.
# NOTE(review): this variable shares its name with the base function table().
table <- as.data.frame(sample(
select(df, budget, revenue, rating, popularity, vote_count), 5))
table
# ---- Console history: six re-runs of the same TMDB load-and-coerce session ----
# Every run attaches ggplot2/dplyr, builds a 256-step colour ramp, reads
# 'data/tmdb_movies_data.csv', drops six text columns, draws 5 random rows and
# then converts every column to character, then numeric, then factor.
# The runs differ only in their final statement.
# NOTE(review): `COLORS` is not assigned anywhere earlier in the visible
# history, so the `cm` line depends on state from outside this log.

# Run 1: final step converts every column with as.Date.
library(ggplot2)
library(dplyr)
cm <- colorRampPalette(c(COLORS[1]))(256)
df <- read.csv('data/tmdb_movies_data.csv')
df <- df %>% select(-overview, -keywords, -genres, -production_companies, -tagline, -cast)
df_sample <- df %>% sample_n(5)
df_sample <- df_sample %>% mutate_all(as.character)
df_sample <- df_sample %>% mutate_all(as.numeric)
df_sample <- df_sample %>% mutate_all(as.factor)
df_sample <- df_sample %>% mutate_all(as.Date)
# Run 2: final step uses as.POSIXct instead.
library(ggplot2)
library(dplyr)
cm <- colorRampPalette(c(COLORS[1]))(256)
df <- read.csv('data/tmdb_movies_data.csv')
df <- df %>% select(-overview, -keywords, -genres, -production_companies, -tagline, -cast)
df_sample <- df %>% sample_n(5)
df_sample <- df_sample %>% mutate_all(as.character)
df_sample <- df_sample %>% mutate_all(as.numeric)
df_sample <- df_sample %>% mutate_all(as.factor)
df_sample <- df_sample %>% mutate_all(as.POSIXct)
# Run 3: final step uses as.POSIXlt.
library(ggplot2)
library(dplyr)
cm <- colorRampPalette(c(COLORS[1]))(256)
df <- read.csv('data/tmdb_movies_data.csv')
df <- df %>% select(-overview, -keywords, -genres, -production_companies, -tagline, -cast)
df_sample <- df %>% sample_n(5)
df_sample <- df_sample %>% mutate_all(as.character)
df_sample <- df_sample %>% mutate_all(as.numeric)
df_sample <- df_sample %>% mutate_all(as.factor)
df_sample <- df_sample %>% mutate_all(as.POSIXlt)
# Run 4: identical to run 3.
library(ggplot2)
library(dplyr)
cm <- colorRampPalette(c(COLORS[1]))(256)
df <- read.csv('data/tmdb_movies_data.csv')
df <- df %>% select(-overview, -keywords, -genres, -production_companies, -tagline, -cast)
df_sample <- df %>% sample_n(5)
df_sample <- df_sample %>% mutate_all(as.character)
df_sample <- df_sample %>% mutate_all(as.numeric)
df_sample <- df_sample %>% mutate_all(as.factor)
df_sample <- df_sample %>% mutate_all(as.POSIXlt)
# Run 5: the date conversion is dropped; the last line is a no-op self-assignment.
library(ggplot2)
library(dplyr)
cm <- colorRampPalette(c(COLORS[1]))(256)
df <- read.csv('data/tmdb_movies_data.csv')
df <- df %>% select(-overview, -keywords, -genres, -production_companies, -tagline, -cast)
df_sample <- df %>% sample_n(5)
df_sample <- df_sample %>% mutate_all(as.character)
df_sample <- df_sample %>% mutate_all(as.numeric)
df_sample <- df_sample %>% mutate_all(as.factor)
df_sample <- df_sample
# Run 6: same pipeline; the last line prints df_sample.
library(ggplot2)
library(dplyr)
cm <- colorRampPalette(c(COLORS[1]))(256)
df <- read.csv('data/tmdb_movies_data.csv')
df <- df %>% select(-overview, -keywords, -genres, -production_companies, -tagline, -cast)
df_sample <- df %>% sample_n(5)
df_sample <- df_sample %>% mutate_all(as.character)
df_sample <- df_sample %>% mutate_all(as.numeric)
df_sample <- df_sample %>% mutate_all(as.factor)
df_sample
# Page layout for the histogram demo: a titled page with a bin-count slider in
# the sidebar and a plot area in the main panel.
ui <- fluidPage(
  # Heading shown at the top of the page
  titlePanel(title = "Old Faithful Geyser Data"),
  sidebarLayout(
    # Sidebar: slider with id "bins", range 1 to 50, initial value 30
    sidebarPanel = sidebarPanel(
      sliderInput(
        inputId = "bins",
        label = "Number of bins:",
        min = 1,
        max = 50,
        value = 30
      )
    ),
    # Main area: plot output with id "distPlot"
    mainPanel = mainPanel(
      plotOutput(outputId = "distPlot")
    )
  )
)
library(shiny)
library(shiny)
# Page layout for the histogram demo: a titled page with a bin-count slider in
# the sidebar and a plot area in the main panel.
ui <- fluidPage(
  # Heading shown at the top of the page
  titlePanel(title = "Old Faithful Geyser Data"),
  sidebarLayout(
    # Sidebar: slider with id "bins", range 1 to 50, initial value 30
    sidebarPanel = sidebarPanel(
      sliderInput(
        inputId = "bins",
        label = "Number of bins:",
        min = 1,
        max = 50,
        value = 30
      )
    ),
    # Main area: plot output with id "distPlot"
    mainPanel = mainPanel(
      plotOutput(outputId = "distPlot")
    )
  )
)
# Server function: fills output$distPlot with a histogram of the second column
# of the built-in `faithful` data set, using input$bins as the bin count.
server <- function(input, output) {
  output$distPlot <- renderPlot({
    waiting <- faithful[[2]]

    # input$bins bins require input$bins + 1 equally spaced break points
    break_points <- seq(
      from = min(waiting),
      to = max(waiting),
      length.out = input$bins + 1
    )

    hist(
      waiting,
      breaks = break_points,
      col = "darkgray",
      border = "white",
      xlab = "Waiting time to next eruption (in mins)",
      main = "Histogram of waiting times"
    )
  })
}

# Start the app with the UI and server objects defined above
shinyApp(ui = ui, server = server)
# ---- Console history: Python typed into a reticulate REPL, interleaved with R ----
# The lines below mix R console commands (reticulate::repl_python(),
# install.packages(), View()) with Python statements entered after the REPL
# was opened. They are a log of input, not a runnable script in either language.
reticulate::repl_python()
import keras
# R: (re)install reticulate - entered twice in a row.
install.packages("reticulate")
install.packages("reticulate")
# Python: load a saved Keras model from a path relative to the working directory.
nn_model             = keras.models.load_model('../models/nn-model.h5')
# R: open the object `r` in the data viewer, then install reticulate once more.
View(r)
install.packages("reticulate")
# Python: imports for the loan-prediction app.
import keras
import joblib
import sklearn
import polars as pl
import pandas as pd
import numpy as np
import shiny.experimental as sh
from shiny import App, ui, render
# Five hex colour codes and the thirteen column names used by the project.
COLORS  = ['#102542', '#F87060', '#CDD7D6', '#B3A394', '#FFFFFF']
COLUMNS = ['Gender', 'Married', 'Education',
'Property_Area', 'Monthly_Income',
'Extra_Monthly_Income', 'Loan_Term',
'Credit_History', 'Loan_Status',
'Dependents', 'Employment_Type',
'Loan_Amount', 'Total_Monthly_Income']
# Read the cleaned data set via a path one level above the working directory.
df = pd.read_csv('../data-cleaning/cleaned-data/processsed-data.csv')
# ===============================================================================
# >>>>>>>>>>>>>>>>>>>> Loading the models & data preparing <<<<<<<<<<<<<<<<<<<<<<
# Second REPL session: the same imports (keras twice) and constants are re-entered.
reticulate::repl_python()
import keras
import joblib
import keras
import sklearn
import polars as pl
import pandas as pd
import numpy as np
import shiny.experimental as sh
COLORS  = ['#102542', '#F87060', '#CDD7D6', '#B3A394', '#FFFFFF']
COLUMNS = ['Gender', 'Married', 'Education',
'Property_Area', 'Monthly_Income',
'Extra_Monthly_Income', 'Loan_Term',
'Credit_History', 'Loan_Status',
'Dependents', 'Employment_Type',
'Loan_Amount', 'Total_Monthly_Income']
df = pd.read_csv('../data-cleaning/cleaned-data/processsed-data.csv')
# COLUMNS and the read_csv call are entered again unchanged.
COLUMNS = ['Gender', 'Married', 'Education',
'Property_Area', 'Monthly_Income',
'Extra_Monthly_Income', 'Loan_Term',
'Credit_History', 'Loan_Status',
'Dependents', 'Employment_Type',
'Loan_Amount', 'Total_Monthly_Income']
df = pd.read_csv('../data-cleaning/cleaned-data/processsed-data.csv')
# ===============================================================================
# Retry of the read with the leading '../' removed from the path.
df = pd.read_csv('data-cleaning/cleaned-data/processsed-data.csv')
# NOTE(review): `os` is not imported anywhere in the visible history before
# this call.
os.getcwd()
knitr::opts_chunk$set(echo = TRUE)
install.packages("hrbrthemes")
install.packages("psych")
install.packages("waffle")
install.packages("ggpubr")
install.packages("DT")
install.packages("GGally")
library(extrafont)
library(hrbrthemes)
library(scales)
library(reshape2)
library(ggpubr)
library(tidyverse)
library(showtext)
library(Hmisc)
library(psych)
library(waffle)
library(GGally)
library(DT)
# Shared constants: five hex colours indexed by the plots, plus two numeric
# layout values.
COLORS <- c("#102542", "#F87060", "#CDD7D6", "#B3A394", "#FFFFFF")
FONT <- 20
PAD <- 40

# Load the cleaned data set, show it as an interactive table and print a
# per-column summary.
df <- read.csv(file = "../data-cleaning/cleaned-data/processsed-data.csv")
datatable(data = df)
summary(object = df)
# Chi-squared test of independence between Loan_Status and Credit_History,
# followed by Cramer's V computed from the test statistic.

# Cross-tabulate the two columns and reshape the counts into a matrix with one
# row per Credit_History level and one column per Loan_Status level.
contingency_table <- select(df, Loan_Status, Credit_History) %>%
  table() %>%
  as.data.frame() %>%
  pivot_wider(names_from = Loan_Status, values_from = Freq) %>%
  column_to_rownames(var = "Credit_History") %>%
  as.matrix()

# Run the test once and read both results from the same object.
chi_test <- chisq.test(contingency_table)
p_value <- chi_test$p.value
chi_squared <- chi_test$statistic
chi_squared_matrix <- matrix(
  c(chi_squared, 0, 0, chi_squared),
  nrow = 2, ncol = 2, byrow = TRUE
)

# Degrees of freedom are (columns - 1) * (rows - 1). The subtraction has to
# happen after taking the dimensions: subtracting 1 from the matrix itself
# leaves its shape unchanged.
degrees_of_freedom <- (ncol(contingency_table) - 1) *
  (nrow(contingency_table) - 1)

# Critical value for alpha = 0.05: the statistic must exceed the upper-tail
# quantile to reject H0, so take the 95th percentile rather than the 5th.
chi_critical <- qchisq(0.05, degrees_of_freedom, lower.tail = FALSE)

# Cramer's V = sqrt(chi^2 / (n * min(k - 1, r - 1)))
n <- sum(contingency_table)
k <- ncol(contingency_table)
r <- nrow(contingency_table)
cramers_v <- sqrt(chi_squared / (n * min(k - 1, r - 1)))

sprintf("The Credit History and the Loan Status have:\n
Chi critical = %.3f (Chi square should be higher to reject H0)
P-Value= %e (Very low)
Chi Square = %.3f
Cramer's V = %.3f\n
Which means moderate levels of relationship and we can use those numbers as a standard to compare between them to other months's because those values alone are useless.",
  chi_critical, p_value, chi_squared, cramers_v) %>%
  cat()
# Waffle chart of Loan_Status shares among rows whose Credit_History is 'Good'.

# Loan_Status values for the 'Good' rows, and their counts per level.
chart_data <- df$Loan_Status[df$Credit_History == "Good"]
counts <- table(df$Loan_Status[df$Credit_History == "Good"])

# NOTE(review): the share is scaled by 101 rather than 100 - confirm this is
# intentional (e.g. to compensate for truncation when filling the grid).
percentage <- counts / length(chart_data) * 101

# data.frame() expands the table into percentage.Var1 / percentage.Freq; keep
# only the frequency and spread it into one column per loan state.
chart_data <- data.frame(loan_state = names(counts), percentage = percentage) %>%
  select(-percentage.Var1) %>%
  pivot_wider(names_from = loan_state, values_from = percentage.Freq)

waffle_chart <- waffle(
  chart_data,
  rows = 10,
  colors = c(COLORS[2], COLORS[3]),
  legend_pos = "bottom"
)

waffle_chart +
  labs(
    title = "How many people got their Loan Accepted",
    subtitle = "with Credit history > 70\n"
  ) +
  theme(
    plot.title = element_text(hjust = .5),
    plot.subtitle = element_text(hjust = .5)
  )
# ggsave("plots/getting_loan_chance_with_good_credit_history.png", width = 7, height = 7, dpi = 300)
# Jitter plot of Total_Monthly_Income per Loan_Status for 'Good' credit history
# rows, with the group medians and a dashed line at the lowest accepted income.
chart_data <- df[df$Credit_History == "Good", ]

# Smallest Total_Monthly_Income among the accepted loans
line_y <- min(
  chart_data$Total_Monthly_Income[chart_data$Loan_Status == "Accepted"]
)

ggplot(data = chart_data, mapping = aes(x = Loan_Status, y = Total_Monthly_Income)) +
  geom_jitter(colour = COLORS[3], width = 0.3) +
  # Diamond marker at each group's median
  stat_summary(fun = median, geom = "point", shape = 18, size = 5, color = COLORS[2]) +
  geom_hline(yintercept = line_y, linetype = "dashed", color = COLORS[1], size = 1) +
  ggtitle("Total Monlthly Income per Accepted & Rejected Loans") +
  labs(y = "Total Monthly Income", x = "", subtitle = "With Credit score > 700") +
  # Axis labels are shown in thousands
  scale_y_continuous(n.breaks = 10, labels = function(y) paste0(y / 1000, "K")) +
  theme_classic() +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5)
  )
# ggsave("plots/loan_acceptance_per_monthly_income.png", width = 7, height = 7, dpi = 300)
# Jitter plot of Loan_Amount per Loan_Status (uses the `chart_data` subset
# built just above), with a square marker at each group's median.
ggplot(chart_data, aes(x = Loan_Status, y = Loan_Amount)) +
  geom_jitter(colour = COLORS[3], width = 0.3) +
  # `fun` replaces the deprecated `fun.y` argument and matches the sibling
  # income plot, which already uses `fun`.
  stat_summary(fun = median, geom = "point", shape = 15, size = 5, color = COLORS[1]) +
  ggtitle("Loan Amount per Accepted & Rejected Loans") +
  labs(y = "Loan Amount", x = "", subtitle = "With Credit score > 700") +
  # Axis labels are shown in thousands
  scale_y_continuous(n.breaks = 10, labels = function(y) paste0(y / 1000, "K")) +
  theme_classic() +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5)
  )
# ggsave("plots/loan_amount_per_accepted_&_rejected_loans.png", width = 7, height = 7, dpi = 300)
# Loan status counts per employment type, drawn twice: once for all rows and
# once for rows with a "Good" credit history, stacked in a single figure.

# Counts for all rows (hyphens in Employment_Type are replaced by spaces)
chart_data <- df %>%
  select(Loan_Status, Employment_Type) %>%
  mutate(Employment_Type = gsub("-", " ", Employment_Type)) %>%
  table() %>%
  as.data.frame()

# Counts for the "Good" credit history rows only
chart_data_filtered <- df[df$Credit_History == "Good", ] %>%
  select(Loan_Status, Employment_Type) %>%
  mutate(Employment_Type = gsub("-", " ", Employment_Type)) %>%
  table() %>%
  as.data.frame()

# Upper panel: unfiltered counts
g1 <- ggplot(
  data = chart_data,
  mapping = aes(x = Employment_Type, y = Freq, fill = Loan_Status)
) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(
    title = "Loan Acceptableness by Employment Type",
    x = "Employment type",
    y = "Frequency"
  ) +
  scale_fill_manual(values = c("Accepted" = COLORS[4], "Rejected" = COLORS[3])) +
  theme_classic() +
  theme(
    plot.title = element_text(hjust = 0.7),
    plot.margin = margin(t = 20, r = 20, b = 20, l = 20)
  ) +
  guides(fill = guide_legend(title = "Loan Status"))

# Lower panel: filtered counts, with its own subtitle and "Accepted" colour
g2 <- ggplot(
  data = chart_data_filtered,
  mapping = aes(x = Employment_Type, y = Freq, fill = Loan_Status)
) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(
    title = "Loan Acceptableness by Employment Type",
    subtitle = "Credit History > 700",
    x = "Employment type",
    y = "Frequency"
  ) +
  scale_fill_manual(values = c("Accepted" = COLORS[1], "Rejected" = COLORS[3])) +
  theme_classic() +
  theme(
    plot.title = element_text(hjust = 0.7),
    plot.subtitle = element_text(hjust = 0.6),
    plot.margin = margin(t = 20, r = 20, b = 20, l = 20)
  ) +
  guides(fill = guide_legend(title = "Loan Status"))

# Stack both panels in one column and display the result
figure <- ggarrange(
  g1, g2,
  labels = c("Non filtered", "Filtered"),
  ncol = 1,
  nrow = 2
)
figure
# ggsave("plots/loan_acceptance_per_employment_type.png", width = 7, height = 7, dpi = 300)
# Cramer's V between Employment_Type and Loan_Status, computed for all rows and
# again for the rows whose Credit_History is "Good".
non_filtered_data <- df %>% select(Loan_Status, Employment_Type)
filtered_data <- non_filtered_data[df$Credit_History == "Good", ]

# Chi-squared statistics for both subsets
chi_squared <- with(
  non_filtered_data,
  chisq.test(Loan_Status, Employment_Type)
)$statistic
chi_squared_filtered <- with(
  filtered_data,
  chisq.test(Loan_Status, Employment_Type)
)$statistic

# Total count and table dimensions, all rows
n <- non_filtered_data %>% table() %>% sum()
k <- non_filtered_data %>% table() %>% ncol()
r <- non_filtered_data %>% table() %>% nrow()

# Total count and table dimensions, "Good" rows only
n_filtered <- filtered_data %>% table() %>% sum()
k_filtered <- filtered_data %>% table() %>% ncol()
r_filtered <- filtered_data %>% table() %>% nrow()

# Cramer's V = sqrt(chi^2 / (n * min(k - 1, r - 1)))
cramers_v <- sqrt(chi_squared / (n * min(k - 1, r - 1)))
cramers_v_filtered <- sqrt(
  chi_squared_filtered / (n_filtered * min(k_filtered - 1, r_filtered - 1))
)

cat(sprintf("The correlation between Employment type and Loan status in general is:
Cramer's V= %.3f
But when we filter the data for the Good credit histories only we get:
Cramer's V= %.3f",
  cramers_v, cramers_v_filtered))
# Cramer's V between Loan_Status and each of Gender, Property_Area and Married,
# restricted to rows whose Credit_History is "Good".
gender_data <- df[df$Credit_History == "Good", c("Loan_Status", "Gender")]
area_data <- df[df$Credit_History == "Good", c("Loan_Status", "Property_Area")]
married_data <- df[df$Credit_History == "Good", c("Loan_Status", "Married")]

# Chi-squared statistic for each pairing
gender_chi_squared <- with(gender_data, chisq.test(Loan_Status, Gender))$statistic
area_chi_squared <- with(area_data, chisq.test(Loan_Status, Property_Area))$statistic
married_chi_squared <- with(married_data, chisq.test(Loan_Status, Married))$statistic

# Gender: total count, table dimensions, Cramer's V
n_gender <- sum(table(gender_data))
k_gender <- ncol(table(gender_data))
r_gender <- nrow(table(gender_data))
gender_cramers_v <- sqrt(
  gender_chi_squared / (n_gender * min(k_gender - 1, r_gender - 1))
)

# Property area
n_area <- sum(table(area_data))
k_area <- ncol(table(area_data))
r_area <- nrow(table(area_data))
area_cramers_v <- sqrt(
  area_chi_squared / (n_area * min(k_area - 1, r_area - 1))
)

# Marital status
n_married <- married_data %>% table() %>% sum()
k_married <- married_data %>% table() %>% ncol()
r_married <- married_data %>% table() %>% nrow()
married_cramers_v <- sqrt(
  married_chi_squared / (n_married * min(k_married - 1, r_married - 1))
)

cat(sprintf("The Relationship between the Gender and Loan status is:
Cramer's V= %.3f
The Relationship between the Property area and Loan status is:
Cramer's V= %.3f
The Relationship between the Marriage and Loan status is:
Cramer's V= %.3f
",
  gender_cramers_v, area_cramers_v, married_cramers_v))
# Lollipop chart: mean acceptance (0-100) per Total_Monthly_Income bin.

# One label per 2,500-wide bin, plus an open-ended last bin
bin_labels <- c(
  "0 ~ 2.5K", "2.5K ~ 5K", "5K ~ 7.5K", "7.5K ~ 10K", "10K ~ 12.5K",
  "12.5K ~ 15K", "15K ~ 17.5K", "17.5K ~ 20K", "20K ~ 22.5K",
  "22.5K ~ 25K", "25K ~ ..."
)

chart_data <- df %>%
  mutate(
    Monthly_Income_Bins = cut(
      Total_Monthly_Income,
      breaks = c(seq(0, 25000, by = 2500), Inf),
      labels = bin_labels
    )
  )

# Encode the status as 100 (Accepted) / 0 (Rejected); any other value is passed
# through and then coerced to numeric.
chart_data$Loan_Status_New <- as.numeric(
  ifelse(
    chart_data$Loan_Status == "Accepted",
    100,
    ifelse(chart_data$Loan_Status == "Rejected", 0, chart_data$Loan_Status)
  )
)

# Mean of the 0/100 encoding per bin
chart_data <- aggregate(
  Loan_Status_New ~ Monthly_Income_Bins,
  data = chart_data,
  FUN = mean
)

ggplot(data = chart_data, mapping = aes(x = Monthly_Income_Bins, y = Loan_Status_New)) +
  # Stem from 0 up to the bin's value
  geom_segment(
    aes(
      x = Monthly_Income_Bins, xend = Monthly_Income_Bins,
      y = 0, yend = Loan_Status_New
    ),
    color = COLORS[3],
    size = 1.5
  ) +
  # Head of the lollipop
  geom_point(color = COLORS[2], size = 7) +
  theme_classic() +
  labs(
    title = "Loan Acceptance per Monthly Income\n",
    x = "Monthly Income",
    y = "Loan Acceptness"
  ) +
  scale_y_continuous(n.breaks = 10, labels = function(y) paste0(y, "%")) +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 90, hjust = 1)
  )
ggsave("plots/loan_acceptance_per_monthly_income.png", width = 10, height = 5, dpi = 300)
# Loan_Term against Loan_Amount, with points coloured by Loan_Status on top of
# a single-colour connecting line; the figure is then saved to disk.
ggplot(data = df, mapping = aes(x = Loan_Amount, y = Loan_Term, color = Loan_Status)) +
  geom_line(color = COLORS[3]) +
  geom_point(shape = 18, size = 3) +
  theme_classic() +
  scale_color_manual(values = c("Accepted" = COLORS[1], "Rejected" = COLORS[3])) +
  labs(
    color = "Loan Status",
    x = "Loan amount",
    y = "Loan term",
    title = "Loan amount & term affect on the Loan Acceptance\n"
  ) +
  # x labels in thousands, y labels suffixed with " Day"
  scale_x_continuous(n.breaks = 8, labels = function(x) paste0(x / 1000, "K")) +
  scale_y_continuous(n.breaks = 6, labels = function(y) paste0(y, " Day")) +
  theme(
    legend.position = "bottom",
    legend.background = element_rect("#f0f0f0"),
    plot.title = element_text(hjust = 0.5)
  )
ggsave("plots/acceptance_per_loan_amount_&_term.png", width = 10, height = 7, dpi = 300)
# Pair plot of Loan_Amount, Loan_Term and Total_Monthly_Income, coloured by
# Loan_Status; the figure is then saved to disk.
chart_data <- df %>%
  select(Loan_Amount, Loan_Term, Total_Monthly_Income, Loan_Status)

ggpairs(
  chart_data,
  mapping = ggplot2::aes(colour = Loan_Status),
  columns = 1:3,
  upper = list(continuous = "cor"),
  lower = list(
    continuous = "points",
    combo = "dot_no_facet",
    color = "Loan_Status"
  )
) +
  # Values of 1000 and above are labelled in thousands; smaller ones as-is
  scale_y_continuous(
    n.breaks = 5,
    labels = function(y) {
      ifelse(y >= 1000, paste0(y / 1000, "K"), y)
    }
  ) +
  scale_x_continuous(
    n.breaks = 5,
    labels = function(x) {
      ifelse(x >= 1000, paste0(x / 1000, "K"), x)
    }
  ) +
  theme_bw() +
  scale_fill_manual(values = c(COLORS[2], COLORS[3])) +
  scale_color_manual(values = c(COLORS[2], COLORS[3])) +
  labs(title = "Loan Features Correlation Pair plot\n") +
  theme(plot.title = element_text(hjust = 0.5))
ggsave("plots/loan_features_corr_paiplot.png", width = 7, height = 7, dpi = 300)
# Return the most frequent value of `v`.
#
# v: a vector.
# Returns a length-one vector of the same type as `v`; when several values
# share the highest count, the one that appears first in `v` is returned.
getmode <- function(v) {
  distinct_values <- unique(v)
  # Count how often each distinct value occurs, in order of first appearance
  occurrences <- tabulate(match(v, distinct_values))
  distinct_values[which.max(occurrences)]
}
# Distribution of four categorical columns, one facet per column, with each
# column's most frequent value highlighted.

# Long format: one row per (Loan_ID, variable, value). `max_frequency` flags
# rows whose value equals the mode of its variable (uses getmode() defined
# earlier in this file).
chart_data <- select(df, Married, Education, Property_Area, Loan_Status) %>%
  tibble::rowid_to_column("Loan_ID") %>%
  melt(id.vars = "Loan_ID") %>%
  group_by(variable) %>%
  mutate(max_frequency = value == getmode(value)) %>%
  # Drop the grouping so it does not leak into later operations on chart_data
  ungroup()

ggplot(chart_data, aes(Loan_ID, value, fill = max_frequency)) +
  geom_bar(stat = "identity") +
  stat_smooth() +
  # The stray empty argument that sat between the formula and `scales` is
  # removed; the facet settings are unchanged.
  facet_wrap(~variable, scales = "free") +
  theme_classic() +
  labs(x = NULL, y = NULL, title = "Categorical columns distrbution\n") +
  scale_fill_manual(values = c(COLORS[3], COLORS[4])) +
  # "none" is the supported way to hide a legend; FALSE is deprecated
  guides(fill = "none") +
  # Values of 1000 and above are labelled in thousands; smaller ones as-is
  scale_x_continuous(
    n.breaks = 5,
    labels = function(x) {
      ifelse(x >= 1000, paste0(x / 1000, "K"), x)
    }
  ) +
  theme(plot.title = element_text(hjust = 0.5))
ggsave("plots/categorical_cols_dist.png", height = 7, width = 8, dpi = 300)
# Switch the session's working directory to the project folder.
# NOTE(review): this is a machine-specific absolute path; the relative paths
# used by read.csv() and ggsave() elsewhere in this history depend on the
# working directory, so they will resolve differently on another machine.
setwd("C:/Users/Muhammad/Documents/Data science projects/Loan-prediction-project")