diff --git a/MachineLearning/2021-10-30_ML-Class-3/LogisticRegression.html b/MachineLearning/2021-10-30_ML-Class-3/LogisticRegression.html
new file mode 100644
index 0000000..2b49b39
--- /dev/null
+++ b/MachineLearning/2021-10-30_ML-Class-3/LogisticRegression.html
@@ -0,0 +1,13933 @@
+
+import numpy as np
+import pandas as pd
+import plotly.express as px
+import plotly.graph_objects as go
+import plotly.figure_factory as ff
+import matplotlib.pyplot as plt
+from sklearn.model_selection import train_test_split
+
+For demonstration purposes, let us take a 2-dimensional dataset with two features (Feature_1 and Feature_2) consisting of two classes (Class A and Class B), distributed as follows:
+Class A: centred around the mean (1,1) with covariance matrix [[1,-0.2],[-0.2,1]]
+Class B: centred around the mean (3,4) with covariance matrix [[1,0.1],[0.1,1]]
+Definitions:
+Mean: a class with centre (x1, x2) as its mean has an average value of x1 along "Feature_1" and an average value of x2 along "Feature_2"
+P.S.: To make sure the outputs match the expected outputs, we also fix the random seed to 42 before generating these distributions.
+# Distribution parameters for the two classes
+mean_01=np.array([1,1])
+cov_01=np.array([[1,-0.2],[-0.2,1]])
+mean_02=np.array([3,4])
+cov_02=np.array([[1,0.1],[0.1,1]])
+# Fix the seed so the generated samples are reproducible
+np.random.seed(42)
+# Draw 500 samples per class and stack them into a single training frame
+data_01=np.random.multivariate_normal(mean_01,cov_01,500, check_valid= "warn")
+data_02=np.random.multivariate_normal(mean_02,cov_02,500, check_valid= "warn")
+data = np.vstack((data_01,data_02))
+df_train = pd.DataFrame(data, columns = ["Feature_1", "Feature_2"])
+# Label Class A as 0 and Class B as 1
+df_train["class"] = [0]*500 + [1]*500
+
+fig = go.Figure(
+ layout = dict(
+ width = 800,
+ height = 800,
+ title_text = "Visualization of the dataset",
+ xaxis = dict(
+ title = dict(
+ text = "Feature_1"
+ )
+ ),
+ yaxis = dict(
+ title = dict(
+ text = "Feature_2"
+ )
+ )
+ )
+)
+
+scatter_trace_1 = go.Scatter(
+ x = df_train["Feature_1"][:500],
+ y = df_train["Feature_2"][:500],
+ mode = "markers",
+ name= "Class A",
+ hovertemplate = "Feature_1: %{x}<br>Feature_2: %{y}",
+ marker = dict(
+ size = 9,
+ opacity = .80,
+ color = "lightblue",
+ line = dict(
+ color = "blue",
+ width = 1,
+ )
+ )
+)
+
+scatter_trace_2 = go.Scatter(
+ x = df_train["Feature_1"][500:],
+ y = df_train["Feature_2"][500:],
+ mode = "markers",
+ name= "Class B",
+ hovertemplate = "Feature_1: %{x}<br>Feature_2: %{y}",
+ marker = dict(
+ symbol = "star-triangle-up",
+ size = 10,
+ opacity = 0.65,
+ color ="darkorange",
+ line = dict(
+ color = "red",
+ width = 1,
+ )
+ )
+)
+
+fig.add_trace(scatter_trace_1)
+fig.add_trace(scatter_trace_2)
+fig.show()
+# fig.write_html(r".\expected outputs\expectedoutput1.html")
+
+X = df_train[["Feature_1","Feature_2"]]
+Y = df_train[["class"]]
+# Prepend a column of ones so that theta[0] acts as the intercept (bias) term
+X = np.hstack((np.ones((1000,1)),X.to_numpy()))
+X_train, X_test, Y_train, Y_test = train_test_split(X, Y.to_numpy(), test_size=0.2, random_state=42)
+print(X_train.shape,Y_train.shape,X_test.shape,Y_test.shape)
+
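+The cell below implements logistic regression from scratch: hypothesis() applies the sigmoid function to the linear combination of theta and x, error() computes the average binary cross-entropy (the negative log-likelihood, here in base 2), gradient() accumulates (h(x_i) - y_i) * x_ij over the training samples, and gradient_ascent() repeatedly updates theta against that gradient for 100 iterations (equivalently, it ascends the log-likelihood), recording the error, accuracy and theta values along the way. Because a column of ones was prepended to X above, theta[0] plays the role of the intercept.
+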
+def hypothesis(x,theta):
+    # Sigmoid of the linear combination theta . x
+    sigmoid=(1.0/(1.0 + np.exp(-1.0*np.dot(x,theta))))
+    return(sigmoid)
+
+def error(X,Y,theta):
+    # Average binary cross-entropy (negative log-likelihood, base 2)
+    m=X.shape[0]
+    err=0
+    for i in range(m):
+        hx=hypothesis(X[i],theta)
+        err+=Y[i]*np.log2(hx) + (1-Y[i])*np.log2(1-hx)
+    err/=m
+    return(-err)
+
+def gradient(X,Y,theta):
+    # Gradient of the error: (1/m) * sum_i (h(x_i) - y_i) * x_ij for each feature j
+    grad=np.zeros((X.shape[1]))
+    m=X.shape[0]
+    fea=X.shape[1]
+    for i in range(m):
+        hx=hypothesis(X[i],theta)
+        for j in range(fea):
+            grad[j]+=(hx-Y[i])*X[i,j]
+    grad=grad/m
+    return(grad)
+
+def gradient_ascent(X,Y,learning_rate=0.5):
+    # Random initialisation of theta, with the intercept term starting at 0
+    theta=2*np.random.random(X.shape[1])
+    theta[0]=0
+    error_list=[]
+    acc_list=[]
+    theta_list = []
+    # 100 iterations of theta := theta - learning_rate * grad(error),
+    # i.e. ascending the log-likelihood
+    for i in range(100):
+        grad=gradient(X,Y,theta)
+        err=error(X,Y,theta)
+        error_list.append(err)
+        acc_list.append(accuracy(X,Y,theta))
+        theta_list.append(theta.copy())
+        for j in range(X.shape[1]):
+            theta[j]-=learning_rate*grad[j]
+    probability_list = predict_proba(X, theta)
+    return(theta, theta_list, error_list, acc_list, probability_list)
+
+def predict(x,theta):
+    # Threshold the predicted probability at 0.5
+    p=hypothesis(x,theta)
+    if p<0.5:
+        return 0
+    else:
+        return 1
+
+def predict_proba(X,theta):
+    # Predicted probability of class 1 for every sample in X
+    probability_list = []
+    for i in range(X.shape[0]):
+        probability = hypothesis(X[i],theta)
+        probability_list.append(probability)
+    return probability_list
+
+
+def accuracy(X,Y,theta):
+    # Fraction of samples whose predicted class matches its label
+    y_pred=[]
+    for i in range(X.shape[0]):
+        p=predict(X[i],theta)
+        y_pred.append(p)
+    y_pred=np.array(y_pred)
+    y_pred=y_pred.reshape((-1,1))
+    return (Y==y_pred).sum()/X.shape[0]
+
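+The loop-based functions above mirror the math one sample at a time. For larger datasets, a vectorized NumPy equivalent is much faster; the following is a sketch (not part of the original notebook; the _vec names are new) that computes the same quantities with matrix operations:
+
+def hypothesis_vec(X, theta):
+    # Sigmoid applied to all samples at once: shape (m,)
+    return 1.0 / (1.0 + np.exp(-X @ theta))
+
+def error_vec(X, Y, theta):
+    # Average binary cross-entropy in base 2, as in error()
+    hx = hypothesis_vec(X, theta)
+    y = Y.ravel()
+    return -np.mean(y * np.log2(hx) + (1 - y) * np.log2(1 - hx))
+
+def gradient_vec(X, Y, theta):
+    # (1/m) * X^T (h(X) - y), matching gradient()
+    return X.T @ (hypothesis_vec(X, theta) - Y.ravel()) / X.shape[0]
+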
+theta, theta_list, error_list, acc_list, probability_list=gradient_ascent(X_train,Y_train)
+
+fig = go.Figure(
+ layout = dict(
+ width = 800,
+ height = 800,
+ title_text = "Visualising the error",
+ xaxis = dict(
+ title = dict(
+ text = "Iteration"
+ )
+ ),
+ yaxis = dict(
+ title = dict(
+            text = "Error (negative average log-likelihood)"
+ )
+ )
+ )
+)
+
+fig.add_trace(go.Scatter(
+ x = [x for x in range(1,101,1)],
+ y = [x[0] for x in error_list],
+ mode = "lines+markers",
+ name= "",
+ marker = dict(
+ color = "lightblue",
+ line = dict(
+ color = "blue",
+ width = 1,
+ )
+ ),
+ hovertemplate = "Iteration: %{x}<br>Error: %{y}",
+))
+
+fig.show()
+# fig.write_html(r".\expected outputs\expectedoutput2.html")
+
+fig = go.Figure(
+ layout = dict(
+ width = 800,
+ height = 800,
+ title_text = "Visualising the Accuracy",
+ xaxis = dict(
+ title = dict(
+ text = "Iteration"
+ )
+ ),
+ yaxis = dict(
+ title = dict(
+ text = "Accuracy"
+ )
+ )
+ )
+)
+
+fig.add_trace(go.Scatter(
+ x = [x for x in range(1,101,1)],
+ y = acc_list,
+ mode = "lines+markers",
+ name= "",
+ marker = dict(
+ color = "lightblue",
+ line = dict(
+ color = "blue",
+ width = 1,
+ )
+ ),
+ hovertemplate = "Iteration: %{x}<br>Accuracy: %{y}",
+))
+
+fig.show()
+# fig.write_html(r".\expected outputs\expectedoutput3.html")
+
print("The accuracy for the algorithm is:",acc_list[-1])
+print("The final theta parameters calculated are:",theta)
+
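+As an optional sanity check (not part of the original notebook), the learned parameters can be compared against scikit-learn's LogisticRegression. This is only a sketch: fit_intercept is set to False because our X already contains the bias column, and a large C is used to approximate an unregularised fit, so the coefficients should come out close to (but not exactly equal to) the theta obtained above.
+
+from sklearn.linear_model import LogisticRegression
+
+# fit_intercept=False because X_train already includes the column of ones;
+# a large C weakens the default L2 regularisation
+clf = LogisticRegression(fit_intercept=False, C=1e6)
+clf.fit(X_train, Y_train.ravel())
+print("sklearn coefficients:", clf.coef_)
+print("sklearn test accuracy:", clf.score(X_test, Y_test.ravel()))
+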
+sliders_dict = {
+ 'active': 0,
+ 'yanchor': 'top',
+ 'xanchor': 'left',
+ 'currentvalue': {
+ 'font': {'size': 20},
+ 'prefix': 'No. of iterations:',
+ 'visible': True,
+ 'xanchor': 'right'
+ },
+ 'transition': {'duration': 300, 'easing': 'linear'},
+ 'pad': {'b': 10, 't': 50},
+ 'len': 0.9,
+ 'x': 0.1,
+ 'y': 0,
+ 'steps': []
+}
+
+frames = []
+# One animation frame per iteration: the two scatter traces plus the decision
+# boundary line theta0 + theta1*Feature_1 + theta2*Feature_2 = 0 for that iteration's theta
+for i in range(100):
+    frame = go.Frame(
+        data = [scatter_trace_1, scatter_trace_2,
+            go.Scatter(
+                x = np.linspace(-4,8,2),
+                y = -1*(theta_list[i][0]+np.linspace(-4,8,2)*theta_list[i][1])/theta_list[i][2],
+                mode = "lines",
+                name = "Decision Boundary",
+                hoverinfo = "none"
+            )
+        ],
+        name = str(i+1)
+    )
+    frames.append(frame)
+
+for i in range(100):
+ slider_step = {'args': [
+ [i+1],{
+ 'frame': {'duration': 300, 'redraw': True},
+ 'mode': 'immediate',
+ 'transition': {'duration': 300}
+ }],
+ 'label': i+1,
+ 'method': 'animate'}
+ sliders_dict['steps'].append(slider_step)
+fig = go.Figure(
+ data = [scatter_trace_1, scatter_trace_2,
+        go.Scatter(
+            x = np.linspace(-4,8,2),
+            y = -1*(theta_list[0][0]+np.linspace(-4,8,2)*theta_list[0][1])/theta_list[0][2],
+            mode = "lines",
+            name = "Decision Boundary",
+            hoverinfo = "none"
+        )],
+ layout = go.Layout(updatemenus=[{
+ 'buttons': [{
+ "args": [None,{"fromcurrent": True,
+ "transition": {"duration": 50,
+ "easing": "linear"}}],
+ 'label': 'Play',
+ 'method': 'animate'
+ },
+ {
+ 'args': [[None],{'frame': {'duration': 0, 'redraw': False},
+ 'mode': 'immediate',
+ 'transition': {'duration': 0}}],
+ 'label': 'Pause',
+ 'method': 'animate'
+ }],
+ 'direction': 'left',
+ 'pad': {'r': 10, 't': 87},
+ 'showactive': False,
+ 'type': 'buttons',
+ 'x': 0.1,
+ 'xanchor': 'right',
+ 'y': 0,
+ 'yanchor': 'top'
+ }]),
+ frames = frames
+)
+fig.update_layout(
+ width = 800,
+ height = 800,
+    title_text = "Visualising the convergence of the decision boundary",
+ xaxis = dict(
+ range = [-3.5,7.5],
+ title = dict(
+ text = "Feature_1"
+ )
+ ),
+ yaxis = dict(
+ title = dict(
+ text = "Feature_2"
+ )
+ )
+)
+fig['layout']['sliders'] = [sliders_dict]
+fig.show()
+# fig.write_html(r".\expected outputs\expectedoutput4.html")
+
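+The decision boundary of the trained model is the set of points where the hypothesis equals 0.5, i.e. where theta[0] + theta[1]*Feature_1 + theta[2]*Feature_2 = 0. Solving for Feature_2 gives Feature_2 = -(theta[0] + theta[1]*Feature_1)/theta[2], which is the straight line animated above and plotted below against the data.
+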
+fig = go.Figure(
+ layout = dict(
+ width = 800,
+ height = 800,
+ title_text = "Visualization of the decision boundary",
+ xaxis = dict(
+ range = [-4,8],
+ title = dict(
+ text = "Feature_1"
+ )
+ ),
+ yaxis = dict(
+ title = dict(
+ text = "Feature_2"
+ )
+ )
+ )
+)
+
+fig.add_trace(scatter_trace_1)
+fig.add_trace(scatter_trace_2)
+
+fig.add_trace(go.Scatter(
+    x = np.linspace(-4,8,2),
+    y = -1*(theta[0]+np.linspace(-4,8,2)*theta[1])/theta[2],
+    mode = "lines",
+    name = "Decision Boundary",
+    hoverinfo = "none"
+))
+
+fig.show()
+# fig.write_html(r".\expected outputs\expectedoutput5.html")
+plt.show()
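+
+Finally, to classify a new observation with the trained model, prepend the bias term and call predict(). The point below is a hypothetical example, not taken from the dataset:
+
+# Hypothetical new sample with Feature_1 = 2.0 and Feature_2 = 2.5 (the leading 1.0 is the bias term)
+x_new = np.array([1.0, 2.0, 2.5])
+print("Predicted class (0 = Class A, 1 = Class B):", predict(x_new, theta))
+print("Predicted probability of Class B:", hypothesis(x_new, theta))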
+