BayesianRegression.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
'''Standard python modules'''
import sys
'''For scientific computing'''
from numpy import *
import scipy
'''For plotting'''
from matplotlib import pyplot, cm
from mpl_toolkits.mplot3d import Axes3D
import random
import numpy as np
from numpy import linalg as LA
import matplotlib.pyplot as plt
def hypothesis(X, theta):
    """Hypothesis function for linear regression.
    This could be inlined, but it is kept as a function for clarity.
    """
    return X.dot(theta)
def computeCostLoop(X, y, theta):
    """Compute the cost with an explicit loop, to show how the algorithm looks
    before vectorization. Start with the loop version first and write the
    vectorized version afterwards.
    """
    m = len(y)  # or m = shape(y)[0], since y is 1D
    cumulative_sum = 0
    for i in range(0, m):
        cumulative_sum += (hypothesis(X[i], theta) - y[i]) ** 2
    cumulative_sum = (1.0 / (2 * m)) * cumulative_sum
    return cumulative_sum
def computeCost(X, y, theta):
    """Compute the cost, vectorized version. Unlike computeCostLoop, this adds the
    Gaussian prior term theta.T.dot(theta) and scales by 1/(2*sigma**2) instead of
    1/(2*m), so with these constants it acts as the negative log posterior (up to
    an additive constant).
    """
    sigma = 1
    term = hypothesis(X, theta) - y
    # sum(term**2) is equivalent to term.T.dot(term) when term is a column vector
    return ((term.T.dot(term) + theta.T.dot(theta)) / (2 * sigma**2))[0, 0]
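# A minimal self-check of the vectorization claim above (a sketch; the helper name
# _check_vectorized_sum is ours and is not part of the original script): for any
# column vector v, sum(v**2) equals v.T.dot(v).
def _check_vectorized_sum():
    v = array([[1.0], [2.0], [3.0]])
    assert abs(sum(v ** 2) - v.T.dot(v)[0, 0]) < 1e-12  # both equal 14.0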
def gradientDescentLoop(X, y, theta, alpha, iterations):
    """Gradient descent, loop version (no prior term)."""
    m = len(y)
    n = shape(X)[1]
    for counter in range(0, iterations):
        # accumulate n inner sums, one per parameter
        cum_sum = [0 for x in range(0, n)]  # range(0, n) == range(n)
        for j in range(0, n):
            for i in range(0, m):
                term = hypothesis(X[i], theta) - y[i]
                cum_sum[j] += X[i, j] * term
        # assign the new value of each parameter in a separate loop, kept apart from
        # the loop above so that the update is simultaneous across all parameters
        for j in range(0, n):
            theta[j] = theta[j] - cum_sum[j] * (alpha / m)
    return theta
def gradientDescent(X, y, theta, alpha, iterations):
    """Vectorized gradient descent on the posterior cost (includes the prior term)."""
    sigma = 1  # noise standard deviation, matching computeCost
    for counter in range(0, iterations):
        inner_sum = X.T.dot(hypothesis(X, theta) - y) + theta
        theta = theta - alpha / sigma**2 * inner_sum
    return theta
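# Cross-check (a sketch, not part of the original script): setting the gradient used
# above to zero, X.T.dot(X.dot(theta) - y) + theta = 0, gives the closed-form MAP
# estimate theta = inv(X.T.dot(X) + I).dot(X.T).dot(y). The helper name closedFormMAP
# is ours.
def closedFormMAP(X, y):
    """Closed-form MAP / posterior-mean weights for the cost used above."""
    n = shape(X)[1]
    return LA.solve(X.T.dot(X) + eye(n), X.T.dot(y))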
def func():
    datax = [10 * random.random() for i in range(100)]
    datax = np.array(datax)
    v = np.var(datax)  # sample variance of the inputs (not used below)
    linex = np.arange(0, 10, 0.1)
    sigma = 1
    w0 = random.random()
    w1 = random.random()
    liney = [w0 + w1 * x for x in linex]  # true (noise-free) line; not plotted below
    y = [w0 + w1 * x for x in datax]
    y = np.array(y)
    t = [np.random.normal(mu, sigma) for mu in y]
    t = np.array(t)
    m = len(t)
    t = t.reshape(m, 1)
    X = c_[ones((m, 1)), datax]  # c_ is useful for concatenation; adding a column of ones makes X (m, 2) shaped
    theta = zeros((2, 1))
    iterations = 2000
    alpha = 0.01
    cost = computeCost(X, t, theta)  # initial cost with theta = 0 (value varies, since the data are random)
    theta = gradientDescent(X, t, theta, alpha, iterations)
    print cost
    print theta
    print w0, w1
    #predict1 = array([1, 3.5]).dot(theta)
    #predict2 = array([1, 7]).dot(theta)
    #print predict1
    #print predict2
    plt.scatter(datax, t)
    pyplot.plot(X[:, 1], X.dot(theta), 'g-')
    pyplot.legend(['Train data', 'Linear regression'], loc='upper right')
    pyplot.show(block=True)
func()
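# Optional cross-check (a hedged sketch; everything below is ours, not part of the
# original script): on a tiny hand-made dataset the iterative and closed-form MAP
# estimates should agree closely, both landing near the generating weights [1, 2]
# but shrunk slightly toward zero by the prior.
def _compare_solvers():
    Xc = c_[ones((5, 1)), array([0.0, 1.0, 2.0, 3.0, 4.0])]
    tc = array([[1.0], [3.0], [5.0], [7.0], [9.0]])  # exactly t = 1 + 2x, no noise
    theta_gd = gradientDescent(Xc, tc, zeros((2, 1)), 0.01, 5000)
    theta_cf = closedFormMAP(Xc, tc)
    return theta_gd, theta_cf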