-
Notifications
You must be signed in to change notification settings - Fork 0
/
pca.py
49 lines (32 loc) · 1.44 KB
/
pca.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import numpy as np
import pandas as pd
import normalizer
from Utils import plot_rotated
def main():
    """Load the sample CSV, rotate it onto two components, and pass it to the plotter."""
    dataset = pd.read_csv("data/data-large.csv")
    projected = define_matrix_space(data=dataset, features=2)
    # Optional: call plot_rotated(dataset, lin_reg, color='b', draw=False)
    # before the line below to include the untransformed points as well.
    plot_rotated(projected, lin_reg, color='r', draw=True)
def define_matrix_space(features: int, data: pd.DataFrame) -> pd.DataFrame:
    """Project *data* onto its leading principal components.

    Every column is standardized (zero mean, unit sample standard
    deviation), the covariance matrix of the standardized data is
    eigendecomposed, and the rows are rotated onto the ``features``
    eigenvectors with the largest eigenvalues.

    Args:
        features: Number of principal components to keep.
        data: Numeric observations, one row per sample and one column per
            variable. It is not modified.

    Returns:
        A frame with one row per input row (same index as *data*) and
        ``features`` columns ordered by decreasing variance. The columns
        reuse the first ``features`` labels of *data*, but each one holds a
        principal component rather than the original variable.
    """
    # Standardize into a new frame so every variable carries the same weight
    # and the caller's data stays untouched.
    standardized = (data - data.mean()) / data.std()

    # The covariance matrix is symmetric, so eigh is the right solver: it
    # returns real eigenpairs with the eigenvectors as unit-length columns,
    # which means no further rescaling of the vectors is needed.
    _, eigenvectors = np.linalg.eigh(standardized.cov().to_numpy())

    # eigh orders eigenvalues ascending; reverse the columns so the component
    # with the largest variance comes first, then keep the requested number.
    components = eigenvectors[:, ::-1][:, :features]

    # The sign of an eigenvector is arbitrary. Make the largest-magnitude
    # entry of every component positive so the output is reproducible.
    pivots = np.abs(components).argmax(axis=0)
    signs = np.sign(components[pivots, np.arange(components.shape[1])])
    components = components * signs

    # Rotate the standardized points into the eigenvector basis.
    rotated_points = standardized.to_numpy() @ components
    return pd.DataFrame(
        rotated_points, columns=data.columns[:features], index=data.index
    )
def lin_reg(df: pd.DataFrame) -> tuple[float, float]:
    """Fit a least-squares line ``y = m * x + b`` to the first two columns.

    The first column of *df* is used as ``x`` and the second as ``y``.
    Columns are selected by position, so a frame whose column labels are
    duplicated is handled the same way as any other.

    Args:
        df: Frame with at least two numeric columns.

    Returns:
        ``(m, b)``: the slope and the intercept of the fitted line.
    """
    x, y = df.iloc[:, 0], df.iloc[:, 1]
    x_mean, y_mean = x.mean(), y.mean()
    x_dev = x - x_mean
    # m = sum((x - X) * (y - Y)) / sum((x - X) ** 2)
    m_val = (x_dev * (y - y_mean)).sum() / (x_dev ** 2).sum()
    # The intercept follows from the line passing through the means (X, Y).
    b_val = y_mean - m_val * x_mean
    return m_val, b_val
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()