-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathCode
52 lines (44 loc) · 1.55 KB
/
Code
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as snb
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error
# Load the dataset from the remote CSV; the rest of the script reads its
# 'Hours' and 'Scores' columns.
df = pd.read_csv('http://bit.ly/w-data')
# Preview the first five rows. A bare `df.head(5)` expression is only echoed
# in a notebook/REPL, so print it to make the preview visible when this file
# is run as a script.
print(df.head(5))
# Exploratory plots of marks against study hours: first a plain scatter
# plot, then seaborn's regression plot of the same two columns. Both figures
# share the same axis labels, so they are drawn in one loop.
snb.set_style('darkgrid')
hours_col, marks_col = df['Hours'], df['Scores']
for plot_fn, plot_title in (
    (snb.scatterplot, 'Marks Vs Study Hours'),
    (snb.regplot, 'Regression Plot'),
):
    plot_fn(x=hours_col, y=marks_col)
    plt.title(plot_title, size=20)
    plt.ylabel('Marks Percentage', size=12)
    plt.xlabel('Hours Studied', size=12)
    plt.show()
# Pairwise correlation between the dataset's columns.
print(df.corr())
# Select the feature and the target by column name. Positional slicing is
# only correct for a frame with exactly two columns; with any extra column
# the feature matrix would silently pick up the wrong data (possibly the
# target itself).
X = df[['Hours']].values  # 2-D array: one row per sample, one feature column
y = df['Scores'].values   # 1-D array of target marks
# No test_size is passed, so scikit-learn's default hold-out fraction is
# used; random_state=0 makes the split reproducible between runs.
train_X, val_X, train_y, val_y = train_test_split(X, y, random_state=0)
# Fit a linear regression model on the training split.
regression = LinearRegression()
regression.fit(train_X, train_y)
# Predict marks for the held-out validation hours.
p_y = regression.predict(val_X)
# Column 0 of val_X holds the hours values; slice it directly instead of
# rebuilding it element by element.
prediction = pd.DataFrame({'Hours': val_X[:, 0], 'Predicted Marks': p_y})
# Bare expressions are only echoed in a notebook/REPL, so print both tables
# to make them visible when this file is run as a script.
print(prediction)
# Side-by-side comparison of the true and predicted marks.
compare_scores = pd.DataFrame({'Actual Marks': val_y, 'Predicted Marks': p_y})
print(compare_scores)
# Plot the validation points (blue) with the model's predictions for the
# same hours drawn as a black line, using the current Axes object directly.
axes = plt.gca()
axes.scatter(x=val_X, y=val_y, color='blue')
axes.plot(val_X, p_y, color='Black')
axes.set_title('Actual vs Predicted', size=20)
axes.set_ylabel('Marks Percentage', size=12)
axes.set_xlabel('Hours Studied', size=12)
plt.show()
# Evaluate the model on the validation split. Mean absolute error is an
# error measure (lower is better), not an accuracy score.
validation_mae = mean_absolute_error(val_y, p_y)
print('Mean absolute error: ', validation_mae)
# Predict the score for a student who studies 9.25 hours per day.
# predict() is given a nested list: one sample containing one feature value.
daily_hours = 9.25
predicted = regression.predict([[daily_hours]])
predicted_score = round(predicted[0], 2)
print("Score = {}".format(predicted_score))