-
Notifications
You must be signed in to change notification settings - Fork 204
/
Copy pathload_data.py
35 lines (28 loc) · 836 Bytes
/
load_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
"""
Functions to load the dataset.
"""
import numpy as np
def read_data(file_name):
    """Read a headered, comma-separated file of numbers into a list of rows.

    This function is adapted from:
    https://github.com/benhamner/BioResponse/blob/master/Benchmarks/csv_io.py

    The first line is treated as a header and discarded. Every remaining
    non-blank line is split on "," and each field is converted with
    ``float``.

    Args:
        file_name: Path of the file to read.

    Returns:
        A list with one entry per data line; each entry is a list of floats.

    Raises:
        IOError/OSError: If the file cannot be opened.
        ValueError: If a field cannot be converted to ``float``.
    """
    samples = []
    # The context manager closes the file even when a conversion fails.
    with open(file_name) as f:
        # skip header
        f.readline()
        for line in f:
            line = line.strip()
            if not line:
                # Tolerate blank lines (e.g. a trailing newline at EOF)
                # instead of failing on float("").
                continue
            samples.append([float(x) for x in line.split(",")])
    return samples
def load(train_path="data/train.csv", test_path="data/test.csv"):
    """Convenience function to load all data as numpy arrays.

    Args:
        train_path: File read with ``read_data``; the first value of each
            row goes to ``y_train`` and the remaining values to ``X_train``.
        test_path: File read with ``read_data``; every value of each row
            goes to ``X_test``.

    Returns:
        A tuple ``(X_train, y_train, X_test)`` of numpy arrays.
    """
    # Parenthesized form prints the same text under Python 2 and Python 3.
    print("Loading data...")
    train = read_data(train_path)
    y_train = np.array([x[0] for x in train])
    X_train = np.array([x[1:] for x in train])
    X_test = np.array(read_data(test_path))
    return X_train, y_train, X_test
if __name__ == "__main__":
    # Script entry point: load the dataset once and keep the three arrays
    # bound at module level.
    (X_train, y_train, X_test) = load()