-
Notifications
You must be signed in to change notification settings - Fork 6
/
dataset.py
76 lines (62 loc) · 2.28 KB
/
dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# Based on: https://github.com/caglar/autoencoders.git
# http://www-etud.iro.umontreal.ca/~gulcehrc/
from __future__ import division
import pickle as pkl
import math
import numpy as np
class Dataset(object):
    """Hold a train/test split loaded from a pickle or NumPy file.

    The loaded object is indexed positionally: item 0 supplies the examples
    (``Xtrain``/``Xtest``), item 1 the labels (``Ytrain``/``Ytest``) and
    item 2 a per-example array stored in ``Xtrain_pres``/``Xtest_pres``.
    NOTE(review): the meaning of the ``*_pres`` arrays is not visible in this
    file -- confirm against the code that produces the data files.
    """

    def __init__(self, is_binary=False):
        """Create an empty dataset.

        Args:
            is_binary: When true, ``setup_dataset`` collapses the labels to
                0/1 through ``binarize_labels``.
        """
        self.is_binary = is_binary

        # Examples
        self.Xtrain = None
        self.Xtest = None

        # Labels
        self.Ytrain = None
        self.Ytest = None

        self.Xtrain_pres = None
        self.Xtest_pres = None

        self.sparsity = 0.0
        self.n_examples = 0

    def _get_data(self, data_path):
        """Load and return the raw object stored at ``data_path``.

        Paths ending in ``pkl`` or ``pickle`` are unpickled; anything else is
        handed to ``np.load``.
        """
        if data_path.endswith(("pkl", "pickle")):
            # SECURITY: unpickling executes arbitrary code embedded in the
            # file; only load files from trusted sources.
            # The context manager closes the handle even if loading fails.
            with open(data_path, "rb") as data_file:
                return pkl.load(data_file)
        return np.load(data_path)

    def binarize_labels(self, labels=None, last_lbl=None):
        """Map labels to 0/1: the reference label becomes 0, the rest 1.

        Args:
            labels: Flat sequence of labels.
            last_lbl: Label to map to 0. Defaults to the largest value in
                ``labels``. Pass it explicitly when ``labels`` is only a
                subset of the data, so every subset is binarized against the
                same reference label.

        Returns:
            A list of 0/1 ints, one per label. The list is empty when the
            dataset is not binary or when there are no labels.
        """
        if not self.is_binary or labels is None or np.size(labels) == 0:
            return []
        if last_lbl is None:
            # Largest label is for the images without different objects.
            last_lbl = np.max(labels)
        return [0 if label == last_lbl else 1 for label in labels]

    def setup_dataset(self, data_path=None, train_split_scale=0.0):
        """Load the data file and split it into train and test parts.

        The first ``floor(n_examples * train_split_scale)`` rows form the
        training part and the remaining rows the test part.

        Args:
            data_path: Path of the pickle/NumPy file to load.
            train_split_scale: Fraction of the examples used for training.
                ``Ytrain`` is left untouched when this is 0.0 and ``Ytest``
                is left untouched when it is 1.0.
        """
        data = self._get_data(data_path)
        examples, labels, pres = data[0], data[1], data[2]

        self.n_examples = examples.shape[0]
        # math.floor returns a float on Python 2; slice bounds must be ints.
        ntrain = int(math.floor(self.n_examples * train_split_scale))

        self.Xtrain = examples[:ntrain]
        self.Xtrain_pres = pres[:ntrain]
        self.Xtest = examples[ntrain:]
        self.Xtest_pres = pres[ntrain:]

        # Pick the reference label from the whole label set so that a split
        # which happens to lack the largest label is still binarized
        # consistently with the other split.
        last_lbl = None
        if self.is_binary and np.size(labels) > 0:
            last_lbl = np.max(labels)

        def prepare(split_labels):
            flat = split_labels.flatten()
            if self.is_binary:
                return np.array(self.binarize_labels(flat, last_lbl=last_lbl))
            return np.array(flat)

        if train_split_scale != 0.0:
            self.Ytrain = prepare(labels[:ntrain])
        if train_split_scale != 1.0:
            self.Ytest = prepare(labels[ntrain:])

    def comp_sparsity(self):
        """Compute, store and return the sparsity statistic of the examples.

        The value is the number of zero-valued entries in ``Xtrain`` and
        ``Xtest`` divided by ``n_examples``.
        NOTE(review): the denominator is the number of examples, not the
        number of entries, so the result is "zeros per example" and may
        exceed 1.0 -- kept as-is for compatibility; confirm this is intended.

        Returns:
            The computed value, also stored in ``self.sparsity``. 0.0 is
            returned for a dataset with no examples.

        Raises:
            AttributeError: If ``setup_dataset`` has not been called yet.
        """
        if self.Xtrain is None or self.Xtest is None:
            # Same exception type the unguarded attribute access used to raise.
            raise AttributeError(
                "comp_sparsity() requires setup_dataset() to be called first")

        # Vectorized count of zero entries across both splits.
        num_sparse_els = sum(
            int(np.count_nonzero(np.asarray(split) == 0))
            for split in (self.Xtrain, self.Xtest)
        )

        if self.n_examples == 0:
            self.sparsity = 0.0
        else:
            self.sparsity = num_sparse_els / self.n_examples
        return self.sparsity