-
Notifications
You must be signed in to change notification settings - Fork 13
/
dataset.py
107 lines (76 loc) · 3.46 KB
/
dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
from hbconfig import Config
import tensorflow as tf
class IteratorInitializerHook(tf.train.SessionRunHook):
    """Session hook that runs a deferred iterator-initialisation callback.

    ``iterator_initializer_func`` starts out as ``None``. It has to be
    replaced with a callable accepting the session before the session is
    created, because ``after_create_session`` calls it unconditionally.
    """

    def __init__(self):
        super().__init__()
        # Placeholder until an input function installs the real callback.
        self.iterator_initializer_func = None

    def after_create_session(self, session, coord):
        """Call the stored initialiser callback with the new session.

        ``coord`` is accepted to satisfy the hook signature but is not used.
        """
        run_initializer = self.iterator_initializer_func
        run_initializer(session)
def get_train_inputs(X, y):
    """Build the training input function and its iterator-initialising hook.

    Args:
        X: Input data exposing ``reshape`` and ``shape`` (presumably a NumPy
            array -- confirm against callers).
        y: Target data with the same requirements as ``X``.

    Returns:
        A ``(train_inputs, hook)`` tuple. ``train_inputs`` takes no arguments,
        builds the dataset ops and returns the next ``(features, labels)``
        batch tensors. ``hook`` is an ``IteratorInitializerHook`` whose
        callback is installed when ``train_inputs`` is called.
    """
    hook = IteratorInitializerHook()

    def train_inputs():
        """Create the dataset ops inside the ``training`` name scope."""
        with tf.name_scope('training'):
            # Lay both arrays out as rows of Config.model.seq_length items.
            row_shape = [-1, Config.model.seq_length]
            features = X.reshape(row_shape)
            labels = y.reshape(row_shape)

            # The data is fed through placeholders when the iterator is
            # initialised, rather than being passed to the dataset directly.
            features_ph = tf.placeholder(tf.int32, features.shape)
            labels_ph = tf.placeholder(tf.int32, labels.shape)

            # Repeat without limit, shuffle, then batch.
            dataset = (
                tf.data.Dataset.from_tensor_slices((features_ph, labels_ph))
                .repeat(None)
                .shuffle(buffer_size=10000)
                .batch(Config.model.batch_size)
            )
            iterator = dataset.make_initializable_iterator()
            batch_X, batch_y = iterator.get_next()

            # Give the first example of each batch fixed op names.
            tf.identity(batch_X[0], 'input_0')
            tf.identity(batch_y[0], 'output_0')

            def initialize_iterator(sess):
                """Run the iterator initialiser, feeding the reshaped data."""
                return sess.run(
                    iterator.initializer,
                    feed_dict={features_ph: features,
                               labels_ph: labels})

            # The hook invokes this once the session has been created.
            hook.iterator_initializer_func = initialize_iterator

            return batch_X, batch_y

    return train_inputs, hook
def get_test_inputs(X, y):
    """Build the test input function and its iterator-initialising hook.

    Args:
        X: Input data exposing ``reshape`` and ``shape`` (presumably a NumPy
            array -- confirm against callers).
        y: Target data with the same requirements as ``X``.

    Returns:
        A ``(test_inputs, hook)`` tuple. ``test_inputs`` takes no arguments,
        builds the dataset ops and returns the next ``(features, labels)``
        batch tensors. ``hook`` is an ``IteratorInitializerHook`` whose
        callback is installed when ``test_inputs`` is called.
    """
    hook = IteratorInitializerHook()

    def test_inputs():
        """Create the dataset ops inside the ``test`` name scope."""
        with tf.name_scope('test'):
            # Lay both arrays out as rows of Config.model.seq_length items.
            row_shape = [-1, Config.model.seq_length]
            features = X.reshape(row_shape)
            labels = y.reshape(row_shape)

            # The data is fed through placeholders when the iterator is
            # initialised, rather than being passed to the dataset directly.
            features_ph = tf.placeholder(tf.int32, features.shape)
            labels_ph = tf.placeholder(tf.int32, labels.shape)

            # Same pipeline as the original: unlimited repeat, shuffle, batch.
            dataset = (
                tf.data.Dataset.from_tensor_slices((features_ph, labels_ph))
                .repeat(None)
                .shuffle(buffer_size=10000)
                .batch(Config.model.batch_size)
            )
            iterator = dataset.make_initializable_iterator()
            batch_X, batch_y = iterator.get_next()

            # Give the first example of each batch fixed op names.
            tf.identity(batch_X[0], 'input_0')
            tf.identity(batch_y[0], 'output_0')

            def initialize_iterator(sess):
                """Run the iterator initialiser, feeding the reshaped data."""
                return sess.run(
                    iterator.initializer,
                    feed_dict={features_ph: features,
                               labels_ph: labels})

            # The hook invokes this once the session has been created.
            hook.iterator_initializer_func = initialize_iterator

            return batch_X, batch_y

    return test_inputs, hook