Skip to content

Commit

Permalink
Fixes spcl#4 and spcl#6 by being slightly more verbose.
Browse files Browse the repository at this point in the history
Made changes to let tensorflow only block as much GPU memory as necessary.
Fixed path for saving weights on ctrl+C and path scheme for models and predictions of classifyapp task.
  • Loading branch information
Zacharias030 committed May 6, 2019
1 parent db54b11 commit c3300a9
Show file tree
Hide file tree
Showing 5 changed files with 42 additions and 24 deletions.
10 changes: 5 additions & 5 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
.idea/
__pycache__
data/*/
venv/
data/*
*.np
*.html
published_results/devmap
published_results/threadcoarsening
published_results/vocabulary
task/classifyapp/*/
task/devmap/*/
task/threadcoarsening/*/

task/classifyapp/*
task/devmap/*
task/threadcoarsening/*
3 changes: 3 additions & 0 deletions task_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,12 @@ def download_and_unzip(url, dataset_name, data_folder):
:param data_folder: folder in which to put the downloaded data
"""
print('Downloading', dataset_name, 'data set...')
if not os.path.exists(data_folder):
os.makedirs(data_folder)
data_zip = wget.download(url, out=data_folder)
print('\tunzipping...')
zip_ = zipfile.ZipFile(data_zip, 'r')
assert os.path.isdir(data_folder), data_folder
zip_.extractall(data_folder)
zip_.close()
print('\tdone')
Expand Down
38 changes: 22 additions & 16 deletions train_task_classifyapp.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
from absl import flags

# Parameters of classifyapp
flags.DEFINE_string('input_data', 'task/classifyapp/ir', 'Path to input data')
flags.DEFINE_string('input_data', 'task/classifyapp', 'Path to input data')
flags.DEFINE_string('out', 'task/classifyapp', 'Path to folder in which to write saved Keras models and predictions')
flags.DEFINE_integer('num_epochs', 50, 'number of training epochs')
flags.DEFINE_integer('batch_size', 64, 'training batch size')
Expand Down Expand Up @@ -122,7 +122,10 @@ def __init__(self, batch_size, x_seq, y_1hot, embedding_mat):
self.x_seq = x_seq
self.y_1hot = y_1hot
self.emb = embedding_mat
self.sess = tf.Session()
# Let TensorFlow allocate GPU memory on demand instead of reserving it all up front
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
self.sess = tf.Session(config=config)
self._set_index_array()

def _set_index_array(self):
Expand Down Expand Up @@ -152,7 +155,10 @@ def __init__(self, batch_size, x_seq, embedding_mat):
self.x_seq = x_seq
self.dataset_len = int(np.shape(x_seq)[0] // self.batch_size)
self.emb = embedding_mat
self.sess = tf.Session()
# Let TensorFlow allocate GPU memory on demand instead of reserving it all up front
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
self.sess = tf.Session(config=config)

def __len__(self):
return self.dataset_len
Expand Down Expand Up @@ -233,7 +239,7 @@ def train_gen(self, train_generator: EmbeddingSequence, validation_generator: Em
shuffle=True, callbacks=[checkpoint])
except KeyboardInterrupt:
print('Ctrl-C detected, saving weights to file')
self.model.save_weights('weights-kill.h5')
self.model.save_weights(os.path.join(FLAGS.out, 'weights-kill.h5'))

def predict(self, sequences: np.array, batch_size: int) -> np.array:
# directly predict application class from source sequences:
Expand Down Expand Up @@ -264,13 +270,13 @@ def evaluate(model, embeddings, folder_data, samples_per_class, folder_results,
num_classes = 104
y_train = np.empty(0) # training
X_train = list()
folder_data_train = folder_data + '_train'
folder_data_train = os.path.join(folder_data, 'seq_train')
y_val = np.empty(0) # validation
X_val = list()
folder_data_val = folder_data + '_val'
folder_data_val = os.path.join(folder_data, 'seq_val')
y_test = np.empty(0) # testing
X_test = list()
folder_data_test = folder_data + '_test'
folder_data_test = os.path.join(folder_data, 'seq_test')
print('Getting file names for', num_classes, 'classes from folders:')
print(folder_data_train)
print(folder_data_val)
Expand Down Expand Up @@ -349,9 +355,9 @@ def evaluate(model, embeddings, folder_data, samples_per_class, folder_results,
# Set up names paths
model_name = model.__name__
model_path = os.path.join(folder_results,
"classifyapp/models/{}.model".format(model_name))
"models/{}.model".format(model_name))
predictions_path = os.path.join(folder_results,
"classifyapp/predictions/{}.result".format(model_name))
"predictions/{}.result".format(model_name))

# If predictions have already been made with these embeddings, load them
if fs.exists(predictions_path):
Expand Down Expand Up @@ -442,17 +448,17 @@ def main(argv):
train_samples = FLAGS.train_samples

# Acquire data
if not os.path.exists(folder_data + '_train'):

if not os.path.exists(os.path.join(folder_data, 'ir_train')):
# Download data
task_utils.download_and_unzip('https://polybox.ethz.ch/index.php/s/JOBjrfmAjOeWCyl/download',
'classifyapp_training_data', folder_data)

task_utils.llvm_ir_to_trainable(folder_data + '_train')
assert os.path.exists(folder_data + '_val'), "Folder not found: " + folder_data + '_val'
task_utils.llvm_ir_to_trainable(folder_data + '_val')
assert os.path.exists(folder_data + '_test'), "Folder not found: " + folder_data + '_test'
task_utils.llvm_ir_to_trainable(folder_data + '_test')
task_utils.llvm_ir_to_trainable(os.path.join(folder_data, 'ir_train'))
assert os.path.exists(os.path.join(folder_data, 'ir_val')), "Folder not found: " + folder_data + '/ir_val'
task_utils.llvm_ir_to_trainable(os.path.join(folder_data, 'ir_val'))
assert os.path.exists(os.path.join(folder_data, 'ir_test')), "Folder not found: " + folder_data + '/ir_test'
task_utils.llvm_ir_to_trainable(os.path.join(folder_data, 'ir_test'))


# Create directories if they do not exist
if not os.path.exists(folder_results):
Expand Down
7 changes: 6 additions & 1 deletion train_task_devmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,12 @@ def evaluate(model, device, data_folder, out_folder, embeddings,
# Tensor of shape (num_input_files, sequence length, embedding dimension)
embedding_input_ = tf.nn.embedding_lookup(embedding_matrix_normalized,
seq_)
with tf.Session() as sess:

# Let TensorFlow allocate GPU memory on demand instead of reserving it all up front
config = tf.ConfigProto()
config.gpu_options.allow_growth = True

with tf.Session(config=config) as sess:
embedding_input = sess.run(embedding_input_, feed_dict={seq_: sequences})

# Values used for training & predictions
Expand Down
8 changes: 6 additions & 2 deletions train_task_threadcoarsening.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,12 @@ def evaluate(model, device, data_folder, out_folder, embeddings, dense_layer_siz

# Tensor of shape (num_input_files, sequence length, embedding dimension)
embedding_input_ = tf.nn.embedding_lookup(embedding_matrix_normalized, seq_)
with tf.Session() as sess:

# Let TensorFlow allocate GPU memory on demand instead of reserving it all up front
config = tf.ConfigProto()
config.gpu_options.allow_growth = True

with tf.Session(config=config) as sess:
embedding_input = sess.run(embedding_input_, feed_dict={seq_: X_seq})

# Leave-one-out cross-validation
Expand Down Expand Up @@ -424,4 +429,3 @@ def main(argv):

if __name__ == '__main__':
app.run(main)

0 comments on commit c3300a9

Please sign in to comment.