diff --git a/.gitignore b/.gitignore
index a813ad2..abcd3f4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,12 +1,12 @@
 .idea/
 __pycache__
-data/*/
+venv/
+data/*
 *.np
 *.html
 published_results/devmap
 published_results/threadcoarsening
 published_results/vocabulary
-task/classifyapp/*/
-task/devmap/*/
-task/threadcoarsening/*/
-
+task/classifyapp/*
+task/devmap/*
+task/threadcoarsening/*
\ No newline at end of file
diff --git a/task_utils.py b/task_utils.py
index 5990ad8..20387ae 100644
--- a/task_utils.py
+++ b/task_utils.py
@@ -52,9 +52,12 @@ def download_and_unzip(url, dataset_name, data_folder):
     :param data_folder: folder in which to put the downloaded data
     """
     print('Downloading', dataset_name, 'data set...')
+    if not os.path.exists(data_folder):
+        os.makedirs(data_folder)
     data_zip = wget.download(url, out=data_folder)
     print('\tunzipping...')
     zip_ = zipfile.ZipFile(data_zip, 'r')
+    assert os.path.isdir(data_folder), data_folder
     zip_.extractall(data_folder)
     zip_.close()
     print('\tdone')
diff --git a/train_task_classifyapp.py b/train_task_classifyapp.py
index c9466de..f809889 100644
--- a/train_task_classifyapp.py
+++ b/train_task_classifyapp.py
@@ -37,7 +37,7 @@ from absl import flags


 # Parameters of classifyapp
-flags.DEFINE_string('input_data', 'task/classifyapp/ir', 'Path to input data')
+flags.DEFINE_string('input_data', 'task/classifyapp', 'Path to input data')
 flags.DEFINE_string('out', 'task/classifyapp', 'Path to folder in which to write saved Keras models and predictions')
 flags.DEFINE_integer('num_epochs', 50, 'number of training epochs')
 flags.DEFINE_integer('batch_size', 64, 'training batch size')
@@ -122,7 +122,10 @@ def __init__(self, batch_size, x_seq, y_1hot, embedding_mat):
         self.x_seq = x_seq
         self.y_1hot = y_1hot
         self.emb = embedding_mat
-        self.sess = tf.Session()
+        # Make tf block less gpu memory
+        config = tf.ConfigProto()
+        config.gpu_options.allow_growth = True
+        self.sess = tf.Session(config=config)
         self._set_index_array()

     def _set_index_array(self):
@@ -152,7 +155,10 @@ def __init__(self, batch_size, x_seq, embedding_mat):
         self.x_seq = x_seq
         self.dataset_len = int(np.shape(x_seq)[0] // self.batch_size)
         self.emb = embedding_mat
-        self.sess = tf.Session()
+        # Make tf block less gpu memory
+        config = tf.ConfigProto()
+        config.gpu_options.allow_growth = True
+        self.sess = tf.Session(config=config)

     def __len__(self):
         return self.dataset_len
@@ -233,7 +239,7 @@ def train_gen(self, train_generator: EmbeddingSequence, validation_generator: Em
                                      shuffle=True, callbacks=[checkpoint])
         except KeyboardInterrupt:
             print('Ctrl-C detected, saving weights to file')
-            self.model.save_weights('weights-kill.h5')
+            self.model.save_weights(os.path.join(FLAGS.out, 'weights-kill.h5'))

     def predict(self, sequences: np.array, batch_size: int) -> np.array:
         # directly predict application class from source sequences:
@@ -264,13 +270,13 @@ def evaluate(model, embeddings, folder_data, samples_per_class, folder_results,
     num_classes = 104
     y_train = np.empty(0)  # training
     X_train = list()
-    folder_data_train = folder_data + '_train'
+    folder_data_train = os.path.join(folder_data, 'seq_train')
     y_val = np.empty(0)  # validation
     X_val = list()
-    folder_data_val = folder_data + '_val'
+    folder_data_val = os.path.join(folder_data, 'seq_val')
     y_test = np.empty(0)  # testing
     X_test = list()
-    folder_data_test = folder_data + '_test'
+    folder_data_test = os.path.join(folder_data, 'seq_test')
     print('Getting file names for', num_classes, 'classes from folders:')
     print(folder_data_train)
     print(folder_data_val)
@@ -349,9 +355,9 @@ def evaluate(model, embeddings, folder_data, samples_per_class, folder_results,
     # Set up names paths
     model_name = model.__name__
     model_path = os.path.join(folder_results,
-                              "classifyapp/models/{}.model".format(model_name))
+                              "models/{}.model".format(model_name))
     predictions_path = os.path.join(folder_results,
-                                    "classifyapp/predictions/{}.result".format(model_name))
+                                    "predictions/{}.result".format(model_name))

     # If predictions have already been made with these embeddings, load them
     if fs.exists(predictions_path):
@@ -442,17 +448,17 @@ def main(argv):
     train_samples = FLAGS.train_samples

     # Acquire data
-    if not os.path.exists(folder_data + '_train'):
-
+    if not os.path.exists(os.path.join(folder_data, 'ir_train')):
         # Download data
         task_utils.download_and_unzip('https://polybox.ethz.ch/index.php/s/JOBjrfmAjOeWCyl/download',
                                       'classifyapp_training_data',
                                       folder_data)
-        task_utils.llvm_ir_to_trainable(folder_data + '_train')
-        assert os.path.exists(folder_data + '_val'), "Folder not found: " + folder_data + '_val'
-        task_utils.llvm_ir_to_trainable(folder_data + '_val')
-        assert os.path.exists(folder_data + '_test'), "Folder not found: " + folder_data + '_test'
-        task_utils.llvm_ir_to_trainable(folder_data + '_test')
+        task_utils.llvm_ir_to_trainable(os.path.join(folder_data, 'ir_train'))
+        assert os.path.exists(os.path.join(folder_data, 'ir_val')), "Folder not found: " + folder_data + '/ir_val'
+        task_utils.llvm_ir_to_trainable(os.path.join(folder_data, 'ir_val'))
+        assert os.path.exists(os.path.join(folder_data, 'ir_test')), "Folder not found: " + folder_data + '/ir_test'
+        task_utils.llvm_ir_to_trainable(os.path.join(folder_data, 'ir_test'))
+

     # Create directories if they do not exist
     if not os.path.exists(folder_results):
diff --git a/train_task_devmap.py b/train_task_devmap.py
index 4825520..9f3058c 100644
--- a/train_task_devmap.py
+++ b/train_task_devmap.py
@@ -235,7 +235,12 @@ def evaluate(model, device, data_folder, out_folder, embeddings,

     # Tensor of shape (num_input_files, sequence length, embbedding dimension)
     embedding_input_ = tf.nn.embedding_lookup(embedding_matrix_normalized, seq_)
-    with tf.Session() as sess:
+
+    # Make tf block less gpu memory
+    config = tf.ConfigProto()
+    config.gpu_options.allow_growth = True
+
+    with tf.Session(config=config) as sess:
         embedding_input = sess.run(embedding_input_, feed_dict={seq_: sequences})

     # Values used for training & predictions
diff --git a/train_task_threadcoarsening.py b/train_task_threadcoarsening.py
index 4b93b5c..5872585 100644
--- a/train_task_threadcoarsening.py
+++ b/train_task_threadcoarsening.py
@@ -252,7 +252,12 @@ def evaluate(model, device, data_folder, out_folder, embeddings, dense_layer_siz

     # Tensor of shape (num_input_files, sequence length, embbedding dimension)
     embedding_input_ = tf.nn.embedding_lookup(embedding_matrix_normalized, seq_)
-    with tf.Session() as sess:
+
+    # Make tf block less gpu memory
+    config = tf.ConfigProto()
+    config.gpu_options.allow_growth = True
+
+    with tf.Session(config=config) as sess:
         embedding_input = sess.run(embedding_input_, feed_dict={seq_: X_seq})

     # Leave-one-out cross-validation
@@ -424,4 +429,3 @@ def main(argv):

 if __name__ == '__main__':
     app.run(main)
-