diff --git a/README.md b/README.md
index aa356b6..ee7808c 100644
--- a/README.md
+++ b/README.md
@@ -31,5 +31,7 @@ Along with our example data processing script in `data_processing_example` you c
 
 Our best CITE model on Flickr30K Entities using these precomputed features can be found [here](https://drive.google.com/open?id=1rmeIqYTCIduNc2QWUEdXLHFGrlOzz2xO).
 
+You can download the raw Flickr30K Entities data [here](http://web.engr.illinois.edu/~bplumme2/Flickr30kEntities/), but this isn't necessary if you use our precomputed features.
-Many thanks to [Kevin Shih](https://scholar.google.com/citations?user=4x3DhzAAAAAJ&hl=en) and [Liwei Wang](https://scholar.google.com/citations?user=qnbdnZEAAAAJ&hl=en) for access to their Similarity Network code that was used as the basis for this implementation.
\ No newline at end of file
+
+Many thanks to [Kevin Shih](https://scholar.google.com/citations?user=4x3DhzAAAAAJ&hl=en) and [Liwei Wang](https://scholar.google.com/citations?user=qnbdnZEAAAAJ&hl=en) for access to their [Similarity Network](https://arxiv.org/abs/1704.03470) code that was used as the basis for this implementation.
\ No newline at end of file
diff --git a/data_loader.py b/data_loader.py
index f7277c4..4e7e520 100644
--- a/data_loader.py
+++ b/data_loader.py
@@ -17,11 +17,11 @@ def __init__(self, args, region_dim, phrase_dim, plh, split):
         datafn = os.path.join('data', 'flickr', '%s_imfeats.h5' % split)
         self.data = h5py.File(datafn, 'r')
         vecs = np.array(self.data['phrase_features'], np.float32)
-        uniquePhrases = list(self.data['phrases'])
-        assert(vecs.shape[0] == len(uniquePhrases))
+        phrases = list(self.data['phrases'])
+        assert(vecs.shape[0] == len(phrases))
         w2v_dict = {}
-        for index, phrase in enumerate(uniquePhrases):
+        for index, phrase in enumerate(phrases):
             w2v_dict[phrase] = vecs[index, :]
 
         # mapping from uniquePhrase to w2v
@@ -30,8 +30,6 @@ def __init__(self, args, region_dim, phrase_dim, plh, split):
         self.n_pairs = len(self.pairs[0])
         self.pair_index = range(self.n_pairs)
 
-        self.uniquePhrases = uniquePhrases # set of unique phrases
-
         self.split = split
         self.plh = plh
         self.is_train = split == 'train'
diff --git a/data_processing_example/README.md b/data_processing_example/README.md
index 25d9335..7679da8 100644
--- a/data_processing_example/README.md
+++ b/data_processing_example/README.md
@@ -1,6 +1,6 @@
 # Conditional Image-Text Embedding Networks
 
-The code currently assumes datasets are divided into three hdf5 files named `<split>_imfeats.h5` where `split` takes on the value train, test, or val. It assumes it has the following items:
+The code currently assumes datasets are divided into three hdf5 files named `<split>_imfeats.h5` where `split` takes on the value train, test, or val. It assumes the hdf5 files contain the following items:
 1. phrase_features: #num_phrase X 6000 dimensional matrix of phrase features
 2. phrases: array of #num_phrase strings corresponding to the phrase features
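For reference, a minimal sketch of writing and reading one of these files with h5py. The dataset names (`phrase_features`, `phrases`), the 6000-dimensional features, and the `<split>_imfeats.h5` naming come from the README hunk above and from `data_loader.py`; the tiny `num_phrase` and the dummy phrase strings are placeholders for illustration:

```python
import h5py
import numpy as np

# Toy sizes for illustration only; real files hold #num_phrase x 6000 features.
num_phrase = 3
phrase_features = np.random.rand(num_phrase, 6000).astype(np.float32)
phrases = np.array([b'a red car', b'two dogs', b'the tall man'])

# Write a hypothetical train split file (split = train).
with h5py.File('train_imfeats.h5', 'w') as f:
    f.create_dataset('phrase_features', data=phrase_features)
    f.create_dataset('phrases', data=phrases)

# Read it back the same way data_loader.py does.
with h5py.File('train_imfeats.h5', 'r') as f:
    vecs = np.array(f['phrase_features'], np.float32)
    names = list(f['phrases'])
    assert vecs.shape[0] == len(names)
```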
diff --git a/main.py b/main.py
index c3ccf30..97e6f92 100644
--- a/main.py
+++ b/main.py
@@ -28,7 +28,7 @@
 parser.add_argument('--embed_l1', type=float, default=5e-5,
                     help='weight of the L1 regularization term used on the concept weight branch (default: 5e-5)')
 parser.add_argument('--max_epoch', type=int, default=0,
-                    help='maximum number of epochs, <1 indicates no limit (default: 0)')
+                    help='maximum number of epochs, less than 1 indicates no limit (default: 0)')
 parser.add_argument('--no_gain_stop', type=int, default=5,
                     help='number of epochs used to perform early stopping based on validation performance (default: 5)')
 parser.add_argument('--neg_to_pos_ratio', type=int, default=2,
diff --git a/model.py b/model.py
index 2b6abca..a2a4aa3 100644
--- a/model.py
+++ b/model.py
@@ -81,6 +81,12 @@ def setup_model(args, phrase_plh, region_plh, train_phase_plh, labels_plh, num_b
         labels_plh -- indicates positive (1), negative (-1), or ignore (0)
         num_boxes_plh -- number of boxes per example in the batch
         region_feature_dim -- dimensions of the region features
+
+    Returns:
+        total_loss -- weighted combination of the region and concept loss
+        region_loss -- logistic loss for phrase-region prediction
+        concept_loss -- L1 loss for the output of the concept weight branch
+        region_prob -- each row contains the probability a region is associated with a phrase
     """
     labels_plh = tf.reshape(labels_plh, [-1, num_boxes_plh])
     eb_fea_plh = tf.reshape(region_plh, [-1, num_boxes_plh, region_feature_dim])
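Given the return values documented in the new docstring, a hedged sketch of how `setup_model` might be wired into a TF1 training step. Only the argument order and the four returned tensors follow the docstring above; the placeholder shapes, the `args` namespace (the argparse namespace built in `main.py`), and the optimizer choice are illustrative assumptions, not the repository's actual configuration:

```python
import tensorflow as tf
from model import setup_model  # model.py from this repository

# Hypothetical dimensions; real values depend on your precomputed features.
phrase_dim = 6000
region_feature_dim = 4096

# Placeholders mirroring the docstring's argument list.
phrase_plh = tf.placeholder(tf.float32, [None, phrase_dim])
region_plh = tf.placeholder(tf.float32, [None, region_feature_dim])
train_phase_plh = tf.placeholder(tf.bool)
labels_plh = tf.placeholder(tf.float32, [None])  # 1 / -1 / 0 per region
num_boxes_plh = tf.placeholder(tf.int32)

# `args` stands in for the parsed argparse namespace from main.py (assumed).
total_loss, region_loss, concept_loss, region_prob = setup_model(
    args, phrase_plh, region_plh, train_phase_plh, labels_plh,
    num_boxes_plh, region_feature_dim)

# Train on the combined loss; at test time pick the most probable region.
train_op = tf.train.AdamOptimizer().minimize(total_loss)
best_box = tf.argmax(region_prob, axis=1)
```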