ryankiros · ca-joe-yang · May 4, 2017 · May 4, 2017 · May 25, 2017
diff --git a/skipthoughts.py b/skipthoughts.py
@@ -6,7 +6,10 @@
 import theano
 import theano.tensor as tensor
 
-import cPickle as pkl
+try:
+    import _pickle as pkl
+except ImportError:  # _pickle not importable here; fall back to pickle
+    import pickle as pkl
 import numpy
 import copy
 import nltk
@@ -20,20 +23,25 @@
 #-----------------------------------------------------------------------------#
 # Specify model and table locations here
 #-----------------------------------------------------------------------------#
-path_to_models = '/u/rkiros/public_html/models/'
-path_to_tables = '/u/rkiros/public_html/models/'
+path_to_skipthoughts = './skipthoughts'
+path_to_models = os.path.join(path_to_skipthoughts, 'models')
+path_to_tables = os.path.join(path_to_skipthoughts, 'models')
 #-----------------------------------------------------------------------------#
 
-path_to_umodel = path_to_models + 'uni_skip.npz'
-path_to_bmodel = path_to_models + 'bi_skip.npz'
+path_to_umodel = os.path.join(path_to_models, 'uni_skip.npz')
+path_to_bmodel = os.path.join(path_to_models, 'bi_skip.npz')
 
+path_to_utable = os.path.join(path_to_tables, 'utable.npy')
+path_to_btable = os.path.join(path_to_tables, 'btable.npy')
+
+path_to_dictionary = os.path.join(path_to_tables, 'dictionary.txt')
 
 def load_model():
     """
     Load the model with saved tables
     """
     # Load model options
-    print 'Loading model parameters...'
+    print('Loading model parameters...')
     with open('%s.pkl'%path_to_umodel, 'rb') as f:
         uoptions = pkl.load(f)
     with open('%s.pkl'%path_to_bmodel, 'rb') as f:
@@ -48,18 +56,18 @@ def load_model():
     btparams = init_tparams(bparams)
 
     # Extractor functions
-    print 'Compiling encoders...'
+    print('Compiling encoders...')
     embedding, x_mask, ctxw2v = build_encoder(utparams, uoptions)
     f_w2v = theano.function([embedding, x_mask], ctxw2v, name='f_w2v')
     embedding, x_mask, ctxw2v = build_encoder_bi(btparams, boptions)
     f_w2v2 = theano.function([embedding, x_mask], ctxw2v, name='f_w2v2')
 
     # Tables
-    print 'Loading tables...'
+    print('Loading tables...')
     utable, btable = load_tables()
 
     # Store everything we need in a dictionary
-    print 'Packing up...'
+    print('Packing up...')
     model = {}
     model['uoptions'] = uoptions
     model['boptions'] = boptions
@@ -76,9 +84,9 @@ def load_tables():
     Load the tables
     """
     words = []
-    utable = numpy.load(path_to_tables + 'utable.npy')
-    btable = numpy.load(path_to_tables + 'btable.npy')
-    f = open(path_to_tables + 'dictionary.txt', 'rb')
+    utable = numpy.load(path_to_utable, encoding='latin1')
+    btable = numpy.load(path_to_btable, encoding='latin1')
+    f = open(path_to_dictionary, 'rb')
     for line in f:
         words.append(line.decode('utf-8').strip())
     f.close()
@@ -125,8 +133,8 @@ def encode(model, X, use_norm=True, verbose=True, batch_size=128, use_eos=False)
     # Get features. This encodes by length, in order to avoid wasting computation
     for k in ds.keys():
         if verbose:
-            print k
-        numbatches = len(ds[k]) / batch_size + 1
+            print(k)
+        numbatches = int(len(ds[k]) / batch_size) + 1
         for minibatch in range(numbatches):
             caps = ds[k][minibatch::numbatches]
 
@@ -194,10 +202,10 @@ def nn(model, text, vectors, query, k=5):
     scores = numpy.dot(qf, vectors.T).flatten()
     sorted_args = numpy.argsort(scores)[::-1]
     sentences = [text[a] for a in sorted_args[:k]]
-    print 'QUERY: ' + query
-    print 'NEAREST: '
+    print('QUERY: ' + query)
+    print('NEAREST: ')
     for i, s in enumerate(sentences):
-        print s, sorted_args[i]
+        print(s, sorted_args[i])
 
 
 def word_features(table):
@@ -221,10 +229,10 @@ def nn_words(table, wordvecs, query, k=10):
     scores = numpy.dot(qf, wordvecs.T).flatten()
     sorted_args = numpy.argsort(scores)[::-1]
     words = [keys[a] for a in sorted_args[:k]]
-    print 'QUERY: ' + query
-    print 'NEAREST: '
+    print('QUERY: ' + query)
+    print('NEAREST: ')
     for i, w in enumerate(words):
-        print w
+        print(w)
 
 
 def _p(pp, name):
@@ -239,7 +247,7 @@ def init_tparams(params):
     initialize Theano shared variables according to the initial parameters
     """
     tparams = OrderedDict()
-    for kk, pp in params.iteritems():
+    for kk, pp in params.items():
         tparams[kk] = theano.shared(params[kk], name=kk)
     return tparams
 
@@ -249,7 +257,7 @@ def load_params(path, params):
     load parameters
     """
     pp = numpy.load(path)
-    for kk, vv in params.iteritems():
+    for kk, vv in params.items():
         if kk not in pp:
             warnings.warn('%s is not in the archive'%kk)
             continue
@@ -436,5 +444,3 @@ def _step_slice(m_, x_, xx_, h_, U, Ux):
                                 strict=True)
     rval = [rval]
     return rval
-
-