Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

python3 compatibility #53

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 30 additions & 24 deletions skipthoughts.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@
import theano
import theano.tensor as tensor

import cPickle as pkl
# Python 3 has no cPickle module; try the `_pickle` module first and fall
# back to the standard `pickle` module when that import is unavailable.
# Only ImportError triggers the fallback, so unrelated failures (and
# KeyboardInterrupt/SystemExit) are no longer silently swallowed.
try:
    import _pickle as pkl
except ImportError:
    import pickle as pkl
import numpy
import copy
import nltk
Expand All @@ -20,20 +23,25 @@
#-----------------------------------------------------------------------------#
# Specify model and table locations here
#-----------------------------------------------------------------------------#
path_to_models = '/u/rkiros/public_html/models/'
path_to_tables = '/u/rkiros/public_html/models/'
path_to_skipthoughts = './skipthoughts'
path_to_models = os.path.join(path_to_skipthoughts, 'models')
path_to_tables = os.path.join(path_to_skipthoughts, 'models')
#-----------------------------------------------------------------------------#

path_to_umodel = path_to_models + 'uni_skip.npz'
path_to_bmodel = path_to_models + 'bi_skip.npz'
path_to_umodel = os.path.join(path_to_models, 'uni_skip.npz')
path_to_bmodel = os.path.join(path_to_models, 'bi_skip.npz')

path_to_utable = os.path.join(path_to_tables, 'utable.npy')
path_to_btable = os.path.join(path_to_tables, 'btable.npy')

path_to_dictionary = os.path.join(path_to_tables, 'dictionary.txt')

def load_model():
"""
Load the model with saved tables
"""
# Load model options
print 'Loading model parameters...'
print('Loading model parameters...')
with open('%s.pkl'%path_to_umodel, 'rb') as f:
uoptions = pkl.load(f)
with open('%s.pkl'%path_to_bmodel, 'rb') as f:
Expand All @@ -48,18 +56,18 @@ def load_model():
btparams = init_tparams(bparams)

# Extractor functions
print 'Compiling encoders...'
print('Compiling encoders...')
embedding, x_mask, ctxw2v = build_encoder(utparams, uoptions)
f_w2v = theano.function([embedding, x_mask], ctxw2v, name='f_w2v')
embedding, x_mask, ctxw2v = build_encoder_bi(btparams, boptions)
f_w2v2 = theano.function([embedding, x_mask], ctxw2v, name='f_w2v2')

# Tables
print 'Loading tables...'
print('Loading tables...')
utable, btable = load_tables()

# Store everything we need in a dictionary
print 'Packing up...'
print('Packing up...')
model = {}
model['uoptions'] = uoptions
model['boptions'] = boptions
Expand All @@ -76,9 +84,9 @@ def load_tables():
Load the tables
"""
words = []
utable = numpy.load(path_to_tables + 'utable.npy')
btable = numpy.load(path_to_tables + 'btable.npy')
f = open(path_to_tables + 'dictionary.txt', 'rb')
utable = numpy.load(path_to_utable, encoding='latin1')
btable = numpy.load(path_to_btable, encoding='latin1')
f = open(path_to_dictionary, 'rb')
for line in f:
words.append(line.decode('utf-8').strip())
f.close()
Expand Down Expand Up @@ -125,8 +133,8 @@ def encode(model, X, use_norm=True, verbose=True, batch_size=128, use_eos=False)
# Get features. This encodes by length, in order to avoid wasting computation
for k in ds.keys():
if verbose:
print k
numbatches = len(ds[k]) / batch_size + 1
print(k)
numbatches = int(len(ds[k]) / batch_size) + 1
for minibatch in range(numbatches):
caps = ds[k][minibatch::numbatches]

Expand Down Expand Up @@ -194,10 +202,10 @@ def nn(model, text, vectors, query, k=5):
scores = numpy.dot(qf, vectors.T).flatten()
sorted_args = numpy.argsort(scores)[::-1]
sentences = [text[a] for a in sorted_args[:k]]
print 'QUERY: ' + query
print 'NEAREST: '
print('QUERY: ' + query)
print('NEAREST: ')
for i, s in enumerate(sentences):
print s, sorted_args[i]
print(s, sorted_args[i])


def word_features(table):
Expand All @@ -221,10 +229,10 @@ def nn_words(table, wordvecs, query, k=10):
scores = numpy.dot(qf, wordvecs.T).flatten()
sorted_args = numpy.argsort(scores)[::-1]
words = [keys[a] for a in sorted_args[:k]]
print 'QUERY: ' + query
print 'NEAREST: '
print('QUERY: ' + query)
print('NEAREST: ')
for i, w in enumerate(words):
print w
print(w)


def _p(pp, name):
Expand All @@ -239,7 +247,7 @@ def init_tparams(params):
initialize Theano shared variables according to the initial parameters
"""
tparams = OrderedDict()
for kk, pp in params.iteritems():
for kk, pp in params.items():
tparams[kk] = theano.shared(params[kk], name=kk)
return tparams

Expand All @@ -249,7 +257,7 @@ def load_params(path, params):
load parameters
"""
pp = numpy.load(path)
for kk, vv in params.iteritems():
for kk, vv in params.items():
if kk not in pp:
warnings.warn('%s is not in the archive'%kk)
continue
Expand Down Expand Up @@ -436,5 +444,3 @@ def _step_slice(m_, x_, xx_, h_, U, Ux):
strict=True)
rval = [rval]
return rval