Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Upgrade codebase to Python 3 #72

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions extract_vocab.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
load_dataset(0, use_small=USE_SMALL)
word_emb = load_word_emb('glove/glove.%dB.%dd.txt'%(B_word,N_word),
use_small=USE_SMALL)
print "Length of word vocabulary: %d"%len(word_emb)
print ("Length of word vocabulary: %d"%len(word_emb))

word_to_idx = {'<UNK>':0, '<BEG>':1, '<END>':2}
word_num = 3
Expand Down Expand Up @@ -57,9 +57,9 @@ def check_and_add(tok):
for tok in col:
check_and_add(tok)

print "Length of used word vocab: %s"%len(word_to_idx)
print ("Length of used word vocab: %s"%len(word_to_idx))

emb_array = np.stack(embs, axis=0)
with open('glove/word2idx.json', 'w') as outf:
json.dump(word_to_idx, outf)
np.save(open('glove/usedwordemb.npy', 'w'), emb_array)
np.save(open('glove/usedwordemb.npy', 'wb'), emb_array)
4 changes: 2 additions & 2 deletions sqlnet/lib/dbengine.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def execute(self, table_id, select_index, aggregation_index, conditions, lower=T
where_clause = []
where_map = {}
for col_index, op, val in conditions:
if lower and (isinstance(val, str) or isinstance(val, unicode)):
if lower and isinstance(val, str):
val = val.lower()
if schema['col{}'.format(col_index)] == 'real' and not isinstance(val, (int, float)):
try:
Expand All @@ -47,6 +47,6 @@ def execute(self, table_id, select_index, aggregation_index, conditions, lower=T
if where_clause:
where_str = 'WHERE ' + ' AND '.join(where_clause)
query = 'SELECT {} AS result FROM {} {}'.format(select, table_id, where_str)
#print query
#print(query)
out = self.db.query(query, **where_map)
return [o.result for o in out]
10 changes: 5 additions & 5 deletions sqlnet/model/modules/aggregator_predict.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import torch.nn.functional as F
from torch.autograd import Variable
import numpy as np
from net_utils import run_lstm, col_name_encode
from .net_utils import run_lstm, col_name_encode



Expand All @@ -13,17 +13,17 @@ def __init__(self, N_word, N_h, N_depth, use_ca):
super(AggPredictor, self).__init__()
self.use_ca = use_ca

self.agg_lstm = nn.LSTM(input_size=N_word, hidden_size=N_h/2,
self.agg_lstm = nn.LSTM(input_size=N_word, hidden_size=int(N_h/2),
num_layers=N_depth, batch_first=True,
dropout=0.3, bidirectional=True)
if use_ca:
print "Using column attention on aggregator predicting"
print ("Using column attention on aggregator predicting")
self.agg_col_name_enc = nn.LSTM(input_size=N_word,
hidden_size=N_h/2, num_layers=N_depth,
hidden_size=int(N_h/2), num_layers=N_depth,
batch_first=True, dropout=0.3, bidirectional=True)
self.agg_att = nn.Linear(N_h, N_h)
else:
print "Not using column attention on aggregator predicting"
print ("Not using column attention on aggregator predicting")
self.agg_att = nn.Linear(N_h, 1)
self.agg_out = nn.Sequential(nn.Linear(N_h, N_h),
nn.Tanh(), nn.Linear(N_h, 6))
Expand Down
10 changes: 5 additions & 5 deletions sqlnet/model/modules/selection_predict.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,23 @@
import torch.nn.functional as F
from torch.autograd import Variable
import numpy as np
from net_utils import run_lstm, col_name_encode
from .net_utils import run_lstm, col_name_encode

class SelPredictor(nn.Module):
def __init__(self, N_word, N_h, N_depth, max_tok_num, use_ca):
super(SelPredictor, self).__init__()
self.use_ca = use_ca
self.max_tok_num = max_tok_num
self.sel_lstm = nn.LSTM(input_size=N_word, hidden_size=N_h/2,
self.sel_lstm = nn.LSTM(input_size=N_word, hidden_size=int(N_h/2),
num_layers=N_depth, batch_first=True,
dropout=0.3, bidirectional=True)
if use_ca:
print "Using column attention on selection predicting"
print ("Using column attention on selection predicting")
self.sel_att = nn.Linear(N_h, N_h)
else:
print "Not using column attention on selection predicting"
print ("Not using column attention on selection predicting")
self.sel_att = nn.Linear(N_h, 1)
self.sel_col_name_enc = nn.LSTM(input_size=N_word, hidden_size=N_h/2,
self.sel_col_name_enc = nn.LSTM(input_size=N_word, hidden_size=int(N_h/2),
num_layers=N_depth, batch_first=True,
dropout=0.3, bidirectional=True)
self.sel_out_K = nn.Linear(N_h, N_h)
Expand Down
4 changes: 2 additions & 2 deletions sqlnet/model/modules/seq2sql_condition_predict.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,12 @@
import torch.nn.functional as F
from torch.autograd import Variable
import numpy as np
from net_utils import run_lstm
from .net_utils import run_lstm

class Seq2SQLCondPredictor(nn.Module):
def __init__(self, N_word, N_h, N_depth, max_col_num, max_tok_num, gpu):
super(Seq2SQLCondPredictor, self).__init__()
print "Seq2SQL where prediction"
print ("Seq2SQL where prediction")
self.N_h = N_h
self.max_tok_num = max_tok_num
self.max_col_num = max_col_num
Expand Down
26 changes: 13 additions & 13 deletions sqlnet/model/modules/sqlnet_condition_predict.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import torch.nn.functional as F
from torch.autograd import Variable
import numpy as np
from net_utils import run_lstm, col_name_encode
from .net_utils import run_lstm, col_name_encode

class SQLNetCondPredictor(nn.Module):
def __init__(self, N_word, N_h, N_depth, max_col_num, max_tok_num, use_ca, gpu):
Expand All @@ -15,57 +15,57 @@ def __init__(self, N_word, N_h, N_depth, max_col_num, max_tok_num, use_ca, gpu):
self.gpu = gpu
self.use_ca = use_ca

self.cond_num_lstm = nn.LSTM(input_size=N_word, hidden_size=N_h/2,
self.cond_num_lstm = nn.LSTM(input_size=N_word, hidden_size=int(N_h/2),
num_layers=N_depth, batch_first=True,
dropout=0.3, bidirectional=True)
self.cond_num_att = nn.Linear(N_h, 1)
self.cond_num_out = nn.Sequential(nn.Linear(N_h, N_h),
nn.Tanh(), nn.Linear(N_h, 5))
self.cond_num_name_enc = nn.LSTM(input_size=N_word, hidden_size=N_h/2,
self.cond_num_name_enc = nn.LSTM(input_size=N_word, hidden_size=int(N_h/2),
num_layers=N_depth, batch_first=True,
dropout=0.3, bidirectional=True)
self.cond_num_col_att = nn.Linear(N_h, 1)
self.cond_num_col2hid1 = nn.Linear(N_h, 2*N_h)
self.cond_num_col2hid2 = nn.Linear(N_h, 2*N_h)

self.cond_col_lstm = nn.LSTM(input_size=N_word, hidden_size=N_h/2,
self.cond_col_lstm = nn.LSTM(input_size=N_word, hidden_size=int(N_h/2),
num_layers=N_depth, batch_first=True,
dropout=0.3, bidirectional=True)
if use_ca:
print "Using column attention on where predicting"
print ("Using column attention on where predicting")
self.cond_col_att = nn.Linear(N_h, N_h)
else:
print "Not using column attention on where predicting"
print ("Not using column attention on where predicting")
self.cond_col_att = nn.Linear(N_h, 1)
self.cond_col_name_enc = nn.LSTM(input_size=N_word, hidden_size=N_h/2,
self.cond_col_name_enc = nn.LSTM(input_size=N_word, hidden_size=int(N_h/2),
num_layers=N_depth, batch_first=True,
dropout=0.3, bidirectional=True)
self.cond_col_out_K = nn.Linear(N_h, N_h)
self.cond_col_out_col = nn.Linear(N_h, N_h)
self.cond_col_out = nn.Sequential(nn.ReLU(), nn.Linear(N_h, 1))

self.cond_op_lstm = nn.LSTM(input_size=N_word, hidden_size=N_h/2,
self.cond_op_lstm = nn.LSTM(input_size=N_word, hidden_size=int(N_h/2),
num_layers=N_depth, batch_first=True,
dropout=0.3, bidirectional=True)
if use_ca:
self.cond_op_att = nn.Linear(N_h, N_h)
else:
self.cond_op_att = nn.Linear(N_h, 1)
self.cond_op_out_K = nn.Linear(N_h, N_h)
self.cond_op_name_enc = nn.LSTM(input_size=N_word, hidden_size=N_h/2,
self.cond_op_name_enc = nn.LSTM(input_size=N_word, hidden_size=int(N_h/2),
num_layers=N_depth, batch_first=True,
dropout=0.3, bidirectional=True)
self.cond_op_out_col = nn.Linear(N_h, N_h)
self.cond_op_out = nn.Sequential(nn.Linear(N_h, N_h), nn.Tanh(),
nn.Linear(N_h, 3))

self.cond_str_lstm = nn.LSTM(input_size=N_word, hidden_size=N_h/2,
self.cond_str_lstm = nn.LSTM(input_size=N_word, hidden_size=int(N_h/2),
num_layers=N_depth, batch_first=True,
dropout=0.3, bidirectional=True)
self.cond_str_decoder = nn.LSTM(input_size=self.max_tok_num,
hidden_size=N_h, num_layers=N_depth,
batch_first=True, dropout=0.3)
self.cond_str_name_enc = nn.LSTM(input_size=N_word, hidden_size=N_h/2,
self.cond_str_name_enc = nn.LSTM(input_size=N_word, hidden_size=int(N_h/2),
num_layers=N_depth, batch_first=True,
dropout=0.3, bidirectional=True)
self.cond_str_out_g = nn.Linear(N_h, N_h)
Expand Down Expand Up @@ -123,9 +123,9 @@ def forward(self, x_emb_var, x_len, col_inp_var, col_name_len,
num_col_att = self.softmax(num_col_att_val)
K_num_col = (e_num_col * num_col_att.unsqueeze(2)).sum(1)
cond_num_h1 = self.cond_num_col2hid1(K_num_col).view(
B, 4, self.N_h/2).transpose(0, 1).contiguous()
B, 4, int(self.N_h/2)).transpose(0, 1).contiguous()
cond_num_h2 = self.cond_num_col2hid2(K_num_col).view(
B, 4, self.N_h/2).transpose(0, 1).contiguous()
B, 4, int(self.N_h/2)).transpose(0, 1).contiguous()

h_num_enc, _ = run_lstm(self.cond_num_lstm, x_emb_var, x_len,
hidden=(cond_num_h1, cond_num_h2))
Expand Down
14 changes: 7 additions & 7 deletions sqlnet/model/modules/word_embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,14 @@ def __init__(self, word_emb, N_word, gpu, SQL_TOK,
self.SQL_TOK = SQL_TOK

if trainable:
print "Using trainable embedding"
print ("Using trainable embedding")
self.w2i, word_emb_val = word_emb
self.embedding = nn.Embedding(len(self.w2i), N_word)
self.embedding.weight = nn.Parameter(
torch.from_numpy(word_emb_val.astype(np.float32)))
else:
self.word_emb = word_emb
print "Using fixed embedding"
print ("Using fixed embedding")


def gen_x_batch(self, q, col):
Expand All @@ -32,9 +32,9 @@ def gen_x_batch(self, q, col):
val_len = np.zeros(B, dtype=np.int64)
for i, (one_q, one_col) in enumerate(zip(q, col)):
if self.trainable:
q_val = map(lambda x:self.w2i.get(x, 0), one_q)
q_val = list(map(lambda x:self.w2i.get(x, 0), one_q)) # Py 3.x (list(map) for concat)
else:
q_val = map(lambda x:self.word_emb.get(x, np.zeros(self.N_word, dtype=np.float32)), one_q)
q_val = list(map(lambda x:self.word_emb.get(x, np.zeros(self.N_word, dtype=np.float32)), one_q)) # Py 3.x (list(map) for concat)
if self.our_model:
if self.trainable:
val_embs.append([1] + q_val + [2]) #<BEG> and <END>
Expand All @@ -44,14 +44,14 @@ def gen_x_batch(self, q, col):
else:
one_col_all = [x for toks in one_col for x in toks+[',']]
if self.trainable:
col_val = map(lambda x:self.w2i.get(x, 0), one_col_all)
col_val = list(map(lambda x:self.w2i.get(x, 0), one_col_all)) # Py 3.x (list(map) for concat)
val_embs.append( [0 for _ in self.SQL_TOK] + col_val + [0] + q_val+ [0])
else:
col_val = map(lambda x:self.word_emb.get(x, np.zeros(self.N_word, dtype=np.float32)), one_col_all)
col_val = list(map(lambda x:self.word_emb.get(x, np.zeros(self.N_word, dtype=np.float32)), one_col_all)) # Py 3.x (list(map) for concat)
val_embs.append( [np.zeros(self.N_word, dtype=np.float32) for _ in self.SQL_TOK] + col_val + [np.zeros(self.N_word, dtype=np.float32)] + q_val+ [np.zeros(self.N_word, dtype=np.float32)])
val_len[i] = len(self.SQL_TOK) + len(col_val) + 1 + len(q_val) + 1
max_len = max(val_len)

if self.trainable:
val_tok_array = np.zeros((B, max_len), dtype=np.int64)
for i in range(B):
Expand Down
30 changes: 15 additions & 15 deletions sqlnet/model/seq2sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@
import torch.nn.functional as F
from torch.autograd import Variable
import numpy as np
from modules.word_embedding import WordEmbedding
from modules.aggregator_predict import AggPredictor
from modules.selection_predict import SelPredictor
from modules.seq2sql_condition_predict import Seq2SQLCondPredictor
from .modules.word_embedding import WordEmbedding
from .modules.aggregator_predict import AggPredictor
from .modules.selection_predict import SelPredictor
from .modules.seq2sql_condition_predict import Seq2SQLCondPredictor

# This is a re-implementation based on the following paper:

Expand Down Expand Up @@ -80,8 +80,8 @@ def generate_gt_where_seq(self, q, col, query):
cur_seq = [all_toks.index('<BEG>')]
if 'WHERE' in cur_query:
cur_where_query = cur_query[cur_query.index('WHERE'):]
cur_seq = cur_seq + map(lambda tok:all_toks.index(tok)
if tok in all_toks else 0, cur_where_query)
cur_seq = cur_seq + list(map(lambda tok:all_toks.index(tok)
if tok in all_toks else 0, cur_where_query))
cur_seq.append(all_toks.index('<END>'))
ret_seq.append(cur_seq)
return ret_seq
Expand Down Expand Up @@ -146,7 +146,7 @@ def loss(self, score, truth_num, pred_entry, gt_where):
agg_score, sel_score, cond_score = score
loss = 0
if pred_agg:
agg_truth = map(lambda x:x[0], truth_num)
agg_truth = list(map(lambda x:x[0], truth_num))
data = torch.from_numpy(np.array(agg_truth))
if self.gpu:
agg_truth_var = Variable(data.cuda())
Expand All @@ -156,7 +156,7 @@ def loss(self, score, truth_num, pred_entry, gt_where):
loss += self.CE(agg_score, agg_truth_var)

if pred_sel:
sel_truth = map(lambda x:x[1], truth_num)
sel_truth = list(map(lambda x:x[1], truth_num))
data = torch.from_numpy(np.array(sel_truth))
if self.gpu:
sel_truth_var = Variable(data).cuda()
Expand Down Expand Up @@ -199,9 +199,9 @@ def reinforce_backward(self, score, rewards):

def check_acc(self, vis_info, pred_queries, gt_queries, pred_entry):
def pretty_print(vis_data):
print 'question:', vis_data[0]
print 'headers: (%s)'%(' || '.join(vis_data[1]))
print 'query:', vis_data[2]
print ('question:', vis_data[0])
print ('headers: (%s)'%(' || '.join(vis_data[1])))
print ('query:', vis_data[2])

def gen_cond_str(conds, header):
if len(conds) == 0:
Expand All @@ -210,7 +210,7 @@ def gen_cond_str(conds, header):
for cond in conds:
cond_str.append(
header[cond[0]] + ' ' + self.COND_OPS[cond[1]] + \
' ' + unicode(cond[2]).lower())
' ' + str(cond[2]).lower())
return 'WHERE ' + ' AND '.join(cond_str)

pred_agg, pred_sel, pred_cond = pred_entry
Expand Down Expand Up @@ -261,8 +261,8 @@ def gen_cond_str(conds, header):
if not flag:
break
gt_idx = tuple(x[0] for x in cond_gt).index(cond_pred[idx][0])
if flag and unicode(cond_gt[gt_idx][2]).lower() != \
unicode(cond_pred[idx][2]).lower():
if flag and str(cond_gt[gt_idx][2]).lower() != \
str(cond_pred[idx][2]).lower():
flag = False
cond_val_err += 1

Expand Down Expand Up @@ -349,7 +349,7 @@ def merge_tokens(tok_list, raw_tok_str):
cond_toks.append(cond_val)

if verbose:
print cond_toks
print (cond_toks)
if len(cond_toks) > 0:
cond_toks = cond_toks[1:]
st = 0
Expand Down
Loading