Added sqlite database creation from tables.json #84

Open · wants to merge 2 commits into base: master
91 changes: 91 additions & 0 deletions create_databases.py
@@ -0,0 +1,91 @@
###############################################
# python create_databases.py
# Using evaluation_examples/examples/tables.json
# creates databases/*.sqlite
# Author: Prasad
###############################################
import os
import json
import sqlite3

with open('evaluation_examples/examples/tables.json') as f:
    schema = json.load(f)

databases = {}

for data in schema:
    db = data["db_id"]

    if db not in databases:
        databases[db] = {"tables": {}, "foreignkeys": []}

    # enumerate keeps cindex aligned with column_types, which also
    # carries an entry for the "*" pseudo-column at index 0
    for cindex, (tindex, cname) in enumerate(data["column_names_original"]):
        if tindex == -1:
            continue
        table = data["table_names_original"][tindex]
        if table not in databases[db]["tables"]:
            databases[db]["tables"][table] = {"columns": [], "primarykeys": []}
        databases[db]["tables"][table]["columns"].append({"name": cname, "type": data["column_types"][cindex]})

    for pindex in data["primary_keys"]:
        c = data["column_names_original"][pindex]
        table = data["table_names_original"][c[0]]
        databases[db]["tables"][table]["primarykeys"].append(c[1])

    for findex in data["foreign_keys"]:
        src_col = data["column_names_original"][findex[0]]
        ref_col = data["column_names_original"][findex[1]]
        databases[db]["foreignkeys"].append({
            "table": data["table_names_original"][src_col[0]],
            "column": src_col[1],
            "ref_table": data["table_names_original"][ref_col[0]],
            "ref_column": ref_col[1]
        })

for db in databases:
    os.makedirs("databases/" + db, exist_ok=True)
    dsn = "databases/" + db + "/" + db + ".sqlite"
    dbconn = sqlite3.connect(dsn)
    dbcur = dbconn.cursor()

    print(dsn)
    for table in databases[db]["tables"]:
        tablesql = 'create table "' + table + '"('
        cdelim = ""
        for col in databases[db]["tables"][table]["columns"]:
            tablesql += cdelim + '"' + col["name"] + '" ' + col["type"]
            cdelim = ","
        if databases[db]["tables"][table]["primarykeys"]:
            tablesql += ',primary key ("' + '","'.join(databases[db]["tables"][table]["primarykeys"]) + '")'
        # SQLite does not support adding foreign keys via ALTER TABLE,
        # so they are declared inline in the CREATE TABLE statement
        for fkey in databases[db]["foreignkeys"]:
            if fkey["table"] == table:
                tablesql += ',foreign key ("{}") references "{}" ("{}")'.format(
                    fkey["column"], fkey["ref_table"], fkey["ref_column"])
        tablesql += ");"

        print(tablesql)
        try:
            dbcur.execute(tablesql)
        except Exception as e:
            # Review tables.json spec.
            print("[ERROR]", e)

    dbconn.commit()
    dbconn.close()
    print()

print("databases created.\n")

print("Try\npython evaluation.py --gold evaluation_examples/gold_example.txt --pred evaluation_examples/pred_example.txt --etype all --table evaluation_examples/examples/tables.json --db databases")

22 changes: 11 additions & 11 deletions preprocess/get_tables.py
@@ -26,9 +26,9 @@ def convert_fk_index(data):
            if ref_cid and cid:
                fk_holder.append([cid, ref_cid])
        except:
-           traceback.print_exc()
-           print "table_names_original: ", data['table_names_original']
-           print "finding tab name: ", tn, ref_tn
+           traceback.print_exc()
+           print ("table_names_original: ", data['table_names_original'])
+           print ("finding tab name: ", tn, ref_tn)
            sys.exit()
    return fk_holder

@@ -56,7 +56,7 @@ def dump_db_json_schema(db, f):
        data['table_names'].append(table_name.lower().replace("_", ' '))
        fks = conn.execute("PRAGMA foreign_key_list('{}') ".format(table_name)).fetchall()
        #print("db:{} table:{} fks:{}".format(f,table_name,fks))
-       fk_holder.extend([[(table_name, fk[3]), (fk[2], fk[4])] for fk in fks])
+       fk_holder.extend([[(table_name, fk[3]), (fk[2], fk[4])] for fk in fks])
        cur = conn.execute("PRAGMA table_info('{}') ".format(table_name))
        for j, col in enumerate(cur.fetchall()):
            data['column_names_original'].append((i, col[1]))
@@ -86,7 +86,7 @@ def dump_db_json_schema(db, f):

if __name__ == '__main__':
    if len(sys.argv) < 2:
-       print "Usage: python get_tables.py [dir includes many subdirs containing database.sqlite files] [output file name e.g. output.json] [existing tables.json file to be inherited]"
+       print ("Usage: python get_tables.py [dir includes many subdirs containing database.sqlite files] [output file name e.g. output.json] [existing tables.json file to be inherited]")
        sys.exit()
    input_dir = sys.argv[1]
    output_file = sys.argv[2]
@@ -98,18 +98,18 @@ def dump_db_json_schema(db, f):
    #for tab in ex_tabs:
    #    tab["foreign_keys"] = convert_fk_index(tab)
    ex_tabs = {tab["db_id"]: tab for tab in ex_tabs if tab["db_id"] in all_fs}
-   print "precessed file num: ", len(ex_tabs)
+   print ("processed file num: ", len(ex_tabs))
    not_fs = [df for df in listdir(input_dir) if not exists(join(input_dir, df, df+'.sqlite'))]
    for d in not_fs:
-       print "no sqlite file found in: ", d
+       print ("no sqlite file found in: ", d)
    db_files = [(df+'.sqlite', df) for df in listdir(input_dir) if exists(join(input_dir, df, df+'.sqlite'))]
    tables = []
    for f, df in db_files:
        #if df in ex_tabs.keys():
        #    print 'reading old db: ', df
        #    tables.append(ex_tabs[df])
        db = join(input_dir, df, f)
-       print '\nreading new db: ', df
+       print ('\nreading new db: ', df)
        table = dump_db_json_schema(db, df)
        prev_tab_num = len(ex_tabs[df]["table_names"])
        prev_col_num = len(ex_tabs[df]["column_names"])
@@ -119,8 +119,8 @@ def dump_db_json_schema(db, f):
            table["table_names"] = ex_tabs[df]["table_names"]
            table["column_names"] = ex_tabs[df]["column_names"]
        else:
-           print "\n----------------------------------problem db: ", df
+           print ("\n----------------------------------problem db: ", df)
        tables.append(table)
-   print "final db num: ", len(tables)
+   print ("final db num: ", len(tables))
    with open(output_file, 'wt') as out:
-       json.dump(tables, out, sort_keys=True, indent=2, separators=(',', ': '))
+       json.dump(tables, out, sort_keys=True, indent=2, separators=(',', ': '))
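
For context, dump_db_json_schema builds its schema JSON from SQLite's PRAGMA interface. Below is a minimal sketch of the two PRAGMA calls it relies on, run against one of the generated databases; the path and database name are illustrative, not part of the PR.

import sqlite3

# assumed path: one of the databases produced by create_databases.py above
conn = sqlite3.connect("databases/example_db/example_db.sqlite")
names = conn.execute("select name from sqlite_master where type = 'table'").fetchall()
for (table_name,) in names:
    # PRAGMA table_info rows are (cid, name, type, notnull, dflt_value, pk);
    # col[1] is the column name that ends up in column_names_original
    print(conn.execute("PRAGMA table_info('{}')".format(table_name)).fetchall())
    # PRAGMA foreign_key_list rows begin (id, seq, ref_table, from_col, to_col, ...),
    # which is why the script reads fk[2], fk[3], and fk[4]
    print(conn.execute("PRAGMA foreign_key_list('{}')".format(table_name)).fetchall())
conn.close()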