###############################################
# python create_databases.py
# Using evaluation_examples/examples/tables.json
# creates databases/*.sqlite
# Author: Prasad
###############################################
import os
import json
import sqlite3

SCHEMA_FILE = 'evaluation_examples/examples/tables.json'
OUTPUT_DIR = 'databases'


def collect_databases(schema):
    """Group the Spider-style ``tables.json`` entries by database.

    Returns a dict of the form::

        {db_id: {"tables": {table_name: {"columns": [{"name", "type"}, ...],
                                         "primarykeys": [col_name, ...]}},
                 "foreignkeys": [{"table", "column",
                                  "ref_table", "ref_column"}, ...]}}
    """
    databases = {}
    for data in schema:
        entry = databases.setdefault(data["db_id"],
                                     {"tables": {}, "foreignkeys": []})
        # Walk columns with their ABSOLUTE index so column_types stays
        # aligned.  (Bug fix: the original kept a separate counter that
        # skipped the (-1, "*") pseudo-column and therefore assigned every
        # column the type of its predecessor.)
        for cindex, (tindex, cname) in enumerate(data["column_names_original"]):
            if tindex == -1:
                continue  # the "*" pseudo-column belongs to no table
            table = data["table_names_original"][tindex]
            tdef = entry["tables"].setdefault(
                table, {"columns": [], "primarykeys": []})
            tdef["columns"].append(
                {"name": cname, "type": data["column_types"][cindex]})
        # primary_keys holds absolute column indexes.
        for pindex in data["primary_keys"]:
            tindex, cname = data["column_names_original"][pindex]
            table = data["table_names_original"][tindex]
            entry["tables"][table]["primarykeys"].append(cname)
        # foreign_keys holds [source_col_index, referenced_col_index] pairs.
        for src, ref in data["foreign_keys"]:
            src_col = data["column_names_original"][src]
            ref_col = data["column_names_original"][ref]
            entry["foreignkeys"].append({
                "table": data["table_names_original"][src_col[0]],
                "column": src_col[1],
                "ref_table": data["table_names_original"][ref_col[0]],
                "ref_column": ref_col[1],
            })
    return databases


def build_table_sql(table, tdef, foreignkeys):
    """Return the CREATE TABLE statement for *table*.

    Foreign keys belonging to *table* are declared inline: SQLite does not
    support adding constraints through ALTER TABLE, so the original
    post-creation ``alter table ... add key ... references ...`` statements
    failed on every database and no foreign key was ever created.
    Identifiers are double-quoted to tolerate spaces/keywords in names.
    """
    parts = ['"{}" {}'.format(col["name"], col["type"])
             for col in tdef["columns"]]
    if tdef["primarykeys"]:
        parts.append("primary key ({})".format(
            ','.join('"{}"'.format(pk) for pk in tdef["primarykeys"])))
    for fk in foreignkeys:
        if fk["table"] == table:
            parts.append('foreign key ("{}") references "{}" ("{}")'.format(
                fk["column"], fk["ref_table"], fk["ref_column"]))
    return 'create table "{}" ({});'.format(table, ",".join(parts))


def create_database(db, spec, root=OUTPUT_DIR):
    """Create ``<root>/<db>/<db>.sqlite`` and all tables in *spec*.

    Statement errors are reported but do not abort the run, matching the
    original best-effort behavior.
    """
    # exist_ok: re-running the script used to crash on the existing dir.
    os.makedirs(os.path.join(root, db), exist_ok=True)
    dsn = os.path.join(root, db, db + ".sqlite")
    print(dsn)
    dbconn = sqlite3.connect(dsn)
    try:
        dbcur = dbconn.cursor()
        for table, tdef in spec["tables"].items():
            tablesql = build_table_sql(table, tdef, spec["foreignkeys"])
            print(tablesql)
            try:
                dbcur.execute(tablesql)
            except sqlite3.Error as e:
                # Review tables.json spec.
                print("[ERROR]", e)
        dbconn.commit()
    finally:
        dbconn.close()
    print()


def main():
    """Read the schema file and materialize every database it defines."""
    with open(SCHEMA_FILE) as f:
        schema = json.load(f)
    databases = collect_databases(schema)
    for db in databases:
        create_database(db, databases[db])
    print("databases created.\n")
    print("Try\npython evaluation.py --gold evaluation_examples/gold_example.txt --pred evaluation_examples/pred_example.txt --etype all --table evaluation_examples/examples/tables.json --db databases")


if __name__ == "__main__":
    main()
print "XYZ" -> print ("XYZ") --- preprocess/get_tables.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/preprocess/get_tables.py b/preprocess/get_tables.py index a8b36f0..855785f 100644 --- a/preprocess/get_tables.py +++ b/preprocess/get_tables.py @@ -26,9 +26,9 @@ def convert_fk_index(data): if ref_cid and cid: fk_holder.append([cid, ref_cid]) except: - traceback.print_exc() - print "table_names_original: ", data['table_names_original'] - print "finding tab name: ", tn, ref_tn + traceback.print_exc() + print ("table_names_original: ", data['table_names_original']) + print ("finding tab name: ", tn, ref_tn) sys.exit() return fk_holder @@ -56,7 +56,7 @@ def dump_db_json_schema(db, f): data['table_names'].append(table_name.lower().replace("_", ' ')) fks = conn.execute("PRAGMA foreign_key_list('{}') ".format(table_name)).fetchall() #print("db:{} table:{} fks:{}".format(f,table_name,fks)) - fk_holder.extend([[(table_name, fk[3]), (fk[2], fk[4])] for fk in fks]) + fk_holder.extend([[(table_name, fk[3]), (fk[2], fk[4])] for fk in fks]) cur = conn.execute("PRAGMA table_info('{}') ".format(table_name)) for j, col in enumerate(cur.fetchall()): data['column_names_original'].append((i, col[1])) @@ -86,7 +86,7 @@ def dump_db_json_schema(db, f): if __name__ == '__main__': if len(sys.argv) < 2: - print "Usage: python get_tables.py [dir includes many subdirs containing database.sqlite files] [output file name e.g. output.json] [existing tables.json file to be inherited]" + print ("Usage: python get_tables.py [dir includes many subdirs containing database.sqlite files] [output file name e.g. 
output.json] [existing tables.json file to be inherited]") sys.exit() input_dir = sys.argv[1] output_file = sys.argv[2] @@ -98,10 +98,10 @@ def dump_db_json_schema(db, f): #for tab in ex_tabs: # tab["foreign_keys"] = convert_fk_index(tab) ex_tabs = {tab["db_id"]: tab for tab in ex_tabs if tab["db_id"] in all_fs} - print "precessed file num: ", len(ex_tabs) + print ("precessed file num: ", len(ex_tabs)) not_fs = [df for df in listdir(input_dir) if not exists(join(input_dir, df, df+'.sqlite'))] for d in not_fs: - print "no sqlite file found in: ", d + print ("no sqlite file found in: ", d) db_files = [(df+'.sqlite', df) for df in listdir(input_dir) if exists(join(input_dir, df, df+'.sqlite'))] tables = [] for f, df in db_files: @@ -109,7 +109,7 @@ def dump_db_json_schema(db, f): #print 'reading old db: ', df # tables.append(ex_tabs[df]) db = join(input_dir, df, f) - print '\nreading new db: ', df + print ('\nreading new db: ', df) table = dump_db_json_schema(db, df) prev_tab_num = len(ex_tabs[df]["table_names"]) prev_col_num = len(ex_tabs[df]["column_names"]) @@ -119,8 +119,8 @@ def dump_db_json_schema(db, f): table["table_names"] = ex_tabs[df]["table_names"] table["column_names"] = ex_tabs[df]["column_names"] else: - print "\n----------------------------------problem db: ", df + print ("\n----------------------------------problem db: ", df) tables.append(table) - print "final db num: ", len(tables) + print ("final db num: ", len(tables)) with open(output_file, 'wt') as out: - json.dump(tables, out, sort_keys=True, indent=2, separators=(',', ': ')) \ No newline at end of file + json.dump(tables, out, sort_keys=True, indent=2, separators=(',', ': '))