Skip to content

Commit

Permalink
Feat: Cost Estimation for Query - Added support for dynamic creation …
Browse files Browse the repository at this point in the history
…of histogram tables and refresh of table_stats table for every query
  • Loading branch information
Lohith K S authored and Lohith K S committed Nov 22, 2023
1 parent 5ad867a commit f9325e3
Show file tree
Hide file tree
Showing 4 changed files with 59 additions and 32 deletions.
7 changes: 5 additions & 2 deletions evadb/executor/cost_estimator.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from evadb.utils.logging_manager import logger
from evadb.configuration import constants
import json
import ast


class CostEstimator:
Expand Down Expand Up @@ -158,9 +159,11 @@ def getCostFromPredicate(self, plan_predicate):
if str(table_name) in constants.EVADB_STATS:
table_data = constants.EVADB_STATS[str(table_name)]
hist_data = table_data["hist"]
data_list = json.loads(hist_data)
my_list = ast.literal_eval(hist_data)
json_data = json.dumps(my_list)
data_list = json.loads(json_data)
for item in data_list:
level_dict = item.get(column, {})
level_dict = item.get(table_name + "." + column, {})
for value in level_dict:
if self.evaluate(condition_value,condition,value):
self._cost += level_dict[value]
Expand Down
17 changes: 17 additions & 0 deletions evadb/executor/cost_estimator_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from evadb.server.command_handler import execute_query_fetch_all
from evadb.configuration import constants


class CostEstimatorUtils:
    """Helpers that keep the in-memory table statistics cache up to date."""

    @staticmethod
    def fetch_table_stats(db, query):
        """Run ``query`` and copy its rows into ``constants.EVADB_STATS``.

        Each result row is stored under its ``table_stats.table_name`` value
        as a dict with the keys ``table_name``, ``num_rows`` and ``hist``.
        Existing entries for the same table name are overwritten; entries for
        tables that do not appear in the result are left untouched.

        Args:
            db: Database handle forwarded to ``execute_query_fetch_all``.
            query: Query text expected to select the ``table_name``,
                ``num_rows`` and ``hist`` columns of ``table_stats``.

        Returns:
            None. The function only mutates ``constants.EVADB_STATS``.
        """
        query_result = execute_query_fetch_all(
            db, query, do_not_print_exceptions=False, do_not_raise_exceptions=True
        )

        # Exceptions are suppressed for this call, so a failed query (e.g. the
        # table_stats table has not been created yet) may yield no result
        # object at all. Keep the current cache rather than crashing the
        # caller in that case.
        if query_result is None:
            return

        for _, row in query_result.iterrows():
            table_name = row["table_stats.table_name"]
            constants.EVADB_STATS[table_name] = {
                "table_name": table_name,
                "num_rows": row["table_stats.num_rows"],
                "hist": row["table_stats.hist"],
            }
64 changes: 34 additions & 30 deletions evadb/functions/function_bootstrap_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from evadb.database import EvaDBDatabase
from evadb.server.command_handler import execute_query_fetch_all
from evadb.configuration import constants
from collections import Counter

NDARRAY_DIR = "ndarray"
TUTORIALS_DIR = "tutorials"
Expand Down Expand Up @@ -198,8 +199,6 @@
EvaDB_INSTALLATION_DIR
)

getdata_from_stats_query = "SELECT * FROM table_stats;"

yolo8n_query = """CREATE FUNCTION IF NOT EXISTS Yolo
TYPE ultralytics
MODEL 'yolov8n.pt';
Expand Down Expand Up @@ -241,19 +240,7 @@
EvaDB_INSTALLATION_DIR
)

create_buckets_query = """CREATE TABLE level_counts (
level Integer,
row_count INTEGER
);"""

insert_into_buckets = """INSERT INTO level_counts
SELECT
level,
COUNT(*) AS row_count
FROM mydata3
GROUP BY level;"""

extract_data_buckets = """Select * from level_counts"""
get_all_tables = "show tables"


def init_builtin_functions(db: EvaDBDatabase, mode: str = "debug") -> None:
Expand Down Expand Up @@ -298,8 +285,7 @@ def init_builtin_functions(db: EvaDBDatabase, mode: str = "debug") -> None:
chatgpt_function_query,
face_detection_function_query,
# Mvit_function_query,
extract_data_buckets,
getdata_from_stats_query,
get_all_tables,
Sift_function_query,
Yolo_function_query,
stablediffusion_function_query,
Expand All @@ -325,20 +311,38 @@ def init_builtin_functions(db: EvaDBDatabase, mode: str = "debug") -> None:
# ignore exceptions during the bootstrapping phase due to missing packages
for query in queries:
try:
if query.startswith("SELECT"):
query_result = execute_query_fetch_all(
db, query, do_not_print_exceptions=False, do_not_raise_exceptions=True
)
for _, row in query_result.iterrows():
entry = {
'table_name': row['table_stats.table_name'],
'num_rows': row['table_stats.num_rows'],
'hist': row['table_stats.hist']
}
constants.EVADB_STATS[row['table_stats.table_name']] = entry
if query.startswith("show"):
tables = execute_query_fetch_all(
db, query, do_not_print_exceptions=False, do_not_raise_exceptions=True
)
# Delete all entries in the table_stats table. This is a temporary workaround: because an UPDATE query is not yet
# available in EvaDB, the table_stats table is dropped, recreated, and repopulated every time.
execute_query_fetch_all(db, "drop table table_stats", False, True)
execute_query_fetch_all(db, "create table table_stats(table_name TEXT, num_rows integer, hist TEXT)", False, True)
# get the histograms for each column for every table
row_count = 0
for _,table in tables.iterrows():
if str(table['name']) != "table_stats":
final_result = []
result_dict = {}
table_data_query = f"select * from {table['name']}"
table_data = execute_query_fetch_all(db, table_data_query, False, True)
for column in table_data.columns:
try:
column_dict = dict(Counter(table_data.column_as_numpy_array(column)))
row_count = len(table_data.column_as_numpy_array(column))
result_dict[column] = column_dict
except Exception as e:
print(e)
final_result = [{key: value} for key, value in result_dict.items()]
try:
insert_into_table_stats = f"insert into table_stats(table_name, num_rows, hist) values(\"{table['name']}\",{row_count},\"{final_result}\");"
execute_query_fetch_all(db, insert_into_table_stats, False, True)
except Exception as e:
print(e)
else:
execute_query_fetch_all(
db, query, do_not_print_exceptions=False, do_not_raise_exceptions=True
)
db, query, do_not_print_exceptions=False, do_not_raise_exceptions=True
)
except Exception as e:
pass
3 changes: 3 additions & 0 deletions evadb/interfaces/relational/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
from evadb.server.command_handler import execute_statement
from evadb.utils.generic_utils import find_nearest_word, is_ray_enabled_and_installed
from evadb.utils.logging_manager import logger
from evadb.executor.cost_estimator_utils import CostEstimatorUtils


class EvaDBConnection:
Expand Down Expand Up @@ -444,6 +445,8 @@ def query(self, sql_query: str) -> EvaDBQuery:
1 3 4
2 5 6
"""
# Refresh the cached statistics from the table_stats table so the latest database statistics are used
CostEstimatorUtils.fetch_table_stats(self._evadb, "select * from table_stats;")
stmt = parse_query(sql_query)
return EvaDBQuery(self._evadb, stmt)

Expand Down

0 comments on commit f9325e3

Please sign in to comment.