From b87af50829b2a5fb0374b703ac1ebc84ba11dc65 Mon Sep 17 00:00:00 2001 From: Jiashen Cao Date: Wed, 6 Sep 2023 00:40:48 -0400 Subject: [PATCH] feat: sync master staging (#1050) Co-authored-by: Joy Arulraj Co-authored-by: Jiashen Cao Co-authored-by: Andy Xu Co-authored-by: Sayan Sinha Co-authored-by: Hersh Dhillon --- .circleci/config.yml | 4 + README.md | 25 ++- docs/_toc.yml | 55 +++++-- docs/conf.py | 4 + docs/source/benchmarks/text_summarization.rst | 89 +++++++++- docs/source/overview/concepts.rst | 71 ++++++++ .../source/overview/concepts/data-sources.rst | 20 +++ docs/source/overview/getting-started.rst | 35 +++- .../getting-started/installation-options.rst | 2 +- docs/source/reference/ai/custom.rst | 152 ++++++++++++++++++ .../source/reference/ai/model-forecasting.rst | 19 ++- .../reference/{udfs => ai}/model-train.rst | 18 +-- docs/source/reference/api.rst | 19 +++ docs/source/reference/evaql.rst | 24 +++ docs/source/reference/evaql/create.rst | 22 +-- docs/source/reference/evaql/drop.rst | 6 +- docs/source/reference/evaql/select.rst | 5 + docs/source/reference/evaql/show.rst | 6 +- docs/source/reference/evaql/udf.rst | 24 +-- docs/source/reference/evaql/use.rst | 8 + docs/source/reference/udfs/openai.rst | 27 ---- docs/source/shared/footer.rst | 11 ++ docs/source/shared/postgresql.rst | 34 ++++ docs/source/usecases/emotion-analysis.rst | 32 ++++ docs/source/usecases/food-review.rst | 147 ----------------- docs/source/usecases/image-classification.rst | 68 ++++++++ docs/source/usecases/image-search.rst | 94 ++++++++++- docs/source/usecases/object-detection.rst | 42 ++++- docs/source/usecases/privategpt.rst | 2 + docs/source/usecases/qa-video.rst | 89 ---------- docs/source/usecases/question-answering.rst | 35 +++- docs/source/usecases/sentiment-analysis.rst | 16 +- docs/source/usecases/similar-image-search.rst | 83 ---------- docs/source/usecases/text-summarization.rst | 24 ++- evadb/binder/statement_binder.py | 80 +++++++++ evadb/catalog/catalog_manager.py | 17 ++ .../models/function_metadata_catalog.py | 8 + evadb/catalog/models/index_catalog.py | 15 ++ evadb/catalog/models/utils.py | 3 + .../services/function_catalog_service.py | 17 ++ .../services/function_io_catalog_service.py | 10 ++ .../function_metadata_catalog_service.py | 10 ++ .../catalog/services/index_catalog_service.py | 6 + evadb/executor/apply_and_merge_executor.py | 12 ++ evadb/executor/create_function_executor.py | 93 +++++++++++ evadb/executor/create_index_executor.py | 26 +++ evadb/executor/create_udf_executor.py | 120 +++++++++++++- evadb/executor/drop_object_executor.py | 3 + evadb/executor/executor_utils.py | 23 +++ evadb/executor/function_scan_executor.py | 12 ++ evadb/executor/show_info_executor.py | 4 + evadb/functions/chatgpt.py | 4 + evadb/functions/forecast.py | 44 +++++ evadb/functions/function_bootstrap_queries.py | 19 +++ evadb/optimizer/operators.py | 30 ++++ evadb/optimizer/optimizer_utils.py | 9 ++ evadb/optimizer/rules/rules.py | 21 +++ evadb/optimizer/statement_to_opr_converter.py | 9 ++ evadb/parser/create_function_statement.py | 4 + evadb/parser/create_index_statement.py | 29 ++++ evadb/parser/evadb.lark | 19 +++ .../parser/lark_visitor/_create_statements.py | 11 ++ evadb/parser/lark_visitor/_drop_statement.py | 3 + evadb/parser/lark_visitor/_functions.py | 13 ++ evadb/parser/lark_visitor/_show_statements.py | 6 + evadb/plan_nodes/create_function_plan.py | 9 ++ evadb/plan_nodes/create_index_plan.py | 21 +++ evadb/plan_nodes/show_info_plan.py | 4 + .../databases/clickhouse/__init__.py | 4 + 
evadb/utils/generic_utils.py | 11 ++ script/formatting/formatter.py | 19 +++ script/formatting/spelling.txt | 61 +++++++ setup.py | 10 ++ .../long/test_function_executor.py | 20 +++ .../long/test_model_forecasting.py | 23 +++ .../long/test_model_train.py | 4 + test/integration_tests/long/test_reuse.py | 3 + .../short/test_drop_executor.py | 15 ++ .../short/test_select_executor.py | 4 + test/markers.py | 6 + .../binder/test_statement_binder.py | 32 ++++ .../catalog/test_catalog_manager.py | 9 ++ .../executor/test_create_udf_executor.py | 16 ++ .../unit_tests/executor/test_plan_executor.py | 4 + .../test_statement_to_opr_converter.py | 6 + test/unit_tests/parser/test_parser.py | 29 ++++ .../parser/test_parser_statements.py | 3 + test/unit_tests/plan_nodes/test_plan.py | 10 ++ .../test_sqlite_native_storage_engine.py | 4 + test/util.py | 4 + ...1-similarity-search-for-motif-mining.ipynb | 141 ++++++++++++++++ 91 files changed, 2045 insertions(+), 419 deletions(-) create mode 100644 docs/source/overview/concepts/data-sources.rst create mode 100644 docs/source/reference/ai/custom.rst rename docs/source/reference/{udfs => ai}/model-train.rst (63%) delete mode 100644 docs/source/reference/udfs/openai.rst delete mode 100644 docs/source/usecases/food-review.rst create mode 100644 docs/source/usecases/privategpt.rst delete mode 100644 docs/source/usecases/qa-video.rst delete mode 100644 docs/source/usecases/similar-image-search.rst diff --git a/.circleci/config.yml b/.circleci/config.yml index 01984e154b..12228b9492 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -486,7 +486,11 @@ jobs: source test_evadb/bin/activate pip install --upgrade pip pip debug --verbose +<<<<<<< HEAD pip install ".[dev,ludwig,qdrant,forecasting,pinecone,chromadb]" +======= + pip install ".[dev,ludwig,qdrant,forecasting]" +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) source test_evadb/bin/activate bash script/test/test.sh -m "<< parameters.mode >>" diff --git a/README.md b/README.md index a3022a69f0..b20280273e 100644 --- a/README.md +++ b/README.md @@ -183,8 +183,17 @@ EvaDB enables software developers to build AI apps in a few lines of code. Its p
  • 📝 following us on Medium +👋 Hey! If you're excited about our vision of bringing AI inside database systems, show some ❤️ by: + + ## Quick Links +<<<<<<< HEAD <<<<<<< HEAD - [Quick Links](#quick-links) - [Documentation](#documentation) @@ -199,12 +208,20 @@ EvaDB enables software developers to build AI apps in a few lines of code. Its p - [Star History](#star-history) - [License](#license) ======= +======= +- [Quick Links](#quick-links) +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) - [Documentation](#documentation) - [Why EvaDB](#why-evadb) - [How does EvaDB work](#how-does-evadb-work) -- [Community and Support](#community-and-support) - [Illustrative Queries](#illustrative-queries) - [Illustrative Apps](#illustrative-apps) +- [More Illustrative Queries](#more-illustrative-queries) +- [Architecture of EvaDB](#architecture-of-evadb) +- [Community and Support](#community-and-support) +- [Contributing](#contributing) +- [Star History](#star-history) +- [License](#license) ## Documentation @@ -398,11 +415,11 @@ SELECT ChatGPT('Is this video summary related to Ukraine russia war', text) * Train an ML model using the Ludwig AI engine to predict a column in a table. ```sql -CREATE UDF IF NOT EXISTS PredictHouseRent FROM +CREATE FUNCTION IF NOT EXISTS PredictHouseRent FROM ( SELECT * FROM HomeRentals ) TYPE Ludwig -'predict' 'rental_price' -'time_limit' 120; +PREDICT 'rental_price' +TIME_LIMIT 120; ``` diff --git a/docs/_toc.yml b/docs/_toc.yml index e8f7931262..137b49f4c9 100644 --- a/docs/_toc.yml +++ b/docs/_toc.yml @@ -41,34 +41,34 @@ parts: title: Connect to Database - file: source/overview/concepts title: Concepts + sections: + - file: source/overview/concepts/data-sources + title: Data Sources #- file: source/overview/faq - caption: Use Cases chapters: - - file: source/usecases/food-review.rst + - file: source/usecases/sentiment-analysis.rst title: Sentiment Analysis + - file: source/usecases/question-answering.rst + title: Question Answering + - file: source/usecases/text-summarization.rst + title: Text Summarization - file: source/usecases/image-classification.rst title: Image Classification - - file: source/usecases/similar-image-search.rst + - file: source/usecases/image-search.rst title: Image Search - - file: source/usecases/qa-video.rst - title: Video Question Answering - - file: source/usecases/08-chatgpt.ipynb - title: ChatGPT-based Video Question Answering - - file: source/usecases/12-query-pdf.ipynb - title: PDF Question Answering - - file: source/usecases/02-object-detection.ipynb + - file: source/usecases/object-detection.rst title: Object Detection - - file: source/usecases/03-emotion-analysis.ipynb - title: Emotions Analysis - - file: source/usecases/07-object-segmentation-huggingface.ipynb - title: Image Segmentation - - file: source/usecases/13-privategpt.ipynb + - file: source/usecases/emotion-analysis.rst + title: Emotion Analysis + - file: source/usecases/privategpt.rst title: PrivateGPT >>>>>>> 8c5b63dc (release: merge staging into master (#1032)) - caption: User Reference chapters: +<<<<<<< HEAD - file: source/reference/evaql title: Query Language sections: @@ -95,8 +95,10 @@ parts: <<<<<<< HEAD ======= +======= +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) - file: source/reference/evaql - title: Eva Query Language + title: EvaQL sections: - file: source/reference/evaql/load - file: source/reference/evaql/select @@ -109,7 +111,13 @@ parts: - file: source/reference/evaql/rename - file: source/reference/evaql/use +<<<<<<< HEAD >>>>>>> 8c5b63dc 
(release: merge staging into master (#1032)) +======= + - file: source/reference/api + title: Python API + +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) - file: source/reference/databases/index title: Data Sources sections: @@ -123,6 +131,7 @@ parts: ======= >>>>>>> 8c5b63dc (release: merge staging into master (#1032)) +<<<<<<< HEAD - file: source/reference/vector_databases/index title: Vector Databases sections: @@ -141,6 +150,13 @@ parts: title: Model Training with Sklearn - file: source/reference/ai/model-train-xgboost title: Model Training with XGBoost +======= + - file: source/reference/ai/index + title: AI Engines + sections: + - file: source/reference/ai/model-train + title: Model Training +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) - file: source/reference/ai/model-forecasting title: Time Series Forecasting - file: source/reference/ai/hf @@ -149,6 +165,7 @@ parts: title: OpenAI - file: source/reference/ai/yolo title: YOLO +<<<<<<< HEAD - file: source/reference/ai/stablediffusion title: Stable Diffusion @@ -157,6 +174,10 @@ parts: - file: source/reference/optimizations title: Optimizations +======= + - file: source/reference/ai/custom + title: Custom Model +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) # - file: source/reference/io # title: IO Descriptors @@ -172,7 +193,11 @@ parts: - file: source/benchmarks/text_summarization.rst title: Text Summarization +<<<<<<< HEAD - caption: Contribution Guide +======= + - caption: Developer Reference +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) chapters: - file: source/dev-guide/contribute title: Contributing to EvaDB diff --git a/docs/conf.py b/docs/conf.py index df272c2d4a..be4f6acc15 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -100,6 +100,7 @@ ======= >>>>>>> 8c5b63dc (release: merge staging into master (#1032)) + # The name of the Pygments (syntax highlighting) style to use. pygments_style = "github-dark" @@ -176,6 +177,7 @@ <<<<<<< HEAD ======= +<<<<<<< HEAD for i in os.listdir("../tutorials"): if i in [ @@ -192,6 +194,8 @@ nb_execution_mode = "off" >>>>>>> 8c5b63dc (release: merge staging into master (#1032)) +======= +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) # -- Initialize Sphinx ---------------------------------------------- def setup(app): warnings.filterwarnings( diff --git a/docs/source/benchmarks/text_summarization.rst b/docs/source/benchmarks/text_summarization.rst index 87feb24c74..052afdacf0 100644 --- a/docs/source/benchmarks/text_summarization.rst +++ b/docs/source/benchmarks/text_summarization.rst @@ -1,4 +1,5 @@ <<<<<<< HEAD +<<<<<<< HEAD Text Summarization Benchmark ============================ @@ -12,18 +13,32 @@ Prepare dataset --------------- ======= Text summarization benchmark +======= +Text Summarization Benchmark +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) ============================ -In this benchmark, we compare the performance of text summarization between EvaDB and MindsDB on `CNN-DailyMail News `_. +<<<<<<< HEAD 1. Prepare dataset ------------------ >>>>>>> 8c5b63dc (release: merge staging into master (#1032)) +======= +In this benchmark, we compare the runtime performance of EvaDB and MindsDB on +a text summarization application operating on a news dataset. In particular, +we focus on the `CNN-DailyMail News `_ dataset. + +All the relevant files are located in the `text summarization benchmark folder on Github `_. + +Prepare dataset +--------------- +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) .. 
code-block:: bash cd benchmark/text_summarization bash download_dataset.sh +<<<<<<< HEAD <<<<<<< HEAD Use EvaDB for Text Summarization -------------------------------- @@ -31,6 +46,10 @@ Use EvaDB for Text Summarization 2. Using EvaDB to summarize the CNN DailyMail News -------------------------------------------------- >>>>>>> 8c5b63dc (release: merge staging into master (#1032)) +======= +Use EvaDB for Text Summarization +-------------------------------- +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) .. note:: @@ -43,6 +62,7 @@ Use EvaDB for Text Summarization python text_summarization_with_evadb.py +<<<<<<< HEAD <<<<<<< HEAD Loading Data Into EvaDB ~~~~~~~~~~~~~~~~~~~~~~~ @@ -98,6 +118,55 @@ Setup SQLite Database Prepare sqlite database for MindsDB ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ >>>>>>> 8c5b63dc (release: merge staging into master (#1032)) +======= +Loading Data Into EvaDB +~~~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: sql + + CREATE TABLE IF NOT EXISTS cnn_news_test( + id TEXT(128), + article TEXT(4096), + highlights TEXT(1024) + ); + +Creating Text Summarization Function in EvaDB +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: sql + + CREATE UDF IF NOT EXISTS TextSummarizer + TYPE HuggingFace + TASK 'summarization' + MODEL 'sshleifer/distilbart-cnn-12-6' + MIN_LENGTH 5 + MAX_LENGTH 100; + + +Tuning EvaDB for Maximum GPU Utilization +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: python + + cursor._evadb.config.update_value("executor", "batch_mem_size", 300000) + cursor._evadb.config.update_value("executor", "gpu_ids", [0,1]) + cursor._evadb.config.update_value("experimental", "ray", True) + + +Text Summarization Query in EvaDB +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: sql + + CREATE TABLE IF NOT EXISTS cnn_news_summary AS + SELECT TextSummarizer(article) FROM cnn_news_test; + +Use MindsDB for Text Summarization +----------------------------------- + +Setup SQLite Database +~~~~~~~~~~~~~~~~~~~~~~ +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) .. code-block:: bash @@ -110,6 +179,7 @@ Prepare sqlite database for MindsDB Install MindsDB ~~~~~~~~~~~~~~~ +<<<<<<< HEAD <<<<<<< HEAD Follow the `MindsDB installation guide `_ to install it via ``pip``. @@ -123,6 +193,13 @@ Follow the `Setup for Source Code via pip >>>>>> 8c5b63dc (release: merge staging into master (#1032)) +======= +Follow the `MindsDB nstallation guide `_ to install it via ``pip``. + +.. note:: + + You will need to manually run ``pip install evaluate`` for the ``HuggingFace`` model to work in MindsDB. +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) After installation, use the ``MySQL`` client for connecting to ``MindsDB``. Update the port number if needed. @@ -130,6 +207,7 @@ After installation, use the ``MySQL`` client for connecting to ``MindsDB``. 
Upda mysql -h 127.0.0.1 --port 47335 -u mindsdb -p +<<<<<<< HEAD <<<<<<< HEAD Benchmark MindsDB ~~~~~~~~~~~~~~~~~ @@ -137,6 +215,10 @@ Benchmark MindsDB Run Experiment ~~~~~~~~~~~~~~ >>>>>>> 8c5b63dc (release: merge staging into master (#1032)) +======= +Benchmark MindsDB +~~~~~~~~~~~~~~~~~ +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) Connect ``MindsDB`` to the ``sqlite`` database we created before: @@ -175,6 +257,7 @@ Use the ``text summarization`` model to summarize the CNN news dataset: ); +<<<<<<< HEAD <<<<<<< HEAD Benchmarking Results -------------------- @@ -183,6 +266,10 @@ Benchmarking Results --------------------- Below are numbers from a server with 56 Intel(R) Xeon(R) CPU E5-2690 v4 @ 2.60GHz and two Quadro P6000 GPU. >>>>>>> 8c5b63dc (release: merge staging into master (#1032)) +======= +Benchmarking Results +-------------------- +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) Here are the key runtime metrics for the ``Text Summarization`` benchmark. diff --git a/docs/source/overview/concepts.rst b/docs/source/overview/concepts.rst index 0868ab2825..a783fa21f9 100644 --- a/docs/source/overview/concepts.rst +++ b/docs/source/overview/concepts.rst @@ -2,12 +2,31 @@ Concepts ========= +<<<<<<< HEAD EvaDB is designed around three key concepts: +======= +Here is a list of key concepts in EvaDB. If you have any questions, ask the community on `Slack `__. + +EvaQL: AI-Centric Query Language +-------------------------------- + +EvaDB supports a SQL-like query language, called ``EvaQL``, designed to assist software developers in bringing AI into their applications. + +Here is set of illustrative EvaQL queries for a ChatGPT-based video question answering app. This EvaDB app connects to collection of news videos stored in a folder and runs an AI query for extracting audio transcripts from the videos using a Hugging Face model, followed by another AI query for question answering using ChatGPT. + +.. code-block::sql + + --- Load a collection of news videos into the 'news_videos' table + --- This command returns a Pandas Dataframe with the query's output + --- In this case, the output indicates the number of loaded videos + LOAD VIDEO 'news_videos/*.mp4' INTO VIDEOS; +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) (1) AI Queries (2) AI Functions (3) AI-Centric Query Optimization +<<<<<<< HEAD .. note:: Have a question or want to give feedback? Join us on `Slack `__! @@ -73,6 +92,51 @@ To register an user-defined function, we use the :ref:`CREATE FUNCTION`__ tailored for AI queries. Query optimization has powered traditional SQL database systems for several decades. It is the bridge that connects the declarative query language to efficient query execution on hardware. + +EvaDB accelerates AI queries using a collection of optimizations inspired by SQL database systems including cost-based function predicate reordering, function caching, sampling, etc. + +AI Functions +------------ + +``Functions`` are typically thin wrappers around AI models and are extensively used in queries. Here is an `illustrative AI function for classifying MNIST images `_. + +To register an user-defined function, use the ``CREATE FUNCTION`` statement: + +.. 
code-block:: sql + + --- Create an MNIST image classifier function + --- The function's implementation code is in 'mnist_image_classifier.py' +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) CREATE FUNCTION MnistImageClassifier IMPL 'mnist_image_classifier.py' @@ -80,7 +144,11 @@ After registering ``MnistImageClassifier`` function, you can call the function i .. code-block:: sql +<<<<<<< HEAD --- Get the output of 'MnistImageClassifier' on the 30th video frame (id=30) +======= + --- Get the output of 'MnistImageClassifier' on frame id 30 +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) --- This query returns the results of the image classification function --- In this case, it is the digit in the 30th frame in the video SELECT data, id, MnistImageClassifier(data).label @@ -94,6 +162,7 @@ After registering ``MnistImageClassifier`` function, you can call the function i FROM MnistVideo WHERE MnistImageClassifier(data).label = '6' LIMIT 5; +<<<<<<< HEAD AI-Centric Query Optimization ----------------------------- @@ -110,3 +179,5 @@ Query optimization has powered SQL database systems for several decades. It is t query2 = query2.select("data, MnistImageClassifier(data).label") response = query2.df() >>>>>>> 8c5b63dc (release: merge staging into master (#1032)) +======= +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) diff --git a/docs/source/overview/concepts/data-sources.rst b/docs/source/overview/concepts/data-sources.rst new file mode 100644 index 0000000000..689e84ac53 --- /dev/null +++ b/docs/source/overview/concepts/data-sources.rst @@ -0,0 +1,20 @@ +.. _data sources: + +Data Sources +============ + +EvaDB simplifies AI app development for two types of data: (1) structured data, and (2) unstructured data. + +Structured Data +--------------- + +Structured data refers to information that is highly organized and follows a predefined format. This format usually involves tables with rows and columns, where each column represents a specific attribute or field, and each row represents a unique record or instance. Structured data is typically found in SQL databases. + +Examples: Customer information in a CRM system, sales transactions in a financial database. + +Unstructured Data +----------------- + +Unstructured data lacks a specific structure or organized format. It doesn't fit neatly into tables with rows and columns. Instead, unstructured data can take the form of text, images, audio, video, social media posts, emails, and more. Unstructured data is diverse and can vary greatly in content, making it challenging to process and analyze without AI models. + +Examples: Social media posts, email content, images on the web. \ No newline at end of file diff --git a/docs/source/overview/getting-started.rst b/docs/source/overview/getting-started.rst index 2747fd717b..3d7bcd0766 100644 --- a/docs/source/overview/getting-started.rst +++ b/docs/source/overview/getting-started.rst @@ -1,8 +1,12 @@ <<<<<<< HEAD +<<<<<<< HEAD .. _getting-started: ======= .. _Getting Started: >>>>>>> 8c5b63dc (release: merge staging into master (#1032)) +======= +.. _getting-started: +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) Getting Started ================= @@ -120,7 +124,7 @@ The program runs a SQL query for listing all the built-in functions in EvaDB. 
It cursor = evadb.connect().cursor() # List all the built-in functions in EvaDB - print(cursor.query("SHOW UDFS;").df()) + print(cursor.query("SHOW FUNCTIONS;").df()) Now, run the Python program: @@ -133,9 +137,9 @@ You should see a list of built-in functions including but not limited to the fol .. code-block:: bash name inputs ... impl metadata - 0 ArrayCount [Input_Array NDARRAY ANYTYPE (), Search_Key ANY] ... /home/jarulraj3/evadb/evadb/udfs/ndarray/array... [] - 1 Crop [Frame_Array NDARRAY UINT8 (3, None, None), bb... ... /home/jarulraj3/evadb/evadb/udfs/ndarray/crop.py [] - 2 ChatGPT [query NDARRAY STR (1,), content NDARRAY STR (... ... /home/jarulraj3/evadb/evadb/udfs/chatgpt.py [] + 0 ArrayCount [Input_Array NDARRAY ANYTYPE (), Search_Key ANY] ... /home/jarulraj3/evadb/evadb/functions/ndarray/array... [] + 1 Crop [Frame_Array NDARRAY UINT8 (3, None, None), bb... ... /home/jarulraj3/evadb/evadb/functions/ndarray/crop.py [] + 2 ChatGPT [query NDARRAY STR (1,), content NDARRAY STR (... ... /home/jarulraj3/evadb/evadb/functions/chatgpt.py [] [3 rows x 6 columns] @@ -146,14 +150,27 @@ You should see a list of built-in functions including but not limited to the fol EvaDB supports additional installation options for extending its functionality. Go over the :doc:`Installation Options ` for all the available options. -Illustrative AI App -------------------- +Illustrative AI Query +--------------------- +<<<<<<< HEAD Here is a simple, illustrative `MNIST image classification `_ AI app in EvaDB. >>>>>>> 8c5b63dc (release: merge staging into master (#1032)) +======= +Here is an illustrative `MNIST image classification `_ AI query in EvaDB. +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) -.. code-block:: python +.. code-block:: sql + + --- This AI query retrieves images in the loaded MNIST video with label 4 + --- We constrain the query to only search through the first 100 frames + --- We limit the query to only return the first five frames with label 4 + SELECT data, id, MnistImageClassifier(data) + FROM MnistVideo + WHERE MnistImageClassifier(data) = '4' AND id < 100 + LIMIT 5; +<<<<<<< HEAD # Import the EvaDB package import evadb @@ -234,3 +251,7 @@ Try out EvaDB by experimenting with the introductory `MNIST notebook on Colab ` to learn more about the functions used in this app. >>>>>>> 8c5b63dc (release: merge staging into master (#1032)) +======= +The complete `MNIST notebook is available on Colab `_. +Try out EvaDB by experimenting with this introductory notebook. +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) diff --git a/docs/source/overview/getting-started/installation-options.rst b/docs/source/overview/getting-started/installation-options.rst index 3fda7f6be1..b350ddba4d 100644 --- a/docs/source/overview/getting-started/installation-options.rst +++ b/docs/source/overview/getting-started/installation-options.rst @@ -68,7 +68,7 @@ EvaDB provides the following additional installation options for extending its f * ``pip install evadb[qdrant]`` for installing the Qdrant vector database system. It enables use-cases related to similarity search based on feature vectors. -* ``pip install evadb[ludwig]`` for installing the Ludwig model training framework. It enables use-cases related to training and fine-tunining AI models. +* ``pip install evadb[ludwig]`` for installing the Ludwig model training framework. It enables use-cases related to training and fine-tuning AI models. * ``pip install evadb[ray]`` for installing the Ray compute engine. 
It enables EvaDB to do more efficient query execution on CPUs and GPUs. diff --git a/docs/source/reference/ai/custom.rst b/docs/source/reference/ai/custom.rst new file mode 100644 index 0000000000..d57a0fe059 --- /dev/null +++ b/docs/source/reference/ai/custom.rst @@ -0,0 +1,152 @@ +.. _udf: + + +Functions +====================== + +This section provides an overview of how you can create and use a custom function in your queries. For example, you could write an function that wraps around your custom PyTorch model. + +Part 1: Writing a custom Function +--------------------------------- + +During each step, use `this function implementation `_ as a reference. + +1. Create a new file under `functions/` folder and give it a descriptive name. eg: `yolo_object_detection.py`. + + .. note:: + + Functions packaged along with EvaDB are located inside the `functions `_ folder. + +2. Create a Python class that inherits from `PytorchClassifierAbstractFunction`. + +* The `PytorchClassifierAbstractFunction` is a parent class that defines and implements standard methods for model inference. + +* The functions setup and forward should be implemented in your child class. These functions can be implemented with the help of Decorators. + +Setup +----- + +An abstract method that must be implemented in your child class. The setup function can be used to initialize the parameters for executing the function. The parameters that need to be set are + +- cacheable: bool + + - True: Cache should be enabled. Cache will be automatically invalidated when the function changes. + - False: cache should not be enabled. +- function_type: str + + - object_detection: functions for object detection. +- batchable: bool + + - True: Batching should be enabled + - False: Batching is disabled. + +The custom setup operations for the function can be written inside the function in the child class. If there is no need for any custom logic, then you can just simply write "pass" in the function definition. + +Example of a Setup Function + +.. code-block:: python + + @setup(cacheable=True, function_type="object_detection", batchable=True) + def setup(self, threshold=0.85): + #custom setup function that is specific for the function + self.threshold = threshold + self.model = torch.hub.load("ultralytics/yolov5", "yolov5s", verbose=False) + +Forward +-------- + +An abstract method that must be implemented in your function. The forward function receives the frames and runs the deep learning model on the data. The logic for transforming the frames and running the models must be provided by you. +The arguments that need to be passed are + +- input_signatures: List[IOColumnArgument] + + Data types of the inputs to the forward function must be specified. If no constraints are given, then no validation is done for the inputs. + +- output_signatures: List[IOColumnArgument] + + Data types of the outputs to the forward function must be specified. If no constraints are given, then no validation is done for the inputs. + +A sample forward function is given below + +.. 
code-block:: python + + @forward( + input_signatures=[ + PyTorchTensor( + name="input_col", + is_nullable=False, + type=NdArrayType.FLOAT32, + dimensions=(1, 3, 540, 960), + ) + ], + output_signatures=[ + PandasDataframe( + columns=["labels", "bboxes", "scores"], + column_types=[ + NdArrayType.STR, + NdArrayType.FLOAT32, + NdArrayType.FLOAT32, + ], + column_shapes=[(None,), (None,), (None,)], + ) + ], + ) + def forward(self, frames: Tensor) -> pd.DataFrame: + #the custom logic for the function + outcome = [] + + frames = torch.permute(frames, (0, 2, 3, 1)) + predictions = self.model([its.cpu().detach().numpy() * 255 for its in frames]) + + for i in range(frames.shape[0]): + single_result = predictions.pandas().xyxy[i] + pred_class = single_result["name"].tolist() + pred_score = single_result["confidence"].tolist() + pred_boxes = single_result[["xmin", "ymin", "xmax", "ymax"]].apply( + lambda x: list(x), axis=1 + ) + + outcome.append( + {"labels": pred_class, "bboxes": pred_boxes, "scores": pred_score} + ) + + return pd.DataFrame(outcome, columns=["labels", "bboxes", "scores"]) + +---------- + +Part 2: Registering and using the function in EvaDB Queries +----------------------------------------------------------- + +Now that you have implemented your function, we need to register it as a function in EvaDB. You can then use the function in any query. + +1. Register the function with a query that follows this template: + + `CREATE FUNCTION [ IF NOT EXISTS ] + IMPL ;` + + where, + + * - specifies the unique identifier for the function. + * - specifies the path to the implementation class for the function + + Here, is an example query that registers a function that wraps around the 'YoloObjectDetection' model that performs Object Detection. + + .. code-block:: sql + + CREATE FUNCTION YoloDecorators + IMPL 'evadb/functions/decorators/yolo_object_detection_decorators.py'; + + + A status of 0 in the response denotes the successful registration of this function. + +2. Now you can execute your function on any video: + + .. code-block:: sql + + SELECT YoloDecorators(data) FROM MyVideo WHERE id < 5; + +3. You can drop the function when you no longer need it. + + .. code-block:: sql + + DROP FUNCTION IF EXISTS YoloDecorators; diff --git a/docs/source/reference/ai/model-forecasting.rst b/docs/source/reference/ai/model-forecasting.rst index 610461223e..d7754131f0 100644 --- a/docs/source/reference/ai/model-forecasting.rst +++ b/docs/source/reference/ai/model-forecasting.rst @@ -1,5 +1,8 @@ +<<<<<<< HEAD .. _forecast: +======= +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) Time Series Forecasting ======================== @@ -21,11 +24,19 @@ First, we create a table to insert required data. LOAD CSV 'data/forecasting/air-passengers.csv' INTO AirData; +<<<<<<< HEAD Next, we create a function of `TYPE Forecasting`. We must enter the column name on which we wish to forecast using `PREDICT`. .. code-block:: sql CREATE FUNCTION IF NOT EXISTS Forecast FROM +======= +Next, we create a UDF of `TYPE Forecasting`. We must enter the column name on which we wish to forecast using `predict`. Other options include `id` and `time` (they represent the unique id of the items and the time data if available). + +.. code-block:: sql + + CREATE UDF IF NOT EXISTS Forecast FROM +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) (SELECT y FROM AirData) TYPE Forecasting PREDICT 'y'; @@ -34,6 +45,7 @@ This trains a forecasting model. The model can be called by providing the horizo .. 
code-block:: sql +<<<<<<< HEAD SELECT Forecast(12); Here, the horizon is `12`, which represents the forecast 12 steps into the future. @@ -90,4 +102,9 @@ Below is an example query with `neuralforecast` with `trend` column as exogenous PREDICT 'y' LIBRARY 'neuralforecast' AUTO 'f' - FREQUENCY 'M'; \ No newline at end of file + FREQUENCY 'M'; +======= + SELECT Forecast(12) FROM AirData; + +Here, the horizon is `12`. +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) diff --git a/docs/source/reference/udfs/model-train.rst b/docs/source/reference/ai/model-train.rst similarity index 63% rename from docs/source/reference/udfs/model-train.rst rename to docs/source/reference/ai/model-train.rst index de6e84eeee..c4b10bcbed 100644 --- a/docs/source/reference/udfs/model-train.rst +++ b/docs/source/reference/ai/model-train.rst @@ -9,35 +9,35 @@ Training and Finetuning .. code-block:: sql - CREATE UDF IF NOT EXISTS PredictHouseRent FROM + CREATE FUNCTION IF NOT EXISTS PredictHouseRent FROM ( SELECT sqft, location, rental_price FROM HomeRentals ) TYPE Ludwig - 'predict' 'rental_price' - 'time_limit' 120; + PREDICT 'rental_price' + TIME_LIMIT 120; -In the above query, you are creating a new customized UDF by automatically training a model from the `HomeRentals` table. The `rental_price` column will be the target column for predication, while `sqft` and `location` are the inputs. +In the above query, you are creating a new customized function by automatically training a model from the `HomeRentals` table. The `rental_price` column will be the target column for predication, while `sqft` and `location` are the inputs. You can also simply give all other columns in `HomeRentals` as inputs and let the underlying automl framework to figure it out. Below is an example query: .. code-block:: sql - CREATE UDF IF NOT EXISTS PredictHouseRent FROM + CREATE FUNCTION IF NOT EXISTS PredictHouseRent FROM ( SELECT * FROM HomeRentals ) TYPE Ludwig - 'predict' 'rental_price' - 'time_limit' 120; + PREDICT 'rental_price' + TIME_LIMIT 120; .. note:: Check :ref:`create-udf-train` for available configurations for training models. -2. After training completes, you can use the `PredictHouseRent` like all other UDFs in EvaDB +2. After training completes, you can use the `PredictHouseRent` like all other functions in EvaDB .. code-block:: sql CREATE PredictHouseRent(sqft, location) FROM HomeRentals; -You can also simply give all columns in `HomeRentals` as inputs for inference. The customized UDF with the underlying model can figure out the proper inference columns via the training columns. +You can also simply give all columns in `HomeRentals` as inputs for inference. The customized function with the underlying model can figure out the proper inference columns via the training columns. .. code-block:: sql diff --git a/docs/source/reference/api.rst b/docs/source/reference/api.rst index caf9e5b18f..4bf99029f7 100644 --- a/docs/source/reference/api.rst +++ b/docs/source/reference/api.rst @@ -1,10 +1,14 @@ .. _python-api: +<<<<<<< HEAD <<<<<<< HEAD Python API ======= Basic API >>>>>>> 8c5b63dc (release: merge staging into master (#1032)) +======= +Python API +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) ========== To begin a querying session in EvaDB, obtain a connection with a cursor using ``connect`` and ``cursor`` functions. 
After getting the cursor, you can run queries with the ``query`` function in this manner: @@ -18,7 +22,11 @@ To begin a querying session in EvaDB, obtain a connection with a cursor using `` cursor = evadb.connect().cursor() # List all the built-in functions in EvaDB +<<<<<<< HEAD print(cursor.query("SHOW FUNCTIONS;").df()) +======= + print(cursor.query("SHOW UDFS;").df()) +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) .. autosummary:: @@ -27,6 +35,7 @@ To begin a querying session in EvaDB, obtain a connection with a cursor using `` ~evadb.connect ~evadb.EvaDBConnection.cursor ~evadb.EvaDBCursor.query +<<<<<<< HEAD ~evadb.EvaDBQuery.df .. warning:: @@ -81,3 +90,13 @@ EvaDBQuery Interface ~evadb.EvaDBQuery.sql_query ~evadb.EvaDBQuery.execute >>>>>>> 8c5b63dc (release: merge staging into master (#1032)) +======= + ~evadb.EvaDBCursor.df + +.. warning:: + + It is important to call ``df`` to run the actual query and get the output dataframe. + + ``cursor.query("...")`` only construct the query and not run the query. ``cursor.query("...").df()`` will both construct and run the query. + +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) diff --git a/docs/source/reference/evaql.rst b/docs/source/reference/evaql.rst index 1500953e1b..e42b8b28bf 100644 --- a/docs/source/reference/evaql.rst +++ b/docs/source/reference/evaql.rst @@ -6,11 +6,35 @@ EvaDB Query Language Reference =============================== >>>>>>> 8c5b63dc (release: merge staging into master (#1032)) +<<<<<<< HEAD EvaDB Query Language (EvaQL) is tailored for AI apps and is derived from SQL. AI models are simply function calls in a EvaQL query. This page lists all the EvaDB statements that you can leverage in your AI applications and notebooks. Get started by copying these SQL queries into a `.py` file or a Jupyter notebook. +======= +EvaDB Query Language (EvaDB) is derived from SQL. It is tailored for AI-driven analytics. EvaDB allows users to invoke deep learning models in the form +of functions. + +Here is an example where we first define a function wrapping around the FastRCNN object detection model. We then issue a query with this function to detect objects. + +.. code:: sql + + --- Create an user-defined function wrapping around FastRCNN ObjectDetector + CREATE FUNCTION IF NOT EXISTS FastRCNNObjectDetector + INPUT (frame NDARRAY UINT8(3, ANYDIM, ANYDIM)) + OUTPUT (labels NDARRAY STR(ANYDIM), bboxes NDARRAY FLOAT32(ANYDIM, 4), + scores NDARRAY FLOAT32(ANYDIM)) + TYPE Classification + IMPL 'evadb/functions/fastrcnn_object_detector.py'; + + --- Use the function to retrieve frames that contain more than 3 cars + SELECT id FROM MyVideo + WHERE ArrayCount(FastRCNNObjectDetector(data).label, 'car') > 3 + ORDER BY id; + +This page presents a list of all the EvaDB statements that you can leverage in your Jupyter Notebooks. +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) .. tableofcontents:: diff --git a/docs/source/reference/evaql/create.rst b/docs/source/reference/evaql/create.rst index 8e1f39c9bd..9023e6613f 100644 --- a/docs/source/reference/evaql/create.rst +++ b/docs/source/reference/evaql/create.rst @@ -49,35 +49,35 @@ To create a table, specify the schema of the table. object_id INTEGER ); -CREATE UDF ----------- +CREATE FUNCTION +--------------- -To register an user-defined function, specify the implementation details of the UDF. +To register an user-defined function, specify the implementation details of the function. .. 
code-block:: sql - CREATE UDF IF NOT EXISTS FastRCNNObjectDetector + CREATE FUNCTION IF NOT EXISTS FastRCNNObjectDetector INPUT (frame NDARRAY UINT8(3, ANYDIM, ANYDIM)) OUTPUT (labels NDARRAY STR(ANYDIM), bboxes NDARRAY FLOAT32(ANYDIM, 4), scores NDARRAY FLOAT32(ANYDIM)) TYPE Classification - IMPL 'evadb/udfs/fastrcnn_object_detector.py'; + IMPL 'evadb/functions/fastrcnn_object_detector.py'; .. _create-udf-train: -CREATE UDF via Training ------------------------ +CREATE FUNCTION via Training +---------------------------- To register an user-defined function by training a predication model. .. code-block:: sql - CREATE UDF IF NOT EXISTS PredictHouseRent FROM + CREATE FUNCTION IF NOT EXISTS PredictHouseRent FROM (SELECT * FROM HomeRentals) TYPE Ludwig - 'predict' 'rental_price' - 'time_list' 120; - 'tune_for_memory' False; + PREDICT 'rental_price' + TIME_LIST 120; + TUNE_FOR_MEMORY False; CREATE MATERIALIZED VIEW ------------------------ diff --git a/docs/source/reference/evaql/drop.rst b/docs/source/reference/evaql/drop.rst index b9dfeb56ec..67a7234891 100644 --- a/docs/source/reference/evaql/drop.rst +++ b/docs/source/reference/evaql/drop.rst @@ -9,9 +9,9 @@ DROP TABLE DROP TABLE DETRACVideo; -DROP UDF --------- +DROP FUNCTION +------------- .. code:: mysql - DROP UDF FastRCNNObjectDetector; + DROP FUNCTION FastRCNNObjectDetector; diff --git a/docs/source/reference/evaql/select.rst b/docs/source/reference/evaql/select.rst index 1d30051686..d1fd6fc739 100644 --- a/docs/source/reference/evaql/select.rst +++ b/docs/source/reference/evaql/select.rst @@ -37,6 +37,7 @@ Search for frames containing greater than 3 cars WHERE ArrayCount(FastRCNNObjectDetector(data).label, 'car') > 3 ORDER BY id; +<<<<<<< HEAD <<<<<<< HEAD SELECT TUPLES WITH MULTIPLE PREDICATES -------------------------------------- @@ -44,6 +45,10 @@ SELECT TUPLES WITH MULTIPLE PREDICATES SELECT WITH MULTIPLE UDFS ------------------------- >>>>>>> 8c5b63dc (release: merge staging into master (#1032)) +======= +SELECT WITH MULTIPLE FUNCTIONS +------------------------------ +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) Compose multiple user-defined functions in a single query to construct semantically complex queries. diff --git a/docs/source/reference/evaql/show.rst b/docs/source/reference/evaql/show.rst index 77397cb96a..41a46e17d0 100644 --- a/docs/source/reference/evaql/show.rst +++ b/docs/source/reference/evaql/show.rst @@ -1,11 +1,11 @@ SHOW ==== -SHOW UDFS ---------- +SHOW FUNCTIONS +-------------- List the registered user-defined functions .. code:: sql - SHOW UDFS; + SHOW FUNCTIONS; diff --git a/docs/source/reference/evaql/udf.rst b/docs/source/reference/evaql/udf.rst index 3081821df6..883c0e7e8b 100644 --- a/docs/source/reference/evaql/udf.rst +++ b/docs/source/reference/evaql/udf.rst @@ -1,32 +1,32 @@ :orphan: -UDF -=== +Functions +========= -SHOW UDFS ---------- +SHOW FUNCTIONS +-------------- Here is a list of built-in user-defined functions in EvaDB. .. 
code:: mysql - SHOW UDFS; + SHOW FUNCTIONS; id name impl - 0 FastRCNNObjectDetector evadb/udfs/fastrcnn_object_detector.p - 1 MVITActionRecognition evadb/udfs/mvit_action_recognition.py - 2 ArrayCount evadb/udfs/ndarray/array_count.py - 3 Crop evadb/evadb/udfs/ndarray/crop.py + 0 FastRCNNObjectDetector evadb/functions/fastrcnn_object_detector.p + 1 MVITActionRecognition evadb/functions/mvit_action_recognition.py + 2 ArrayCount evadb/functions/ndarray/array_count.py + 3 Crop evadb/evadb/functions/ndarray/crop.py FastRCNNObjectDetector is a model for detecting objects. MVITActionRecognition is a model for recognizing actions. ArrayCount and Crop are utility functions for counting the number of objects in an array and cropping a bounding box from an image, respectively. -SELECT WITH MULTIPLE UDFS -------------------------- +SELECT WITH MULTIPLE FUNCTIONS +------------------------------ -Here is a query that illustrates how to use multiple UDFs in a single query. +Here is a query that illustrates how to use multiple functions in a single query. .. code:: sql diff --git a/docs/source/reference/evaql/use.rst b/docs/source/reference/evaql/use.rst index 2b3d0389d1..ec98c6aef7 100644 --- a/docs/source/reference/evaql/use.rst +++ b/docs/source/reference/evaql/use.rst @@ -11,8 +11,12 @@ The USE statement allows us to run arbitrary native queries in the connected dat USE === +<<<<<<< HEAD The USE statement allows us to run arbitary native queries in the connected database. >>>>>>> 8c5b63dc (release: merge staging into master (#1032)) +======= +The USE statement allows us to run arbitrary native queries in the connected database. +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) .. code:: text @@ -20,10 +24,14 @@ The USE statement allows us to run arbitary native queries in the connected data * [database_connection] is an external database connection instanced by the `CREATE DATABASE statement`. <<<<<<< HEAD +<<<<<<< HEAD * [native_query] is an arbitrary SQL query supported by the [database_connection]. ======= * [native_query] is an arbitary SQL query supprted by the [database_connection]. >>>>>>> 8c5b63dc (release: merge staging into master (#1032)) +======= +* [native_query] is an arbitrary SQL query supported by the [database_connection]. +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) .. warning:: diff --git a/docs/source/reference/udfs/openai.rst b/docs/source/reference/udfs/openai.rst deleted file mode 100644 index d229eff44a..0000000000 --- a/docs/source/reference/udfs/openai.rst +++ /dev/null @@ -1,27 +0,0 @@ -OpenAI Models -===================== - -This section provides an overview of how you can use OpenAI models in EvaDB. - - -Chat Completion UDFs --------------------- - -To create a chat completion UDF in EvaDB, use the following SQL command: - -.. code-block:: sql - - CREATE UDF IF NOT EXISTS OpenAIChatCompletion - IMPL 'evadb/udfs/openai_chat_completion_udf.py' - 'model' 'gpt-3.5-turbo' - -EvaDB supports the following models for chat completion task: - -- "gpt-4" -- "gpt-4-0314" -- "gpt-4-32k" -- "gpt-4-32k-0314" -- "gpt-3.5-turbo" -- "gpt-3.5-turbo-0301" - -The chat completion UDF can be composed in interesting ways with other UDFs. Please check the `Google Colab `_ for an example of combining chat completion task with caption extraction and video summarization models from Hugging Face and feeding it to chat completion to ask questions about the results. 
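For reference, such a composed query has the same shape as the built-in ``ChatGPT`` examples that appear elsewhere in these docs; below is a minimal sketch only, using hypothetical table and column names (``food_reviews``, ``review``) and assuming the chat completion function takes a prompt plus a text column, mirroring those examples.

.. code-block:: sql

   --- Minimal sketch: classify the tone of each review with the chat completion function
   --- 'food_reviews' and 'review' are hypothetical names used only for illustration
   SELECT OpenAIChatCompletion('Is this review positive or negative?', review)
   FROM food_reviews;
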
diff --git a/docs/source/shared/footer.rst b/docs/source/shared/footer.rst index df3ac513f3..4d2b1610db 100644 --- a/docs/source/shared/footer.rst +++ b/docs/source/shared/footer.rst @@ -1,6 +1,7 @@ What's Next? ------------ +<<<<<<< HEAD 👋 If you are excited about our vision of bringing AI inside databases, consider: - 📟 joining our Slack: https://evadb.ai/slack @@ -8,3 +9,13 @@ What's Next? - 🐦 following us on Twitter: https://evadb.ai/twitter - 📝 following us on Medium: https://evadb.ai/blog - 🖥️ contributing to EvaDB: https://evadb.ai/github +======= +👋 EvaDB's vision is to bring AI inside your database system and make it easy to build fast AI-powered apps. If you liked this tutorial and are excited about our vision, show some ❤️ by: + +- 🐙 giving a ⭐ for the EvaDB repository on Github: https://github.com/georgia-tech-db/evadb +- 📟 engaging with the EvaDB community on Slack to ask questions and share your ideas and thoughts: https://evadb.ai/community +- 🎉 contributing to EvaDB by developing cool applications/integrations: https://github.com/georgia-tech-db/evadb/issues +- 🐦 following us on Twitter: https://twitter.com/evadb_ai +- 📝 following us on Medium: https://medium.com/evadb-blog + +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) diff --git a/docs/source/shared/postgresql.rst b/docs/source/shared/postgresql.rst index 3af309e67a..6cf5ab6485 100644 --- a/docs/source/shared/postgresql.rst +++ b/docs/source/shared/postgresql.rst @@ -5,6 +5,7 @@ We will assume that you have a ``PostgreSQL`` database server running locally th EvaDB lets you connect to your favorite databases, data warehouses, data lakes, etc., via the ``CREATE DATABASE`` statement. In this query, we connect EvaDB to an existing ``PostgreSQL`` server: +<<<<<<< HEAD .. code-block:: CREATE DATABASE postgres_data @@ -16,3 +17,36 @@ EvaDB lets you connect to your favorite databases, data warehouses, data lakes, "port": "5432", "database": "evadb" } +======= +.. tab-set:: + + .. tab-item:: Python + + .. code-block:: python + + params = { + "user": "eva", + "password": "password", + "host": "localhost", + "port": "5432", + "database": "evadb", + } + query = f"CREATE DATABASE postgres_data + WITH ENGINE = 'postgres', + PARAMETERS = {params};" + cursor.query(query).df() + + .. tab-item:: SQL + + .. code-block:: text + + CREATE DATABASE postgres_data + WITH ENGINE = 'postgres', + PARAMETERS = { + "user": "eva", + "password": "password", + "host": "localhost", + "port": "5432", + "database": "evadb" + } +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) diff --git a/docs/source/usecases/emotion-analysis.rst b/docs/source/usecases/emotion-analysis.rst index 19ac5c5496..995ca92eda 100644 --- a/docs/source/usecases/emotion-analysis.rst +++ b/docs/source/usecases/emotion-analysis.rst @@ -8,6 +8,7 @@ Emotion Analysis + +
    +<<<<<<< HEAD Run on Google Colab @@ -15,6 +16,15 @@ Emotion Analysis Download notebook +======= + Run on Google Colab + + View source on GitHub + + Download notebook +>>>>>>> 2dacff69 (feat: sync master staging (#1050))


    @@ -37,7 +47,11 @@ To create custom ``FaceDetector`` and ``EmotionDetector`` functions, use the ``C .. code-block:: sql +<<<<<<< HEAD CREATE FUNCTION IF NOT EXISTS FaceDetector +======= + CREATE UDF IF NOT EXISTS FaceDetector +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) INPUT (frame NDARRAY UINT8(3, ANYDIM, ANYDIM)) OUTPUT (bboxes NDARRAY FLOAT32(ANYDIM, 4), scores NDARRAY FLOAT32(ANYDIM)) @@ -72,6 +86,7 @@ This query returns the faces detected in the first ten frames of the video: .. code-block:: +<<<<<<< HEAD +----------+---------------------+-------------------------+ | happy.id | facedetector.bboxes | facedetector.scores | +----------+---------------------+-------------------------+ @@ -91,6 +106,14 @@ This query returns the faces detected in the first ten frames of the video: | 9 | [[508 90 785 448] | [0.99992466 0.7014416 ] | | | [235 309 325 412]] | | +----------+---------------------+-------------------------+ +======= + +-----------------------------------------------------------------------------------------------------+ + | objectdetectionvideos.id | yolo.labels | + +--------------------------+-----------------------------------------------------------------+ + | 0 | [car, car, car, car, car, car, person, car, ... | + | 1 | [car, car, car, car, car, car, car, car, car, ... | + +-----------------------------------------------------------------------------------------------------+ +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) Chaining Functions in a Single AI Query --------------------------------------- @@ -110,6 +133,7 @@ Now, the ``DataFrame`` only contains the emotions of the detected faces: .. code-block:: +<<<<<<< HEAD +----------+-------------------+------------------------+------------------------+ | happy.id | Face.bbox | emotiondetector.labels | emotiondetector.scores | +----------+-------------------+------------------------+------------------------+ @@ -135,5 +159,13 @@ Now, the ``DataFrame`` only contains the emotions of the detected faces: | 13 | [513 87 789 456] | happy | 0.9997060894966125 | | 14 | [515 88 790 454] | happy | 0.9997135996818542 | +----------+-------------------+------------------------+------------------------+ +======= + +------------------------------+ + | objectdetectionvideos.label | + |------------------------------| + | 6 | + | 6 | + +------------------------------+ +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) .. include:: ../shared/footer.rst diff --git a/docs/source/usecases/food-review.rst b/docs/source/usecases/food-review.rst deleted file mode 100644 index 8f688b0e83..0000000000 --- a/docs/source/usecases/food-review.rst +++ /dev/null @@ -1,147 +0,0 @@ -ChatGPT + Postgres Tutorial -=========================== - -.. raw:: html - - - - - - -
    - Run on Google Colab - - View source on GitHub - - Download notebook -


    - - -In this tutorial, we demonstrate how to use EvaDB + ChatGPT to analyze the tone of food reviews stored in PostgreSQL. Then, based on the analysis, we further use -EvaDB + ChatGPT to address negative reviews by proposing a solution to the customer. - -For this use case, we assume user has a Postgres server running locally. You can also check our notebook above to skip Postgres setup. - -1. Connect to EvaDB ---------------------- - -.. code-block:: python - - import evadb - cursor = evadb.connect().cursor() - -2. Connect to an Existing Postgres Database ---------------------------------------------- - -.. tab-set:: - - .. tab-item:: Python - - .. code-block:: python - - params = { - "user": "eva", - "password": "password", - "host": "localhost", - "port": "5432", - "database": "evadb", - } - query = f"CREATE DATABASE postgres_data WITH ENGINE = 'postgres', PARAMETERS = {params};" - cursor.query(query).df() - - .. tab-item:: SQL - - .. code-block:: text - - CREATE DATABASE postgres_data WITH ENGINE = 'postgres', PARAMETERS = { - "user": "eva", - "password": "password", - "host": "localhost", - "port": "5432", - "database": "evadb" - } - -3. Sentiment Analysis of Food Review using ChatGPT ---------------------------------------------------- - -We then use EvaDB + ChatGPT to analyze whether the review is "positive" or "negative" with customized ChatGPT prompt. For this use case, -we assume reviews have been already loaded into the table inside PostgreSQL. -You can check our `Jupyter Notebook `__ for how to load data. - -.. tab-set:: - - .. tab-item:: Python - - .. code-block:: python - - cursor.query(""" - SELECT ChatGPT( - "Is the review positive or negative. Only reply 'positive' or 'negative'. Here are examples. The food is very bad: negative. The food is very good: postive.", - review) - FROM postgres_data.review_table; - """).df() - - .. tab-item:: SQL - - .. code-block:: sql - - SELECT ChatGPT( - "Is the review positive or negative. Only reply 'positive' or 'negative'. Here are examples. The food is very bad: negative. The food is very good: postive.", - review) - FROM postgres_data.review_table; - -This will return tone analysis results for existing reviews. - -.. code-block:: - - +------------------------------+ - | chatgpt.response | - |------------------------------| - | negative | - | positive | - | negative | - +------------------------------+ - -4. Response to Negative Reviews using ChatGPT ---------------------------------------------- - -.. tab-set:: - - .. tab-item:: Python - - .. code-block:: python - - cursor.query(""" - SELECT ChatGPT( - "Respond the the review with solution to address the review's concern", - review) - FROM postgres_data.review_table - WHERE ChatGPT( - "Is the review positive or negative. Only reply 'positive' or 'negative'. Here are examples. The food is very bad: negative. The food is very good: postive.", - review) = "negative"; - """).df() - - .. tab-item:: SQL - - .. code-block:: sql - - SELECT ChatGPT( - "Respond the the review with solution to address the review's concern", - review) - FROM postgres_data.review_table - WHERE ChatGPT( - "Is the review positive or negative. Only reply 'positive' or 'negative'. Here are examples. The food is very bad: negative. The food is very good: postive.", - review) = "negative"; - -This query will first filter out positive reviews and then apply ChatGPT again to create response to negative reviews. This will give results. - -.. 
code-block:: - - +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | chatgpt.response | - |----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| - | Dear valued customer, Thank you for bringing this matter to our attention. We apologize for the inconvenience caused by the excessive saltiness of your fried rice. We understand how important it is to have a satisfying dining experience, and we would like to make it right for you ... | - | Dear [Customer's Name], Thank you for bringing this issue to our attention. We apologize for the inconvenience caused by the missing chicken sandwich in your takeout order. We understand how frustrating it can be when an item is missing from your meal. To address this concern, we ... | - +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - -Check out our `Jupyter Notebook `__ for working example. \ No newline at end of file diff --git a/docs/source/usecases/image-classification.rst b/docs/source/usecases/image-classification.rst index f8f1bc99da..72131d05a4 100644 --- a/docs/source/usecases/image-classification.rst +++ b/docs/source/usecases/image-classification.rst @@ -1,4 +1,5 @@ <<<<<<< HEAD +<<<<<<< HEAD .. _image-classification: ======= .. _image classification: @@ -77,6 +78,65 @@ In the following query, we call the classifier on every image in the video. The SELECT MnistImageClassifier(data).label FROM mnist_video; +======= +.. _image-classification: + +Image Classification +==================== + +.. raw:: html + + + + + + +
    + Run on Google Colab + + View source on GitHub + + Download notebook +


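The review-analysis pipeline from the (now removed) food-review tutorial above can also be driven end-to-end from Python. The sketch below is illustrative rather than part of this patch: it assumes a local Postgres server with reviews already loaded into ``review_table`` and a valid OpenAI key, and it simply chains the connection, database registration, and ChatGPT queries shown earlier.

.. code-block:: python

    import os
    import evadb

    # Assumptions: local Postgres with reviews in review_table, OpenAI key available.
    os.environ["OPENAI_KEY"] = "sk-..."
    cursor = evadb.connect().cursor()

    params = {
        "user": "eva",
        "password": "password",
        "host": "localhost",
        "port": "5432",
        "database": "evadb",
    }
    cursor.query(
        f"CREATE DATABASE postgres_data WITH ENGINE = 'postgres', PARAMETERS = {params};"
    ).df()

    # Draft a response only for the reviews that ChatGPT classifies as negative.
    responses = cursor.query("""
        SELECT ChatGPT(
            "Respond to the review with a solution to address the review's concern",
            review)
        FROM postgres_data.review_table
        WHERE ChatGPT(
            "Is the review positive or negative. Only reply 'positive' or 'negative'.",
            review) = "negative";
    """).df()
    print(responses)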
    + + +Introduction +------------ + +In this tutorial, we present how to use ``PyTorch`` models in EvaDB to classify images. In particular, we focus on classifying images from the ``MNIST`` dataset that contains ``digits``. EvaDB makes it easy to do image classification using its built-in support for ``PyTorch`` models. + +In this tutorial, besides classifying images, we will also showcase a query where the model's output is used to retrieve images with the digit ``6``. + +.. include:: ../shared/evadb.rst + +We will assume that the input ``MNIST`` video is loaded into ``EvaDB``. To download the video and load it into ``EvaDB``, see the complete `image classification notebook on Colab `_. + +Create Image Classification Function +------------------------------------ + +To create a custom ``MnistImageClassifier`` function, use the ``CREATE FUNCTION`` statement. The code for the custom classification model is available `here `_. + +We will assume that the file is downloaded and stored as ``mnist_image_classifier.py``. Now, run the following query to register the AI function: + +.. code-block:: sql + + CREATE FUNCTION + IF NOT EXISTS MnistImageClassifier + IMPL 'mnist_image_classifier.py'; + +Image Classification Queries +---------------------------- + +After the function is registered in ``EvaDB``, you can use it subsequent SQL queries in different ways. + +In the following query, we call the classifier on every image in the video. The output of the function is stored in the ``label`` column (i.e., the digit associated with the given frame) of the output ``DataFrame``. + +.. code-block:: sql + + SELECT MnistImageClassifier(data).label + FROM mnist_video; + +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) This query returns the label of all the images: .. code-block:: @@ -92,6 +152,7 @@ This query returns the label of all the images: | ... | | 4 | | 4 | +<<<<<<< HEAD <<<<<<< HEAD +------------------------------+ ======= @@ -123,6 +184,9 @@ Like normal SQL, you can also specify conditions to filter out some frames of th SELECT MnistImageClassifier(data).label FROM mnist_video WHERE id < 2 >>>>>>> 8c5b63dc (release: merge staging into master (#1032)) +======= + +------------------------------+ +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) Filtering Based on AI Function @@ -147,6 +211,7 @@ Now, the ``DataFrame`` only contains images of the digit ``6``. | 6 | +------------------------------+ +<<<<<<< HEAD <<<<<<< HEAD .. include:: ../shared/footer.rst @@ -154,3 +219,6 @@ Now, the ``DataFrame`` only contains images of the digit ``6``. ======= Check out our `Jupyter Notebook `_ for working example. >>>>>>> 8c5b63dc (release: merge staging into master (#1032)) +======= +.. include:: ../shared/footer.rst +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) diff --git a/docs/source/usecases/image-search.rst b/docs/source/usecases/image-search.rst index 775ad86395..4b2fdd3d01 100644 --- a/docs/source/usecases/image-search.rst +++ b/docs/source/usecases/image-search.rst @@ -8,6 +8,7 @@ Image Search + +
    +<<<<<<< HEAD Run on Google Colab @@ -15,6 +16,15 @@ Image Search Download notebook +======= + Run on Google Colab + + View source on GitHub + + Download notebook +>>>>>>> 2dacff69 (feat: sync master staging (#1050))


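The MNIST classification steps above map directly onto the Python API. This is a minimal sketch under the same assumptions as the tutorial: ``mnist_video`` is already loaded and the classifier implementation has been saved as ``mnist_image_classifier.py``.

.. code-block:: python

    import evadb

    cursor = evadb.connect().cursor()

    # Register the custom PyTorch classifier.
    cursor.query("""
        CREATE FUNCTION IF NOT EXISTS MnistImageClassifier
        IMPL 'mnist_image_classifier.py';
    """).df()

    # Label every frame, keeping only the frames classified as the digit 6.
    sixes = cursor.query("""
        SELECT MnistImageClassifier(data).label
        FROM mnist_video
        WHERE MnistImageClassifier(data).label = '6';
    """).df()
    print(sixes)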
    @@ -33,11 +43,33 @@ Create Image Feature Extraction Function To create a custom ``SiftFeatureExtractor`` function, use the ``CREATE FUNCTION`` statement. We will assume that the file is downloaded and stored as ``sift_feature_extractor.py``. Now, run the following query to register this function: +<<<<<<< HEAD .. code-block:: sql CREATE FUNCTION IF NOT EXISTS SiftFeatureExtractor IMPL 'evadb/udfs/sift_feature_extractor.py' +======= +.. tab-set:: + + .. tab-item:: Python + + .. code-block:: python + + cursor.query(""" + CREATE FUNCTION + IF NOT EXISTS SiftFeatureExtractor + IMPL 'evadb/udfs/sift_feature_extractor.py' + """).df() + + .. tab-item:: SQL + + .. code-block:: sql + + CREATE FUNCTION + IF NOT EXISTS SiftFeatureExtractor + IMPL 'evadb/udfs/sift_feature_extractor.py' +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) Create Vector Index for Similar Image Search @@ -49,17 +81,40 @@ EvaDB lets you connect to your favorite vector database via the ``CREATE INDEX`` The following EvaQL statement creates a vector index on the ``SiftFeatureExtractor(data)`` column in the ``reddit_dataset`` table: +<<<<<<< HEAD .. code-block:: sql CREATE INDEX reddit_sift_image_index ON reddit_dataset (SiftFeatureExtractor(data)) USING FAISS; +======= +.. tab-set:: + + .. tab-item:: Python + + .. code-block:: python + + cursor.query(""" + CREATE INDEX reddit_sift_image_index + ON reddit_dataset (SiftFeatureExtractor(data)) + USING FAISS; + """).df() + + .. tab-item:: SQL + + .. code-block:: sql + + CREATE INDEX reddit_sift_image_index + ON reddit_dataset (SiftFeatureExtractor(data)) + USING FAISS; +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) Similar Image Search Powered By Vector Index -------------------------------------------- EvaQL supports the ``ORDER BY`` and ``LIMIT`` clauses to retrieve the ``top-k`` most similar images for a given image. +<<<<<<< HEAD EvaDB contains a built-in ``Similarity(x, y)`` function that computes the Euclidean distance between ``x`` and ``y``. We will use this function to compare the feature vector of image being search (i.e., the given image) and the feature vectors of all the images in the dataset that is stored in the vector index. EvaDB's query optimizer automatically picks the correct vector index to accelerate a given EvaQL query. It uses the vector index created in the prior step to accelerate the following image search query: @@ -73,6 +128,39 @@ EvaDB's query optimizer automatically picks the correct vector index to accelera SiftFeatureExtractor(data) ) LIMIT 5 +======= +EvaDB contains a built-in ``Similarity(x, y)`` function that computets the Euclidean distance between ``x`` and ``y``. We will use this function to compare the feature vector of image being search (i.e., the given image) and the feature vectors of all the images in the dataset that is stored in the vector index. + +EvaDB's query optimizer automatically picks the correct vector index to accelerate a given EvaQL query. It uses the vector index created in the prior step to accelerate the following image search query: + +.. tab-set:: + + .. tab-item:: Python + + .. code-block:: python + + query = cursor.query(""" + SELECT name FROM reddit_dataset ORDER BY + Similarity( + SiftFeatureExtractor(Open('reddit-images/g1074_d4mxztt.jpg')), + SiftFeatureExtractor(data) + ) + LIMIT 5 + """).df() + + .. tab-item:: SQL + + .. 
code-block:: sql + + SELECT name FROM reddit_dataset ORDER BY + Similarity( + SiftFeatureExtractor(Open('reddit-images/g1074_d4mxztt.jpg')), + SiftFeatureExtractor(data) + ) + LIMIT 5 + +.. code-block:: python +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) This query returns the top-5 most similar images in a ``DataFrame``: @@ -88,6 +176,10 @@ This query returns the top-5 most similar images in a ``DataFrame``: | reddit-images/g1190_clna2x2.jpg | +---------------------------------+ +<<<<<<< HEAD .. include:: ../shared/footer.rst -.. include:: ../shared/designs/design8.rst \ No newline at end of file +.. include:: ../shared/designs/design8.rst +======= +.. include:: ../shared/footer.rst +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) diff --git a/docs/source/usecases/object-detection.rst b/docs/source/usecases/object-detection.rst index 726a7d64fc..a1b3389786 100644 --- a/docs/source/usecases/object-detection.rst +++ b/docs/source/usecases/object-detection.rst @@ -8,6 +8,7 @@ Object Detection + +
    +<<<<<<< HEAD Run on Google Colab @@ -15,6 +16,15 @@ Object Detection Download notebook +======= + Run on Google Colab + + View source on GitHub + + Download notebook +>>>>>>> 2dacff69 (feat: sync master staging (#1050))


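The image-search workflow above (feature extraction, vector index, similarity query) can be run as one Python script. A minimal sketch, assuming the Reddit images are already loaded as ``reddit_dataset`` and the extractor file is available at ``evadb/udfs/sift_feature_extractor.py``:

.. code-block:: python

    import evadb

    cursor = evadb.connect().cursor()

    # Register the SIFT feature extractor.
    cursor.query("""
        CREATE FUNCTION IF NOT EXISTS SiftFeatureExtractor
        IMPL 'evadb/udfs/sift_feature_extractor.py'
    """).df()

    # Build a FAISS vector index over the extracted features.
    cursor.query("""
        CREATE INDEX reddit_sift_image_index
        ON reddit_dataset (SiftFeatureExtractor(data))
        USING FAISS;
    """).df()

    # Retrieve the top-5 images most similar to the given query image.
    similar = cursor.query("""
        SELECT name FROM reddit_dataset ORDER BY
            Similarity(
                SiftFeatureExtractor(Open('reddit-images/g1074_d4mxztt.jpg')),
                SiftFeatureExtractor(data)
            )
        LIMIT 5
    """).df()
    print(similar)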
    @@ -37,7 +47,11 @@ To create a custom ``Yolo`` function based on the popular ``YOLO-v8m`` model, us .. code-block:: sql +<<<<<<< HEAD CREATE FUNCTION IF NOT EXISTS Yolo +======= + CREATE UDF IF NOT EXISTS Yolo +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) TYPE ultralytics MODEL 'yolov8m.pt'; @@ -59,6 +73,7 @@ This query returns the label of all the images: .. code-block:: +<<<<<<< HEAD +--------------------------+--------------------------------+--------------------------------+--------------------------------+ | objectdetectionvideos.id | yolo.labels | yolo.bboxes | yolo.scores | +--------------------------+--------------------------------+--------------------------------+--------------------------------+ @@ -68,6 +83,14 @@ This query returns the label of all the images: | 3 | ['car', 'car', 'car', 'car ... | [[839.319580078125, 279.92 ... | [0.91, 0.84, 0.82, 0.81, 0 ... | | 4 | ['car', 'car', 'car', 'car ... | [[843.2254028320312, 280.8 ... | [0.9, 0.85, 0.83, 0.8, 0.7 ... | +--------------------------+--------------------------------+--------------------------------+--------------------------------+ +======= + +-----------------------------------------------------------------------------------------------------+ + | objectdetectionvideos.id | yolo.labels | + +--------------------------+-----------------------------------------------------------------+ + | 0 | [car, car, car, car, car, car, person, car, ... | + | 1 | [car, car, car, car, car, car, car, car, car, ... | + +-----------------------------------------------------------------------------------------------------+ +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) Filtering Based on YOLO Function -------------------------------- @@ -76,15 +99,22 @@ In the following query, we use the output of the object detector to retrieve a s .. code-block:: sql +<<<<<<< HEAD SELECT id, Yolo(data).labels FROM ObjectDetectionVideos WHERE ['person', 'car'] <@ Yolo(data).labels LIMIT 5; +======= + SELECT id + FROM ObjectDetectionVideos + WHERE ['pedestrian', 'car'] <@ Yolo(data).label; +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) Now, the ``DataFrame`` only contains frames with the desired objects: .. code-block:: +<<<<<<< HEAD +--------------------------+--------------------------------------------------------------+ | objectdetectionvideos.id | yolo.labels | +--------------------------+--------------------------------------------------------------+ @@ -97,4 +127,14 @@ Now, the ``DataFrame`` only contains frames with the desired objects: .. include:: ../shared/footer.rst -.. include:: ../shared/designs/design9.rst \ No newline at end of file +.. include:: ../shared/designs/design9.rst +======= + +------------------------------+ + | objectdetectionvideos.label | + |------------------------------| + | 6 | + | 6 | + +------------------------------+ + +.. include:: ../shared/footer.rst +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) diff --git a/docs/source/usecases/privategpt.rst b/docs/source/usecases/privategpt.rst new file mode 100644 index 0000000000..767cc8895e --- /dev/null +++ b/docs/source/usecases/privategpt.rst @@ -0,0 +1,2 @@ +PrivateGPT +========== diff --git a/docs/source/usecases/qa-video.rst b/docs/source/usecases/qa-video.rst deleted file mode 100644 index 133eefca24..0000000000 --- a/docs/source/usecases/qa-video.rst +++ /dev/null @@ -1,89 +0,0 @@ -Q&A Application on Videos -========================= - -1. Connect to EvaDB -------------------- - -.. 
code-block:: python - - import evadb - cursor = evadb.connect().cursor() - -2. Register Functions ---------------------- - -Register speech-to-text **whisper** model from `HuggingFace` - -.. code-block:: python - - cursor.query(""" - CREATE UDF SpeechRecognizer - TYPE HuggingFace - 'task' 'automatic-speech-recognition' - 'model' 'openai/whisper-base'; - """).execute() - -.. note:: - - EvaDB allows users to register any model in HuggingFace as a function. - -Register **OpenAI** LLM model - -.. code-block:: python - - cursor.query(""" - CREATE UDF ChatGPT - IMPL 'evadb/udfs/chatgpt.py' - """).execute() - - # Set OpenAI token - import os - os.environ["OPENAI_KEY"] = "sk-..." - -.. note:: - - ChatGPT function is a wrapper around OpenAI API call. You can also switch to other LLM models that can run locally. - -3. Summarize Video in Text --------------------------- - -Create a table with text summary of the video. Text summarization is generated by running speech-to-text ``Whisper`` model from ``HuggingFace``. - -.. code-block:: python - - cursor.query(""" - CREATE TABLE text_summary AS - SELECT SpeechRecognizer(audio) FROM ukraine_video; - """).execute() - -This results a table shown below. - -.. code-block:: - - +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | text_summary.text | - |-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| - | The war in Ukraine has been on for 415 days. Who is winning it? Not Russia. Certainly not Ukraine. It is the US oil companies. US oil companies have reached $200 billion in pure profits. The earnings are still on. They are still milking this war and sharing the spoils. Let us look at how Exxon mobile has been doing. In 2022, the company made $56 billion in profits. Oil companies capitalized on instability and they are profiting from pain. American oil companies are masters of this art. You may remember the war in Iraq. The US went to war in Iraq by selling a lie. 
The Americans did not find any weapons of mass destruction but they did find lots of oil. And in the year since, American officials have admitted this. And this story is not over. It's repeating itself in Ukraine. They are feeding another war and filling the coffers of US oil companies. | - +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - -4. Q&A using ChatGPT ---------------------- - -We can now embed the ChatGPT prompt inside SQL with text summary from the table as its knowledge base. - -.. code-block:: python - - cursor.query(""" - SELECT ChatGPT('Is this video summary related to Ukraine russia war', text) - FROM text_summary; - """).df() - -This query returns a projected ``DataFrame``. - -.. code-block:: - - +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | chatgpt.response | - |-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| - | Based on the provided context, it seems that the video summary is related to the Ukraine-Russia war. It discusses how US oil companies are allegedly profiting from the war in Ukraine, similar to how they allegedly benefited from the war in Iraq. | - +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ diff --git a/docs/source/usecases/question-answering.rst b/docs/source/usecases/question-answering.rst index 7a1235da04..92a1ffd077 100644 --- a/docs/source/usecases/question-answering.rst +++ b/docs/source/usecases/question-answering.rst @@ -8,6 +8,7 @@ Question Answering + +
    +<<<<<<< HEAD Run on Google Colab @@ -15,6 +16,15 @@ Question Answering Download notebook +======= + Run on Google Colab + + View source on GitHub + + Download notebook +>>>>>>> 2dacff69 (feat: sync master staging (#1050))


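The YOLO example above uses the array containment operator ``<@`` to keep only frames whose detected labels include every item on the left-hand side. A minimal Python sketch of the same flow, assuming the video is already loaded as ``ObjectDetectionVideos``:

.. code-block:: python

    import evadb

    cursor = evadb.connect().cursor()

    # Register a YOLO-v8m detector via the ultralytics integration.
    cursor.query("""
        CREATE FUNCTION IF NOT EXISTS Yolo
        TYPE ultralytics
        MODEL 'yolov8m.pt';
    """).df()

    # Keep only frames that contain both a person and a car.
    frames = cursor.query("""
        SELECT id, Yolo(data).labels
        FROM ObjectDetectionVideos
        WHERE ['person', 'car'] <@ Yolo(data).labels
        LIMIT 5;
    """).df()
    print(frames)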
    @@ -61,7 +71,11 @@ EvaDB has built-in support for ``ChatGPT`` function from ``OpenAI``. You will ne .. note:: +<<<<<<< HEAD EvaDB has built-in support for a wide range of :ref:`OpenAI` models. You can also switch to another large language models that runs locally by defining a :ref:`custom AI function`. +======= + EvaDB has built-in support for a wide range of :ref:`OpenAI` models. You can also switch to another large language models that runs locally by defining a :ref:`Custom function`. +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) ChatGPT function is a wrapper around OpenAI API call. You can also switch to other LLM models that can run locally. @@ -83,11 +97,19 @@ Here is the query's output ``DataFrame``: .. code-block:: +<<<<<<< HEAD +-------------------------------------------------------------------------------------------------------------------------+ | text_summary.text | +-------------------------------------------------------------------------------------------------------------------------+ | The war in Ukraine has been on for 415 days. Who is winning it? Not Russia. Certainly not Ukraine. It is the US oil ... | +-------------------------------------------------------------------------------------------------------------------------+ +======= + +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | text_summary.text | + |-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| + | The war in Ukraine has been on for 415 days. Who is winning it? Not Russia. Certainly not Ukraine. It is the US oil companies. US oil companies have reached $200 billion in pure profits. The earnings are still on. They are still milking this war and sharing the spoils. Let us look at how Exxon mobile has been doing. In 2022, the company made $56 billion in profits. Oil companies capitalized on instability and they are profiting from pain. American oil companies are masters of this art. 
You may remember the war in Iraq. The US went to war in Iraq by selling a lie. The Americans did not find any weapons of mass destruction but they did find lots of oil. And in the year since, American officials have admitted this. And this story is not over. It's repeating itself in Ukraine. They are feeding another war and filling the coffers of US oil companies. | + +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) Question Answering using ChatGPT -------------------------------- @@ -105,14 +127,25 @@ Here is the query's output ``DataFrame``: .. code-block:: +<<<<<<< HEAD +--------------------------------------------------------------------------------------------------------------------------+ | chatgpt.response | +--------------------------------------------------------------------------------------------------------------------------+ | No, the video summary provided does not appear to be related to the Ukraine-Russia war. It seems to be a conversatio ... | +--------------------------------------------------------------------------------------------------------------------------+ +======= + +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | chatgpt.response | + |-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| + | Based on the provided context, it seems that the video summary is related to the Ukraine-Russia war. It discusses how US oil companies are allegedly profiting from the war in Ukraine, similar to how they allegedly benefited from the war in Iraq. | + +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) .. include:: ../shared/nlp.rst .. include:: ../shared/footer.rst +<<<<<<< HEAD -.. include:: ../shared/designs/design1.rst \ No newline at end of file +.. 
include:: ../shared/designs/design1.rst +======= +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) diff --git a/docs/source/usecases/sentiment-analysis.rst b/docs/source/usecases/sentiment-analysis.rst index a7dfeb2d98..c37076030e 100644 --- a/docs/source/usecases/sentiment-analysis.rst +++ b/docs/source/usecases/sentiment-analysis.rst @@ -8,6 +8,7 @@ Sentiment Analysis + +
    +<<<<<<< HEAD Run on Google Colab @@ -15,6 +16,15 @@ Sentiment Analysis Download notebook +======= + Run on Google Colab + + View source on GitHub + + Download notebook +>>>>>>> 2dacff69 (feat: sync master staging (#1050))


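The question-answering flow above chains a HuggingFace speech-to-text model with ChatGPT. The sketch below mirrors the deleted ``qa-video`` tutorial using the newer ``CREATE FUNCTION`` spelling; it assumes the video is loaded as ``ukraine_video`` and an OpenAI key is available.

.. code-block:: python

    import os
    import evadb

    os.environ["OPENAI_KEY"] = "sk-..."
    cursor = evadb.connect().cursor()

    # Register Whisper from HuggingFace for speech recognition.
    cursor.query("""
        CREATE FUNCTION SpeechRecognizer
        TYPE HuggingFace
        'task' 'automatic-speech-recognition'
        'model' 'openai/whisper-base';
    """).df()

    # Transcribe the audio track into a text_summary table.
    cursor.query("""
        CREATE TABLE text_summary AS
        SELECT SpeechRecognizer(audio) FROM ukraine_video;
    """).df()

    # Ask ChatGPT a question grounded in the transcript.
    answer = cursor.query("""
        SELECT ChatGPT('Is this video summary related to Ukraine russia war', text)
        FROM text_summary;
    """).df()
    print(answer)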
    @@ -81,8 +91,12 @@ While running this query, EvaDB first retrieves the negative reviews and then ap | Dear [Customer's Name], Thank you for bringing this issue to our attention. We apologize for the inconvenience caused by the missing chicken sandwich in your takeout order. We understand how frustrating it can be when an item is missing from your meal. To address this concern, we ... | +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +<<<<<<< HEAD .. include:: ../shared/nlp.rst .. include:: ../shared/footer.rst -.. include:: ../shared/designs/design3.rst \ No newline at end of file +.. include:: ../shared/designs/design3.rst +======= +Check out our `Jupyter Notebook `__ for working example. +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) diff --git a/docs/source/usecases/similar-image-search.rst b/docs/source/usecases/similar-image-search.rst deleted file mode 100644 index 91563cc2cd..0000000000 --- a/docs/source/usecases/similar-image-search.rst +++ /dev/null @@ -1,83 +0,0 @@ -Image Similarity Search Pipeline using EvaDB on Images -====================================================== - -In this use case, we want to search similar images based on an image provided by the user. To implement this use case, we leverage EvaDB's capability of easily expressing feature extraction pipeline. Additionally, we also leverage EvaDB's capability of building a similarity search index and searching the index to -locate similar images through ``FAISS`` library. - -For this use case, we use a reddit image dataset that can be downloaded from `Here `_. -We populate a table in the database that contains all images. - -1. Connect to EvaDB -------------------- - -.. code-block:: python - - import evadb - cursor = evadb.connect().cursor() - -2. Register SIFT as Function ----------------------------- - -.. code-block:: python - - cursor.query(""" - CREATE UDF IF NOT EXISTS SiftFeatureExtractor - IMPL 'evadb/udfs/sift_feature_extractor.py' - """).execute() - -3. Search Similar Images ------------------------- - -To locate images that have similar appearance, we will first build an index based on embeddings of images. -Then, for the given image, EvaDB can find similar images by searching in the index. - -Build Index using ``FAISS`` -*************************** - -The below query creates a new index on the projected column ``SiftFeatureExtractor(data)`` from the ``reddit_dataset`` table. - -.. code-block:: python - - cursor.query(""" - CREATE INDEX reddit_sift_image_index - ON reddit_dataset (SiftFeatureExtractor(data)) - USING FAISS - """).execute() - -Search Index for a Given Image -******************************* - -EvaDB leverages the ``ORDER BY ... LIMIT ...`` SQL syntax to retrieve the top 5 similar images. -In this example, ``Similarity(x, y)`` is a built-in function to calculate distance between ``x`` and ``y``. -In current version, ``x`` is a single tuple and ``y`` is a column that contains multiple tuples. -By default EvaDB does pairwise distance calculation between ``x`` and all tuples from ``y``. -In this case, EvaDB leverages the index that we have already built. - -.. 
code-block:: python - - query = cursor.query(""" - SELECT name FROM reddit_dataset ORDER BY - Similarity( - SiftFeatureExtractor(Open('reddit-images/g1074_d4mxztt.jpg')), - SiftFeatureExtractor(data) - ) - LIMIT 5 - """) - query.df() - -The ``DataFrame`` contains the top 5 similar images. - -.. code-block:: - - +---------------------------------+ - | reddit_dataset.name | - |---------------------------------| - | reddit-images/g1074_d4mxztt.jpg | - | reddit-images/g348_d7ju7dq.jpg | - | reddit-images/g1209_ct6bf1n.jpg | - | reddit-images/g1190_cln9xzr.jpg | - | reddit-images/g1190_clna2x2.jpg | - +---------------------------------+ - -Check out our `Jupyter Notebook `_ for working example. -We also demonstrate more complicated features of EvaDB for similarity search. diff --git a/docs/source/usecases/text-summarization.rst b/docs/source/usecases/text-summarization.rst index caca6e04a6..08e3dda127 100644 --- a/docs/source/usecases/text-summarization.rst +++ b/docs/source/usecases/text-summarization.rst @@ -8,6 +8,7 @@ Text Summarization + +
    +<<<<<<< HEAD Run on Google Colab @@ -15,6 +16,15 @@ Text Summarization Download notebook +======= + Run on Google Colab + + View source on GitHub + + Download notebook +>>>>>>> 2dacff69 (feat: sync master staging (#1050))


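The text-summarization output shown below comes from a HuggingFace summarization function applied to a table of PDF paragraphs. A minimal sketch of that setup; the table name ``MyPDFs`` is inferred from the output column prefix, and the model name is a placeholder to swap for the one used in your deployment:

.. code-block:: python

    import evadb

    cursor = evadb.connect().cursor()

    # Register a HuggingFace summarization pipeline (model name is a placeholder).
    cursor.query("""
        CREATE FUNCTION IF NOT EXISTS TextSummarizer
        TYPE HuggingFace
        'task' 'summarization'
        'model' 'facebook/bart-large-cnn';
    """).df()

    # Summarize each loaded PDF paragraph.
    summaries = cursor.query("""
        SELECT data, TextSummarizer(data)
        FROM MyPDFs;
    """).df()
    print(summaries)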
    @@ -75,16 +85,28 @@ Here is the query's output ``DataFrame``: .. code-block:: +<<<<<<< HEAD +--------------------------------------------------------------+--------------------------------------------------------------+ | mypdfs.data | textsummarizer.summary_text | +--------------------------------------------------------------+--------------------------------------------------------------+ | DEFINATION  Specialized connective tissue with ... | Specialized connective tissue with fluid matrix. Erythro ... | | PHYSICAL CHARACTERISTICS ( 1 ) COLOUR -- Red ( 2 ) ... | The temperature is 38° C / 100.4° F. The body weight is ... | +--------------------------------------------------------------+--------------------------------------------------------------+ +======= + +--------------------------------------------------------+--------------------------------------------------------+ + | mypdfs.data | mypdfs.summary_text | + +--------------------------------------------------------+--------------------------------------------------------+ + | DEFINATION  Specialized connective tissue wit... | Specialized connective tissue with fluid matri... | + | PHYSICAL CHARACTERISTICS ( 1 ) COLOUR -- R... | The temperature is 38° C / 100.4° F. The body ... | + +--------------------------------------------------------+--------------------------------------------------------+ +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) .. include:: ../shared/nlp.rst .. include:: ../shared/footer.rst +<<<<<<< HEAD -.. include:: ../shared/designs/design2.rst \ No newline at end of file +.. include:: ../shared/designs/design2.rst +======= +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) diff --git a/evadb/binder/statement_binder.py b/evadb/binder/statement_binder.py index f1e949941c..da0217bc6f 100644 --- a/evadb/binder/statement_binder.py +++ b/evadb/binder/statement_binder.py @@ -39,7 +39,11 @@ from evadb.expression.tuple_value_expression import TupleValueExpression from evadb.parser.create_function_statement import CreateFunctionStatement from evadb.parser.create_index_statement import CreateIndexStatement +<<<<<<< HEAD from evadb.parser.create_statement import ColumnDefinition, CreateTableStatement +======= +from evadb.parser.create_statement import CreateTableStatement +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) from evadb.parser.delete_statement import DeleteTableStatement from evadb.parser.explain_statement import ExplainStatement from evadb.parser.rename_statement import RenameTableStatement @@ -48,10 +52,14 @@ from evadb.parser.table_ref import TableRef from evadb.parser.types import FunctionType from evadb.third_party.huggingface.binder import assign_hf_function +<<<<<<< HEAD from evadb.utils.generic_utils import ( load_function_class_from_file, string_comparison_case_insensitive, ) +======= +from evadb.utils.generic_utils import load_function_class_from_file +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) from evadb.utils.logging_manager import logger @@ -89,6 +97,7 @@ def _bind_create_function_statement(self, node: CreateFunctionStatement): node.query.target_list ) arg_map = {key: value for key, value in node.metadata} +<<<<<<< HEAD inputs, outputs = [], [] if string_comparison_case_insensitive(node.function_type, "ludwig"): assert ( @@ -136,11 +145,75 @@ def _bind_create_function_statement(self, node: CreateFunctionStatement): raise BinderError( f"Unsupported type of function: {node.function_type}." 
) +======= + assert ( + "predict" in arg_map + ), f"Creating {node.function_type} functions expects 'predict' metadata." + # We only support a single predict column for now + predict_columns = set([arg_map["predict"]]) + inputs, outputs = [], [] + for column in all_column_list: + if column.name in predict_columns: + if node.function_type != "Forecasting": + column.name = column.name + "_predictions" + else: + column.name = column.name + outputs.append(column) + else: + inputs.append(column) +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) assert ( len(node.inputs) == 0 and len(node.outputs) == 0 ), f"{node.function_type} functions' input and output are auto assigned" node.inputs, node.outputs = inputs, outputs +<<<<<<< HEAD +======= + @bind.register(CreateIndexStatement) + def _bind_create_index_statement(self, node: CreateIndexStatement): + self.bind(node.table_ref) + if node.function: + self.bind(node.function) + + # TODO: create index currently only supports single numpy column. + assert len(node.col_list) == 1, "Index cannot be created on more than 1 column" + + # TODO: create index currently only works on TableInfo, but will extend later. + assert node.table_ref.is_table_atom(), "Index can only be created on Tableinfo" + if not node.function: + # Feature table type needs to be float32 numpy array. + assert ( + len(node.col_list) == 1 + ), f"Index can be only created on one column, but instead {len(node.col_list)} are provided" + col_def = node.col_list[0] + + table_ref_obj = node.table_ref.table.table_obj + col_list = [ + col for col in table_ref_obj.columns if col.name == col_def.name + ] + assert ( + len(col_list) == 1 + ), f"Index is created on non-existent column {col_def.name}" + + col = col_list[0] + assert ( + col.array_type == NdArrayType.FLOAT32 + ), "Index input needs to be float32." + assert len(col.array_dimensions) == 2 + else: + # Output of the function should be 2 dimension and float32 type. + function_obj = self._catalog().get_function_catalog_entry_by_name( + node.function.name + ) + for output in function_obj.outputs: + assert ( + output.array_type == NdArrayType.FLOAT32 + ), "Index input needs to be float32." + assert ( + len(output.array_dimensions) == 2 + ), "Index input needs to be 2 dimensional." 
+ +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) @bind.register(SelectStatement) def _bind_select_statement(self, node: SelectStatement): if node.from_table: @@ -298,10 +371,17 @@ def _bind_func_expr(self, node: FunctionExpression): logger.error(err_msg) raise BinderError(err_msg) +<<<<<<< HEAD if string_comparison_case_insensitive(function_obj.type, "HuggingFace"): node.function = assign_hf_function(function_obj) elif string_comparison_case_insensitive(function_obj.type, "Ludwig"): +======= + if function_obj.type == "HuggingFace": + node.function = assign_hf_function(function_obj) + + elif function_obj.type == "Ludwig": +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) function_class = load_function_class_from_file( function_obj.impl_file_path, "GenericLudwigModel", diff --git a/evadb/catalog/catalog_manager.py b/evadb/catalog/catalog_manager.py index 7f63be108b..14fc280d4d 100644 --- a/evadb/catalog/catalog_manager.py +++ b/evadb/catalog/catalog_manager.py @@ -343,6 +343,7 @@ def insert_function_catalog_entry( checksum = get_file_checksum(impl_file_path) function_entry = self._function_service.insert_entry( +<<<<<<< HEAD name, impl_file_path, type, @@ -350,6 +351,16 @@ def insert_function_catalog_entry( function_io_list, function_metadata_list, ) +======= + name, impl_file_path, type, checksum + ) + for function_io in function_io_list: + function_io.function_id = function_entry.row_id + self._function_io_service.insert_entries(function_io_list) + for function_metadata in function_metadata_list: + function_metadata.function_id = function_entry.row_id + self._function_metadata_service.insert_entries(function_metadata_list) +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) return function_entry def get_function_catalog_entry_by_name(self, name: str) -> FunctionCatalogEntry: @@ -416,6 +427,7 @@ def insert_index_catalog_entry( vector_store_type: VectorStoreType, feat_column: ColumnCatalogEntry, function_signature: str, +<<<<<<< HEAD index_def: str, ) -> IndexCatalogEntry: index_catalog_entry = self._index_service.insert_entry( @@ -425,6 +437,11 @@ def insert_index_catalog_entry( feat_column, function_signature, index_def, +======= + ) -> IndexCatalogEntry: + index_catalog_entry = self._index_service.insert_entry( + name, save_file_path, vector_store_type, feat_column, function_signature +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) ) return index_catalog_entry diff --git a/evadb/catalog/models/function_metadata_catalog.py b/evadb/catalog/models/function_metadata_catalog.py index 4b398a8c67..8c2945cc1b 100644 --- a/evadb/catalog/models/function_metadata_catalog.py +++ b/evadb/catalog/models/function_metadata_catalog.py @@ -17,7 +17,11 @@ from sqlalchemy.orm import relationship from evadb.catalog.models.base_model import BaseModel +<<<<<<< HEAD from evadb.catalog.models.utils import FunctionMetadataCatalogEntry, TextPickleType +======= +from evadb.catalog.models.utils import FunctionMetadataCatalogEntry +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) class FunctionMetadataCatalog(BaseModel): @@ -34,7 +38,11 @@ class FunctionMetadataCatalog(BaseModel): __tablename__ = "function_metadata_catalog" _key = Column("key", String(100)) +<<<<<<< HEAD _value = Column("value", TextPickleType()) +======= + _value = Column("value", String(100)) +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) _function_id = Column( "function_id", Integer, ForeignKey("function_catalog._row_id") ) diff --git a/evadb/catalog/models/index_catalog.py b/evadb/catalog/models/index_catalog.py index 
40a40f63a5..114ae6084a 100644 --- a/evadb/catalog/models/index_catalog.py +++ b/evadb/catalog/models/index_catalog.py @@ -31,8 +31,11 @@ class IndexCatalog(BaseModel): `_feat_column_id:` the `_row_id` of the `ColumnCatalog` entry for the column on which the index is built. `_function_signature:` if the index is created by running function expression on input column, this will store the function signature of the used function. Otherwise, this field is None. +<<<<<<< HEAD `_index_def:` the original SQL statement that is used to create this index. We record this to rerun create index on updated table. +======= +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) """ __tablename__ = "index_catalog" @@ -44,7 +47,10 @@ class IndexCatalog(BaseModel): "column_id", Integer, ForeignKey("column_catalog._row_id", ondelete="CASCADE") ) _function_signature = Column("function", String, default=None) +<<<<<<< HEAD _index_def = Column("index_def", String, default=None) +======= +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) _feat_column = relationship( "ColumnCatalog", @@ -58,14 +64,20 @@ def __init__( type: VectorStoreType, feat_column_id: int = None, function_signature: str = None, +<<<<<<< HEAD index_def: str = None, +======= +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) ): self._name = name self._save_file_path = save_file_path self._type = type self._feat_column_id = feat_column_id self._function_signature = function_signature +<<<<<<< HEAD self._index_def = index_def +======= +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) def as_dataclass(self) -> "IndexCatalogEntry": feat_column = self._feat_column.as_dataclass() if self._feat_column else None @@ -76,6 +88,9 @@ def as_dataclass(self) -> "IndexCatalogEntry": type=self._type, feat_column_id=self._feat_column_id, function_signature=self._function_signature, +<<<<<<< HEAD index_def=self._index_def, +======= +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) feat_column=feat_column, ) diff --git a/evadb/catalog/models/utils.py b/evadb/catalog/models/utils.py index b1c067aa0b..4691cb4438 100644 --- a/evadb/catalog/models/utils.py +++ b/evadb/catalog/models/utils.py @@ -201,7 +201,10 @@ class IndexCatalogEntry: row_id: int = None feat_column_id: int = None function_signature: str = None +<<<<<<< HEAD index_def: str = None +======= +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) feat_column: ColumnCatalogEntry = None diff --git a/evadb/catalog/services/function_catalog_service.py b/evadb/catalog/services/function_catalog_service.py index 0c6c272d3c..a4611bf95d 100644 --- a/evadb/catalog/services/function_catalog_service.py +++ b/evadb/catalog/services/function_catalog_service.py @@ -12,12 +12,16 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+<<<<<<< HEAD from typing import List +======= +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) from sqlalchemy.orm import Session from sqlalchemy.sql.expression import select from evadb.catalog.models.function_catalog import FunctionCatalog, FunctionCatalogEntry +<<<<<<< HEAD from evadb.catalog.models.utils import ( FunctionIOCatalogEntry, FunctionMetadataCatalogEntry, @@ -28,12 +32,16 @@ FunctionMetadataCatalogService, ) from evadb.utils.errors import CatalogError +======= +from evadb.catalog.services.base_service import BaseService +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) from evadb.utils.logging_manager import logger class FunctionCatalogService(BaseService): def __init__(self, db_session: Session): super().__init__(FunctionCatalog, db_session) +<<<<<<< HEAD self._function_io_service = FunctionIOCatalogService(db_session) self._function_metadata_service = FunctionMetadataCatalogService(db_session) @@ -45,6 +53,11 @@ def insert_entry( checksum: str, function_io_list: List[FunctionIOCatalogEntry], function_metadata_list: List[FunctionMetadataCatalogEntry], +======= + + def insert_entry( + self, name: str, impl_path: str, type: str, checksum: str +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) ) -> FunctionCatalogEntry: """Insert a new function entry @@ -59,6 +72,7 @@ def insert_entry( """ function_obj = self.model(name, impl_path, type, checksum) function_obj = function_obj.save(self.session) +<<<<<<< HEAD for function_io in function_io_list: function_io.function_id = function_obj._row_id @@ -84,6 +98,9 @@ def insert_entry( raise CatalogError(e) else: return function_obj.as_dataclass() +======= + return function_obj.as_dataclass() +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) def get_entry_by_name(self, name: str) -> FunctionCatalogEntry: """return the function entry that matches the name provided. 
diff --git a/evadb/catalog/services/function_io_catalog_service.py b/evadb/catalog/services/function_io_catalog_service.py index caf3390987..6de305ecd6 100644 --- a/evadb/catalog/services/function_io_catalog_service.py +++ b/evadb/catalog/services/function_io_catalog_service.py @@ -69,8 +69,18 @@ def get_output_entries_by_function_id( logger.error(error) raise RuntimeError(error) +<<<<<<< HEAD def create_entries(self, io_list: List[FunctionIOCatalogEntry]): io_objs = [] +======= + def insert_entries(self, io_list: List[FunctionIOCatalogEntry]): + """Commit entries to the function_io table + + Arguments: + io_list (List[FunctionIOCatalogEntry]): List of io info io be added + """ + +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) for io in io_list: io_obj = FunctionIOCatalog( name=io.name, diff --git a/evadb/catalog/services/function_metadata_catalog_service.py b/evadb/catalog/services/function_metadata_catalog_service.py index 2629b8040a..33c4058aea 100644 --- a/evadb/catalog/services/function_metadata_catalog_service.py +++ b/evadb/catalog/services/function_metadata_catalog_service.py @@ -30,8 +30,12 @@ class FunctionMetadataCatalogService(BaseService): def __init__(self, db_session: Session): super().__init__(FunctionMetadataCatalog, db_session) +<<<<<<< HEAD def create_entries(self, entries: List[FunctionMetadataCatalogEntry]): metadata_objs = [] +======= + def insert_entries(self, entries: List[FunctionMetadataCatalogEntry]): +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) try: for entry in entries: metadata_obj = FunctionMetadataCatalog( @@ -40,6 +44,12 @@ def create_entries(self, entries: List[FunctionMetadataCatalogEntry]): metadata_objs.append(metadata_obj) return metadata_objs except Exception as e: +<<<<<<< HEAD +======= + logger.exception( + f"Failed to insert entry {entry} into function metadata catalog with exception {str(e)}" + ) +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) raise CatalogError(e) def get_entries_by_function_id( diff --git a/evadb/catalog/services/index_catalog_service.py b/evadb/catalog/services/index_catalog_service.py index 4b4b675780..25e88d586a 100644 --- a/evadb/catalog/services/index_catalog_service.py +++ b/evadb/catalog/services/index_catalog_service.py @@ -35,6 +35,7 @@ def insert_entry( type: VectorStoreType, feat_column: ColumnCatalogEntry, function_signature: str, +<<<<<<< HEAD index_def: str, ) -> IndexCatalogEntry: index_entry = IndexCatalog( @@ -44,6 +45,11 @@ def insert_entry( feat_column.row_id, function_signature, index_def, +======= + ) -> IndexCatalogEntry: + index_entry = IndexCatalog( + name, save_file_path, type, feat_column.row_id, function_signature +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) ) index_entry = index_entry.save(self.session) return index_entry.as_dataclass() diff --git a/evadb/executor/apply_and_merge_executor.py b/evadb/executor/apply_and_merge_executor.py index 414f40641c..3c2189c445 100644 --- a/evadb/executor/apply_and_merge_executor.py +++ b/evadb/executor/apply_and_merge_executor.py @@ -43,6 +43,18 @@ def exec(self, *args, **kwargs) -> Iterator[Batch]: for batch in child_executor.exec(**kwargs): func_result = self.func_expr.evaluate(batch) +<<<<<<< HEAD +======= + # persist stats of function expression + if self.func_expr.function_obj and self.func_expr._stats: + function_id = self.func_expr.function_obj.row_id + self.catalog().upsert_function_cost_catalog_entry( + function_id, + self.func_expr.function_obj.name, + self.func_expr._stats.prev_cost, + ) + +>>>>>>> 2dacff69 (feat: sync 
master staging (#1050)) output = Batch.merge_column_wise([batch, func_result]) if self.do_unnest: output.unnest(func_result.columns) diff --git a/evadb/executor/create_function_executor.py b/evadb/executor/create_function_executor.py index 3791575634..53c126e9e2 100644 --- a/evadb/executor/create_function_executor.py +++ b/evadb/executor/create_function_executor.py @@ -12,9 +12,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +<<<<<<< HEAD import hashlib import os import pickle +======= +import os +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) from pathlib import Path from typing import Dict, List @@ -25,7 +29,10 @@ from evadb.catalog.models.function_io_catalog import FunctionIOCatalogEntry from evadb.catalog.models.function_metadata_catalog import FunctionMetadataCatalogEntry from evadb.configuration.constants import ( +<<<<<<< HEAD DEFAULT_TRAIN_REGRESSION_METRIC, +======= +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) DEFAULT_TRAIN_TIME_LIMIT, EvaDB_INSTALLATION_DIR, ) @@ -38,6 +45,7 @@ from evadb.utils.errors import FunctionIODefinitionError from evadb.utils.generic_utils import ( load_function_class_from_file, +<<<<<<< HEAD string_comparison_case_insensitive, try_to_import_ludwig, try_to_import_neuralforecast, @@ -46,6 +54,11 @@ try_to_import_torch, try_to_import_ultralytics, try_to_import_xgboost, +======= + try_to_import_ludwig, + try_to_import_torch, + try_to_import_ultralytics, +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) ) from evadb.utils.logging_manager import logger @@ -56,10 +69,17 @@ def __init__(self, db: EvaDBDatabase, node: CreateFunctionPlan): self.function_dir = Path(EvaDB_INSTALLATION_DIR) / "functions" def handle_huggingface_function(self): +<<<<<<< HEAD """Handle HuggingFace functions HuggingFace functions are special functions that are not loaded from a file. So we do not need to call the setup method on them like we do for other functions. +======= + """Handle HuggingFace Functions + + HuggingFace Functions are special Functions that are not loaded from a file. + So we do not need to call the setup method on them like we do for other Functions. +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) """ # We need at least one deep learning framework for HuggingFace # Torch or Tensorflow @@ -75,9 +95,15 @@ def handle_huggingface_function(self): ) def handle_ludwig_function(self): +<<<<<<< HEAD """Handle ludwig functions Use Ludwig's auto_train engine to train/tune models. +======= + """Handle ludwig Functions + + Use ludwig's auto_train engine to train/tune models. 
+>>>>>>> 2dacff69 (feat: sync master staging (#1050)) """ try_to_import_ludwig() from ludwig.automl import auto_train @@ -121,6 +147,7 @@ def handle_ludwig_function(self): self.node.metadata, ) +<<<<<<< HEAD def handle_sklearn_function(self): """Handle sklearn functions @@ -226,6 +253,10 @@ def handle_xgboost_function(self): def handle_ultralytics_function(self): """Handle Ultralytics functions""" +======= + def handle_ultralytics_function(self): + """Handle Ultralytics Functions""" +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) try_to_import_ultralytics() impl_path = ( @@ -243,6 +274,7 @@ def handle_ultralytics_function(self): self.node.metadata, ) +<<<<<<< HEAD def handle_forecasting_function(self): """Handle forecasting functions""" os.environ["CUDA_VISIBLE_DEVICES"] = "" @@ -497,6 +529,12 @@ def handle_generic_function(self): """Handle generic functions Generic functions are loaded from a file. We check for inputs passed by the user during CREATE or try to load io from decorators. +======= + def handle_generic_function(self): + """Handle generic Functions + + Generic Functions are loaded from a file. We check for inputs passed by the user during CREATE or try to load io from decorators. +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) """ impl_path = self.node.impl_path.absolute().as_posix() function = self._try_initializing_function(impl_path) @@ -515,6 +553,7 @@ def exec(self, *args, **kwargs): Calls the catalog to insert a function catalog entry. """ +<<<<<<< HEAD assert ( self.node.if_not_exists and self.node.or_replace ) is False, ( @@ -522,12 +561,15 @@ def exec(self, *args, **kwargs): ) overwrite = False +======= +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) # check catalog if it already has this function entry if self.catalog().get_function_catalog_entry_by_name(self.node.name): if self.node.if_not_exists: msg = f"Function {self.node.name} already exists, nothing added." yield Batch(pd.DataFrame([msg])) return +<<<<<<< HEAD elif self.node.or_replace: # We use DropObjectExecutor to avoid bookkeeping the code. The drop function should be moved to catalog. from evadb.executor.drop_object_executor import DropObjectExecutor @@ -539,13 +581,19 @@ def exec(self, *args, **kwargs): pass else: overwrite = True +======= +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) else: msg = f"Function {self.node.name} already exists." 
logger.error(msg) raise RuntimeError(msg) # if it's a type of HuggingFaceModel, override the impl_path +<<<<<<< HEAD if string_comparison_case_insensitive(self.node.function_type, "HuggingFace"): +======= + if self.node.function_type == "HuggingFace": +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) ( name, impl_path, @@ -553,7 +601,11 @@ def exec(self, *args, **kwargs): io_list, metadata, ) = self.handle_huggingface_function() +<<<<<<< HEAD elif string_comparison_case_insensitive(self.node.function_type, "ultralytics"): +======= + elif self.node.function_type == "ultralytics": +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) ( name, impl_path, @@ -561,7 +613,11 @@ def exec(self, *args, **kwargs): io_list, metadata, ) = self.handle_ultralytics_function() +<<<<<<< HEAD elif string_comparison_case_insensitive(self.node.function_type, "Ludwig"): +======= + elif self.node.function_type == "Ludwig": +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) ( name, impl_path, @@ -569,6 +625,7 @@ def exec(self, *args, **kwargs): io_list, metadata, ) = self.handle_ludwig_function() +<<<<<<< HEAD elif string_comparison_case_insensitive(self.node.function_type, "Sklearn"): ( name, @@ -593,6 +650,8 @@ def exec(self, *args, **kwargs): io_list, metadata, ) = self.handle_forecasting_function() +======= +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) else: ( name, @@ -605,12 +664,20 @@ def exec(self, *args, **kwargs): self.catalog().insert_function_catalog_entry( name, impl_path, function_type, io_list, metadata ) +<<<<<<< HEAD if overwrite: msg = f"Function {self.node.name} overwritten." else: msg = f"Function {self.node.name} added to the database." yield Batch(pd.DataFrame([msg])) +======= + yield Batch( + pd.DataFrame( + [f"Function {self.node.name} successfully added to the database."] + ) + ) +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) def _try_initializing_function( self, impl_path: str, function_args: Dict = {} @@ -619,6 +686,7 @@ def _try_initializing_function( Args: impl_path (str): The file path of the function implementation file. +<<<<<<< HEAD function_args (Dict, optional): Dictionary of arguments to pass to the function. Defaults to {}. Returns: @@ -626,6 +694,15 @@ def _try_initializing_function( Raises: RuntimeError: If an error occurs while initializing the function. +======= + function_args (Dict, optional): Dictionary of arguments to pass to the Function. Defaults to {}. + + Returns: + FunctionCatalogEntry: A FunctionCatalogEntry object that represents the initialized Function. + + Raises: + RuntimeError: If an error occurs while initializing the Function. +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) """ # load the function class from the file @@ -635,7 +712,11 @@ def _try_initializing_function( # initializing the function class calls the setup method internally function(**function_args) except Exception as e: +<<<<<<< HEAD err_msg = f"Error creating function {self.node.name}: {str(e)}" +======= + err_msg = f"Error creating Function: {str(e)}" +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) # logger.error(err_msg) raise RuntimeError(err_msg) @@ -644,7 +725,11 @@ def _try_initializing_function( def _resolve_function_io( self, function: FunctionCatalogEntry ) -> List[FunctionIOCatalogEntry]: +<<<<<<< HEAD """Private method that resolves the input/output definitions for a given function. +======= + """Private method that resolves the input/output definitions for a given Function. 
+>>>>>>> 2dacff69 (feat: sync master staging (#1050)) It first searches for the input/outputs in the CREATE statement. If not found, it resolves them using decorators. If not found there as well, it raises an error. Args: @@ -652,7 +737,11 @@ def _resolve_function_io( Returns: A List of FunctionIOCatalogEntry objects that represent the resolved input and +<<<<<<< HEAD output definitions for the function. +======= + output definitions for the Function. +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) Raises: RuntimeError: If an error occurs while resolving the function input/output @@ -678,7 +767,11 @@ def _resolve_function_io( except FunctionIODefinitionError as e: err_msg = ( +<<<<<<< HEAD f"Error creating function, input/output definition incorrect: {str(e)}" +======= + f"Error creating Function, input/output definition incorrect: {str(e)}" +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) ) logger.error(err_msg) raise RuntimeError(err_msg) diff --git a/evadb/executor/create_index_executor.py b/evadb/executor/create_index_executor.py index 407cfef3c0..73e1b48554 100644 --- a/evadb/executor/create_index_executor.py +++ b/evadb/executor/create_index_executor.py @@ -94,6 +94,7 @@ def _create_evadb_index(self): # Get feature tables. feat_tb_catalog_entry = self.table_ref.table.table_obj +<<<<<<< HEAD # Get feature column. feat_col_name = self.col_list[0].name feat_col_catalog_entry = [ @@ -124,6 +125,21 @@ def _create_evadb_index(self): self.vector_store_type, index_path ), ) +======= + # Add features to index. + # TODO: batch size is hardcoded for now. + input_dim = -1 + storage_engine = StorageEngine.factory(self.db, feat_catalog_entry) + for input_batch in storage_engine.read(feat_catalog_entry): + if self.node.function: + # Create index through function expression. + # Function(input column) -> 2 dimension feature vector. + input_batch.modify_column_alias(feat_catalog_entry.name.lower()) + feat_batch = self.node.function.evaluate(input_batch) + feat_batch.drop_column_alias() + input_batch.drop_column_alias() + feat = feat_batch.column_as_numpy_array("features") +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) else: # Skip index update if CREATE INDEX runs on a different index. logger.warn(msg) @@ -163,6 +179,7 @@ def _create_evadb_index(self): index.persist() # Save to catalog. +<<<<<<< HEAD if index_catalog_entry is None: self.catalog().insert_index_catalog_entry( self.name, @@ -172,6 +189,15 @@ def _create_evadb_index(self): function_expression_signature, self.index_def, ) +======= + self.catalog().insert_index_catalog_entry( + self.node.name, + self.index_path, + self.node.vector_store_type, + feat_column, + self.node.function.signature() if self.node.function else None, + ) +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) except Exception as e: # Delete index. if index: diff --git a/evadb/executor/create_udf_executor.py b/evadb/executor/create_udf_executor.py index e39aa317b0..7d68aab5f3 100644 --- a/evadb/executor/create_udf_executor.py +++ b/evadb/executor/create_udf_executor.py @@ -12,7 +12,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+import hashlib import os +import pickle from pathlib import Path from typing import Dict, List @@ -35,6 +37,7 @@ from evadb.utils.errors import UDFIODefinitionError from evadb.utils.generic_utils import ( load_udf_class_from_file, + try_to_import_forecast, try_to_import_ludwig, try_to_import_torch, try_to_import_ultralytics, @@ -69,7 +72,7 @@ def handle_huggingface_udf(self): def handle_ludwig_udf(self): """Handle ludwig UDFs - Use ludwig's auto_train engine to train/tune models. + Use Ludwig's auto_train engine to train/tune models. """ try_to_import_ludwig() from ludwig.automl import auto_train @@ -128,6 +131,119 @@ def handle_ultralytics_udf(self): self.node.metadata, ) + def handle_forecasting_udf(self): + """Handle forecasting UDFs""" + aggregated_batch_list = [] + child = self.children[0] + for batch in child.exec(): + aggregated_batch_list.append(batch) + aggregated_batch = Batch.concat(aggregated_batch_list, copy=False) + aggregated_batch.drop_column_alias() + + arg_map = {arg.key: arg.value for arg in self.node.metadata} + if not self.node.impl_path: + impl_path = Path(f"{self.udf_dir}/forecast.py").absolute().as_posix() + else: + impl_path = self.node.impl_path.absolute().as_posix() + arg_map = {arg.key: arg.value for arg in self.node.metadata} + + if "model" not in arg_map.keys(): + arg_map["model"] = "AutoARIMA" + if "frequency" not in arg_map.keys(): + arg_map["frequency"] = "M" + + model_name = arg_map["model"] + frequency = arg_map["frequency"] + + data = aggregated_batch.frames.rename(columns={arg_map["predict"]: "y"}) + if "time" in arg_map.keys(): + aggregated_batch.frames.rename(columns={arg_map["time"]: "ds"}) + if "id" in arg_map.keys(): + aggregated_batch.frames.rename(columns={arg_map["id"]: "unique_id"}) + + if "unique_id" not in list(data.columns): + data["unique_id"] = ["test" for x in range(len(data))] + + if "ds" not in list(data.columns): + data["ds"] = [x + 1 for x in range(len(data))] + + try_to_import_forecast() + from statsforecast import StatsForecast + from statsforecast.models import AutoARIMA, AutoCES, AutoETS, AutoTheta + + model_dict = { + "AutoARIMA": AutoARIMA, + "AutoCES": AutoCES, + "AutoETS": AutoETS, + "AutoTheta": AutoTheta, + } + + season_dict = { # https://pandas.pydata.org/docs/user_guide/timeseries.html#timeseries-offset-aliases + "H": 24, + "M": 12, + "Q": 4, + "SM": 24, + "BM": 12, + "BMS": 12, + "BQ": 4, + "BH": 24, + } + + new_freq = ( + frequency.split("-")[0] if "-" in frequency else frequency + ) # shortens longer frequencies like Q-DEC + season_length = season_dict[new_freq] if new_freq in season_dict else 1 + model = StatsForecast( + [model_dict[model_name](season_length=season_length)], freq=new_freq + ) + + model_dir = os.path.join( + self.db.config.get_value("storage", "model_dir"), self.node.name + ) + Path(model_dir).mkdir(parents=True, exist_ok=True) + model_path = os.path.join( + self.db.config.get_value("storage", "model_dir"), + self.node.name, + str(hashlib.sha256(data.to_string().encode()).hexdigest()) + ".pkl", + ) + + weight_file = Path(model_path) + + if not weight_file.exists(): + model.fit(data) + f = open(model_path, "wb") + pickle.dump(model, f) + f.close() + + arg_map_here = {"model_name": model_name, "model_path": model_path} + udf = self._try_initializing_udf(impl_path, arg_map_here) + io_list = self._resolve_udf_io(udf) + + metadata_here = [ + UdfMetadataCatalogEntry( + key="model_name", + value=model_name, + udf_id=None, + udf_name=None, + row_id=None, + ), + UdfMetadataCatalogEntry( + key="model_path", + 
value=model_path, + udf_id=None, + udf_name=None, + row_id=None, + ), + ] + + return ( + self.node.name, + impl_path, + self.node.udf_type, + io_list, + metadata_here, + ) + def handle_generic_udf(self): """Handle generic UDFs @@ -168,6 +284,8 @@ def exec(self, *args, **kwargs): name, impl_path, udf_type, io_list, metadata = self.handle_ultralytics_udf() elif self.node.udf_type == "Ludwig": name, impl_path, udf_type, io_list, metadata = self.handle_ludwig_udf() + elif self.node.udf_type == "Forecasting": + name, impl_path, udf_type, io_list, metadata = self.handle_forecasting_udf() else: name, impl_path, udf_type, io_list, metadata = self.handle_generic_udf() diff --git a/evadb/executor/drop_object_executor.py b/evadb/executor/drop_object_executor.py index 38d5419dc4..8d4e5e8ef2 100644 --- a/evadb/executor/drop_object_executor.py +++ b/evadb/executor/drop_object_executor.py @@ -42,9 +42,12 @@ def exec(self, *args, **kwargs): elif self.node.object_type == ObjectType.FUNCTION: yield self._handle_drop_function(self.node.name, self.node.if_exists) +<<<<<<< HEAD elif self.node.object_type == ObjectType.DATABASE: yield self._handle_drop_database(self.node.name, self.node.if_exists) +======= +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) def _handle_drop_table(self, table_name: str, if_exists: bool): if not self.catalog().check_table_exists(table_name): diff --git a/evadb/executor/executor_utils.py b/evadb/executor/executor_utils.py index 23987ea153..19fd077ead 100644 --- a/evadb/executor/executor_utils.py +++ b/evadb/executor/executor_utils.py @@ -69,6 +69,19 @@ def apply_project(batch: Batch, project_list: List[AbstractExpression]): batches = [expr.evaluate(batch) for expr in project_list] batch = Batch.merge_column_wise(batches) +<<<<<<< HEAD +======= + # persist stats of function expression + for expr in project_list: + for func_expr in expr.find_all(FunctionExpression): + if func_expr.function_obj and func_expr._stats: + function_id = func_expr.function_obj.row_id + catalog.upsert_function_cost_catalog_entry( + function_id, + func_expr.function_obj.name, + func_expr._stats.prev_cost, + ) +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) return batch @@ -78,6 +91,16 @@ def apply_predicate(batch: Batch, predicate: AbstractExpression) -> Batch: batch.drop_zero(outcomes) batch.reset_index() +<<<<<<< HEAD +======= + # persist stats of function expression + for func_expr in predicate.find_all(FunctionExpression): + if func_expr.function_obj and func_expr._stats: + function_id = func_expr.function_obj.row_id + catalog.upsert_function_cost_catalog_entry( + function_id, func_expr.function_obj.name, func_expr._stats.prev_cost + ) +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) return batch diff --git a/evadb/executor/function_scan_executor.py b/evadb/executor/function_scan_executor.py index 3c376ffab3..13b11745f7 100644 --- a/evadb/executor/function_scan_executor.py +++ b/evadb/executor/function_scan_executor.py @@ -42,6 +42,18 @@ def exec(self, *args, **kwargs) -> Iterator[Batch]: if not lateral_input.empty(): res = self.func_expr.evaluate(lateral_input) +<<<<<<< HEAD +======= + # persist stats of function expression + if self.func_expr.function_obj and self.func_expr._stats: + function_id = self.func_expr.function_obj.row_id + self.catalog().upsert_function_cost_catalog_entry( + function_id, + self.func_expr.function_obj.name, + self.func_expr._stats.prev_cost, + ) + +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) if not res.empty(): if self.do_unnest: res.unnest(res.columns) 
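The forecasting handler above normalizes the training frame to the (unique_id, ds, y) layout, maps the pandas frequency alias to a season length, fits a StatsForecast model once, and caches the pickled model under a SHA-256 of the frame. A minimal standalone sketch of that flow follows (not EvaDB code); it assumes the statsforecast package is installed, and the toy series and the /tmp model directory are placeholders:

    # Minimal sketch of the fit-once-and-cache flow used by handle_forecasting_udf.
    # The (unique_id, ds, y) layout, AutoARIMA default, and frequency table mirror the diff.
    import hashlib
    import pickle
    from pathlib import Path

    import pandas as pd
    from statsforecast import StatsForecast
    from statsforecast.models import AutoARIMA

    # Same frequency-alias -> season-length table as the handler.
    SEASON = {"H": 24, "M": 12, "Q": 4, "SM": 24, "BM": 12, "BMS": 12, "BQ": 4, "BH": 24}

    def fit_or_load(data: pd.DataFrame, frequency: str, model_dir: str) -> StatsForecast:
        freq = frequency.split("-")[0]            # shorten aliases such as "Q-DEC"
        season_length = SEASON.get(freq, 1)
        model = StatsForecast(models=[AutoARIMA(season_length=season_length)], freq=freq)

        # Cache key: hash of the rendered training frame, as in the executor.
        key = hashlib.sha256(data.to_string().encode()).hexdigest()
        path = Path(model_dir) / f"{key}.pkl"
        path.parent.mkdir(parents=True, exist_ok=True)

        if path.exists():
            return pickle.loads(path.read_bytes())
        model.fit(data)
        path.write_bytes(pickle.dumps(model))
        return model

    # Example: a toy monthly series with the expected column names.
    df = pd.DataFrame(
        {"unique_id": ["a"] * 24,
         "ds": pd.date_range("2020-01-01", periods=24, freq="M"),
         "y": range(24)}
    )
    forecaster = fit_or_load(df, "M", "/tmp/evadb_models")
    print(forecaster.predict(h=12).head())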
diff --git a/evadb/executor/show_info_executor.py b/evadb/executor/show_info_executor.py index 96dc0a537f..7e806eb567 100644 --- a/evadb/executor/show_info_executor.py +++ b/evadb/executor/show_info_executor.py @@ -30,10 +30,14 @@ def exec(self, *args, **kwargs): show_entries = [] assert ( +<<<<<<< HEAD self.node.show_type is ShowType.FUNCTIONS or ShowType.TABLES or ShowType.DATABASES or ShowType.CONFIG +======= + self.node.show_type is ShowType.FUNCTIONS or ShowType.TABLES +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) ), f"Show command does not support type {self.node.show_type}" if self.node.show_type is ShowType.FUNCTIONS: diff --git a/evadb/functions/chatgpt.py b/evadb/functions/chatgpt.py index 61253116fe..e5998c545b 100644 --- a/evadb/functions/chatgpt.py +++ b/evadb/functions/chatgpt.py @@ -80,7 +80,11 @@ class ChatGPT(AbstractFunction): def name(self) -> str: return "ChatGPT" +<<<<<<< HEAD @setup(cacheable=True, function_type="chat-completion", batchable=True) +======= + @setup(cacheable=False, function_type="chat-completion", batchable=True) +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) def setup( self, model="gpt-3.5-turbo", diff --git a/evadb/functions/forecast.py b/evadb/functions/forecast.py index 1571f6c4fc..f073ebfb84 100644 --- a/evadb/functions/forecast.py +++ b/evadb/functions/forecast.py @@ -18,15 +18,26 @@ import pandas as pd +<<<<<<< HEAD from evadb.functions.abstract.abstract_function import AbstractFunction from evadb.functions.decorators.decorators import setup class ForecastModel(AbstractFunction): +======= +from evadb.catalog.catalog_type import NdArrayType +from evadb.udfs.abstract.abstract_udf import AbstractUDF +from evadb.udfs.decorators.decorators import forward, setup +from evadb.udfs.decorators.io_descriptors.data_types import PandasDataframe + + +class ForecastModel(AbstractUDF): +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) @property def name(self) -> str: return "ForecastModel" +<<<<<<< HEAD @setup(cacheable=False, function_type="Forecasting", batchable=True) def setup( self, @@ -38,11 +49,16 @@ def setup( horizon: int, library: str, ): +======= + @setup(cacheable=False, udf_type="Forecasting", batchable=True) + def setup(self, model_name: str, model_path: str): +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) f = open(model_path, "rb") loaded_model = pickle.load(f) f.close() self.model = loaded_model self.model_name = model_name +<<<<<<< HEAD self.predict_column_rename = predict_column_rename self.time_column_rename = time_column_rename self.id_column_rename = id_column_rename @@ -63,3 +79,31 @@ def forward(self, data) -> pd.DataFrame: } )[: self.horizon * forecast_df["unique_id"].nunique()] return forecast_df +======= + + @forward( + input_signatures=[], + output_signatures=[ + PandasDataframe( + columns=["y"], + column_types=[ + NdArrayType.FLOAT32, + ], + column_shapes=[(None,)], + ) + ], + ) + def forward(self, data) -> pd.DataFrame: + horizon = list(data.iloc[:, -1])[0] + assert ( + type(horizon) is int + ), "Forecast UDF expects integral horizon in parameter." 
+ forecast_df = self.model.predict(h=horizon) + forecast_df = forecast_df.rename(columns={self.model_name: "y"}) + return pd.DataFrame( + forecast_df, + columns=[ + "y", + ], + ) +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) diff --git a/evadb/functions/function_bootstrap_queries.py b/evadb/functions/function_bootstrap_queries.py index 0bf7c4ed90..ce62e87f28 100644 --- a/evadb/functions/function_bootstrap_queries.py +++ b/evadb/functions/function_bootstrap_queries.py @@ -49,6 +49,7 @@ EvaDB_INSTALLATION_DIR ) +<<<<<<< HEAD DummyNoInputFunction_function_query = """CREATE FUNCTION IF NOT EXISTS DummyNoInputFunction IMPL '{}/../test/util.py'; @@ -63,6 +64,8 @@ EvaDB_INSTALLATION_DIR ) +======= +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) fuzzy_function_query = """CREATE FUNCTION IF NOT EXISTS FuzzDistance INPUT (Input_Array1 NDARRAY ANYTYPE, Input_Array2 NDARRAY ANYTYPE) OUTPUT (distance FLOAT(32, 7)) @@ -208,6 +211,7 @@ EvaDB_INSTALLATION_DIR ) +<<<<<<< HEAD dalle_function_query = """CREATE FUNCTION IF NOT EXISTS DallE IMPL '{}/functions/dalle.py'; """.format( @@ -218,12 +222,21 @@ def init_builtin_functions(db: EvaDBDatabase, mode: str = "debug") -> None: """Load the built-in functions into the system during system bootstrapping. +======= +def init_builtin_functions(db: EvaDBDatabase, mode: str = "debug") -> None: + """Load the built-in functions into the system during system bootstrapping. + +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) The function loads a set of pre-defined function queries based on the `mode` argument. In 'debug' mode, the function loads debug functions along with release functions. In 'release' mode, only release functions are loaded. In addition, in 'debug' mode, the function loads a smaller model to accelerate the test suite time. +<<<<<<< HEAD Args:G +======= + Args: +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) mode (str, optional): The mode for loading functions, either 'debug' or 'release'. Defaults to 'debug'. 
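init_builtin_functions above assembles its query list in two steps: the release bootstrap queries are always loaded, and the debug-only helpers are appended when mode is 'debug'. A small sketch of that assembly, reusing the DallE and DummyNoInputFunction query strings from the diff; the installation directory is a stand-in value for this example:

    # Sketch of the bootstrap-query assembly: release queries always, debug helpers
    # only in debug mode. The installation directory is a stand-in path.
    EvaDB_INSTALLATION_DIR = "/opt/evadb"

    dalle_function_query = """CREATE FUNCTION IF NOT EXISTS DallE
        IMPL '{}/functions/dalle.py';
    """.format(EvaDB_INSTALLATION_DIR)

    DummyNoInputFunction_function_query = """CREATE FUNCTION IF NOT EXISTS DummyNoInputFunction
        IMPL '{}/../test/util.py';
    """.format(EvaDB_INSTALLATION_DIR)

    def collect_bootstrap_queries(mode: str = "debug") -> list:
        queries = [dalle_function_query]                        # release functions
        if mode == "debug":                                     # debug adds test helpers
            queries.append(DummyNoInputFunction_function_query)
        return queries

    for query in collect_bootstrap_queries("debug"):
        print(query.strip())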
@@ -259,8 +272,11 @@ def init_builtin_functions(db: EvaDBDatabase, mode: str = "debug") -> None: # Mvit_function_query, Sift_function_query, Yolo_function_query, +<<<<<<< HEAD stablediffusion_function_query, dalle_function_query, +======= +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) ] # if mode is 'debug', add debug functions @@ -270,8 +286,11 @@ def init_builtin_functions(db: EvaDBDatabase, mode: str = "debug") -> None: DummyObjectDetector_function_query, DummyMultiObjectDetector_function_query, DummyFeatureExtractor_function_query, +<<<<<<< HEAD DummyNoInputFunction_function_query, DummyLLM_function_query, +======= +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) ] ) diff --git a/evadb/optimizer/operators.py b/evadb/optimizer/operators.py index 5b9bbf78de..32deceb304 100644 --- a/evadb/optimizer/operators.py +++ b/evadb/optimizer/operators.py @@ -22,7 +22,10 @@ from evadb.catalog.models.function_io_catalog import FunctionIOCatalogEntry from evadb.catalog.models.function_metadata_catalog import FunctionMetadataCatalogEntry from evadb.catalog.models.table_catalog import TableCatalogEntry +<<<<<<< HEAD from evadb.catalog.models.utils import IndexCatalogEntry +======= +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) from evadb.expression.abstract_expression import AbstractExpression from evadb.expression.constant_value_expression import ConstantValueExpression from evadb.expression.function_expression import FunctionExpression @@ -642,10 +645,16 @@ class LogicalCreateFunction(Operator): Attributes: name: str function_name provided by the user required +<<<<<<< HEAD or_replace: bool if true should overwrite if function with same name exists if_not_exists: bool if true should skip if function with same name exists +======= + if_not_exists: bool + if true should throw an error if function with same name exists + else will replace the existing +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) inputs: List[FunctionIOCatalogEntry] function inputs, annotated list similar to table columns outputs: List[FunctionIOCatalogEntry] @@ -1095,8 +1104,12 @@ def __init__( table_ref: TableRef, col_list: List[ColumnDefinition], vector_store_type: VectorStoreType, +<<<<<<< HEAD project_expr_list: List[AbstractExpression], index_def: str, +======= + function: FunctionExpression = None, +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) children: List = None, ): super().__init__(OperatorType.LOGICALCREATEINDEX, children) @@ -1105,8 +1118,12 @@ def __init__( self._table_ref = table_ref self._col_list = col_list self._vector_store_type = vector_store_type +<<<<<<< HEAD self._project_expr_list = project_expr_list self._index_def = index_def +======= + self._function = function +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) @property def name(self): @@ -1129,12 +1146,17 @@ def vector_store_type(self): return self._vector_store_type @property +<<<<<<< HEAD def project_expr_list(self): return self._project_expr_list @property def index_def(self): return self._index_def +======= + def function(self): + return self._function +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) def __eq__(self, other): is_subtree_equal = super().__eq__(other) @@ -1147,8 +1169,12 @@ def __eq__(self, other): and self.table_ref == other.table_ref and self.col_list == other.col_list and self.vector_store_type == other.vector_store_type +<<<<<<< HEAD and self.project_expr_list == other.project_expr_list and self.index_def == other.index_def +======= + and self.function == other.function +>>>>>>> 2dacff69 (feat: 
sync master staging (#1050)) ) def __hash__(self) -> int: @@ -1160,8 +1186,12 @@ def __hash__(self) -> int: self.table_ref, tuple(self.col_list), self.vector_store_type, +<<<<<<< HEAD tuple(self.project_expr_list), self.index_def, +======= + self.function, +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) ) ) diff --git a/evadb/optimizer/optimizer_utils.py b/evadb/optimizer/optimizer_utils.py index 13062f398c..4a0b776399 100644 --- a/evadb/optimizer/optimizer_utils.py +++ b/evadb/optimizer/optimizer_utils.py @@ -307,6 +307,7 @@ def enable_cache_on_expression_tree( def check_expr_validity_for_cache(expr: FunctionExpression): +<<<<<<< HEAD valid = expr.name in CACHEABLE_FUNCTIONS and not expr.has_cache() if len(expr.children) == 1: # Normal function that only takes one parameter. @@ -317,6 +318,14 @@ def check_expr_validity_for_cache(expr: FunctionExpression): expr.children[1], TupleValueExpression ) return valid +======= + return ( + expr.name in CACHEABLE_FUNCTIONS + and not expr.has_cache() + and len(expr.children) <= 1 + and isinstance(expr.children[0], TupleValueExpression) + ) +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) def get_expression_execution_cost( diff --git a/evadb/optimizer/rules/rules.py b/evadb/optimizer/rules/rules.py index 8e18e4d70b..b63b195680 100644 --- a/evadb/optimizer/rules/rules.py +++ b/evadb/optimizer/rules/rules.py @@ -18,7 +18,10 @@ from evadb.catalog.catalog_type import TableType, VectorStoreType from evadb.catalog.catalog_utils import is_video_table +<<<<<<< HEAD from evadb.catalog.models.utils import IndexCatalogEntry +======= +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) from evadb.constants import CACHEABLE_FUNCTIONS from evadb.executor.execution_context import Context from evadb.expression.expression_utils import ( @@ -571,6 +574,7 @@ def _exists_predicate(opr): # Get column catalog entry and function_signature. column_catalog_entry = tv_expr.col_object +<<<<<<< HEAD # Only check the index existence when building on EvaDB data. if not is_postgres_data_source: @@ -594,6 +598,19 @@ def _exists_predicate(opr): save_file_path="", type=VectorStoreType.PGVECTOR, feat_column=column_catalog_entry, +======= + function_signature = ( + None + if isinstance(base_func_expr, TupleValueExpression) + else base_func_expr.signature() + ) + + # Get index catalog. Check if an index exists for matching + # function signature and table columns. + index_catalog_entry = ( + catalog_manager().get_index_catalog_entry_by_column_and_function_signature( + column_catalog_entry, function_signature +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) ) # Construct the Vector index scan plan. 
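Both variants of check_expr_validity_for_cache above encode the same policy: a function expression is cache-eligible only if its name is registered as cacheable, it has no cache attached yet, and its data argument is a plain column reference; the HEAD side also accepts the two-argument prompt form (e.g. ChatGPT). A standalone sketch of that rule with simplified stand-ins for the expression classes; the names in CACHEABLE_FUNCTIONS are illustrative, not the real registry:

    # Standalone sketch of the cache-eligibility rule, using stand-ins for
    # EvaDB's FunctionExpression / TupleValueExpression.
    from dataclasses import dataclass, field
    from typing import List

    CACHEABLE_FUNCTIONS = {"ChatGPT", "SentenceFeatureExtractor"}   # illustrative

    @dataclass
    class TupleValueExpr:        # stand-in for TupleValueExpression (a column reference)
        name: str

    @dataclass
    class FunctionExpr:          # stand-in for FunctionExpression
        name: str
        children: List[object] = field(default_factory=list)
        cached: bool = False

    def can_cache(expr: FunctionExpr) -> bool:
        valid = expr.name in CACHEABLE_FUNCTIONS and not expr.cached
        if len(expr.children) == 1:
            # Normal function that takes a single column argument.
            valid = valid and isinstance(expr.children[0], TupleValueExpr)
        elif len(expr.children) == 2:
            # Prompt-style function, e.g. ChatGPT(prompt, column): only the data
            # argument has to be a column reference.
            valid = valid and isinstance(expr.children[1], TupleValueExpr)
        return valid

    print(can_cache(FunctionExpr("ChatGPT", ["summarize", TupleValueExpr("data")])))  # True
    print(can_cache(FunctionExpr("Yolo", [TupleValueExpr("data")])))                  # False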
@@ -832,8 +849,12 @@ def apply(self, before: LogicalCreateIndex, context: OptimizerContext): before.table_ref, before.col_list, before.vector_store_type, +<<<<<<< HEAD before.project_expr_list, before.index_def, +======= + before.function, +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) ) child = SeqScanPlan(None, before.project_expr_list, before.table_ref.alias) batch_mem_size = context.db.config.get_value("executor", "batch_mem_size") diff --git a/evadb/optimizer/statement_to_opr_converter.py b/evadb/optimizer/statement_to_opr_converter.py index c60d0b258b..1462a722e2 100644 --- a/evadb/optimizer/statement_to_opr_converter.py +++ b/evadb/optimizer/statement_to_opr_converter.py @@ -55,8 +55,13 @@ from evadb.parser.select_statement import SelectStatement from evadb.parser.show_statement import ShowStatement from evadb.parser.statement import AbstractStatement +<<<<<<< HEAD from evadb.parser.table_ref import JoinNode, TableRef, TableValuedExpression from evadb.parser.types import FunctionType, JoinType +======= +from evadb.parser.table_ref import TableRef +from evadb.parser.types import FunctionType +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) from evadb.utils.logging_manager import logger @@ -364,8 +369,12 @@ def visit_create_index(self, statement: CreateIndexStatement): statement.table_ref, statement.col_list, statement.vector_store_type, +<<<<<<< HEAD statement.project_expr_list, statement.index_def, +======= + statement.function, +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) ) self._plan = create_index_opr diff --git a/evadb/parser/create_function_statement.py b/evadb/parser/create_function_statement.py index eb35fcffaa..f3080865ed 100644 --- a/evadb/parser/create_function_statement.py +++ b/evadb/parser/create_function_statement.py @@ -70,12 +70,16 @@ def __init__( self._metadata = metadata def __str__(self) -> str: +<<<<<<< HEAD s = "CREATE" if self._or_replace: s += " OR REPLACE" s += " " + "FUNCTION" +======= + s = "CREATE FUNCTION" +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) if self._if_not_exists: s += " IF NOT EXISTS" diff --git a/evadb/parser/create_index_statement.py b/evadb/parser/create_index_statement.py index 5eb312aa28..d1b71fed93 100644 --- a/evadb/parser/create_index_statement.py +++ b/evadb/parser/create_index_statement.py @@ -32,7 +32,11 @@ def __init__( table_ref: TableRef, col_list: List[ColumnDefinition], vector_store_type: VectorStoreType, +<<<<<<< HEAD project_expr_list: List[AbstractStatement], +======= + function: FunctionExpression = None, +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) ): super().__init__(StatementType.CREATE_INDEX) self._name = name @@ -40,6 +44,7 @@ def __init__( self._table_ref = table_ref self._col_list = col_list self._vector_store_type = vector_store_type +<<<<<<< HEAD self._project_expr_list = project_expr_list # Definition of CREATE INDEX. 
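A small aside on the __hash__ bodies above: the list-valued fields (col_list, project_expr_list) are wrapped in tuple(...) because Python lists are unhashable, so they must be frozen before they can participate in the statement hash. The sketch below uses placeholder values for the statement attributes:

    # Why __hash__ wraps list-valued fields in tuple(...): lists are unhashable.
    name = "testindex"
    col_list = ["featCol"]
    index_def = "CREATE INDEX testindex ON MyVideo (featCol) USING FAISS;"

    try:
        hash((name, col_list, index_def))
    except TypeError as err:
        print("unhashable:", err)                     # TypeError: unhashable type: 'list'

    print(hash((name, tuple(col_list), index_def)))   # fine once the list is frozen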
@@ -68,6 +73,17 @@ def traverse_create_function_expression_str(expr): print_str += f" ({traverse_create_function_expression_str(function_expr)})" print_str += f" USING {self._vector_store_type};" +======= + self._function = function + + def __str__(self) -> str: + print_str = "CREATE INDEX {} ON {} ({}{}) ".format( + self._name, + self._table_ref, + "" if self._function else self._function, + tuple(self._col_list), + ) +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) return print_str @property @@ -91,6 +107,7 @@ def vector_store_type(self): return self._vector_store_type @property +<<<<<<< HEAD def project_expr_list(self): return self._project_expr_list @@ -101,6 +118,10 @@ def project_expr_list(self, project_expr_list: List[AbstractExpression]): @property def index_def(self): return self._index_def +======= + def function(self): + return self._function +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) def __eq__(self, other): if not isinstance(other, CreateIndexStatement): @@ -111,8 +132,12 @@ def __eq__(self, other): and self._table_ref == other.table_ref and self.col_list == other.col_list and self._vector_store_type == other.vector_store_type +<<<<<<< HEAD and self._project_expr_list == other.project_expr_list and self._index_def == other.index_def +======= + and self._function == other.function +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) ) def __hash__(self) -> int: @@ -124,7 +149,11 @@ def __hash__(self) -> int: self._table_ref, tuple(self.col_list), self._vector_store_type, +<<<<<<< HEAD tuple(self._project_expr_list), self._index_def, +======= + self._function, +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) ) ) diff --git a/evadb/parser/evadb.lark b/evadb/parser/evadb.lark index e834d1a7d0..36410ed8c9 100644 --- a/evadb/parser/evadb.lark +++ b/evadb/parser/evadb.lark @@ -35,10 +35,17 @@ create_table: CREATE TABLE if_not_exists? table_name (create_definitions | (AS s rename_table: RENAME TABLE table_name TO table_name // Create Functions +<<<<<<< HEAD create_function: CREATE or_replace? FUNCTION if_not_exists? function_name INPUT create_definitions OUTPUT create_definitions TYPE function_type IMPL function_impl function_metadata* | CREATE or_replace? FUNCTION if_not_exists? function_name IMPL function_impl function_metadata* | CREATE or_replace? FUNCTION if_not_exists? function_name TYPE function_type function_metadata* | CREATE or_replace? FUNCTION if_not_exists? function_name FROM LR_BRACKET select_statement RR_BRACKET TYPE function_type function_metadata* +======= +create_function: CREATE FUNCTION if_not_exists? function_name INPUT create_definitions OUTPUT create_definitions TYPE function_type IMPL function_impl function_metadata* + | CREATE FUNCTION if_not_exists? function_name IMPL function_impl function_metadata* + | CREATE FUNCTION if_not_exists? function_name TYPE function_type function_metadata* + | CREATE FUNCTION if_not_exists? 
function_name FROM LR_BRACKET select_statement RR_BRACKET TYPE function_type function_metadata* +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) // Details function_name: uid @@ -51,7 +58,11 @@ function_metadata: function_metadata_key function_metadata_value function_metadata_key: uid +<<<<<<< HEAD function_metadata_value: constant +======= +function_metadata_value: string_literal | decimal_literal +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) vector_store_type: USING (FAISS | QDRANT | PINECONE | PGVECTOR | CHROMADB) @@ -177,7 +188,11 @@ describe_statement: DESCRIBE table_name help_statement: HELP STRING_LITERAL +<<<<<<< HEAD show_statement: SHOW (FUNCTIONS | TABLES | uid | DATABASES) +======= +show_statement: SHOW (FUNCTIONS | TABLES) +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) explain_statement: EXPLAIN explainable_statement @@ -278,7 +293,11 @@ or_replace: OR REPLACE function_call: function ->function_call | aggregate_windowed_function ->aggregate_function_call +<<<<<<< HEAD function: simple_id "(" (STAR | function_args)? ")" dotted_id? +======= +function: simple_id "(" (STAR | function_args) ")" dotted_id? +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) aggregate_windowed_function: aggregate_function_name "(" function_arg ")" | COUNT "(" (STAR | function_arg) ")" diff --git a/evadb/parser/lark_visitor/_create_statements.py b/evadb/parser/lark_visitor/_create_statements.py index 18e13ca3fc..5bce4ae41a 100644 --- a/evadb/parser/lark_visitor/_create_statements.py +++ b/evadb/parser/lark_visitor/_create_statements.py @@ -257,12 +257,19 @@ def create_index(self, tree): elif child.data == "index_elem": index_elem = self.visit(child) +<<<<<<< HEAD # Projection list of child of index creation. project_expr_list = [] # Parse either a single function call or column list. if not isinstance(index_elem, list): project_expr_list += [index_elem] +======= + # Parse either a single function call or column list. + col_list, function = None, None + if not isinstance(index_elem, list): + function = index_elem +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) # Traverse to the tuple value expression. 
while not isinstance(index_elem, TupleValueExpression): @@ -277,12 +284,16 @@ def create_index(self, tree): col_list += [ColumnDefinition(tv_expr.name, None, None, None)] return CreateIndexStatement( +<<<<<<< HEAD index_name, if_not_exists, table_ref, col_list, vector_store_type, project_expr_list, +======= + index_name, table_ref, col_list, vector_store_type, function +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) ) def vector_store_type(self, tree): diff --git a/evadb/parser/lark_visitor/_drop_statement.py b/evadb/parser/lark_visitor/_drop_statement.py index 0b397378ae..8d2c829cf9 100644 --- a/evadb/parser/lark_visitor/_drop_statement.py +++ b/evadb/parser/lark_visitor/_drop_statement.py @@ -59,6 +59,7 @@ def drop_function(self, tree): if_exists = True return DropObjectStatement(ObjectType.FUNCTION, function_name, if_exists) +<<<<<<< HEAD # Drop Database def drop_database(self, tree): @@ -73,3 +74,5 @@ def drop_database(self, tree): database_name = self.visit(child) return DropObjectStatement(ObjectType.DATABASE, database_name, if_exists) +======= +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) diff --git a/evadb/parser/lark_visitor/_functions.py b/evadb/parser/lark_visitor/_functions.py index a3b5a868af..70cc7fc6e1 100644 --- a/evadb/parser/lark_visitor/_functions.py +++ b/evadb/parser/lark_visitor/_functions.py @@ -30,7 +30,11 @@ class Functions: def function(self, tree): function_name = None function_output = None +<<<<<<< HEAD function_args = [] +======= + function_args = None +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) for child in tree.children: if isinstance(child, Token): @@ -60,7 +64,10 @@ def function_args(self, tree): # Create function def create_function(self, tree): function_name = None +<<<<<<< HEAD or_replace = False +======= +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) if_not_exists = False input_definitions = [] output_definitions = [] @@ -74,8 +81,11 @@ def create_function(self, tree): if isinstance(child, Tree): if child.data == "function_name": function_name = self.visit(child) +<<<<<<< HEAD elif child.data == "or_replace": or_replace = True +======= +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) elif child.data == "if_not_exists": if_not_exists = True elif child.data == "create_definitions": @@ -106,7 +116,10 @@ def create_function(self, tree): return CreateFunctionStatement( function_name, +<<<<<<< HEAD or_replace, +======= +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) if_not_exists, impl_path, input_definitions, diff --git a/evadb/parser/lark_visitor/_show_statements.py b/evadb/parser/lark_visitor/_show_statements.py index ca9581aca7..08aa10d7d8 100644 --- a/evadb/parser/lark_visitor/_show_statements.py +++ b/evadb/parser/lark_visitor/_show_statements.py @@ -23,9 +23,15 @@ class Show: def show_statement(self, tree): token = tree.children[1] +<<<<<<< HEAD if isinstance(token, str) and str.upper(token) == "FUNCTIONS": return ShowStatement(show_type=ShowType.FUNCTIONS) elif isinstance(token, str) and str.upper(token) == "TABLES": +======= + if str.upper(token) == "FUNCTIONS": + return ShowStatement(show_type=ShowType.FUNCTIONS) + elif str.upper(token) == "TABLES": +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) return ShowStatement(show_type=ShowType.TABLES) elif isinstance(token, str) and str.upper(token) == "DATABASES": return ShowStatement(show_type=ShowType.DATABASES) diff --git a/evadb/plan_nodes/create_function_plan.py b/evadb/plan_nodes/create_function_plan.py index 6023cf144c..af4d83d3a3 100644 --- 
a/evadb/plan_nodes/create_function_plan.py +++ b/evadb/plan_nodes/create_function_plan.py @@ -28,10 +28,16 @@ class CreateFunctionPlan(AbstractPlan): Attributes: name: str function_name provided by the user required +<<<<<<< HEAD or_replace: bool if true should overwrite if function with same name exists if_not_exists: bool if true should skip if function with same name exists +======= + if_not_exists: bool + if true should throw an error if function with same name exists + else will replace the existing +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) inputs: List[FunctionIOCatalogEntry] function inputs, annotated list similar to table columns outputs: List[FunctionIOCatalogEntry] @@ -97,7 +103,10 @@ def metadata(self): def __str__(self): return "CreateFunctionPlan(name={}, \ +<<<<<<< HEAD or_replace={}, \ +======= +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) if_not_exists={}, \ inputs={}, \ outputs={}, \ diff --git a/evadb/plan_nodes/create_index_plan.py b/evadb/plan_nodes/create_index_plan.py index e5c5734779..1ff3abb881 100644 --- a/evadb/plan_nodes/create_index_plan.py +++ b/evadb/plan_nodes/create_index_plan.py @@ -31,8 +31,12 @@ def __init__( table_ref: TableRef, col_list: List[ColumnDefinition], vector_store_type: VectorStoreType, +<<<<<<< HEAD project_expr_list: List[AbstractExpression], index_def: str, +======= + function: FunctionExpression = None, +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) ): super().__init__(PlanOprType.CREATE_INDEX) self._name = name @@ -40,8 +44,12 @@ def __init__( self._table_ref = table_ref self._col_list = col_list self._vector_store_type = vector_store_type +<<<<<<< HEAD self._project_expr_list = project_expr_list self._index_def = index_def +======= + self._function = function +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) @property def name(self): @@ -64,12 +72,17 @@ def vector_store_type(self): return self._vector_store_type @property +<<<<<<< HEAD def project_expr_list(self): return self._project_expr_list @property def index_def(self): return self._index_def +======= + def function(self): + return self._function +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) def __str__(self): function_expr = None @@ -86,7 +99,11 @@ def __str__(self): self._table_ref, tuple(self._col_list), self._vector_store_type, +<<<<<<< HEAD "" if function_expr is None else "function={}".format(function_expr), +======= + "" if not self._function else "function={}".format(self._function), +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) ) def __hash__(self) -> int: @@ -98,7 +115,11 @@ def __hash__(self) -> int: self.table_ref, tuple(self.col_list), self.vector_store_type, +<<<<<<< HEAD tuple(self.project_expr_list), self.index_def, +======= + self.function, +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) ) ) diff --git a/evadb/plan_nodes/show_info_plan.py b/evadb/plan_nodes/show_info_plan.py index 733cc0401d..ad2fd39812 100644 --- a/evadb/plan_nodes/show_info_plan.py +++ b/evadb/plan_nodes/show_info_plan.py @@ -36,9 +36,13 @@ def show_val(self): def __str__(self): if self._show_type == ShowType.FUNCTIONS: return "ShowFunctionPlan" +<<<<<<< HEAD if self._show_type == ShowType.DATABASES: return "ShowDatabasePlan" elif self._show_type == ShowType.TABLES: +======= + else: +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) return "ShowTablePlan" elif self._show_type == ShowType.CONFIG: return "ShowConfigPlan" diff --git a/evadb/third_party/databases/clickhouse/__init__.py b/evadb/third_party/databases/clickhouse/__init__.py index 
e9f1e2861b..2748d44ed9 100644 --- a/evadb/third_party/databases/clickhouse/__init__.py +++ b/evadb/third_party/databases/clickhouse/__init__.py @@ -12,4 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +<<<<<<<< HEAD:evadb/third_party/databases/clickhouse/__init__.py """Clickhouse integrations""" +======== +"""user defined test functions operating on ndarrays functions""" +>>>>>>>> 2dacff69 (feat: sync master staging (#1050)):test/integration_tests/long/functions/ndarray/__init__.py diff --git a/evadb/utils/generic_utils.py b/evadb/utils/generic_utils.py index fb6bd9986a..0ace33de07 100644 --- a/evadb/utils/generic_utils.py +++ b/evadb/utils/generic_utils.py @@ -292,7 +292,11 @@ def try_to_import_ray(): ) +<<<<<<< HEAD def try_to_import_statsforecast(): +======= +def try_to_import_forecast(): +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) try: from statsforecast import StatsForecast # noqa: F401 except ImportError: @@ -302,6 +306,7 @@ def try_to_import_statsforecast(): ) +<<<<<<< HEAD def try_to_import_neuralforecast(): try: from neuralforecast import NeuralForecast # noqa: F401 @@ -312,6 +317,8 @@ def try_to_import_neuralforecast(): ) +======= +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) def is_ray_available() -> bool: try: try_to_import_ray() @@ -351,6 +358,7 @@ def is_ludwig_available() -> bool: def is_forecast_available() -> bool: try: +<<<<<<< HEAD try_to_import_statsforecast() try_to_import_neuralforecast() return True @@ -391,6 +399,9 @@ def try_to_import_xgboost(): def is_xgboost_available() -> bool: try: try_to_import_xgboost() +======= + try_to_import_forecast() +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) return True except ValueError: # noqa: E722 return False diff --git a/script/formatting/formatter.py b/script/formatting/formatter.py index 433de9c48c..6941f882ae 100755 --- a/script/formatting/formatter.py +++ b/script/formatting/formatter.py @@ -461,6 +461,7 @@ def check_file(file): # CODESPELL #LOG.info("Codespell") +<<<<<<< HEAD subprocess.check_output(""" codespell "evadb/*.py" """, shell=True, universal_newlines=True) @@ -477,6 +478,24 @@ def check_file(file): shell=True, universal_newlines=True) subprocess.check_output(""" codespell "evadb/*.md" """, +======= + subprocess.check_output("codespell 'evadb/*.py'", + shell=True, + universal_newlines=True) + subprocess.check_output("codespell 'evadb/*/*.py'", + shell=True, + universal_newlines=True) + subprocess.check_output("codespell 'docs/source/*/*.rst'", + shell=True, + universal_newlines=True) + subprocess.check_output("codespell 'docs/source/*.rst'", + shell=True, + universal_newlines=True) + subprocess.check_output("codespell '*.md'", + shell=True, + universal_newlines=True) + subprocess.check_output("codespell 'evadb/*.md'", +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) shell=True, universal_newlines=True) diff --git a/script/formatting/spelling.txt b/script/formatting/spelling.txt index 3cb014eedd..9328f731f9 100644 --- a/script/formatting/spelling.txt +++ b/script/formatting/spelling.txt @@ -1,8 +1,12 @@ <<<<<<< HEAD +<<<<<<< HEAD personal_ws-1.1 en 1776 ======= personal_ws-1.1 en 1467 >>>>>>> 8c5b63dc (release: merge staging into master (#1032)) +======= +personal_ws-1.1 en 1563 +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) ABCD ABCMeta ANYDIM @@ -39,11 +43,14 @@ AbstractUDF AbstractUDFTest AggregationExpression AggregationExpressionsTest 
+<<<<<<< HEAD AirData AirDataPanel AirForecast AirPanelForecast AirPassengersPanel +======= +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) AliExpress Anirudh AnnotateTests @@ -154,11 +161,14 @@ CreateDatabaseStatement CreateDatabaseTest CreateExecutor CreateFromSelectPlan +<<<<<<< HEAD CreateFunctionExecutor CreateFunctionExecutorTest CreateFunctionPlan CreateFunctionStatement CreateIndex +======= +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) CreateIndexExecutor CreateIndexPlan CreateIndexStatement @@ -213,7 +223,10 @@ DeleteExecutorTest DeletePlan DeleteTableStatement DemoDB +<<<<<<< HEAD DemoFunc +======= +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) DemoTable DemoUDF Deserialize @@ -356,10 +369,13 @@ GaussianBlur GaussianBlurTests GenericHuggingfaceModel GenericLudwigModel +<<<<<<< HEAD GenericSklearnModel GenericUtilsTests GithubDataSourceTest GithubHandler +======= +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) GroupBy GroupByExecutor GroupByPlan @@ -580,7 +596,10 @@ MyMeta MyPDF MyPDFs MySQLNativeStorageEngineTest +<<<<<<< HEAD MyTable +======= +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) MyTextCSV MyUDF MyVideo @@ -588,11 +607,15 @@ MyVideoCSV MyVideos MydbHandler <<<<<<< HEAD +<<<<<<< HEAD MysqlHandler NBEATS NCHAR ======= >>>>>>> 8c5b63dc (release: merge staging into master (#1032)) +======= +MysqlHandler +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) NEQ NHITS NLP @@ -769,10 +792,15 @@ SampleExecutor SampleExecutorTest SamplePlan <<<<<<< HEAD +<<<<<<< HEAD SampleTable SampleVideoTable ======= >>>>>>> 8c5b63dc (release: merge staging into master (#1032)) +======= +SampleTable +SampleVideoTable +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) Scalability ScanPlan SchemaUtils @@ -865,14 +893,20 @@ TestSuite TestTable TestTextHFModel TestUDF +<<<<<<< HEAD TextClassifier +======= +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) TextFilterKeyword TextFilteringTests TextHFModel TextLoader TextPickleType TextProcessing +<<<<<<< HEAD TextSummarizer +======= +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) TextTestRunner TimeoutError TimerTests @@ -1227,9 +1261,13 @@ exog expr expresssion exprs +<<<<<<< HEAD extname extractOne extrator +======= +extractOne +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) f'LOAD facebook facedetector @@ -1247,9 +1285,13 @@ featCol featureextractor feedstock <<<<<<< HEAD +<<<<<<< HEAD fetchall ======= >>>>>>> 8c5b63dc (release: merge staging into master (#1032)) +======= +fetchall +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) ffill ffmpeg fileFormat @@ -1287,7 +1329,10 @@ gaurav gaussianBlur gb gc +<<<<<<< HEAD gcp +======= +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) gdp georgia geq @@ -1370,7 +1415,10 @@ integratedTerminal integrations intp invaid +<<<<<<< HEAD inviter +======= +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) io ipynb iq @@ -1523,7 +1571,10 @@ plangenerator pluggable png poolclass +<<<<<<< HEAD popitem +======= +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) popleft pos posix @@ -1633,10 +1684,14 @@ singledispatch singledispatchmethod sk <<<<<<< HEAD +<<<<<<< HEAD sklearn smallint ======= >>>>>>> 8c5b63dc (release: merge staging into master (#1032)) +======= +smallint +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) smi softmax spacy @@ -1718,7 +1773,10 @@ textsummarizer th thefuzz timm +<<<<<<< HEAD tinyint +======= +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) tmp toGrayscale toc @@ -1764,7 +1822,10 @@ url urllib urlparse urls 
+<<<<<<< HEAD usecase +======= +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) usecases usecols utf diff --git a/setup.py b/setup.py index e9bd17799f..6806355867 100644 --- a/setup.py +++ b/setup.py @@ -137,6 +137,10 @@ def read(path, encoding="utf-8"): ======= >>>>>>> 8c5b63dc (release: merge staging into master (#1032)) +forecasting_libs = [ + "statsforecast" # MODEL TRAIN AND FINE TUNING +] + ### NEEDED FOR DEVELOPER TESTING ONLY dev_libs = [ @@ -176,11 +180,17 @@ def read(path, encoding="utf-8"): "chromadb": chromadb_libs, "postgres": postgres_libs, "ludwig": ludwig_libs, +<<<<<<< HEAD "sklearn": sklearn_libs, "xgboost": xgboost_libs, "forecasting": forecasting_libs, # everything except ray, qdrant, ludwig and postgres. The first three fail on pyhton 3.11. "dev": dev_libs + vision_libs + document_libs + function_libs + notebook_libs + forecasting_libs + sklearn_libs + imagegen_libs + xgboost_libs +======= + "forecasting": forecasting_libs, + # everything except ray, qdrant, ludwig and postgres. The first three fail on pyhton 3.11. + "dev": dev_libs + vision_libs + document_libs + function_libs + notebook_libs + forecasting_libs, +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) } setup( diff --git a/test/integration_tests/long/test_function_executor.py b/test/integration_tests/long/test_function_executor.py index 2b21f20165..b1e34266c0 100644 --- a/test/integration_tests/long/test_function_executor.py +++ b/test/integration_tests/long/test_function_executor.py @@ -167,6 +167,7 @@ def test_create_function(self): ) self.assertEqual(actual, expected) +<<<<<<< HEAD def test_create_or_replace(self): function_name = "DummyObjectDetector" execute_query_fetch_all(self.evadb, f"DROP FUNCTION IF EXISTS {function_name};") @@ -191,6 +192,8 @@ def test_create_or_replace(self): expected = Batch(pd.DataFrame([f"Function {function_name} overwritten."])) self.assertEqual(actual, expected) +======= +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) def test_should_create_function_with_metadata(self): function_name = "DummyObjectDetector" execute_query_fetch_all(self.evadb, f"DROP FUNCTION {function_name};") @@ -199,11 +202,16 @@ def test_should_create_function_with_metadata(self): OUTPUT (label NDARRAY STR(10)) TYPE Classification IMPL 'test/util.py' +<<<<<<< HEAD CACHE TRUE BATCH FALSE INT_VAL 1 FLOAT_VAL 1.5 STR_VAL "gg"; +======= + CACHE 'TRUE' + BATCH 'FALSE'; +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) """ execute_query_fetch_all(self.evadb, create_function_query.format(function_name)) @@ -211,6 +219,7 @@ def test_should_create_function_with_metadata(self): entries = self.evadb.catalog().get_function_metadata_entries_by_function_name( function_name ) +<<<<<<< HEAD self.assertEqual(len(entries), 5) metadata = [(entry.key, entry.value) for entry in entries] @@ -222,6 +231,13 @@ def test_should_create_function_with_metadata(self): ("float_val", 1.5), ("str_val", "gg"), ] +======= + self.assertEqual(len(entries), 2) + metadata = [(entry.key, entry.value) for entry in entries] + + # metadata ultimately stored as lowercase string literals in metadata + expected_metadata = [("cache", "TRUE"), ("batch", "FALSE")] +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) self.assertEqual(set(metadata), set(expected_metadata)) def test_should_return_empty_metadata_list_for_missing_function(self): @@ -318,7 +334,11 @@ def test_should_raise_if_function_file_is_modified(self): "SELECT id,DummyObjectDetector(data) FROM MyVideo ORDER BY id;" ) +<<<<<<< HEAD # disabling warning for function 
modification for now +======= + # disabling warning for function modificiation for now +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) # with self.assertRaises(AssertionError): execute_query_fetch_all(self.evadb, select_query) diff --git a/test/integration_tests/long/test_model_forecasting.py b/test/integration_tests/long/test_model_forecasting.py index 47ffe65a83..0d8db977ad 100644 --- a/test/integration_tests/long/test_model_forecasting.py +++ b/test/integration_tests/long/test_model_forecasting.py @@ -37,6 +37,7 @@ def setUpClass(cls): y INTEGER);""" execute_query_fetch_all(cls.evadb, create_table_query) +<<<<<<< HEAD create_table_query = """ CREATE TABLE AirDataPanel (\ unique_id TEXT(30),\ @@ -54,10 +55,13 @@ def setUpClass(cls): bedrooms INTEGER);""" execute_query_fetch_all(cls.evadb, create_table_query) +======= +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) path = f"{EvaDB_ROOT_DIR}/data/forecasting/air-passengers.csv" load_query = f"LOAD CSV '{path}' INTO AirData;" execute_query_fetch_all(cls.evadb, load_query) +<<<<<<< HEAD path = f"{EvaDB_ROOT_DIR}/data/forecasting/AirPassengersPanel.csv" load_query = f"LOAD CSV '{path}' INTO AirDataPanel;" execute_query_fetch_all(cls.evadb, load_query) @@ -66,29 +70,42 @@ def setUpClass(cls): load_query = f"LOAD CSV '{path}' INTO HomeData;" execute_query_fetch_all(cls.evadb, load_query) +======= +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) @classmethod def tearDownClass(cls): shutdown_ray() # clean up +<<<<<<< HEAD execute_query_fetch_all(cls.evadb, "DROP TABLE IF EXISTS AirData;") execute_query_fetch_all(cls.evadb, "DROP TABLE IF EXISTS HomeData;") execute_query_fetch_all(cls.evadb, "DROP FUNCTION IF EXISTS AirForecast;") execute_query_fetch_all(cls.evadb, "DROP FUNCTION IF EXISTS HomeForecast;") +======= + execute_query_fetch_all(cls.evadb, "DROP TABLE IF EXISTS HomeRentals;") +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) @forecast_skip_marker def test_forecast(self): create_predict_udf = """ +<<<<<<< HEAD CREATE FUNCTION AirForecast FROM (SELECT unique_id, ds, y FROM AirData) TYPE Forecasting HORIZON 12 +======= + CREATE FUNCTION Forecast FROM + (SELECT unique_id, ds, y FROM AirData) + TYPE Forecasting +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) PREDICT 'y'; """ execute_query_fetch_all(self.evadb, create_predict_udf) predict_query = """ +<<<<<<< HEAD SELECT AirForecast() order by y; """ result = execute_query_fetch_all(self.evadb, predict_query) @@ -145,6 +162,12 @@ def test_forecast_with_column_rename(self): result.columns, ["homeforecast.type", "homeforecast.saledate", "homeforecast.ma"], ) +======= + SELECT Forecast(12) FROM AirData; + """ + result = execute_query_fetch_all(self.evadb, predict_query) + self.assertEqual(int(list(result.frames.iloc[:, -1])[-1]), 459) +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) if __name__ == "__main__": diff --git a/test/integration_tests/long/test_model_train.py b/test/integration_tests/long/test_model_train.py index 85e508f4d1..6e75bd4331 100644 --- a/test/integration_tests/long/test_model_train.py +++ b/test/integration_tests/long/test_model_train.py @@ -63,7 +63,11 @@ def tearDownClass(cls): @ludwig_skip_marker def test_ludwig_automl(self): create_predict_function = """ +<<<<<<< HEAD CREATE OR REPLACE FUNCTION PredictHouseRentLudwig FROM +======= + CREATE FUNCTION IF NOT EXISTS PredictHouseRent FROM +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) ( SELECT * FROM HomeRentals ) TYPE Ludwig PREDICT 'rental_price' diff --git 
a/test/integration_tests/long/test_reuse.py b/test/integration_tests/long/test_reuse.py index 3f41e5fbb0..859cf8b331 100644 --- a/test/integration_tests/long/test_reuse.py +++ b/test/integration_tests/long/test_reuse.py @@ -55,6 +55,7 @@ def setUp(self): self.evadb.catalog().reset() ua_detrac = f"{EvaDB_ROOT_DIR}/data/ua_detrac/ua_detrac.mp4" execute_query_fetch_all(self.evadb, f"LOAD VIDEO '{ua_detrac}' INTO DETRAC;") +<<<<<<< HEAD execute_query_fetch_all(self.evadb, "CREATE TABLE fruitTable (data TEXT(100))") data_list = [ "The color of apple is red", @@ -64,6 +65,8 @@ def setUp(self): execute_query_fetch_all( self.evadb, f"INSERT INTO fruitTable (data) VALUES ('{data}')" ) +======= +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) load_functions_for_testing(self.evadb) self._load_hf_model() diff --git a/test/integration_tests/short/test_drop_executor.py b/test/integration_tests/short/test_drop_executor.py index 632aa4a008..aa5383224b 100644 --- a/test/integration_tests/short/test_drop_executor.py +++ b/test/integration_tests/short/test_drop_executor.py @@ -112,11 +112,14 @@ def test_should_drop_table(self): self.evadb, drop_query, do_not_print_exceptions=True ) +<<<<<<< HEAD # we should be able to re-create the table execute_query_fetch_all(self.evadb, query) # clean up execute_query_fetch_all(self.evadb, drop_query) +======= +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) def run_create_function_query(self): create_function_query = """CREATE FUNCTION DummyObjectDetector INPUT (Frame_Array NDARRAY UINT8(3, 256, 256)) @@ -141,6 +144,7 @@ def test_should_drop_function(self): ) self.assertTrue(function is None) +<<<<<<< HEAD # We should be able to re-create the function self.run_create_function_query() # clean up @@ -155,6 +159,17 @@ def test_drop_wrong_function_name(self): ) self.assertTrue(function is not None) +======= + def test_drop_wrong_function_name(self): + self.run_create_function_query() + right_function_name = "DummyObjectDetector" + wrong_function_name = "FakeDummyObjectDetector" + function = self.evadb.catalog().get_function_catalog_entry_by_name( + right_function_name + ) + self.assertTrue(function is not None) + +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) # Test that dropping the wrong FUNCTION: # - does not affect FUNCTIONs in the catalog # - raises an appropriate exception diff --git a/test/integration_tests/short/test_select_executor.py b/test/integration_tests/short/test_select_executor.py index b108150458..f3126aaa73 100644 --- a/test/integration_tests/short/test_select_executor.py +++ b/test/integration_tests/short/test_select_executor.py @@ -430,7 +430,11 @@ def test_expression_tree_signature(self): self.evadb, "SELECT id FROM MyVideo WHERE DummyMultiObjectDetector(data).labels @> ['person'];", ) +<<<<<<< HEAD signature = next(plan.find_all(LogicalFilter)).predicate.children[0].signature() +======= + signature = plan.target_list[0].signature() +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) function_id = ( self.evadb.catalog() .get_function_catalog_entry_by_name("DummyMultiObjectDetector") diff --git a/test/markers.py b/test/markers.py index 3e95c1cfff..c21314dab7 100644 --- a/test/markers.py +++ b/test/markers.py @@ -19,7 +19,10 @@ import pytest from evadb.utils.generic_utils import ( +<<<<<<< HEAD is_chromadb_available, +======= +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) is_forecast_available, is_gpu_available, is_ludwig_available, @@ -102,7 +105,10 @@ is_forecast_available() is False, reason="Run only if forecasting packages 
available", ) +<<<<<<< HEAD stable_diffusion_skip_marker = pytest.mark.skipif( is_replicate_available() is False, reason="requires replicate" ) +======= +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) diff --git a/test/unit_tests/binder/test_statement_binder.py b/test/unit_tests/binder/test_statement_binder.py index d6642ea9a2..88086c0c0c 100644 --- a/test/unit_tests/binder/test_statement_binder.py +++ b/test/unit_tests/binder/test_statement_binder.py @@ -333,6 +333,7 @@ def test_bind_create_index(self): with self.assertRaises(AssertionError): binder._bind_create_index_statement(create_index_statement) +<<<<<<< HEAD col_def = MagicMock() col_def.name = "a" create_index_statement.col_list = [col_def] @@ -349,6 +350,12 @@ def test_bind_create_index(self): FunctionExpression(MagicMock(), name="a"), TupleValueExpression(name="*"), ] +======= + create_index_statement.col_list = ["foo"] + function_obj = MagicMock() + output = MagicMock() + function_obj.outputs = [output] +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) with patch.object( catalog(), @@ -363,7 +370,17 @@ def test_bind_create_index(self): output.array_dimensions = [1, 100] binder._bind_create_index_statement(create_index_statement) +<<<<<<< HEAD create_index_statement.project_expr_list = [TupleValueExpression(name="*")] +======= + create_index_statement.function = None + col_def = MagicMock() + col_def.name = "a" + create_index_statement.col_list = [col_def] + col = MagicMock() + col.name = "a" + create_index_statement.table_ref.table.table_obj.columns = [col] +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) with self.assertRaises(AssertionError): binder._bind_create_index_statement(create_index_statement) @@ -373,20 +390,32 @@ def test_bind_create_index(self): col.array_dimensions = [1, 10] binder._bind_create_index_statement(create_index_statement) +<<<<<<< HEAD def test_bind_create_function_should_raise_without_predict_for_ludwig(self): with patch.object(StatementBinder, "bind"): create_function_statement = MagicMock() create_function_statement.function_type = "ludwig" +======= + def test_bind_create_function_should_raise(self): + with patch.object(StatementBinder, "bind"): + create_function_statement = MagicMock() +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) create_function_statement.query.target_list = [] create_function_statement.metadata = [] binder = StatementBinder(StatementBinderContext(MagicMock())) with self.assertRaises(AssertionError): binder._bind_create_function_statement(create_function_statement) +<<<<<<< HEAD def test_bind_create_function_should_drop_row_id_for_select_star(self): with patch.object(StatementBinder, "bind"): create_function_statement = MagicMock() create_function_statement.function_type = "ludwig" +======= + def test_bind_create_function_should_drop_row_id(self): + with patch.object(StatementBinder, "bind"): + create_function_statement = MagicMock() +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) row_id_col_obj = ColumnCatalogEntry( name=IDENTIFIER_COLUMN, type=MagicMock(), @@ -452,6 +481,7 @@ def test_bind_create_function_should_drop_row_id_for_select_star(self): ] self.assertEqual(create_function_statement.inputs, expected_inputs) self.assertEqual(create_function_statement.outputs, expected_outputs) +<<<<<<< HEAD def test_bind_create_function_should_bind_forecast_with_default_columns(self): with patch.object(StatementBinder, "bind"): @@ -611,3 +641,5 @@ def test_bind_create_function_should_raise_forecast_missing_required_columns(sel err_msg = "Missing required 
{'ma'} columns for forecasting function." self.assertEqual(str(cm.exception), err_msg) +======= +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) diff --git a/test/unit_tests/catalog/test_catalog_manager.py b/test/unit_tests/catalog/test_catalog_manager.py index 6149be34c9..5fd8aa2477 100644 --- a/test/unit_tests/catalog/test_catalog_manager.py +++ b/test/unit_tests/catalog/test_catalog_manager.py @@ -142,6 +142,7 @@ def test_insert_function( function_io_list, function_metadata_list, ) +<<<<<<< HEAD function_mock.return_value.insert_entry.assert_called_with( "function", "sample.py", @@ -149,6 +150,14 @@ def test_insert_function( checksum_mock.return_value, function_io_list, function_metadata_list, +======= + functionio_mock.return_value.insert_entries.assert_called_with(function_io_list) + functionmetadata_mock.return_value.insert_entries.assert_called_with( + function_metadata_list + ) + function_mock.return_value.insert_entry.assert_called_with( + "function", "sample.py", "classification", checksum_mock.return_value +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) ) checksum_mock.assert_called_with("sample.py") self.assertEqual(actual, function_mock.return_value.insert_entry.return_value) diff --git a/test/unit_tests/executor/test_create_udf_executor.py b/test/unit_tests/executor/test_create_udf_executor.py index 63e62e76a9..d510cafd8f 100644 --- a/test/unit_tests/executor/test_create_udf_executor.py +++ b/test/unit_tests/executor/test_create_udf_executor.py @@ -57,6 +57,7 @@ def test_should_create_function(self, load_function_class_from_file_mock): {"key1": "value1", "key2": "value2"}, ) +<<<<<<< HEAD def test_should_raise_or_replace_if_not_exists(self): plan = type( "CreateFunctionPlan", @@ -80,11 +81,20 @@ def test_should_raise_or_replace_if_not_exists(self): def test_should_skip_if_not_exists(self, load_function_class_from_file_mock): catalog_instance = MagicMock() catalog_instance().get_function_catalog_entry_by_name.return_value = True +======= + @patch("evadb.executor.create_function_executor.load_function_class_from_file") + def test_should_raise_error_on_incorrect_io_definition( + self, load_function_class_from_file_mock + ): + catalog_instance = MagicMock() + catalog_instance().get_function_catalog_entry_by_name.return_value = None +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) catalog_instance().insert_function_catalog_entry.return_value = "function" impl_path = MagicMock() abs_path = impl_path.absolute.return_value = MagicMock() abs_path.as_posix.return_value = "test.py" load_function_class_from_file_mock.return_value.return_value = "mock_class" +<<<<<<< HEAD plan = type( "CreateFunctionPlan", (), @@ -200,6 +210,8 @@ def test_should_raise_error_on_incorrect_io_definition( abs_path = impl_path.absolute.return_value = MagicMock() abs_path.as_posix.return_value = "test.py" load_function_class_from_file_mock.return_value.return_value = "mock_class" +======= +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) incorrect_input_definition = PandasDataframe( columns=["Frame_Array", "Frame_Array_2"], column_types=[NdArrayType.UINT8], @@ -229,7 +241,11 @@ def test_should_raise_error_on_incorrect_io_definition( with self.assertRaises(RuntimeError) as exc: next(create_function_executor.exec()) self.assertIn( +<<<<<<< HEAD "Error creating function, input/output definition incorrect:", +======= + "Error creating Function, input/output definition incorrect:", +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) str(exc.exception), ) diff --git 
a/test/unit_tests/executor/test_plan_executor.py b/test/unit_tests/executor/test_plan_executor.py index 8fcaaef17e..3a7d07b0be 100644 --- a/test/unit_tests/executor/test_plan_executor.py +++ b/test/unit_tests/executor/test_plan_executor.py @@ -181,7 +181,11 @@ def test_execute_plan_for_create_insert_load_upload_plans(self, mock_build): # CreateFunctionExecutor mock_build.reset_mock() +<<<<<<< HEAD tree = MagicMock(node=CreateFunctionPlan(None, False, False, [], [], None)) +======= + tree = MagicMock(node=CreateFunctionPlan(None, False, [], [], None)) +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) mock_build.return_value = tree actual = list(PlanExecutor(MagicMock(), None).execute_plan()) tree.exec.assert_called_once() diff --git a/test/unit_tests/optimizer/test_statement_to_opr_converter.py b/test/unit_tests/optimizer/test_statement_to_opr_converter.py index 12d5d4f080..e4f28de2e6 100644 --- a/test/unit_tests/optimizer/test_statement_to_opr_converter.py +++ b/test/unit_tests/optimizer/test_statement_to_opr_converter.py @@ -137,6 +137,7 @@ def test_visit_select_should_not_call_visits_for_null_values(self): converter._visit_projection.assert_not_called() converter._visit_select_predicate.assert_not_called() +<<<<<<< HEAD def test_visit_select_without_table_ref(self): converter = StatementToPlanConverter() converter.visit_table_ref = MagicMock() @@ -158,6 +159,8 @@ def test_visit_select_without_table_ref(self): converter._visit_orderby.assert_not_called() converter._visit_limit.assert_not_called() +======= +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) @patch("evadb.optimizer.statement_to_opr_converter.LogicalCreateFunction") @patch( "evadb.optimizer.\ @@ -328,7 +331,10 @@ def test_check_plan_equality(self): extract_object_plan = LogicalExtractObject( MagicMock(), MagicMock(), MagicMock(), MagicMock() ) +<<<<<<< HEAD +======= +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) create_plan.append_child(create_function_plan) plans.append(dummy_plan) diff --git a/test/unit_tests/parser/test_parser.py b/test/unit_tests/parser/test_parser.py index b7b7a21978..b022d4fd97 100644 --- a/test/unit_tests/parser/test_parser.py +++ b/test/unit_tests/parser/test_parser.py @@ -121,6 +121,7 @@ def test_create_index_statement(self): self.assertEqual(actual_stmt, expected_stmt) self.assertEqual(actual_stmt.index_def, create_index_query) +<<<<<<< HEAD # create if_not_exists expected_stmt = CreateIndexStatement( "testindex", @@ -141,6 +142,8 @@ def test_create_index_statement(self): self.assertEqual(actual_stmt, expected_stmt) self.assertEqual(actual_stmt.index_def, create_index_query) +======= +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) # create index on Function expression create_index_query = ( "CREATE INDEX testindex ON MyVideo (FeatureExtractor(featCol)) USING FAISS;" @@ -643,7 +646,11 @@ def test_select_statement_sample_class(self): def test_select_function_star(self): parser = Parser() +<<<<<<< HEAD query = "SELECT DemoFunc(*) FROM DemoDB.DemoTable;" +======= + query = "SELECT DemoFunc(*) FROM DemoDB.DemoTable" +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) evadb_stmt_list = parser.parse(query) # check stmt itself @@ -755,6 +762,7 @@ def test_delete_statement(self): self.assertEqual(delete_stmt, expected_stmt) +<<<<<<< HEAD def test_set_statement(self): parser = Parser() set_statement = """SET OPENAIKEY = 'ABCD'""" @@ -843,6 +851,10 @@ def test_create_predict_function_statement(self): def test_create_function_statement(self): parser = Parser() +======= + def 
test_create_function_statement(self): + parser = Parser() +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) create_func_query = """CREATE FUNCTION IF NOT EXISTS FastRCNN INPUT (Frame_Array NDARRAY UINT8(3, 256, 256)) OUTPUT (Labels NDARRAY STR(10), Bbox NDARRAY UINT8(10, 4)) @@ -980,7 +992,10 @@ def test_should_return_false_for_unequal_expression(self): insert_stmt = InsertTableStatement(table) create_func = CreateFunctionStatement( "func", +<<<<<<< HEAD False, +======= +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) False, Path("data/fastrcnn.py"), [ @@ -1137,3 +1152,17 @@ def test_class_equality(self): self.assertNotEqual(tuple_frame, table_ref) self.assertNotEqual(join_node, table_ref) self.assertNotEqual(table_ref, table_info) +<<<<<<< HEAD +======= + + def test_lark(self): + query = """CREATE FUNCTION FaceDetector + INPUT (frame NDARRAY UINT8(3, ANYDIM, ANYDIM)) + OUTPUT (bboxes NDARRAY FLOAT32(ANYDIM, 4), + scores NDARRAY FLOAT32(ANYDIM)) + TYPE FaceDetection + IMPL 'evadb/functions/face_detector.py'; + """ + parser = Parser() + parser.parse(query) +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) diff --git a/test/unit_tests/parser/test_parser_statements.py b/test/unit_tests/parser/test_parser_statements.py index eba32480ee..e89c0a6ca0 100644 --- a/test/unit_tests/parser/test_parser_statements.py +++ b/test/unit_tests/parser/test_parser_statements.py @@ -80,7 +80,10 @@ def test_parser_statement_types(self): """, "SHOW TABLES;", "SHOW FUNCTIONS;", +<<<<<<< HEAD "SHOW DATABASES;", +======= +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) "EXPLAIN SELECT a FROM foo;", "SELECT HomeRentalForecast(12);", """SELECT data FROM MyVideo WHERE id < 5 diff --git a/test/unit_tests/plan_nodes/test_plan.py b/test/unit_tests/plan_nodes/test_plan.py index b06f801194..c15e26a8e7 100644 --- a/test/unit_tests/plan_nodes/test_plan.py +++ b/test/unit_tests/plan_nodes/test_plan.py @@ -72,7 +72,10 @@ def test_insert_plan(self): def test_create_function_plan(self): function_name = "function" +<<<<<<< HEAD or_replace = False +======= +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) if_not_exists = True functionIO = "functionIO" inputs = [functionIO, functionIO] @@ -80,11 +83,18 @@ def test_create_function_plan(self): impl_path = "test" ty = "classification" node = CreateFunctionPlan( +<<<<<<< HEAD function_name, or_replace, if_not_exists, inputs, outputs, impl_path, ty ) self.assertEqual(node.opr_type, PlanOprType.CREATE_FUNCTION) self.assertEqual(node.or_replace, or_replace) self.assertEqual(node.if_not_exists, if_not_exists) +======= + function_name, if_not_exists, inputs, outputs, impl_path, ty + ) + self.assertEqual(node.opr_type, PlanOprType.CREATE_FUNCTION) + self.assertEqual(node.if_not_exists, True) +>>>>>>> 2dacff69 (feat: sync master staging (#1050)) self.assertEqual(node.inputs, [functionIO, functionIO]) self.assertEqual(node.outputs, [functionIO]) self.assertEqual(node.impl_path, impl_path) diff --git a/test/unit_tests/storage/test_sqlite_native_storage_engine.py b/test/unit_tests/storage/test_sqlite_native_storage_engine.py index 15512a4624..7d84982a68 100644 --- a/test/unit_tests/storage/test_sqlite_native_storage_engine.py +++ b/test/unit_tests/storage/test_sqlite_native_storage_engine.py @@ -31,10 +31,14 @@ def __init__(self): @pytest.mark.notparallel <<<<<<< HEAD +<<<<<<< HEAD class SQLiteNativeStorageEngineTest(unittest.TestCase): ======= class SQLiiteNativeStorageEngineTest(unittest.TestCase): >>>>>>> 8c5b63dc (release: merge staging into master (#1032)) +======= 
+class SQLiteNativeStorageEngineTest(unittest.TestCase):
+>>>>>>> 2dacff69 (feat: sync master staging (#1050))
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
diff --git a/test/util.py b/test/util.py
index 23eaeb35e8..4ce03a0f4e 100644
--- a/test/util.py
+++ b/test/util.py
@@ -34,10 +34,14 @@
 from evadb.configuration.constants import EvaDB_DATABASE_DIR, EvaDB_INSTALLATION_DIR
 from evadb.database import init_evadb_instance
 from evadb.expression.function_expression import FunctionExpression
+<<<<<<< HEAD
 from evadb.functions.abstract.abstract_function import (
     AbstractClassifierFunction,
     AbstractFunction,
 )
+=======
+from evadb.functions.abstract.abstract_function import AbstractClassifierFunction
+>>>>>>> 2dacff69 (feat: sync master staging (#1050))
 from evadb.functions.decorators import decorators
 from evadb.functions.decorators.io_descriptors.data_types import (
     NumpyArray,
diff --git a/tutorials/11-similarity-search-for-motif-mining.ipynb b/tutorials/11-similarity-search-for-motif-mining.ipynb
index 98863c6a7c..c02caf357f 100644
--- a/tutorials/11-similarity-search-for-motif-mining.ipynb
+++ b/tutorials/11-similarity-search-for-motif-mining.ipynb
@@ -9,6 +9,147 @@
    "source": [
     "# Similarity search for motif mining"
    ]
+<<<<<<< HEAD
+=======
+      },
+      "execution_count": 3,
+      "metadata": {},
+      "output_type": "execute_result"
+     }
+    ],
+    "source": [
+     "response = cursor.query(\"DROP TABLE IF EXISTS reddit_dataset;\").df()\n",
+     "cursor.query(\n",
+     "    \"LOAD IMAGE 'reddit-images/*.jpg' INTO reddit_dataset;\"\n",
+     ").df()\n"
+    ]
+   },
+   {
+    "cell_type": "markdown",
+    "id": "6743684c",
+    "metadata": {},
+    "source": [
+     "### Register a SIFT FeatureExtractor\n",
+     "It uses the `kornia` library to extract SIFT features for each image."
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": 4,
+    "id": "49496e97",
+    "metadata": {
+     "execution": {
+      "iopub.execute_input": "2023-07-11T07:28:21.140631Z",
+      "iopub.status.busy": "2023-07-11T07:28:21.140397Z",
+      "iopub.status.idle": "2023-07-11T07:28:21.273873Z",
+      "shell.execute_reply": "2023-07-11T07:28:21.273142Z"
+     }
+    },
+    "outputs": [
+     {
+      "data": {
+       "text/html": [
+        "<div>\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>0</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td>UDF SiftFeatureExtractor successfully added to...</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "</div>"
+       ],
+       "text/plain": [
+        "                                                   0\n",
+        "0  UDF SiftFeatureExtractor successfully added to..."
+       ]
+      },
+      "execution_count": 4,
+      "metadata": {},
+      "output_type": "execute_result"
+     }
+    ],
+    "source": [
+     "cursor.query(\"DROP UDF IF EXISTS SiftFeatureExtractor;\").df()\n",
+     "cursor.query(\"\"\"CREATE UDF IF NOT EXISTS SiftFeatureExtractor\n",
+     "    IMPL '../evadb/functions/sift_feature_extractor.py'\"\"\").df()"
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": 5,
+    "id": "1101ec76",
+    "metadata": {
+     "execution": {
+      "iopub.execute_input": "2023-07-11T07:28:21.277349Z",
+      "iopub.status.busy": "2023-07-11T07:28:21.277097Z",
+      "iopub.status.idle": "2023-07-11T07:28:21.280842Z",
+      "shell.execute_reply": "2023-07-11T07:28:21.280177Z"
+     }
+    },
+    "outputs": [],
+    "source": [
+     "# Keep track of which image gets the most votes\n",
+     "from collections import Counter\n",
+     "vote = Counter()"
+    ]
+   },
+   {
+    "cell_type": "markdown",
+    "id": "cc5a1e73",
+    "metadata": {
+     "execution": {
+      "iopub.execute_input": "2023-05-10T04:14:02.011536Z",
+      "iopub.status.busy": "2023-05-10T04:14:02.011425Z",
+      "iopub.status.idle": "2023-05-10T04:14:02.015115Z",
+      "shell.execute_reply": "2023-05-10T04:14:02.014808Z"
+     }
+    },
+    "source": [
+     "## Image-level similarity search pipeline\n",
+     "This pipeline creates one vector per image. Next, we break down the steps for building the index and searching for similar vectors with it."
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": 6,
+    "id": "d85e3fa4",
+    "metadata": {
+     "execution": {
+      "iopub.execute_input": "2023-07-11T07:28:21.284447Z",
+      "iopub.status.busy": "2023-07-11T07:28:21.284133Z",
+      "iopub.status.idle": "2023-07-11T07:28:23.295499Z",
+      "shell.execute_reply": "2023-07-11T07:28:23.294682Z"
+     }
+    },
+    "outputs": [
+     {
+      "name": "stderr",
+      "output_type": "stream",
+      "text": [
+       "07-21-2023 09:48:26 WARNING[drop_object_executor:drop_object_executor.py:_handle_drop_index:0106] Index reddit_sift_image_index does not exist, therefore cannot be dropped.\n"
+      ]
+>>>>>>> 2dacff69 (feat: sync master staging (#1050))
   },
   {
    "cell_type": "markdown",