From adbd3f5f5e4866bda47708bb39ca0a793d99c5e9 Mon Sep 17 00:00:00 2001 From: hershd23 Date: Fri, 1 Sep 2023 12:19:49 -0400 Subject: [PATCH 01/10] Replacing udf_metadata_key from string_literal to ID_LITERAL modified: evadb/parser/evadb.lark Removed .value from key_value_pair[0] post the change in type modified: evadb/parser/lark_visitor/_functions.py Replaced string key to ID_LITERAL in test query modified: test/unit_tests/parser/test_parser.py --- evadb/parser/evadb.lark | 4 ++-- evadb/parser/lark_visitor/_functions.py | 3 ++- test/unit_tests/parser/test_parser.py | 4 ++-- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/evadb/parser/evadb.lark b/evadb/parser/evadb.lark index ac15951b58..7171f8b63c 100644 --- a/evadb/parser/evadb.lark +++ b/evadb/parser/evadb.lark @@ -49,7 +49,7 @@ udf_impl: string_literal udf_metadata: udf_metadata_key udf_metadata_value -udf_metadata_key: string_literal +udf_metadata_key: uid udf_metadata_value: string_literal | decimal_literal @@ -207,7 +207,7 @@ simple_id: ID dotted_id: DOT_ID | "." uid -// Literals +// Literalss string_literal: STRING_LITERAL diff --git a/evadb/parser/lark_visitor/_functions.py b/evadb/parser/lark_visitor/_functions.py index d8f0503de5..0a341abfb7 100644 --- a/evadb/parser/lark_visitor/_functions.py +++ b/evadb/parser/lark_visitor/_functions.py @@ -97,7 +97,8 @@ def create_udf(self, tree): value = key_value_pair[1] if isinstance(value, ConstantValueExpression): value = value.value - metadata.append((key_value_pair[0].value, value)), + # Removing .value from key_value_pair[0] since key is now an ID_LITERAL + metadata.append((key_value_pair[0], value)), return CreateUDFStatement( udf_name, diff --git a/test/unit_tests/parser/test_parser.py b/test/unit_tests/parser/test_parser.py index e50748e51d..be216bc42f 100644 --- a/test/unit_tests/parser/test_parser.py +++ b/test/unit_tests/parser/test_parser.py @@ -662,7 +662,7 @@ def test_create_udf_statement(self): OUTPUT (Labels NDARRAY STR(10), Bbox NDARRAY UINT8(10, 4)) TYPE Classification IMPL 'data/fastrcnn.py' - "KEY" "VALUE"; + PREDICT "VALUE"; """ expected_cci = ColConstraintInfo() @@ -690,7 +690,7 @@ def test_create_udf_statement(self): ], "Classification", None, - [("KEY", "VALUE")], + [("PREDICT", "VALUE")], ) evadb_statement_list = parser.parse(create_udf_query) self.assertIsInstance(evadb_statement_list, list) From 0bdeb711e4418384d6db84fc17bbc9e3d128f295 Mon Sep 17 00:00:00 2001 From: hershd23 Date: Fri, 1 Sep 2023 12:27:44 -0400 Subject: [PATCH 02/10] Fixing typos modified: evadb/parser/evadb.lark --- evadb/parser/evadb.lark | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/evadb/parser/evadb.lark b/evadb/parser/evadb.lark index 7171f8b63c..d0df789756 100644 --- a/evadb/parser/evadb.lark +++ b/evadb/parser/evadb.lark @@ -49,7 +49,7 @@ udf_impl: string_literal udf_metadata: udf_metadata_key udf_metadata_value -udf_metadata_key: uid +udf_metadata_key: uid udf_metadata_value: string_literal | decimal_literal @@ -207,7 +207,7 @@ simple_id: ID dotted_id: DOT_ID | "." uid -// Literalss +// Literals string_literal: STRING_LITERAL From a4c3f01c64f13404cf0fa03bb4479f0f0f6ef21e Mon Sep 17 00:00:00 2001 From: hershd23 Date: Fri, 1 Sep 2023 12:29:13 -0400 Subject: [PATCH 03/10] Replaced metadata key back to KEY in test modified: test/unit_tests/parser/test_parser.py --- test/unit_tests/parser/test_parser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/unit_tests/parser/test_parser.py b/test/unit_tests/parser/test_parser.py index be216bc42f..cbb93d0e10 100644 --- a/test/unit_tests/parser/test_parser.py +++ b/test/unit_tests/parser/test_parser.py @@ -662,7 +662,7 @@ def test_create_udf_statement(self): OUTPUT (Labels NDARRAY STR(10), Bbox NDARRAY UINT8(10, 4)) TYPE Classification IMPL 'data/fastrcnn.py' - PREDICT "VALUE"; + KEY "VALUE"; """ expected_cci = ColConstraintInfo() @@ -690,7 +690,7 @@ def test_create_udf_statement(self): ], "Classification", None, - [("PREDICT", "VALUE")], + [("KEY", "VALUE")], ) evadb_statement_list = parser.parse(create_udf_query) self.assertIsInstance(evadb_statement_list, list) From fcb8e89299683ee6698fd243edce6a515eb8ae6d Mon Sep 17 00:00:00 2001 From: hershd23 Date: Fri, 1 Sep 2023 12:41:10 -0400 Subject: [PATCH 04/10] Replacing udf_metadata_key in docs modified: README.md modified: docs/source/reference/evaql/create.rst modified: docs/source/reference/udfs/model-train.rst Replacing udf_metadata_key in Ludwig test modified: test/integration_tests/long/test_model_train.py --- README.md | 4 ++-- docs/source/reference/evaql/create.rst | 6 +++--- docs/source/reference/udfs/model-train.rst | 8 ++++---- test/integration_tests/long/test_model_train.py | 4 ++-- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index be43e8ff9b..57b58dd076 100644 --- a/README.md +++ b/README.md @@ -185,8 +185,8 @@ SELECT ChatGPT('Is this video summary related to Ukraine russia war', text) CREATE UDF IF NOT EXISTS PredictHouseRent FROM ( SELECT * FROM HomeRentals ) TYPE Ludwig -'predict' 'rental_price' -'time_limit' 120; +PREDICT 'rental_price' +TIME_LIMIT 120; ``` diff --git a/docs/source/reference/evaql/create.rst b/docs/source/reference/evaql/create.rst index edbae69e26..d44bfedfcb 100644 --- a/docs/source/reference/evaql/create.rst +++ b/docs/source/reference/evaql/create.rst @@ -44,9 +44,9 @@ To register an user-defined function by training a predication model. CREATE UDF IF NOT EXISTS PredictHouseRent FROM (SELECT * FROM HomeRentals) TYPE Ludwig - 'predict' 'rental_price' - 'time_list' 120; - 'tune_for_memory' False; + PREDICT 'rental_price' + TIME_LIST 120; + TUNE_FOR_MEMORY False; CREATE MATERIALIZED VIEW ------------------------ diff --git a/docs/source/reference/udfs/model-train.rst b/docs/source/reference/udfs/model-train.rst index de6e84eeee..4a40796fa1 100644 --- a/docs/source/reference/udfs/model-train.rst +++ b/docs/source/reference/udfs/model-train.rst @@ -12,8 +12,8 @@ Training and Finetuning CREATE UDF IF NOT EXISTS PredictHouseRent FROM ( SELECT sqft, location, rental_price FROM HomeRentals ) TYPE Ludwig - 'predict' 'rental_price' - 'time_limit' 120; + PREDICT 'rental_price' + TIME_LIMIT 120; In the above query, you are creating a new customized UDF by automatically training a model from the `HomeRentals` table. The `rental_price` column will be the target column for predication, while `sqft` and `location` are the inputs. @@ -24,8 +24,8 @@ You can also simply give all other columns in `HomeRentals` as inputs and let th CREATE UDF IF NOT EXISTS PredictHouseRent FROM ( SELECT * FROM HomeRentals ) TYPE Ludwig - 'predict' 'rental_price' - 'time_limit' 120; + PREDICT 'rental_price' + TIME_LIMIT 120; .. note:: diff --git a/test/integration_tests/long/test_model_train.py b/test/integration_tests/long/test_model_train.py index cb25865b68..133dfc3b71 100644 --- a/test/integration_tests/long/test_model_train.py +++ b/test/integration_tests/long/test_model_train.py @@ -60,8 +60,8 @@ def test_ludwig_automl(self): CREATE UDF IF NOT EXISTS PredictHouseRent FROM ( SELECT * FROM HomeRentals ) TYPE Ludwig - 'predict' 'rental_price' - 'time_limit' 120; + PREDICT 'rental_price' + TIME_LIMIT 120; """ execute_query_fetch_all(self.evadb, create_predict_udf) From 0011642b072d2cfc20a782840c9e38f20a2aa064 Mon Sep 17 00:00:00 2001 From: hershd23 Date: Fri, 1 Sep 2023 12:51:22 -0400 Subject: [PATCH 05/10] Fixing failed unit test --- .lock_preprocessing | 0 evadb/parser/lark_visitor/_functions.py | 3 ++- test/unit_tests/parser/test_parser.py | 4 ++-- 3 files changed, 4 insertions(+), 3 deletions(-) create mode 100644 .lock_preprocessing diff --git a/.lock_preprocessing b/.lock_preprocessing new file mode 100644 index 0000000000..e69de29bb2 diff --git a/evadb/parser/lark_visitor/_functions.py b/evadb/parser/lark_visitor/_functions.py index 0a341abfb7..9d9c56bbd4 100644 --- a/evadb/parser/lark_visitor/_functions.py +++ b/evadb/parser/lark_visitor/_functions.py @@ -98,7 +98,8 @@ def create_udf(self, tree): if isinstance(value, ConstantValueExpression): value = value.value # Removing .value from key_value_pair[0] since key is now an ID_LITERAL - metadata.append((key_value_pair[0], value)), + # Adding lower() to ensure the key is in lowercase + metadata.append((key_value_pair[0].lower(), value)), return CreateUDFStatement( udf_name, diff --git a/test/unit_tests/parser/test_parser.py b/test/unit_tests/parser/test_parser.py index cbb93d0e10..be9a40b354 100644 --- a/test/unit_tests/parser/test_parser.py +++ b/test/unit_tests/parser/test_parser.py @@ -662,7 +662,7 @@ def test_create_udf_statement(self): OUTPUT (Labels NDARRAY STR(10), Bbox NDARRAY UINT8(10, 4)) TYPE Classification IMPL 'data/fastrcnn.py' - KEY "VALUE"; + PREDICT "VALUE"; """ expected_cci = ColConstraintInfo() @@ -690,7 +690,7 @@ def test_create_udf_statement(self): ], "Classification", None, - [("KEY", "VALUE")], + [("predict", "VALUE")], ) evadb_statement_list = parser.parse(create_udf_query) self.assertIsInstance(evadb_statement_list, list) From b30a2047dac2b3d9ed8dd4bc85cba2d3d7ddafc5 Mon Sep 17 00:00:00 2001 From: hershd23 Date: Mon, 4 Sep 2023 16:31:35 -0400 Subject: [PATCH 06/10] Replacing <'metadata_key'> with modified: benchmark/text_summarization/text_summarization_with_evadb.py modified: docs/source/benchmarks/text_summarization.rst modified: docs/source/overview/concepts.rst modified: docs/source/reference/ai/hf.rst modified: docs/source/reference/ai/openai.rst modified: docs/source/reference/ai/yolo.rst modified: docs/source/usecases/object-detection.rst modified: docs/source/usecases/question-answering.rst modified: docs/source/usecases/text-summarization.rst modified: evadb/parser/utils.py modified: evadb/udfs/udf_bootstrap_queries.py modified: test/benchmark_tests/test_benchmark_pytorch.py modified: test/integration_tests/long/interfaces/relational/test_relational_api.py modified: test/integration_tests/long/test_error_handling_with_ray.py modified: test/integration_tests/long/test_huggingface_udfs.py modified: test/integration_tests/long/test_reuse.py --- .../text_summarization_with_evadb.py | 8 ++--- docs/source/benchmarks/text_summarization.rst | 8 ++--- docs/source/overview/concepts.rst | 4 +-- docs/source/reference/ai/hf.rst | 4 +-- docs/source/reference/ai/openai.rst | 2 +- docs/source/reference/ai/yolo.rst | 2 +- docs/source/usecases/object-detection.rst | 2 +- docs/source/usecases/question-answering.rst | 4 +-- docs/source/usecases/text-summarization.rst | 8 ++--- evadb/parser/utils.py | 2 +- evadb/udfs/udf_bootstrap_queries.py | 4 +-- .../benchmark_tests/test_benchmark_pytorch.py | 6 ++-- .../relational/test_relational_api.py | 4 +-- .../long/test_error_handling_with_ray.py | 2 +- .../long/test_huggingface_udfs.py | 30 +++++++++---------- test/integration_tests/long/test_reuse.py | 4 +-- 16 files changed, 47 insertions(+), 47 deletions(-) diff --git a/benchmark/text_summarization/text_summarization_with_evadb.py b/benchmark/text_summarization/text_summarization_with_evadb.py index 7e788a0882..f5e7e5237f 100644 --- a/benchmark/text_summarization/text_summarization_with_evadb.py +++ b/benchmark/text_summarization/text_summarization_with_evadb.py @@ -16,10 +16,10 @@ cursor.query("DROP UDF IF EXISTS TextSummarizer;").df() cursor.query("""CREATE UDF IF NOT EXISTS TextSummarizer TYPE HuggingFace - 'task' 'summarization' - 'model' 'sshleifer/distilbart-cnn-12-6' - 'min_length' 5 - 'max_length' 100;""").df() + TASK 'summarization' + MODEL 'sshleifer/distilbart-cnn-12-6' + MIN_LENGTH 5 + MAX_LENGTH 100;""").df() cursor.query("DROP TABLE IF EXISTS cnn_news_summary;").df() diff --git a/docs/source/benchmarks/text_summarization.rst b/docs/source/benchmarks/text_summarization.rst index a536239b0f..e4a38c69a1 100644 --- a/docs/source/benchmarks/text_summarization.rst +++ b/docs/source/benchmarks/text_summarization.rst @@ -47,10 +47,10 @@ Creating Text Summarization Function in EvaDB CREATE UDF IF NOT EXISTS TextSummarizer TYPE HuggingFace - 'task' 'summarization' - 'model' 'sshleifer/distilbart-cnn-12-6' - 'min_length' 5 - 'max_length' 100; + TASK 'summarization' + MODEL 'sshleifer/distilbart-cnn-12-6' + MIN_LENGTH 5 + MAX_LENGTH 100; Tuning EvaDB for Maximum GPU Utilization diff --git a/docs/source/overview/concepts.rst b/docs/source/overview/concepts.rst index b37478f06e..1c9317114d 100644 --- a/docs/source/overview/concepts.rst +++ b/docs/source/overview/concepts.rst @@ -24,8 +24,8 @@ Here is set of illustrative EvaQL queries for a ChatGPT-based video question ans --- After creating the function, we can use the function in any future query CREATE UDF SpeechRecognizer TYPE HuggingFace - 'task' 'automatic-speech-recognition' - 'model' 'openai/whisper-base'; + TASK 'automatic-speech-recognition' + MODEL 'openai/whisper-base'; -- EvaDB automatically extracts the audio from the videos --- We only need to run the SpeechRecognizer UDF on the 'audio' column diff --git a/docs/source/reference/ai/hf.rst b/docs/source/reference/ai/hf.rst index 174fee1171..5152f55187 100644 --- a/docs/source/reference/ai/hf.rst +++ b/docs/source/reference/ai/hf.rst @@ -13,8 +13,8 @@ EvaDB supports UDFS similar to `Pipelines Date: Mon, 4 Sep 2023 16:37:19 -0400 Subject: [PATCH 07/10] Increased underline length in at line 75 in text_summarization.rst modified: docs/source/benchmarks/text_summarization.rst --- docs/source/benchmarks/text_summarization.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/benchmarks/text_summarization.rst b/docs/source/benchmarks/text_summarization.rst index e4a38c69a1..a9a818f2c3 100644 --- a/docs/source/benchmarks/text_summarization.rst +++ b/docs/source/benchmarks/text_summarization.rst @@ -72,7 +72,7 @@ Text Summarization Query in EvaDB SELECT TextSummarizer(article) FROM cnn_news_test; Use MindsDB for Text Summarization --------------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Setup SQLite Database ~~~~~~~~~~~~~~~~~~~~~~ From ec050b5352efa5cd84257984010a7551619b2108 Mon Sep 17 00:00:00 2001 From: hershd23 Date: Tue, 5 Sep 2023 10:31:28 -0400 Subject: [PATCH 08/10] Deleted .lock_preprocessing file deleted: .lock_preprocessing Converted CACHE and BATCH from string literals to UIDs modified: test/integration_tests/long/test_udf_executor.py --- .lock_preprocessing | 0 test/integration_tests/long/test_udf_executor.py | 11 ++++++----- 2 files changed, 6 insertions(+), 5 deletions(-) delete mode 100644 .lock_preprocessing diff --git a/.lock_preprocessing b/.lock_preprocessing deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/test/integration_tests/long/test_udf_executor.py b/test/integration_tests/long/test_udf_executor.py index 68fed4f219..3d99053c41 100644 --- a/test/integration_tests/long/test_udf_executor.py +++ b/test/integration_tests/long/test_udf_executor.py @@ -173,8 +173,8 @@ def test_should_create_udf_with_metadata(self): OUTPUT (label NDARRAY STR(10)) TYPE Classification IMPL 'test/util.py' - 'CACHE' 'TRUE' - 'BATCH' 'FALSE'; + CACHE 'TRUE' + BATCH 'FALSE'; """ execute_query_fetch_all(self.evadb, create_udf_query.format(udf_name)) @@ -183,7 +183,8 @@ def test_should_create_udf_with_metadata(self): self.assertEqual(len(entries), 2) metadata = [(entry.key, entry.value) for entry in entries] - expected_metadata = [("CACHE", "TRUE"), ("BATCH", "FALSE")] + # metadata ultimately stored as lowercase string literals in metadata + expected_metadata = [("cache", "TRUE"), ("batch", "FALSE")] self.assertEqual(set(metadata), set(expected_metadata)) def test_should_return_empty_metadata_list_for_missing_udf(self): @@ -199,8 +200,8 @@ def test_should_return_empty_metadata_list_if_udf_is_removed(self): OUTPUT (label NDARRAY STR(10)) TYPE Classification IMPL 'test/util.py' - 'CACHE' 'TRUE' - 'BATCH' 'FALSE'; + CACHE 'TRUE' + BATCH 'FALSE'; """ execute_query_fetch_all(self.evadb, create_udf_query.format(udf_name)) From 9968033dcc9833072d4318509917bf470ee8b140 Mon Sep 17 00:00:00 2001 From: hershd23 Date: Tue, 5 Sep 2023 10:59:07 -0400 Subject: [PATCH 09/10] Removed quotes from udf_metadata_key and converted to upper case for direct string matching test cases modified: evadb/parser/create_udf_statement.py --- evadb/parser/create_udf_statement.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/evadb/parser/create_udf_statement.py b/evadb/parser/create_udf_statement.py index baf977b2d2..29296ed5dc 100644 --- a/evadb/parser/create_udf_statement.py +++ b/evadb/parser/create_udf_statement.py @@ -86,7 +86,9 @@ def __str__(self) -> str: if self._metadata is not None: for key, value in self._metadata: - s += f" '{key}' '{value}'" + # NOTE :- Removing quotes around key and making it upper case + # Since in tests we are doing a straight string comparison + s += f" {key.upper()} '{value}'" return s @property From 49a2df89d52986639f13025bfd4d8d0ded3d8b1f Mon Sep 17 00:00:00 2001 From: xzdandy Date: Tue, 5 Sep 2023 11:54:42 -0400 Subject: [PATCH 10/10] Sync with staging --- docs/_toc.yml | 2 +- docs/source/reference/{udfs => ai}/model-forecasting.rst | 6 +++--- test/integration_tests/long/test_model_forecasting.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) rename docs/source/reference/{udfs => ai}/model-forecasting.rst (92%) diff --git a/docs/_toc.yml b/docs/_toc.yml index 14dffd09bc..927d131973 100644 --- a/docs/_toc.yml +++ b/docs/_toc.yml @@ -66,7 +66,7 @@ parts: sections: - file: source/reference/ai/model-train title: Model Training - - file: source/reference/udfs/model-forecasting + - file: source/reference/ai/model-forecasting title: Time Series Forecasting - file: source/reference/ai/hf title: Hugging Face diff --git a/docs/source/reference/udfs/model-forecasting.rst b/docs/source/reference/ai/model-forecasting.rst similarity index 92% rename from docs/source/reference/udfs/model-forecasting.rst rename to docs/source/reference/ai/model-forecasting.rst index 7a6ebef50b..75602a0352 100644 --- a/docs/source/reference/udfs/model-forecasting.rst +++ b/docs/source/reference/ai/model-forecasting.rst @@ -5,7 +5,7 @@ You can train a forecasting model easily in EvaDB. .. note:: - Install `statsforecast` in your EvaDB virtual environment: ``pip install statsforecast``. + Install `statsforecast` in your EvaDB virtual environment: ``pip install eva[forecasting]``. First, we create a table to insert required data. @@ -26,7 +26,7 @@ Next, we create a UDF of `TYPE Forecasting`. We must enter the column name on wh CREATE UDF IF NOT EXISTS Forecast FROM (SELECT y FROM AirData) TYPE Forecasting - 'predict' 'y'; + PREDICT 'y'; This trains a forecasting model. The model can be called by providing the horizon for forecasting. @@ -34,4 +34,4 @@ This trains a forecasting model. The model can be called by providing the horizo SELECT Forecast(12) FROM AirData; -Here, the horizon is `12`. \ No newline at end of file +Here, the horizon is `12`. diff --git a/test/integration_tests/long/test_model_forecasting.py b/test/integration_tests/long/test_model_forecasting.py index 874a840e67..04cf69975a 100644 --- a/test/integration_tests/long/test_model_forecasting.py +++ b/test/integration_tests/long/test_model_forecasting.py @@ -54,7 +54,7 @@ def test_forecast(self): CREATE UDF Forecast FROM (SELECT unique_id, ds, y FROM AirData) TYPE Forecasting - 'predict' 'y'; + PREDICT 'y'; """ execute_query_fetch_all(self.evadb, create_predict_udf)