From 7dd048628de80bb5687d0d429727e12ee4214616 Mon Sep 17 00:00:00 2001 From: Hersh Dhillon Date: Tue, 5 Sep 2023 11:56:07 -0400 Subject: [PATCH] Removing quotes from udf_metadata_key (#1026) Replacing udf_metadata_key from string_literal to ID_LITERAL ` modified: evadb/parser/evadb.lark` Removed .value from key_value_pair[0] post the change in type ` modified: evadb/parser/lark_visitor/_functions.py` Replaced string key to ID_LITERAL in test query ` modified: test/unit_tests/parser/test_parser.py` Solves #1010 --------- Co-authored-by: xzdandy --- README.md | 4 +-- .../text_summarization_with_evadb.py | 8 ++--- docs/_toc.yml | 2 +- docs/source/benchmarks/text_summarization.rst | 8 ++--- docs/source/overview/concepts.rst | 4 +-- docs/source/reference/ai/hf.rst | 4 +-- .../{udfs => ai}/model-forecasting.rst | 6 ++-- docs/source/reference/ai/model-train.rst | 8 ++--- docs/source/reference/ai/openai.rst | 2 +- docs/source/reference/ai/yolo.rst | 2 +- docs/source/reference/evaql/create.rst | 6 ++-- docs/source/usecases/object-detection.rst | 2 +- docs/source/usecases/question-answering.rst | 4 +-- docs/source/usecases/text-summarization.rst | 8 ++--- evadb/functions/function_bootstrap_queries.py | 4 +-- evadb/parser/create_function_statement.py | 4 ++- evadb/parser/evadb.lark | 2 +- evadb/parser/lark_visitor/_functions.py | 4 ++- evadb/parser/utils.py | 2 +- .../benchmark_tests/test_benchmark_pytorch.py | 6 ++-- .../relational/test_relational_api.py | 4 +-- .../long/test_error_handling_with_ray.py | 2 +- .../long/test_function_executor.py | 11 +++---- .../long/test_huggingface_functions.py | 30 +++++++++---------- .../long/test_model_forecasting.py | 2 +- .../long/test_model_train.py | 4 +-- test/integration_tests/long/test_reuse.py | 4 +-- test/unit_tests/parser/test_parser.py | 4 +-- 28 files changed, 78 insertions(+), 73 deletions(-) rename docs/source/reference/{udfs => ai}/model-forecasting.rst (92%) diff --git a/README.md b/README.md index 138b8d804d..67d2bd7b99 100644 --- a/README.md +++ b/README.md @@ -199,8 +199,8 @@ SELECT ChatGPT('Is this video summary related to Ukraine russia war', text) CREATE FUNCTION IF NOT EXISTS PredictHouseRent FROM ( SELECT * FROM HomeRentals ) TYPE Ludwig -'predict' 'rental_price' -'time_limit' 120; +PREDICT 'rental_price' +TIME_LIMIT 120; ``` diff --git a/benchmark/text_summarization/text_summarization_with_evadb.py b/benchmark/text_summarization/text_summarization_with_evadb.py index 7e788a0882..f5e7e5237f 100644 --- a/benchmark/text_summarization/text_summarization_with_evadb.py +++ b/benchmark/text_summarization/text_summarization_with_evadb.py @@ -16,10 +16,10 @@ cursor.query("DROP UDF IF EXISTS TextSummarizer;").df() cursor.query("""CREATE UDF IF NOT EXISTS TextSummarizer TYPE HuggingFace - 'task' 'summarization' - 'model' 'sshleifer/distilbart-cnn-12-6' - 'min_length' 5 - 'max_length' 100;""").df() + TASK 'summarization' + MODEL 'sshleifer/distilbart-cnn-12-6' + MIN_LENGTH 5 + MAX_LENGTH 100;""").df() cursor.query("DROP TABLE IF EXISTS cnn_news_summary;").df() diff --git a/docs/_toc.yml b/docs/_toc.yml index 14dffd09bc..927d131973 100644 --- a/docs/_toc.yml +++ b/docs/_toc.yml @@ -66,7 +66,7 @@ parts: sections: - file: source/reference/ai/model-train title: Model Training - - file: source/reference/udfs/model-forecasting + - file: source/reference/ai/model-forecasting title: Time Series Forecasting - file: source/reference/ai/hf title: Hugging Face diff --git a/docs/source/benchmarks/text_summarization.rst b/docs/source/benchmarks/text_summarization.rst index fa8d87a454..119803f390 100644 --- a/docs/source/benchmarks/text_summarization.rst +++ b/docs/source/benchmarks/text_summarization.rst @@ -47,10 +47,10 @@ Creating Text Summarization Function in EvaDB CREATE UDF IF NOT EXISTS TextSummarizer TYPE HuggingFace - 'task' 'summarization' - 'model' 'sshleifer/distilbart-cnn-12-6' - 'min_length' 5 - 'max_length' 100; + TASK 'summarization' + MODEL 'sshleifer/distilbart-cnn-12-6' + MIN_LENGTH 5 + MAX_LENGTH 100; Tuning EvaDB for Maximum GPU Utilization diff --git a/docs/source/overview/concepts.rst b/docs/source/overview/concepts.rst index b37478f06e..1c9317114d 100644 --- a/docs/source/overview/concepts.rst +++ b/docs/source/overview/concepts.rst @@ -24,8 +24,8 @@ Here is set of illustrative EvaQL queries for a ChatGPT-based video question ans --- After creating the function, we can use the function in any future query CREATE UDF SpeechRecognizer TYPE HuggingFace - 'task' 'automatic-speech-recognition' - 'model' 'openai/whisper-base'; + TASK 'automatic-speech-recognition' + MODEL 'openai/whisper-base'; -- EvaDB automatically extracts the audio from the videos --- We only need to run the SpeechRecognizer UDF on the 'audio' column diff --git a/docs/source/reference/ai/hf.rst b/docs/source/reference/ai/hf.rst index f1c8ffc97b..8a08313311 100644 --- a/docs/source/reference/ai/hf.rst +++ b/docs/source/reference/ai/hf.rst @@ -15,8 +15,8 @@ EvaDB supports functions similar to `Pipelines str: if self._metadata is not None: for key, value in self._metadata: - s += f" '{key}' '{value}'" + # NOTE :- Removing quotes around key and making it upper case + # Since in tests we are doing a straight string comparison + s += f" {key.upper()} '{value}'" return s @property diff --git a/evadb/parser/evadb.lark b/evadb/parser/evadb.lark index 802f41fa3c..a958dd54c5 100644 --- a/evadb/parser/evadb.lark +++ b/evadb/parser/evadb.lark @@ -49,7 +49,7 @@ function_impl: string_literal function_metadata: function_metadata_key function_metadata_value -function_metadata_key: string_literal +function_metadata_key: uid function_metadata_value: string_literal | decimal_literal diff --git a/evadb/parser/lark_visitor/_functions.py b/evadb/parser/lark_visitor/_functions.py index 25cfeb12cd..6c354cac19 100644 --- a/evadb/parser/lark_visitor/_functions.py +++ b/evadb/parser/lark_visitor/_functions.py @@ -97,7 +97,9 @@ def create_function(self, tree): value = key_value_pair[1] if isinstance(value, ConstantValueExpression): value = value.value - metadata.append((key_value_pair[0].value, value)), + # Removing .value from key_value_pair[0] since key is now an ID_LITERAL + # Adding lower() to ensure the key is in lowercase + metadata.append((key_value_pair[0].lower(), value)), return CreateFunctionStatement( function_name, diff --git a/evadb/parser/utils.py b/evadb/parser/utils.py index 31615992dd..70db55cecc 100644 --- a/evadb/parser/utils.py +++ b/evadb/parser/utils.py @@ -74,7 +74,7 @@ def parse_create_function( mock_query += f" TYPE {type}" task, model = kwargs["task"], kwargs["model"] if task is not None and model is not None: - mock_query += f" 'task' '{task}' 'model' '{model}'" + mock_query += f" TASK '{task}' MODEL '{model}'" else: mock_query += f" IMPL '{function_file_path}'" mock_query += ";" diff --git a/test/benchmark_tests/test_benchmark_pytorch.py b/test/benchmark_tests/test_benchmark_pytorch.py index de6a01439c..b9300cfd66 100644 --- a/test/benchmark_tests/test_benchmark_pytorch.py +++ b/test/benchmark_tests/test_benchmark_pytorch.py @@ -109,7 +109,7 @@ def test_automatic_speech_recognition(benchmark, setup_pytorch_tests): udf_name = "SpeechRecognizer" create_udf = ( f"CREATE UDF {udf_name} TYPE HuggingFace " - "'task' 'automatic-speech-recognition' 'model' 'openai/whisper-base';" + "TASK 'automatic-speech-recognition' MODEL 'openai/whisper-base';" ) execute_query_fetch_all(setup_pytorch_tests, create_udf) @@ -135,14 +135,14 @@ def test_summarization_from_video(benchmark, setup_pytorch_tests): asr_udf = "SpeechRecognizer" create_udf = ( f"CREATE UDF {asr_udf} TYPE HuggingFace " - "'task' 'automatic-speech-recognition' 'model' 'openai/whisper-base';" + "TASK 'automatic-speech-recognition' MODEL 'openai/whisper-base';" ) execute_query_fetch_all(setup_pytorch_tests, create_udf) summary_udf = "Summarizer" create_udf = ( f"CREATE UDF {summary_udf} TYPE HuggingFace " - "'task' 'summarization' 'model' 'philschmid/bart-large-cnn-samsum' 'min_length' 10 'max_length' 100;" + "TASK 'summarization' MODEL 'philschmid/bart-large-cnn-samsum' MIN_LENGTH 10 MAX_LENGTH 100;" ) execute_query_fetch_all(setup_pytorch_tests, create_udf) diff --git a/test/integration_tests/long/interfaces/relational/test_relational_api.py b/test/integration_tests/long/interfaces/relational/test_relational_api.py index 411a2e51f6..773607960b 100644 --- a/test/integration_tests/long/interfaces/relational/test_relational_api.py +++ b/test/integration_tests/long/interfaces/relational/test_relational_api.py @@ -231,7 +231,7 @@ def test_create_function_with_relational_api(self): query = create_speech_recognizer_function_if_not_exists.sql_query() self.assertEqual( query, - """CREATE FUNCTION IF NOT EXISTS SpeechRecognizer TYPE HuggingFace 'task' 'automatic-speech-recognition' 'model' 'openai/whisper-base'""", + """CREATE FUNCTION IF NOT EXISTS SpeechRecognizer TYPE HuggingFace TASK 'automatic-speech-recognition' MODEL 'openai/whisper-base'""", ) create_speech_recognizer_function_if_not_exists.execute() @@ -242,7 +242,7 @@ def test_create_function_with_relational_api(self): query = create_speech_recognizer_function.sql_query() self.assertEqual( query, - "CREATE FUNCTION SpeechRecognizer TYPE HuggingFace 'task' 'automatic-speech-recognition' 'model' 'openai/whisper-base'", + "CREATE FUNCTION SpeechRecognizer TYPE HuggingFace TASK 'automatic-speech-recognition' MODEL 'openai/whisper-base'", ) with self.assertRaises(ExecutorError): create_speech_recognizer_function.execute() diff --git a/test/integration_tests/long/test_error_handling_with_ray.py b/test/integration_tests/long/test_error_handling_with_ray.py index de0ba4d3f4..da134b7ed3 100644 --- a/test/integration_tests/long/test_error_handling_with_ray.py +++ b/test/integration_tests/long/test_error_handling_with_ray.py @@ -58,7 +58,7 @@ def test_ray_error_populate_to_all_stages(self): function_name, task = "HFObjectDetector", "image-classification" create_function_query = f"""CREATE FUNCTION {function_name} TYPE HuggingFace - 'task' '{task}' + TASK '{task}' """ execute_query_fetch_all(self.evadb, create_function_query) diff --git a/test/integration_tests/long/test_function_executor.py b/test/integration_tests/long/test_function_executor.py index 77c7e74c02..28368ddad8 100644 --- a/test/integration_tests/long/test_function_executor.py +++ b/test/integration_tests/long/test_function_executor.py @@ -175,8 +175,8 @@ def test_should_create_function_with_metadata(self): OUTPUT (label NDARRAY STR(10)) TYPE Classification IMPL 'test/util.py' - 'CACHE' 'TRUE' - 'BATCH' 'FALSE'; + CACHE 'TRUE' + BATCH 'FALSE'; """ execute_query_fetch_all(self.evadb, create_function_query.format(function_name)) @@ -187,7 +187,8 @@ def test_should_create_function_with_metadata(self): self.assertEqual(len(entries), 2) metadata = [(entry.key, entry.value) for entry in entries] - expected_metadata = [("CACHE", "TRUE"), ("BATCH", "FALSE")] + # metadata ultimately stored as lowercase string literals in metadata + expected_metadata = [("cache", "TRUE"), ("batch", "FALSE")] self.assertEqual(set(metadata), set(expected_metadata)) def test_should_return_empty_metadata_list_for_missing_function(self): @@ -205,8 +206,8 @@ def test_should_return_empty_metadata_list_if_function_is_removed(self): OUTPUT (label NDARRAY STR(10)) TYPE Classification IMPL 'test/util.py' - 'CACHE' 'TRUE' - 'BATCH' 'FALSE'; + CACHE 'TRUE' + BATCH 'FALSE'; """ execute_query_fetch_all(self.evadb, create_function_query.format(function_name)) diff --git a/test/integration_tests/long/test_huggingface_functions.py b/test/integration_tests/long/test_huggingface_functions.py index b125d78b26..e2a258525a 100644 --- a/test/integration_tests/long/test_huggingface_functions.py +++ b/test/integration_tests/long/test_huggingface_functions.py @@ -55,7 +55,7 @@ def test_io_catalog_entries_populated(self): function_name, task = "HFObjectDetector", "image-classification" create_function_query = f"""CREATE FUNCTION {function_name} TYPE HuggingFace - 'task' '{task}' + TASK '{task}' """ execute_query_fetch_all(self.evadb, create_function_query) @@ -79,7 +79,7 @@ def test_raise_error_on_unsupported_task(self): task = "zero-shot-object-detection" create_function_query = f"""CREATE FUNCTION {function_name} TYPE HuggingFace - 'task' '{task}' + TASK '{task}' """ # catch an assert @@ -95,8 +95,8 @@ def test_object_detection(self): function_name = "HFObjectDetector" create_function_query = f"""CREATE FUNCTION {function_name} TYPE HuggingFace - 'task' 'object-detection' - 'model' 'facebook/detr-resnet-50'; + TASK 'object-detection' + MODEL 'facebook/detr-resnet-50'; """ execute_query_fetch_all(self.evadb, create_function_query) @@ -147,7 +147,7 @@ def test_image_classification(self): function_name = "HFImageClassifier" create_function_query = f"""CREATE FUNCTION {function_name} TYPE HuggingFace - 'task' 'image-classification' + TASK 'image-classification' """ execute_query_fetch_all(self.evadb, create_function_query) @@ -192,7 +192,7 @@ def test_text_classification(self): function_name = "HFTextClassifier" create_function_query = f"""CREATE FUNCTION {function_name} TYPE HuggingFace - 'task' 'text-classification' + TASK 'text-classification' """ execute_query_fetch_all(self.evadb, create_function_query) @@ -229,7 +229,7 @@ def test_automatic_speech_recognition(self): function_name = "SpeechRecognizer" create_function = ( f"CREATE FUNCTION {function_name} TYPE HuggingFace " - "'task' 'automatic-speech-recognition' 'model' 'openai/whisper-base';" + "TASK 'automatic-speech-recognition' MODEL 'openai/whisper-base';" ) execute_query_fetch_all(self.evadb, create_function) @@ -258,14 +258,14 @@ def test_summarization_from_video(self): asr_function = "SpeechRecognizer" create_function = ( f"CREATE FUNCTION {asr_function} TYPE HuggingFace " - "'task' 'automatic-speech-recognition' 'model' 'openai/whisper-base';" + "TASK 'automatic-speech-recognition' MODEL 'openai/whisper-base';" ) execute_query_fetch_all(self.evadb, create_function) summary_function = "Summarizer" create_function = ( f"CREATE FUNCTION {summary_function} TYPE HuggingFace " - "'task' 'summarization' 'model' 'philschmid/bart-large-cnn-samsum' 'min_length' 10 'max_new_tokens' 100;" + "TASK 'summarization' MODEL 'philschmid/bart-large-cnn-samsum' MIN_LENGTH 10 MAX_NEW_TOKENS 100;" ) execute_query_fetch_all(self.evadb, create_function) @@ -290,8 +290,8 @@ def test_toxicity_classification(self): function_name = "HFToxicityClassifier" create_function_query = f"""CREATE FUNCTION {function_name} TYPE HuggingFace - 'task' 'text-classification' - 'model' 'martin-ha/toxic-comment-model' + TASK 'text-classification' + MODEL 'martin-ha/toxic-comment-model' """ execute_query_fetch_all(self.evadb, create_function_query) @@ -340,8 +340,8 @@ def test_multilingual_toxicity_classification(self): function_name = "HFMultToxicityClassifier" create_function_query = f"""CREATE FUNCTION {function_name} TYPE HuggingFace - 'task' 'text-classification' - 'model' 'EIStakovskii/xlm_roberta_base_multilingual_toxicity_classifier_plus' + TASK 'text-classification' + MODEL 'EIStakovskii/xlm_roberta_base_multilingual_toxicity_classifier_plus' """ execute_query_fetch_all(self.evadb, create_function_query) @@ -389,7 +389,7 @@ def test_named_entity_recognition_model_all_pdf_data(self): function_name = "HFNERModel" create_function_query = f"""CREATE FUNCTION {function_name} TYPE HuggingFace - 'task' 'ner' + TASK 'ner' """ execute_query_fetch_all(self.evadb, create_function_query) @@ -424,7 +424,7 @@ def test_named_entity_recognition_model_no_ner_data_exists(self): function_name = "HFNERModel" create_function_query = f"""CREATE FUNCTION {function_name} TYPE HuggingFace - 'task' 'ner' + TASK 'ner' """ execute_query_fetch_all(self.evadb, create_function_query) diff --git a/test/integration_tests/long/test_model_forecasting.py b/test/integration_tests/long/test_model_forecasting.py index 874a840e67..04cf69975a 100644 --- a/test/integration_tests/long/test_model_forecasting.py +++ b/test/integration_tests/long/test_model_forecasting.py @@ -54,7 +54,7 @@ def test_forecast(self): CREATE UDF Forecast FROM (SELECT unique_id, ds, y FROM AirData) TYPE Forecasting - 'predict' 'y'; + PREDICT 'y'; """ execute_query_fetch_all(self.evadb, create_predict_udf) diff --git a/test/integration_tests/long/test_model_train.py b/test/integration_tests/long/test_model_train.py index bbc8ed26f5..55ae6da9c1 100644 --- a/test/integration_tests/long/test_model_train.py +++ b/test/integration_tests/long/test_model_train.py @@ -60,8 +60,8 @@ def test_ludwig_automl(self): CREATE FUNCTION IF NOT EXISTS PredictHouseRent FROM ( SELECT * FROM HomeRentals ) TYPE Ludwig - 'predict' 'rental_price' - 'time_limit' 120; + PREDICT 'rental_price' + TIME_LIMIT 120; """ execute_query_fetch_all(self.evadb, create_predict_function) diff --git a/test/integration_tests/long/test_reuse.py b/test/integration_tests/long/test_reuse.py index 7911bb20f6..eefcda67eb 100644 --- a/test/integration_tests/long/test_reuse.py +++ b/test/integration_tests/long/test_reuse.py @@ -42,8 +42,8 @@ def _load_hf_model(self): function_name = "HFObjectDetector" create_function_query = f"""CREATE FUNCTION {function_name} TYPE HuggingFace - 'task' 'object-detection' - 'model' 'facebook/detr-resnet-50'; + TASK 'object-detection' + MODEL 'facebook/detr-resnet-50'; """ execute_query_fetch_all(self.evadb, create_function_query) diff --git a/test/unit_tests/parser/test_parser.py b/test/unit_tests/parser/test_parser.py index d0894d5894..339abe7e84 100644 --- a/test/unit_tests/parser/test_parser.py +++ b/test/unit_tests/parser/test_parser.py @@ -662,7 +662,7 @@ def test_create_function_statement(self): OUTPUT (Labels NDARRAY STR(10), Bbox NDARRAY UINT8(10, 4)) TYPE Classification IMPL 'data/fastrcnn.py' - "KEY" "VALUE"; + PREDICT "VALUE"; """ expected_cci = ColConstraintInfo() @@ -690,7 +690,7 @@ def test_create_function_statement(self): ], "Classification", None, - [("KEY", "VALUE")], + [("predict", "VALUE")], ) evadb_statement_list = parser.parse(create_func_query) self.assertIsInstance(evadb_statement_list, list)