From a7902de7e2be8cd9c55c2166b2890cc5a8512b87 Mon Sep 17 00:00:00 2001 From: ckunki Date: Fri, 18 Oct 2024 13:58:22 +0200 Subject: [PATCH 01/49] Added support to create dynamic modules --- doc/changes/changes_0.1.0.md | 1 + .../udf_framework/dynamic_modules.py | 19 +++++++++++++++++++ .../udf_framework/test_dynamic_modules.py | 10 ++++++++++ 3 files changed, 30 insertions(+) create mode 100644 exasol_advanced_analytics_framework/udf_framework/dynamic_modules.py create mode 100644 tests/unit_tests/udf_framework/test_dynamic_modules.py diff --git a/doc/changes/changes_0.1.0.md b/doc/changes/changes_0.1.0.md index 90664173..70319284 100644 --- a/doc/changes/changes_0.1.0.md +++ b/doc/changes/changes_0.1.0.md @@ -52,6 +52,7 @@ Code name: * #176: Updated usage of `exasol-bucketfs` to new API * #185: Removed directory and script for building SLC AAF * #191: Renamed UDF json element "parameters" to "parameter" +* #190: Enabled to generate a dynamic module for custom UDF ### Documentation diff --git a/exasol_advanced_analytics_framework/udf_framework/dynamic_modules.py b/exasol_advanced_analytics_framework/udf_framework/dynamic_modules.py new file mode 100644 index 00000000..96732d04 --- /dev/null +++ b/exasol_advanced_analytics_framework/udf_framework/dynamic_modules.py @@ -0,0 +1,19 @@ +import importlib +from typing import Any, List + + +def _new_module(mod_name): + spec = importlib.machinery.ModuleSpec(mod_name, None) + return importlib.util.module_from_spec(spec) + + +def create_module(name: str, objects: List[Any]): + """ + Dynamically create a python module using the specified name and add + the specified objects to the new module. Each object may be a class, + variable, or function. + """ + mod = _new_module(name) + for obj in objects: + setattr(mod, obj.__name__, obj) + return mod diff --git a/tests/unit_tests/udf_framework/test_dynamic_modules.py b/tests/unit_tests/udf_framework/test_dynamic_modules.py new file mode 100644 index 00000000..3d7b0e48 --- /dev/null +++ b/tests/unit_tests/udf_framework/test_dynamic_modules.py @@ -0,0 +1,10 @@ +from exasol_advanced_analytics_framework.udf_framework.dynamic_modules import create_module + +class ExampleClass: + pass + + +def test_create_module(): + new_module = create_module("xyz", [ExampleClass]) + instance = new_module.ExampleClass() + assert isinstance(instance, ExampleClass) From 9d05c4cf4fd8cd3392cf75773397dae1d188c4b3 Mon Sep 17 00:00:00 2001 From: ckunki Date: Fri, 18 Oct 2024 16:10:36 +0200 Subject: [PATCH 02/49] Updated dynamic_modules, preferring functions over a class --- .../udf_framework/dynamic_modules.py | 31 +++++++++++-------- .../udf_framework/test_dynamic_modules.py | 24 +++++++++++--- 2 files changed, 38 insertions(+), 17 deletions(-) diff --git a/exasol_advanced_analytics_framework/udf_framework/dynamic_modules.py b/exasol_advanced_analytics_framework/udf_framework/dynamic_modules.py index 96732d04..8086ea4d 100644 --- a/exasol_advanced_analytics_framework/udf_framework/dynamic_modules.py +++ b/exasol_advanced_analytics_framework/udf_framework/dynamic_modules.py @@ -1,19 +1,24 @@ +import sys import importlib -from typing import Any, List +from typing import Any +from types import ModuleType -def _new_module(mod_name): - spec = importlib.machinery.ModuleSpec(mod_name, None) - return importlib.util.module_from_spec(spec) - - -def create_module(name: str, objects: List[Any]): +def create_module(name: str) -> ModuleType: """ - Dynamically create a python module using the specified name and add - the specified objects to the new module. Each object may be a class, - variable, or function. + Dynamically create a python module using the specified name and + register the module in sys.modules[]. + + Additionally add a function add_to_module() to the module enabling other + code to add classes and functions to the module. """ - mod = _new_module(name) - for obj in objects: - setattr(mod, obj.__name__, obj) + spec = importlib.machinery.ModuleSpec(name, None) + mod = importlib.util.module_from_spec(spec) + sys.modules[name] = mod + + def add_to_module(object: Any): + object.__module__ = name + setattr(mod, object.__name__, object) + + add_to_module(add_to_module) return mod diff --git a/tests/unit_tests/udf_framework/test_dynamic_modules.py b/tests/unit_tests/udf_framework/test_dynamic_modules.py index 3d7b0e48..0b3feb66 100644 --- a/tests/unit_tests/udf_framework/test_dynamic_modules.py +++ b/tests/unit_tests/udf_framework/test_dynamic_modules.py @@ -1,10 +1,26 @@ from exasol_advanced_analytics_framework.udf_framework.dynamic_modules import create_module + class ExampleClass: pass -def test_create_module(): - new_module = create_module("xyz", [ExampleClass]) - instance = new_module.ExampleClass() - assert isinstance(instance, ExampleClass) +def example_function(): + return "example_function return value" + + +def test_create_module_with_class(): + mod = create_module("xx1") + mod.add_to_module(ExampleClass) + import xx1 + instance = xx1.ExampleClass() + assert isinstance(instance, ExampleClass) and \ + ExampleClass.__module__ == "xx1" + + +def test_add_function(): + mod = create_module("xx2") + mod.add_to_module(example_function) + import xx2 + assert xx2.example_function() == "example_function return value" \ + and example_function.__module__ == "xx2" From e231ac1eb323afa43cbaad1bcedf14df76d6afd2 Mon Sep 17 00:00:00 2001 From: ckunki Date: Mon, 21 Oct 2024 08:55:06 +0200 Subject: [PATCH 03/49] #190: Enabled to generate a dynamic module for custom UDF Implemented initial components --- ...ackaging.yml => check-code-generation.yml} | 21 ++--- doc/user_guide/user_guide.md | 2 + .../example/__init__.py | 0 .../example/generator.py | 55 +++++++++++ .../example/query_handler.py | 82 ++++++++++++++++ .../example/sql.jinja | 15 +++ noxfile.py | 9 ++ scripts/document_updater.py | 94 +++++++++++++++++++ .../udf_framework/test_dynamic_modules.py | 2 +- 9 files changed, 268 insertions(+), 12 deletions(-) rename .github/workflows/{check-packaging.yml => check-code-generation.yml} (70%) create mode 100644 exasol_advanced_analytics_framework/example/__init__.py create mode 100644 exasol_advanced_analytics_framework/example/generator.py create mode 100644 exasol_advanced_analytics_framework/example/query_handler.py create mode 100644 exasol_advanced_analytics_framework/example/sql.jinja create mode 100644 scripts/document_updater.py diff --git a/.github/workflows/check-packaging.yml b/.github/workflows/check-code-generation.yml similarity index 70% rename from .github/workflows/check-packaging.yml rename to .github/workflows/check-code-generation.yml index 8514ce77..59bab88f 100644 --- a/.github/workflows/check-packaging.yml +++ b/.github/workflows/check-code-generation.yml @@ -1,4 +1,4 @@ -name: Check packaging of the python package +name: Check Code Generation on: push: @@ -6,7 +6,8 @@ on: - main jobs: - check_packaging: + check_lua_amalgate: + name: Lua Amalgate strategy: fail-fast: false matrix: @@ -27,19 +28,17 @@ jobs: - name: Poetry install run: poetry run -- nox -s run_in_dev_env -- poetry install - - name: Run packaging update + - name: Run Lua amalgation # re-generates / amalgate the lua script # refactor pre-commit as nox task # and call in pre-commit run: bash ./githooks/pre-commit - - name: Show changes on working copy - # check if re-generated lua script is still up-to-date - run: git status --porcelain=v1 -uno + - name: Check if code generated by Lua amalgation changed + run: | + git status --porcelain=v1 --untracked-files=no + git diff --exit-code - - name: Show diff on working copy + - name: Show changes on working copy + if: ${{ failure() }} run: git diff --cached - - - name: Check if packaging changed - run: | - [ -z "$(git status --porcelain=v1 -uno 2>/dev/null)" ] diff --git a/doc/user_guide/user_guide.md b/doc/user_guide/user_guide.md index c308aad9..c0e1d5df 100644 --- a/doc/user_guide/user_guide.md +++ b/doc/user_guide/user_guide.md @@ -190,6 +190,7 @@ Each algorithm should extend the `UDFQueryHandler` abstract class and then imple The example uses the module `builtins` and dynamically adds `ExampleQueryHandler` and `ExampleQueryHandlerFactory` to it. + ```python --/ CREATE OR REPLACE PYTHON3_AAF SET SCRIPT "MY_SCHEMA"."MY_QUERY_HANDLER_UDF"(...) @@ -270,6 +271,7 @@ EXECUTE SCRIPT MY_SCHEMA.AAF_RUN_QUERY_HANDLER('{ } }'); ``` + The figure below illustrates the execution of this algorithm implemented in class `ExampleQueryHandler`. * When method `start()` is called, it executes two queries and an additional `input_query` to obtain the input for the next iteration. diff --git a/exasol_advanced_analytics_framework/example/__init__.py b/exasol_advanced_analytics_framework/example/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/exasol_advanced_analytics_framework/example/generator.py b/exasol_advanced_analytics_framework/example/generator.py new file mode 100644 index 00000000..c85329b4 --- /dev/null +++ b/exasol_advanced_analytics_framework/example/generator.py @@ -0,0 +1,55 @@ +import json +import importlib.resources +from jinja2 import Template, Environment, PackageLoader, select_autoescape +from pathlib import Path +from exasol_advanced_analytics_framework.deployment import constants +from exasol_advanced_analytics_framework.deployment.jinja_template_location import JinjaTemplateLocation + +PACKAGE_PATH = "example" + +QUERY_HANDLER_SCRIPT = { + "query_handler": { + "factory_class": { + "module": "xyz", + "name": "ExampleQueryHandlerFactory", + }, + "parameters": "bla-bla", + "udf": { + "schema": "MY_SCHEMA", + "name": "MY_QUERY_HANDLER_UDF", + }, + }, + "temporary_output": { + "bucketfs_location": { + "connection_name": "BFS_CON", + "directory": "temp", + }, + "schema_name": "TEMP_SCHEMA", + }, +} + +def jinja_env(): + return Environment( + loader=PackageLoader( + package_name=constants.BASE_DIR, + package_path=PACKAGE_PATH), + autoescape=select_autoescape() + ) + + +def generate(): + env = jinja_env() + python_code = importlib.resources.read_text( + f"{constants.BASE_DIR}.{PACKAGE_PATH}", + "query_handler.py", + ) + json_code = json.dumps(QUERY_HANDLER_SCRIPT, indent=4) + template = env.get_template("sql.jinja") + return template.render( + python_code=python_code, + json_code=json_code, + ) + + +if __name__ == "__main__": + print(f'{generate()}') diff --git a/exasol_advanced_analytics_framework/example/query_handler.py b/exasol_advanced_analytics_framework/example/query_handler.py new file mode 100644 index 00000000..7ac701f0 --- /dev/null +++ b/exasol_advanced_analytics_framework/example/query_handler.py @@ -0,0 +1,82 @@ +from typing import Union +from exasol_advanced_analytics_framework.udf_framework.udf_query_handler import UDFQueryHandler +from exasol_advanced_analytics_framework.udf_framework.dynamic_modules import create_module +from exasol_advanced_analytics_framework.query_handler.context.query_handler_context import QueryHandlerContext +from exasol_advanced_analytics_framework.query_result.query_result import QueryResult +from exasol_advanced_analytics_framework.query_handler.result import Result, Continue, Finish +from exasol_advanced_analytics_framework.query_handler.query.select_query import SelectQuery, SelectQueryWithColumnDefinition +from exasol_advanced_analytics_framework.query_handler.context.proxy.bucketfs_location_proxy import \ + BucketFSLocationProxy +from exasol_data_science_utils_python.schema.column import Column +from exasol_data_science_utils_python.schema.column_name import ColumnName +from exasol_data_science_utils_python.schema.column_type import ColumnType +from datetime import datetime +from exasol.bucketfs import as_string + + +xyz = create_module("xyz") + +class ExampleQueryHandler(UDFQueryHandler): + + def __init__(self, parameter: str, query_handler_context: QueryHandlerContext): + super().__init__(parameter, query_handler_context) + self.parameter = parameter + self.query_handler_context = query_handler_context + self.bfs_proxy = None + self.db_table_proxy = None + + def _bfs_file(self, proxy: BucketFSLocationProxy): + return proxy.bucketfs_location() / "temp_file.txt" + + def start(self) -> Union[Continue, Finish[str]]: + def sample_content(key: str) -> str: + timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + return f"{timestamp} {key} {self.parameter}" + + def table_query_string(statement: str, **kwargs): + table_name = self.db_table_proxy._db_object_name.fully_qualified + return statement.format(table_name=table_name, **kwargs) + + def table_query(statement: str, **kwargs): + return SelectQuery(table_query_string(statement, **kwargs)) + + self.bfs_proxy = self.query_handler_context.get_temporary_bucketfs_location() + self._bfs_file(self.bfs_proxy).write(sample_content("bucketfs")) + self.db_table_proxy = self.query_handler_context.get_temporary_table_name() + query_list = [ + table_query('CREATE TABLE {table_name} ("c1" VARCHAR(100), "c2" INTEGER)'), + table_query("INSERT INTO {table_name} VALUES ('{value}', 4)", + value=sample_content("table-insert")), + ] + query_handler_return_query = SelectQueryWithColumnDefinition( + query_string=table_query_string('SELECT "c1", "c2" from {table_name}'), + output_columns=[ + Column(ColumnName("c1"), ColumnType("VARCHAR(100)")), + Column(ColumnName("c2"), ColumnType("INTEGER")), + ]) + return Continue( + query_list=query_list, + input_query=query_handler_return_query) + + def handle_query_result(self, query_result: QueryResult) -> Union[Continue, Finish[str]]: + c1 = query_result.c1 + c2 = query_result.c2 + bfs_content = as_string(self._bfs_file(self.bfs_proxy).read()) + return Finish(result=f"Final result: from query '{c1}', {c2} and bucketfs: '{bfs_content}'") + + +xyz.add_to_module(ExampleQueryHandler) + +class ExampleQueryHandlerFactory: + def create(self, parameter: str, query_handler_context: QueryHandlerContext): + return xyz.ExampleQueryHandler(parameter, query_handler_context) + +xyz.add_to_module(ExampleQueryHandlerFactory) + +from exasol_advanced_analytics_framework.udf_framework.query_handler_runner_udf \ + import QueryHandlerRunnerUDF + +udf = QueryHandlerRunnerUDF(exa) + +def run(ctx): + return udf.run(ctx) diff --git a/exasol_advanced_analytics_framework/example/sql.jinja b/exasol_advanced_analytics_framework/example/sql.jinja new file mode 100644 index 00000000..c3f21f04 --- /dev/null +++ b/exasol_advanced_analytics_framework/example/sql.jinja @@ -0,0 +1,15 @@ +create schema IF NOT EXISTS "TEMP_SCHEMA"; +create schema IF NOT EXISTS "MY_SCHEMA"; +open schema "MY_SCHEMA"; + +ALTER SYSTEM SET SCRIPT_LANGUAGES='R=builtin_r JAVA=builtin_java PYTHON3=builtin_python3 PYTHON3_AAF=localzmq+protobuf:///bfsdefault/default/temp/exasol_advanced_analytics_framework_container_release?lang=python#/buckets/bfsdefault/default/temp/exasol_advanced_analytics_framework_container_release/exaudf/exaudfclient_py3'; + +--/ +CREATE OR REPLACE PYTHON3_AAF SET SCRIPT "MY_SCHEMA"."MY_QUERY_HANDLER_UDF"(...) +EMITS (outputs VARCHAR(2000000)) AS + +{{python_code}} + +/ + +EXECUTE SCRIPT MY_SCHEMA.AAF_RUN_QUERY_HANDLER('{{json_code}}'); diff --git a/noxfile.py b/noxfile.py index d66922b5..bf864228 100644 --- a/noxfile.py +++ b/noxfile.py @@ -2,6 +2,7 @@ import os from pathlib import Path from exasol_advanced_analytics_framework.slc import custom_slc_builder +from scripts.document_updater import update_examples from datetime import datetime import nox @@ -124,3 +125,11 @@ def run_python_integration_tests_with_db(session: Session): str(integration_test_directory), *session.posargs, ) + + +@nox.session(python=False) +def update_user_guide(session: Session): + """ + This task updates the examples in the user guide. + """ + update_examples(Path("doc/user_guide/u1.md")) diff --git a/scripts/document_updater.py b/scripts/document_updater.py new file mode 100644 index 00000000..05f65559 --- /dev/null +++ b/scripts/document_updater.py @@ -0,0 +1,94 @@ +import sys +import re + +from typing import List +from dataclasses import dataclass +from pathlib import Path +from inspect import cleandoc + +from exasol_advanced_analytics_framework.example import generator as example_generator + + +class ParseException(Exception): + """ + If input file is not well-formed. + """ + + +@dataclass +class Template: + path: str + + def render(self): + if self.path != "example/sql.jinja": + raise ParseException("") + return "\n".join([ + "", + "```python", + example_generator.generate(), + "```", + ""]) + + +class ChunkReader: + """ + Enables to replace chunks of a string by text generated from + jinja templates. + """ + def __init__(self): + self._generated = None + self._plain = [] + self._chunks = [] + + def _process_plain(self): + if self._plain: + self._chunks.append("\n".join(self._plain) + "\n") + self._plain = [] + + def _start_generated(self, line: str, match: re.Match): + if self._generated: + raise ParseException( + f"Found another {line} before {self._generated} was closed." + ) + self._plain += [ + line, + "", + ] + self._process_plain() + self._generated = line + self._chunks.append(Template(match.group(1))) + + def _end_generated(self, line: str): + if not self._generated: + raise ParseException( + f"Found {line} before any ." + ) + self._generated = None + self._plain.append(line) + + def split(self, content: str) -> List[str|Template]: + start = re.compile("") + end = re.compile("") + for line in content.splitlines(): + match = start.match(line) + if match: + self._start_generated(line, match) + elif end.match(line): + self._end_generated(line) + elif not self._generated: + self._plain.append(line) + self._process_plain() + return self._chunks + + @classmethod + def chunks(cls, content: str): + return cls().split(content) + + +def update_examples(path: Path): + content = path.read_text() + with path.open(mode="w") as f: + for chunk in ChunkReader.chunks(content): + f.write(chunk if type(chunk) == str else chunk.render()) diff --git a/tests/unit_tests/udf_framework/test_dynamic_modules.py b/tests/unit_tests/udf_framework/test_dynamic_modules.py index 0b3feb66..ba1db1c0 100644 --- a/tests/unit_tests/udf_framework/test_dynamic_modules.py +++ b/tests/unit_tests/udf_framework/test_dynamic_modules.py @@ -20,7 +20,7 @@ def test_create_module_with_class(): def test_add_function(): mod = create_module("xx2") - mod.add_to_module(example_function) import xx2 + xx2.add_to_module(example_function) assert xx2.example_function() == "example_function return value" \ and example_function.__module__ == "xx2" From 69ad70df5ef5b5ac12d8adbd726665a8d0c89106 Mon Sep 17 00:00:00 2001 From: ckunki Date: Mon, 21 Oct 2024 08:58:08 +0200 Subject: [PATCH 04/49] Experiment for GH workflow --- .github/workflows/check-code-generation.yml | 2 +- .../resources/outputs/create_query_loop.sql | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/check-code-generation.yml b/.github/workflows/check-code-generation.yml index 59bab88f..1de0ba18 100644 --- a/.github/workflows/check-code-generation.yml +++ b/.github/workflows/check-code-generation.yml @@ -34,7 +34,7 @@ jobs: # and call in pre-commit run: bash ./githooks/pre-commit - - name: Check if code generated by Lua amalgation changed + - name: Check if code re-generated file differ from commit run: | git status --porcelain=v1 --untracked-files=no git diff --exit-code diff --git a/exasol_advanced_analytics_framework/resources/outputs/create_query_loop.sql b/exasol_advanced_analytics_framework/resources/outputs/create_query_loop.sql index 811a9df1..ceaaea53 100644 --- a/exasol_advanced_analytics_framework/resources/outputs/create_query_loop.sql +++ b/exasol_advanced_analytics_framework/resources/outputs/create_query_loop.sql @@ -1,4 +1,4 @@ --- This file was generated by the ExasolLuaScriptGenerator. +-- This file was generated by the ExasolLuaScriptGenerator. -- CREATE OR REPLACE LUA SCRIPT "AAF_RUN_QUERY_HANDLER"(json_str) RETURNS TABLE AS table.insert(_G.package.searchers, @@ -666,8 +666,8 @@ query_handler_runner = require("query_handler_runner") --- -- This is the main function of the Query Loop. -- --- @param json_str input parameters as JSON string --- @param exa the database context (`exa`) of the Lua script +-- @param json_str input parameters as JSON string +-- @param exa the database context (`exa`) of the Lua script -- function query_handler_runner_main(json_str, exa) return query_handler_runner.run(json_str, exa) From 237a0cb317fea47315fdbb0ccf04f96b9e88f8e5 Mon Sep 17 00:00:00 2001 From: ckunki Date: Mon, 21 Oct 2024 09:08:49 +0200 Subject: [PATCH 05/49] Experiment for GH workflow 2 --- .github/workflows/check-code-generation.yml | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/.github/workflows/check-code-generation.yml b/.github/workflows/check-code-generation.yml index 1de0ba18..acb47bfb 100644 --- a/.github/workflows/check-code-generation.yml +++ b/.github/workflows/check-code-generation.yml @@ -6,7 +6,7 @@ on: - main jobs: - check_lua_amalgate: + check_code_generation: name: Lua Amalgate strategy: fail-fast: false @@ -34,11 +34,14 @@ jobs: # and call in pre-commit run: bash ./githooks/pre-commit - - name: Check if code re-generated file differ from commit + - name: Show differences run: | git status --porcelain=v1 --untracked-files=no - git diff --exit-code + git diff --cached - - name: Show changes on working copy - if: ${{ failure() }} - run: git diff --cached + - name: Fail if re-generated files differ from commit + run: git diff --exit-code + +# - name: Show changes on working copy +# if: ${{ failure() }} +# run: git diff --cached From e94b29abf07e112db622f0ddeca34dbcf6875c3c Mon Sep 17 00:00:00 2001 From: ckunki Date: Mon, 21 Oct 2024 09:18:26 +0200 Subject: [PATCH 06/49] Experiment for GH workflow 3 --- .github/workflows/check-code-generation.yml | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/.github/workflows/check-code-generation.yml b/.github/workflows/check-code-generation.yml index acb47bfb..68dfd7c8 100644 --- a/.github/workflows/check-code-generation.yml +++ b/.github/workflows/check-code-generation.yml @@ -34,13 +34,16 @@ jobs: # and call in pre-commit run: bash ./githooks/pre-commit - - name: Show differences + - name: Show differences run: | git status --porcelain=v1 --untracked-files=no - git diff --cached - name: Fail if re-generated files differ from commit - run: git diff --exit-code + run: | + if [ -n $(git status --untracked-files=no) ]; then + git diff --cached + false + fi # - name: Show changes on working copy # if: ${{ failure() }} From 24df40746f54c1f987a943c84d9ccb161c01ea31 Mon Sep 17 00:00:00 2001 From: ckunki Date: Mon, 21 Oct 2024 09:21:13 +0200 Subject: [PATCH 07/49] Experiment for GH workflow 4 --- .github/workflows/check-code-generation.yml | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/.github/workflows/check-code-generation.yml b/.github/workflows/check-code-generation.yml index 68dfd7c8..e7c2ed84 100644 --- a/.github/workflows/check-code-generation.yml +++ b/.github/workflows/check-code-generation.yml @@ -40,11 +40,7 @@ jobs: - name: Fail if re-generated files differ from commit run: | - if [ -n $(git status --untracked-files=no) ]; then + if [ -n "$(git status --untracked-files=no)" ]; then git diff --cached false fi - -# - name: Show changes on working copy -# if: ${{ failure() }} -# run: git diff --cached From c1b7068e460f02b885d1ebd613a379003d543875 Mon Sep 17 00:00:00 2001 From: ckunki Date: Mon, 21 Oct 2024 09:24:01 +0200 Subject: [PATCH 08/49] Reset changes to file create_query_loop.sql --- .../resources/outputs/create_query_loop.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exasol_advanced_analytics_framework/resources/outputs/create_query_loop.sql b/exasol_advanced_analytics_framework/resources/outputs/create_query_loop.sql index ceaaea53..f8f41f0e 100644 --- a/exasol_advanced_analytics_framework/resources/outputs/create_query_loop.sql +++ b/exasol_advanced_analytics_framework/resources/outputs/create_query_loop.sql @@ -1,4 +1,4 @@ --- This file was generated by the ExasolLuaScriptGenerator. -- +-- This file was generated by the ExasolLuaScriptGenerator. CREATE OR REPLACE LUA SCRIPT "AAF_RUN_QUERY_HANDLER"(json_str) RETURNS TABLE AS table.insert(_G.package.searchers, From 0b7a4cdc6897fba8792480f082adb37fa96758a5 Mon Sep 17 00:00:00 2001 From: ckunki Date: Mon, 21 Oct 2024 09:38:08 +0200 Subject: [PATCH 09/49] Experiment for GH workflow 5 --- .github/workflows/check-code-generation.yml | 35 +++++++++++-------- .../resources/outputs/create_query_loop.sql | 4 +-- noxfile.py | 6 ++++ 3 files changed, 28 insertions(+), 17 deletions(-) diff --git a/.github/workflows/check-code-generation.yml b/.github/workflows/check-code-generation.yml index e7c2ed84..6b394aaa 100644 --- a/.github/workflows/check-code-generation.yml +++ b/.github/workflows/check-code-generation.yml @@ -23,24 +23,29 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install Development Environment - run: poetry run -- nox -s install_dev_env + run: poetry run nox -s install_dev_env - name: Poetry install run: poetry run -- nox -s run_in_dev_env -- poetry install - name: Run Lua amalgation # re-generates / amalgate the lua script - # refactor pre-commit as nox task - # and call in pre-commit - run: bash ./githooks/pre-commit - - - name: Show differences - run: | - git status --porcelain=v1 --untracked-files=no - - - name: Fail if re-generated files differ from commit - run: | - if [ -n "$(git status --untracked-files=no)" ]; then - git diff --cached - false - fi + # run: bash ./githooks/pre-commit + run: poetry run nox -s amalgate_lua_scripts + + - name: Check if re-generated files differ from commit + run: git diff --cached --exit-code + +# - name: Show differences +# run: git status --porcelain=v1 --untracked-files=no +# +# - name: Fail if re-generated files differ from commit +# run: | +# if [ -n "$(git status --untracked-files=no)" ]; then +# git diff --cached +# false +# fi + +# - name: Show changes on working copy +# if: ${{ failure() }} +# run: git diff --cached diff --git a/exasol_advanced_analytics_framework/resources/outputs/create_query_loop.sql b/exasol_advanced_analytics_framework/resources/outputs/create_query_loop.sql index f8f41f0e..811a9df1 100644 --- a/exasol_advanced_analytics_framework/resources/outputs/create_query_loop.sql +++ b/exasol_advanced_analytics_framework/resources/outputs/create_query_loop.sql @@ -666,8 +666,8 @@ query_handler_runner = require("query_handler_runner") --- -- This is the main function of the Query Loop. -- --- @param json_str input parameters as JSON string --- @param exa the database context (`exa`) of the Lua script +-- @param json_str input parameters as JSON string +-- @param exa the database context (`exa`) of the Lua script -- function query_handler_runner_main(json_str, exa) return query_handler_runner.run(json_str, exa) diff --git a/noxfile.py b/noxfile.py index bf864228..c3406a98 100644 --- a/noxfile.py +++ b/noxfile.py @@ -53,6 +53,12 @@ def install_dev_env(session: Session): session.run(str(install_script)) +@nox.session(python=False) +def amalgate_lua_scripts(session: Session): + script = ROOT_DIR / "exasol_advanced_analytics_framework" / "deployment" / "regenerate_scripts.py" + _run_in_dev_env_poetry_call(session, "python", str(script)) + + @nox.session(python=False) def run_lua_unit_tests(session: Session): lua_tests_script = SCRIPTS_DIRECTORY / "lua_tests.sh" From 3e60314e243ff0c6e58d0584ea9099b075da543c Mon Sep 17 00:00:00 2001 From: ckunki Date: Mon, 21 Oct 2024 09:39:54 +0200 Subject: [PATCH 10/49] Experiment for GH workflow 6 --- .../resources/outputs/create_query_loop.sql | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/exasol_advanced_analytics_framework/resources/outputs/create_query_loop.sql b/exasol_advanced_analytics_framework/resources/outputs/create_query_loop.sql index 811a9df1..ceaaea53 100644 --- a/exasol_advanced_analytics_framework/resources/outputs/create_query_loop.sql +++ b/exasol_advanced_analytics_framework/resources/outputs/create_query_loop.sql @@ -1,4 +1,4 @@ --- This file was generated by the ExasolLuaScriptGenerator. +-- This file was generated by the ExasolLuaScriptGenerator. -- CREATE OR REPLACE LUA SCRIPT "AAF_RUN_QUERY_HANDLER"(json_str) RETURNS TABLE AS table.insert(_G.package.searchers, @@ -666,8 +666,8 @@ query_handler_runner = require("query_handler_runner") --- -- This is the main function of the Query Loop. -- --- @param json_str input parameters as JSON string --- @param exa the database context (`exa`) of the Lua script +-- @param json_str input parameters as JSON string +-- @param exa the database context (`exa`) of the Lua script -- function query_handler_runner_main(json_str, exa) return query_handler_runner.run(json_str, exa) From 73db337af2b8bcd67ba1c211d819e2bddf1de1cd Mon Sep 17 00:00:00 2001 From: ckunki Date: Mon, 21 Oct 2024 09:42:22 +0200 Subject: [PATCH 11/49] Updated pre-commit hook to use nox task --- githooks/pre-commit | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/githooks/pre-commit b/githooks/pre-commit index ba113b9e..80411bc7 100755 --- a/githooks/pre-commit +++ b/githooks/pre-commit @@ -8,10 +8,10 @@ GITHOOKS_PATH="$REPO_DIR/githooks" pushd "$REPO_DIR" bash "$GITHOOKS_PATH/prohibit_commit_to_main.sh" - -SRC_PATH="$REPO_DIR/exasol_advanced_analytics_framework" -export PYTHONPATH=. -"$REPO_DIR"/scripts/run_in_dev_env.sh poetry run python3 "$SRC_PATH/deployment/regenerate_scripts.py" +poetry run nox -s amalgate_lua_scripts +# SRC_PATH="$REPO_DIR/exasol_advanced_analytics_framework" +# export PYTHONPATH=. +# "$REPO_DIR"/scripts/run_in_dev_env.sh poetry run python3 "$SRC_PATH/deployment/regenerate_scripts.py" git add "$SRC_PATH/resources/outputs/" popd From 4c8bdbbf159f1e51a76521f74299b22e89b49209 Mon Sep 17 00:00:00 2001 From: ckunki Date: Mon, 21 Oct 2024 09:44:56 +0200 Subject: [PATCH 12/49] Updated create_query_loop.sql --- .../resources/outputs/create_query_loop.sql | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/exasol_advanced_analytics_framework/resources/outputs/create_query_loop.sql b/exasol_advanced_analytics_framework/resources/outputs/create_query_loop.sql index ceaaea53..811a9df1 100644 --- a/exasol_advanced_analytics_framework/resources/outputs/create_query_loop.sql +++ b/exasol_advanced_analytics_framework/resources/outputs/create_query_loop.sql @@ -1,4 +1,4 @@ --- This file was generated by the ExasolLuaScriptGenerator. -- +-- This file was generated by the ExasolLuaScriptGenerator. CREATE OR REPLACE LUA SCRIPT "AAF_RUN_QUERY_HANDLER"(json_str) RETURNS TABLE AS table.insert(_G.package.searchers, @@ -666,8 +666,8 @@ query_handler_runner = require("query_handler_runner") --- -- This is the main function of the Query Loop. -- --- @param json_str input parameters as JSON string --- @param exa the database context (`exa`) of the Lua script +-- @param json_str input parameters as JSON string +-- @param exa the database context (`exa`) of the Lua script -- function query_handler_runner_main(json_str, exa) return query_handler_runner.run(json_str, exa) From dcb07f12fca3f38eb8dad72f4e65b5ea256ddfef Mon Sep 17 00:00:00 2001 From: ckunki Date: Mon, 21 Oct 2024 09:46:02 +0200 Subject: [PATCH 13/49] Updated create_query_loop.sql --- .../resources/outputs/create_query_loop.sql | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/exasol_advanced_analytics_framework/resources/outputs/create_query_loop.sql b/exasol_advanced_analytics_framework/resources/outputs/create_query_loop.sql index 811a9df1..ceaaea53 100644 --- a/exasol_advanced_analytics_framework/resources/outputs/create_query_loop.sql +++ b/exasol_advanced_analytics_framework/resources/outputs/create_query_loop.sql @@ -1,4 +1,4 @@ --- This file was generated by the ExasolLuaScriptGenerator. +-- This file was generated by the ExasolLuaScriptGenerator. -- CREATE OR REPLACE LUA SCRIPT "AAF_RUN_QUERY_HANDLER"(json_str) RETURNS TABLE AS table.insert(_G.package.searchers, @@ -666,8 +666,8 @@ query_handler_runner = require("query_handler_runner") --- -- This is the main function of the Query Loop. -- --- @param json_str input parameters as JSON string --- @param exa the database context (`exa`) of the Lua script +-- @param json_str input parameters as JSON string +-- @param exa the database context (`exa`) of the Lua script -- function query_handler_runner_main(json_str, exa) return query_handler_runner.run(json_str, exa) From b5314aba6c74730b8ca13b36ac932b263c99f575 Mon Sep 17 00:00:00 2001 From: ckunki Date: Mon, 21 Oct 2024 09:46:44 +0200 Subject: [PATCH 14/49] Experiment for GH workflow 7 --- .github/workflows/check-code-generation.yml | 2 +- .../resources/outputs/create_query_loop.sql | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/check-code-generation.yml b/.github/workflows/check-code-generation.yml index 6b394aaa..3a901a71 100644 --- a/.github/workflows/check-code-generation.yml +++ b/.github/workflows/check-code-generation.yml @@ -34,7 +34,7 @@ jobs: run: poetry run nox -s amalgate_lua_scripts - name: Check if re-generated files differ from commit - run: git diff --cached --exit-code + run: git diff --exit-code # - name: Show differences # run: git status --porcelain=v1 --untracked-files=no diff --git a/exasol_advanced_analytics_framework/resources/outputs/create_query_loop.sql b/exasol_advanced_analytics_framework/resources/outputs/create_query_loop.sql index ceaaea53..811a9df1 100644 --- a/exasol_advanced_analytics_framework/resources/outputs/create_query_loop.sql +++ b/exasol_advanced_analytics_framework/resources/outputs/create_query_loop.sql @@ -1,4 +1,4 @@ --- This file was generated by the ExasolLuaScriptGenerator. -- +-- This file was generated by the ExasolLuaScriptGenerator. CREATE OR REPLACE LUA SCRIPT "AAF_RUN_QUERY_HANDLER"(json_str) RETURNS TABLE AS table.insert(_G.package.searchers, @@ -666,8 +666,8 @@ query_handler_runner = require("query_handler_runner") --- -- This is the main function of the Query Loop. -- --- @param json_str input parameters as JSON string --- @param exa the database context (`exa`) of the Lua script +-- @param json_str input parameters as JSON string +-- @param exa the database context (`exa`) of the Lua script -- function query_handler_runner_main(json_str, exa) return query_handler_runner.run(json_str, exa) From 2d8ac83bebc15aa05e655fc5a485f266f962b2ac Mon Sep 17 00:00:00 2001 From: ckunki Date: Mon, 21 Oct 2024 09:48:20 +0200 Subject: [PATCH 15/49] Experiment for GH workflow 8 --- .../resources/outputs/create_query_loop.sql | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/exasol_advanced_analytics_framework/resources/outputs/create_query_loop.sql b/exasol_advanced_analytics_framework/resources/outputs/create_query_loop.sql index 811a9df1..ceaaea53 100644 --- a/exasol_advanced_analytics_framework/resources/outputs/create_query_loop.sql +++ b/exasol_advanced_analytics_framework/resources/outputs/create_query_loop.sql @@ -1,4 +1,4 @@ --- This file was generated by the ExasolLuaScriptGenerator. +-- This file was generated by the ExasolLuaScriptGenerator. -- CREATE OR REPLACE LUA SCRIPT "AAF_RUN_QUERY_HANDLER"(json_str) RETURNS TABLE AS table.insert(_G.package.searchers, @@ -666,8 +666,8 @@ query_handler_runner = require("query_handler_runner") --- -- This is the main function of the Query Loop. -- --- @param json_str input parameters as JSON string --- @param exa the database context (`exa`) of the Lua script +-- @param json_str input parameters as JSON string +-- @param exa the database context (`exa`) of the Lua script -- function query_handler_runner_main(json_str, exa) return query_handler_runner.run(json_str, exa) From 1a8e0c4d090fbdbf24da5e137cb081b40d1432fb Mon Sep 17 00:00:00 2001 From: ckunki Date: Mon, 21 Oct 2024 09:48:45 +0200 Subject: [PATCH 16/49] Removed dead code from pre-commit --- githooks/pre-commit | 3 --- 1 file changed, 3 deletions(-) diff --git a/githooks/pre-commit b/githooks/pre-commit index 80411bc7..0a1616a0 100755 --- a/githooks/pre-commit +++ b/githooks/pre-commit @@ -9,9 +9,6 @@ pushd "$REPO_DIR" bash "$GITHOOKS_PATH/prohibit_commit_to_main.sh" poetry run nox -s amalgate_lua_scripts -# SRC_PATH="$REPO_DIR/exasol_advanced_analytics_framework" -# export PYTHONPATH=. -# "$REPO_DIR"/scripts/run_in_dev_env.sh poetry run python3 "$SRC_PATH/deployment/regenerate_scripts.py" git add "$SRC_PATH/resources/outputs/" popd From 658d90ba451bbf1f01ba27f6d171a354dcd19de3 Mon Sep 17 00:00:00 2001 From: ckunki Date: Mon, 21 Oct 2024 09:52:13 +0200 Subject: [PATCH 17/49] Added check for user guide up-to-date --- .github/workflows/check-code-generation.yml | 21 ++++--------------- .../resources/outputs/create_query_loop.sql | 2 +- noxfile.py | 2 +- 3 files changed, 6 insertions(+), 19 deletions(-) diff --git a/.github/workflows/check-code-generation.yml b/.github/workflows/check-code-generation.yml index 3a901a71..f121365d 100644 --- a/.github/workflows/check-code-generation.yml +++ b/.github/workflows/check-code-generation.yml @@ -28,24 +28,11 @@ jobs: - name: Poetry install run: poetry run -- nox -s run_in_dev_env -- poetry install - - name: Run Lua amalgation - # re-generates / amalgate the lua script - # run: bash ./githooks/pre-commit + - name: Amalgate Lua Scripts run: poetry run nox -s amalgate_lua_scripts + - name: Update Examples in User Guide + run: poetry run nox -s update_user_guide + - name: Check if re-generated files differ from commit run: git diff --exit-code - -# - name: Show differences -# run: git status --porcelain=v1 --untracked-files=no -# -# - name: Fail if re-generated files differ from commit -# run: | -# if [ -n "$(git status --untracked-files=no)" ]; then -# git diff --cached -# false -# fi - -# - name: Show changes on working copy -# if: ${{ failure() }} -# run: git diff --cached diff --git a/exasol_advanced_analytics_framework/resources/outputs/create_query_loop.sql b/exasol_advanced_analytics_framework/resources/outputs/create_query_loop.sql index ceaaea53..f8f41f0e 100644 --- a/exasol_advanced_analytics_framework/resources/outputs/create_query_loop.sql +++ b/exasol_advanced_analytics_framework/resources/outputs/create_query_loop.sql @@ -1,4 +1,4 @@ --- This file was generated by the ExasolLuaScriptGenerator. -- +-- This file was generated by the ExasolLuaScriptGenerator. CREATE OR REPLACE LUA SCRIPT "AAF_RUN_QUERY_HANDLER"(json_str) RETURNS TABLE AS table.insert(_G.package.searchers, diff --git a/noxfile.py b/noxfile.py index c3406a98..36c70343 100644 --- a/noxfile.py +++ b/noxfile.py @@ -138,4 +138,4 @@ def update_user_guide(session: Session): """ This task updates the examples in the user guide. """ - update_examples(Path("doc/user_guide/u1.md")) + update_examples(Path("doc/user_guide/user_guide.md")) From d03f8ee681bff70218023586b15dcd1f6f12bbe4 Mon Sep 17 00:00:00 2001 From: ckunki Date: Mon, 21 Oct 2024 09:54:31 +0200 Subject: [PATCH 18/49] Experiment for GH workflow 9 --- .github/workflows/check-code-generation.yml | 2 +- .../resources/outputs/create_query_loop.sql | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/check-code-generation.yml b/.github/workflows/check-code-generation.yml index f121365d..81a560aa 100644 --- a/.github/workflows/check-code-generation.yml +++ b/.github/workflows/check-code-generation.yml @@ -7,7 +7,7 @@ on: jobs: check_code_generation: - name: Lua Amalgate + name: Lua Amalgate and Example in User Guide strategy: fail-fast: false matrix: diff --git a/exasol_advanced_analytics_framework/resources/outputs/create_query_loop.sql b/exasol_advanced_analytics_framework/resources/outputs/create_query_loop.sql index f8f41f0e..811a9df1 100644 --- a/exasol_advanced_analytics_framework/resources/outputs/create_query_loop.sql +++ b/exasol_advanced_analytics_framework/resources/outputs/create_query_loop.sql @@ -666,8 +666,8 @@ query_handler_runner = require("query_handler_runner") --- -- This is the main function of the Query Loop. -- --- @param json_str input parameters as JSON string --- @param exa the database context (`exa`) of the Lua script +-- @param json_str input parameters as JSON string +-- @param exa the database context (`exa`) of the Lua script -- function query_handler_runner_main(json_str, exa) return query_handler_runner.run(json_str, exa) From 7c1b4704a18d58b2e94144138b1f19d5b4d4992a Mon Sep 17 00:00:00 2001 From: ckunki Date: Mon, 21 Oct 2024 09:58:58 +0200 Subject: [PATCH 19/49] Updated user guide --- doc/user_guide/user_guide.md | 74 ++++++++++++++++++++++++++++-------- 1 file changed, 58 insertions(+), 16 deletions(-) diff --git a/doc/user_guide/user_guide.md b/doc/user_guide/user_guide.md index c0e1d5df..cbd6d5dd 100644 --- a/doc/user_guide/user_guide.md +++ b/doc/user_guide/user_guide.md @@ -191,54 +191,95 @@ Each algorithm should extend the `UDFQueryHandler` abstract class and then imple The example uses the module `builtins` and dynamically adds `ExampleQueryHandler` and `ExampleQueryHandlerFactory` to it. + + ```python +create schema IF NOT EXISTS "TEMP_SCHEMA"; +create schema IF NOT EXISTS "MY_SCHEMA"; +open schema "MY_SCHEMA"; + +ALTER SYSTEM SET SCRIPT_LANGUAGES='R=builtin_r JAVA=builtin_java PYTHON3=builtin_python3 PYTHON3_AAF=localzmq+protobuf:///bfsdefault/default/temp/exasol_advanced_analytics_framework_container_release?lang=python#/buckets/bfsdefault/default/temp/exasol_advanced_analytics_framework_container_release/exaudf/exaudfclient_py3'; + --/ CREATE OR REPLACE PYTHON3_AAF SET SCRIPT "MY_SCHEMA"."MY_QUERY_HANDLER_UDF"(...) EMITS (outputs VARCHAR(2000000)) AS from typing import Union from exasol_advanced_analytics_framework.udf_framework.udf_query_handler import UDFQueryHandler +from exasol_advanced_analytics_framework.udf_framework.dynamic_modules import create_module from exasol_advanced_analytics_framework.query_handler.context.query_handler_context import QueryHandlerContext from exasol_advanced_analytics_framework.query_result.query_result import QueryResult from exasol_advanced_analytics_framework.query_handler.result import Result, Continue, Finish from exasol_advanced_analytics_framework.query_handler.query.select_query import SelectQuery, SelectQueryWithColumnDefinition +from exasol_advanced_analytics_framework.query_handler.context.proxy.bucketfs_location_proxy import \ + BucketFSLocationProxy from exasol_data_science_utils_python.schema.column import Column from exasol_data_science_utils_python.schema.column_name import ColumnName from exasol_data_science_utils_python.schema.column_type import ColumnType +from datetime import datetime +from exasol.bucketfs import as_string +xyz = create_module("xyz") + class ExampleQueryHandler(UDFQueryHandler): + def __init__(self, parameter: str, query_handler_context: QueryHandlerContext): super().__init__(parameter, query_handler_context) self.parameter = parameter self.query_handler_context = query_handler_context + self.bfs_proxy = None + self.db_table_proxy = None + + def _bfs_file(self, proxy: BucketFSLocationProxy): + return proxy.bucketfs_location() / "temp_file.txt" def start(self) -> Union[Continue, Finish[str]]: + def sample_content(key: str) -> str: + timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + return f"{timestamp} {key} {self.parameter}" + + def table_query_string(statement: str, **kwargs): + table_name = self.db_table_proxy._db_object_name.fully_qualified + return statement.format(table_name=table_name, **kwargs) + + def table_query(statement: str, **kwargs): + return SelectQuery(table_query_string(statement, **kwargs)) + + self.bfs_proxy = self.query_handler_context.get_temporary_bucketfs_location() + self._bfs_file(self.bfs_proxy).write(sample_content("bucketfs")) + self.db_table_proxy = self.query_handler_context.get_temporary_table_name() query_list = [ - SelectQuery("SELECT 1 FROM DUAL"), - SelectQuery("SELECT 2 FROM DUAL")] + table_query('CREATE TABLE {table_name} ("c1" VARCHAR(100), "c2" INTEGER)'), + table_query("INSERT INTO {table_name} VALUES ('{value}', 4)", + value=sample_content("table-insert")), + ] query_handler_return_query = SelectQueryWithColumnDefinition( - query_string="SELECT 5 AS 'return_column' FROM DUAL", + query_string=table_query_string('SELECT "c1", "c2" from {table_name}'), output_columns=[ - Column(ColumnName("return_column"), ColumnType("INTEGER"))]) - + Column(ColumnName("c1"), ColumnType("VARCHAR(100)")), + Column(ColumnName("c2"), ColumnType("INTEGER")), + ]) return Continue( query_list=query_list, input_query=query_handler_return_query) def handle_query_result(self, query_result: QueryResult) -> Union[Continue, Finish[str]]: - return_value = query_result.return_column - result = 2 ** return_value - return Finish(result=result) + c1 = query_result.c1 + c2 = query_result.c2 + bfs_content = as_string(self._bfs_file(self.bfs_proxy).read()) + return Finish(result=f"Final result: from query '{c1}', {c2} and bucketfs: '{bfs_content}'") + -import builtins -builtins.ExampleQueryHandler=ExampleQueryHandler # required for pickle +xyz.add_to_module(ExampleQueryHandler) class ExampleQueryHandlerFactory: - def create(self, parameter: str, query_handler_context: QueryHandlerContext): - return builtins.ExampleQueryHandler(parameter, query_handler_context) + def create(self, parameter: str, query_handler_context: QueryHandlerContext): + return xyz.ExampleQueryHandler(parameter, query_handler_context) -builtins.ExampleQueryHandlerFactory=ExampleQueryHandlerFactory +xyz.add_to_module(ExampleQueryHandlerFactory) from exasol_advanced_analytics_framework.udf_framework.query_handler_runner_udf \ import QueryHandlerRunnerUDF @@ -247,16 +288,17 @@ udf = QueryHandlerRunnerUDF(exa) def run(ctx): return udf.run(ctx) -/ +/ + EXECUTE SCRIPT MY_SCHEMA.AAF_RUN_QUERY_HANDLER('{ "query_handler": { "factory_class": { - "module": "builtins", + "module": "xyz", "name": "ExampleQueryHandlerFactory" }, - "parameter": "bla-bla", + "parameters": "bla-bla", "udf": { "schema": "MY_SCHEMA", "name": "MY_QUERY_HANDLER_UDF" From 5a0c79856041f7c9f6634ceeea4f4e661852e8b0 Mon Sep 17 00:00:00 2001 From: ckunki Date: Mon, 21 Oct 2024 10:14:40 +0200 Subject: [PATCH 20/49] Updated developer guide --- doc/developer_guide/developer_guide.md | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/doc/developer_guide/developer_guide.md b/doc/developer_guide/developer_guide.md index 2f66f71c..0e0b97fe 100644 --- a/doc/developer_guide/developer_guide.md +++ b/doc/developer_guide/developer_guide.md @@ -14,6 +14,28 @@ poetry run nox -s build_language_container Installing the SLC ins described in the [AAF User Guide](../user_guide/user_guide.md#script-language-container-slc). +## Updated Generated Files + +AAF contains some generated files that are committed to git, though: +* The amalgated Lua script [create_query_loop.sql](https://github.com/exasol/advanced-analytics-framework/blob/main/exasol_advanced_analytics_framework/resources/outputs/create_query_loop.sql) +* The examples in the user guide + +The amalgated Lua script originates from the files in directory [exasol_advanced_analytics_framework/lua/src](https://github.com/exasol/advanced-analytics-framework/blob/main/exasol_advanced_analytics_framework/lua/src/). + +The following command updates the amalgated script: + +```shell +poetry run nox -s amalgate_lua_scripts +``` + +AAF's user guide contains an example for an adhoc implementation of a Query Handler originating from the files in directory [exasol_advanced_analytics_framework/example](https://github.com/exasol/advanced-analytics-framework/blob/main/exasol_advanced_analytics_framework/example/). + +The following command updates the example in the uiser guide: + +```shell +poetry run nox -s update_user_guide +``` + ## Running Tests AAF comes with different automated tests implemented in different programming languages and requiring different environments: From 88daa7109cb9dd88fcb105d33620bd9d7e1b1ff1 Mon Sep 17 00:00:00 2001 From: ckunki Date: Mon, 21 Oct 2024 12:57:03 +0200 Subject: [PATCH 21/49] Additional changes to example --- doc/user_guide/user_guide.md | 4 +--- exasol_advanced_analytics_framework/example/generator.py | 4 ---- exasol_advanced_analytics_framework/example/sql.jinja | 2 -- 3 files changed, 1 insertion(+), 9 deletions(-) diff --git a/doc/user_guide/user_guide.md b/doc/user_guide/user_guide.md index cbd6d5dd..65e92f3f 100644 --- a/doc/user_guide/user_guide.md +++ b/doc/user_guide/user_guide.md @@ -188,7 +188,7 @@ Each algorithm should extend the `UDFQueryHandler` abstract class and then imple ### Concrete Example Using an Adhoc Implementation Within the UDF -The example uses the module `builtins` and dynamically adds `ExampleQueryHandler` and `ExampleQueryHandlerFactory` to it. +The example dynamically creates a python module `xyz` adds `ExampleQueryHandler` and `ExampleQueryHandlerFactory` to it. @@ -200,8 +200,6 @@ create schema IF NOT EXISTS "TEMP_SCHEMA"; create schema IF NOT EXISTS "MY_SCHEMA"; open schema "MY_SCHEMA"; -ALTER SYSTEM SET SCRIPT_LANGUAGES='R=builtin_r JAVA=builtin_java PYTHON3=builtin_python3 PYTHON3_AAF=localzmq+protobuf:///bfsdefault/default/temp/exasol_advanced_analytics_framework_container_release?lang=python#/buckets/bfsdefault/default/temp/exasol_advanced_analytics_framework_container_release/exaudf/exaudfclient_py3'; - --/ CREATE OR REPLACE PYTHON3_AAF SET SCRIPT "MY_SCHEMA"."MY_QUERY_HANDLER_UDF"(...) EMITS (outputs VARCHAR(2000000)) AS diff --git a/exasol_advanced_analytics_framework/example/generator.py b/exasol_advanced_analytics_framework/example/generator.py index c85329b4..cbf78775 100644 --- a/exasol_advanced_analytics_framework/example/generator.py +++ b/exasol_advanced_analytics_framework/example/generator.py @@ -49,7 +49,3 @@ def generate(): python_code=python_code, json_code=json_code, ) - - -if __name__ == "__main__": - print(f'{generate()}') diff --git a/exasol_advanced_analytics_framework/example/sql.jinja b/exasol_advanced_analytics_framework/example/sql.jinja index c3f21f04..dfab1620 100644 --- a/exasol_advanced_analytics_framework/example/sql.jinja +++ b/exasol_advanced_analytics_framework/example/sql.jinja @@ -2,8 +2,6 @@ create schema IF NOT EXISTS "TEMP_SCHEMA"; create schema IF NOT EXISTS "MY_SCHEMA"; open schema "MY_SCHEMA"; -ALTER SYSTEM SET SCRIPT_LANGUAGES='R=builtin_r JAVA=builtin_java PYTHON3=builtin_python3 PYTHON3_AAF=localzmq+protobuf:///bfsdefault/default/temp/exasol_advanced_analytics_framework_container_release?lang=python#/buckets/bfsdefault/default/temp/exasol_advanced_analytics_framework_container_release/exaudf/exaudfclient_py3'; - --/ CREATE OR REPLACE PYTHON3_AAF SET SCRIPT "MY_SCHEMA"."MY_QUERY_HANDLER_UDF"(...) EMITS (outputs VARCHAR(2000000)) AS From 443bde6f1083dd18cc4dc3bb95904ad6286212c5 Mon Sep 17 00:00:00 2001 From: ckunki Date: Mon, 21 Oct 2024 16:11:37 +0200 Subject: [PATCH 22/49] Adde integration test --- doc/user_guide/user_guide.md | 19 ++++++-- .../example/generator.py | 4 +- .../example/sql.jinja | 5 -- .../with_db/test_user_guide_example.py | 46 +++++++++++++++++++ 4 files changed, 62 insertions(+), 12 deletions(-) create mode 100644 tests/integration_tests/with_db/test_user_guide_example.py diff --git a/doc/user_guide/user_guide.md b/doc/user_guide/user_guide.md index 65e92f3f..601f2e38 100644 --- a/doc/user_guide/user_guide.md +++ b/doc/user_guide/user_guide.md @@ -190,16 +190,26 @@ Each algorithm should extend the `UDFQueryHandler` abstract class and then imple The example dynamically creates a python module `xyz` adds `ExampleQueryHandler` and `ExampleQueryHandlerFactory` to it. +In order to execute the example successfully you need to +1. [Create a BucketFS connection](#bucketfs-connection) +2. Activate the AAF's SLC +2. Create the involved database schemas + +The example assumes the name for the BucketFS Connection `` to be `BFS_CON`. + +```shell +ALTER SYSTEM SET SCRIPT_LANGUAGES='R=builtin_r JAVA=builtin_java PYTHON3=builtin_python3 PYTHON3_AAF=localzmq+protobuf:///bfsdefault/default/temp/exasol_advanced_analytics_framework_container_release?lang=python#/buckets/bfsdefault/default/temp/exasol_advanced_analytics_framework_container_release/exaudf/exaudfclient_py3'; + +create schema IF NOT EXISTS "MY_SCHEMA"; +create schema IF NOT EXISTS "TEMP_SCHEMA"; +``` + ```python -create schema IF NOT EXISTS "TEMP_SCHEMA"; -create schema IF NOT EXISTS "MY_SCHEMA"; -open schema "MY_SCHEMA"; - --/ CREATE OR REPLACE PYTHON3_AAF SET SCRIPT "MY_SCHEMA"."MY_QUERY_HANDLER_UDF"(...) EMITS (outputs VARCHAR(2000000)) AS @@ -287,7 +297,6 @@ udf = QueryHandlerRunnerUDF(exa) def run(ctx): return udf.run(ctx) - / EXECUTE SCRIPT MY_SCHEMA.AAF_RUN_QUERY_HANDLER('{ diff --git a/exasol_advanced_analytics_framework/example/generator.py b/exasol_advanced_analytics_framework/example/generator.py index cbf78775..6280c580 100644 --- a/exasol_advanced_analytics_framework/example/generator.py +++ b/exasol_advanced_analytics_framework/example/generator.py @@ -37,13 +37,13 @@ def jinja_env(): ) -def generate(): +def generate(query_handler_script=QUERY_HANDLER_SCRIPT): env = jinja_env() python_code = importlib.resources.read_text( f"{constants.BASE_DIR}.{PACKAGE_PATH}", "query_handler.py", ) - json_code = json.dumps(QUERY_HANDLER_SCRIPT, indent=4) + json_code = json.dumps(query_handler_script, indent=4) template = env.get_template("sql.jinja") return template.render( python_code=python_code, diff --git a/exasol_advanced_analytics_framework/example/sql.jinja b/exasol_advanced_analytics_framework/example/sql.jinja index dfab1620..94d8cfc6 100644 --- a/exasol_advanced_analytics_framework/example/sql.jinja +++ b/exasol_advanced_analytics_framework/example/sql.jinja @@ -1,13 +1,8 @@ -create schema IF NOT EXISTS "TEMP_SCHEMA"; -create schema IF NOT EXISTS "MY_SCHEMA"; -open schema "MY_SCHEMA"; - --/ CREATE OR REPLACE PYTHON3_AAF SET SCRIPT "MY_SCHEMA"."MY_QUERY_HANDLER_UDF"(...) EMITS (outputs VARCHAR(2000000)) AS {{python_code}} - / EXECUTE SCRIPT MY_SCHEMA.AAF_RUN_QUERY_HANDLER('{{json_code}}'); diff --git a/tests/integration_tests/with_db/test_user_guide_example.py b/tests/integration_tests/with_db/test_user_guide_example.py new file mode 100644 index 00000000..41f14c50 --- /dev/null +++ b/tests/integration_tests/with_db/test_user_guide_example.py @@ -0,0 +1,46 @@ +import re +import pytest +from exasol_advanced_analytics_framework.example \ + import generator as example_generator + + +def generate_example(bfs_connection_name: str, schema_name: str): + script = dict(example_generator.QUERY_HANDLER_SCRIPT) + script["query_handler"]["udf"]["schema"] = schema_name + script["temporary_output"]["bucketfs_location"]["connection_name"] = bfs_connection_name + script["temporary_output"]["schema_name"] = schema_name + return example_generator.generate(script) + + +def test_x1(request): + # opt = request.config.getoption("--exasol-host") + # print(f'{opt}') + # return + example_code = generate_example("BBB", "SSS") + print(f'{example_code}') + result = [[( + "Final result: from query" + " '2024-10-21 12:26:00 table-insert bla-bla', 4" + " and bucketfs: '2024-10-21 12:26:00 bucketfs bla-bla'" + )]] + expected = ( + "Final result: from query '.* table-insert bla-bla', 4" + " and bucketfs: '.* bucketfs bla-bla'" + ) + assert re.match(expected, result[0][0]) + + +def test_user_guide_example(database_with_slc, pyexasol_connection): + """ + This test verifies the adhoc implementation of a QueryHandler as shown + in the AAF user guide. The adhoc implementation dynamically creating its + own python module. + """ + bucketfs_connection_name, schema_name = database_with_slc + example_code = generate_example(bucketfs_connection_name, schema_name) + result = pyexasol_connection.execute(example_code).fetchall() + expected = ( + "Final result: from query '.* table-insert bla-bla', 4" + " and bucketfs: '.* bucketfs bla-bla'" + ) + assert re.match(expected, result[0][0]) From 3384a78e7b03e61183b8f852080579e88a9e5828 Mon Sep 17 00:00:00 2001 From: ckunki Date: Mon, 21 Oct 2024 16:47:29 +0200 Subject: [PATCH 23/49] Fixed integration test --- exasol_advanced_analytics_framework/example/generator.py | 8 ++++++++ exasol_advanced_analytics_framework/example/sql.jinja | 6 +++--- .../integration_tests/with_db/test_user_guide_example.py | 2 +- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/exasol_advanced_analytics_framework/example/generator.py b/exasol_advanced_analytics_framework/example/generator.py index 6280c580..9fa0b978 100644 --- a/exasol_advanced_analytics_framework/example/generator.py +++ b/exasol_advanced_analytics_framework/example/generator.py @@ -4,6 +4,7 @@ from pathlib import Path from exasol_advanced_analytics_framework.deployment import constants from exasol_advanced_analytics_framework.deployment.jinja_template_location import JinjaTemplateLocation +from typing import Any, Dict PACKAGE_PATH = "example" @@ -37,6 +38,12 @@ def jinja_env(): ) +def quoted_udf_name(query_handler_script: Dict[str, Any]): + schema = query_handler_script["udf"]["schema"] + name = query_handler_script["udf"]["name"] + return f'"{schema}"."{name}"' + + def generate(query_handler_script=QUERY_HANDLER_SCRIPT): env = jinja_env() python_code = importlib.resources.read_text( @@ -48,4 +55,5 @@ def generate(query_handler_script=QUERY_HANDLER_SCRIPT): return template.render( python_code=python_code, json_code=json_code, + **query_handler_script, ) diff --git a/exasol_advanced_analytics_framework/example/sql.jinja b/exasol_advanced_analytics_framework/example/sql.jinja index 94d8cfc6..857d7253 100644 --- a/exasol_advanced_analytics_framework/example/sql.jinja +++ b/exasol_advanced_analytics_framework/example/sql.jinja @@ -1,8 +1,8 @@ --/ -CREATE OR REPLACE PYTHON3_AAF SET SCRIPT "MY_SCHEMA"."MY_QUERY_HANDLER_UDF"(...) +CREATE OR REPLACE PYTHON3_AAF SET SCRIPT {{ query_handler.udf.schema }}"."{{ query_handler.udf.name }}"(...) EMITS (outputs VARCHAR(2000000)) AS -{{python_code}} +{{ python_code }} / -EXECUTE SCRIPT MY_SCHEMA.AAF_RUN_QUERY_HANDLER('{{json_code}}'); +EXECUTE SCRIPT {{ query_handler.udf.schema }}.AAF_RUN_QUERY_HANDLER('{{ json_code }}'); diff --git a/tests/integration_tests/with_db/test_user_guide_example.py b/tests/integration_tests/with_db/test_user_guide_example.py index 41f14c50..2267dffb 100644 --- a/tests/integration_tests/with_db/test_user_guide_example.py +++ b/tests/integration_tests/with_db/test_user_guide_example.py @@ -33,7 +33,7 @@ def test_x1(request): def test_user_guide_example(database_with_slc, pyexasol_connection): """ This test verifies the adhoc implementation of a QueryHandler as shown - in the AAF user guide. The adhoc implementation dynamically creating its + in the AAF user guide. The adhoc implementation dynamically creates its own python module. """ bucketfs_connection_name, schema_name = database_with_slc From ee0b129cc053729c2b56a0c1ea94fdab59268831 Mon Sep 17 00:00:00 2001 From: ckunki Date: Mon, 21 Oct 2024 16:51:42 +0200 Subject: [PATCH 24/49] Fixed integration test 2 --- exasol_advanced_analytics_framework/example/sql.jinja | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exasol_advanced_analytics_framework/example/sql.jinja b/exasol_advanced_analytics_framework/example/sql.jinja index 857d7253..98345698 100644 --- a/exasol_advanced_analytics_framework/example/sql.jinja +++ b/exasol_advanced_analytics_framework/example/sql.jinja @@ -1,5 +1,5 @@ --/ -CREATE OR REPLACE PYTHON3_AAF SET SCRIPT {{ query_handler.udf.schema }}"."{{ query_handler.udf.name }}"(...) +CREATE OR REPLACE PYTHON3_AAF SET SCRIPT "{{ query_handler.udf.schema }}"."{{ query_handler.udf.name }}"(...) EMITS (outputs VARCHAR(2000000)) AS {{ python_code }} From f39824f54b30d33f60079fd99a52ab1c9dfb3872 Mon Sep 17 00:00:00 2001 From: ckunki Date: Tue, 22 Oct 2024 08:52:17 +0200 Subject: [PATCH 25/49] Fixed integration test 3 --- doc/user_guide/user_guide.md | 2 - .../example/generator.py | 71 ++++++++++++++++--- scripts/document_updater.py | 3 +- .../with_db/test_user_guide_example.py | 47 +++++++----- 4 files changed, 92 insertions(+), 31 deletions(-) diff --git a/doc/user_guide/user_guide.md b/doc/user_guide/user_guide.md index 601f2e38..0adfb984 100644 --- a/doc/user_guide/user_guide.md +++ b/doc/user_guide/user_guide.md @@ -213,7 +213,6 @@ create schema IF NOT EXISTS "TEMP_SCHEMA"; --/ CREATE OR REPLACE PYTHON3_AAF SET SCRIPT "MY_SCHEMA"."MY_QUERY_HANDLER_UDF"(...) EMITS (outputs VARCHAR(2000000)) AS - from typing import Union from exasol_advanced_analytics_framework.udf_framework.udf_query_handler import UDFQueryHandler from exasol_advanced_analytics_framework.udf_framework.dynamic_modules import create_module @@ -298,7 +297,6 @@ def run(ctx): return udf.run(ctx) / - EXECUTE SCRIPT MY_SCHEMA.AAF_RUN_QUERY_HANDLER('{ "query_handler": { "factory_class": { diff --git a/exasol_advanced_analytics_framework/example/generator.py b/exasol_advanced_analytics_framework/example/generator.py index 9fa0b978..cdb224b2 100644 --- a/exasol_advanced_analytics_framework/example/generator.py +++ b/exasol_advanced_analytics_framework/example/generator.py @@ -1,6 +1,6 @@ import json import importlib.resources -from jinja2 import Template, Environment, PackageLoader, select_autoescape +from jinja2 import Template, Environment, PackageLoader, BaseLoader, select_autoescape from pathlib import Path from exasol_advanced_analytics_framework.deployment import constants from exasol_advanced_analytics_framework.deployment.jinja_template_location import JinjaTemplateLocation @@ -8,7 +8,7 @@ PACKAGE_PATH = "example" -QUERY_HANDLER_SCRIPT = { +SCRIPT_ARGUMENTS = { "query_handler": { "factory_class": { "module": "xyz", @@ -29,6 +29,30 @@ }, } +# CREATE_SCRIPT = """--/ +# CREATE OR REPLACE PYTHON3_AAF SET SCRIPT +# "{{ query_handler.udf.schema }}"."{{ query_handler.udf.name }}"(...) +# EMITS (outputs VARCHAR(2000000)) AS +# {{ python_code }} +# / +# """ + +CREATE_SCRIPT = ( + '--/\n' + 'CREATE OR REPLACE PYTHON3_AAF SET SCRIPT' + ' "{{ query_handler.udf.schema }}"."{{ query_handler.udf.name }}"(...)\n' + 'EMITS (outputs VARCHAR(2000000)) AS\n' + '{{ python_code }}\n' + '/\n' +) + + +EXECUTE_SCRIPT = ( + "EXECUTE SCRIPT {{ query_handler.udf.schema }}" + ".AAF_RUN_QUERY_HANDLER(" + "'{{ json_string }}')" +) + def jinja_env(): return Environment( loader=PackageLoader( @@ -44,16 +68,45 @@ def quoted_udf_name(query_handler_script: Dict[str, Any]): return f'"{schema}"."{name}"' -def generate(query_handler_script=QUERY_HANDLER_SCRIPT): - env = jinja_env() +def render_template(template: str, **kwargs) -> str: + return ( + Environment(loader=BaseLoader) + .from_string(template) + .render(**kwargs) + ) + + +def create_script(script_arguments=SCRIPT_ARGUMENTS): python_code = importlib.resources.read_text( f"{constants.BASE_DIR}.{PACKAGE_PATH}", "query_handler.py", ) - json_code = json.dumps(query_handler_script, indent=4) - template = env.get_template("sql.jinja") - return template.render( + return render_template( + CREATE_SCRIPT, python_code=python_code, - json_code=json_code, - **query_handler_script, + **script_arguments, + ) + + +def execute_script(script_arguments=SCRIPT_ARGUMENTS): + json_string = json.dumps(script_arguments, indent=4) + return render_template( + EXECUTE_SCRIPT, + json_string=json_string, + **script_arguments, ) + + +# def generate(query_handler_script=SCRIPT_ARGUMENTS): +# env = jinja_env() +# python_code = importlib.resources.read_text( +# f"{constants.BASE_DIR}.{PACKAGE_PATH}", +# "query_handler.py", +# ) +# json_code = json.dumps(query_handler_script, indent=4) +# template = env.get_template("sql.jinja") +# return template.render( +# python_code=python_code, +# json_code=json_code, +# **query_handler_script, +# ) diff --git a/scripts/document_updater.py b/scripts/document_updater.py index 05f65559..441e5b7c 100644 --- a/scripts/document_updater.py +++ b/scripts/document_updater.py @@ -27,7 +27,8 @@ def render(self): " parts are in python. Formally, however, the python code is embedded into", " an SQL statement, though. -->", "```python", - example_generator.generate(), + example_generator.create_script(), + example_generator.execute_script() + ";", "```", ""]) diff --git a/tests/integration_tests/with_db/test_user_guide_example.py b/tests/integration_tests/with_db/test_user_guide_example.py index 2267dffb..59fdecf4 100644 --- a/tests/integration_tests/with_db/test_user_guide_example.py +++ b/tests/integration_tests/with_db/test_user_guide_example.py @@ -4,25 +4,31 @@ import generator as example_generator -def generate_example(bfs_connection_name: str, schema_name: str): - script = dict(example_generator.QUERY_HANDLER_SCRIPT) - script["query_handler"]["udf"]["schema"] = schema_name - script["temporary_output"]["bucketfs_location"]["connection_name"] = bfs_connection_name - script["temporary_output"]["schema_name"] = schema_name - return example_generator.generate(script) +def script_args(bfs_connection_name: str, schema_name: str): + args = dict(example_generator.SCRIPT_ARGUMENTS) + args["query_handler"]["udf"]["schema"] = schema_name + args["temporary_output"]["bucketfs_location"]["connection_name"] = bfs_connection_name + args["temporary_output"]["schema_name"] = schema_name + return args -def test_x1(request): - # opt = request.config.getoption("--exasol-host") - # print(f'{opt}') - # return - example_code = generate_example("BBB", "SSS") - print(f'{example_code}') - result = [[( - "Final result: from query" - " '2024-10-21 12:26:00 table-insert bla-bla', 4" - " and bucketfs: '2024-10-21 12:26:00 bucketfs bla-bla'" - )]] +import pyexasol +@pytest.mark.skip("local") +def test_x2(): + pyexasol_connection = pyexasol.connect( + dsn="192.168.124.221:8563", + user="SYS", + password="exasol", + ) + bucketfs_connection_name, schema_name = ("BFS_CON", "MY_SCHEMA") + args = script_args(bucketfs_connection_name, schema_name) + statement = example_generator.create_script(args) + # print(f'create_script:\n{statement}') + pyexasol_connection.execute(statement) + statement = example_generator.execute_script(args) + # print(f'execute_script:\n{statement}') + result = pyexasol_connection.execute(statement).fetchall() + print(f'{result}') expected = ( "Final result: from query '.* table-insert bla-bla', 4" " and bucketfs: '.* bucketfs bla-bla'" @@ -37,8 +43,11 @@ def test_user_guide_example(database_with_slc, pyexasol_connection): own python module. """ bucketfs_connection_name, schema_name = database_with_slc - example_code = generate_example(bucketfs_connection_name, schema_name) - result = pyexasol_connection.execute(example_code).fetchall() + args = script_args(bucketfs_connection_name, schema_name) + statement = example_generator.create_script(args) + pyexasol_connection.execute(statement) + statement = example_generator.execute_script(args) + result = pyexasol_connection.execute(statement).fetchall() expected = ( "Final result: from query '.* table-insert bla-bla', 4" " and bucketfs: '.* bucketfs bla-bla'" From 9baf2b11582b80abfbb60f7d6b76c06d0bf398f8 Mon Sep 17 00:00:00 2001 From: ckunki Date: Tue, 22 Oct 2024 10:35:34 +0200 Subject: [PATCH 26/49] Cleanup, removed SQL template --- doc/user_guide/user_guide.md | 2 +- .../example/generator.py | 31 ------------------- .../example/sql.jinja | 8 ----- scripts/document_updater.py | 11 ++++--- 4 files changed, 7 insertions(+), 45 deletions(-) delete mode 100644 exasol_advanced_analytics_framework/example/sql.jinja diff --git a/doc/user_guide/user_guide.md b/doc/user_guide/user_guide.md index 0adfb984..13492297 100644 --- a/doc/user_guide/user_guide.md +++ b/doc/user_guide/user_guide.md @@ -204,7 +204,7 @@ create schema IF NOT EXISTS "MY_SCHEMA"; create schema IF NOT EXISTS "TEMP_SCHEMA"; ``` - + ." + f"Found {line} before any ." ) self._generated = None self._plain.append(line) def split(self, content: str) -> List[str|Template]: - start = re.compile("") + start = re.compile("") end = re.compile("") for line in content.splitlines(): match = start.match(line) From 647dfc1921bfdf8c617df34eee6ce2d5dcaa10df Mon Sep 17 00:00:00 2001 From: ckunki Date: Tue, 22 Oct 2024 12:05:53 +0200 Subject: [PATCH 27/49] Fixed integration test 4 --- exasol_advanced_analytics_framework/example/generator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exasol_advanced_analytics_framework/example/generator.py b/exasol_advanced_analytics_framework/example/generator.py index e0fe7433..d74f0459 100644 --- a/exasol_advanced_analytics_framework/example/generator.py +++ b/exasol_advanced_analytics_framework/example/generator.py @@ -14,7 +14,7 @@ "module": "xyz", "name": "ExampleQueryHandlerFactory", }, - "parameters": "bla-bla", + "parameter": "bla-bla", "udf": { "schema": "MY_SCHEMA", "name": "MY_QUERY_HANDLER_UDF", From 4d97386d70bad8afed4d526ae27c7ad41ffb04b2 Mon Sep 17 00:00:00 2001 From: ckunki Date: Tue, 22 Oct 2024 12:07:35 +0200 Subject: [PATCH 28/49] Updated user guide --- doc/user_guide/user_guide.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/user_guide/user_guide.md b/doc/user_guide/user_guide.md index 13492297..1d9817c4 100644 --- a/doc/user_guide/user_guide.md +++ b/doc/user_guide/user_guide.md @@ -303,7 +303,7 @@ EXECUTE SCRIPT MY_SCHEMA.AAF_RUN_QUERY_HANDLER('{ "module": "xyz", "name": "ExampleQueryHandlerFactory" }, - "parameters": "bla-bla", + "parameter": "bla-bla", "udf": { "schema": "MY_SCHEMA", "name": "MY_QUERY_HANDLER_UDF" From 9502fffe92aed7c053a0e0215afb1d7ec36cae20 Mon Sep 17 00:00:00 2001 From: ckunki Date: Tue, 22 Oct 2024 13:44:02 +0200 Subject: [PATCH 29/49] Fixed types in user guide and added some additional sentences. --- doc/developer_guide/developer_guide.md | 2 +- doc/user_guide/user_guide.md | 6 +++++- exasol_advanced_analytics_framework/example/generator.py | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/doc/developer_guide/developer_guide.md b/doc/developer_guide/developer_guide.md index 0e0b97fe..a3e8ab24 100644 --- a/doc/developer_guide/developer_guide.md +++ b/doc/developer_guide/developer_guide.md @@ -30,7 +30,7 @@ poetry run nox -s amalgate_lua_scripts AAF's user guide contains an example for an adhoc implementation of a Query Handler originating from the files in directory [exasol_advanced_analytics_framework/example](https://github.com/exasol/advanced-analytics-framework/blob/main/exasol_advanced_analytics_framework/example/). -The following command updates the example in the uiser guide: +The following command updates the example in the user guide: ```shell poetry run nox -s update_user_guide diff --git a/doc/user_guide/user_guide.md b/doc/user_guide/user_guide.md index 1d9817c4..4eb1dbc4 100644 --- a/doc/user_guide/user_guide.md +++ b/doc/user_guide/user_guide.md @@ -188,7 +188,7 @@ Each algorithm should extend the `UDFQueryHandler` abstract class and then imple ### Concrete Example Using an Adhoc Implementation Within the UDF -The example dynamically creates a python module `xyz` adds `ExampleQueryHandler` and `ExampleQueryHandlerFactory` to it. +The example dynamically creates a python module `xyz` and adds classes `ExampleQueryHandler` and `ExampleQueryHandlerFactory` to it. In order to execute the example successfully you need to 1. [Create a BucketFS connection](#bucketfs-connection) @@ -197,6 +197,8 @@ In order to execute the example successfully you need to The example assumes the name for the BucketFS Connection `` to be `BFS_CON`. +The following SQL statements activate the AAF's SLC and create the involved database schemas: + ```shell ALTER SYSTEM SET SCRIPT_LANGUAGES='R=builtin_r JAVA=builtin_java PYTHON3=builtin_python3 PYTHON3_AAF=localzmq+protobuf:///bfsdefault/default/temp/exasol_advanced_analytics_framework_container_release?lang=python#/buckets/bfsdefault/default/temp/exasol_advanced_analytics_framework_container_release/exaudf/exaudfclient_py3'; @@ -204,6 +206,8 @@ create schema IF NOT EXISTS "MY_SCHEMA"; create schema IF NOT EXISTS "TEMP_SCHEMA"; ``` +Here comes the main SQL command creating the UDF script and executing the scripts with the argument defined as Json string: + ", "```python", example_generator.create_script(), + "", example_generator.execute_script() + ";", "```", ""]) @@ -36,7 +37,7 @@ def render(self): class ChunkReader: """ - Enables to replace chunks of a string by text generated from + Enables to replace chunks of a string by generated text, e.g. from jinja templates. """ def __init__(self): From 4fdd329491e27bd0b2a3f08a5639cda55bb58431 Mon Sep 17 00:00:00 2001 From: Christoph Kuhnke Date: Tue, 22 Oct 2024 16:44:43 +0200 Subject: [PATCH 35/49] Apply suggestions from code review Co-authored-by: Mikhail Beck --- doc/developer_guide/developer_guide.md | 6 +++--- doc/user_guide/user_guide.md | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/doc/developer_guide/developer_guide.md b/doc/developer_guide/developer_guide.md index a3e8ab24..d4144c37 100644 --- a/doc/developer_guide/developer_guide.md +++ b/doc/developer_guide/developer_guide.md @@ -16,11 +16,11 @@ Installing the SLC ins described in the [AAF User Guide](../user_guide/user_guid ## Updated Generated Files -AAF contains some generated files that are committed to git, though: +AAF contains some generated files that are committed to git, including: * The amalgated Lua script [create_query_loop.sql](https://github.com/exasol/advanced-analytics-framework/blob/main/exasol_advanced_analytics_framework/resources/outputs/create_query_loop.sql) * The examples in the user guide -The amalgated Lua script originates from the files in directory [exasol_advanced_analytics_framework/lua/src](https://github.com/exasol/advanced-analytics-framework/blob/main/exasol_advanced_analytics_framework/lua/src/). +The amalgated Lua script originates from the files in the directory [exasol_advanced_analytics_framework/lua/src](https://github.com/exasol/advanced-analytics-framework/blob/main/exasol_advanced_analytics_framework/lua/src/). The following command updates the amalgated script: @@ -28,7 +28,7 @@ The following command updates the amalgated script: poetry run nox -s amalgate_lua_scripts ``` -AAF's user guide contains an example for an adhoc implementation of a Query Handler originating from the files in directory [exasol_advanced_analytics_framework/example](https://github.com/exasol/advanced-analytics-framework/blob/main/exasol_advanced_analytics_framework/example/). +AAF's user guide contains an example of an ad hoc implementation of a Query Handler originating from the files in the directory [exasol_advanced_analytics_framework/example](https://github.com/exasol/advanced-analytics-framework/blob/main/exasol_advanced_analytics_framework/example/). The following command updates the example in the user guide: diff --git a/doc/user_guide/user_guide.md b/doc/user_guide/user_guide.md index 56df8285..79a956dc 100644 --- a/doc/user_guide/user_guide.md +++ b/doc/user_guide/user_guide.md @@ -193,11 +193,11 @@ The example dynamically creates a python module `xyz` and adds classes `ExampleQ In order to execute the example successfully you need to 1. [Create a BucketFS connection](#bucketfs-connection) 2. Activate the AAF's SLC -2. Create the involved database schemas +3. Make sure the database schemas used in the example exist. The example assumes the name for the BucketFS Connection `` to be `BFS_CON`. -The following SQL statements activate the AAF's SLC and create the involved database schemas: +The following SQL statements activate the AAF's SLC and create the required database schemas unless they already exist: ```shell ALTER SYSTEM SET SCRIPT_LANGUAGES='R=builtin_r JAVA=builtin_java PYTHON3=builtin_python3 PYTHON3_AAF=localzmq+protobuf:///bfsdefault/default/temp/exasol_advanced_analytics_framework_container_release?lang=python#/buckets/bfsdefault/default/temp/exasol_advanced_analytics_framework_container_release/exaudf/exaudfclient_py3'; From 35932a6792b0eea853d5ebfccafa9049b3dc3f09 Mon Sep 17 00:00:00 2001 From: ckunki Date: Tue, 22 Oct 2024 16:46:40 +0200 Subject: [PATCH 36/49] fixed review findings --- exasol_advanced_analytics_framework/example/generator.py | 2 +- .../example/query_handler.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/exasol_advanced_analytics_framework/example/generator.py b/exasol_advanced_analytics_framework/example/generator.py index 120b6b35..b696587d 100644 --- a/exasol_advanced_analytics_framework/example/generator.py +++ b/exasol_advanced_analytics_framework/example/generator.py @@ -11,7 +11,7 @@ SCRIPT_ARGUMENTS = { "query_handler": { "factory_class": { - "module": "xyz", + "module": "example_module", "name": "ExampleQueryHandlerFactory", }, "parameter": "bla-bla", diff --git a/exasol_advanced_analytics_framework/example/query_handler.py b/exasol_advanced_analytics_framework/example/query_handler.py index 7ac701f0..fedf5ef6 100644 --- a/exasol_advanced_analytics_framework/example/query_handler.py +++ b/exasol_advanced_analytics_framework/example/query_handler.py @@ -14,7 +14,7 @@ from exasol.bucketfs import as_string -xyz = create_module("xyz") +example_module = create_module("example_module") class ExampleQueryHandler(UDFQueryHandler): @@ -65,13 +65,13 @@ def handle_query_result(self, query_result: QueryResult) -> Union[Continue, Fini return Finish(result=f"Final result: from query '{c1}', {c2} and bucketfs: '{bfs_content}'") -xyz.add_to_module(ExampleQueryHandler) +example_module.add_to_module(ExampleQueryHandler) class ExampleQueryHandlerFactory: def create(self, parameter: str, query_handler_context: QueryHandlerContext): - return xyz.ExampleQueryHandler(parameter, query_handler_context) + return example_module.ExampleQueryHandler(parameter, query_handler_context) -xyz.add_to_module(ExampleQueryHandlerFactory) +example_module.add_to_module(ExampleQueryHandlerFactory) from exasol_advanced_analytics_framework.udf_framework.query_handler_runner_udf \ import QueryHandlerRunnerUDF From ff550868b315de3a899dc1d87dd37f991f8e4723 Mon Sep 17 00:00:00 2001 From: ckunki Date: Tue, 22 Oct 2024 17:02:22 +0200 Subject: [PATCH 37/49] Fixed changelog entry --- doc/changes/changes_0.1.0.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/changes/changes_0.1.0.md b/doc/changes/changes_0.1.0.md index 55a002ff..cb1bd98a 100644 --- a/doc/changes/changes_0.1.0.md +++ b/doc/changes/changes_0.1.0.md @@ -52,7 +52,7 @@ Code name: * #176: Updated usage of `exasol-bucketfs` to new API * #185: Removed directory and script for building SLC AAF * #191: Renamed UDF json element "parameters" to "parameter" -* #190: Enabled to generate a dynamic module for custom UDF +* #190: Added dynamic module generation and used it in the example UDF in the user guide * #178: Fixed names of mock objects: * Renamed `testing.mock_query_handler_runner.MockQueryHandlerRunner` to `query_handler.python_query_handler_runner.PythonQueryHandlerRunner` * Renamed method `PythonQueryHandlerRunner.execute_query()` to `execute_queries()` From ffd56f6b3f8fa142d021d15b83bfab5e6848f537 Mon Sep 17 00:00:00 2001 From: ckunki Date: Wed, 23 Oct 2024 09:41:59 +0200 Subject: [PATCH 38/49] Fixed review findings --- doc/user_guide/proxies.md | 13 ++++ doc/user_guide/user_guide.md | 122 ----------------------------------- scripts/document_updater.py | 32 +++++++-- 3 files changed, 41 insertions(+), 126 deletions(-) create mode 100644 doc/user_guide/proxies.md diff --git a/doc/user_guide/proxies.md b/doc/user_guide/proxies.md new file mode 100644 index 00000000..1719c771 --- /dev/null +++ b/doc/user_guide/proxies.md @@ -0,0 +1,13 @@ +## AAF Proxies + +The Advanced Analytics Framework (AAF) uses _Object Proxies_ to manage temporary objects. + +An _Object Proxy_ +* Encapsulates a temporary object +* Provides a reference enabling using the object, i.e. its name incl. the database schema or the path in the BucketFS +* Ensures the object is removed when leaving the current scope, e.g. the Query Handler. For this the proxy maintains a _reference counter_. + +All Object Proxies are derived from class `exasol_advanced_analytics_framework.query_handler.context.proxy.object_proxy.ObjectProxy`: +* `BucketFSLocationProxy` encapsulates a location in the BucketFS +* `DBObjectNameProxy` encapsulates a database object, e.g. a table + diff --git a/doc/user_guide/user_guide.md b/doc/user_guide/user_guide.md index 79a956dc..770d77f4 100644 --- a/doc/user_guide/user_guide.md +++ b/doc/user_guide/user_guide.md @@ -210,125 +210,3 @@ Here comes the main SQL command creating the UDF script and executing the script - -```python ---/ -CREATE OR REPLACE PYTHON3_AAF SET SCRIPT "MY_SCHEMA"."MY_QUERY_HANDLER_UDF"(...) -EMITS (outputs VARCHAR(2000000)) AS -from typing import Union -from exasol_advanced_analytics_framework.udf_framework.udf_query_handler import UDFQueryHandler -from exasol_advanced_analytics_framework.udf_framework.dynamic_modules import create_module -from exasol_advanced_analytics_framework.query_handler.context.query_handler_context import QueryHandlerContext -from exasol_advanced_analytics_framework.query_result.query_result import QueryResult -from exasol_advanced_analytics_framework.query_handler.result import Result, Continue, Finish -from exasol_advanced_analytics_framework.query_handler.query.select_query import SelectQuery, SelectQueryWithColumnDefinition -from exasol_advanced_analytics_framework.query_handler.context.proxy.bucketfs_location_proxy import \ - BucketFSLocationProxy -from exasol_data_science_utils_python.schema.column import Column -from exasol_data_science_utils_python.schema.column_name import ColumnName -from exasol_data_science_utils_python.schema.column_type import ColumnType -from datetime import datetime -from exasol.bucketfs import as_string - - -xyz = create_module("xyz") - -class ExampleQueryHandler(UDFQueryHandler): - - def __init__(self, parameter: str, query_handler_context: QueryHandlerContext): - super().__init__(parameter, query_handler_context) - self.parameter = parameter - self.query_handler_context = query_handler_context - self.bfs_proxy = None - self.db_table_proxy = None - - def _bfs_file(self, proxy: BucketFSLocationProxy): - return proxy.bucketfs_location() / "temp_file.txt" - - def start(self) -> Union[Continue, Finish[str]]: - def sample_content(key: str) -> str: - timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S") - return f"{timestamp} {key} {self.parameter}" - - def table_query_string(statement: str, **kwargs): - table_name = self.db_table_proxy._db_object_name.fully_qualified - return statement.format(table_name=table_name, **kwargs) - - def table_query(statement: str, **kwargs): - return SelectQuery(table_query_string(statement, **kwargs)) - - self.bfs_proxy = self.query_handler_context.get_temporary_bucketfs_location() - self._bfs_file(self.bfs_proxy).write(sample_content("bucketfs")) - self.db_table_proxy = self.query_handler_context.get_temporary_table_name() - query_list = [ - table_query('CREATE TABLE {table_name} ("c1" VARCHAR(100), "c2" INTEGER)'), - table_query("INSERT INTO {table_name} VALUES ('{value}', 4)", - value=sample_content("table-insert")), - ] - query_handler_return_query = SelectQueryWithColumnDefinition( - query_string=table_query_string('SELECT "c1", "c2" from {table_name}'), - output_columns=[ - Column(ColumnName("c1"), ColumnType("VARCHAR(100)")), - Column(ColumnName("c2"), ColumnType("INTEGER")), - ]) - return Continue( - query_list=query_list, - input_query=query_handler_return_query) - - def handle_query_result(self, query_result: QueryResult) -> Union[Continue, Finish[str]]: - c1 = query_result.c1 - c2 = query_result.c2 - bfs_content = as_string(self._bfs_file(self.bfs_proxy).read()) - return Finish(result=f"Final result: from query '{c1}', {c2} and bucketfs: '{bfs_content}'") - - -xyz.add_to_module(ExampleQueryHandler) - -class ExampleQueryHandlerFactory: - def create(self, parameter: str, query_handler_context: QueryHandlerContext): - return xyz.ExampleQueryHandler(parameter, query_handler_context) - -xyz.add_to_module(ExampleQueryHandlerFactory) - -from exasol_advanced_analytics_framework.udf_framework.query_handler_runner_udf \ - import QueryHandlerRunnerUDF - -udf = QueryHandlerRunnerUDF(exa) - -def run(ctx): - return udf.run(ctx) - -/ - -EXECUTE SCRIPT MY_SCHEMA.AAF_RUN_QUERY_HANDLER('{ - "query_handler": { - "factory_class": { - "module": "xyz", - "name": "ExampleQueryHandlerFactory" - }, - "parameter": "bla-bla", - "udf": { - "schema": "MY_SCHEMA", - "name": "MY_QUERY_HANDLER_UDF" - } - }, - "temporary_output": { - "bucketfs_location": { - "connection_name": "BFS_CON", - "directory": "temp" - }, - "schema_name": "TEMP_SCHEMA" - } -}'); -``` - - -The figure below illustrates the execution of this algorithm implemented in class `ExampleQueryHandler`. -* When method `start()` is called, it executes two queries and an additional `input_query` to obtain the input for the next iteration. -* After the first iteration is completed, the framework calls method the `handle_query_result` with the `query_result` of the `input_query` of the previous iteration. - -In this example, the algorithm is finished at this iteration and returns 2_return value_ as final result. - -![Sample Execution](../images/sample_execution.png "Sample Execution") diff --git a/scripts/document_updater.py b/scripts/document_updater.py index 7905efaa..3f2caf78 100644 --- a/scripts/document_updater.py +++ b/scripts/document_updater.py @@ -21,8 +21,8 @@ class Template: def render(self): if self.path != "example/generator.py": - raise ParseException("document_updater.Template currently only" - " supports path example/generator.py") + raise ValueError("document_updater.Template currently only" + " supports path example/generator.py") return "\n".join([ " + + ... and terminated by a line + + + The ChunkReader returns a list of chunks. Each chunk is either a simple + string or an instance of class ``Template``. ChunkReader passes the string + after "by" to the template so the template can render a corresponding + content later on. + + See function update_examples() below for a sample usage. """ def __init__(self): self._generated = None @@ -72,8 +90,14 @@ def _end_generated(self, line: str): self._plain.append(line) def split(self, content: str) -> List[str|Template]: + """ + Split the input document content into chunks. + """ start = re.compile("") end = re.compile("") + self._generated = None + self._plain = [] + self._chunks = [] for line in content.splitlines(): match = start.match(line) if match: From 62a089ca99100c68fb1701f2476f599bfaa2f97e Mon Sep 17 00:00:00 2001 From: ckunki Date: Thu, 24 Oct 2024 10:37:38 +0200 Subject: [PATCH 39/49] Replaced updating the user guide by simple references to example files Updated integration test --- .github/workflows/check-code-generation.yml | 3 - doc/developer_guide/developer_guide.md | 10 +- .../user_guide/example-udf-script/create.sql | 6 + doc/user_guide/example-udf-script/execute.sql | 20 +++ doc/user_guide/proxies.md | 2 +- doc/user_guide/user_guide.md | 34 +++-- .../example/generator.py | 78 ----------- .../lua/src/query_loop.lua | 2 +- .../resources/outputs/create_query_loop.sql | 2 +- noxfile.py | 9 -- scripts/document_updater.py | 121 ------------------ .../with_db/test_user_guide_example.py | 41 ++++-- 12 files changed, 85 insertions(+), 243 deletions(-) rename exasol_advanced_analytics_framework/example/query_handler.py => doc/user_guide/example-udf-script/create.sql (96%) create mode 100644 doc/user_guide/example-udf-script/execute.sql delete mode 100644 exasol_advanced_analytics_framework/example/generator.py delete mode 100644 scripts/document_updater.py diff --git a/.github/workflows/check-code-generation.yml b/.github/workflows/check-code-generation.yml index 81a560aa..527782c1 100644 --- a/.github/workflows/check-code-generation.yml +++ b/.github/workflows/check-code-generation.yml @@ -31,8 +31,5 @@ jobs: - name: Amalgate Lua Scripts run: poetry run nox -s amalgate_lua_scripts - - name: Update Examples in User Guide - run: poetry run nox -s update_user_guide - - name: Check if re-generated files differ from commit run: git diff --exit-code diff --git a/doc/developer_guide/developer_guide.md b/doc/developer_guide/developer_guide.md index d4144c37..56649615 100644 --- a/doc/developer_guide/developer_guide.md +++ b/doc/developer_guide/developer_guide.md @@ -14,7 +14,7 @@ poetry run nox -s build_language_container Installing the SLC ins described in the [AAF User Guide](../user_guide/user_guide.md#script-language-container-slc). -## Updated Generated Files +## Update Generated Files AAF contains some generated files that are committed to git, including: * The amalgated Lua script [create_query_loop.sql](https://github.com/exasol/advanced-analytics-framework/blob/main/exasol_advanced_analytics_framework/resources/outputs/create_query_loop.sql) @@ -28,14 +28,6 @@ The following command updates the amalgated script: poetry run nox -s amalgate_lua_scripts ``` -AAF's user guide contains an example of an ad hoc implementation of a Query Handler originating from the files in the directory [exasol_advanced_analytics_framework/example](https://github.com/exasol/advanced-analytics-framework/blob/main/exasol_advanced_analytics_framework/example/). - -The following command updates the example in the user guide: - -```shell -poetry run nox -s update_user_guide -``` - ## Running Tests AAF comes with different automated tests implemented in different programming languages and requiring different environments: diff --git a/exasol_advanced_analytics_framework/example/query_handler.py b/doc/user_guide/example-udf-script/create.sql similarity index 96% rename from exasol_advanced_analytics_framework/example/query_handler.py rename to doc/user_guide/example-udf-script/create.sql index fedf5ef6..6991f3c0 100644 --- a/exasol_advanced_analytics_framework/example/query_handler.py +++ b/doc/user_guide/example-udf-script/create.sql @@ -1,3 +1,7 @@ +--/ +CREATE OR REPLACE PYTHON3_AAF SET SCRIPT "EXAMPLE_SCHEMA"."MY_QUERY_HANDLER_UDF"(...) +EMITS (outputs VARCHAR(2000000)) AS + from typing import Union from exasol_advanced_analytics_framework.udf_framework.udf_query_handler import UDFQueryHandler from exasol_advanced_analytics_framework.udf_framework.dynamic_modules import create_module @@ -80,3 +84,5 @@ def create(self, parameter: str, query_handler_context: QueryHandlerContext): def run(ctx): return udf.run(ctx) + +/ diff --git a/doc/user_guide/example-udf-script/execute.sql b/doc/user_guide/example-udf-script/execute.sql new file mode 100644 index 00000000..9520e7de --- /dev/null +++ b/doc/user_guide/example-udf-script/execute.sql @@ -0,0 +1,20 @@ +EXECUTE SCRIPT "AAF_DB_SCHEMA"."AAF_RUN_QUERY_HANDLER"('{ + "query_handler": { + "factory_class": { + "module": "example_module", + "name": "ExampleQueryHandlerFactory" + }, + "parameter": "bla-bla", + "udf": { + "schema": "EXAMPLE_SCHEMA", + "name": "MY_QUERY_HANDLER_UDF" + } + }, + "temporary_output": { + "bucketfs_location": { + "connection_name": "BFS_CON", + "directory": "temp" + }, + "schema_name": "EXAMPLE_TEMP_SCHEMA" + } +}') diff --git a/doc/user_guide/proxies.md b/doc/user_guide/proxies.md index 1719c771..642a8703 100644 --- a/doc/user_guide/proxies.md +++ b/doc/user_guide/proxies.md @@ -5,7 +5,7 @@ The Advanced Analytics Framework (AAF) uses _Object Proxies_ to manage temporary An _Object Proxy_ * Encapsulates a temporary object * Provides a reference enabling using the object, i.e. its name incl. the database schema or the path in the BucketFS -* Ensures the object is removed when leaving the current scope, e.g. the Query Handler. For this the proxy maintains a _reference counter_. +* Ensures the object is removed when leaving the current scope, e.g. the Query Handler. All Object Proxies are derived from class `exasol_advanced_analytics_framework.query_handler.context.proxy.object_proxy.ObjectProxy`: * `BucketFSLocationProxy` encapsulates a location in the BucketFS diff --git a/doc/user_guide/user_guide.md b/doc/user_guide/user_guide.md index 770d77f4..6f87badb 100644 --- a/doc/user_guide/user_guide.md +++ b/doc/user_guide/user_guide.md @@ -108,10 +108,12 @@ python -m exasol_advanced_analytics_framework.deploy scripts \ --dsn "$DB_HOST:DB_PORT" \ --db-user "$DB_USER" \ --db-pass "$DB_PASSWORD" \ - --schema "$DB_SCHEMA" \ + --schema "$AAF_DB_SCHEMA" \ --language-alias "$LANGUAGE_ALIAS" ``` +The name of the database schema must match the schema when executing the script. + ## Usage The entry point of this framework is `AAF_RUN_QUERY_HANDLER` script. This script is simply a query loop which is responsible for executing the implemented algorithm. @@ -124,7 +126,7 @@ This script takes the necessary parameters to execute the desired algorithm in s The following SQL statement shows how to call an AAF query handler: ```sql -EXECUTE SCRIPT AAF_RUN_QUERY_HANDLER('{ +EXECUTE SCRIPT .AAF_RUN_QUERY_HANDLER('{ "query_handler": { "factory_class": { "module": "", @@ -152,6 +154,7 @@ See [Implementing a Custom Algorithm as Example Query Handler](#implementing-a-c | Parameter | Required? | Description | |------------------------------|-----------|-------------------------------------------------------------------------------| +| `` | yes | Name of the database schema containing the default Query Handler, See [Additional Scripts](#additional-scripts) | | `` | yes | Name of the query handler class | | `` | yes | Module name of the query handler class | | `` | yes | Parameters of the query handler class encoded as string | @@ -195,18 +198,33 @@ In order to execute the example successfully you need to 2. Activate the AAF's SLC 3. Make sure the database schemas used in the example exist. -The example assumes the name for the BucketFS Connection `` to be `BFS_CON`. +The example assumes +* the name for the BucketFS Connection `` to be `BFS_CON` +* the name for the AAF database schema `_return value_ as final result. + +![Sample Execution](../images/sample_execution.png "Sample Execution") + +## Additional Information - - +* [Object Proxies](proxies.md) for managing temporary locations in the database and BucketFS diff --git a/exasol_advanced_analytics_framework/example/generator.py b/exasol_advanced_analytics_framework/example/generator.py deleted file mode 100644 index b696587d..00000000 --- a/exasol_advanced_analytics_framework/example/generator.py +++ /dev/null @@ -1,78 +0,0 @@ -import json -import importlib.resources -from jinja2 import Template, Environment, PackageLoader, BaseLoader, select_autoescape -from pathlib import Path -from exasol_advanced_analytics_framework.deployment import constants -from exasol_advanced_analytics_framework.deployment.jinja_template_location import JinjaTemplateLocation -from typing import Any, Dict - -PACKAGE_PATH = "example" - -SCRIPT_ARGUMENTS = { - "query_handler": { - "factory_class": { - "module": "example_module", - "name": "ExampleQueryHandlerFactory", - }, - "parameter": "bla-bla", - "udf": { - "schema": "MY_SCHEMA", - "name": "MY_QUERY_HANDLER_UDF", - }, - }, - "temporary_output": { - "bucketfs_location": { - "connection_name": "BFS_CON", - "directory": "temp", - }, - "schema_name": "TEMP_SCHEMA", - }, -} - - -def quoted_udf_name(query_handler_script: Dict[str, Any]): - schema = query_handler_script["udf"]["schema"] - name = query_handler_script["udf"]["name"] - return f'"{schema}"."{name}"' - - -def render_template(template: str, **kwargs) -> str: - return ( - Environment(loader=BaseLoader) - .from_string(template) - .render(**kwargs) - ) - - -def create_script(script_arguments=SCRIPT_ARGUMENTS): - script = ( - '--/\n' - 'CREATE OR REPLACE PYTHON3_AAF SET SCRIPT' - ' "{{ query_handler.udf.schema }}"."{{ query_handler.udf.name }}"(...)\n' - 'EMITS (outputs VARCHAR(2000000)) AS\n' - '{{ python_code }}\n' - '/\n' - ) - python_code = importlib.resources.read_text( - f"{constants.BASE_DIR}.{PACKAGE_PATH}", - "query_handler.py", - ) - return render_template( - script, - python_code=python_code, - **script_arguments, - ) - - -def execute_script(script_arguments=SCRIPT_ARGUMENTS): - script = ( - "EXECUTE SCRIPT {{ query_handler.udf.schema }}" - ".AAF_RUN_QUERY_HANDLER(" - "'{{ json_string }}')" - ) - json_string = json.dumps(script_arguments, indent=4) - return render_template( - script, - json_string=json_string, - **script_arguments, - ) diff --git a/exasol_advanced_analytics_framework/lua/src/query_loop.lua b/exasol_advanced_analytics_framework/lua/src/query_loop.lua index 7d628844..f97d9540 100644 --- a/exasol_advanced_analytics_framework/lua/src/query_loop.lua +++ b/exasol_advanced_analytics_framework/lua/src/query_loop.lua @@ -55,7 +55,7 @@ function M.prepare_init_query(arguments, meta) local udf_schema = udf['schema'] local udf_name = udf['name'] - local full_qualified_udf_name = string.format("%s.%s", udf_schema, udf_name) + local full_qualified_udf_name = string.format("\"%s\".\"%s\"", udf_schema, udf_name) local udf_args = string.format("(%d,'%s','%s','%s','%s','%s','%s','%s')", iter_num, temporary_bfs_location_conn, diff --git a/exasol_advanced_analytics_framework/resources/outputs/create_query_loop.sql b/exasol_advanced_analytics_framework/resources/outputs/create_query_loop.sql index 811a9df1..0f4940fe 100644 --- a/exasol_advanced_analytics_framework/resources/outputs/create_query_loop.sql +++ b/exasol_advanced_analytics_framework/resources/outputs/create_query_loop.sql @@ -510,7 +510,7 @@ function M.prepare_init_query(arguments, meta) local udf_schema = udf['schema'] local udf_name = udf['name'] - local full_qualified_udf_name = string.format("%s.%s", udf_schema, udf_name) + local full_qualified_udf_name = string.format("\"%s\".\"%s\"", udf_schema, udf_name) local udf_args = string.format("(%d,'%s','%s','%s','%s','%s','%s','%s')", iter_num, temporary_bfs_location_conn, diff --git a/noxfile.py b/noxfile.py index 36c70343..f7ed8c6b 100644 --- a/noxfile.py +++ b/noxfile.py @@ -2,7 +2,6 @@ import os from pathlib import Path from exasol_advanced_analytics_framework.slc import custom_slc_builder -from scripts.document_updater import update_examples from datetime import datetime import nox @@ -131,11 +130,3 @@ def run_python_integration_tests_with_db(session: Session): str(integration_test_directory), *session.posargs, ) - - -@nox.session(python=False) -def update_user_guide(session: Session): - """ - This task updates the examples in the user guide. - """ - update_examples(Path("doc/user_guide/user_guide.md")) diff --git a/scripts/document_updater.py b/scripts/document_updater.py deleted file mode 100644 index 3f2caf78..00000000 --- a/scripts/document_updater.py +++ /dev/null @@ -1,121 +0,0 @@ -import sys -import re - -from typing import List -from dataclasses import dataclass -from pathlib import Path -from inspect import cleandoc - -from exasol_advanced_analytics_framework.example import generator as example_generator - - -class ParseException(Exception): - """ - If input file is not well-formed. - """ - - -@dataclass -class Template: - path: str - - def render(self): - if self.path != "example/generator.py": - raise ValueError("document_updater.Template currently only" - " supports path example/generator.py") - return "\n".join([ - "", - "```python", - example_generator.create_script(), - "", - example_generator.execute_script() + ";", - "```", - ""]) - - -class ChunkReader: - """ - Enables to replace chunks of a document content passed as string by - generated text, e.g. from jinja templates. - - You can create documents containing individual test passages as well as - passages that are generated from templates. ChunkReader helps you keeping - your document up-to-date. - - The content is expected to be organized in lines. - A chunk is started by a line - - - ... and terminated by a line - - - The ChunkReader returns a list of chunks. Each chunk is either a simple - string or an instance of class ``Template``. ChunkReader passes the string - after "by" to the template so the template can render a corresponding - content later on. - - See function update_examples() below for a sample usage. - """ - def __init__(self): - self._generated = None - self._plain = [] - self._chunks = [] - - def _process_plain(self): - if self._plain: - self._chunks.append("\n".join(self._plain) + "\n") - self._plain = [] - - def _start_generated(self, line: str, match: re.Match): - if self._generated: - raise ParseException( - f"Found another {line} before {self._generated} was closed." - ) - self._plain += [ - line, - "", - ] - self._process_plain() - self._generated = line - self._chunks.append(Template(match.group(2))) - - def _end_generated(self, line: str): - if not self._generated: - raise ParseException( - f"Found {line} before any ." - ) - self._generated = None - self._plain.append(line) - - def split(self, content: str) -> List[str|Template]: - """ - Split the input document content into chunks. - """ - start = re.compile("") - end = re.compile("") - self._generated = None - self._plain = [] - self._chunks = [] - for line in content.splitlines(): - match = start.match(line) - if match: - self._start_generated(line, match) - elif end.match(line): - self._end_generated(line) - elif not self._generated: - self._plain.append(line) - self._process_plain() - return self._chunks - - @classmethod - def chunks(cls, content: str): - return cls().split(content) - - -def update_examples(path: Path): - content = path.read_text() - with path.open(mode="w") as f: - for chunk in ChunkReader.chunks(content): - f.write(chunk if type(chunk) == str else chunk.render()) diff --git a/tests/integration_tests/with_db/test_user_guide_example.py b/tests/integration_tests/with_db/test_user_guide_example.py index fdb6730f..80fed913 100644 --- a/tests/integration_tests/with_db/test_user_guide_example.py +++ b/tests/integration_tests/with_db/test_user_guide_example.py @@ -1,27 +1,44 @@ +import importlib.resources +import pytest import re -from exasol_advanced_analytics_framework.example \ - import generator as example_generator +from contextlib import ExitStack +from exasol_advanced_analytics_framework.deployment import constants +from exasol.python_extension_common.deployment.temp_schema import temp_schema -def script_args(bfs_connection_name: str, schema_name: str): - args = dict(example_generator.SCRIPT_ARGUMENTS) - args["query_handler"]["udf"]["schema"] = schema_name - args["temporary_output"]["bucketfs_location"]["connection_name"] = bfs_connection_name - args["temporary_output"]["schema_name"] = schema_name - return args +@pytest.fixture +def example_db_schemas(pyexasol_connection): + with ExitStack() as stack: + s1 = stack.enter_context(temp_schema(example_connection)) + s2 = stack.enter_context(temp_schema(example_connection)) + yield (s1, s2) -def test_user_guide_example(database_with_slc, pyexasol_connection): + +def test_user_guide_example(database_with_slc, pyexasol_connection, example_db_schemas): """ This test verifies the adhoc implementation of a QueryHandler as shown in the AAF user guide. The adhoc implementation dynamically creates its own python module. """ bucketfs_connection_name, schema_name = database_with_slc - args = script_args(bucketfs_connection_name, schema_name) - statement = example_generator.create_script(args) + dir = importlib.resources.files(constants.BASE_DIR) \ + / ".." / "doc" / "user_guide" / "example-udf-script" + + statement = ( + (dir / "create.sql") + .read_text() + .replace("EXAMPLE_SCHEMA", example_db_schemas[0]) + ) pyexasol_connection.execute(statement) - statement = example_generator.execute_script(args) + statement = ( + (dir / "execute.sql") + .read_text() + .replace("BFS_CON", bucketfs_connection_name) + .replace("AAF_DB_SCHEMA", schema_name) + .replace("EXAMPLE_SCHEMA", example_db_schemas[0]) + .replace("EXAMPLE_TEMP_SCHEMA", example_db_schemas[1]) + ) result = pyexasol_connection.execute(statement).fetchall() expected = ( "Final result: from query '.* table-insert bla-bla', 4" From 2cdeb0ddea7244dd8a8cf29e25b0591d9324dfbf Mon Sep 17 00:00:00 2001 From: ckunki Date: Thu, 24 Oct 2024 10:49:34 +0200 Subject: [PATCH 40/49] Fixed lua unit tests --- .../lua/test/test_query_handler_runner.lua | 2 +- .../lua/test/test_query_loop.lua | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/exasol_advanced_analytics_framework/lua/test/test_query_handler_runner.lua b/exasol_advanced_analytics_framework/lua/test/test_query_handler_runner.lua index 801113fd..fdadbfc7 100644 --- a/exasol_advanced_analytics_framework/lua/test/test_query_handler_runner.lua +++ b/exasol_advanced_analytics_framework/lua/test/test_query_handler_runner.lua @@ -24,7 +24,7 @@ test_query_handler_runner = { parameter = "param" }, }, - query = "SELECT UDF_SCHEMA.UDF_NAME(" .. + query = "SELECT \"UDF_SCHEMA\".\"UDF_NAME\"(" .. "0,'bfs_conn','directory','db_name_1122334455_1','temp_schema'," .. "'cls_name','package.module','param')", return_query_result = { diff --git a/exasol_advanced_analytics_framework/lua/test/test_query_loop.lua b/exasol_advanced_analytics_framework/lua/test/test_query_loop.lua index b3c39679..07804bcf 100644 --- a/exasol_advanced_analytics_framework/lua/test/test_query_loop.lua +++ b/exasol_advanced_analytics_framework/lua/test/test_query_loop.lua @@ -55,7 +55,7 @@ test_query_loop = { parameter = "param" }, }, - query = "SELECT UDF_SCHEMA.UDF_NAME(" .. + query = "SELECT \"UDF_SCHEMA\".\"UDF_NAME\"(" .. "0,'bfs_conn','directory','db_name_1122334455_1','temp_schema'," .. "'cls_name','package.module','param')" }, @@ -76,7 +76,7 @@ test_query_loop = { parameter = "param" }, }, - query = "SELECT script_schema.AAF_QUERY_HANDLER_UDF(" .. + query = "SELECT \"script_schema\".\"AAF_QUERY_HANDLER_UDF\"(" .. "0,'bfs_conn','directory','db_name_1122334455_1','temp_schema'," .. "'cls_name','package.module','param')" }, From 7045dffa78f20f28955958f1ca86135e531a135c Mon Sep 17 00:00:00 2001 From: ckunki Date: Thu, 24 Oct 2024 11:33:27 +0200 Subject: [PATCH 41/49] Fixed review findings --- .../udf_framework/dynamic_modules.py | 16 +++++++++++++--- .../udf_framework/test_dynamic_modules.py | 14 ++++++++++++++ 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/exasol_advanced_analytics_framework/udf_framework/dynamic_modules.py b/exasol_advanced_analytics_framework/udf_framework/dynamic_modules.py index 8086ea4d..899e00ad 100644 --- a/exasol_advanced_analytics_framework/udf_framework/dynamic_modules.py +++ b/exasol_advanced_analytics_framework/udf_framework/dynamic_modules.py @@ -4,6 +4,15 @@ from types import ModuleType +def _create_module(name: str) -> ModuleType: + spec = importlib.machinery.ModuleSpec(name, None) + return importlib.util.module_from_spec(spec) + + +def _register_module_for_import(name: str, mod: ModuleType): + sys.modules[name] = mod + + def create_module(name: str) -> ModuleType: """ Dynamically create a python module using the specified name and @@ -12,9 +21,10 @@ def create_module(name: str) -> ModuleType: Additionally add a function add_to_module() to the module enabling other code to add classes and functions to the module. """ - spec = importlib.machinery.ModuleSpec(name, None) - mod = importlib.util.module_from_spec(spec) - sys.modules[name] = mod + mod = sys.modules.get(name) + if mod is None: + mod = _create_module(name) + _register_module_for_import(name, mod) def add_to_module(object: Any): object.__module__ = name diff --git a/tests/unit_tests/udf_framework/test_dynamic_modules.py b/tests/unit_tests/udf_framework/test_dynamic_modules.py index ba1db1c0..b4f08a09 100644 --- a/tests/unit_tests/udf_framework/test_dynamic_modules.py +++ b/tests/unit_tests/udf_framework/test_dynamic_modules.py @@ -24,3 +24,17 @@ def test_add_function(): xx2.add_to_module(example_function) assert xx2.example_function() == "example_function return value" \ and example_function.__module__ == "xx2" + + +def test_add_function_to_existing_module(): + def my_func(): + return "another return value" + + mod1 = create_module("xx2") + import xx2 + xx2.add_to_module(example_function) + mod2 = create_module("xx2") + assert mod2 == mod1 + xx2.add_to_module(my_func) + assert xx2.example_function() == "example_function return value" \ + and xx2.my_func() == "another return value" From 8656120e965d681e13eebcf7d5272a7d15155c24 Mon Sep 17 00:00:00 2001 From: Christoph Kuhnke Date: Thu, 24 Oct 2024 13:40:42 +0200 Subject: [PATCH 42/49] Update doc/user_guide/user_guide.md Co-authored-by: Torsten Kilias --- doc/user_guide/user_guide.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/user_guide/user_guide.md b/doc/user_guide/user_guide.md index 6f87badb..f9b9308b 100644 --- a/doc/user_guide/user_guide.md +++ b/doc/user_guide/user_guide.md @@ -205,7 +205,7 @@ The example assumes The following SQL statements activate the AAF's SLC and create the required database schemas unless they already exist: ```shell -ALTER SYSTEM SET SCRIPT_LANGUAGES='R=builtin_r JAVA=builtin_java PYTHON3=builtin_python3 PYTHON3_AAF=localzmq+protobuf:///bfsdefault/default/temp/exasol_advanced_analytics_framework_container_release?lang=python#/buckets/bfsdefault/default/temp/exasol_advanced_analytics_framework_container_release/exaudf/exaudfclient_py3'; +ALTER SESSION SET SCRIPT_LANGUAGES='R=builtin_r JAVA=builtin_java PYTHON3=builtin_python3 PYTHON3_AAF=localzmq+protobuf:///bfsdefault/default/temp/exasol_advanced_analytics_framework_container_release?lang=python#/buckets/bfsdefault/default/temp/exasol_advanced_analytics_framework_container_release/exaudf/exaudfclient_py3'; create schema IF NOT EXISTS "EXAMPLE_SCHEMA"; create schema IF NOT EXISTS "EXAMPLE_TEMP_SCHEMA"; From 0877c1ef1c102e05a0fd442cfbe46260d267ce87 Mon Sep 17 00:00:00 2001 From: ckunki Date: Thu, 24 Oct 2024 13:40:26 +0200 Subject: [PATCH 43/49] Fixed review findings 2 --- doc/developer_guide/developer_guide.md | 6 +----- doc/user_guide/user_guide.md | 4 ++-- .../udf_framework/dynamic_modules.py | 18 ++++++++++++----- .../udf_framework/test_dynamic_modules.py | 20 ++++++++----------- 4 files changed, 24 insertions(+), 24 deletions(-) diff --git a/doc/developer_guide/developer_guide.md b/doc/developer_guide/developer_guide.md index 56649615..b0cf4568 100644 --- a/doc/developer_guide/developer_guide.md +++ b/doc/developer_guide/developer_guide.md @@ -16,11 +16,7 @@ Installing the SLC ins described in the [AAF User Guide](../user_guide/user_guid ## Update Generated Files -AAF contains some generated files that are committed to git, including: -* The amalgated Lua script [create_query_loop.sql](https://github.com/exasol/advanced-analytics-framework/blob/main/exasol_advanced_analytics_framework/resources/outputs/create_query_loop.sql) -* The examples in the user guide - -The amalgated Lua script originates from the files in the directory [exasol_advanced_analytics_framework/lua/src](https://github.com/exasol/advanced-analytics-framework/blob/main/exasol_advanced_analytics_framework/lua/src/). +AAF contains the amalgated Lua script [create_query_loop.sql](https://github.com/exasol/advanced-analytics-framework/blob/main/exasol_advanced_analytics_framework/resources/outputs/create_query_loop.sql) originating from the files in the directory [exasol_advanced_analytics_framework/lua/src](https://github.com/exasol/advanced-analytics-framework/blob/main/exasol_advanced_analytics_framework/lua/src/). The following command updates the amalgated script: diff --git a/doc/user_guide/user_guide.md b/doc/user_guide/user_guide.md index f9b9308b..4201ebdf 100644 --- a/doc/user_guide/user_guide.md +++ b/doc/user_guide/user_guide.md @@ -112,7 +112,7 @@ python -m exasol_advanced_analytics_framework.deploy scripts \ --language-alias "$LANGUAGE_ALIAS" ``` -The name of the database schema must match the schema when executing the script. +The name of the database schema must match the schema `AAF_DB_SCHEMA` when executing the script, see section [Usage, Parameters](#parameters). ## Usage @@ -126,7 +126,7 @@ This script takes the necessary parameters to execute the desired algorithm in s The following SQL statement shows how to call an AAF query handler: ```sql -EXECUTE SCRIPT .AAF_RUN_QUERY_HANDLER('{ +EXECUTE SCRIPT ""."AAF_RUN_QUERY_HANDLER"('{ "query_handler": { "factory_class": { "module": "", diff --git a/exasol_advanced_analytics_framework/udf_framework/dynamic_modules.py b/exasol_advanced_analytics_framework/udf_framework/dynamic_modules.py index 899e00ad..add6dde9 100644 --- a/exasol_advanced_analytics_framework/udf_framework/dynamic_modules.py +++ b/exasol_advanced_analytics_framework/udf_framework/dynamic_modules.py @@ -13,18 +13,26 @@ def _register_module_for_import(name: str, mod: ModuleType): sys.modules[name] = mod + +class ModuleExistsException(Exception): + """ + When trying create a module that already exists. + """ + + def create_module(name: str) -> ModuleType: """ Dynamically create a python module using the specified name and - register the module in sys.modules[]. + register the module in sys.modules[] for import. Additionally add a function add_to_module() to the module enabling other code to add classes and functions to the module. """ - mod = sys.modules.get(name) - if mod is None: - mod = _create_module(name) - _register_module_for_import(name, mod) + if name in sys.modules: + raise ModuleExistsException(f'Module "{name}" already exists') + + mod = _create_module(name) + _register_module_for_import(name, mod) def add_to_module(object: Any): object.__module__ = name diff --git a/tests/unit_tests/udf_framework/test_dynamic_modules.py b/tests/unit_tests/udf_framework/test_dynamic_modules.py index b4f08a09..b3772725 100644 --- a/tests/unit_tests/udf_framework/test_dynamic_modules.py +++ b/tests/unit_tests/udf_framework/test_dynamic_modules.py @@ -1,4 +1,8 @@ -from exasol_advanced_analytics_framework.udf_framework.dynamic_modules import create_module +import pytest +from exasol_advanced_analytics_framework.udf_framework.dynamic_modules import ( + create_module, + ModuleExistsException, +) class ExampleClass: @@ -27,14 +31,6 @@ def test_add_function(): def test_add_function_to_existing_module(): - def my_func(): - return "another return value" - - mod1 = create_module("xx2") - import xx2 - xx2.add_to_module(example_function) - mod2 = create_module("xx2") - assert mod2 == mod1 - xx2.add_to_module(my_func) - assert xx2.example_function() == "example_function return value" \ - and xx2.my_func() == "another return value" + create_module("xx3") + with pytest.raises(ModuleExistsException, match='Module "xx3" already exists') as ex: + create_module("xx3") From bfa6c4494e3d7ba8bd1c5c094597a4a9bb6b4d6e Mon Sep 17 00:00:00 2001 From: ckunki Date: Thu, 24 Oct 2024 14:19:50 +0200 Subject: [PATCH 44/49] Updated the user guide to following the review findings --- doc/user_guide/user_guide.md | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/doc/user_guide/user_guide.md b/doc/user_guide/user_guide.md index 4201ebdf..ffb2107c 100644 --- a/doc/user_guide/user_guide.md +++ b/doc/user_guide/user_guide.md @@ -73,7 +73,7 @@ pip install exasol-advanced-analytics-framework Exasol executes User Defined Functions (UDFs) in an isolated Container whose root filesystem is derived from a Script Language Container (SLC). -Running the AAF requires a SLC. The following command +Running the AAF requires an SLC. The following command * downloads the specified version `` (preferrably the latest) of a prebuilt AAF SLC from the [AAF releases](https://github.com/exasol/advanced-analytics-framework/releases/latest) on GitHub, * uploads the file into the BucketFS, * and registers it to the database. @@ -200,13 +200,27 @@ In order to execute the example successfully you need to The example assumes * the name for the BucketFS Connection `` to be `BFS_CON` -* the name for the AAF database schema `` to be `AAF_DB_SCHEMA`, see [Additional Scripts](#additional-scripts) + + + +The following SQL statements create the required database schemas unless they already exist: +```sql create schema IF NOT EXISTS "EXAMPLE_SCHEMA"; create schema IF NOT EXISTS "EXAMPLE_TEMP_SCHEMA"; ``` From 75cd145fabce19a21c6029f5717b4824b79f8d99 Mon Sep 17 00:00:00 2001 From: ckunki Date: Thu, 24 Oct 2024 15:04:37 +0200 Subject: [PATCH 45/49] Fixed review findings --- doc/user_guide/user_guide.md | 24 ++++-------------------- 1 file changed, 4 insertions(+), 20 deletions(-) diff --git a/doc/user_guide/user_guide.md b/doc/user_guide/user_guide.md index ffb2107c..e1f209b9 100644 --- a/doc/user_guide/user_guide.md +++ b/doc/user_guide/user_guide.md @@ -112,7 +112,7 @@ python -m exasol_advanced_analytics_framework.deploy scripts \ --language-alias "$LANGUAGE_ALIAS" ``` -The name of the database schema must match the schema `AAF_DB_SCHEMA` when executing the script, see section [Usage, Parameters](#parameters). +When later on [executing the script](#placeholders) you must use the schema name `AAF_DB_SCHEMA` or make it the current schema. ## Usage @@ -150,9 +150,9 @@ EXECUTE SCRIPT ""."AAF_RUN_QUERY_HANDLER"('{ See [Implementing a Custom Algorithm as Example Query Handler](#implementing-a-custom-algorithm-as-example-query-handler) for a complete example. -### Parameters +### Placeholders -| Parameter | Required? | Description | +| Placeholders | Required? | Description | |------------------------------|-----------|-------------------------------------------------------------------------------| | `` | yes | Name of the database schema containing the default Query Handler, See [Additional Scripts](#additional-scripts) | | `` | yes | Name of the query handler class | @@ -191,7 +191,7 @@ Each algorithm should extend the `UDFQueryHandler` abstract class and then imple ### Concrete Example Using an Adhoc Implementation Within the UDF -The example dynamically creates a python module `xyz` and adds classes `ExampleQueryHandler` and `ExampleQueryHandlerFactory` to it. +The example dynamically creates a python module `EXAMPLE_MODULE` and adds classes `ExampleQueryHandler` and `ExampleQueryHandlerFactory` to it. In order to execute the example successfully you need to 1. [Create a BucketFS connection](#bucketfs-connection) @@ -202,22 +202,6 @@ The example assumes * the name for the BucketFS Connection `` to be `BFS_CON` * the name for the AAF database schema `` to be `AAF_DB_SCHEMA`, see [Additional Scripts](#additional-scripts) - - The following SQL statements create the required database schemas unless they already exist: ```sql From 408ac95b87277b53fcef3388d907dfd4f70e2884 Mon Sep 17 00:00:00 2001 From: ckunki Date: Thu, 24 Oct 2024 15:48:46 +0200 Subject: [PATCH 46/49] Fixed character case of example_module in the user guide --- doc/user_guide/user_guide.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/user_guide/user_guide.md b/doc/user_guide/user_guide.md index e1f209b9..7be9bf22 100644 --- a/doc/user_guide/user_guide.md +++ b/doc/user_guide/user_guide.md @@ -191,7 +191,7 @@ Each algorithm should extend the `UDFQueryHandler` abstract class and then imple ### Concrete Example Using an Adhoc Implementation Within the UDF -The example dynamically creates a python module `EXAMPLE_MODULE` and adds classes `ExampleQueryHandler` and `ExampleQueryHandlerFactory` to it. +The example dynamically creates a python module `example_module` and adds classes `ExampleQueryHandler` and `ExampleQueryHandlerFactory` to it. In order to execute the example successfully you need to 1. [Create a BucketFS connection](#bucketfs-connection) From 008854a595eb454baedda2796099ae9db1441e73 Mon Sep 17 00:00:00 2001 From: ckunki Date: Fri, 25 Oct 2024 09:01:52 +0200 Subject: [PATCH 47/49] Added setting the language alias to the user guide --- doc/user_guide/user_guide.md | 53 +++++++++++++++++++++++++++++++----- 1 file changed, 46 insertions(+), 7 deletions(-) diff --git a/doc/user_guide/user_guide.md b/doc/user_guide/user_guide.md index 7be9bf22..cd314e88 100644 --- a/doc/user_guide/user_guide.md +++ b/doc/user_guide/user_guide.md @@ -73,12 +73,16 @@ pip install exasol-advanced-analytics-framework Exasol executes User Defined Functions (UDFs) in an isolated Container whose root filesystem is derived from a Script Language Container (SLC). -Running the AAF requires an SLC. The following command -* downloads the specified version `` (preferrably the latest) of a prebuilt AAF SLC from the [AAF releases](https://github.com/exasol/advanced-analytics-framework/releases/latest) on GitHub, +Running the AAF requires an SLC. + +#### Uploading the SLC to the BucketFS + +The following command +* downloads the specified version `$VERSION` (preferrably the latest) of a prebuilt AAF SLC from the [AAF releases](https://github.com/exasol/advanced-analytics-framework/releases/latest) on GitHub, * uploads the file into the BucketFS, * and registers it to the database. -The variable `$LANGUAGE_ALIAS` will be reused in [Additional Scripts](#additional-scripts). +The variable `$LANGUAGE_ALIAS` will be reused in [Defining Additional SQL Scripts](#defining-additional-sql-scripts). ```shell LANGUAGE_ALIAS=PYTHON3_AAF @@ -97,7 +101,42 @@ python -m exasol_advanced_analytics_framework.deploy language-container \ --language-alias "$LANGUAGE_ALIAS" ``` -### Additional Scripts +#### Activating the AAF SLC by Setting a Language Alias + +AAF requires activating the AAF SLC as _script language_. This is accomplished by setting a _language alias_, see [Activate the New Script Language Container](https://docs.exasol.com/db/latest/database_concepts/udf_scripts/adding_new_packages_script_languages.htm) in the official Exasol documentation. + +When using the AAF command `deploy` `language-container` for uploading the SLC you can activate the language alias on one of two available levels using the corresponding CLI option: + +| Level | CLI option | Required Permissions | Persistent? | +|------------------------|---------------------|----------------------------|-------------| +| system level (default) | `--alter-system` | Administration permissions | yes | +| session level (fallback) | `--no-alter-system` | none | no | + +By default the AAF deploy command will try to set the language alias on _system level_. If the specified user does not have administration permissions, then the command falls back to using only _session level_. + +When setting the language alias on _session level_, you need to set it for each session again. The session used by the AAF deploy command will be closed after the command has terminated and hence the language alias will no longer exist when you want to execute a query handler script. + +The following SQL statement displays the language aliases currently defined in the `EXA_PARAMETERS` system table: + +```sql +SELECT * FROM exa_parameters WHERE parameter_name='SCRIPT_LANGUAGES'; +``` + +When using the AAF deploy command with CLI option `--no-alter-system` then the command will display the `ALTER SESSION` SQL statements so you can copy them into your SQL editor and execute them for each session. + +Here is an example output + +``` +In SQL, you can activate the SLC by using the following statements: + +To activate the SLC only for the current session: +ALTER SESSION SET SCRIPT_LANGUAGES='R=builtin_r JAVA=builtin_java PYTHON3=builtin_python3 PYTHON3_AAF=localzmq+protobuf:///bfsdefault/default/temp/exasol_advanced_analytics_framework_container_release?lang=python#/buckets/bfsdefault/default/temp/exasol_advanced_analytics_framework_container_release/exaudf/exaudfclient_py3'; + +To activate the SLC on the system: +ALTER SYSTEM SET SCRIPT_LANGUAGES='R=builtin_r JAVA=builtin_java PYTHON3=builtin_python3 PYTHON3_AAF=localzmq+protobuf:///bfsdefault/default/temp/exasol_advanced_analytics_framework_container_release?lang=python#/buckets/bfsdefault/default/temp/exasol_advanced_analytics_framework_container_release/exaudf/exaudfclient_py3'; +``` + +### Defining Additional SQL Scripts Besides the BucketFS connection, the SLC, and the Python package AAF also requires some additional Lua scripts to be created in the Exasol database. @@ -152,9 +191,9 @@ See [Implementing a Custom Algorithm as Example Query Handler](#implementing-a-c ### Placeholders -| Placeholders | Required? | Description | +| Placeholder | Required? | Description | |------------------------------|-----------|-------------------------------------------------------------------------------| -| `` | yes | Name of the database schema containing the default Query Handler, See [Additional Scripts](#additional-scripts) | +| `` | yes | Name of the database schema containing the default Query Handler, See [Defining Additional SQL Scripts](#defining-additional-sql-scripts) | | `` | yes | Name of the query handler class | | `` | yes | Module name of the query handler class | | `` | yes | Parameters of the query handler class encoded as string | @@ -200,7 +239,7 @@ In order to execute the example successfully you need to The example assumes * the name for the BucketFS Connection `` to be `BFS_CON` -* the name for the AAF database schema `` to be `AAF_DB_SCHEMA`, see [Additional Scripts](#additional-scripts) +* the name for the AAF database schema `` to be `AAF_DB_SCHEMA`, see [Defining Additional SQL Scripts](#defining-additional-sql-scripts). The following SQL statements create the required database schemas unless they already exist: From cf8bd2bf250265ea2c48794870749c8d1b9dc9bd Mon Sep 17 00:00:00 2001 From: ckunki Date: Fri, 25 Oct 2024 10:36:54 +0200 Subject: [PATCH 48/49] Shortened user guide - Renamed MY_QUERY_HANDLER_UDF to EXAMPLE_QUERY_HANDLER_UDF - Renamed BFS_CON to EXAMPLE_BFS_CON - Removed section "Activating the AAF SLC by Setting a Language Alias" --- doc/user_guide/example-udf-script/create.sql | 2 +- doc/user_guide/example-udf-script/execute.sql | 4 +- doc/user_guide/user_guide.md | 68 ++----------------- .../with_db/test_user_guide_example.py | 2 +- 4 files changed, 11 insertions(+), 65 deletions(-) diff --git a/doc/user_guide/example-udf-script/create.sql b/doc/user_guide/example-udf-script/create.sql index 6991f3c0..565afc65 100644 --- a/doc/user_guide/example-udf-script/create.sql +++ b/doc/user_guide/example-udf-script/create.sql @@ -1,5 +1,5 @@ --/ -CREATE OR REPLACE PYTHON3_AAF SET SCRIPT "EXAMPLE_SCHEMA"."MY_QUERY_HANDLER_UDF"(...) +CREATE OR REPLACE PYTHON3_AAF SET SCRIPT "EXAMPLE_SCHEMA"."EXAMPLE_QUERY_HANDLER_UDF"(...) EMITS (outputs VARCHAR(2000000)) AS from typing import Union diff --git a/doc/user_guide/example-udf-script/execute.sql b/doc/user_guide/example-udf-script/execute.sql index 9520e7de..bd20b329 100644 --- a/doc/user_guide/example-udf-script/execute.sql +++ b/doc/user_guide/example-udf-script/execute.sql @@ -7,12 +7,12 @@ EXECUTE SCRIPT "AAF_DB_SCHEMA"."AAF_RUN_QUERY_HANDLER"('{ "parameter": "bla-bla", "udf": { "schema": "EXAMPLE_SCHEMA", - "name": "MY_QUERY_HANDLER_UDF" + "name": "EXAMPLE_QUERY_HANDLER_UDF" } }, "temporary_output": { "bucketfs_location": { - "connection_name": "BFS_CON", + "connection_name": "EXAMPLE_BFS_CON", "directory": "temp" }, "schema_name": "EXAMPLE_TEMP_SCHEMA" diff --git a/doc/user_guide/user_guide.md b/doc/user_guide/user_guide.md index cd314e88..e51a5e5b 100644 --- a/doc/user_guide/user_guide.md +++ b/doc/user_guide/user_guide.md @@ -73,86 +73,32 @@ pip install exasol-advanced-analytics-framework Exasol executes User Defined Functions (UDFs) in an isolated Container whose root filesystem is derived from a Script Language Container (SLC). -Running the AAF requires an SLC. - -#### Uploading the SLC to the BucketFS - -The following command -* downloads the specified version `$VERSION` (preferrably the latest) of a prebuilt AAF SLC from the [AAF releases](https://github.com/exasol/advanced-analytics-framework/releases/latest) on GitHub, +Running the AAF requires an SLC. The following command +* downloads the related prebuilt AAF SLC from the [AAF releases](https://github.com/exasol/advanced-analytics-framework/releases) on GitHub, * uploads the file into the BucketFS, * and registers it to the database. -The variable `$LANGUAGE_ALIAS` will be reused in [Defining Additional SQL Scripts](#defining-additional-sql-scripts). - ```shell -LANGUAGE_ALIAS=PYTHON3_AAF -python -m exasol_advanced_analytics_framework.deploy language-container \ - --dsn "$DB_HOST:$DB_PORT" \ - --db-user "$DB_USER" \ - --db-pass "$DB_PASSWORD" \ - --bucketfs-name "$BUCKETFS_NAME" \ - --bucketfs-host "$BUCKETFS_HOST" \ - --bucketfs-port "$BUCKETFS_PORT" \ - --bucketfs-user "$BUCKETFS_USER" \ - --bucketfs-password "$BUCKETFS_PASSWORD" \ - --bucket "$BUCKETFS_NAME" \ - --path-in-bucket "$PATH_IN_BUCKET" \ - --version "$VERSION" \ - --language-alias "$LANGUAGE_ALIAS" +python -m exasol_advanced_analytics_framework.deploy language-container ``` -#### Activating the AAF SLC by Setting a Language Alias - -AAF requires activating the AAF SLC as _script language_. This is accomplished by setting a _language alias_, see [Activate the New Script Language Container](https://docs.exasol.com/db/latest/database_concepts/udf_scripts/adding_new_packages_script_languages.htm) in the official Exasol documentation. - -When using the AAF command `deploy` `language-container` for uploading the SLC you can activate the language alias on one of two available levels using the corresponding CLI option: - -| Level | CLI option | Required Permissions | Persistent? | -|------------------------|---------------------|----------------------------|-------------| -| system level (default) | `--alter-system` | Administration permissions | yes | -| session level (fallback) | `--no-alter-system` | none | no | - -By default the AAF deploy command will try to set the language alias on _system level_. If the specified user does not have administration permissions, then the command falls back to using only _session level_. - -When setting the language alias on _session level_, you need to set it for each session again. The session used by the AAF deploy command will be closed after the command has terminated and hence the language alias will no longer exist when you want to execute a query handler script. - -The following SQL statement displays the language aliases currently defined in the `EXA_PARAMETERS` system table: - -```sql -SELECT * FROM exa_parameters WHERE parameter_name='SCRIPT_LANGUAGES'; -``` - -When using the AAF deploy command with CLI option `--no-alter-system` then the command will display the `ALTER SESSION` SQL statements so you can copy them into your SQL editor and execute them for each session. - -Here is an example output - -``` -In SQL, you can activate the SLC by using the following statements: - -To activate the SLC only for the current session: -ALTER SESSION SET SCRIPT_LANGUAGES='R=builtin_r JAVA=builtin_java PYTHON3=builtin_python3 PYTHON3_AAF=localzmq+protobuf:///bfsdefault/default/temp/exasol_advanced_analytics_framework_container_release?lang=python#/buckets/bfsdefault/default/temp/exasol_advanced_analytics_framework_container_release/exaudf/exaudfclient_py3'; - -To activate the SLC on the system: -ALTER SYSTEM SET SCRIPT_LANGUAGES='R=builtin_r JAVA=builtin_java PYTHON3=builtin_python3 PYTHON3_AAF=localzmq+protobuf:///bfsdefault/default/temp/exasol_advanced_analytics_framework_container_release?lang=python#/buckets/bfsdefault/default/temp/exasol_advanced_analytics_framework_container_release/exaudf/exaudfclient_py3'; -``` +See the documentation in the Exasol Python Extension Common package for [options common to all Exasol extensions](https://github.com/exasol/python-extension-common/blob/0.8.0/doc/user_guide/user-guide). ### Defining Additional SQL Scripts Besides the BucketFS connection, the SLC, and the Python package AAF also requires some additional Lua scripts to be created in the Exasol database. -The following command deploys the additional scripts to the specified `DB_SCHEMA` using the `LANGUAGE_ALIAS` of the SLC: +The following command deploys the additional scripts to the specified database schema `$AAF_DB_SCHEMA` using the same language alias `$LANGUAGE_ALIAS` as for uploading the SLC before: ```shell python -m exasol_advanced_analytics_framework.deploy scripts \ - --dsn "$DB_HOST:DB_PORT" \ + --dsn "$DB_HOST:$DB_PORT" \ --db-user "$DB_USER" \ --db-pass "$DB_PASSWORD" \ --schema "$AAF_DB_SCHEMA" \ --language-alias "$LANGUAGE_ALIAS" ``` -When later on [executing the script](#placeholders) you must use the schema name `AAF_DB_SCHEMA` or make it the current schema. - ## Usage The entry point of this framework is `AAF_RUN_QUERY_HANDLER` script. This script is simply a query loop which is responsible for executing the implemented algorithm. @@ -238,7 +184,7 @@ In order to execute the example successfully you need to 3. Make sure the database schemas used in the example exist. The example assumes -* the name for the BucketFS Connection `` to be `BFS_CON` +* the name for the BucketFS Connection `` to be `EXAMPLE_BFS_CON` * the name for the AAF database schema `` to be `AAF_DB_SCHEMA`, see [Defining Additional SQL Scripts](#defining-additional-sql-scripts). The following SQL statements create the required database schemas unless they already exist: diff --git a/tests/integration_tests/with_db/test_user_guide_example.py b/tests/integration_tests/with_db/test_user_guide_example.py index 80fed913..492d638d 100644 --- a/tests/integration_tests/with_db/test_user_guide_example.py +++ b/tests/integration_tests/with_db/test_user_guide_example.py @@ -34,7 +34,7 @@ def test_user_guide_example(database_with_slc, pyexasol_connection, example_db_s statement = ( (dir / "execute.sql") .read_text() - .replace("BFS_CON", bucketfs_connection_name) + .replace("EXAMPLE_BFS_CON", bucketfs_connection_name) .replace("AAF_DB_SCHEMA", schema_name) .replace("EXAMPLE_SCHEMA", example_db_schemas[0]) .replace("EXAMPLE_TEMP_SCHEMA", example_db_schemas[1]) From 9d94ad3d79cbdd55e4cb6b8ccf44db3ddf4143b4 Mon Sep 17 00:00:00 2001 From: ckunki Date: Fri, 25 Oct 2024 10:56:14 +0200 Subject: [PATCH 49/49] Fixed integration test --- tests/integration_tests/with_db/test_user_guide_example.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration_tests/with_db/test_user_guide_example.py b/tests/integration_tests/with_db/test_user_guide_example.py index 492d638d..e2e0f377 100644 --- a/tests/integration_tests/with_db/test_user_guide_example.py +++ b/tests/integration_tests/with_db/test_user_guide_example.py @@ -10,8 +10,8 @@ @pytest.fixture def example_db_schemas(pyexasol_connection): with ExitStack() as stack: - s1 = stack.enter_context(temp_schema(example_connection)) - s2 = stack.enter_context(temp_schema(example_connection)) + s1 = stack.enter_context(temp_schema(pyexasol_connection)) + s2 = stack.enter_context(temp_schema(pyexasol_connection)) yield (s1, s2)