From a3983fb642d64d3b1b04e0956a11f9ce4cc220b3 Mon Sep 17 00:00:00 2001
From: Praveen Kundurthy
Date: Thu, 17 Oct 2024 11:38:42 -0700
Subject: [PATCH] [Compile] Update compile.py table_display option to also
display output for staging queries (#864)
Cleanup commits
Co-authored-by: Praveen Kundurthy
---
api/py/ai/chronon/repo/compile.py | 9 +++-
.../sample_team/sample_group_by_group_by.v1 | 4 +-
.../sample_join.group_by_of_group_by | 6 +--
api/py/test/test_compile.py | 47 +++++++++++++++++++
4 files changed, 59 insertions(+), 7 deletions(-)
diff --git a/api/py/ai/chronon/repo/compile.py b/api/py/ai/chronon/repo/compile.py
index 37fa064ab..a1a2cb139 100755
--- a/api/py/ai/chronon/repo/compile.py
+++ b/api/py/ai/chronon/repo/compile.py
@@ -370,12 +370,14 @@ def _handle_deprecation_warning(
)
-def _print_tables(obj: Union[Join, GroupBy], obj_class: Type[Union[Join, GroupBy]]) -> None:
+def _print_tables(obj: utils.ChrononJobTypes, obj_class: Type[utils.ChrononJobTypes]) -> None:
tables = utils.get_modes_tables(obj)
if obj_class is Join:
_print_modes_tables("Output Join Tables", tables)
if obj_class is GroupBy:
_print_modes_tables("Output GroupBy Tables", tables)
+ if obj_class is StagingQuery:
+ _print_modes_tables("Output StagingQuery Tables", tables)
def _handle_extra_conf_objects_to_materialize(
@@ -515,7 +517,10 @@ def _print_features_names(left, right):
def _print_modes_tables(left, right):
text = textwrap.indent(json.dumps(right, indent=2), " " * 27)
- print(f"{left:>25} - \u001b[32m\n{text}\u001b[0m")
+ json_start = "json.start"
+ json_end = "json.end\n"
+
+ print(f"{left:>25} - \n{json_start:>25} \u001b[32m\n{text}\u001b[0m \n{json_end:>25}")
def _print_error(left, right):
diff --git a/api/py/test/sample/production/group_bys/sample_team/sample_group_by_group_by.v1 b/api/py/test/sample/production/group_bys/sample_team/sample_group_by_group_by.v1
index 4ee217ad9..942d3f0b1 100644
--- a/api/py/test/sample/production/group_bys/sample_team/sample_group_by_group_by.v1
+++ b/api/py/test/sample/production/group_bys/sample_team/sample_group_by_group_by.v1
@@ -4,7 +4,7 @@
"production": 0,
"customJson": "{\"lag\": 0, \"groupby_tags\": null, \"column_tags\": {}}",
"dependencies": [
- "{\"name\": \"wait_for_sample_namespace.sample_team_sample_group_by_require_backfill_ds\", \"spec\": \"sample_namespace.sample_team_sample_group_by_require_backfill/ds={{ ds }}\", \"start\": \"2021-04-09\", \"end\": null}"
+ "{\"name\": \"wait_for_sample_namespace.sample_team_sample_group_by_group_by_require_backfill_ds\", \"spec\": \"sample_namespace.sample_team_sample_group_by_group_by_require_backfill/ds={{ ds }}\", \"start\": \"2021-04-09\", \"end\": null}"
],
"tableProperties": {
"sample_config_json": "{\"sample_key\": \"sample_value\"}",
@@ -17,7 +17,7 @@
"sources": [
{
"events": {
- "table": "sample_namespace.sample_team_sample_group_by_require_backfill",
+ "table": "sample_namespace.sample_team_sample_group_by_group_by_require_backfill",
"query": {
"selects": {
"event": "event_expr",
diff --git a/api/py/test/sample/production/joins/sample_team/sample_join.group_by_of_group_by b/api/py/test/sample/production/joins/sample_team/sample_join.group_by_of_group_by
index 0f0fe60b6..51df118a8 100644
--- a/api/py/test/sample/production/joins/sample_team/sample_join.group_by_of_group_by
+++ b/api/py/test/sample/production/joins/sample_team/sample_join.group_by_of_group_by
@@ -6,7 +6,7 @@
"customJson": "{\"check_consistency\": false, \"lag\": 0, \"join_tags\": null, \"join_part_tags\": {}}",
"dependencies": [
"{\"name\": \"wait_for_sample_namespace.sample_team_sample_staging_query_v1_ds\", \"spec\": \"sample_namespace.sample_team_sample_staging_query_v1/ds={{ ds }}\", \"start\": \"2021-03-01\", \"end\": null}",
- "{\"name\": \"wait_for_sample_namespace.sample_team_sample_group_by_require_backfill_ds\", \"spec\": \"sample_namespace.sample_team_sample_group_by_require_backfill/ds={{ ds }}\", \"start\": \"2021-04-09\", \"end\": null}"
+ "{\"name\": \"wait_for_sample_namespace.sample_team_sample_group_by_group_by_require_backfill_ds\", \"spec\": \"sample_namespace.sample_team_sample_group_by_group_by_require_backfill/ds={{ ds }}\", \"start\": \"2021-04-09\", \"end\": null}"
],
"tableProperties": {
"source": "chronon"
@@ -39,7 +39,7 @@
"production": 0,
"customJson": "{\"lag\": 0, \"groupby_tags\": null, \"column_tags\": {}}",
"dependencies": [
- "{\"name\": \"wait_for_sample_namespace.sample_team_sample_group_by_require_backfill_ds\", \"spec\": \"sample_namespace.sample_team_sample_group_by_require_backfill/ds={{ ds }}\", \"start\": \"2021-04-09\", \"end\": null}"
+ "{\"name\": \"wait_for_sample_namespace.sample_team_sample_group_by_group_by_require_backfill_ds\", \"spec\": \"sample_namespace.sample_team_sample_group_by_group_by_require_backfill/ds={{ ds }}\", \"start\": \"2021-04-09\", \"end\": null}"
],
"tableProperties": {
"sample_config_json": "{\"sample_key\": \"sample_value\"}",
@@ -52,7 +52,7 @@
"sources": [
{
"events": {
- "table": "sample_namespace.sample_team_sample_group_by_require_backfill",
+ "table": "sample_namespace.sample_team_sample_group_by_group_by_require_backfill",
"query": {
"selects": {
"event": "event_expr",
diff --git a/api/py/test/test_compile.py b/api/py/test/test_compile.py
index 17b1b0d37..c3a6da393 100644
--- a/api/py/test/test_compile.py
+++ b/api/py/test/test_compile.py
@@ -16,7 +16,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import json
import os
+import re
import pytest
from ai.chronon.repo.compile import extract_and_convert
@@ -53,6 +55,17 @@ def _assert_file_exists(full_file_path, message):
assert os.path.isfile(full_file_path), message
+def _extract_display_output_json_block(output, block_name):
+ """Extract a JSON block from the output of a CLI command."""
+
+ field_index = output.find(block_name)
+ subset = output[field_index:]
+ start_index = subset.find("json.start")
+ end_index = subset.find("json.end")
+ s = re.sub(r"\x1b\[[0-9;]*m", "", subset[start_index + 10 : end_index].strip())
+ return json.loads(s)
+
+
@pytest.fixture
def specific_setup():
# This setup code will only run for tests that request this fixture
@@ -281,6 +294,18 @@ def test_compile_table_display():
result = _invoke_cli_with_params(runner, input_path, ["--table-display"])
assert "Output Join Tables" in result.output
+ output_json_dict = _extract_display_output_json_block(result.output, "Output Join Tables")
+ expected_json_dict = {
+ "backfill": ["chronon_db.sample_team_sample_join_with_derivations_on_external_parts_v1"],
+ "stats-summary": ["chronon_db.sample_team_sample_join_with_derivations_on_external_parts_v1_daily_stats"],
+ "log-flattener": ["chronon_db.sample_team_sample_join_with_derivations_on_external_parts_v1_logged"],
+ "bootstrap": ["chronon_db.sample_team_sample_join_with_derivations_on_external_parts_v1_bootstrap"],
+ "join_parts": [
+ "sample_team_sample_join_with_derivations_on_external_parts_v1_sample_team_event_sample_group_by_v1",
+ "sample_team_sample_join_with_derivations_on_external_parts_v1_sample_team_entity_sample_group_by_from_module_v1",
+ ],
+ }
+ assert json.dumps(output_json_dict, sort_keys=True) == json.dumps(expected_json_dict, sort_keys=True)
assert result.exit_code == 0
@@ -307,6 +332,28 @@ def test_compile_feature_display():
assert result.exit_code == 0
+def test_table_display_staging_query():
+ """
+ Test a staging query compile produces related table
+ """
+ runner = CliRunner()
+ input_path = f"staging_queries/sample_team/sample_staging_query.py"
+ result = runner.invoke(
+ extract_and_convert,
+ [
+ "--chronon_root=test/sample",
+ f"--input_path={input_path}",
+ "--table-display",
+ ],
+ )
+
+ assert "Output StagingQuery Tables" in result.output
+ output_json_dict = _extract_display_output_json_block(result.output, "Output StagingQuery Tables")
+ expected_json_dict = {"backfill": ["sample_namespace.sample_team_sample_staging_query_v1"]}
+ assert json.dumps(output_json_dict, sort_keys=True) == json.dumps(expected_json_dict, sort_keys=True)
+ assert result.exit_code == 0
+
+
def test_compile_dependency_staging_query():
"""
Test that compiling a staging query does not error out