Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Join recursive cte to base #880

Merged
merged 18 commits into from
Jul 16, 2020
30 changes: 18 additions & 12 deletions graphql_compiler/compiler/emit_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -690,7 +690,7 @@ def generate_subquery(self) -> str:
return alias


@dataclass
@dataclass(eq=False) # Setting eq=False allows the class to inherit id-based hash from object
class ColumnRouter:
"""Container for columns selected from a variety of selectables.

Expand Down Expand Up @@ -743,7 +743,7 @@ def __init__(self, sql_schema_info: SQLAlchemySchemaInfo, ir: IrAndMetadata):
self._relocate(ir.query_metadata_table.root_location)

# Mapping aliases to the column used to join into them.
self._came_from: Dict[Alias, Column] = {}
self._came_from: Dict[Union[Alias, ColumnRouter], Column] = {}
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The mention of aliases in the docstring is outdated. So is the entire _aliases field, since it doesn't only contain aliases anymore. I'm working on fixing this in the next PR, and I created an issue #882


self._recurse_needs_cte: bool = False

Expand Down Expand Up @@ -913,10 +913,13 @@ def _wrap_into_cte(self) -> None:
if self._recurse_needs_cte:
self._current_alias = self.get_query(extra_outputs).cte(recursive=False)
self._from_clause = self._current_alias

self._filters = [] # The filters are already included in the cte
self._aliases = {
alias_key: ColumnRouter(

# After creating this CTE, accessing a field of any of the currently marked
# locations should redirect to the column of the cte that exposes that field.
bojanserafimov marked this conversation as resolved.
Show resolved Hide resolved
# Here we replace all aliases with ColumnRouters pointing to the CTE.
old_to_new_alias = {
alias_value: ColumnRouter(
{
external_name: self._current_alias.c[internal_name]
for external_name, internal_name in column_mappings.get(
Expand All @@ -926,6 +929,14 @@ def _wrap_into_cte(self) -> None:
)
for alias_key, alias_value in self._aliases.items()
}
self._came_from = {
old_to_new_alias[alias]: old_to_new_alias[alias].c[came_from.name]
for alias, came_from in self._came_from.items()
}
self._aliases = {
alias_key: old_to_new_alias[alias] for alias_key, alias in self._aliases.items()
}

if not isinstance(self._current_location, Location):
raise AssertionError(
f"Attempted to wrap to CTE while the _current_location of was type "
Expand Down Expand Up @@ -1020,13 +1031,8 @@ def recurse(self, vertex_field: str, depth: int) -> None:
.where(base.c[CTE_DEPTH_NAME] < literal_depth)
)

# Instead of joining to the current _from_clause, we make this alias the _from_clause.
# If the existing _from_clause had any information in it, then the _current_alias would
# be a cte that contains all that information.
# TODO(bojanserafimov): If the existing _from_clause had no filters or traversals, but
# had an output, the output needs to be moved into the cte or
# we need to join to the _from_clause.
self._from_clause = self._current_alias
# TODO(bojanserafimov): This creates an unused alias if there's no tags or outputs so far
self._join_to_parent_location(previous_alias, primary_key, CTE_KEY_NAME, False)

def start_global_operations(self) -> None:
"""Execute a GlobalOperationsStart block."""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,7 @@ def test_recurse(self) -> None:

# (query, expected_results) pairs. All of them running with the same parameters.
# The queries are ran in the order specified here.
queries = [
queries: List[Tuple[str, List[Dict[str, Any]]]] = [
# Query 1: Just the root
(
"""
Expand Down Expand Up @@ -426,6 +426,92 @@ def test_recurse(self) -> None:
{"descendant_name": "Animal 4"}, # depth 3 match
],
),
# Query 8: Skip depth 0
(
"""
{
Animal {
name @filter(op_name: "=", value: ["$starting_animal_name"])
out_Animal_ParentOf {
out_Animal_ParentOf @recurse(depth: 2){
bojanserafimov marked this conversation as resolved.
Show resolved Hide resolved
name @output(out_name: "descendant_name")
}
}
}
}""",
bojanserafimov marked this conversation as resolved.
Show resolved Hide resolved
[
{"descendant_name": "Animal 1"}, # depth 1 match
{"descendant_name": "Animal 2"}, # depth 1 match
{"descendant_name": "Animal 3"}, # depth 1 match
{"descendant_name": "Animal 1"}, # depth 2 match
{"descendant_name": "Animal 2"}, # depth 2 match
{"descendant_name": "Animal 3"}, # depth 2 match
{"descendant_name": "Animal 4"}, # depth 2 match
{"descendant_name": "Animal 1"}, # depth 3 match
{"descendant_name": "Animal 2"}, # depth 3 match
{"descendant_name": "Animal 3"}, # depth 3 match
{"descendant_name": "Animal 4"}, # depth 3 match
],
),
# Query 9: Output child name
(
"""
{
Animal {
name @filter(op_name: "=", value: ["$starting_animal_name"])
out_Animal_ParentOf {
name @output(out_name: "child_name")
out_Animal_ParentOf @recurse(depth: 2){
bojanserafimov marked this conversation as resolved.
Show resolved Hide resolved
name @output(out_name: "descendant_name")
}
}
}
}""",
[
{"child_name": "Animal 1", "descendant_name": "Animal 1"}, # depth 1 match
{"child_name": "Animal 2", "descendant_name": "Animal 2"}, # depth 1 match
{"child_name": "Animal 3", "descendant_name": "Animal 3"}, # depth 1 match
bojanserafimov marked this conversation as resolved.
Show resolved Hide resolved
{"child_name": "Animal 1", "descendant_name": "Animal 1"}, # depth 2 match
{"child_name": "Animal 1", "descendant_name": "Animal 2"}, # depth 2 match
{"child_name": "Animal 1", "descendant_name": "Animal 3"}, # depth 2 match
{"child_name": "Animal 3", "descendant_name": "Animal 4"}, # depth 2 match
{"child_name": "Animal 1", "descendant_name": "Animal 1"}, # depth 3 match
{"child_name": "Animal 1", "descendant_name": "Animal 2"}, # depth 3 match
{"child_name": "Animal 1", "descendant_name": "Animal 3"}, # depth 3 match
{"child_name": "Animal 1", "descendant_name": "Animal 4"}, # depth 3 match
],
),
# Query 10: Recurse within optional scope. Animal_1 has no grandchildren from its
# child Animal_2, but since we use an @optional edge, Animal_2 should
# still appear in the result as a child of Animal_1. Here we are testing
# that the use of @recurse does not interfere with @optional semantics.
(
"""
{
Animal {
name @filter(op_name: "=", value: ["$starting_animal_name"])
out_Animal_ParentOf {
name @output(out_name: "child_name")
out_Animal_ParentOf @optional {
out_Animal_ParentOf @recurse(depth: 1){
name @output(out_name: "descendant_name")
}
}
}
}
}""",
[
{"child_name": "Animal 1", "descendant_name": "Animal 1"}, # depth 2 match
{"child_name": "Animal 1", "descendant_name": "Animal 2"}, # depth 2 match
{"child_name": "Animal 1", "descendant_name": "Animal 3"}, # depth 2 match
{"child_name": "Animal 2", "descendant_name": None}, # depth 2 match
{"child_name": "Animal 3", "descendant_name": "Animal 4"}, # depth 2 match
{"child_name": "Animal 1", "descendant_name": "Animal 1"}, # depth 3 match
{"child_name": "Animal 1", "descendant_name": "Animal 2"}, # depth 3 match
{"child_name": "Animal 1", "descendant_name": "Animal 3"}, # depth 3 match
{"child_name": "Animal 1", "descendant_name": "Animal 4"}, # depth 3 match
],
),
]

# TODO(bojanserafimov): Only testing in MSSQL because none of our backends agree on recurse
Expand Down Expand Up @@ -713,7 +799,7 @@ def test_snapshot_graphql_schema_from_orientdb_schema(self):
"UniquelyIdentifiable": {"uuid": GraphQLID}
}
schema, _ = generate_schema(
self.orientdb_client, # type: ignore # from fixture
self.orientdb_client,
class_to_field_type_overrides=class_to_field_type_overrides,
hidden_classes={ORIENTDB_BASE_VERTEX_CLASS_NAME},
)
Expand Down
8 changes: 4 additions & 4 deletions graphql_compiler/tests/snapshot_tests/test_cost_estimation.py
Original file line number Diff line number Diff line change
Expand Up @@ -1701,7 +1701,7 @@ def test_disjoint_intervals(self) -> None:

@pytest.mark.usefixtures("snapshot_orientdb_client")
def test_int_value_conversion_uuid(self):
schema_graph = generate_schema_graph(self.orientdb_client) # type: ignore # from fixture
schema_graph = generate_schema_graph(self.orientdb_client)
graphql_schema, type_equivalence_hints = get_graphql_schema_from_schema_graph(schema_graph)
pagination_keys = {vertex_name: "uuid" for vertex_name in schema_graph.vertex_class_names}
uuid4_field_info = {
Expand Down Expand Up @@ -1738,7 +1738,7 @@ def test_int_value_conversion_uuid(self):

@pytest.mark.usefixtures("snapshot_orientdb_client")
def test_int_value_conversion_mssql_uuid(self):
schema_graph = generate_schema_graph(self.orientdb_client) # type: ignore # from fixture
schema_graph = generate_schema_graph(self.orientdb_client)
graphql_schema, type_equivalence_hints = get_graphql_schema_from_schema_graph(schema_graph)
pagination_keys = {vertex_name: "uuid" for vertex_name in schema_graph.vertex_class_names}
uuid4_field_info = {
Expand Down Expand Up @@ -1775,7 +1775,7 @@ def test_int_value_conversion_mssql_uuid(self):

@pytest.mark.usefixtures("snapshot_orientdb_client")
def test_int_value_conversion_datetime(self):
schema_graph = generate_schema_graph(self.orientdb_client) # type: ignore # from fixture
schema_graph = generate_schema_graph(self.orientdb_client)
graphql_schema, type_equivalence_hints = get_graphql_schema_from_schema_graph(schema_graph)
pagination_keys = {vertex_name: "uuid" for vertex_name in schema_graph.vertex_class_names}
uuid4_field_info = {
Expand Down Expand Up @@ -1812,7 +1812,7 @@ def test_int_value_conversion_datetime(self):

@pytest.mark.usefixtures("snapshot_orientdb_client")
def test_int_value_conversion_date(self):
schema_graph = generate_schema_graph(self.orientdb_client) # type: ignore # from fixture
schema_graph = generate_schema_graph(self.orientdb_client)
graphql_schema, type_equivalence_hints = get_graphql_schema_from_schema_graph(schema_graph)
pagination_keys = {vertex_name: "uuid" for vertex_name in schema_graph.vertex_class_names}
uuid4_field_info = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -381,7 +381,7 @@ def test_eligible_fields(self) -> None:

@pytest.mark.usefixtures("snapshot_orientdb_client")
def test_distinct_result_set_estimates_with_revisit_counts(self):
schema_graph = generate_schema_graph(self.orientdb_client) # type: ignore # from fixture
schema_graph = generate_schema_graph(self.orientdb_client)
bojanserafimov marked this conversation as resolved.
Show resolved Hide resolved
graphql_schema, type_equivalence_hints = get_graphql_schema_from_schema_graph(schema_graph)
pagination_keys = {vertex_name: "uuid" for vertex_name in schema_graph.vertex_class_names}
uuid4_field_info = {
Expand Down
47 changes: 35 additions & 12 deletions graphql_compiler/tests/test_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -2335,7 +2335,9 @@ def test_simple_recurse(self):
SELECT
anon_1.name AS relation_name
FROM
anon_1
db_1.schema_1.[Animal] AS [Animal_1]
JOIN anon_1
ON [Animal_1].uuid = anon_1.__cte_key
"""
expected_cypher = """
MATCH (Animal___1:Animal)
Expand Down Expand Up @@ -2368,7 +2370,9 @@ def test_simple_recurse(self):
SELECT
anon_1.name AS relation_name
FROM
anon_1
schema_1."Animal" AS "Animal_1"
JOIN anon_1
ON "Animal_1".uuid = anon_1.__cte_key
"""

check_test_data(
Expand Down Expand Up @@ -2425,7 +2429,10 @@ def test_recurse_with_new_output_inside_recursion_and_filter_at_root(self):
SELECT
anon_1.color AS animal_color,
anon_1.name AS relation_name
FROM anon_1
FROM
anon_2
JOIN anon_1
ON anon_2.[Animal__uuid] = anon_1.__cte_key
"""
expected_cypher = SKIP_TEST
expected_postgresql = SKIP_TEST
Expand Down Expand Up @@ -2480,7 +2487,9 @@ def test_filter_then_recurse(self):
SELECT
anon_1.name AS relation_name
FROM
anon_1
anon_2
JOIN anon_1
ON anon_2.[Animal__uuid] = anon_1.__cte_key
"""
expected_cypher = SKIP_TEST
expected_postgresql = SKIP_TEST
Expand Down Expand Up @@ -2865,7 +2874,9 @@ def test_filter_within_recurse(self):
SELECT
anon_1.name AS relation_name
FROM
anon_1
db_1.schema_1.[Animal] AS [Animal_1]
JOIN anon_1
ON [Animal_1].uuid = anon_1.__cte_key
WHERE
anon_1.color = :wanted
"""
Expand Down Expand Up @@ -2898,7 +2909,9 @@ def test_filter_within_recurse(self):
SELECT
anon_1.name AS relation_name
FROM
anon_1
schema_1."Animal" AS "Animal_1"
JOIN anon_1
ON "Animal_1".uuid = anon_1.__cte_key
WHERE
anon_1.color = %(wanted)s
"""
Expand Down Expand Up @@ -5459,8 +5472,10 @@ def test_fold_after_recurse(self):
[Animal_1].name AS animal_name,
folded_subquery_1.fold_output_name AS homes_list
FROM
db_1.schema_1.[Animal] AS [Animal_1],
anon_1 JOIN (
db_1.schema_1.[Animal] AS [Animal_1]
JOIN anon_1
ON [Animal_1].uuid = anon_1.__cte_key
JOIN (
SELECT
anon_2.uuid AS uuid,
coalesce((
Expand Down Expand Up @@ -9662,10 +9677,14 @@ def test_simple_optional_recurse(self):
anon_1.[Animal_in_Animal_ParentOf__name] AS child_name,
anon_1.[Animal__name] AS name,
anon_2.name AS self_and_ancestor_name
FROM anon_1, anon_2
FROM
anon_1
LEFT OUTER JOIN anon_2
bojanserafimov marked this conversation as resolved.
Show resolved Hide resolved
ON anon_1.[Animal_in_Animal_ParentOf__uuid] = anon_2.__cte_key
WHERE anon_2.__cte_key IS NOT NULL OR anon_1.[Animal_in_Animal_ParentOf__uuid] IS NULL
"""
# TODO(bojanserafimov) Add an integration test for this query to make sure the recurse
# preserves left join misses from the parent optional traversal.
# TODO(bojanserafimov) Test with a traversal inside the recurse. See that the recursive
# cte uses LEFT OUTER JOIN.
expected_cypher = SKIP_TEST
expected_postgresql = """
WITH RECURSIVE anon_1 AS (
Expand Down Expand Up @@ -9708,7 +9727,11 @@ def test_simple_optional_recurse(self):
anon_1."Animal__name" AS name,
anon_2.name AS self_and_ancestor_name
FROM
anon_1, anon_2
anon_1
LEFT OUTER JOIN anon_2
ON anon_1."Animal_in_Animal_ParentOf__uuid" = anon_2.__cte_key
WHERE anon_2.__cte_key IS NOT NULL OR anon_1."Animal_in_Animal_ParentOf__uuid"
IS NULL
"""

check_test_data(
Expand Down
2 changes: 1 addition & 1 deletion mypy.ini
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ disallow_untyped_calls = True
disallow_incomplete_defs = True
disallow_untyped_defs = True
disallow_untyped_decorators = True
warn_unused_ignores = False # mypy-copilot: disabled as mypy only reports it if other checks pass
warn_unused_ignores = True
bojanserafimov marked this conversation as resolved.
Show resolved Hide resolved
ignore_missing_imports = False


Expand Down