Skip to content

Commit

Permalink
refactor(interactive): Support Unfold Operation in Cypher Queries (#…
Browse files Browse the repository at this point in the history
…4241)

<!--
Thanks for your contribution! please review
https://github.com/alibaba/GraphScope/blob/main/CONTRIBUTING.md before
opening an issue.
-->

## What do these changes do?
as titled.

<!-- Please give a short brief about these changes. -->

## Related issue number

<!-- Are there any issues opened that will be resolved by merging this
change? -->

Fixes

---------

Co-authored-by: BingqingLyu <[email protected]>
  • Loading branch information
shirly121 and BingqingLyu authored Sep 23, 2024
1 parent ee28147 commit 27dc402
Show file tree
Hide file tree
Showing 12 changed files with 224 additions and 6 deletions.
5 changes: 3 additions & 2 deletions .github/workflows/gaia.yml
Original file line number Diff line number Diff line change
Expand Up @@ -88,8 +88,9 @@ jobs:
run: |
cd ${GITHUB_WORKSPACE}/interactive_engine/executor/ir/integrated
cargo test --features=mimalloc
cd ${GITHUB_WORKSPACE}/interactive_engine/executor/store/groot
cargo test --features=mimalloc
# TODO: With mimalloc, it may occur buffer overflow in groot store test sometimes. Currently, we do not use mimalloc (as default) in groot store.
# cd ${GITHUB_WORKSPACE}/interactive_engine/executor/store/groot
# cargo test --features=mimalloc
- name: Ir Integration Test on Experimental Store
run: |
Expand Down
1 change: 1 addition & 0 deletions docs/interactive_engine/neo4j/supported_cypher.md
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@ RETURN a, b, c;
| WHERE NOT EXIST (an edge/path) | implements as anti join | <input type="checkbox" checked />| |
| ORDER BY | | <input type="checkbox" disabled checked /> | |
| LIMIT | | <input type="checkbox" disabled checked /> | |
| UNFOLD | The operation is similar to SQL's 'UNSET', as it unfolds elements from a collection type | <input type="checkbox" disabled checked /> | |

Additionally, we support two types of procedure call invocations in Cypher:
- We offer a set of built-in procedures that can be invoked directly within Cypher queries. These procedures are all prefixed with `gs.procedure.`.
Expand Down
7 changes: 6 additions & 1 deletion interactive_engine/compiler/src/main/antlr4/CypherGS.g4
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ CALL : ( 'C' | 'c' ) ( 'A' | 'a' ) ( 'L' | 'l' ) ( 'L' | 'l' ) ;
YIELD : ( 'Y' | 'y' ) ( 'I' | 'i' ) ( 'E' | 'e' ) ( 'L' | 'l' ) ( 'D' | 'd' ) ;

oC_RegularQuery
: oC_Match ( SP? ( oC_Match | oC_With ) )* ( SP oC_Return ) ;
: oC_Match ( SP? ( oC_Match | oC_With | oC_Unwind ) )* ( SP oC_Return ) ;

oC_Match
: ( OPTIONAL SP )? MATCH SP? oC_Pattern ( SP? oC_Where )? ;
Expand All @@ -61,6 +61,11 @@ MATCH : ( 'M' | 'm' ) ( 'A' | 'a' ) ( 'T' | 't' ) ( 'C' | 'c' ) ( 'H' | 'h' ) ;

OPTIONAL : ( 'O' | 'o' ) ( 'P' | 'p' ) ( 'T' | 't' ) ( 'I' | 'i' ) ( 'O' | 'o' ) ( 'N' | 'n' ) ( 'A' | 'a' ) ( 'L' | 'l' ) ;

oC_Unwind
: UNWIND SP? oC_Expression SP AS SP oC_Variable ;

UNWIND : ( 'U' | 'u' ) ( 'N' | 'n' ) ( 'W' | 'w' ) ( 'I' | 'i' ) ( 'N' | 'n' ) ( 'D' | 'd' ) ;

// multiple sentences
oC_Pattern
: oC_PatternPart ( SP? ',' SP? oC_PatternPart )* ;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
/*
*
* * Copyright 2020 Alibaba Group Holding Limited.
* *
* * Licensed under the Apache License, Version 2.0 (the "License");
* * you may not use this file except in compliance with the License.
* * You may obtain a copy of the License at
* *
* * http://www.apache.org/licenses/LICENSE-2.0
* *
* * Unless required by applicable law or agreed to in writing, software
* * distributed under the License is distributed on an "AS IS" BASIS,
* * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* * See the License for the specific language governing permissions and
* * limitations under the License.
*
*/

package com.alibaba.graphscope.common.ir.rel;

import com.alibaba.graphscope.common.ir.tools.AliasInference;
import com.alibaba.graphscope.common.ir.tools.Utils;

import org.apache.calcite.plan.GraphOptCluster;
import org.apache.calcite.plan.RelTraitSet;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.RelShuttle;
import org.apache.calcite.rel.RelWriter;
import org.apache.calcite.rel.SingleRel;
import org.apache.calcite.rel.type.*;
import org.apache.calcite.rex.RexNode;
import org.checkerframework.checker.nullness.qual.Nullable;

import java.util.List;
import java.util.stream.Collectors;

public class GraphLogicalUnfold extends SingleRel {
private final RexNode key;
private final String aliasName;
private final int aliasId;

public GraphLogicalUnfold(
GraphOptCluster cluster, RelNode input, RexNode key, @Nullable String aliasName) {
super(cluster, RelTraitSet.createEmpty(), input);
this.key = key;
this.aliasName =
AliasInference.inferDefault(
aliasName, AliasInference.getUniqueAliasList(input, true));
this.aliasId = cluster.getIdGenerator().generate(this.aliasName);
}

public RexNode getUnfoldKey() {
return this.key;
}

@Override
public RelWriter explainTerms(RelWriter pw) {
return super.explainTerms(pw).item("key", key).item("alias", aliasName);
}

@Override
public RelDataType deriveRowType() {
RelDataType inputOutputType = Utils.getOutputType(input);
List<RelDataTypeField> fields =
inputOutputType.getFieldList().stream()
.filter(k -> k.getIndex() != AliasInference.DEFAULT_ID)
.collect(Collectors.toList());
fields.add(
new RelDataTypeFieldImpl(
this.aliasName, this.aliasId, key.getType().getComponentType()));
return new RelRecordType(StructKind.FULLY_QUALIFIED, fields);
}

@Override
public GraphLogicalUnfold copy(RelTraitSet traitSet, List<RelNode> inputs) {
return new GraphLogicalUnfold(
(GraphOptCluster) getCluster(), inputs.get(0), this.key, this.aliasName);
}

@Override
public RelNode accept(RelShuttle shuttle) {
if (shuttle instanceof GraphShuttle) {
return ((GraphShuttle) shuttle).visit(this);
}
return shuttle.visit(this);
}

public String getAliasName() {
return this.aliasName;
}

public int getAliasId() {
return this.aliasId;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,10 @@ public RelNode visit(LogicalJoin join) {
return visitChildren(join);
}

public RelNode visit(GraphLogicalUnfold unfold) {
return visitChildren(unfold);
}

@Override
public RelNode visit(RelNode other) {
if (other instanceof MultiJoin) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -962,6 +962,37 @@ public RelNode visit(MultiJoin multiJoin) {
return multiJoin;
}

@Override
public RelNode visit(GraphLogicalUnfold unfold) {
visitChildren(unfold);
RexNode unfoldKey = unfold.getUnfoldKey();
Preconditions.checkArgument(
unfoldKey instanceof RexGraphVariable,
"unfold key should be a variable, but is [%s]",
unfoldKey);
int keyAliasId = ((RexGraphVariable) unfoldKey).getAliasId();
GraphAlgebraPhysical.Unfold.Builder unfoldBuilder =
GraphAlgebraPhysical.Unfold.newBuilder().setTag(Utils.asAliasId(keyAliasId));
if (unfold.getAliasId() != AliasInference.DEFAULT_ID) {
unfoldBuilder.setAlias(Utils.asAliasId(unfold.getAliasId()));
}
List<RelDataTypeField> fullFields = unfold.getRowType().getFieldList();
Preconditions.checkArgument(!fullFields.isEmpty(), "there is no fields in unfold row type");
RelDataType curRowType =
new RelRecordType(
StructKind.FULLY_QUALIFIED,
fullFields.subList(fullFields.size() - 1, fullFields.size()));
physicalBuilder.addPlan(
GraphAlgebraPhysical.PhysicalOpr.newBuilder()
.setOpr(
GraphAlgebraPhysical.PhysicalOpr.Operator.newBuilder()
.setUnfold(unfoldBuilder)
.build())
.addAllMetaData(Utils.physicalProtoRowType(curRowType, isColumnId))
.build());
return unfold;
}

private List<RexGraphVariable> getLeftRightVariables(RexNode condition) {
List<RexGraphVariable> vars = Lists.newArrayList();
if (condition instanceof RexCall) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1655,6 +1655,20 @@ public GraphBuilder dedupBy(Iterable<? extends RexNode> nodes) {
return this;
}

public GraphBuilder unfold(RexNode unfoldKey, @Nullable String aliasName) {
RelNode input = requireNonNull(peek(), "frame stack is empty");
RelDataType keyType = unfoldKey.getType();
Preconditions.checkArgument(
keyType.getComponentType() != null,
"input type of 'unfold' should be set or array with single component type, but is"
+ " [%s]",
keyType);
GraphLogicalUnfold unfold =
new GraphLogicalUnfold((GraphOptCluster) getCluster(), input, unfoldKey, aliasName);
replaceTop(unfold);
return this;
}

@Override
public RelBuilder join(
JoinRelType joinType, RexNode condition, Set<CorrelationId> variablesSet) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,15 @@ public RexNode makeIn(RexNode arg, List<? extends RexNode> ranges) {
makeSearchArgumentLiteral(sarg, sargType));
}
}
if (ranges.size() == 1 && ranges.get(0).getKind() == SqlKind.DYNAMIC_PARAM) {
return makeCall(GraphStdOperatorTable.IN, arg, ranges.get(0));
if (ranges.size() == 1) {
RexNode range = ranges.get(0);
switch (range.getKind()) {
// right operand is a dynamic parameter ( name in $names ), or a variable ( name
// in names )
case DYNAMIC_PARAM:
case INPUT_REF:
return makeCall(GraphStdOperatorTable.IN, arg, ranges.get(0));
}
}
return RexUtil.composeDisjunction(
this,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,7 @@ public static final SqlFunction USER_DEFINED_PROCEDURE(StoredProcedureMeta meta)
true,
ReturnTypes.BOOLEAN_NULLABLE,
GraphInferTypes.IN_OPERANDS_TYPE,
OperandTypes.ANY);
GraphOperandTypes.ANY_ANY);

public static final SqlOperator PATH_CONCAT =
new SqlFunction(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,13 @@ public GraphBuilder visitOC_Cypher(CypherGSParser.OC_CypherContext ctx) {
return visitOC_Statement(ctx.oC_Statement());
}

@Override
public GraphBuilder visitOC_Unwind(CypherGSParser.OC_UnwindContext ctx) {
RexNode expr = expressionVisitor.visitOC_Expression(ctx.oC_Expression()).getExpr();
String alias = ctx.oC_Variable() == null ? null : ctx.oC_Variable().getText();
return builder.unfold(expr, alias);
}

@Override
public GraphBuilder visitOC_Match(CypherGSParser.OC_MatchContext ctx) {
int childCnt = ctx.oC_Pattern().getChildCount();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,9 @@ public abstract class GraphOperandTypes {
public static final SqlSingleOperandTypeChecker INTERVALINTERVAL_INTERVALDATETIME =
OperandTypes.or(INTERVAL_SAME_SAME, INTERVAL_DATETIME);

public static final SqlSingleOperandTypeChecker ANY_ANY =
family(SqlTypeFamily.ANY, SqlTypeFamily.ANY);

/**
* create {@code RexFamilyOperandTypeChecker} to validate type based on {@code RexNode}
* @param families
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,56 @@ public void bi3_test() {
com.alibaba.graphscope.common.ir.tools.Utils.toString(after).trim());
}

@Test
public void bi4_test() {
GraphBuilder builder = Utils.mockGraphBuilder(optimizer, irMeta);
RelNode before =
com.alibaba.graphscope.cypher.antlr4.Utils.eval(
"MATCH (forum:FORUM)\n"
+ "WITH collect(forum) AS topForums\n"
+ "UNWIND topForums AS topForums1\n"
+ "MATCH"
+ " (topForums1:FORUM)-[:CONTAINEROF]->(post:POST)<-[:REPLYOF*0..10]-(message:POST|COMMENT)-[:HASCREATOR]->(person:PERSON)<-[:HASMEMBER]-(topForums2:FORUM)\n"
+ "WHERE topForums2 IN topForums\n"
+ "RETURN\n"
+ " person.id AS personId\n"
+ " ORDER BY\n"
+ " personId ASC\n"
+ " LIMIT 100;",
builder)
.build();
RelNode after = optimizer.optimize(before, new GraphIOProcessor(builder, irMeta));
Assert.assertEquals(
"GraphLogicalSort(sort0=[personId], dir0=[ASC], fetch=[100])\n"
+ " GraphLogicalProject(personId=[person.id], isAppend=[false])\n"
+ " LogicalJoin(condition=[AND(=(topForums1, topForums1), IN(topForums2,"
+ " topForums))], joinType=[inner])\n"
+ " GraphLogicalUnfold(key=[topForums], alias=[topForums1])\n"
+ " GraphLogicalAggregate(keys=[{variables=[], aliases=[]}],"
+ " values=[[{operands=[forum], aggFunction=COLLECT, alias='topForums',"
+ " distinct=false}]])\n"
+ " GraphLogicalSource(tableConfig=[{isAll=false, tables=[FORUM]}],"
+ " alias=[forum], opt=[VERTEX])\n"
+ " GraphPhysicalExpand(tableConfig=[{isAll=false, tables=[HASMEMBER]}],"
+ " alias=[topForums2], startAlias=[person], opt=[IN], physicalOpt=[VERTEX])\n"
+ " GraphPhysicalExpand(tableConfig=[{isAll=false,"
+ " tables=[HASCREATOR]}], alias=[person], startAlias=[message], opt=[OUT],"
+ " physicalOpt=[VERTEX])\n"
+ " GraphLogicalGetV(tableConfig=[{isAll=false, tables=[POST,"
+ " COMMENT]}], alias=[message], opt=[END])\n"
+ " "
+ " GraphLogicalPathExpand(fused=[GraphPhysicalExpand(tableConfig=[{isAll=false,"
+ " tables=[REPLYOF]}], alias=[_], opt=[IN], physicalOpt=[VERTEX])\n"
+ "], fetch=[10], path_opt=[ARBITRARY], result_opt=[END_V], alias=[_],"
+ " start_alias=[post])\n"
+ " GraphPhysicalExpand(tableConfig=[{isAll=false,"
+ " tables=[CONTAINEROF]}], alias=[post], startAlias=[topForums1], opt=[OUT],"
+ " physicalOpt=[VERTEX])\n"
+ " GraphLogicalSource(tableConfig=[{isAll=false,"
+ " tables=[FORUM]}], alias=[topForums1], opt=[VERTEX])",
after.explain().trim());
}

@Test
public void bi5_test() {
GraphBuilder builder = Utils.mockGraphBuilder(optimizer, irMeta);
Expand Down

0 comments on commit 27dc402

Please sign in to comment.