cache mergetree data
zzcclp authored and liuneng1994 committed Jul 29, 2024
1 parent 242dc35 commit 4abc92a
Showing 18 changed files with 1,107 additions and 26 deletions.
20 changes: 20 additions & 0 deletions backends-clickhouse/pom.xml
@@ -247,6 +247,10 @@
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.antlr</groupId>
<artifactId>antlr4-runtime</artifactId>
</dependency>
</dependencies>

<build>
@@ -365,6 +369,22 @@
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.antlr</groupId>
<artifactId>antlr4-maven-plugin</artifactId>
<executions>
<execution>
<goals>
<goal>antlr4</goal>
</goals>
</execution>
</executions>
<configuration>
<visitor>true</visitor>
<sourceDirectory>../backends-clickhouse/src/main/antlr4</sourceDirectory>
<treatWarningsAsErrors>true</treatWarningsAsErrors>
</configuration>
</plugin>
</plugins>
</build>
</project>
@@ -0,0 +1,232 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

grammar GlutenClickhouseSqlBase;

@members {
/**
* Verify whether current token is a valid decimal token (which contains dot).
* Returns true if the character that follows the token is not a digit or letter or underscore.
*
* For example:
* For char stream "2.3", "2." is not a valid decimal token, because it is followed by digit '3'.
* For char stream "2.3_", "2.3" is not a valid decimal token, because it is followed by '_'.
* For char stream "2.3W", "2.3" is not a valid decimal token, because it is followed by 'W'.
* For char stream "12.0D 34.E2+0.12 " 12.0D is a valid decimal token because it is folllowed
* by a space. 34.E2 is a valid decimal token because it is followed by symbol '+'
* which is not a digit or letter or underscore.
*/
public boolean isValidDecimal() {
int nextChar = _input.LA(1);
if (nextChar >= 'A' && nextChar <= 'Z' || nextChar >= '0' && nextChar <= '9' ||
nextChar == '_') {
return false;
} else {
return true;
}
}
}

tokens {
DELIMITER
}

singleStatement
: statement ';'* EOF
;

statement
: CACHE META? DATA ASYN? SELECT selectedColumns=selectedColumnNames
FROM (path=STRING | table=qualifiedName) (AFTER filter=filterClause)?
(CACHEPROPERTIES cacheProps=propertyList)? #cacheData
| .*? #passThrough
;
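// Illustrative examples of statements this rule is meant to accept (examples only; they
// assume the driving parser upper-cases its input stream, as Spark's SQL parser does,
// since the IDENTIFIER token below only matches upper-case letters, digits and '_'):
//   CACHE DATA SELECT * FROM db.tbl
//   CACHE META DATA ASYN SELECT c1, c2 FROM db.tbl AFTER dt AS OF '2024-07-29 00:00:00'
//   CACHE DATA SELECT * FROM '/path/to/table' CACHEPROPERTIES (k1 = 'v1', k2 = 'v2')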

qualifiedName
: identifier (DOT identifier)*
;

selectedColumnNames
: ASTERISK
| identifier (COMMA identifier)*
;

filterClause
: TIMESTAMP AS OF timestamp=STRING
| datepartition=identifier AS OF datetime=STRING
;

propertyList
: LEFT_PAREN property (COMMA property)* RIGHT_PAREN
;

property
: key=propertyKey (EQ? value=propertyValue)?
;

propertyKey
: identifier (DOT identifier)*
| stringLit
;

propertyValue
: INTEGER_VALUE
| DECIMAL_VALUE
| booleanValue
| identifier LEFT_PAREN stringLit COMMA stringLit RIGHT_PAREN
| value=stringLit
;

stringLit
: STRING
| DOUBLEQUOTED_STRING
;

booleanValue
: TRUE | FALSE
;

identifier
: IDENTIFIER #unquotedIdentifier
| quotedIdentifier #quotedIdentifierAlternative
| nonReserved #unquotedIdentifier
;

quotedIdentifier
: BACKQUOTED_IDENTIFIER
;

// Add keywords here so that people's queries don't break if they have a column name as one of
// these tokens
nonReserved
: CACHE | META | ASYN | DATA
| SELECT | FOR | AFTER | CACHEPROPERTIES
| TIMESTAMP | AS | OF | DATE_PARTITION
;

// Define how the keywords above should appear in a user's SQL statement.
CACHE: 'CACHE';
META: 'META';
ASYN: 'ASYN';
DATA: 'DATA';
SELECT: 'SELECT';
COMMA: ',';
FOR: 'FOR';
FROM: 'FROM';
AFTER: 'AFTER';
CACHEPROPERTIES: 'CACHEPROPERTIES';
DOT: '.';
ASTERISK: '*';
TIMESTAMP: 'TIMESTAMP';
AS: 'AS';
OF: 'OF';
DATE_PARTITION: 'DATE_PARTITION';
LEFT_PAREN: '(';
RIGHT_PAREN: ')';
TRUE: 'TRUE';
FALSE: 'FALSE';

EQ : '=' | '==';
NSEQ: '<=>';
NEQ : '<>';
NEQJ: '!=';
LTE : '<=' | '!>';
GTE : '>=' | '!<';
CONCAT_PIPE: '||';

STRING
: '\'' ( ~('\''|'\\') | ('\\' .) )* '\''
| '"' ( ~('"'|'\\') | ('\\' .) )* '"'
;

DOUBLEQUOTED_STRING
:'"' ( ~('"'|'\\') | ('\\' .) )* '"'
;

BIGINT_LITERAL
: DIGIT+ 'L'
;

SMALLINT_LITERAL
: DIGIT+ 'S'
;

TINYINT_LITERAL
: DIGIT+ 'Y'
;

INTEGER_VALUE
: DIGIT+
;

DECIMAL_VALUE
: DIGIT+ EXPONENT
| DECIMAL_DIGITS EXPONENT? {isValidDecimal()}?
;

DOUBLE_LITERAL
: DIGIT+ EXPONENT? 'D'
| DECIMAL_DIGITS EXPONENT? 'D' {isValidDecimal()}?
;

BIGDECIMAL_LITERAL
: DIGIT+ EXPONENT? 'BD'
| DECIMAL_DIGITS EXPONENT? 'BD' {isValidDecimal()}?
;

IDENTIFIER
: (LETTER | DIGIT | '_')+
;

BACKQUOTED_IDENTIFIER
: '`' ( ~'`' | '``' )* '`'
;

fragment DECIMAL_DIGITS
: DIGIT+ '.' DIGIT*
| '.' DIGIT+
;

fragment EXPONENT
: 'E' [+-]? DIGIT+
;

fragment DIGIT
: [0-9]
;

fragment LETTER
: [A-Z]
;

SIMPLE_COMMENT
: '--' ~[\r\n]* '\r'? '\n'? -> channel(HIDDEN)
;

BRACKETED_COMMENT
: '/*' .*? '*/' -> channel(HIDDEN)
;

WS : [ \r\n\t]+ -> channel(HIDDEN)
;

// Catch-all for anything we can't recognize.
// We use this to be able to ignore and recover all the text
// when splitting statements with DelimiterLexer
UNRECOGNIZED
: .
;
@@ -1,11 +1,27 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.gluten.execution;

import java.util.Set;

public class CHNativeCacheManager {
  public static void cacheParts(String table, Set<String> columns, boolean async) {
    nativeCacheParts(table, String.join(",", columns), async);
  }

  private static native void nativeCacheParts(String table, String columns, boolean async);
}
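
For context, a minimal usage sketch of this JNI wrapper from the Scala side (illustrative only: the table identifier and column names are hypothetical, and the native library is assumed to have been loaded by the ClickHouse backend beforehand):

import scala.collection.JavaConverters._

// Hypothetical call: cache columns "c1" and "c2" of table "db.tbl";
// the async flag is forwarded as-is to the native side.
val columns: java.util.Set[String] = Set("c1", "c2").asJava
CHNativeCacheManager.cacheParts("db.tbl", columns, true)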
Expand Up @@ -25,6 +25,7 @@ import org.apache.gluten.extension.{CountDistinctWithoutExpand, FallbackBroadcas
import org.apache.gluten.extension.columnar.AddFallbackTagRule
import org.apache.gluten.extension.columnar.MiscColumnarRules.TransformPreOverrides
import org.apache.gluten.extension.columnar.transition.Convention
import org.apache.gluten.parser.GlutenClickhouseSqlParser
import org.apache.gluten.sql.shims.SparkShimLoader
import org.apache.gluten.substrait.expression.{ExpressionBuilder, ExpressionNode, WindowFunctionNode}
import org.apache.gluten.utils.{CHJoinValidateUtil, UnknownJoinStrategy}
@@ -40,6 +41,7 @@ import org.apache.spark.sql.catalyst.{CHAggregateFunctionRewriteRule, EqualToRew
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, CollectList, CollectSet}
import org.apache.spark.sql.catalyst.optimizer.BuildSide
import org.apache.spark.sql.catalyst.parser.ParserInterface
import org.apache.spark.sql.catalyst.plans.JoinType
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.plans.physical.{BroadcastMode, HashPartitioning, Partitioning, RangePartitioning}
@@ -611,6 +613,11 @@ class CHSparkPlanExecApi extends SparkPlanExecApi {
override def genExtendedStrategies(): List[SparkSession => Strategy] =
List()

override def genInjectExtendedParser()
: List[(SparkSession, ParserInterface) => ParserInterface] = {
List((spark, parserInterface) => new GlutenClickhouseSqlParser(spark, parserInterface))
}

/** Define backend specific expression mappings. */
override def extraExpressionMappings: Seq[Sig] = {
List(
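Once the parser is injected through genInjectExtendedParser, the new CACHE statement becomes reachable from the ordinary SQL entry point. A hypothetical invocation, assuming spark is a SparkSession created with the Gluten extensions enabled and that the table, partition column, and cached columns exist (the statement shape follows the grammar added in this commit):

// Cache columns c1 and c2 of db.tbl asynchronously, filtering by the dt partition as of '2024-07-29'.
spark.sql("CACHE DATA ASYN SELECT c1, c2 FROM db.tbl AFTER dt AS OF '2024-07-29'")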
