From b9fdeb794c3e65077a3d0c36ed0edcaec765d455 Mon Sep 17 00:00:00 2001 From: James Xu Date: Mon, 1 Apr 2024 15:26:25 +0800 Subject: [PATCH] [GLUTEN-5211][VL] Fix typos in velox-backend-support-progress.md (#5212) --- docs/velox-backend-support-progress.md | 56 ++++++++++++++------------ 1 file changed, 31 insertions(+), 25 deletions(-) diff --git a/docs/velox-backend-support-progress.md b/docs/velox-backend-support-progress.md index c08a2e6aa18f..b8cfa08ab74c 100644 --- a/docs/velox-backend-support-progress.md +++ b/docs/velox-backend-support-progress.md @@ -5,11 +5,15 @@ nav_order: 4 --- # The Operators and Functions Support Progress -Gluten is still in active development. Here is a list of supported operators and functions. +Gluten is still under active development. Here is a list of supported operators and functions. -Since the same function may have different semantics between Presto and Spark, Velox implement the functions in Presto category, if we note a different sematics from Spark, then the function is implemented in Spark category. So Gluten firstly will use Velox's spark category, if a function isn't implemented there then refer to Presto category. +Since the same function may have different semantics between Presto and Spark, Velox implements the functions in the Presto category; if we note +different semantics in Spark, then the function is implemented in the Spark category. So Gluten will first try to find a function in Velox's Spark +category; if the function isn't implemented there, it falls back to the Presto category. -The total supported functions' number for [Spark3.3 is 387](https://spark.apache.org/docs/latest/api/sql/), Gluten supported 189 functions now. +The total number of functions in [Spark3.3](https://spark.apache.org/docs/latest/api/sql/) is 387; Gluten supports 189 of them. 
+ +We use the following notations to describe the support status of operators/functions in the tables below: | Value | Description | |--------------|-------------------------------------------------------------------------------------------| @@ -20,18 +24,20 @@ The total supported functions' number for [Spark3.3 is 387](https://spark.apache | NS | Not Supported. Velox backend does not support it. | +The following notations describe restrictions on a function's implementation: + | Value | Description | -| ---------- |------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| Mismatched | Some functions are implemented by Velox, which return results mismatched with Apache Spark. So we marked then as "Mismatched". | -| Ansi OFF | Gluten doesn't support [ANSI mode](https://spark.apache.org/docs/latest/sql-ref-ansi-compliance.html). If it is enabled, Gluten will fall back to Vanilla Spark. | +|------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Mismatched | Some functions are implemented by Velox but have different semantics from Apache Spark; we mark them as "Mismatched". | +| ANSI OFF | Gluten doesn't support [ANSI mode](https://spark.apache.org/docs/latest/sql-ref-ansi-compliance.html). If it is enabled, Gluten will fall back to Vanilla Spark. 
| ### Operator Map -Gluten supports 28 operators (Draw to right to see all data types) +Gluten supports 28 operators (Drag to right to see all data types) | Executor | Description | Gluten Name | Velox Name | BOOLEAN | BYTE | SHORT | INT | LONG | FLOAT | DOUBLE | STRING | NULL | BINARY | ARRAY | MAP | STRUCT(ROW) | DATE | TIMESTAMP | DECIMAL | CALENDAR | UDT | |-----------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------------------------------------|-----------------------|---------|------|-------|-----|------|-------|--------|--------|------|--------|-------|-----|-------------|------|-----------|---------|----------|-----| -| FileSourceScanExec | Reading data from files, often from Hive tables | FileSourceScanExecTransformer | TableScanNode | S | S | S | S | S | S | S | S | S | S | NS | NS | NS | S | NS | NS | NS | NS | +| FileSourceScanExec | Reading data from files, often from Hive tables | FileSourceScanExecTransformer | TableScanNode | S | S | S | S | S | S | S | S | S | S | NS | NS | NS | S | NS | NS | NS | NS | | BatchScanExec | The backend for most file input | BatchScanExecTransformer | TableScanNode | S | S | S | S | S | S | S | S | S | S | NS | NS | NS | S | NS | NS | NS | NS | | FilterExec | The backend for most filter statements | FilterExecTransformer | FilterNode | S | S | S | S | S | S | S | S | S | S | NS | NS | NS | S | NS | NS | NS | NS | | ProjectExec | The backend for most select, withColumn and dropColumn statements | ProjectExecTransformer | ProjectNode | S | S | S | S | S | S | S | S | S | S | NS | NS | NS | S | NS | NS | NS | NS | @@ -40,7 +46,7 @@ Gluten supports 28 operators (Draw to right to see all data types) | ShuffledHashJoinExec | Implementation of join using hashed shuffled data | ShuffleHashJoinExecTransformer | HashJoinNode | S | S | S | S | S | S | S | S | S | S | NS | NS | 
NS | S | NS | NS | NS | NS | | SortExec | The backend for the sort operator | SortExecTransformer | OrderByNode | S | S | S | S | S | S | S | S | S | S | NS | NS | NS | S | NS | NS | NS | NS | | SortMergeJoinExec | Sort merge join, replacing with shuffled hash join | SortMergeJoinExecTransformer | MergeJoinNode | S | S | S | S | S | S | S | S | S | S | NS | NS | NS | S | NS | NS | NS | NS | -| WindowExec | Window-operator backend | WindowExecTransformer | WindowNode | S | S | S | S | S | S | S | S | S | S | NS | NS | NS | S | NS | NS | NS | NS | +| WindowExec | Window operator backend | WindowExecTransformer | WindowNode | S | S | S | S | S | S | S | S | S | S | NS | NS | NS | S | NS | NS | NS | NS | | GlobalLimitExec | Limiting of results across partitions | LimitTransformer | LimitNode | S | S | S | S | S | S | S | S | S | S | NS | NS | NS | S | NS | NS | NS | NS | | LocalLimitExec | Per-partition limiting of results | LimitTransformer | LimitNode | S | S | S | S | S | S | S | S | S | S | NS | NS | NS | S | NS | NS | NS | NS | | ExpandExec | The backend for the expand operator | ExpandExecTransformer | GroupIdNode | S | S | S | S | S | S | S | S | S | S | NS | NS | NS | S | NS | NS | NS | NS | @@ -84,18 +90,18 @@ Gluten supports 28 operators (Draw to right to see all data types) ### Function support -Gluten supports 199 functions. (Draw to right to see all data types) +Gluten supports 199 functions. (Drag to right to see all data types) | Spark Functions | Velox/Presto Functions | Velox/Spark functions | Gluten | Restrictions | BOOLEAN | BYTE | SHORT | INT | LONG | FLOAT | DOUBLE | DATE | TIMESTAMP | STRING | DECIMAL | NULL | BINARS | CALENDAR | ARRAY | MAP | STRUCT | UDT | |-------------------------------|------------------------|-----------------------|--------|------------------------|---------|------|-------|-----|------|-------|--------|------|-----------|--------|---------|------|--------| -------- |-------| ---- |--------| ---- | | ! 
| | not | S | | S | S | S | S | S | S | S | | | S | | | | | | | | | | != | neq | | S | | S | S | S | S | S | S | S | | | S | | | | | | | | | -| % | mod | remainder | S | Ansi Off | | S | S | S | S | S | | | | | | | | | | | | | +| % | mod | remainder | S | ANSI OFF | | S | S | S | S | S | | | | | | | | | | | | | | & | bitwise_and | bitwise_and | S | | | | | | | | | | | | | | | | | | | | -| * | multiply | multiply | S | Ansi Off | | S | S | S | S | S | | | | | | | | | | | | | -| + | plus | add | S | Ansi Off | | S | S | S | S | S | | | | | | | | | | | | | -| - | minus | substract | S | Ansi Off | | S | S | S | S | S | | | | | | | | | | | | | -| / | divide | divide | S | Ansi Off | | S | S | S | S | S | | | | | | | | | | | | | +| * | multiply | multiply | S | ANSI OFF | | S | S | S | S | S | | | | | | | | | | | | | +| + | plus | add | S | ANSI OFF | | S | S | S | S | S | | | | | | | | | | | | | +| - | minus | substract | S | ANSI OFF | | S | S | S | S | S | | | | | | | | | | | | | +| / | divide | divide | S | ANSI OFF | | S | S | S | S | S | | | | | | | | | | | | | | < | lt | lessthan | S | | S | S | S | S | S | S | S | | | S | | | | | | | | | | <= | lte | lessthanorequa | S | | S | S | S | S | S | S | S | | | S | | | | | | | | | | <=> | | equalnullsafe | S | | | | | | | | | | | | | | | | | | | | @@ -196,13 +202,13 @@ Gluten supports 199 functions. 
(Draw to right to see all data types) | xpath_short | | | | | | | | | | | | | | | | | | | | | | | | xpath_string | | | | | | | | | | | | | | | | | | | | | | | | like | like | | S | | | | | | | | | | | S | | | | | | | | | -| regexp | | rlike | S | Not support lookaround | | | | | | | | | | S | | | | | | | | | -| regexp_extract | regexp_extract | regexp_extract | S | Not support lookaround | | | | | | | | | | S | | | | | | | | | -| regexp_extract_all | regexp_extract_all | | S | Not support lookaround | | | | | | | | | | S | | | | | | | | | -| regexp_like | regexp_like | rlike | S | Not support lookaround | | | | | | | | | | S | | | | | | | | | +| regexp | | rlike | S | Lookaround not supported | | | | | | | | | | S | | | | | | | | | +| regexp_extract | regexp_extract | regexp_extract | S | Lookaround not supported | | | | | | | | | | S | | | | | | | | | +| regexp_extract_all | regexp_extract_all | | S | Lookaround not supported | | | | | | | | | | S | | | | | | | | | +| regexp_like | regexp_like | rlike | S | Lookaround not supported | | | | | | | | | | S | | | | | | | | | | regexp_replace | regexp_replace | | S | | | | | | | | | | | S | | | | | | | | | -| rlike | | rlike | S | Not support lookaround | | | | | | | | | | S | | | | | | | | | -| abs | abs | abs | S | Ansi Off | | S | S | S | S | S | S | | | | | | | | | | | | +| rlike | | rlike | S | Lookaround not supported | | | | | | | | | | S | | | | | | | | | +| abs | abs | abs | S | ANSI OFF | | S | S | S | S | S | S | | | | | | | | | | | | | acos | acos | | S | | | S | S | S | S | S | S | | | | | | | | | | | | | acosh | | acosh | S | | | S | S | S | S | S | S | | | | | | | | | | | | | asin | asin | | S | | | S | S | S | S | S | S | | | | | | | | | | | | @@ -232,7 +238,7 @@ Gluten supports 199 functions. 
(Draw to right to see all data types) | log1p | | | | | | | | | | | | | | | | | | | | | | | | log2 | log2 | | S | | | S | S | S | S | S | S | | | | | | | | | | | | | pi | pi | | S | | | S | S | S | S | S | S | | | | | | | | | | | | -| pmod | | pmod | S | Ansi Off | | S | S | S | S | S | | | | | | | | | | | | | +| pmod | | pmod | S | ANSI OFF | | S | S | S | S | S | | | | | | | | | | | | | | pow, power | pow,power | power | | | | | S | S | S | S | S | | | | | | | | | | | | | power, pow | power,pow | power | S | | | S | S | S | S | S | S | | | | | | | | | | | | | radians | radians | | S | | | S | S | S | S | S | S | | | | | | | | | | | | @@ -348,7 +354,7 @@ Gluten supports 199 functions. (Draw to right to see all data types) | any | | | | | | | | | | | | | | | | | | | | | | | | approx_count_distinct | approx_distinct | | S | | S | S | S | S | S | S | S | S | | S | | | | | | | | | | approx_percentile | | | | | | | | | | | | | | | | | | | | | | | -| avg | avg | | S | Ansi Off | | S | S | S | S | S | | | | | | | | | | | | | +| avg | avg | | S | ANSI OFF | | S | S | S | S | S | | | | | | | | | | | | | | bool_and | | | | | | | | | | | | | | | | | | | | | | | | bool_or | | | | | | | | | | | | | | | | | | | | | | | | collect_list | | | S | | | | | | | | | | | | | | | | | | | | @@ -369,7 +375,7 @@ Gluten supports 199 functions. (Draw to right to see all data types) | last_value | | last_value | S | | | | | | | | | | | | | | | | | | | | | max | max | | S | | | | S | S | S | S | S | | | | | | | | | | | | | max_by | | | S | | | | | | | | | | | | | | | | | | | | -| mean | avg | | S | Ansi Off | | | | | | | | | | | | | | | | | | | +| mean | avg | | S | ANSI OFF | | | | | | | | | | | | | | | | | | | | min | min | | S | | | | S | S | S | S | S | | | | | | | | | | | | | min_by | | | S | | | | | | | | | | | | | | | | | | | | | skewness | skewness | skewness | S | | | | S | S | S | S | S | | | | | | | | | | | | @@ -378,7 +384,7 @@ Gluten supports 199 functions. 
(Draw to right to see all data types) | stddev,std | stddev | | S | | | | S | S | S | S | S | | | | | | | | | | | | | stddev_pop | stddev_pop | | S | | | S | S | S | S | S | | | | | | | | | | | | | | stddev_samp | stddev_samp | | S | | | | S | S | S | S | S | | | | | | | | | | | | -| sum | sum | | S | Ansi Off | | S | S | S | S | S | | | | | | | | | | | | | +| sum | sum | | S | ANSI OFF | | S | S | S | S | S | | | | | | | | | | | | | | var_pop | var_pop | | S | | | S | S | S | S | S | | | | | | | | | | | | | | var_samp | var_samp | | S | | | S | S | S | S | S | | | | | | | | | | | | | | variance | variance | | S | | | S | S | S | S | S | | | | | | | | | | | | |