diff --git a/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseHiveTableSuite.scala b/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseHiveTableSuite.scala
index d15f07aff6db..bb028de6abe8 100644
--- a/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseHiveTableSuite.scala
+++ b/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseHiveTableSuite.scala
@@ -97,6 +97,7 @@ class GlutenClickHouseHiveTableSuite()
         "spark.sql.warehouse.dir",
         getClass.getResource("/").getPath + "unit-tests-working-home/spark-warehouse")
       .set("spark.hive.exec.dynamic.partition.mode", "nonstrict")
+      .set("spark.gluten.supported.hive.udfs", "my_add")
       .setMaster("local[*]")
   }
 
@@ -1060,4 +1061,14 @@ class GlutenClickHouseHiveTableSuite()
     compareResultsAgainstVanillaSpark(select_sql, compareResult = true, _ => {})
     spark.sql("DROP TABLE test_tbl_3548")
   }
+
+  test("test 'hive udf'") {
+    val jarPath = "src/test/resources/udfs/hive-test-udfs.jar"
+    val jarUrl = s"file://${System.getProperty("user.dir")}/$jarPath"
+    spark.sql(
+      s"CREATE FUNCTION my_add as " +
+        s"'org.apache.hadoop.hive.contrib.udf.example.UDFExampleAdd2' USING JAR '$jarUrl'")
+    runQueryAndCompare("select MY_ADD(id, id+1) from range(10)")(
+      checkOperatorMatch[ProjectExecTransformer])
+  }
 }
diff --git a/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseTPCHParquetSuite.scala b/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseTPCHParquetSuite.scala
index 8baf5711c23d..2098ea6248af 100644
--- a/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseTPCHParquetSuite.scala
+++ b/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseTPCHParquetSuite.scala
@@ -48,7 +48,6 @@ class GlutenClickHouseTPCHParquetSuite extends GlutenClickHouseTPCHAbstractSuite
       .set("spark.sql.autoBroadcastJoinThreshold", "10MB")
       .set("spark.gluten.sql.columnar.backend.ch.use.v2", "false")
       .set("spark.gluten.supported.scala.udfs", "my_add")
-      .set("spark.gluten.supported.hive.udfs", "my_add")
   }
 
   override protected val createNullableTables = true
@@ -1319,16 +1318,6 @@ class GlutenClickHouseTPCHParquetSuite extends GlutenClickHouseTPCHAbstractSuite
       checkOperatorMatch[ProjectExecTransformer])
   }
 
-  ignore("test 'hive udf'") {
-    val jarPath = "backends-clickhouse/src/test/resources/udfs/hive-test-udfs.jar"
-    val jarUrl = s"file://${System.getProperty("user.dir")}/$jarPath"
-    spark.sql(
-      s"CREATE FUNCTION my_add as " +
-        "'org.apache.hadoop.hive.contrib.udf.example.UDFExampleAdd2' USING JAR '$jarUrl'")
-    runQueryAndCompare("select my_add(id, id+1) from range(10)")(
-      checkOperatorMatch[ProjectExecTransformer])
-  }
-
   override protected def runTPCHQuery(
       queryNum: Int,
       tpchQueries: String = tpchQueries,
diff --git a/cpp-ch/local-engine/Parser/FunctionParser.h b/cpp-ch/local-engine/Parser/FunctionParser.h
index e40143126580..0e9eaf07a49c 100644
--- a/cpp-ch/local-engine/Parser/FunctionParser.h
+++ b/cpp-ch/local-engine/Parser/FunctionParser.h
@@ -76,7 +76,7 @@ class FunctionParser
     {
         return plan_parser->toFunctionNode(action_dag, func_name, args);
    }
-    
+
    const DB::ActionsDAG::Node * toFunctionNode(DB::ActionsDAGPtr & action_dag, const String & func_name, const String & result_name, const DB::ActionsDAG::NodeRawConstPtrs & args) const
    {
diff --git a/cpp-ch/local-engine/tests/gtest_parquet_write.cpp b/cpp-ch/local-engine/tests/gtest_parquet_write.cpp
index 3a96b92e7725..c2c71ff6eb07 100644
--- a/cpp-ch/local-engine/tests/gtest_parquet_write.cpp
+++ b/cpp-ch/local-engine/tests/gtest_parquet_write.cpp
@@ -199,7 +199,7 @@ TEST(ParquetWrite, ComplexTypes)
     ch2arrow.chChunkToArrowTable(arrow_table, input_chunks, header.columns());
 
     /// Convert Arrow Table to CH Block
-    ArrowColumnToCHColumn arrow2ch(header, "Parquet", true, true, true);
+    ArrowColumnToCHColumn arrow2ch(header, "Parquet", true, true, FormatSettings::DateTimeOverflowBehavior::Ignore);
     Chunk output_chunk;
     arrow2ch.arrowTableToCHChunk(output_chunk, arrow_table, arrow_table->num_rows());
 
diff --git a/gluten-core/src/main/scala/io/glutenproject/expression/UDFMappings.scala b/gluten-core/src/main/scala/io/glutenproject/expression/UDFMappings.scala
index 8d00d2bbf36d..4b6adef7c0bd 100644
--- a/gluten-core/src/main/scala/io/glutenproject/expression/UDFMappings.scala
+++ b/gluten-core/src/main/scala/io/glutenproject/expression/UDFMappings.scala
@@ -23,6 +23,8 @@ import org.apache.spark.internal.Logging
 
 import org.apache.commons.lang3.StringUtils
 
+import java.util.Locale
+
 import scala.collection.mutable.Map
 
 object UDFMappings extends Logging {
@@ -41,7 +43,7 @@ object UDFMappings extends Logging {
         s"will be replaced by value:$value")
     }
 
-    res.put(key, value)
+    res.put(key.toLowerCase(Locale.ROOT), value)
   }
 
   private def parseStringToMap(input: String, res: Map[String, String]) {
diff --git a/gluten-core/src/main/scala/org/apache/spark/sql/hive/HiveSimpleUDFTransformer.scala b/gluten-core/src/main/scala/org/apache/spark/sql/hive/HiveSimpleUDFTransformer.scala
index 505d7319f739..77100e5999d0 100644
--- a/gluten-core/src/main/scala/org/apache/spark/sql/hive/HiveSimpleUDFTransformer.scala
+++ b/gluten-core/src/main/scala/org/apache/spark/sql/hive/HiveSimpleUDFTransformer.scala
@@ -20,6 +20,8 @@ import io.glutenproject.expression.{ExpressionConverter, ExpressionTransformer,
 
 import org.apache.spark.sql.catalyst.expressions._
 
+import java.util.Locale
+
 object HiveSimpleUDFTransformer {
   def isHiveSimpleUDF(expr: Expression): Boolean = {
     expr match {
@@ -36,7 +38,8 @@ object HiveSimpleUDFTransformer {
     }
 
     val udf = expr.asInstanceOf[HiveSimpleUDF]
-    val substraitExprName = UDFMappings.hiveUDFMap.get(udf.name.stripPrefix("default."))
+    val substraitExprName =
+      UDFMappings.hiveUDFMap.get(udf.name.stripPrefix("default.").toLowerCase(Locale.ROOT))
     substraitExprName match {
       case Some(name) =>
         GenericExpressionTransformer(
@@ -44,7 +47,9 @@ object HiveSimpleUDFTransformer {
           udf.children.map(ExpressionConverter.replaceWithExpressionTransformer(_, attributeSeq)),
           udf)
       case _ =>
-        throw new UnsupportedOperationException(s"Not supported hive simple udf: $udf.")
+        throw new UnsupportedOperationException(
+          s"Not supported hive simple udf:$udf" +
+            s" name:${udf.name} hiveUDFMap:${UDFMappings.hiveUDFMap}")
     }
   }
 }