Skip to content

Commit

Permalink
Merge branch 'main' into fallback
Browse files Browse the repository at this point in the history
  • Loading branch information
yaooqinn authored Dec 13, 2023
2 parents 455e24c + bf42fd7 commit 6b88c25
Show file tree
Hide file tree
Showing 57 changed files with 1,018 additions and 320 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/velox_be.yml
Original file line number Diff line number Diff line change
Expand Up @@ -393,7 +393,7 @@ jobs:
MOUNT_MAVEN_CACHE=OFF \
OS_IMAGE=centos:7 \
OS_VERSION=7 \
tools/gluten-te/centos/cbash.sh sleep 14400
tools/gluten-te/centos/cbash-mount.sh sleep 14400
- name: Build Gluten velox third party
run: |
docker exec centos7-test-$GITHUB_RUN_ID bash -c '
Expand Down Expand Up @@ -488,7 +488,7 @@ jobs:
cd /opt/gluten && \
sudo -E ./dev/vcpkg/setup-build-depends.sh && \
source ./dev/vcpkg/env.sh && \
./dev/builddeps-veloxbe.sh --run_setup_script=OFF --build_tests=ON --build_benchmarks=ON --enable_s3=ON --enable_gcs=ON --enable_hdfs=ON --enable_abfs=OFF'
./dev/builddeps-veloxbe.sh --enable_vcpkg=ON --build_tests=ON --build_benchmarks=ON --enable_s3=ON --enable_gcs=ON --enable_hdfs=ON --enable_abfs=OFF'
- name: Build for Spark 3.2.2
run: |
docker exec static-build-test-$GITHUB_RUN_ID bash -c '
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@ class GlutenClickHouseFileFormatSuite
)
}

test("read data from csv file format witsh agg") {
test("read data from csv file format with agg") {
val filePath = basePath + "/csv_test_agg.csv"
val csvFileFormat = "csv"
val sql =
Expand All @@ -214,8 +214,7 @@ class GlutenClickHouseFileFormatSuite
case f: FileSourceScanExecTransformer => f
}
assert(csvFileScan.size == 1)
},
noFallBack = false
}
)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -349,7 +349,7 @@ class GlutenClickHouseTPCHColumnarShuffleParquetAQESuite
| l_returnflag,
| l_linestatus
|""".stripMargin
runQueryAndCompare(sql, noFallBack = false) { df => }
runQueryAndCompare(sql) { df => }
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2186,6 +2186,27 @@ class GlutenClickHouseTPCHParquetSuite extends GlutenClickHouseTPCHAbstractSuite
}
}

test("GLUTEN-3861: Fix parse exception when join postJoinFilter contains singularOrList") {
  // Left join of two aggregated subqueries. Besides the equi-join key, the ON clause
  // carries an IN list over the computed `l_year` column.
  val joinWithInListSql =
    """
      |select t1.l_orderkey, t1.l_year, t2.o_orderkey, t2.o_year
      |from (
      | select l_orderkey, extract(year from l_shipdate) as l_year, count(1) as l_cnt
      | from lineitem
      | group by l_orderkey, l_shipdate) t1
      |left join (
      | select o_orderkey, extract(year from o_orderdate) as o_year, count(1) as o_cnt
      | from orders
      | group by o_orderkey, o_orderdate) t2
      |on t1.l_orderkey = t2.o_orderkey
      | and l_year in (1997, 1995, 1993)
      |order by t1.l_orderkey, t1.l_year, t2.o_orderkey, t2.o_year
      |""".stripMargin

  // -1 switches off Spark's automatic broadcast joins for the duration of the query.
  withSQLConf("spark.sql.autoBroadcastJoinThreshold" -> "-1") {
    // The last argument is a no-op check on the resulting DataFrame.
    compareResultsAgainstVanillaSpark(joinWithInListSql, true, _ => ())
  }
}

test("GLUTEN-3467: Fix 'Names of tuple elements must be unique' error for ch backend") {
val sql =
"""
Expand Down Expand Up @@ -2221,5 +2242,16 @@ class GlutenClickHouseTPCHParquetSuite extends GlutenClickHouseTPCHAbstractSuite
}
}

test("GLUTEN-3934: log10/log2/ln") {
  // Optimizer rules excluded while the query runs.
  // NOTE(review): presumably this keeps literal calls such as log10(-1.0) from being
  // folded away before they reach the backend -- confirm.
  val excludedRules = Seq(ConstantFolding.ruleName, NullPropagation.ruleName).mkString(",")

  // Each function gets a column argument, a negative literal, zero and a shifted column.
  val logSql =
    "select log10(n_regionkey), log10(-1.0), log10(0), log10(n_regionkey - 100000), " +
      "log2(n_regionkey), log2(-1.0), log2(0), log2(n_regionkey - 100000), " +
      "ln(n_regionkey), ln(-1.0), ln(0), ln(n_regionkey - 100000) from nation"

  withSQLConf(SQLConf.OPTIMIZER_EXCLUDED_RULES.key -> excludedRules) {
    runQueryAndCompare(logSql)(checkOperatorMatch[ProjectExecTransformer])
  }
}

}
// scalastyle:on line.size.limit
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,20 @@ class GlutenFunctionValidateSuite extends GlutenClickHouseWholeStageTransformerS
}
}

test("Test get_json_object 10") {
  // JSON paths whose segments are numeric or start with digits, with and without
  // array indexing. They are queried one by one, in this order.
  val jsonPaths = Seq(
    "$.12345",
    "$.123.abc",
    "$.123.123",
    "$.123abc.123",
    "$.abc.123",
    "$.123[0]",
    "$.123[0].123"
  )

  jsonPaths.foreach {
    path =>
      // No additional checks are applied to the resulting DataFrame.
      runQueryAndCompare(s"SELECT get_json_object(string_field1, '$path') from json_test") {
        _ =>
      }
  }
}

test("Test covar_samp") {
  // Runs covar_samp over a double and an int column with a no-op custom check.
  val covarSampSql = "SELECT covar_samp(double_field1, int_field1) from json_test"
  runQueryAndCompare(covarSampSql)(_ => ())
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -278,8 +278,6 @@ object BackendSettings extends BackendSettingsApi {
override def removeHashColumnFromColumnarShuffleExchangeExec(): Boolean = true
override def rescaleDecimalLiteral(): Boolean = true

override def replaceSortAggWithHashAgg: Boolean = GlutenConfig.getConf.forceToUseHashAgg

/** Get the config prefix for each backend */
override def getBackendConfigPrefix(): String =
GlutenConfig.GLUTEN_CONFIG_PREFIX + VeloxBackend.BACKEND_NAME
Expand Down
2 changes: 1 addition & 1 deletion cpp-ch/local-engine/Parser/JoinRelParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -397,7 +397,7 @@ bool JoinRelParser::tryAddPushDownFilter(
}
}
}
// if ch not support the join type or join conditions, it will throw an exception like 'not support'.
// if ch does not support the join type or join conditions, it will throw an exception like 'not support'.
catch (Poco::Exception & e)
{
// CH not support join condition has 'or' and has different table in each side.
Expand Down
12 changes: 9 additions & 3 deletions cpp-ch/local-engine/Parser/SerializedPlanParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
#include <Interpreters/ActionsVisitor.h>
#include <Interpreters/CollectJoinOnKeysVisitor.h>
#include <Interpreters/Context.h>
#include <Interpreters/PreparedSets.h>
#include <Interpreters/ProcessList.h>
#include <Interpreters/QueryPriorities.h>
#include <Join/StorageJoinFromReadBuffer.h>
Expand Down Expand Up @@ -1883,7 +1884,7 @@ ActionsDAGPtr ASTParser::convertToActions(const NamesAndTypesList & name_and_typ
size_t(0),
name_and_types,
std::make_shared<ActionsDAG>(name_and_types),
nullptr /* prepared_sets */,
std::make_shared<PreparedSets>(),
false /* no_subqueries */,
false /* no_makeset */,
false /* only_consts */,
Expand All @@ -1895,6 +1896,8 @@ ActionsDAGPtr ASTParser::convertToActions(const NamesAndTypesList & name_and_typ
ASTPtr ASTParser::parseToAST(const Names & names, const substrait::Expression & rel)
{
LOG_DEBUG(&Poco::Logger::get("ASTParser"), "substrait plan:\n{}", rel.DebugString());
if (rel.has_singular_or_list())
return parseArgumentToAST(names, rel);
if (!rel.has_scalar_function())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "the root of expression should be a scalar function:\n {}", rel.DebugString());

Expand Down Expand Up @@ -2000,7 +2003,8 @@ ASTPtr ASTParser::parseArgumentToAST(const Names & names, const substrait::Expre

bool nullable = false;
size_t options_len = options.size();
args.reserve(options_len);
ASTs in_args;
in_args.reserve(options_len);

for (int i = 0; i < static_cast<int>(options_len); ++i)
{
Expand All @@ -2023,8 +2027,10 @@ ASTPtr ASTParser::parseArgumentToAST(const Names & names, const substrait::Expre
elem_type->getName(),
option_type->getName());

args.emplace_back(std::make_shared<ASTLiteral>(type_and_field.second));
in_args.emplace_back(std::make_shared<ASTLiteral>(type_and_field.second));
}
auto array_ast = makeASTFunction("array", in_args);
args.emplace_back(array_ast);

auto ast = makeASTFunction("in", args);
if (nullable)
Expand Down
3 changes: 0 additions & 3 deletions cpp-ch/local-engine/Parser/SerializedPlanParser.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,9 +107,6 @@ static const std::map<std::string, std::string> SCALAR_FUNCTIONS
{"unhex", "unhex"},
{"hypot", "hypot"},
{"sign", "sign"},
{"log10", "log10"},
{"log2", "log2"},
{"log", "log"},
{"radians", "radians"},
{"greatest", "greatest"},
{"least", "least"},
Expand Down
39 changes: 39 additions & 0 deletions cpp-ch/local-engine/Parser/scalar_function_parser/ln.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <Parser/scalar_function_parser/logarithm.h>

namespace local_engine
{

/// Function parser built on FunctionParserLogBase that maps to ClickHouse's `log`.
/// NOTE(review): the class and file are called "ln" but the registered name is "log";
/// presumably that is the name the plan uses for Spark's ln -- confirm.
class FunctionParserLn : public FunctionParserLogBase
{
public:
    /// Name returned by getName().
    static constexpr auto name = "log";

    explicit FunctionParserLn(SerializedPlanParser * plan_parser_)
        : FunctionParserLogBase(plan_parser_)
    {
    }

    ~FunctionParserLn() override = default;

    String getName() const override
    {
        return name;
    }

    /// ClickHouse function that evaluates the logarithm.
    String getCHFunctionName() const override
    {
        return "log";
    }

    /// Adds a constant column of `data_type` holding 0.0 to `actions_dag` and returns its node.
    /// How the base class applies this bound is defined in logarithm.h.
    const DB::ActionsDAG::Node * getParameterLowerBound(ActionsDAGPtr & actions_dag, const DataTypePtr & data_type) const override
    {
        const auto * lower_bound_node = addColumnToActionsDAG(actions_dag, data_type, 0.0);
        return lower_bound_node;
    }
};

/// File-local registration object for the parser above.
static FunctionParserRegister<FunctionParserLn> register_ln;
}
39 changes: 39 additions & 0 deletions cpp-ch/local-engine/Parser/scalar_function_parser/log10.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <Parser/scalar_function_parser/logarithm.h>

namespace local_engine
{

/// Function parser built on FunctionParserLogBase that maps `log10` to ClickHouse's `log10`.
class FunctionParserLog10 : public FunctionParserLogBase
{
public:
    /// Name returned by getName().
    static constexpr auto name = "log10";

    explicit FunctionParserLog10(SerializedPlanParser * plan_parser_)
        : FunctionParserLogBase(plan_parser_)
    {
    }

    ~FunctionParserLog10() override = default;

    String getName() const override
    {
        return name;
    }

    /// ClickHouse function that evaluates the logarithm.
    String getCHFunctionName() const override
    {
        return "log10";
    }

    /// Adds a constant column of `data_type` holding 0.0 to `actions_dag` and returns its node.
    /// How the base class applies this bound is defined in logarithm.h.
    const DB::ActionsDAG::Node * getParameterLowerBound(ActionsDAGPtr & actions_dag, const DataTypePtr & data_type) const override
    {
        const auto * lower_bound_node = addColumnToActionsDAG(actions_dag, data_type, 0.0);
        return lower_bound_node;
    }
};

/// File-local registration object for the parser above.
static FunctionParserRegister<FunctionParserLog10> register_log10;
}
49 changes: 6 additions & 43 deletions cpp-ch/local-engine/Parser/scalar_function_parser/log1p.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,61 +14,24 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <Parser/FunctionParser.h>
#include <Common/CHUtil.h>
#include <Core/Field.h>
#include <DataTypes/IDataType.h>

namespace DB
{

namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
}
#include <Parser/scalar_function_parser/logarithm.h>

namespace local_engine
{

class FunctionParserLog1p : public FunctionParser
class FunctionParserLog1p : public FunctionParserLogBase
{
public:
explicit FunctionParserLog1p(SerializedPlanParser * plan_parser_) : FunctionParser(plan_parser_) {}
explicit FunctionParserLog1p(SerializedPlanParser * plan_parser_) : FunctionParserLogBase(plan_parser_) {}
~FunctionParserLog1p() override = default;

static constexpr auto name = "log1p";

String getName() const override { return name; }

const ActionsDAG::Node * parse(
const substrait::Expression_ScalarFunction & substrait_func,
ActionsDAGPtr & actions_dag) const override
String getCHFunctionName() const override { return "log1p"; }
const DB::ActionsDAG::Node * getParameterLowerBound(ActionsDAGPtr & actions_dag, const DataTypePtr & data_type) const override
{
/*
parse log1p(x) as
if (x <= -1.0)
null
else
log1p(x)
*/
auto parsed_args = parseFunctionArguments(substrait_func, "", actions_dag);
if (parsed_args.size() != 1)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires exactly one arguments", getName());

const auto * arg_node = parsed_args[0];
const auto * log1p_node = toFunctionNode(actions_dag, "log1p", {arg_node});

auto result_type = log1p_node->result_type;
auto nullable_result_type = makeNullable(result_type);

const auto * null_const_node = addColumnToActionsDAG(actions_dag, nullable_result_type, Field());
const auto * nullable_log1p_node = ActionsDAGUtil::convertNodeType(actions_dag, log1p_node, nullable_result_type->getName(), log1p_node->result_name);

const auto * le_node = toFunctionNode(actions_dag, "lessOrEquals", {arg_node, addColumnToActionsDAG(actions_dag, result_type, -1.0)});
const auto * result_node = toFunctionNode(actions_dag, "if", {le_node, null_const_node, nullable_log1p_node});

return convertNodeTypeIfNeeded(substrait_func, result_node, actions_dag);
return addColumnToActionsDAG(actions_dag, data_type, -1.0);
}
};

Expand Down
39 changes: 39 additions & 0 deletions cpp-ch/local-engine/Parser/scalar_function_parser/log2.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <Parser/scalar_function_parser/logarithm.h>

namespace local_engine
{

/// Function parser built on FunctionParserLogBase that maps `log2` to ClickHouse's `log2`.
class FunctionParserLog2 : public FunctionParserLogBase
{
public:
    /// Name returned by getName().
    static constexpr auto name = "log2";

    explicit FunctionParserLog2(SerializedPlanParser * plan_parser_)
        : FunctionParserLogBase(plan_parser_)
    {
    }

    ~FunctionParserLog2() override = default;

    String getName() const override
    {
        return name;
    }

    /// ClickHouse function that evaluates the logarithm.
    String getCHFunctionName() const override
    {
        return "log2";
    }

    /// Adds a constant column of `data_type` holding 0.0 to `actions_dag` and returns its node.
    /// How the base class applies this bound is defined in logarithm.h.
    const DB::ActionsDAG::Node * getParameterLowerBound(ActionsDAGPtr & actions_dag, const DataTypePtr & data_type) const override
    {
        const auto * lower_bound_node = addColumnToActionsDAG(actions_dag, data_type, 0.0);
        return lower_bound_node;
    }
};

/// File-local registration object for the parser above.
static FunctionParserRegister<FunctionParserLog2> register_log2;
}
Loading

0 comments on commit 6b88c25

Please sign in to comment.