Skip to content

Commit

Permalink
[CH] Support bit_length/octet_length function
Browse files Browse the repository at this point in the history
  • Loading branch information
exmy committed Jun 27, 2024
1 parent e71a0c4 commit 3c8ab16
Show file tree
Hide file tree
Showing 6 changed files with 63 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,6 @@ object CHExpressionUtil {
URL_ENCODE -> DefaultValidator(),
SKEWNESS -> DefaultValidator(),
SOUNDEX -> DefaultValidator(),
BIT_LENGTH -> DefaultValidator(),
MAKE_YM_INTERVAL -> DefaultValidator(),
MAP_ZIP_WITH -> DefaultValidator(),
ZIP_WITH -> DefaultValidator(),
Expand Down
4 changes: 2 additions & 2 deletions cpp-ch/local-engine/Parser/SerializedPlanParser.h
Original file line number Diff line number Diff line change
Expand Up @@ -128,8 +128,8 @@ static const std::map<std::string, std::string> SCALAR_FUNCTIONS
{"ltrim", ""}, // trimRight or trimRightSpark, depends on argument size
{"rtrim", ""}, // trimBoth or trimBothSpark, depends on argument size
{"strpos", "positionUTF8"},
{"char_length",
"char_length"}, /// Notice: when input argument is binary type, corresponding ch function is length instead of char_length
{"char_length", "char_length"}, /// Notice: when input argument is binary type, corresponding ch function is length instead of char_length
{"octet_length", "octet_length"},
{"replace", "replaceAll"},
{"regexp_replace", "replaceRegexpAll"},
{"regexp_extract_all", "regexpExtractAllSpark"},
Expand Down
59 changes: 59 additions & 0 deletions cpp-ch/local-engine/Parser/scalar_function_parser/bitLength.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <Parser/FunctionParser.h>
#include <DataTypes/IDataType.h>

/// Declaration of the error code thrown by the parser below.
/// `extern` means the constant itself is defined in another translation unit.
namespace DB::ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}

namespace local_engine
{
/// Parser for the `bit_length` scalar function.
///
/// The call is not forwarded to a single backend function. Instead
/// `bit_length(a)` is expanded into the expression `octet_length(a) * 8`
/// while the ActionsDAG is being built.
class FunctionParserBitLength : public FunctionParser
{
public:
    explicit FunctionParserBitLength(SerializedPlanParser * plan_parser_) : FunctionParser(plan_parser_) { }
    ~FunctionParserBitLength() override = default;

    /// Name reported by getName().
    static constexpr auto name = "bit_length";

    String getName() const override { return name; }

    /// Adds the nodes computing `octet_length(a) * 8` to `actions_dag`.
    ///
    /// @param substrait_func  the scalar function call to translate; must carry exactly one argument.
    /// @param actions_dag     DAG that receives the newly created nodes.
    /// @return the node returned by convertNodeTypeIfNeeded() for the multiplication result.
    /// @throws Exception with NUMBER_OF_ARGUMENTS_DOESNT_MATCH when the number of parsed arguments is not 1.
    const ActionsDAG::Node * parse(const substrait::Expression_ScalarFunction & substrait_func, ActionsDAGPtr & actions_dag) const override
    {
        // Parse bit_length(a) as octet_length(a) * 8.
        auto parsed_args = parseFunctionArguments(substrait_func, "", actions_dag);
        if (parsed_args.size() != 1)
            throw Exception(DB::ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires exactly one arguments", getName());

        const auto * a = parsed_args[0];
        const auto * octet_length_node = toFunctionNode(actions_dag, "octet_length", {a});

        // Constant multiplier: 8 bits per octet.
        const auto * eight_const_node = addColumnToActionsDAG(actions_dag, std::make_shared<DataTypeInt32>(), 8);
        const auto * result_node = toFunctionNode(actions_dag, "multiply", {octet_length_node, eight_const_node});

        // NOTE(review): presumably this aligns the node's type with the output type
        // declared in substrait_func; confirm against FunctionParser.
        return convertNodeTypeIfNeeded(substrait_func, result_node, actions_dag);
    }
};

/// File-local registrar object for FunctionParserBitLength.
/// NOTE(review): presumably its constructor registers the parser under `name`
/// during static initialization; confirm against FunctionParserRegister.
static FunctionParserRegister<FunctionParserBitLength> register_bit_length;
}
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ object ExpressionMappings {
Sig[Encode](ENCODE),
Sig[Uuid](UUID),
Sig[BitLength](BIT_LENGTH),
Sig[OctetLength](OCTET_LENGTH),
Sig[Levenshtein](LEVENSHTEIN),
Sig[UnBase64](UNBASE64),
Sig[Base64](BASE64),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -865,7 +865,6 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("translate")
.exclude("LOCATE")
.exclude("REPEAT")
.exclude("length for string / binary")
.exclude("ParseUrl")
.exclude("SPARK-33468: ParseUrl in ANSI mode should fail if input string is not a valid url")
enableSuite[GlutenTryCastSuite]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ object ExpressionNames {
final val ENCODE = "encode"
final val UUID = "uuid"
final val BIT_LENGTH = "bit_length"
final val OCTET_LENGTH = "octet_length"
final val LEVENSHTEIN = "levenshteinDistance"
final val UNBASE64 = "unbase64"
final val BASE64 = "base64"
Expand Down

0 comments on commit 3c8ab16

Please sign in to comment.