Skip to content

Commit

Permalink
fix failure on too large double number (apache#7570)
Browse files Browse the repository at this point in the history
  • Loading branch information
lgbo-ustc authored Oct 17, 2024
1 parent bd3d719 commit 2a2d5bd
Show file tree
Hide file tree
Showing 2 changed files with 74 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -326,4 +326,26 @@ class GlutenClickhouseFunctionSuite extends GlutenClickHouseTPCHAbstractSuite {
}
}

// Regression test for GLUTEN-7563: get_json_object over JSON text whose numeric
// field is too large for a double must produce the same result as vanilla Spark.
test("GLUTEN-7563 too large number in json") {
  withTable("test_7563") {
    sql("create table test_7563(a string) using parquet")

    // Four rows: two numbers with very large exponents (E308 and E30123),
    // one ordinary integer, and one malformed number ("1234xxx").
    sql(
      """
        |insert into test_7563 values
        |('{"a":2.696539702293474E308}')
        |,('{"a":1232}')
        |,('{"a":1234xxx}')
        |,('{"a":2.696539702293474E30123}')
        |""".stripMargin)

    val query =
      """
        |select a, get_json_object(a, '$.a') from test_7563
        |""".stripMargin

    // No extra checks are run on the resulting DataFrame; the comparison
    // against vanilla Spark is the assertion.
    compareResultsAgainstVanillaSpark(query, true, { _ => })
  }
}

}
54 changes: 52 additions & 2 deletions cpp-ch/local-engine/Functions/SparkFunctionGetJsonObject.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,10 @@
* limitations under the License.
*/
#pragma once
#include <cerrno>
#include <cmath>
#include <cstdlib>
#include <limits>
#include <memory>
#include <string>
#include <string_view>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnTuple.h>
Expand Down Expand Up @@ -156,10 +159,56 @@ class JSONTextNormalizer
// LOG_DEBUG(getLogger("GetJsonObject"), "xxx normalizeField. not field");
return nullptr;
}
copyToDst(dst, start_pos, pos - start_pos);
if (*start_pos == '"' || *start_pos == '\'')
{
copyToDst(dst, start_pos, pos - start_pos);
}
else
{
// If it's a too large number, replace it with "Infinity".
const char * inf_str = "\"\\\"Infinity\\\"\"";
size_t inf_str_len = 14;
const char * large_e = "308";
const auto * ep = find_first_symbols<'e', 'E'>(start_pos, pos);
if (pos - ep < 3)
copyToDst(dst, start_pos, pos - start_pos);
else if (pos - ep > 4 || (pos - ep == 4 and memcmp(ep + 1, large_e, 3) >= 0))
{
if (isTooLargeNumber(start_pos, pos))
{
copyToDst(dst, inf_str, inf_str_len);
}
else
{
copyToDst(dst, start_pos, pos - start_pos);
}
}
else
{
copyToDst(dst, start_pos, pos - start_pos);
}
}
return pos;
}

/// Reports whether the numeric text in [start, end) overflows the range of a
/// finite double.
///
/// The longest numeric prefix of the range is parsed, so trailing garbage after
/// a valid number is ignored (e.g. "1234xxx" is parsed as 1234).
///
/// @param start  first character of the candidate number.
/// @param end    one past the last character of the candidate number.
/// @return true only when the parsed magnitude is too large to be represented
///         (the conversion reports a range error and yields +/-infinity);
///         false for in-range values, for values that underflow towards zero,
///         and for text that does not start with a number.
inline static bool isTooLargeNumber(const char * start, const char * end)
{
    if (start == nullptr || end == nullptr || start >= end)
        return false;

    // strtod requires a NUL-terminated buffer, which the input range does not guarantee.
    const std::string text(start, end);

    // Keep the caller's errno intact; strtod reports range errors only through it.
    const int saved_errno = errno;
    errno = 0;
    char * parsed_end = nullptr;
    const double value = std::strtod(text.c_str(), &parsed_end);
    const bool range_error = (errno == ERANGE);
    errno = saved_errno;

    // Nothing could be parsed: not a number at all.
    if (parsed_end == text.c_str())
        return false;

    // ERANGE is raised for underflow as well; only an infinite result means "too large".
    return range_error && std::isinf(value);
}

inline static const char * normalizeString(const char * pos, const char * end, char *& dst)
{
const auto * start_pos = pos;
Expand Down Expand Up @@ -241,7 +290,7 @@ class JSONTextNormalizer
pos = find_first_symbols<'\''>(pos, end);
if (!isExpectedChar('\'', pos, end))
{
LOG_DEBUG(getLogger("GetJsonObject"), "xxx normalizeSingleQuotesString. not '");
// LOG_DEBUG(getLogger("GetJsonObject"), "xxx normalizeSingleQuotesString. not '");
return nullptr;
}
pos += 1;
Expand Down Expand Up @@ -642,6 +691,7 @@ class FlattenJSONStringOnRequiredFunction : public DB::IFunction
for (const auto & field : tokenizer)
{
auto normalized_field = JSONPathNormalizer::normalize(field);
// LOG_ERROR(getLogger("JSONPatch"), "xxx field {} -> {}", field, normalized_field);
required_fields.push_back(normalized_field);
tuple_columns.emplace_back(str_type->createColumn());

Expand Down

0 comments on commit 2a2d5bd

Please sign in to comment.