Skip to content

Commit

Permalink
Fix comment
Browse files Browse the repository at this point in the history
  • Loading branch information
PHILO-HE committed Dec 6, 2024
1 parent 1dc54a7 commit f6eb519
Show file tree
Hide file tree
Showing 5 changed files with 36 additions and 32 deletions.
2 changes: 1 addition & 1 deletion velox/docs/functions/spark/json.rst
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ JSON Functions
Valid ``path`` should start with '$' and then contain "[index]", "['field']" or ".field"
to define a JSON path. Here are some examples: "$.a" "$.a.b", "$[0]['a'].b". Returns
``jsonString`` if ``path`` is "$". Returns NULL if ``jsonString`` or ``path`` is malformed.
Also returns NULL if ``path`` doesn't exist. ::
Returns NULL if ``path`` does not exist. ::

SELECT get_json_object('{"a":"b"}', '$.a'); -- 'b'
SELECT get_json_object('{"a":{"b":"c"}}', '$.a'); -- '{"b":"c"}'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@

namespace facebook::velox::functions::sparksql {

/// Parses a JSON string and returns the value at the specified path.
/// Simdjson On-Demand API is used to parse JSON string.
/// get_json_object(jsonString, path) -> value
template <typename T>
struct GetJsonObjectFunction {
VELOX_DEFINE_FUNCTION_TYPES(T);
Expand All @@ -28,13 +31,11 @@ struct GetJsonObjectFunction {

FOLLY_ALWAYS_INLINE void initialize(
const std::vector<TypePtr>& /*inputTypes*/,
const core::QueryConfig& config,
const core::QueryConfig& /*config*/,
const arg_type<Varchar>* /*json*/,
const arg_type<Varchar>* jsonPath) {
if (jsonPath != nullptr) {
if (checkJsonPath(*jsonPath)) {
jsonPath_ = removeSingleQuotes(*jsonPath);
}
if (jsonPath != nullptr && checkJsonPath(*jsonPath)) {
jsonPath_ = removeSingleQuotes(*jsonPath);
}
}

Expand All @@ -56,10 +57,11 @@ struct GetJsonObjectFunction {
if (simdjsonParse(paddedJson).get(jsonDoc)) {
return false;
}
auto formattedJsonPath = jsonPath_.has_value()
? jsonPath_.value()
: removeSingleQuotes(jsonPath);
try {
auto rawResult = jsonPath_.has_value()
? jsonDoc.at_path(jsonPath_.value().data())
: jsonDoc.at_path(removeSingleQuotes(jsonPath));
auto rawResult = jsonDoc.at_path(formattedJsonPath);
if (rawResult.error()) {
return false;
}
Expand Down Expand Up @@ -108,7 +110,10 @@ struct GetJsonObjectFunction {
return result;
}

// Returns true if no error.
// Extracts a string representation from a simdjson result. Handles various
// JSON types including numbers, booleans, strings, objects, and arrays.
// Returns true if the conversion wes successful, false
// otherwise.
bool extractStringResult(
simdjson::simdjson_result<simdjson::ondemand::value> rawResult,
out_type<Varchar>& result) {
Expand Down Expand Up @@ -179,17 +184,15 @@ struct GetJsonObjectFunction {
result.append(ss.str());
return true;
}
default: {
default:
return false;
}
}
}

// This is a simple validation by checking whether the obtained result is
// followed by valid char. Because ondemand parsing we are using ignores json
// format validation for characters following the current parsing position.
// As json doc is padded with NULL characters, it's safe to do recursively
// check.
// Checks whether the obtained result is followed by valid char. Because
// On-Demand API we are using ignores json format validation for characters
// following the current parsing position. As json doc is padded with NULL
// characters, it's safe to do recursively check.
bool isValidEndingCharacter(const char* currentPos) {
char endingChar = *currentPos;
if (endingChar == ',' || endingChar == '}' || endingChar == ']') {
Expand All @@ -203,6 +206,7 @@ struct GetJsonObjectFunction {
return false;
}

// Used for constant json path.
std::optional<std::string> jsonPath_;
};

Expand Down
2 changes: 1 addition & 1 deletion velox/functions/sparksql/registration/RegisterJson.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
* limitations under the License.
*/
#include "velox/functions/lib/RegistrationHelpers.h"
#include "velox/functions/sparksql/JsonFunctions.h"
#include "velox/functions/sparksql/GetJsonObject.h"
#include "velox/functions/sparksql/JsonObjectKeys.h"

namespace facebook::velox::functions::sparksql {
Expand Down
2 changes: 1 addition & 1 deletion velox/functions/sparksql/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,9 @@ add_executable(
DecimalRoundTest.cpp
DecimalUtilTest.cpp
ElementAtTest.cpp
GetJsonObjectTest.cpp
HashTest.cpp
InTest.cpp
JsonFunctionsTest.cpp
JsonObjectKeysTest.cpp
LeastGreatestTest.cpp
MakeDecimalTest.cpp
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,42 +56,42 @@ TEST_F(GetJsonObjectTest, basic) {
"001");
EXPECT_EQ(
getJsonObject(
R"([{"my": {"param": {"name": "Alice", "age": "5", "id": "001"}}}, {"other": "v1"}])",
"$[0]['my']['param']['age']"),
R"([{"my": {"info": {"name": "Alice", "age": "5", "id": "001"}}}, {"other": "v1"}])",
"$[0]['my']['info']['age']"),
"5");
EXPECT_EQ(
getJsonObject(
R"([{"my": {"param": {"name": "Alice", "age": "5", "id": "001"}}}, {"other": "v1"}])",
"$[0].my.param.age"),
R"([{"my": {"info": {"name": "Alice", "age": "5", "id": "001"}}}, {"other": "v1"}])",
"$[0].my.info.age"),
"5");

// Json object as result.
EXPECT_EQ(
getJsonObject(
R"({"my": {"param": {"name": "Alice", "age": "5", "id": "001"}}})",
"$.my.param"),
R"({"my": {"info": {"name": "Alice", "age": "5", "id": "001"}}})",
"$.my.info"),
R"({"name": "Alice", "age": "5", "id": "001"})");
EXPECT_EQ(
getJsonObject(
R"({"my": {"param": {"name": "Alice", "age": "5", "id": "001"}}})",
"$['my']['param']"),
R"({"my": {"info": {"name": "Alice", "age": "5", "id": "001"}}})",
"$['my']['info']"),
R"({"name": "Alice", "age": "5", "id": "001"})");

// Array as result.
EXPECT_EQ(
getJsonObject(
R"([{"my": {"param": {"name": "Alice"}}}, {"other": ["v1", "v2"]}])",
R"([{"my": {"info": {"name": "Alice"}}}, {"other": ["v1", "v2"]}])",
"$[1].other"),
R"(["v1", "v2"])");
// Array element as result.
EXPECT_EQ(
getJsonObject(
R"([{"my": {"param": {"name": "Alice"}}}, {"other": ["v1", "v2"]}])",
R"([{"my": {"info": {"name": "Alice"}}}, {"other": ["v1", "v2"]}])",
"$[1].other[0]"),
"v1");
EXPECT_EQ(
getJsonObject(
R"([{"my": {"param": {"name": "Alice"}}}, {"other": ["v1", "v2"]}])",
R"([{"my": {"info": {"name": "Alice"}}}, {"other": ["v1", "v2"]}])",
"$[1].other[1]"),
"v2");
}
Expand All @@ -115,8 +115,8 @@ TEST_F(GetJsonObjectTest, nullResult) {
// Invalid ending character.
EXPECT_EQ(
getJsonObject(
R"([{"my": {"param": {"name": "Alice"quoted""}}}, {"other": ["v1", "v2"]}])",
"$[0].my.param.name"),
R"([{"my": {"info": {"name": "Alice"quoted""}}}, {"other": ["v1", "v2"]}])",
"$[0].my.info.name"),
std::nullopt);
}

Expand Down

0 comments on commit f6eb519

Please sign in to comment.