Skip to content

Commit

Permalink
Expose stream parameter to get_json_object API (#14297)
Browse files Browse the repository at this point in the history
Add a stream parameter to the public API `cudf::get_json_object()`.
Also remove the API from the `strings` namespace, since it does not fit with the other strings library functions.
As a result, the source files are moved to new locations as well.

Reference #13744

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Karthikeyan (https://github.com/karthikeyann)
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - Bradley Dice (https://github.com/bdice)
  - Nghia Truong (https://github.com/ttnghia)

URL: #14297
  • Loading branch information
davidwendt authored Nov 1, 2023
1 parent f07d9cc commit 56fe5db
Show file tree
Hide file tree
Showing 11 changed files with 81 additions and 128 deletions.
2 changes: 1 addition & 1 deletion cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -440,6 +440,7 @@ add_library(
src/join/mixed_join_size_kernel_nulls.cu
src/join/mixed_join_size_kernels_semi.cu
src/join/semi_join.cu
src/json/json_path.cu
src/lists/contains.cu
src/lists/combine/concatenate_list_elements.cu
src/lists/combine/concatenate_rows.cu
Expand Down Expand Up @@ -571,7 +572,6 @@ add_library(
src/strings/filter_chars.cu
src/strings/like.cu
src/strings/padding.cu
src/strings/json/json_path.cu
src/strings/regex/regcomp.cpp
src/strings/regex/regexec.cpp
src/strings/regex/regex_program.cpp
Expand Down
2 changes: 1 addition & 1 deletion cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -320,7 +320,7 @@ ConfigureNVBench(

# ##################################################################################################
# * json benchmark -------------------------------------------------------------------
ConfigureBench(JSON_BENCH string/json.cu)
ConfigureBench(JSON_BENCH json/json.cu)
ConfigureNVBench(FST_NVBENCH io/fst.cu)
ConfigureNVBench(JSON_READER_NVBENCH io/json/nested_json.cpp io/json/json_reader_input.cpp)
ConfigureNVBench(JSON_WRITER_NVBENCH io/json/json_writer.cpp)
Expand Down
4 changes: 2 additions & 2 deletions cpp/benchmarks/string/json.cu → cpp/benchmarks/json/json.cu
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@
#include <cudf_test/column_wrapper.hpp>

#include <cudf/column/column_factories.hpp>
#include <cudf/json/json.hpp>
#include <cudf/strings/detail/strings_children.cuh>
#include <cudf/strings/detail/utilities.cuh>
#include <cudf/strings/json.hpp>
#include <cudf/strings/string_view.hpp>
#include <cudf/strings/strings_column_view.hpp>
#include <cudf/types.hpp>
Expand Down Expand Up @@ -196,7 +196,7 @@ void BM_case(benchmark::State& state, std::string query_arg)

for (auto _ : state) {
cuda_event_timer raii(state, true);
auto result = cudf::strings::get_json_object(scv, json_path);
auto result = cudf::get_json_object(scv, json_path);
CUDF_CUDA_TRY(cudaStreamSynchronize(0));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,16 @@
#pragma once

#include <cudf/strings/strings_column_view.hpp>
#include <cudf/utilities/default_stream.hpp>

#include <rmm/mr/device/per_device_resource.hpp>

#include <thrust/optional.h>

namespace cudf {
namespace strings {

/**
* @addtogroup strings_json
* @addtogroup json_object
* @{
* @file
*/
Expand Down Expand Up @@ -155,20 +155,21 @@ class get_json_object_options {
* https://tools.ietf.org/id/draft-goessner-dispatch-jsonpath-00.html
* Implements only the operators: $ . [] *
*
* @throw std::invalid_argument if provided an invalid operator or an empty name
*
* @param col The input strings column. Each row must contain a valid json string
* @param json_path The JSONPath string to be applied to each row
* @param options Options for controlling the behavior of the function
* @param mr Resource for allocating device memory.
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Resource for allocating device memory
* @return New strings column containing the retrieved json object strings
*
* @throw std::invalid_argument if provided an invalid operator or an empty name
*/
std::unique_ptr<cudf::column> get_json_object(
cudf::strings_column_view const& col,
cudf::string_scalar const& json_path,
get_json_object_options options = get_json_object_options{},
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/** @} */ // end of doxygen group
} // namespace strings
} // namespace cudf
43 changes: 0 additions & 43 deletions cpp/include/cudf/strings/detail/json.hpp

This file was deleted.

5 changes: 4 additions & 1 deletion cpp/include/doxygen_groups.h
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,6 @@
* @defgroup strings_replace Replacing
* @defgroup strings_split Splitting
* @defgroup strings_extract Extracting
* @defgroup strings_json JSON
* @defgroup strings_regex Regex
* @}
* @defgroup dictionary_apis Dictionary
Expand All @@ -146,6 +145,10 @@
* @defgroup io_datasources Data Sources
* @defgroup io_datasinks Data Sinks
* @}
* @defgroup json_apis JSON
* @{
* @defgroup json_object JSON Path
* @}
* @defgroup lists_apis Lists
* @{
* @defgroup lists_combine Combining
Expand Down
19 changes: 7 additions & 12 deletions cpp/src/strings/json/json_path.cu → cpp/src/json/json_path.cu
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@
#include <cudf/detail/null_mask.hpp>
#include <cudf/detail/utilities/cuda.cuh>
#include <cudf/detail/utilities/vector_factories.hpp>
#include <cudf/json/json.hpp>
#include <cudf/scalar/scalar.hpp>
#include <cudf/strings/detail/utilities.hpp>
#include <cudf/strings/json.hpp>
#include <cudf/strings/string_view.cuh>
#include <cudf/strings/strings_column_view.hpp>
#include <cudf/types.hpp>
Expand All @@ -41,7 +41,6 @@
#include <thrust/tuple.h>

namespace cudf {
namespace strings {
namespace detail {

namespace {
Expand Down Expand Up @@ -224,7 +223,9 @@ enum json_element_type { NONE, OBJECT, ARRAY, VALUE };
class json_state : private parser {
public:
__device__ json_state() : parser() {}
__device__ json_state(char const* _input, int64_t _input_len, get_json_object_options _options)
__device__ json_state(char const* _input,
int64_t _input_len,
cudf::get_json_object_options _options)
: parser(_input, _input_len),

options(_options)
Expand Down Expand Up @@ -956,9 +957,6 @@ __launch_bounds__(block_size) __global__
}
}

/**
* @copydoc cudf::strings::detail::get_json_object
*/
std::unique_ptr<cudf::column> get_json_object(cudf::strings_column_view const& col,
cudf::string_scalar const& json_path,
get_json_object_options options,
Expand Down Expand Up @@ -1011,7 +1009,7 @@ std::unique_ptr<cudf::column> get_json_object(cudf::strings_column_view const& c
cudf::detail::get_value<size_type>(offsets_view, col.size(), stream);

// allocate output string column
auto chars = create_chars_child_column(output_size, stream, mr);
auto chars = cudf::strings::detail::create_chars_child_column(output_size, stream, mr);

// potential optimization : if we know that all outputs are valid, we could skip creating
// the validity mask altogether
Expand Down Expand Up @@ -1041,17 +1039,14 @@ std::unique_ptr<cudf::column> get_json_object(cudf::strings_column_view const& c
} // namespace
} // namespace detail

/**
* @copydoc cudf::strings::get_json_object
*/
std::unique_ptr<cudf::column> get_json_object(cudf::strings_column_view const& col,
cudf::string_scalar const& json_path,
get_json_object_options options,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::get_json_object(col, json_path, options, cudf::get_default_stream(), mr);
return detail::get_json_object(col, json_path, options, stream, mr);
}

} // namespace strings
} // namespace cudf
5 changes: 4 additions & 1 deletion cpp/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -523,7 +523,6 @@ ConfigureTest(
strings/format_lists_tests.cpp
strings/integers_tests.cpp
strings/ipv4_tests.cpp
strings/json_tests.cpp
strings/like_tests.cpp
strings/pad_tests.cpp
strings/repeat_strings_tests.cpp
Expand All @@ -537,6 +536,10 @@ ConfigureTest(
strings/urls_tests.cpp
)

# ##################################################################################################
# * json path test --------------------------------------------------------------------------------
ConfigureTest(JSON_PATH_TEST json/json_tests.cpp)

# ##################################################################################################
# * structs test ----------------------------------------------------------------------------------
ConfigureTest(STRUCTS_TEST structs/structs_column_tests.cpp structs/utilities_tests.cpp)
Expand Down
Loading

0 comments on commit 56fe5db

Please sign in to comment.