diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index 3e52c502113..d3de9b39977 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -360,7 +360,7 @@ ConfigureNVBench( # ################################################################################################## # * strings benchmark ------------------------------------------------------------------- -ConfigureBench(STRINGS_BENCH string/factory.cu string/repeat_strings.cpp) +ConfigureBench(STRINGS_BENCH string/factory.cu) ConfigureNVBench( STRINGS_NVBENCH @@ -384,6 +384,7 @@ ConfigureNVBench( string/lengths.cpp string/like.cpp string/make_strings_column.cu + string/repeat_strings.cpp string/replace.cpp string/replace_re.cpp string/reverse.cpp diff --git a/cpp/benchmarks/string/repeat_strings.cpp b/cpp/benchmarks/string/repeat_strings.cpp index f1d1516f248..29012e2cbf9 100644 --- a/cpp/benchmarks/string/repeat_strings.cpp +++ b/cpp/benchmarks/string/repeat_strings.cpp @@ -14,99 +14,58 @@ * limitations under the License. */ -#include "string_bench_args.hpp" - #include #include -#include #include #include #include -static constexpr cudf::size_type default_repeat_times = 16; -static constexpr cudf::size_type min_repeat_times = -16; -static constexpr cudf::size_type max_repeat_times = 16; +#include -static std::unique_ptr create_data_table(cudf::size_type n_cols, - cudf::size_type n_rows, - cudf::size_type max_str_length) +static void bench_repeat(nvbench::state& state) { - CUDF_EXPECTS(n_cols == 1 || n_cols == 2, "Invalid number of columns."); + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const min_width = static_cast(state.get_int64("min_width")); + auto const max_width = static_cast(state.get_int64("max_width")); + auto const min_repeat = static_cast(state.get_int64("min_repeat")); + auto const max_repeat = static_cast(state.get_int64("max_repeat")); + auto const api = state.get_string("api"); - std::vector dtype_ids{cudf::type_id::STRING}; auto builder = data_profile_builder().distribution( - cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length); - - if (n_cols == 2) { - dtype_ids.push_back(cudf::type_id::INT32); - builder.distribution( - cudf::type_id::INT32, distribution_id::NORMAL, min_repeat_times, max_repeat_times); + cudf::type_id::STRING, distribution_id::NORMAL, min_width, max_width); + builder.distribution(cudf::type_id::INT32, distribution_id::NORMAL, min_repeat, max_repeat); + + auto const table = create_random_table( + {cudf::type_id::STRING, cudf::type_id::INT32}, row_count{num_rows}, data_profile{builder}); + auto const input = cudf::strings_column_view(table->view().column(0)); + + auto stream = cudf::get_default_stream(); + state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value())); + auto chars_size = input.chars_size(stream); + state.add_global_memory_reads(chars_size); + + if (api == "scalar") { + state.add_global_memory_writes(chars_size * max_repeat); + state.exec(nvbench::exec_tag::sync, + [&](nvbench::launch& launch) { cudf::strings::repeat_strings(input, max_repeat); }); + } else if (api == "column") { + auto repeats = table->view().column(1); + { + auto result = cudf::strings::repeat_strings(input, repeats); + auto output = cudf::strings_column_view(result->view()); + state.add_global_memory_writes(output.chars_size(stream)); + } + state.exec(nvbench::exec_tag::sync, + [&](nvbench::launch& launch) { cudf::strings::repeat_strings(input, repeats); }); } - - return create_random_table(dtype_ids, row_count{n_rows}, data_profile{builder}); } -static void BM_repeat_strings_scalar_times(benchmark::State& state) -{ - auto const n_rows = static_cast(state.range(0)); - auto const max_str_length = static_cast(state.range(1)); - auto const table = create_data_table(1, n_rows, max_str_length); - auto const strings_col = cudf::strings_column_view(table->view().column(0)); - - for ([[maybe_unused]] auto _ : state) { - [[maybe_unused]] cuda_event_timer raii(state, true, cudf::get_default_stream()); - cudf::strings::repeat_strings(strings_col, default_repeat_times); - } - - state.SetBytesProcessed(state.iterations() * strings_col.chars_size(cudf::get_default_stream())); -} - -static void BM_repeat_strings_column_times(benchmark::State& state) -{ - auto const n_rows = static_cast(state.range(0)); - auto const max_str_length = static_cast(state.range(1)); - auto const table = create_data_table(2, n_rows, max_str_length); - auto const strings_col = cudf::strings_column_view(table->view().column(0)); - auto const repeat_times_col = table->view().column(1); - - for ([[maybe_unused]] auto _ : state) { - [[maybe_unused]] cuda_event_timer raii(state, true, cudf::get_default_stream()); - cudf::strings::repeat_strings(strings_col, repeat_times_col); - } - - state.SetBytesProcessed(state.iterations() * (strings_col.chars_size(cudf::get_default_stream()) + - repeat_times_col.size() * sizeof(int32_t))); -} - -static void generate_bench_args(benchmark::internal::Benchmark* b) -{ - int const min_rows = 1 << 8; - int const max_rows = 1 << 18; - int const row_mult = 4; - int const min_strlen = 1 << 4; - int const max_strlen = 1 << 8; - int const len_mult = 4; - generate_string_bench_args(b, min_rows, max_rows, row_mult, min_strlen, max_strlen, len_mult); -} - -class RepeatStrings : public cudf::benchmark {}; - -#define REPEAT_STRINGS_SCALAR_TIMES_BENCHMARK_DEFINE(name) \ - BENCHMARK_DEFINE_F(RepeatStrings, name) \ - (::benchmark::State & st) { BM_repeat_strings_scalar_times(st); } \ - BENCHMARK_REGISTER_F(RepeatStrings, name) \ - ->Apply(generate_bench_args) \ - ->UseManualTime() \ - ->Unit(benchmark::kMillisecond); - -#define REPEAT_STRINGS_COLUMN_TIMES_BENCHMARK_DEFINE(name) \ - BENCHMARK_DEFINE_F(RepeatStrings, name) \ - (::benchmark::State & st) { BM_repeat_strings_column_times(st); } \ - BENCHMARK_REGISTER_F(RepeatStrings, name) \ - ->Apply(generate_bench_args) \ - ->UseManualTime() \ - ->Unit(benchmark::kMillisecond); - -REPEAT_STRINGS_SCALAR_TIMES_BENCHMARK_DEFINE(scalar_times) -REPEAT_STRINGS_COLUMN_TIMES_BENCHMARK_DEFINE(column_times) +NVBENCH_BENCH(bench_repeat) + .set_name("repeat") + .add_int64_axis("min_width", {0}) + .add_int64_axis("max_width", {32, 64, 128, 256}) + .add_int64_axis("min_repeat", {0}) + .add_int64_axis("max_repeat", {16}) + .add_int64_axis("num_rows", {32768, 262144, 2097152}) + .add_string_axis("api", {"scalar", "column"});