diff --git a/cpp/benchmarks/hashing/hash.cpp b/cpp/benchmarks/hashing/hash.cpp index f0e9202612e..e679b4b62d2 100644 --- a/cpp/benchmarks/hashing/hash.cpp +++ b/cpp/benchmarks/hashing/hash.cpp @@ -17,32 +17,59 @@ #include #include +#include #include #include #include +#include + static void bench_hash(nvbench::state& state) { - auto const num_rows = static_cast(state.get_int64("num_rows")); - auto const nulls = static_cast(state.get_float64("nulls")); + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const nulls = state.get_float64("nulls"); + // disable null bitmask if probability is exactly 0.0 + bool const no_nulls = nulls == 0.0; auto const hash_name = state.get_string("hash_name"); - data_profile const profile = data_profile_builder().null_probability(nulls); - auto const data = create_random_table( + data_profile const profile = + data_profile_builder().null_probability(no_nulls ? std::nullopt : std::optional{nulls}); + auto const data = create_random_table( {cudf::type_id::INT64, cudf::type_id::STRING}, row_count{num_rows}, profile); auto stream = cudf::get_default_stream(); state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value())); + // collect statistics + cudf::strings_column_view input(data->get_column(1).view()); + auto const chars_size = input.chars_size(); + // add memory read from string column + state.add_global_memory_reads(chars_size); + // add memory read from int64_t column + state.add_global_memory_reads(num_rows); + // add memory read from bitmaks + if (!no_nulls) { + state.add_global_memory_reads(2 * + cudf::bitmask_allocation_size_bytes(num_rows)); + } + // memory written depends on used hash + if (hash_name == "murmurhash3_x86_32") { + state.add_global_memory_writes(num_rows); + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { auto result = cudf::hashing::murmurhash3_x86_32(data->view()); }); } else if (hash_name == "md5") { + // md5 creates a 32-byte string + state.add_global_memory_writes(32 * num_rows); + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { auto result = cudf::hashing::md5(data->view()); }); } else if (hash_name == "spark_murmurhash3_x86_32") { + state.add_global_memory_writes(num_rows); + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { auto result = cudf::hashing::spark_murmurhash3_x86_32(data->view()); });