diff --git a/cpp/include/cudf/detail/interop.hpp b/cpp/include/cudf/detail/interop.hpp index 44024333239..8124471982d 100644 --- a/cpp/include/cudf/detail/interop.hpp +++ b/cpp/include/cudf/detail/interop.hpp @@ -194,5 +194,18 @@ std::unique_ptr from_arrow(arrow::Table const& input_table, std::unique_ptr from_arrow(arrow::Scalar const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); + +/** + * @brief Return a maximum precision for a given type. + * + * @tparam T the type to get the maximum precision for + */ +template +constexpr std::size_t max_precision() +{ + auto constexpr num_bits = sizeof(T) * 8; + return std::floor(num_bits * std::log(2) / std::log(10)); +} + } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/interop.hpp b/cpp/include/cudf/interop.hpp index 865cc004107..2ee6f19614d 100644 --- a/cpp/include/cudf/interop.hpp +++ b/cpp/include/cudf/interop.hpp @@ -129,6 +129,12 @@ struct column_metadata { * @param stream CUDA stream used for device memory operations and kernel launches * @param ar_mr arrow memory pool to allocate memory for arrow Table * @return arrow Table generated from `input` + * + * @note For decimals, since the precision is not stored for them in libcudf, + * it will be converted to an Arrow decimal128 that has the widest-precision the cudf decimal type + * supports. For example, numeric::decimal32 will be converted to Arrow decimal128 of the precision + * 9 which is the maximum precision for 32-bit types. Similarly, numeric::decimal128 will be + * converted to Arrow decimal128 of the precision 38. */ std::shared_ptr to_arrow(table_view input, std::vector const& metadata = {}, @@ -145,6 +151,12 @@ std::shared_ptr to_arrow(table_view input, * @param stream CUDA stream used for device memory operations and kernel launches * @param ar_mr arrow memory pool to allocate memory for arrow Scalar * @return arrow Scalar generated from `input` + * + * @note For decimals, since the precision is not stored for them in libcudf, + * it will be converted to an Arrow decimal128 that has the widest-precision the cudf decimal type + * supports. For example, numeric::decimal32 will be converted to Arrow decimal128 of the precision + * 9 which is the maximum precision for 32-bit types. Similarly, numeric::decimal128 will be + * converted to Arrow decimal128 of the precision 38. */ std::shared_ptr to_arrow(cudf::scalar const& input, column_metadata const& metadata = {}, diff --git a/cpp/src/interop/to_arrow.cu b/cpp/src/interop/to_arrow.cu index 0cd750bc947..28230cf8e74 100644 --- a/cpp/src/interop/to_arrow.cu +++ b/cpp/src/interop/to_arrow.cu @@ -197,7 +197,9 @@ std::shared_ptr dispatch_to_arrow::operator()( arrow::MemoryPool* ar_mr, rmm::cuda_stream_view stream) { - return unsupported_decimals_to_arrow(input, 9, ar_mr, stream); + using DeviceType = int32_t; + return unsupported_decimals_to_arrow( + input, cudf::detail::max_precision(), ar_mr, stream); } template <> @@ -208,7 +210,9 @@ std::shared_ptr dispatch_to_arrow::operator()( arrow::MemoryPool* ar_mr, rmm::cuda_stream_view stream) { - return unsupported_decimals_to_arrow(input, 18, ar_mr, stream); + using DeviceType = int64_t; + return unsupported_decimals_to_arrow( + input, cudf::detail::max_precision(), ar_mr, stream); } template <> @@ -219,7 +223,8 @@ std::shared_ptr dispatch_to_arrow::operator() arrow::MemoryPool* ar_mr, rmm::cuda_stream_view stream) { - using DeviceType = __int128_t; + using DeviceType = __int128_t; + auto const max_precision = cudf::detail::max_precision(); rmm::device_uvector buf(input.size(), stream); @@ -234,7 +239,7 @@ std::shared_ptr dispatch_to_arrow::operator() CUDF_CUDA_TRY(cudaMemcpyAsync( data_buffer->mutable_data(), buf.data(), buf_size_in_bytes, cudaMemcpyDefault, stream.value())); - auto type = arrow::decimal(18, -input.type().scale()); + auto type = arrow::decimal(max_precision, -input.type().scale()); auto mask = fetch_mask_buffer(input, ar_mr, stream); auto buffers = std::vector>{mask, std::move(data_buffer)}; auto data = std::make_shared(type, input.size(), buffers); diff --git a/cpp/tests/interop/arrow_utils.hpp b/cpp/tests/interop/arrow_utils.hpp index fc8f5b37f7e..2c5f7458ce5 100644 --- a/cpp/tests/interop/arrow_utils.hpp +++ b/cpp/tests/interop/arrow_utils.hpp @@ -186,7 +186,7 @@ template auto constexpr BIT_WIDTH_RATIO = sizeof(__int128_t) / sizeof(T); std::shared_ptr arr; - arrow::Decimal128Builder decimal_builder(arrow::decimal(18, -scale), + arrow::Decimal128Builder decimal_builder(arrow::decimal(cudf::detail::max_precision(), -scale), arrow::default_memory_pool()); for (T i = 0; i < static_cast(data.size() / BIT_WIDTH_RATIO); ++i) { diff --git a/cpp/tests/interop/to_arrow_test.cpp b/cpp/tests/interop/to_arrow_test.cpp index 6bb4cdfd747..d6762e70d80 100644 --- a/cpp/tests/interop/to_arrow_test.cpp +++ b/cpp/tests/interop/to_arrow_test.cpp @@ -604,7 +604,9 @@ struct ToArrowDecimalScalarTest : public cudf::test::BaseFixture {}; TEST_F(ToArrowDecimalScalarTest, Basic) { auto const value{42}; - auto const precision{18}; // cudf will convert to the widest-precision Arrow scalar of the type + auto const precision = + cudf::detail::max_precision<__int128_t>(); // cudf will convert to the widest-precision Arrow + // scalar of the type int32_t const scale{4}; auto const cudf_scalar =