Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix the precision when converting a decimal128 column to an arrow array #14230

Merged
merged 5 commits into from
Oct 28, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions cpp/include/cudf/detail/interop.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -194,5 +194,18 @@ std::unique_ptr<table> from_arrow(arrow::Table const& input_table,
std::unique_ptr<cudf::scalar> from_arrow(arrow::Scalar const& input,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr);

/**
* @brief Return a maximum precision for a given type.
*
* @tparam T the type to get the maximum precision for
*/
template <typename T>
constexpr std::size_t max_precision()
{
auto constexpr num_bits = sizeof(T) * 8;
return std::floor(num_bits * std::log(2) / std::log(10));
}

} // namespace detail
} // namespace cudf
12 changes: 12 additions & 0 deletions cpp/include/cudf/interop.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,12 @@ struct column_metadata {
* @param stream CUDA stream used for device memory operations and kernel launches
* @param ar_mr arrow memory pool to allocate memory for arrow Table
* @return arrow Table generated from `input`
*
* @note For decimals, since the precision is not stored for them in libcudf,
* it will be converted to an Arrow decimal128 that has the widest-precision the cudf decimal type
* supports. For example, numeric::decimal32 will be converted to Arrow decimal128 of the precision
* 9 which is the maximum precision for 32-bit types. Similarly, numeric::decimal128 will be
* converted to Arrow decimal128 of the precision 38.
*/
std::shared_ptr<arrow::Table> to_arrow(table_view input,
std::vector<column_metadata> const& metadata = {},
Expand All @@ -145,6 +151,12 @@ std::shared_ptr<arrow::Table> to_arrow(table_view input,
* @param stream CUDA stream used for device memory operations and kernel launches
* @param ar_mr arrow memory pool to allocate memory for arrow Scalar
* @return arrow Scalar generated from `input`
*
* @note For decimals, since the precision is not stored for them in libcudf,
* it will be converted to an Arrow decimal128 that has the widest-precision the cudf decimal type
* supports. For example, numeric::decimal32 will be converted to Arrow decimal128 of the precision
* 9 which is the maximum precision for 32-bit types. Similarly, numeric::decimal128 will be
* converted to Arrow decimal128 of the precision 38.
*/
std::shared_ptr<arrow::Scalar> to_arrow(cudf::scalar const& input,
column_metadata const& metadata = {},
Expand Down
13 changes: 9 additions & 4 deletions cpp/src/interop/to_arrow.cu
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,9 @@ std::shared_ptr<arrow::Array> dispatch_to_arrow::operator()<numeric::decimal32>(
arrow::MemoryPool* ar_mr,
rmm::cuda_stream_view stream)
{
return unsupported_decimals_to_arrow<int32_t>(input, 9, ar_mr, stream);
using DeviceType = int32_t;
return unsupported_decimals_to_arrow<DeviceType>(
input, cudf::detail::max_precision<DeviceType>(), ar_mr, stream);
}

template <>
Expand All @@ -208,7 +210,9 @@ std::shared_ptr<arrow::Array> dispatch_to_arrow::operator()<numeric::decimal64>(
arrow::MemoryPool* ar_mr,
rmm::cuda_stream_view stream)
{
return unsupported_decimals_to_arrow<int64_t>(input, 18, ar_mr, stream);
using DeviceType = int64_t;
return unsupported_decimals_to_arrow<DeviceType>(
input, cudf::detail::max_precision<DeviceType>(), ar_mr, stream);
}

template <>
Expand All @@ -219,7 +223,8 @@ std::shared_ptr<arrow::Array> dispatch_to_arrow::operator()<numeric::decimal128>
arrow::MemoryPool* ar_mr,
rmm::cuda_stream_view stream)
{
using DeviceType = __int128_t;
using DeviceType = __int128_t;
auto const max_precision = cudf::detail::max_precision<DeviceType>();

rmm::device_uvector<DeviceType> buf(input.size(), stream);

Expand All @@ -234,7 +239,7 @@ std::shared_ptr<arrow::Array> dispatch_to_arrow::operator()<numeric::decimal128>
CUDF_CUDA_TRY(cudaMemcpyAsync(
data_buffer->mutable_data(), buf.data(), buf_size_in_bytes, cudaMemcpyDefault, stream.value()));

auto type = arrow::decimal(18, -input.type().scale());
auto type = arrow::decimal(max_precision, -input.type().scale());
auto mask = fetch_mask_buffer(input, ar_mr, stream);
auto buffers = std::vector<std::shared_ptr<arrow::Buffer>>{mask, std::move(data_buffer)};
auto data = std::make_shared<arrow::ArrayData>(type, input.size(), buffers);
Expand Down
2 changes: 1 addition & 1 deletion cpp/tests/interop/arrow_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ template <typename T>
auto constexpr BIT_WIDTH_RATIO = sizeof(__int128_t) / sizeof(T);

std::shared_ptr<arrow::Array> arr;
arrow::Decimal128Builder decimal_builder(arrow::decimal(18, -scale),
arrow::Decimal128Builder decimal_builder(arrow::decimal(cudf::detail::max_precision<T>(), -scale),
arrow::default_memory_pool());

for (T i = 0; i < static_cast<T>(data.size() / BIT_WIDTH_RATIO); ++i) {
Expand Down
4 changes: 3 additions & 1 deletion cpp/tests/interop/to_arrow_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -604,7 +604,9 @@ struct ToArrowDecimalScalarTest : public cudf::test::BaseFixture {};
TEST_F(ToArrowDecimalScalarTest, Basic)
{
auto const value{42};
auto const precision{18}; // cudf will convert to the widest-precision Arrow scalar of the type
auto const precision =
cudf::detail::max_precision<__int128_t>(); // cudf will convert to the widest-precision Arrow
// scalar of the type
int32_t const scale{4};

auto const cudf_scalar =
Expand Down