diff --git a/cpp/src/io/parquet/page_data.cu b/cpp/src/io/parquet/page_data.cu index 396cd487771..c58d0746665 100644 --- a/cpp/src/io/parquet/page_data.cu +++ b/cpp/src/io/parquet/page_data.cu @@ -438,7 +438,7 @@ __global__ void __launch_bounds__(decode_block_size) device_span chunks, size_t min_row, size_t num_rows, - int* error_code) + int32_t* error_code) { __shared__ __align__(16) page_state_s state_g; __shared__ __align__(16) @@ -602,8 +602,10 @@ __global__ void __launch_bounds__(decode_block_size) } __syncthreads(); } - - if (!t and s->error != 0) { atomicOr(error_code, s->error); } + if (!t and s->error != 0) { + cuda::atomic_ref ref{*error_code}; + ref.store(s->error, cuda::std::memory_order_relaxed); + } } struct mask_tform { @@ -629,7 +631,7 @@ void __host__ DecodePageData(cudf::detail::hostdevice_vector& pages, size_t num_rows, size_t min_row, int level_type_size, - int* error_code, + int32_t* error_code, rmm::cuda_stream_view stream) { CUDF_EXPECTS(pages.size() > 0, "There is no page to decode"); diff --git a/cpp/src/io/parquet/page_delta_decode.cu b/cpp/src/io/parquet/page_delta_decode.cu index af0904f3893..b512f2a775a 100644 --- a/cpp/src/io/parquet/page_delta_decode.cu +++ b/cpp/src/io/parquet/page_delta_decode.cu @@ -37,7 +37,7 @@ __global__ void __launch_bounds__(96) device_span chunks, size_t min_row, size_t num_rows, - int* error_code) + int32_t* error_code) { using cudf::detail::warp_size; __shared__ __align__(16) delta_binary_decoder db_state; @@ -151,7 +151,10 @@ __global__ void __launch_bounds__(96) __syncthreads(); } - if (!t and s->error != 0) { atomicOr(error_code, s->error); } + if (!t and s->error != 0) { + cuda::atomic_ref ref{*error_code}; + ref.store(s->error, cuda::std::memory_order_relaxed); + } } } // anonymous namespace @@ -164,7 +167,7 @@ void __host__ DecodeDeltaBinary(cudf::detail::hostdevice_vector& pages size_t num_rows, size_t min_row, int level_type_size, - int* error_code, + int32_t* error_code, rmm::cuda_stream_view stream) { CUDF_EXPECTS(pages.size() > 0, "There is no page to decode"); diff --git a/cpp/src/io/parquet/page_string_decode.cu b/cpp/src/io/parquet/page_string_decode.cu index c150938bdba..192979f082a 100644 --- a/cpp/src/io/parquet/page_string_decode.cu +++ b/cpp/src/io/parquet/page_string_decode.cu @@ -587,7 +587,7 @@ __global__ void __launch_bounds__(decode_block_size) device_span chunks, size_t min_row, size_t num_rows, - int* error_code) + int32_t* error_code) { __shared__ __align__(16) page_state_s state_g; __shared__ __align__(4) size_type last_offset; @@ -748,7 +748,10 @@ __global__ void __launch_bounds__(decode_block_size) auto const offptr = reinterpret_cast(nesting_info_base[leaf_level_index].data_out); block_excl_sum(offptr, value_count, s->page.str_offset); - if (!t and s->error != 0) { atomicOr(error_code, s->error); } + if (!t and s->error != 0) { + cuda::atomic_ref ref{*error_code}; + ref.store(s->error, cuda::std::memory_order_relaxed); + } } } // anonymous namespace @@ -782,7 +785,7 @@ void __host__ DecodeStringPageData(cudf::detail::hostdevice_vector& pa size_t num_rows, size_t min_row, int level_type_size, - int* error_code, + int32_t* error_code, rmm::cuda_stream_view stream) { CUDF_EXPECTS(pages.size() > 0, "There is no page to decode"); diff --git a/cpp/src/io/parquet/parquet_gpu.hpp b/cpp/src/io/parquet/parquet_gpu.hpp index a528a297328..73a8d08e5c2 100644 --- a/cpp/src/io/parquet/parquet_gpu.hpp +++ b/cpp/src/io/parquet/parquet_gpu.hpp @@ -574,7 +574,7 @@ void DecodePageData(cudf::detail::hostdevice_vector& pages, size_t num_rows, size_t min_row, int level_type_size, - int* error_code, + int32_t* error_code, rmm::cuda_stream_view stream); /** @@ -596,7 +596,7 @@ void DecodeStringPageData(cudf::detail::hostdevice_vector& pages, size_t num_rows, size_t min_row, int level_type_size, - int* error_code, + int32_t* error_code, rmm::cuda_stream_view stream); /** @@ -618,7 +618,7 @@ void DecodeDeltaBinary(cudf::detail::hostdevice_vector& pages, size_t num_rows, size_t min_row, int level_type_size, - int* error_code, + int32_t* error_code, rmm::cuda_stream_view stream); /** diff --git a/cpp/src/io/parquet/reader_impl.cpp b/cpp/src/io/parquet/reader_impl.cpp index fecc1acca45..0ae57d0d038 100644 --- a/cpp/src/io/parquet/reader_impl.cpp +++ b/cpp/src/io/parquet/reader_impl.cpp @@ -163,7 +163,7 @@ void reader::impl::decode_page_data(size_t skip_rows, size_t num_rows) chunk_nested_valids.host_to_device_async(_stream); chunk_nested_data.host_to_device_async(_stream); - rmm::device_scalar error_code(0, _stream); + rmm::device_scalar error_code(0, _stream); // get the number of streams we need from the pool and tell them to wait on the H2D copies int const nkernels = std::bitset<32>(kernel_mask).count();