Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add data-size member to libcudf column_view classes #14031

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions cpp/benchmarks/sort/sort_lists.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ void sort_lists_of_structs(nvbench::state& state)
auto const new_child = cudf::column_view{cudf::data_type{cudf::type_id::STRUCT},
child.size(),
nullptr,
0,
child.null_mask(),
child.null_count(),
child.offset(),
Expand All @@ -63,6 +64,7 @@ void sort_lists_of_structs(nvbench::state& state)
cudf::column_view{cudf::data_type{cudf::type_id::LIST},
col.size(),
nullptr,
0,
col.null_mask(),
col.null_count(),
col.offset(),
Expand Down
1 change: 1 addition & 0 deletions cpp/benchmarks/string/case.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ void bench_case(nvbench::state& state)
col_view = cudf::column_view(col_view.type(),
col_view.size(),
nullptr,
0,
col_view.null_mask(),
col_view.null_count(),
0,
Expand Down
7 changes: 7 additions & 0 deletions cpp/include/cudf/column/column.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,13 @@ class column {
*/
[[nodiscard]] size_type size() const noexcept { return _size; }

/**
* @brief Returns the number of data bytes
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should be clearer. Is it the number of bytes of a single element? size() * sizeof(T)?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be size() * sizeof(T) but only where T is a fixed-width-type.
The intention here is to expose the actual device-buffer size() which may be larger than sizeof(size_type) in the solutions we are looking at ways to allow the total number of bytes in a strings column to exceed size_type.

*
* @return The number of bytes
*/
[[nodiscard]] std::size_t size_bytes() const noexcept;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think a fundamental invariant is that column.size() * size_of(column.type() == column.size_bytes(). However, given that column::size() returns int, then that invariant can be violated which seems bad.

Are we planning to make column::size() also return size_t?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We are not changing size() but rather only adding size_bytes().

I think the invariant argument may break down with compounds types. We are only trying to solve for strings columns at this point.


/**
* @brief Returns the count of null elements.
*
Expand Down
44 changes: 42 additions & 2 deletions cpp/include/cudf/column/column_device_view.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,13 @@ class alignas(16) column_device_view_base {
*/
[[nodiscard]] CUDF_HOST_DEVICE size_type size() const noexcept { return _size; }

/**
* @brief Returns the number of elements in the column.
*
* @return The number of elements in the column
*/
[[nodiscard]] CUDF_HOST_DEVICE std::size_t size_bytes() const noexcept { return _data_size; }

/**
* @brief Returns the element type
*
Expand Down Expand Up @@ -292,6 +299,7 @@ class alignas(16) column_device_view_base {
data_type _type{type_id::EMPTY}; ///< Element type
cudf::size_type _size{}; ///< Number of elements
void const* _data{}; ///< Pointer to device memory containing elements
std::size_t _data_size{}; ///< Number of bytes
bitmask_type const* _null_mask{}; ///< Pointer to device memory containing
///< bitmask representing null elements.
size_type _offset{}; ///< Index position of the first element.
Expand All @@ -311,7 +319,37 @@ class alignas(16) column_device_view_base {
void const* data,
bitmask_type const* null_mask,
size_type offset)
: _type{type}, _size{size}, _data{data}, _null_mask{null_mask}, _offset{offset}
: _type{type},
_size{size},
_data{data},
_data_size{size_of(type) * size},
_null_mask{null_mask},
_offset{offset}
{
}

/**
* @brief Constructs a column with the specified type, size, data, nullmask and offset.
*
* @param type The type of the column
* @param size The number of elements in the column
* @param data Pointer to device memory containing elements
* @param data_size The number of bytes pointed to by data
* @param null_mask Pointer to device memory containing bitmask representing valid elements
* @param offset Index position of the first element
*/
CUDF_HOST_DEVICE column_device_view_base(data_type type,
size_type size,
void const* data,
std::size_t data_size,
bitmask_type const* null_mask,
size_type offset)
: _type{type},
_size{size},
_data{data},
_data_size{data_size},
_null_mask{null_mask},
_offset{offset}
{
}

Expand Down Expand Up @@ -398,6 +436,7 @@ class alignas(16) column_device_view : public detail::column_device_view_base {
return column_device_view{this->type(),
size,
this->head(),
this->size_bytes(),
this->null_mask(),
this->offset() + offset,
d_children,
Expand Down Expand Up @@ -883,11 +922,12 @@ class alignas(16) column_device_view : public detail::column_device_view_base {
CUDF_HOST_DEVICE column_device_view(data_type type,
size_type size,
void const* data,
std::size_t data_size,
bitmask_type const* null_mask,
size_type offset,
column_device_view* children,
size_type num_children)
: column_device_view_base(type, size, data, null_mask, offset),
: column_device_view_base(type, size, data, data_size, null_mask, offset),
d_children(children),
_num_children(num_children)
{
Expand Down
40 changes: 28 additions & 12 deletions cpp/include/cudf/column/column_view.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,13 @@ class column_view_base {
*/
[[nodiscard]] size_type size() const noexcept { return _size; }

/**
* @brief Returns the number of data bytes in the column
*
* @return The number of bytes
*/
[[nodiscard]] std::size_t size_bytes() const noexcept;

/**
* @brief Returns true if `size()` returns zero, or false otherwise
*
Expand Down Expand Up @@ -228,6 +235,7 @@ class column_view_base {
data_type _type{type_id::EMPTY}; ///< Element type
size_type _size{}; ///< Number of elements
void const* _data{}; ///< Pointer to device memory containing elements
std::size_t _data_size{}; ///< Number of bytes
bitmask_type const* _null_mask{}; ///< Pointer to device memory containing
///< bitmask representing null elements.
///< Optional if `null_count() == 0`
Expand All @@ -254,7 +262,7 @@ class column_view_base {

/**
* @brief Construct a `column_view_base` from pointers to device memory for
*the elements and bitmask of the column.
* the elements and bitmask of the column.
*
* If `null_count()` is zero, `null_mask` is optional.
*
Expand All @@ -264,21 +272,22 @@ class column_view_base {
* @throws cudf::logic_error if `size < 0`
* @throws cudf::logic_error if `size > 0` but `data == nullptr`
* @throws cudf::logic_error if `type.id() == EMPTY` but `data != nullptr`
*or `null_mask != nullptr`
* or `null_mask != nullptr`
* @throws cudf::logic_error if `null_count > 0`, but `null_mask == nullptr`
* @throws cudf::logic_error if `offset < 0`
*
* @param type The element type
* @param size The number of elements
* @param data Pointer to device memory containing the column elements
* @param null_mask Pointer to device memory containing the null
* indicator bitmask
* @param data_size Number of bytes in `data`
* @param null_mask Pointer to device memory containing the null indicator bitmask
* @param null_count The number of null elements.
* @param offset Optional, index of the first element
*/
column_view_base(data_type type,
size_type size,
void const* data,
std::size_t data_size,
bitmask_type const* null_mask,
size_type null_count,
size_type offset = 0);
Expand Down Expand Up @@ -354,15 +363,15 @@ class column_view : public detail::column_view_base {
* @throws cudf::logic_error if `size < 0`
* @throws cudf::logic_error if `size > 0` but `data == nullptr`
* @throws cudf::logic_error if `type.id() == EMPTY` but `data != nullptr`
*or `null_mask != nullptr`
* or `null_mask != nullptr`
* @throws cudf::logic_error if `null_count > 0`, but `null_mask == nullptr`
* @throws cudf::logic_error if `offset < 0`
*
* @param type The element type
* @param size The number of elements
* @param data Pointer to device memory containing the column elements
* @param null_mask Pointer to device memory containing the null
* indicator bitmask
* @param data_size Number of bytes in `data`
* @param null_mask Pointer to device memory containing the null indicator bitmask
* @param null_count The number of null elements.
* @param offset Optional, index of the first element
* @param children Optional, depending on the element type, child columns may
Expand All @@ -371,6 +380,7 @@ class column_view : public detail::column_view_base {
column_view(data_type type,
size_type size,
void const* data,
std::size_t data_size,
bitmask_type const* null_mask,
size_type null_count,
size_type offset = 0,
Expand Down Expand Up @@ -418,8 +428,14 @@ class column_view : public detail::column_view_base {
*/
template <typename T, CUDF_ENABLE_IF(cudf::is_numeric<T>() or cudf::is_chrono<T>())>
column_view(device_span<T const> data)
: column_view(
cudf::data_type{cudf::type_to_id<T>()}, data.size(), data.data(), nullptr, 0, 0, {})
: column_view(cudf::data_type{cudf::type_to_id<T>()},
data.size(),
data.data(),
data.size_bytes(),
nullptr,
0,
0,
{})
{
CUDF_EXPECTS(
data.size() <= static_cast<std::size_t>(std::numeric_limits<cudf::size_type>::max()),
Expand Down Expand Up @@ -512,9 +528,8 @@ class mutable_column_view : public detail::column_view_base {
* @param type The element type
* @param size The number of elements
* @param data Pointer to device memory containing the column elements
* @param null_mask Pointer to device memory containing the null
indicator
* bitmask
* @param data_size Number of bytes in `data`
* @param null_mask Pointer to device memory containing the null indicator bitmask
* @param null_count The number of null elements.
* @param offset Optional, index of the first element
* @param children Optional, depending on the element type, child columns may
Expand All @@ -523,6 +538,7 @@ class mutable_column_view : public detail::column_view_base {
mutable_column_view(data_type type,
size_type size,
void* data,
std::size_t data_size,
bitmask_type* null_mask,
size_type null_count,
size_type offset = 0,
Expand Down
1 change: 1 addition & 0 deletions cpp/include/cudf/lists/detail/scatter.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,7 @@ std::unique_ptr<column> scatter(scalar const& slr,
auto wrapped = column_view(data_type{type_id::LIST},
1,
nullptr,
0,
static_cast<bitmask_type const*>(null_mask.data()),
slr_valid ? 0 : 1,
0,
Expand Down
2 changes: 1 addition & 1 deletion cpp/include/cudf/strings/convert/convert_datetime.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ std::unique_ptr<column> from_timestamps(
column_view const& timestamps,
std::string_view format = "%Y-%m-%dT%H:%M:%SZ",
strings_column_view const& names = strings_column_view(column_view{
data_type{type_id::STRING}, 0, nullptr, nullptr, 0}),
data_type{type_id::STRING}, 0, nullptr, 0, nullptr, 0}),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/** @} */ // end of doxygen group
Expand Down
2 changes: 1 addition & 1 deletion cpp/include/cudf/strings/convert/convert_lists.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ std::unique_ptr<column> format_list_column(
lists_column_view const& input,
string_scalar const& na_rep = string_scalar("NULL"),
strings_column_view const& separators = strings_column_view(column_view{
data_type{type_id::STRING}, 0, nullptr, nullptr, 0}),
data_type{type_id::STRING}, 0, nullptr, 0, nullptr, 0}),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/** @} */ // end of doxygen group
Expand Down
14 changes: 14 additions & 0 deletions cpp/include/cudf/tdigest/tdigest_column_view.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,13 @@ class tdigest_column_view : private column_view {
*/
[[nodiscard]] column_view weights() const;

/**
* @brief Returns the internal column of minimum values
*
* @return The internal column of minimum values
*/
[[nodiscard]] column_view minimums() const;

/**
* @brief Returns the first min value for the column. Each row corresponds
* to the minimum value for the accompanying digest.
Expand All @@ -114,6 +121,13 @@ class tdigest_column_view : private column_view {
*/
[[nodiscard]] double const* min_begin() const;

/**
* @brief Returns the internal column of maximum values
*
* @return The internal column of maximum values
*/
[[nodiscard]] column_view maximums() const;

/**
* @brief Returns the first max value for the column. Each row corresponds
* to the maximum value for the accompanying digest.
Expand Down
14 changes: 12 additions & 2 deletions cpp/include/cudf_test/tdigest_utilities.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,12 @@ void tdigest_minmax_compare(cudf::tdigest::tdigest_column_view const& tdv,
spans.end(),
expected_min->mutable_view().template begin<double>(),
column_min<T>{});
column_view result_min(data_type{type_id::FLOAT64}, tdv.size(), tdv.min_begin(), nullptr, 0);
column_view result_min(data_type{type_id::FLOAT64},
tdv.size(),
tdv.min_begin(),
tdv.minimums().size_bytes(),
nullptr,
0);
CUDF_TEST_EXPECT_COLUMNS_EQUAL(result_min, *expected_min);

auto expected_max = cudf::make_fixed_width_column(
Expand All @@ -189,7 +194,12 @@ void tdigest_minmax_compare(cudf::tdigest::tdigest_column_view const& tdv,
spans.end(),
expected_max->mutable_view().template begin<double>(),
column_max<T>{});
column_view result_max(data_type{type_id::FLOAT64}, tdv.size(), tdv.max_begin(), nullptr, 0);
column_view result_max(data_type{type_id::FLOAT64},
tdv.size(),
tdv.max_begin(),
tdv.maximums().size_bytes(),
nullptr,
0);
CUDF_TEST_EXPECT_COLUMNS_EQUAL(result_max, *expected_max);
}

Expand Down
10 changes: 8 additions & 2 deletions cpp/src/binaryop/compiled/binary_ops.cu
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ struct scalar_as_column_view {
auto col_v = column_view(s.type(),
1,
h_scalar_type_view.data(),
sizeof(T),
reinterpret_cast<bitmask_type const*>(s.validity_data()),
!s.is_valid());
return std::pair{col_v, std::unique_ptr<column>(nullptr)};
Expand All @@ -77,12 +78,17 @@ scalar_as_column_view::return_type scalar_as_column_view::operator()<cudf::strin
auto offsets_column = std::get<0>(cudf::detail::make_offsets_child_column(
offsets_transformer_itr, offsets_transformer_itr + 1, stream, mr));

auto chars_column_v = column_view(
data_type{type_id::INT8}, h_scalar_type_view.size(), h_scalar_type_view.data(), nullptr, 0);
auto chars_column_v = column_view(data_type{type_id::INT8},
h_scalar_type_view.size(),
h_scalar_type_view.data(),
h_scalar_type_view.size(),
nullptr,
0);
// Construct string column_view
auto col_v = column_view(s.type(),
1,
nullptr,
0,
reinterpret_cast<bitmask_type const*>(s.validity_data()),
static_cast<size_type>(!s.is_valid(stream)),
0,
Expand Down
5 changes: 5 additions & 0 deletions cpp/src/column/column.cu
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ column_view column::view() const
return column_view{type(),
size(),
_data.data(),
_data.size(),
static_cast<bitmask_type const*>(_null_mask.data()),
null_count(),
0,
Expand All @@ -120,6 +121,7 @@ mutable_column_view column::mutable_view()
return mutable_column_view{type(),
size(),
_data.data(),
_data.size(),
static_cast<bitmask_type*>(_null_mask.data()),
_null_count,
0,
Expand Down Expand Up @@ -180,6 +182,7 @@ struct create_column_from_view {
auto indices_view = column_view(dict_view.indices().type(),
dict_view.size(),
dict_view.indices().head(),
dict_view.indices().size_bytes(),
nullptr,
0,
dict_view.offset());
Expand Down Expand Up @@ -261,4 +264,6 @@ column::column(column_view view, rmm::cuda_stream_view stream, rmm::mr::device_m
{
}

std::size_t column::size_bytes() const noexcept { return _data.size(); }

} // namespace cudf
Loading
Loading