diff --git a/cpp/include/cudf/detail/contiguous_split.hpp b/cpp/include/cudf/detail/contiguous_split.hpp index 4c6d19739cf..d9a35470b7d 100644 --- a/cpp/include/cudf/detail/contiguous_split.hpp +++ b/cpp/include/cudf/detail/contiguous_split.hpp @@ -67,7 +67,7 @@ class metadata_builder { * @brief Destructor that will be implemented as default, required because metadata_builder_impl * is incomplete at this stage. */ - ~metadata_builder() = default; + ~metadata_builder(); /** * @brief Add a column to this metadata builder. @@ -105,9 +105,23 @@ class metadata_builder { */ std::vector build() const; + /** + * @brief Clear all added column metadata, keeping only the initial root-column-count entry. + */ + void clear(); + private: std::unique_ptr impl; }; +/** + * @copydoc pack_metadata + * @param builder The reusable builder object to create packed column metadata. + */ +std::vector pack_metadata(table_view const& table, + uint8_t const* contiguous_buffer, + size_t buffer_size, + metadata_builder& builder); + } // namespace detail } // namespace cudf diff --git a/cpp/src/copying/contiguous_split.cu b/cpp/src/copying/contiguous_split.cu index 4c3b4eddb8d..e7ac424001c 100644 --- a/cpp/src/copying/contiguous_split.cu +++ b/cpp/src/copying/contiguous_split.cu @@ -1251,6 +1251,8 @@ std::vector contiguous_split(cudf::table_view const& input, std::vector cols; cols.reserve(num_root_columns); auto cur_dst_buf_info = h_dst_buf_info; + cudf::detail::metadata_builder meta_builder(num_root_columns); + for (std::size_t idx = 0; idx < num_partitions; idx++) { // traverse the buffers and build the columns. 
cur_dst_buf_info = build_output_columns( @@ -1258,14 +1260,18 @@ std::vector contiguous_split(cudf::table_view const& input, // pack the columns cudf::table_view t{cols}; - result.push_back(packed_table{ - t, - packed_columns{ - std::make_unique>(cudf::pack_metadata( - t, reinterpret_cast(out_buffers[idx].data()), out_buffers[idx].size())), - std::make_unique(std::move(out_buffers[idx]))}}); - cols.clear(); + + cudf::packed_columns packed_cols{ + std::make_unique>( + cudf::detail::pack_metadata(t, + reinterpret_cast(out_buffers[idx].data()), + out_buffers[idx].size(), + meta_builder)), + std::make_unique(std::move(out_buffers[idx]))}; + meta_builder.clear(); + + result.emplace_back(packed_table{std::move(t), std::move(packed_cols)}); } return result; } diff --git a/cpp/src/copying/pack.cpp b/cpp/src/copying/pack.cpp index bac9aac1886..e4de4a43b68 100644 --- a/cpp/src/copying/pack.cpp +++ b/cpp/src/copying/pack.cpp @@ -35,6 +35,8 @@ namespace { * and unpack. */ struct serialized_column { + serialized_column() = default; + serialized_column(data_type _type, size_type _size, size_type _null_count, @@ -150,24 +152,22 @@ packed_columns pack(cudf::table_view const& input, return contig_split_result.empty() ? 
packed_columns{} : std::move(contig_split_result[0].data); } -template -std::vector pack_metadata(ColumnIter begin, - ColumnIter end, +std::vector pack_metadata(table_view const& table, uint8_t const* contiguous_buffer, - size_t buffer_size) + size_t buffer_size, + metadata_builder& builder) { - auto mb = metadata_builder(std::distance(begin, end)); - - std::for_each(begin, end, [&mb, &contiguous_buffer, &buffer_size](column_view const& col) { - build_column_metadata(mb, col, contiguous_buffer, buffer_size); - }); + std::for_each( + table.begin(), table.end(), [&builder, contiguous_buffer, buffer_size](column_view const& col) { + build_column_metadata(builder, col, contiguous_buffer, buffer_size); + }); - return mb.build(); + return builder.build(); } class metadata_builder_impl { public: - metadata_builder_impl() = default; + metadata_builder_impl(size_type const num_root_columns) { metadata.reserve(num_root_columns); } void add_column_info_to_meta(data_type const col_type, size_type const col_size, @@ -182,14 +182,16 @@ class metadata_builder_impl { std::vector build() const { - // convert to anonymous bytes - std::vector metadata_bytes; - auto const metadata_begin = reinterpret_cast(metadata.data()); - std::copy(metadata_begin, - metadata_begin + (metadata.size() * sizeof(detail::serialized_column)), - std::back_inserter(metadata_bytes)); - - return metadata_bytes; + auto output = std::vector(metadata.size() * sizeof(detail::serialized_column)); + std::memcpy(output.data(), metadata.data(), output.size()); + return output; + } + + void clear() + { + // Drop everything except the first entry: the stub recording the number of top-level + // columns, which metadata_builder's constructor adds. 
+ metadata.resize(1); } private: @@ -228,13 +230,16 @@ table_view unpack(uint8_t const* metadata, uint8_t const* gpu_data) } metadata_builder::metadata_builder(size_type const num_root_columns) - : impl(std::make_unique()) + : impl(std::make_unique(num_root_columns + + 1 /*extra slot for the stub entry added below*/)) { // first metadata entry is a stub indicating how many total (top level) columns // there are impl->add_column_info_to_meta(data_type{type_id::EMPTY}, num_root_columns, 0, -1, -1, 0); } +metadata_builder::~metadata_builder() = default; + void metadata_builder::add_column_info_to_meta(data_type const col_type, size_type const col_size, size_type const col_null_count, @@ -248,6 +253,8 @@ void metadata_builder::add_column_info_to_meta(data_type const col_type, std::vector metadata_builder::build() const { return impl->build(); } +void metadata_builder::clear() { return impl->clear(); } + } // namespace detail /** @@ -267,9 +274,10 @@ std::vector pack_metadata(table_view const& table, size_t buffer_size) { CUDF_FUNC_RANGE(); - return table.is_empty() - ? std::vector{} - : detail::pack_metadata(table.begin(), table.end(), contiguous_buffer, buffer_size); + if (table.is_empty()) { return std::vector{}; } + + auto builder = cudf::detail::metadata_builder(table.num_columns()); + return detail::pack_metadata(table, contiguous_buffer, buffer_size, builder); } /**