Skip to content

Commit

Permalink
Remove cudf._lib.null_mask in favor of inlining pylibcudf (#17440)
Browse files (browse the repository at this point in the history)
Contributes to #17317

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - Lawrence Mitchell (https://github.com/wence-)

URL: #17440
  • Loading branch information
mroeschke authored Nov 27, 2024
1 parent 4533085 commit 6e91f09
Show file tree
Hide file tree
Showing 12 changed files with 43 additions and 107 deletions.
1 change: 0 additions & 1 deletion python/cudf/cudf/_lib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ set(cython_sources
interop.pyx
json.pyx
merge.pyx
null_mask.pyx
orc.pyx
parquet.pyx
reduce.pyx
Expand Down
1 change: 0 additions & 1 deletion python/cudf/cudf/_lib/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
interop,
json,
merge,
null_mask,
nvtext,
orc,
parquet,
Expand Down
21 changes: 14 additions & 7 deletions python/cudf/cudf/_lib/column.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ import pylibcudf
import rmm

import cudf
import cudf._lib as libcudf
from cudf.core.buffer import (
Buffer,
ExposureTrackedBuffer,
Expand All @@ -36,7 +35,6 @@ from cudf._lib.types cimport (
dtype_to_pylibcudf_type,
)

from cudf._lib.null_mask import bitmask_allocation_size_bytes
from cudf._lib.types import dtype_from_pylibcudf_column

cimport pylibcudf.libcudf.copying as cpp_copying
Expand Down Expand Up @@ -159,7 +157,10 @@ cdef class Column:
if self.base_mask is None or self.offset == 0:
self._mask = self.base_mask
else:
self._mask = libcudf.null_mask.copy_bitmask(self)
with acquire_spill_lock():
self._mask = as_buffer(
pylibcudf.null_mask.copy_bitmask(self.to_pylibcudf(mode="read"))
)
return self._mask

@property
Expand All @@ -183,7 +184,9 @@ cdef class Column:

if value is not None:
# bitmask size must be relative to offset = 0 data.
required_size = bitmask_allocation_size_bytes(self.base_size)
required_size = pylibcudf.null_mask.bitmask_allocation_size_bytes(
self.base_size
)
if value.size < required_size:
error_msg = (
"The Buffer for mask is smaller than expected, "
Expand Down Expand Up @@ -220,7 +223,7 @@ cdef class Column:
and compute new data Buffers zero-copy that use pointer arithmetic to
properly adjust the pointer.
"""
mask_size = bitmask_allocation_size_bytes(self.size)
mask_size = pylibcudf.null_mask.bitmask_allocation_size_bytes(self.size)
required_num_bytes = -(-self.size // 8) # ceiling divide
error_msg = (
"The value for mask is smaller than expected, got {} bytes, "
Expand Down Expand Up @@ -790,13 +793,17 @@ cdef class Column:
mask = as_buffer(
rmm.DeviceBuffer(
ptr=mask_ptr,
size=bitmask_allocation_size_bytes(base_size)
size=pylibcudf.null_mask.bitmask_allocation_size_bytes(
base_size
)
)
)
else:
mask = as_buffer(
data=mask_ptr,
size=bitmask_allocation_size_bytes(base_size),
size=pylibcudf.null_mask.bitmask_allocation_size_bytes(
base_size
),
owner=mask_owner,
exposed=True
)
Expand Down
65 changes: 0 additions & 65 deletions python/cudf/cudf/_lib/null_mask.pyx

This file was deleted.

23 changes: 14 additions & 9 deletions python/cudf/cudf/core/column/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,6 @@
import cudf
from cudf import _lib as libcudf
from cudf._lib.column import Column
from cudf._lib.null_mask import (
MaskState,
bitmask_allocation_size_bytes,
create_null_mask,
)
from cudf._lib.scalar import as_device_scalar
from cudf._lib.stream_compaction import (
apply_boolean_mask,
Expand Down Expand Up @@ -383,7 +378,7 @@ def memory_usage(self) -> int:
if self.data is not None:
n += self.data.size
if self.nullable:
n += bitmask_allocation_size_bytes(self.size)
n += plc.null_mask.bitmask_allocation_size_bytes(self.size)
return n

def _fill(
Expand All @@ -410,7 +405,11 @@ def _fill(
)

if not slr.is_valid() and not self.nullable:
mask = create_null_mask(self.size, state=MaskState.ALL_VALID)
mask = as_buffer(
plc.null_mask.create_null_mask(
self.size, plc.null_mask.MaskState.ALL_VALID
)
)
self.set_base_mask(mask)

libcudf.filling.fill_in_place(self, begin, end, slr.device_value)
Expand Down Expand Up @@ -1553,7 +1552,11 @@ def column_empty(
data = as_buffer(rmm.DeviceBuffer(size=row_count * dtype.itemsize))

if masked:
mask = create_null_mask(row_count, state=MaskState.ALL_NULL)
mask = as_buffer(
plc.null_mask.create_null_mask(
row_count, plc.null_mask.MaskState.ALL_NULL
)
)
else:
mask = None

Expand Down Expand Up @@ -2210,7 +2213,9 @@ def _mask_from_cuda_array_interface_desc(obj, cai_mask) -> Buffer:
typestr = desc["typestr"]
typecode = typestr[1]
if typecode == "t":
mask_size = bitmask_allocation_size_bytes(desc["shape"][0])
mask_size = plc.null_mask.bitmask_allocation_size_bytes(
desc["shape"][0]
)
return as_buffer(data=desc["data"][0], size=mask_size, owner=obj)
elif typecode == "b":
col = as_column(cai_mask)
Expand Down
7 changes: 2 additions & 5 deletions python/cudf/cudf/core/column/lists.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,10 +69,7 @@ def __init__(

@cached_property
def memory_usage(self):
n = 0
if self.nullable:
n += cudf._lib.null_mask.bitmask_allocation_size_bytes(self.size)

n = super().memory_usage
child0_size = (self.size + 1) * self.base_children[0].dtype.itemsize
current_base_child = self.base_children[1]
current_offset = self.offset
Expand All @@ -97,7 +94,7 @@ def memory_usage(self):
) * current_base_child.dtype.itemsize

if current_base_child.nullable:
n += cudf._lib.null_mask.bitmask_allocation_size_bytes(
n += plc.null_mask.bitmask_allocation_size_bytes(
current_base_child.size
)
return n
Expand Down
6 changes: 1 addition & 5 deletions python/cudf/cudf/core/column/string.py
Original file line number Diff line number Diff line change
Expand Up @@ -5750,17 +5750,13 @@ def end_offset(self) -> int:

@cached_property
def memory_usage(self) -> int:
n = 0
if self.data is not None:
n += self.data.size
n = super().memory_usage
if len(self.base_children) == 1:
child0_size = (self.size + 1) * self.base_children[
0
].dtype.itemsize

n += child0_size
if self.nullable:
n += cudf._lib.null_mask.bitmask_allocation_size_bytes(self.size)
return n

@property
Expand Down
5 changes: 1 addition & 4 deletions python/cudf/cudf/core/column/struct.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,10 +101,7 @@ def to_pandas(

@cached_property
def memory_usage(self) -> int:
n = 0
if self.nullable:
n += cudf._lib.null_mask.bitmask_allocation_size_bytes(self.size)

n = super().memory_usage
for child in self.children:
n += child.memory_usage

Expand Down
9 changes: 5 additions & 4 deletions python/cudf/cudf/core/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
from cudf.core import column, df_protocol, indexing_utils, reshape
from cudf.core._compat import PANDAS_LT_300
from cudf.core.abc import Serializable
from cudf.core.buffer import acquire_spill_lock
from cudf.core.buffer import acquire_spill_lock, as_buffer
from cudf.core.column import (
CategoricalColumn,
ColumnBase,
Expand Down Expand Up @@ -3191,9 +3191,10 @@ def where(self, cond, other=None, inplace=False, axis=None, level=None):

out.append(result._with_type_metadata(col.dtype))
else:
out_mask = cudf._lib.null_mask.create_null_mask(
len(source_col),
state=cudf._lib.null_mask.MaskState.ALL_NULL,
out_mask = as_buffer(
plc.null_mask.create_null_mask(
len(source_col), plc.null_mask.MaskState.ALL_NULL
)
)
out.append(source_col.set_mask(out_mask))

Expand Down
4 changes: 1 addition & 3 deletions python/cudf/cudf/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
import cudf
from cudf import _lib as libcudf
from cudf._lib import groupby as libgroupby
from cudf._lib.null_mask import bitmask_or
from cudf._lib.sort import segmented_sort_by_key
from cudf._lib.types import size_type_dtype
from cudf.api.extensions import no_default
Expand Down Expand Up @@ -1118,8 +1117,7 @@ def ngroup(self, ascending=True):
"""
index = self.grouping.keys.unique().sort_values()
num_groups = len(index)
_, has_null_group = bitmask_or([*index._columns])

has_null_group = any(col.has_nulls() for col in index._columns)
if ascending:
# Count ascending from 0 to num_groups - 1
groups = range(num_groups)
Expand Down
5 changes: 3 additions & 2 deletions python/cudf/cudf/testing/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,9 @@
from numba.cuda.cudadecl import registry as cuda_decl_registry
from numba.cuda.cudaimpl import lower as cuda_lower

import pylibcudf as plc

import cudf
from cudf._lib.null_mask import bitmask_allocation_size_bytes
from cudf.core.column.timedelta import _unit_to_nanoseconds_conversion
from cudf.core.udf.strings_lowering import cast_string_view_to_udf_string
from cudf.core.udf.strings_typing import StringView, string_view, udf_string
Expand Down Expand Up @@ -91,7 +92,7 @@ def random_bitmask(size):
size : int
number of bits
"""
sz = bitmask_allocation_size_bytes(size)
sz = plc.null_mask.bitmask_allocation_size_bytes(size)
rng = np.random.default_rng(seed=0)
data = rng.integers(0, 255, dtype="u1", size=sz)
return data.view("i1")
Expand Down
3 changes: 2 additions & 1 deletion python/cudf/cudf/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import numpy as np
import pandas as pd

import pylibcudf as plc
import rmm

import cudf
Expand Down Expand Up @@ -252,7 +253,7 @@ def pa_mask_buffer_to_mask(mask_buf, size):
"""
Convert PyArrow mask buffer to cuDF mask buffer
"""
mask_size = cudf._lib.null_mask.bitmask_allocation_size_bytes(size)
mask_size = plc.null_mask.bitmask_allocation_size_bytes(size)
if mask_buf.size < mask_size:
dbuf = rmm.DeviceBuffer(size=mask_size)
dbuf.copy_from_host(np.asarray(mask_buf).view("u1"))
Expand Down

0 comments on commit 6e91f09

Please sign in to comment.