Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/branch-24.04' into feat/copying_exceptions
Browse files Browse the repository at this point in the history
  • Loading branch information
vyasr committed Mar 18, 2024
2 parents 0630e34 + 2a85404 commit 6880d69
Show file tree
Hide file tree
Showing 23 changed files with 96 additions and 122 deletions.
3 changes: 1 addition & 2 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -125,5 +125,4 @@ jobs:
branch: ${{ inputs.branch }}
date: ${{ inputs.date }}
sha: ${{ inputs.sha }}
# pr mode uses the HEAD of the branch, which is also correct for nightlies
script: ci/cudf_pandas_scripts/pandas-tests/run.sh pr
script: ci/cudf_pandas_scripts/pandas-tests/run.sh main
12 changes: 2 additions & 10 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,6 @@ repos:
args: ["--config-root=python/", "--resolve-all-configs"]
files: python/.*
types_or: [python, cython, pyi]
- repo: https://github.com/psf/black
rev: 23.12.1
hooks:
- id: black
files: python/.*
# Explicitly specify the pyproject.toml at the repo root, not per-project.
args: ["--config", "pyproject.toml"]
- repo: https://github.com/MarcoGorelli/cython-lint
rev: v0.16.0
hooks:
Expand Down Expand Up @@ -64,9 +57,6 @@ repos:
# Use the cudf_kafka isort orderings in notebooks so that dask
# and RAPIDS packages have their own sections.
args: ["--settings-file=python/cudf_kafka/pyproject.toml"]
- id: nbqa-black
# Explicitly specify the pyproject.toml at the repo root, not per-project.
args: ["--config=pyproject.toml"]
- repo: https://github.com/pre-commit/mirrors-clang-format
rev: v16.0.6
hooks:
Expand Down Expand Up @@ -155,6 +145,8 @@ repos:
hooks:
- id: ruff
files: python/.*$
- id: ruff-format
files: python/.*$
- repo: https://github.com/rapidsai/pre-commit-hooks
rev: v0.0.1
hooks:
Expand Down
1 change: 1 addition & 0 deletions ci/cudf_pandas_scripts/pandas-tests/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,4 @@ python python/cudf/cudf/pandas/scripts/summarize-test-results.py --output json p
RAPIDS_ARTIFACTS_DIR=${RAPIDS_ARTIFACTS_DIR:-"${PWD}/artifacts"}
mkdir -p "${RAPIDS_ARTIFACTS_DIR}"
mv pandas-testing/${PANDAS_TESTS_BRANCH}-results.json ${RAPIDS_ARTIFACTS_DIR}/
rapids-upload-to-s3 ${RAPIDS_ARTIFACTS_DIR}/${PANDAS_TESTS_BRANCH}-results.json "${RAPIDS_ARTIFACTS_DIR}"
3 changes: 1 addition & 2 deletions cpp/src/io/comp/gpuinflate.cu
Original file line number Diff line number Diff line change
Expand Up @@ -804,8 +804,7 @@ __device__ void process_symbols(inflate_state_s* s, int t)
dist = symbol >> 16;
for (int i = t; i < len; i += 32) {
uint8_t const* src = out + ((i >= dist) ? (i % dist) : i) - dist;
uint8_t b = (src < outbase) ? 0 : *src;
if (out + i < outend) { out[i] = b; }
if (out + i < outend and src >= outbase) { out[i] = *src; }
}
out += len;
pos++;
Expand Down
5 changes: 5 additions & 0 deletions cpp/src/io/utilities/datasource.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,11 @@ class file_source : public datasource {
explicit file_source(char const* filepath) : _file(filepath, O_RDONLY)
{
if (detail::cufile_integration::is_kvikio_enabled()) {
// Workaround for https://github.com/rapidsai/cudf/issues/14140, where cuFileDriverOpen errors
// out if no CUDA calls have been made before it. This is a no-op if the CUDA context is
// already initialized
cudaFree(0);

_kvikio_file = kvikio::FileHandle(filepath);
CUDF_LOG_INFO("Reading a file using kvikIO, with compatibility mode {}.",
_kvikio_file.is_compat_mode_on() ? "on" : "off");
Expand Down
7 changes: 5 additions & 2 deletions cpp/tests/error/error_handling_test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,8 @@ TEST(DebugAssertDeathTest, cudf_assert_false)
testing::FLAGS_gtest_death_test_style = "threadsafe";

auto call_kernel = []() {
assert_false_kernel<<<1, 1>>>();
auto const stream = cudf::get_default_stream().value();
assert_false_kernel<<<1, 1, 0, stream>>>();

// Kernel should fail with `cudaErrorAssert`
// This error invalidates the current device context, so we need to kill
Expand All @@ -114,7 +115,8 @@ TEST(DebugAssertDeathTest, cudf_assert_false)

TEST(DebugAssert, cudf_assert_true)
{
assert_true_kernel<<<1, 1>>>();
auto const stream = cudf::get_default_stream().value();
assert_true_kernel<<<1, 1, 0, stream>>>();
ASSERT_EQ(cudaSuccess, cudaDeviceSynchronize());
}

Expand All @@ -136,6 +138,7 @@ int main(int argc, char** argv)
auto adaptor = make_stream_checking_resource_adaptor(
resource, error_on_invalid_stream, check_default_stream);
rmm::mr::set_current_device_resource(&adaptor);
return RUN_ALL_TESTS();
}
return RUN_ALL_TESTS();
}
24 changes: 4 additions & 20 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,22 +1,4 @@
[tool.black]
line-length = 79
target-version = ["py39"]
include = '\.py?$'
force-exclude = '''
/(
thirdparty |
\.eggs |
\.git |
\.hg |
\.mypy_cache |
\.tox |
\.venv |
_build |
buck-out |
build |
dist
)/
'''
# Copyright (c) 2019-2024, NVIDIA CORPORATION.

[tool.pydocstyle]
# Due to https://github.com/PyCQA/pydocstyle/issues/363, we must exclude rather
Expand Down Expand Up @@ -60,13 +42,15 @@ select = ["E", "F", "W"]
ignore = [
# whitespace before :
"E203",
# line-too-long (due to Copyright header)
"E501",
]
fixable = ["ALL"]
exclude = [
# TODO: Remove this in a follow-up where we fix __all__.
"__init__.py",
]
line-length = 88
line-length = 79

[tool.ruff.per-file-ignores]
# Lots of pytest implicitly injected attributes in conftest-patch.py
Expand Down
5 changes: 3 additions & 2 deletions python/cudf/cudf/core/_internals/timezones.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,9 @@ def _read_tzfile_as_frame(tzdir, zone_name):
if not transition_times_and_offsets:
# this happens for UTC-like zones
min_date = np.int64(np.iinfo("int64").min + 1).astype("M8[s]")
transition_times_and_offsets = as_column([min_date]), as_column(
[np.timedelta64(0, "s")]
transition_times_and_offsets = (
as_column([min_date]),
as_column([np.timedelta64(0, "s")]),
)

return DataFrame._from_data(
Expand Down
3 changes: 2 additions & 1 deletion python/cudf/cudf/core/column/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -1731,7 +1731,8 @@ def as_column(
If None (default), treats NaN values in arbitrary as null if there is
no mask passed along with it. If True, combines the mask and NaNs to
form a new validity mask. If False, leaves NaN values as is.
Only applies when arbitrary is not a cudf object (Index, Series, Column).
Only applies when arbitrary is not a cudf object
(Index, Series, Column).
dtype : optional
Optionally typecast the constructed Column to the given
dtype.
Expand Down
33 changes: 21 additions & 12 deletions python/cudf/cudf/core/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -470,9 +470,12 @@ class _DataFrameIlocIndexer(_DataFrameIndexer):
_frame: DataFrame

def __getitem__(self, arg):
row_key, (
col_is_scalar,
column_names,
(
row_key,
(
col_is_scalar,
column_names,
),
) = indexing_utils.destructure_dataframe_iloc_indexer(arg, self._frame)
row_spec = indexing_utils.parse_row_iloc_indexer(
row_key, len(self._frame)
Expand Down Expand Up @@ -6901,16 +6904,18 @@ def stack(self, level=-1, dropna=no_default, future_stack=False):
if future_stack:
if dropna is not no_default:
raise ValueError(
"dropna must be unspecified with future_stack=True as the new "
"implementation does not introduce rows of NA values. This "
"argument will be removed in a future version of cudf."
"dropna must be unspecified with future_stack=True as "
"the new implementation does not introduce rows of NA "
"values. This argument will be removed in a future "
"version of cudf."
)
else:
if dropna is not no_default or self._data.nlevels > 1:
warnings.warn(
"The previous implementation of stack is deprecated and will be "
"removed in a future version of cudf. Specify future_stack=True "
"to adopt the new implementation and silence this warning.",
"The previous implementation of stack is deprecated and "
"will be removed in a future version of cudf. Specify "
"future_stack=True to adopt the new implementation and "
"silence this warning.",
FutureWarning,
)
if dropna is no_default:
Expand Down Expand Up @@ -7028,9 +7033,13 @@ def unnamed_group_generator():
unique_named_levels, axis=0, fill_value=-1
).values
else:
yield grpdf.reindex(
unique_named_levels, axis=0, fill_value=-1
).sort_index().values
yield (
grpdf.reindex(
unique_named_levels, axis=0, fill_value=-1
)
.sort_index()
.values
)
else:
if future_stack:
yield column_idx_df.values
Expand Down
13 changes: 8 additions & 5 deletions python/cudf/cudf/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,9 +282,12 @@ def __iter__(self):
if isinstance(group_names, cudf.BaseIndex):
group_names = group_names.to_pandas()
for i, name in enumerate(group_names):
yield (name,) if isinstance(self._by, list) and len(
self._by
) == 1 else name, grouped_values[offsets[i] : offsets[i + 1]]
yield (
(name,)
if isinstance(self._by, list) and len(self._by) == 1
else name,
grouped_values[offsets[i] : offsets[i + 1]],
)

@property
def dtypes(self):
Expand Down Expand Up @@ -2269,8 +2272,8 @@ def fillna(
"""
warnings.warn(
"groupby fillna is deprecated and "
"will be removed in a future version. Use groupby ffill or groupby bfill "
"for forward or backward filling instead.",
"will be removed in a future version. Use groupby ffill "
"or groupby bfill for forward or backward filling instead.",
FutureWarning,
)
if inplace:
Expand Down
35 changes: 11 additions & 24 deletions python/cudf/cudf/core/indexed_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,8 +211,8 @@ def _get_label_range_or_mask(index, start, stop, step):
return slice(start_loc, stop_loc)
else:
raise KeyError(
"Value based partial slicing on non-monotonic DatetimeIndexes "
"with non-existing keys is not allowed.",
"Value based partial slicing on non-monotonic "
"DatetimeIndexes with non-existing keys is not allowed.",
)
elif start is not None:
boolean_mask = index >= start
Expand Down Expand Up @@ -2449,7 +2449,8 @@ def squeeze(self, axis: Literal["index", "columns", 0, 1, None] = None):
----------
axis : {0 or 'index', 1 or 'columns', None}, default None
A specific axis to squeeze. By default, all length-1 axes are
squeezed. For `Series` this parameter is unused and defaults to `None`.
squeezed. For `Series` this parameter is unused and defaults
to `None`.
Returns
-------
Expand Down Expand Up @@ -5835,9 +5836,7 @@ def floordiv(self, other, axis, level=None, fill_value=None): # noqa: D102
),
)
)
def rfloordiv(
self, other, axis, level=None, fill_value=None
): # noqa: D102
def rfloordiv(self, other, axis, level=None, fill_value=None): # noqa: D102
if level is not None:
raise NotImplementedError("level parameter is not supported yet.")

Expand Down Expand Up @@ -5967,9 +5966,7 @@ def rtruediv(self, other, axis, level=None, fill_value=None): # noqa: D102
),
)
)
def eq(
self, other, axis="columns", level=None, fill_value=None
): # noqa: D102
def eq(self, other, axis="columns", level=None, fill_value=None): # noqa: D102
return self._binaryop(
other=other, op="__eq__", fill_value=fill_value, can_reindex=True
)
Expand Down Expand Up @@ -6009,9 +6006,7 @@ def eq(
),
)
)
def ne(
self, other, axis="columns", level=None, fill_value=None
): # noqa: D102
def ne(self, other, axis="columns", level=None, fill_value=None): # noqa: D102
return self._binaryop(
other=other, op="__ne__", fill_value=fill_value, can_reindex=True
)
Expand Down Expand Up @@ -6051,9 +6046,7 @@ def ne(
),
)
)
def lt(
self, other, axis="columns", level=None, fill_value=None
): # noqa: D102
def lt(self, other, axis="columns", level=None, fill_value=None): # noqa: D102
return self._binaryop(
other=other, op="__lt__", fill_value=fill_value, can_reindex=True
)
Expand Down Expand Up @@ -6093,9 +6086,7 @@ def lt(
),
)
)
def le(
self, other, axis="columns", level=None, fill_value=None
): # noqa: D102
def le(self, other, axis="columns", level=None, fill_value=None): # noqa: D102
return self._binaryop(
other=other, op="__le__", fill_value=fill_value, can_reindex=True
)
Expand Down Expand Up @@ -6135,9 +6126,7 @@ def le(
),
)
)
def gt(
self, other, axis="columns", level=None, fill_value=None
): # noqa: D102
def gt(self, other, axis="columns", level=None, fill_value=None): # noqa: D102
return self._binaryop(
other=other, op="__gt__", fill_value=fill_value, can_reindex=True
)
Expand Down Expand Up @@ -6177,9 +6166,7 @@ def gt(
),
)
)
def ge(
self, other, axis="columns", level=None, fill_value=None
): # noqa: D102
def ge(self, other, axis="columns", level=None, fill_value=None): # noqa: D102
return self._binaryop(
other=other, op="__ge__", fill_value=fill_value, can_reindex=True
)
Expand Down
6 changes: 3 additions & 3 deletions python/cudf/cudf/core/tools/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,9 +164,9 @@ def to_datetime(

if errors == "ignore":
warnings.warn(
"errors='ignore' is deprecated and will raise in a future version. "
"Use to_datetime without passing `errors` and catch exceptions "
"explicitly instead",
"errors='ignore' is deprecated and will raise in a "
"future version. Use to_datetime without passing `errors` "
"and catch exceptions explicitly instead",
FutureWarning,
)

Expand Down
6 changes: 3 additions & 3 deletions python/cudf/cudf/core/tools/numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,9 +97,9 @@ def to_numeric(arg, errors="raise", downcast=None):
raise ValueError("invalid error value specified")
elif errors == "ignore":
warnings.warn(
"errors='ignore' is deprecated and will raise in a future version. "
"Use to_numeric without passing `errors` and catch exceptions "
"explicitly instead",
"errors='ignore' is deprecated and will raise in "
"a future version. Use to_numeric without passing `errors` "
"and catch exceptions explicitly instead",
FutureWarning,
)

Expand Down
4 changes: 2 additions & 2 deletions python/cudf/cudf/core/udf/strings_lowering.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2022-2023, NVIDIA CORPORATION.
# Copyright (c) 2022-2024, NVIDIA CORPORATION.

import operator
from functools import partial
Expand Down Expand Up @@ -249,7 +249,7 @@ def replace_impl(context, builder, sig, args):
replacement_ptr = builder.alloca(args[2].type)

builder.store(args[0], src_ptr)
builder.store(args[1], to_replace_ptr),
builder.store(args[1], to_replace_ptr)
builder.store(args[2], replacement_ptr)

udf_str_ptr = builder.alloca(default_manager[udf_string].get_value_type())
Expand Down
4 changes: 1 addition & 3 deletions python/cudf/cudf/core/udf/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,7 @@
from cudf.utils.utils import initfunc

# Maximum size of a string column is 2 GiB
_STRINGS_UDF_DEFAULT_HEAP_SIZE = os.environ.get(
"STRINGS_UDF_HEAP_SIZE", 2**31
)
_STRINGS_UDF_DEFAULT_HEAP_SIZE = os.environ.get("STRINGS_UDF_HEAP_SIZE", 2**31)
_heap_size = 0
_cudf_str_dtype = dtype(str)

Expand Down
Loading

0 comments on commit 6880d69

Please sign in to comment.