Skip to content

Commit

Permalink
Merge pull request #801 from NVIDIA/branch-24.12
Browse files Browse the repository at this point in the history
[auto-merge] branch-24.12 to branch-25.02 [skip ci] [bot]
  • Loading branch information
nvauto authored Dec 9, 2024
2 parents 384cfb0 + 949e7aa commit 2317937
Showing 1 changed file with 45 additions and 0 deletions.
45 changes: 45 additions & 0 deletions python/tests/test_logistic_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -2264,3 +2264,48 @@ def test_sparse_all_zeroes(
cpu_lr = SparkLogisticRegression(**params)
cpu_model = cpu_lr.fit(bdf)
compare_model(gpu_model, cpu_model, bdf)


@pytest.mark.parametrize("standardization", [True])
@pytest.mark.parametrize("fit_intercept", [True, False])
def test_sparse_one_gpu_all_zeroes(
    standardization: bool,
    fit_intercept: bool,
    gpu_number: int,
) -> None:
    """Fit sparse logistic regression on data that is almost entirely empty vectors.

    The dataset has four rows of 2-dimensional sparse vectors: one row with
    non-zero entries and three rows with no stored entries at all. A GPU
    ``LogisticRegression`` (with ``enable_sparse_data_optim=True``) and a
    ``SparkLogisticRegression`` are fitted with the same parameters; the GPU
    model is passed to ``check_sparse_model_preprocess`` and the two models
    are then compared with ``compare_model``.

    Args:
        standardization: Value forwarded as the ``standardization`` param.
        fit_intercept: Value forwarded as the ``fitIntercept`` param.
        gpu_number: Number of available GPUs; the test is skipped when fewer
            than two are available.
    """
    if gpu_number < 2:
        pytest.skip(
            reason="test_sparse_one_gpu_all_zeroes requires at least 2 GPUs"
        )

    # Checked before any Spark work so that no session or DataFrame is built
    # when the test is going to return without fitting anything.
    # NOTE(review): the reason for the 3.4.0 floor is not visible here -
    # presumably sparse input support depends on it; confirm.
    if version.parse(pyspark.__version__) < version.parse("3.4.0"):
        return

    with CleanSparkSession() as spark:
        # One row carries values; the remaining three are empty sparse vectors.
        data = [
            Row(label=1.0, weight=1.0, features=Vectors.sparse(2, {0: 10.0, 1: 20.0})),
            Row(label=1.0, weight=1.0, features=Vectors.sparse(2, {})),
            Row(label=0.0, weight=1.0, features=Vectors.sparse(2, {})),
            Row(label=0.0, weight=1.0, features=Vectors.sparse(2, {})),
        ]

        bdf = spark.createDataFrame(data)

        params: Dict[str, Any] = {
            "regParam": 0.1,
            "fitIntercept": fit_intercept,
            "standardization": standardization,
            "featuresCol": "features",
            "labelCol": "label",
        }

        # NOTE(review): gpu_number is only used for the skip check above and
        # is not forwarded to the estimator, so the worker count is whatever
        # the estimator defaults to - confirm this is intended.
        gpu_lr = LogisticRegression(
            enable_sparse_data_optim=True, verbose=True, **params
        )
        gpu_model = gpu_lr.fit(bdf)
        check_sparse_model_preprocess(gpu_model, bdf)

        cpu_lr = SparkLogisticRegression(**params)
        cpu_model = cpu_lr.fit(bdf)
        compare_model(gpu_model, cpu_model, bdf)

0 comments on commit 2317937

Please sign in to comment.