From f9e9c650d2c0a1a46bebf17c8b0049d187c86b05 Mon Sep 17 00:00:00 2001 From: ChadGueli Date: Thu, 7 Apr 2022 15:10:50 -0400 Subject: [PATCH 1/2] Partially fixes Issue #849 --- dask_ml/model_selection/_search.py | 13 +++++++------ dask_ml/model_selection/utils.py | 4 ++-- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/dask_ml/model_selection/_search.py b/dask_ml/model_selection/_search.py index fbaa99596..ca9af7787 100644 --- a/dask_ml/model_selection/_search.py +++ b/dask_ml/model_selection/_search.py @@ -477,8 +477,8 @@ def do_fit( out_append = out.append for X, y, t, p in zip(Xs, ys, tokens, params): - if (X, y, t) in seen: - out_append(seen[X, y, t]) + if (id(X), id(y), t) in seen: + out_append(seen[id(X), id(y), t]) else: for n, fit_params in n_and_fit_params: dsk[(fit_name, m, n)] = ( @@ -491,7 +491,7 @@ def do_fit( p, fit_params, ) - seen[(X, y, t)] = (fit_name, m) + seen[(id(X), id(y), t)] = (fit_name, m) out_append((fit_name, m)) m += 1 @@ -564,8 +564,8 @@ def do_fit_transform( out_append = out.append for X, y, t, p in zip(Xs, ys, tokens, params): - if (X, y, t) in seen: - out_append(seen[X, y, t]) + if (id(X), id(y), t) in seen: + out_append(seen[id(X), id(y), t]) else: for n, fit_params in n_and_fit_params: dsk[(fit_Xt_name, m, n)] = ( @@ -580,7 +580,7 @@ def do_fit_transform( ) dsk[(fit_name, m, n)] = (getitem, (fit_Xt_name, m, n), 0) dsk[(Xt_name, m, n)] = (getitem, (fit_Xt_name, m, n), 1) - seen[X, y, t] = m + seen[id(X), id(y), t] = m out_append(m) m += 1 @@ -664,6 +664,7 @@ def _do_fit_step( # Extract the proper subset of Xs, ys sub_Xs = get(ids, Xs) sub_ys = get(ids, ys) + # Only subset the parameters/tokens if necessary if sub_fields: sub_tokens = list(pluck(sub_inds, get(ids, tokens))) diff --git a/dask_ml/model_selection/utils.py b/dask_ml/model_selection/utils.py index db5f9e0f5..01b4968a7 100644 --- a/dask_ml/model_selection/utils.py +++ b/dask_ml/model_selection/utils.py @@ -76,9 +76,9 @@ def to_keys(dsk, *args): if x is None: yield None elif isinstance(x, (da.Array, dd.DataFrame)): - x = delayed(x) + #x = delayed(x) dsk.update(x.dask) - yield x.key + yield x#.key elif isinstance(x, Delayed): dsk.update(x.dask) yield x.key From 5a81eb6dad9730779b7a209f36af095b50d229cb Mon Sep 17 00:00:00 2001 From: ChadGueli Date: Fri, 8 Apr 2022 15:22:20 -0400 Subject: [PATCH 2/2] Removed Implicit NumPy Conversion #849 --- dask_ml/model_selection/utils.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/dask_ml/model_selection/utils.py b/dask_ml/model_selection/utils.py index 01b4968a7..ee84dc5ff 100644 --- a/dask_ml/model_selection/utils.py +++ b/dask_ml/model_selection/utils.py @@ -75,10 +75,13 @@ def to_keys(dsk, *args): for x in args: if x is None: yield None - elif isinstance(x, (da.Array, dd.DataFrame)): - #x = delayed(x) + elif isinstance(x, da.Array): dsk.update(x.dask) - yield x#.key + yield x + elif isinstance(x, dd.DataFrame): + x = delayed(x) + dsk.update(x.dask) + yield x.key elif isinstance(x, Delayed): dsk.update(x.dask) yield x.key