From b797058cf2cc0e4283b005069ed02761174da487 Mon Sep 17 00:00:00 2001 From: Parashar Date: Thu, 23 Nov 2023 15:22:07 +0100 Subject: [PATCH 1/2] Do not update feat I --- VERSION | 2 +- scarf/assay.py | 17 +++++++---------- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/VERSION b/VERSION index cf86fe7..a4b9c5a 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.28.7 +0.28.8 diff --git a/scarf/assay.py b/scarf/assay.py index 546c658..41ffeee 100644 --- a/scarf/assay.py +++ b/scarf/assay.py @@ -818,16 +818,12 @@ class this method is optimized for scRNA-Seq data and takes additional self.normMethod = norm_method_cache return val - def set_feature_stats(self, cell_key: str, min_cells: int) -> None: + def set_feature_stats(self, cell_key: str) -> None: """Calculates summary statistics for the features of the assay using only cells that are marked True by the 'cell_key' parameter. Args: cell_key: Name of the key (column) from cell attribute table. - min_cells: Minimum number of cells across which a given feature should be present. If a feature is present - (has non zero un-normalized value) in fewer cells that it is ignored and summary statistics - are not calculated for that feature. Also, such features will be disabled and `I` value of these - features in the feature attribute table will be set to False Returns: None """ @@ -855,9 +851,9 @@ def set_feature_stats(self, cell_key: str, min_cells: int) -> None: f"({self.name}) Computing sigmas", self.nthreads, ) - idx = n_cells > min_cells - self.feats.update_key(idx, key=feat_key) - n_cells, tot, sigmas = n_cells[idx], tot[idx], sigmas[idx] + # idx = n_cells > min_cells + # self.feats.update_key(idx, key=feat_key) + # n_cells, tot, sigmas = n_cells[idx], tot[idx], sigmas[idx] self.z.create_group(stats_loc, overwrite=True) self.feats.mount_location(self.z[stats_loc], identifier) @@ -870,9 +866,10 @@ def set_feature_stats(self, cell_key: str, min_cells: int) -> None: overwrite=True, location=identifier, ) + nz_mean = np.divide(tot, n_cells, out=np.zeros_like(tot).astype(float), where=n_cells != 0) self.feats.insert( "nz_mean", - (tot / n_cells).astype(float), + nz_mean.astype(float), overwrite=True, location=identifier, ) @@ -950,7 +947,7 @@ def mark_hvgs( def col_renamer(x): return f"{identifier}_{x}" - self.set_feature_stats(cell_key, min_cells) + self.set_feature_stats(cell_key) identifier = self._load_stats_loc(cell_key) c_var_col = f"c_var__{n_bins}__{lowess_frac}" if col_renamer(c_var_col) in self.feats.columns: From 60e04f120afa36e9fc1b49343c7c81f02ce95753 Mon Sep 17 00:00:00 2001 From: Parashar Date: Thu, 23 Nov 2023 18:44:02 +0100 Subject: [PATCH 2/2] allow zero values in remove_trend --- scarf/assay.py | 4 +++- scarf/metadata.py | 21 ++++++++++++++++----- 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/scarf/assay.py b/scarf/assay.py index 41ffeee..ebb7513 100644 --- a/scarf/assay.py +++ b/scarf/assay.py @@ -866,7 +866,9 @@ def set_feature_stats(self, cell_key: str) -> None: overwrite=True, location=identifier, ) - nz_mean = np.divide(tot, n_cells, out=np.zeros_like(tot).astype(float), where=n_cells != 0) + nz_mean = np.divide( + tot, n_cells, out=np.zeros_like(tot).astype(float), where=n_cells != 0 + ) self.feats.insert( "nz_mean", nz_mean.astype(float), diff --git a/scarf/metadata.py b/scarf/metadata.py index 1e60e9e..b874825 100644 --- a/scarf/metadata.py +++ b/scarf/metadata.py @@ -566,7 +566,12 @@ def grep(self, pattern: str, only_valid=False) -> List[str]: ) def remove_trend( - self, x: str, y: str, n_bins: int = 200, lowess_frac: float = 0.1 + self, + x: str, + y: str, + n_bins: int = 200, + lowess_frac: float = 0.1, + fill_value: float = 0, ) -> np.ndarray: """ @@ -575,17 +580,23 @@ def remove_trend( y: n_bins: lowess_frac: + fill_value: Returns: """ - a = fit_lowess( - self.fetch(x).astype(float), - self.fetch(y).astype(float), + a = self.fetch(x).astype(float) + b = self.fetch(y).astype(float) + idx = a > 0 + c = fit_lowess( + a[idx], + b[idx], n_bins, lowess_frac, ) - return a + ret_val = np.repeat(fill_value, len(a)).astype(float) + ret_val[idx] = c + return ret_val def __repr__(self): return f"MetaData of {self.fetch_all('I').sum()}({self.N}) elements"