Merge branch 'development'

parashardhapola · Nov 24, 2023 · 6474aa2
2 parents 480f8af + 60e04f1
commit 6474aa2
Show file tree

Hide file tree

Showing 3 changed files with 26 additions and 16 deletions.
diff --git a/VERSION b/VERSION
@@ -1 +1 @@
-0.28.7
+0.28.8
diff --git a/scarf/assay.py b/scarf/assay.py
@@ -818,16 +818,12 @@ class this method is optimized for scRNA-Seq data and takes additional
         self.normMethod = norm_method_cache
         return val
 
-    def set_feature_stats(self, cell_key: str, min_cells: int) -> None:
+    def set_feature_stats(self, cell_key: str) -> None:
         """Calculates summary statistics for the features of the assay using
         only cells that are marked True by the 'cell_key' parameter.
 
         Args:
             cell_key: Name of the key (column) from cell attribute table.
-            min_cells: Minimum number of cells across which a given feature should be present. If a feature is present
-                       (has non zero un-normalized value) in fewer cells that it is ignored and summary statistics
-                       are not calculated for that feature. Also, such features will be disabled and `I` value of these
-                       features in the feature attribute table will be set to False
 
         Returns: None
         """
@@ -855,9 +851,9 @@ def set_feature_stats(self, cell_key: str, min_cells: int) -> None:
             f"({self.name}) Computing sigmas",
             self.nthreads,
         )
-        idx = n_cells > min_cells
-        self.feats.update_key(idx, key=feat_key)
-        n_cells, tot, sigmas = n_cells[idx], tot[idx], sigmas[idx]
+        # idx = n_cells > min_cells
+        # self.feats.update_key(idx, key=feat_key)
+        # n_cells, tot, sigmas = n_cells[idx], tot[idx], sigmas[idx]
 
         self.z.create_group(stats_loc, overwrite=True)
         self.feats.mount_location(self.z[stats_loc], identifier)
@@ -870,9 +866,12 @@ def set_feature_stats(self, cell_key: str, min_cells: int) -> None:
             overwrite=True,
             location=identifier,
         )
+        nz_mean = np.divide(
+            tot, n_cells, out=np.zeros_like(tot).astype(float), where=n_cells != 0
+        )
         self.feats.insert(
             "nz_mean",
-            (tot / n_cells).astype(float),
+            nz_mean.astype(float),
             overwrite=True,
             location=identifier,
         )
@@ -950,7 +949,7 @@ def mark_hvgs(
         def col_renamer(x):
             return f"{identifier}_{x}"
 
-        self.set_feature_stats(cell_key, min_cells)
+        self.set_feature_stats(cell_key)
         identifier = self._load_stats_loc(cell_key)
         c_var_col = f"c_var__{n_bins}__{lowess_frac}"
         if col_renamer(c_var_col) in self.feats.columns:

diff --git a/scarf/metadata.py b/scarf/metadata.py
@@ -566,7 +566,12 @@ def grep(self, pattern: str, only_valid=False) -> List[str]:
         )
 
     def remove_trend(
-        self, x: str, y: str, n_bins: int = 200, lowess_frac: float = 0.1
+        self,
+        x: str,
+        y: str,
+        n_bins: int = 200,
+        lowess_frac: float = 0.1,
+        fill_value: float = 0,
     ) -> np.ndarray:
         """
 
@@ -575,17 +580,23 @@ def remove_trend(
             y:
             n_bins:
             lowess_frac:
+            fill_value:
 
         Returns:
 
         """
-        a = fit_lowess(
-            self.fetch(x).astype(float),
-            self.fetch(y).astype(float),
+        a = self.fetch(x).astype(float)
+        b = self.fetch(y).astype(float)
+        idx = a > 0
+        c = fit_lowess(
+            a[idx],
+            b[idx],
             n_bins,
             lowess_frac,
         )
-        return a
+        ret_val = np.repeat(fill_value, len(a)).astype(float)
+        ret_val[idx] = c
+        return ret_val
 
     def __repr__(self):
         return f"MetaData of {self.fetch_all('I').sum()}({self.N}) elements"