fix for masked values in FitTrace

astropy · Jan 24, 2024 · 5c21308 · 5c21308
1 parent b0b1b08
commit 5c21308
Show file tree

Hide file tree

Showing 3 changed files with 158 additions and 40 deletions.
diff --git a/CHANGES.rst b/CHANGES.rst
@@ -29,6 +29,16 @@ Bug Fixes
 
 - HorneExtract now accepts 'None' as a vaild option for ``bkgrd_prof``. [#171]
 
+- Fix for fully masked bins in FitTrace when using ``gaussian`` for ``peak_method``.
+  Fully masked columns now have a peak of nan, which is used for the all-bin fit
+  for the Trace. Warning messages for ``peak_method`` == ``max`` and ``centroid``
+  are also now reflective of what the bin peak is being set to. [#205]
+
+- Fix in FitTrace to set fully-masked column bin peaks to NaN. Previously, for
+  peak_method='max' these were set to 0.0, and for peak_method='centroid' they
+  were set to the number of rows in the image, biasing the final fit to all bin
+  peaks. [#206]
+
 Other changes
 ^^^^^^^^^^^^^
 

diff --git a/specreduce/tests/test_tracing.py b/specreduce/tests/test_tracing.py
@@ -113,9 +113,6 @@ def test_fit_trace():
     with pytest.raises(ValueError):
         t = FitTrace(img, peak_method='invalid')
 
-    # create same-shaped variations of image with invalid values
-    img_all_nans = np.tile(np.nan, (nrows, ncols))
-
     window = 10
     guess = int(nrows/2)
     img_win_nans = img.copy()
@@ -141,28 +138,113 @@ def test_fit_trace():
     with pytest.raises(ValueError, match=r'bins must be <'):
         FitTrace(img, bins=ncols + 1)
 
-    # error on trace of otherwise valid image with all-nan window around guess
-    with pytest.raises(ValueError, match='pixels in window region are masked'):
-        FitTrace(img_win_nans, guess=guess, window=window)
-
-    # error on trace of all-nan image
-    with pytest.raises(ValueError, match=r'image is fully masked'):
-        FitTrace(img_all_nans)
-
-    # test that warning is raised when several bins are masked
-    mask = np.zeros(img.shape)
-    mask[:, 100] = 1
-    mask[:, 20] = 1
-    mask[:, 30] = 1
-    nddat = NDData(data=img, mask=mask, unit=u.DN)
-    msg = "All pixels in bins 20, 30, 100 are masked. Falling back on trace value from all-bin fit."
-    with pytest.warns(UserWarning, match=msg):
-        FitTrace(nddat)
-
-    # and when many bins are masked
-    mask = np.zeros(img.shape)
-    mask[:, 0:21] = 1
-    nddat = NDData(data=img, mask=mask, unit=u.DN)
-    msg = 'All pixels in bins 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, ..., 20 are masked.'
-    with pytest.warns(UserWarning, match=msg):
-        FitTrace(nddat)
+
+class TestMasksTracing():
+
+    def mk_img(self, nrows=200, ncols=160):
+
+        np.random.seed(7)
+
+        sigma_pix = 4
+        sigma_noise = 1
+
+        col_model = models.Gaussian1D(amplitude=1, mean=nrows/2, stddev=sigma_pix)
+        noise = np.random.normal(scale=sigma_noise, size=(nrows, ncols))
+
+        index_arr = np.tile(np.arange(nrows), (ncols, 1))
+        img = col_model(index_arr.T) + noise
+
+        return img
+
+    def test_window_fit_trace(self):
+
+        """This test function will test that masked values are treated correctly in
+        FitTrace, and produce the correct results and warning messages based on
+        `peak_method`."""
+        img = self.mk_img()
+
+        # create same-shaped variations of image with invalid values
+        nrows = 200
+        ncols = 160
+        img_all_nans = np.tile(np.nan, (nrows, ncols))
+
+        window = 10
+        guess = int(nrows/2)
+        img_win_nans = img.copy()
+        img_win_nans[guess - window:guess + window] = np.nan
+
+        # error on trace of otherwise valid image with all-nan window around guess
+        with pytest.raises(ValueError, match='pixels in window region are masked'):
+            FitTrace(img_win_nans, guess=guess, window=window)
+
+        # error on trace of all-nan image
+        with pytest.raises(ValueError, match=r'image is fully masked'):
+            FitTrace(img_all_nans)
+
+    @pytest.mark.filterwarnings("ignore:The fit may be unsuccessful")
+    @pytest.mark.filterwarnings("ignore:Model is linear in parameters")
+    @pytest.mark.filterwarnings("ignore:All pixels in bins")
+    def test_fit_trace_all_nan_cols(self):
+        pass
+        # make sure that the actual trace that is fit is correct when
+        # all-masked bin peaks are set to NaN
+        img = self.mk_img(nrows=10, ncols=11)
+
+        img[:, 7] = np.nan
+        img[:, 4] = np.nan
+        img[:, 0] = np.nan
+
+        # peak_method = 'max'
+        truth = [1.6346154, 2.2371795, 2.8397436, 3.4423077, 4.0448718,
+                 4.6474359, 5.25, 5.8525641, 6.4551282, 7.0576923,
+                 7.6602564]
+        max_trace = FitTrace(img, peak_method='max')
+        np.testing.assert_allclose(truth, max_trace.trace)
+
+        # peak_method = 'gaussian'
+        truth = [1.947455, 2.383634, 2.8198131, 3.2559921, 3.6921712,
+                 4.1283502, 4.5645293, 5.0007083, 5.4368874, 5.8730665,
+                 6.3092455]
+        max_trace = FitTrace(img, peak_method='gaussian')
+        np.testing.assert_allclose(truth, max_trace.trace)
+
+        # peak_method = 'centroid'
+        truth = [2.5318835, 2.782069, 3.0322546, 3.2824402, 3.5326257,
+                 3.7828113, 4.0329969, 4.2831824, 4.533368, 4.7835536,
+                 5.0337391]
+        max_trace = FitTrace(img, peak_method='centroid')
+        np.testing.assert_allclose(truth, max_trace.trace)
+
+    @pytest.mark.filterwarnings("ignore:The fit may be unsuccessful")
+    @pytest.mark.filterwarnings("ignore:Model is linear in parameters")
+    def test_warn_msg_fit_trace_all_nan_cols(self):
+
+        img = self.mk_img()
+
+        # test that warning (dependent on choice of `peak_method`) is raised when a
+        # few bins are masked, and that theyre listed individually
+        mask = np.zeros(img.shape)
+        mask[:, 100] = 1
+        mask[:, 20] = 1
+        mask[:, 30] = 1
+        nddat = NDData(data=img, mask=mask, unit=u.DN)
+
+        match_str = 'All pixels in bins 20, 30, 100 are fully masked. Setting bin peaks to NaN.'
+
+        with pytest.warns(UserWarning, match=match_str):
+            FitTrace(nddat, peak_method='max')
+
+        with pytest.warns(UserWarning, match=match_str):
+            FitTrace(nddat, peak_method='centroid')
+
+        with pytest.warns(UserWarning, match=match_str):
+            FitTrace(nddat, peak_method='gaussian')
+
+        # and when many bins are masked, that the message is consolidated
+        mask = np.zeros(img.shape)
+        mask[:, 0:21] = 1
+        nddat = NDData(data=img, mask=mask, unit=u.DN)
+        with pytest.warns(UserWarning, match='All pixels in bins '
+                          '0, 1, 2, 3, 4, 5, 6, 7, 8, 9, ..., 20 are '
+                          'fully masked. Setting bin peaks to NaN.'):
+            FitTrace(nddat)
diff --git a/specreduce/tracing.py b/specreduce/tracing.py
@@ -209,6 +209,8 @@ class FitTrace(Trace, _ImageParser):
     _disp_axis = 1
 
     def __post_init__(self):
+
+        # parse image
         self.image = self._parse_image(self.image)
 
         # mask any previously uncaught invalid values
@@ -262,14 +264,20 @@ def __post_init__(self):
             warnings.warn('TRACE: Converting window to int')
             self.window = int(self.window)
 
+        # fit the trace
+        self._fit_trace(img)
+
+    def _fit_trace(self, img):
+
+        yy = np.arange(img.shape[self._crossdisp_axis])
+
         # set max peak location by user choice or wavelength with max avg flux
         ztot = img.sum(axis=self._disp_axis) / img.shape[self._disp_axis]
         peak_y = self.guess if self.guess is not None else ztot.argmax()
         # NOTE: peak finder can be bad if multiple objects are on slit
 
-        yy = np.arange(img.shape[self._crossdisp_axis])
-
         if self.peak_method == 'gaussian':
+
             # guess the peak width as the FWHM, roughly converted to gaussian sigma
             yy_above_half_max = np.sum(ztot > (ztot.max() / 2))
             width_guess = yy_above_half_max / gaussian_sigma_to_fwhm
@@ -292,6 +300,8 @@ def __post_init__(self):
         ilum2 = (yy if self.window is None
                  else yy[np.arange(peak_y - self.window,
                                    peak_y + self.window, dtype=int)])
+
+        # check if everything in window region is masked
         if img[ilum2].mask.all():
             raise ValueError('All pixels in window region are masked. Check '
                              'for invalid values or use a larger window value.')
@@ -302,15 +312,21 @@ def __post_init__(self):
 
         warn_bins = []
         for i in range(self.bins):
-            # repeat earlier steps to create gaussian fit for each bin
+
+            # binned columns, summed along disp. axis.
+            # or just a single, unbinned column if no bins
             z_i = img[ilum2, x_bins[i]:x_bins[i+1]].sum(axis=self._disp_axis)
-            if not z_i.mask.all():
-                peak_y_i = ilum2[z_i.argmax()]
-            else:
+
+            # if this bin is fully masked, set bin peak to NaN so it can be
+            # filtered in the final fit to all bin peaks for the trace
+            if z_i.mask.all():
                 warn_bins.append(i)
-                peak_y_i = peak_y
+                y_bins[i] = np.nan
+                continue
 
             if self.peak_method == 'gaussian':
+                peak_y_i = ilum2[z_i.argmax()]
+
                 yy_i_above_half_max = np.sum(z_i > (z_i.max() / 2))
                 width_guess_i = yy_i_above_half_max / gaussian_sigma_to_fwhm
 
@@ -336,24 +352,34 @@ def __post_init__(self):
                     y_bins[i] = popt_i.mean_0.value
                     popt_tot = popt_i
 
-            elif self.peak_method == 'centroid':
+            if self.peak_method == 'centroid':
                 z_i_cumsum = np.cumsum(z_i)
-                # find the interpolated index where the cumulative array reaches half the total
-                # cumulative values
+                # find the interpolated index where the cumulative array reaches
+                # half the total cumulative values
                 y_bins[i] = np.interp(z_i_cumsum[-1]/2., z_i_cumsum, ilum2)
 
+                # NOTE this reflects current behavior, should eventually be changed
+                # to set to nan by default (or zero fill / interpoate option once
+                # available)
+
             elif self.peak_method == 'max':
                 # TODO: implement smoothing with provided width
                 y_bins[i] = ilum2[z_i.argmax()]
 
-        # warn about fully-masked bins (which, currently, means any bin with a single masked value)
+                # NOTE: a fully masked should eventually be changed to set to
+                # nan by default (or zero fill / interpoate option once available)
+
+        # warn about fully-masked bins
         if len(warn_bins) > 0:
+
             # if there are a ton of bins, we don't want to print them all out
             if len(warn_bins) > 20:
                 warn_bins = warn_bins[0: 10] + ['...'] + [warn_bins[-1]]
+
             warnings.warn(f"All pixels in {'bins' if len(warn_bins) else 'bin'} "
                           f"{', '.join([str(x) for x in warn_bins])}"
-                          " are masked. Falling back on trace value from all-bin fit.")
+                          " are fully masked. Setting bin"
+                          f" peak{'s' if len(warn_bins) else ''} to NaN.")
 
         # recenter bin positions
         x_bins = (x_bins[:-1] + x_bins[1:]) / 2