diff --git a/CHANGES.rst b/CHANGES.rst index 4ce17f2..7fbd825 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -29,6 +29,16 @@ Bug Fixes - HorneExtract now accepts 'None' as a vaild option for ``bkgrd_prof``. [#171] +- Fix for fully masked bins in FitTrace when using ``gaussian`` for ``peak_method``. + Fully masked columns now have a peak of nan, which is used for the all-bin fit + for the Trace. Warning messages for ``peak_method`` == ``max`` and ``centroid`` + are also now reflective of what the bin peak is being set to. [#205] + +- Fix in FitTrace to set fully-masked column bin peaks to NaN. Previously, for + peak_method='max' these were set to 0.0, and for peak_method='centroid' they + were set to the number of rows in the image, biasing the final fit to all bin + peaks. [#206] + Other changes ^^^^^^^^^^^^^ diff --git a/specreduce/tests/test_tracing.py b/specreduce/tests/test_tracing.py index 021bd0b..dcded43 100644 --- a/specreduce/tests/test_tracing.py +++ b/specreduce/tests/test_tracing.py @@ -113,9 +113,6 @@ def test_fit_trace(): with pytest.raises(ValueError): t = FitTrace(img, peak_method='invalid') - # create same-shaped variations of image with invalid values - img_all_nans = np.tile(np.nan, (nrows, ncols)) - window = 10 guess = int(nrows/2) img_win_nans = img.copy() @@ -141,28 +138,113 @@ def test_fit_trace(): with pytest.raises(ValueError, match=r'bins must be <'): FitTrace(img, bins=ncols + 1) - # error on trace of otherwise valid image with all-nan window around guess - with pytest.raises(ValueError, match='pixels in window region are masked'): - FitTrace(img_win_nans, guess=guess, window=window) - - # error on trace of all-nan image - with pytest.raises(ValueError, match=r'image is fully masked'): - FitTrace(img_all_nans) - - # test that warning is raised when several bins are masked - mask = np.zeros(img.shape) - mask[:, 100] = 1 - mask[:, 20] = 1 - mask[:, 30] = 1 - nddat = NDData(data=img, mask=mask, unit=u.DN) - msg = "All pixels in bins 20, 30, 100 are masked. Falling back on trace value from all-bin fit." - with pytest.warns(UserWarning, match=msg): - FitTrace(nddat) - - # and when many bins are masked - mask = np.zeros(img.shape) - mask[:, 0:21] = 1 - nddat = NDData(data=img, mask=mask, unit=u.DN) - msg = 'All pixels in bins 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, ..., 20 are masked.' - with pytest.warns(UserWarning, match=msg): - FitTrace(nddat) + +class TestMasksTracing(): + + def mk_img(self, nrows=200, ncols=160): + + np.random.seed(7) + + sigma_pix = 4 + sigma_noise = 1 + + col_model = models.Gaussian1D(amplitude=1, mean=nrows/2, stddev=sigma_pix) + noise = np.random.normal(scale=sigma_noise, size=(nrows, ncols)) + + index_arr = np.tile(np.arange(nrows), (ncols, 1)) + img = col_model(index_arr.T) + noise + + return img + + def test_window_fit_trace(self): + + """This test function will test that masked values are treated correctly in + FitTrace, and produce the correct results and warning messages based on + `peak_method`.""" + img = self.mk_img() + + # create same-shaped variations of image with invalid values + nrows = 200 + ncols = 160 + img_all_nans = np.tile(np.nan, (nrows, ncols)) + + window = 10 + guess = int(nrows/2) + img_win_nans = img.copy() + img_win_nans[guess - window:guess + window] = np.nan + + # error on trace of otherwise valid image with all-nan window around guess + with pytest.raises(ValueError, match='pixels in window region are masked'): + FitTrace(img_win_nans, guess=guess, window=window) + + # error on trace of all-nan image + with pytest.raises(ValueError, match=r'image is fully masked'): + FitTrace(img_all_nans) + + @pytest.mark.filterwarnings("ignore:The fit may be unsuccessful") + @pytest.mark.filterwarnings("ignore:Model is linear in parameters") + @pytest.mark.filterwarnings("ignore:All pixels in bins") + def test_fit_trace_all_nan_cols(self): + pass + # make sure that the actual trace that is fit is correct when + # all-masked bin peaks are set to NaN + img = self.mk_img(nrows=10, ncols=11) + + img[:, 7] = np.nan + img[:, 4] = np.nan + img[:, 0] = np.nan + + # peak_method = 'max' + truth = [1.6346154, 2.2371795, 2.8397436, 3.4423077, 4.0448718, + 4.6474359, 5.25, 5.8525641, 6.4551282, 7.0576923, + 7.6602564] + max_trace = FitTrace(img, peak_method='max') + np.testing.assert_allclose(truth, max_trace.trace) + + # peak_method = 'gaussian' + truth = [1.947455, 2.383634, 2.8198131, 3.2559921, 3.6921712, + 4.1283502, 4.5645293, 5.0007083, 5.4368874, 5.8730665, + 6.3092455] + max_trace = FitTrace(img, peak_method='gaussian') + np.testing.assert_allclose(truth, max_trace.trace) + + # peak_method = 'centroid' + truth = [2.5318835, 2.782069, 3.0322546, 3.2824402, 3.5326257, + 3.7828113, 4.0329969, 4.2831824, 4.533368, 4.7835536, + 5.0337391] + max_trace = FitTrace(img, peak_method='centroid') + np.testing.assert_allclose(truth, max_trace.trace) + + @pytest.mark.filterwarnings("ignore:The fit may be unsuccessful") + @pytest.mark.filterwarnings("ignore:Model is linear in parameters") + def test_warn_msg_fit_trace_all_nan_cols(self): + + img = self.mk_img() + + # test that warning (dependent on choice of `peak_method`) is raised when a + # few bins are masked, and that theyre listed individually + mask = np.zeros(img.shape) + mask[:, 100] = 1 + mask[:, 20] = 1 + mask[:, 30] = 1 + nddat = NDData(data=img, mask=mask, unit=u.DN) + + match_str = 'All pixels in bins 20, 30, 100 are fully masked. Setting bin peaks to NaN.' + + with pytest.warns(UserWarning, match=match_str): + FitTrace(nddat, peak_method='max') + + with pytest.warns(UserWarning, match=match_str): + FitTrace(nddat, peak_method='centroid') + + with pytest.warns(UserWarning, match=match_str): + FitTrace(nddat, peak_method='gaussian') + + # and when many bins are masked, that the message is consolidated + mask = np.zeros(img.shape) + mask[:, 0:21] = 1 + nddat = NDData(data=img, mask=mask, unit=u.DN) + with pytest.warns(UserWarning, match='All pixels in bins ' + '0, 1, 2, 3, 4, 5, 6, 7, 8, 9, ..., 20 are ' + 'fully masked. Setting bin peaks to NaN.'): + FitTrace(nddat) diff --git a/specreduce/tracing.py b/specreduce/tracing.py index c1a9b6f..bf5c754 100644 --- a/specreduce/tracing.py +++ b/specreduce/tracing.py @@ -209,6 +209,8 @@ class FitTrace(Trace, _ImageParser): _disp_axis = 1 def __post_init__(self): + + # parse image self.image = self._parse_image(self.image) # mask any previously uncaught invalid values @@ -262,14 +264,20 @@ def __post_init__(self): warnings.warn('TRACE: Converting window to int') self.window = int(self.window) + # fit the trace + self._fit_trace(img) + + def _fit_trace(self, img): + + yy = np.arange(img.shape[self._crossdisp_axis]) + # set max peak location by user choice or wavelength with max avg flux ztot = img.sum(axis=self._disp_axis) / img.shape[self._disp_axis] peak_y = self.guess if self.guess is not None else ztot.argmax() # NOTE: peak finder can be bad if multiple objects are on slit - yy = np.arange(img.shape[self._crossdisp_axis]) - if self.peak_method == 'gaussian': + # guess the peak width as the FWHM, roughly converted to gaussian sigma yy_above_half_max = np.sum(ztot > (ztot.max() / 2)) width_guess = yy_above_half_max / gaussian_sigma_to_fwhm @@ -292,6 +300,8 @@ def __post_init__(self): ilum2 = (yy if self.window is None else yy[np.arange(peak_y - self.window, peak_y + self.window, dtype=int)]) + + # check if everything in window region is masked if img[ilum2].mask.all(): raise ValueError('All pixels in window region are masked. Check ' 'for invalid values or use a larger window value.') @@ -302,15 +312,21 @@ def __post_init__(self): warn_bins = [] for i in range(self.bins): - # repeat earlier steps to create gaussian fit for each bin + + # binned columns, summed along disp. axis. + # or just a single, unbinned column if no bins z_i = img[ilum2, x_bins[i]:x_bins[i+1]].sum(axis=self._disp_axis) - if not z_i.mask.all(): - peak_y_i = ilum2[z_i.argmax()] - else: + + # if this bin is fully masked, set bin peak to NaN so it can be + # filtered in the final fit to all bin peaks for the trace + if z_i.mask.all(): warn_bins.append(i) - peak_y_i = peak_y + y_bins[i] = np.nan + continue if self.peak_method == 'gaussian': + peak_y_i = ilum2[z_i.argmax()] + yy_i_above_half_max = np.sum(z_i > (z_i.max() / 2)) width_guess_i = yy_i_above_half_max / gaussian_sigma_to_fwhm @@ -336,24 +352,34 @@ def __post_init__(self): y_bins[i] = popt_i.mean_0.value popt_tot = popt_i - elif self.peak_method == 'centroid': + if self.peak_method == 'centroid': z_i_cumsum = np.cumsum(z_i) - # find the interpolated index where the cumulative array reaches half the total - # cumulative values + # find the interpolated index where the cumulative array reaches + # half the total cumulative values y_bins[i] = np.interp(z_i_cumsum[-1]/2., z_i_cumsum, ilum2) + # NOTE this reflects current behavior, should eventually be changed + # to set to nan by default (or zero fill / interpoate option once + # available) + elif self.peak_method == 'max': # TODO: implement smoothing with provided width y_bins[i] = ilum2[z_i.argmax()] - # warn about fully-masked bins (which, currently, means any bin with a single masked value) + # NOTE: a fully masked should eventually be changed to set to + # nan by default (or zero fill / interpoate option once available) + + # warn about fully-masked bins if len(warn_bins) > 0: + # if there are a ton of bins, we don't want to print them all out if len(warn_bins) > 20: warn_bins = warn_bins[0: 10] + ['...'] + [warn_bins[-1]] + warnings.warn(f"All pixels in {'bins' if len(warn_bins) else 'bin'} " f"{', '.join([str(x) for x in warn_bins])}" - " are masked. Falling back on trace value from all-bin fit.") + " are fully masked. Setting bin" + f" peak{'s' if len(warn_bins) else ''} to NaN.") # recenter bin positions x_bins = (x_bins[:-1] + x_bins[1:]) / 2