From e24107f34a67852208a554bcfbaabb74751660ef Mon Sep 17 00:00:00 2001
From: Curtis McCully
Date: Thu, 5 Oct 2023 09:57:14 -0400
Subject: [PATCH] Migrate to no longer use SEP and use astropy Photutils instead.

---
 CHANGES.md           |   4 +
 Dockerfile           |   9 +-
 banzai/bpm.py        |   2 +-
 banzai/photometry.py | 220 +++++++++++++++++++++----------------------
 banzai/settings.py   |   3 +
 docs/index.rst       |  17 ++--
 setup.cfg            |   3 +-
 7 files changed, 131 insertions(+), 127 deletions(-)

diff --git a/CHANGES.md b/CHANGES.md
index 53ecf9f0..e05e1e5f 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -1,3 +1,7 @@
+1.13.0 (2023-10-05)
+-------------------
+- Migrated photometry extraction to be done by astropy's photutils instead of SEP.
+
 1.12.0 (2023-08-10)
 -------------------
 - Added the process_by_group keyword to stages to fix a bug that wouldn't allow grouping only by instrument
diff --git a/Dockerfile b/Dockerfile
index 5cbf2795..2d2e5119 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -4,8 +4,8 @@ RUN conda install -y numpy pip scipy astropy pytest mock requests ipython coverage\
     && conda install -y -c conda-forge kombu=4.4.0 elasticsearch\<6.0.0,\>=5.0.0 pytest-astropy mysql-connector-python\
     && conda clean -y --all
 
-RUN pip install --no-cache-dir cython logutils lcogt_logging python-dateutil sqlalchemy\>=1.3.0b1 psycopg2-binary celery[redis]==4.3.0 \
-    apscheduler ocs-ingester tenacity amqp==2.6.0 cosmic-conn
+RUN pip install --no-cache-dir photutils bottleneck cython logutils lcogt_logging python-dateutil sqlalchemy\>=1.3.0b1 psycopg2-binary \
+    celery[redis]==4.3.0 apscheduler ocs-ingester tenacity amqp==2.6.0 cosmic-conn
 
 RUN mkdir /home/archive && /usr/sbin/groupadd -g 10000 "domainusers" \
     && /usr/sbin/useradd -g 10000 -d /home/archive -M -N -u 10087 archive \
@@ -13,11 +13,6 @@ RUN mkdir /home/archive && /usr/sbin/groupadd -g 10000 "domainusers" \
 
 COPY --chown=10087:10000 . /lco/banzai
 
-RUN apt-get -y update && apt-get -y install gcc && \
-    pip install --no-cache-dir git+https://github.com/cmccully/sep.git@deblending /lco/banzai/ && \
-    apt-get -y remove gcc && \
-    apt-get autoclean && \
-    rm -rf /var/lib/apt/lists/*
 
 USER archive
diff --git a/banzai/bpm.py b/banzai/bpm.py
index 473f3354..59d2ac14 100644
--- a/banzai/bpm.py
+++ b/banzai/bpm.py
@@ -39,6 +39,6 @@ def calibration_type(self):
 class SaturatedPixelFlagger(Stage):
     def do_stage(self, image):
         for image_extension in image.ccd_hdus:
-            image_extension.mask[image_extension.data > image_extension.saturate] |= 2
+            image_extension.mask[image_extension.data >= image_extension.saturate] |= 2
 
         return image
diff --git a/banzai/photometry.py b/banzai/photometry.py
index afb4eaec..b803a1e5 100755
--- a/banzai/photometry.py
+++ b/banzai/photometry.py
@@ -2,7 +2,6 @@
 import numpy as np
 from astropy.table import Table
-import sep
 from requests import HTTPError
 
 from banzai.utils import stats, array_utils
@@ -11,10 +10,14 @@
 from banzai.data import DataTable
 from banzai import logs
 
+from photutils.background import Background2D
 from skimage import measure
+from photutils.segmentation import make_2dgaussian_kernel, detect_sources, deblend_sources, SourceCatalog
+from astropy.convolution import convolve
+from astropy.convolution.kernels import CustomKernel
+
 
 logger = logs.get_logger()
-sep.set_sub_object_limit(int(1e6))
 
 
 def radius_of_contour(contour, source):
@@ -23,13 +26,49 @@ def radius_of_contour(contour, source):
     x_center = (source['xmax'] - source['xmin'] + 1) / 2.0 - 0.5
     y_center = (source['ymax'] - source['ymin'] + 1) / 2.0 - 0.5
 
-    return np.percentile(np.sqrt((x - x_center)**2.0 + (y - y_center)** 2.0), 90)
+    return np.percentile(np.sqrt((x - x_center)**2.0 + (y - y_center) ** 2.0), 90)
+
+
+def flag_sources(sources, source_labels, segmentation_map, mask, flag, mask_value):
+    affected_sources = np.unique(segmentation_map.data[mask == mask_value])
+    sources['flag'][np.in1d(source_labels, affected_sources)] |= flag
+
+
+def flag_deblended(sources, catalog, segmentation_map, deblended_seg_map, flag_value=2):
+    # By default deblending appends labels instead of reassigning them so we can just use the
+    # extras in the deblended map
+    deblended_sources = np.unique(deblended_seg_map.data[deblended_seg_map.data > np.max(segmentation_map.data)])
+    # Get the sources that were originally blended
+    original_blends = np.unique(segmentation_map.data[deblended_seg_map.data > np.max(segmentation_map.data)])
+    deblended_sources = np.hstack([deblended_sources, original_blends])
+    sources['flag'][np.in1d(catalog.labels, deblended_sources)] |= flag_value
+
+
+def flag_edge_sources(image, sources, flag_value=8):
+    ny, nx = image.shape
+    # Check 4 points on the kron aperture, one on each side of the major and minor axis
+    minor_xmin = sources['x'] - sources['b'] * sources['kronrad'] * np.sin(np.deg2rad(sources['theta']))
+    minor_xmax = sources['x'] + sources['b'] * sources['kronrad'] * np.sin(np.deg2rad(sources['theta']))
+    minor_ymin = sources['y'] - sources['b'] * sources['kronrad'] * np.cos(np.deg2rad(sources['theta']))
+    minor_ymax = sources['y'] + sources['b'] * sources['kronrad'] * np.cos(np.deg2rad(sources['theta']))
+    major_ymin = sources['y'] - sources['a'] * sources['kronrad'] * np.sin(np.deg2rad(sources['theta']))
+    major_ymax = sources['y'] + sources['a'] * sources['kronrad'] * np.sin(np.deg2rad(sources['theta']))
+    major_xmin = sources['x'] - sources['a'] * sources['kronrad'] * np.cos(np.deg2rad(sources['theta']))
+    major_xmax = sources['x'] + sources['a'] * sources['kronrad'] * np.cos(np.deg2rad(sources['theta']))
+
+    # Note we are already 1 indexed here
+    sources_off = np.logical_or(minor_xmin < 1, major_xmin < 1)
+    sources_off = np.logical_or(sources_off, minor_ymin < 1)
+    sources_off = np.logical_or(sources_off, major_ymin < 1)
+    sources_off = np.logical_or(sources_off, minor_xmax > nx)
+    sources_off = np.logical_or(sources_off, major_xmax > nx)
+    sources_off = np.logical_or(sources_off, minor_ymax > ny)
+    sources_off = np.logical_or(sources_off, major_ymax > ny)
+    sources['flag'][sources_off] |= flag_value
 
 
 class SourceDetector(Stage):
-    # Note that threshold is number of sigma, not an absolute number because we provide the error
-    # array to SEP.
-    threshold = 10.0
+    threshold = 2.5
     min_area = 9
 
     def __init__(self, runtime_context):
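The flag column that these helpers populate is a bitmask, so a single source can carry several conditions at once and each bit can be tested independently downstream. A toy sketch of the np.in1d pattern used by flag_sources above (the segmentation map, mask, and labels are illustrative values, not BANZAI data):

    import numpy as np

    # Toy 3x3 segmentation map with labels 1..3 and a mask with one bad pixel (value 1)
    seg = np.array([[0, 1, 1],
                    [2, 2, 0],
                    [0, 3, 3]])
    mask = np.array([[0, 1, 0],
                     [0, 0, 0],
                     [0, 0, 0]])

    labels = np.array([1, 2, 3])
    flags = np.zeros(3, dtype=int)

    # OR the bad-pixel bit into every source whose footprint touches a masked pixel
    affected = np.unique(seg[mask == 1])
    flags[np.in1d(labels, affected)] |= 1

    print(flags)            # [1 0 0]: only label 1 overlaps the masked pixel
    print((flags & 1) > 0)  # decoding the bit later: [ True False False]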
@@ -39,79 +78,75 @@ def do_stage(self, image):
         try:
             # Increase the internal buffer size in sep. This is most necessary for crowded fields.
             ny, nx = image.shape
-            sep.set_extract_pixstack(int(nx * ny - 1))
 
             data = image.data.copy()
             error = image.uncertainty
-            mask = image.mask > 0
-
-            # Fits can be backwards byte order, so fix that if need be and subtract
-            # the background
-            try:
-                bkg = sep.Background(data, mask=mask, bw=32, bh=32, fw=3, fh=3)
-            except ValueError:
-                data = data.byteswap(True).newbyteorder()
-                bkg = sep.Background(data, mask=mask, bw=32, bh=32, fw=3, fh=3)
-            bkg.subfrom(data)
+
+            # From what I can piece together, the background estimator makes a low resolution mesh set by box size
+            # (32, 32) here and then applies a filter to the low resolution image. The filter size is 3x3 here.
+            # The defaults we use here are the mesh estimator from Source Extractor, which is a mode estimator.
+            # The default filter that works on the mesh image is a median filter.
+            bkg = Background2D(data, (32, 32), filter_size=(3, 3))
+            data -= bkg.background
+
+            # Convolve the image with a 2D Gaussian, but with the normalization SEP uses, as
+            # that is correct.
+            # The default kernel used by Source Extractor is [[1,2,1], [2,4,2], [1,2,1]].
+            # That kernel corresponds to fwhm = 1.9, which we adopt here.
+            kernel = make_2dgaussian_kernel(1.9, size=3)
+            convolved_data = convolve(data / (error * error), kernel)
+
+            # We include the correct match filter normalization here that is not included in
+            # vanilla source extractor
+            kernel_squared = CustomKernel(kernel.array * kernel.array)
+            normalization = np.sqrt(convolve(1 / (error * error), kernel_squared))
+            convolved_data /= normalization
 
             # Do an initial source detection
-            try:
-                sources = sep.extract(data, self.threshold, mask=mask, minarea=self.min_area,
-                                      err=error, deblend_cont=0.005)
-            except Exception:
-                logger.error(logs.format_exception(), image=image)
-                return image
-
-            # Convert the detections into a table
-            sources = Table(sources)
-
-            # We remove anything with a detection flag >= 8
-            # This includes memory overflows and objects that are too close to the edge
-            sources = sources[sources['flag'] < 8]
+            segmentation_map = detect_sources(convolved_data, self.threshold, npixels=self.min_area)
+
+            # Note that nlevels here is DEBLEND_NTHRESH in source extractor which is 32 by default
+            deblended_seg_map = deblend_sources(convolved_data, segmentation_map,
+                                                npixels=self.min_area, nlevels=32,
+                                                contrast=0.005, progress_bar=False,
+                                                nproc=self.runtime_context.N_PHOT_CORES)
+            # Convert the segmentation map to a source catalog
+            catalog = SourceCatalog(data, deblended_seg_map, convolved_data=convolved_data, error=error,
+                                    background=bkg.background)
+
+            sources = Table({'x': catalog.xcentroid + 1.0, 'y': catalog.ycentroid + 1.0,
+                             'xwin': catalog.xcentroid_win + 1.0, 'ywin': catalog.ycentroid_win + 1.0,
+                             'xpeak': catalog.maxval_xindex + 1, 'ypeak': catalog.maxval_yindex + 1,
+                             'peak': catalog.max_value,
+                             'a': catalog.semimajor_sigma.value, 'b': catalog.semiminor_sigma.value,
+                             'theta': catalog.orientation.to('deg').value,
+                             'ellipticity': catalog.ellipticity.value,
+                             'kronrad': catalog.kron_radius.value,
+                             'flux': catalog.kron_flux, 'fluxerr': catalog.kron_fluxerr,
+                             'x2': catalog.covar_sigx2.value, 'y2': catalog.covar_sigy2.value,
+                             'xy': catalog.covar_sigxy.value,
+                             'background': catalog.background_mean})
+
+            for r in range(1, 7):
+                # r is an aperture diameter in arcsec; convert to a radius in pixels
+                radius_pixels = r / 2.0 / image.pixel_scale
+                sources[f'fluxaper{r}'], sources[f'fluxerr{r}'] = catalog.circular_photometry(radius_pixels)
+
+            for r in [0.25, 0.5, 0.75]:
+                sources['fluxrad' + f'{r:.2f}'.lstrip("0.")] = catalog.fluxfrac_radius(r)
+
+            sources['flag'] = 0
+
+            # Flag = 1 for sources with bad pixels
+            flag_sources(sources, catalog.labels, deblended_seg_map, image.mask, flag=1, mask_value=1)
+            # Flag = 2 for sources that are deblended
+            flag_deblended(sources, catalog, segmentation_map, deblended_seg_map, flag_value=2)
+            # Flag = 4 for sources that have saturated pixels
+            flag_sources(sources, catalog.labels, deblended_seg_map, image.mask, flag=4, mask_value=2)
+            # Flag = 8 if kron aperture falls off the image
+            flag_edge_sources(image, sources, flag_value=8)
+            # Flag = 16 if source has cosmic ray pixels
+            flag_sources(sources, catalog.labels, deblended_seg_map, image.mask, flag=16, mask_value=8)
 
             sources = array_utils.prune_nans_from_table(sources)
 
-            # Calculate the ellipticity
-            sources['ellipticity'] = 1.0 - (sources['b'] / sources['a'])
-
-            # Fix any value of theta that are invalid due to floating point rounding
-            # -pi / 2 < theta < pi / 2
-            sources['theta'][sources['theta'] > (np.pi / 2.0)] -= np.pi
-            sources['theta'][sources['theta'] < (-np.pi / 2.0)] += np.pi
-
-            # Calculate the kron radius
-            kronrad, krflag = sep.kron_radius(data, sources['x'], sources['y'],
-                                              sources['a'], sources['b'],
-                                              sources['theta'], 6.0)
-            sources['flag'] |= krflag
-            sources['kronrad'] = kronrad
-
-            # Calculate the equivalent of flux_auto
-            flux, fluxerr, flag = sep.sum_ellipse(data, sources['x'], sources['y'],
-                                                  sources['a'], sources['b'],
-                                                  np.pi / 2.0, 2.5 * kronrad,
-                                                  subpix=1, err=error)
-            sources['flux'] = flux
-            sources['fluxerr'] = fluxerr
-            sources['flag'] |= flag
-
-            # Do circular aperture photometry for diameters of 1" to 6"
-            for diameter in [1, 2, 3, 4, 5, 6]:
-                flux, fluxerr, flag = sep.sum_circle(data, sources['x'], sources['y'],
-                                                     diameter / 2.0 / image.pixel_scale, gain=1.0, err=error)
-                sources['fluxaper{0}'.format(diameter)] = flux
-                sources['fluxerr{0}'.format(diameter)] = fluxerr
-                sources['flag'] |= flag
-
-            # Measure the flux profile
-            flux_radii, flag = sep.flux_radius(data, sources['x'], sources['y'],
-                                               6.0 * sources['a'], [0.25, 0.5, 0.75],
-                                               normflux=sources['flux'], subpix=5)
-            sources['flag'] |= flag
-            sources['fluxrad25'] = flux_radii[:, 0]
-            sources['fluxrad50'] = flux_radii[:, 1]
-            sources['fluxrad75'] = flux_radii[:, 2]
-
             # Cut individual bright pixels. Often cosmic rays
             sources = sources[sources['fluxrad50'] > 0.5]
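The hunk above chains several photutils calls; the end-to-end flow is easier to see on a synthetic image. A self-contained sketch of the same pipeline (the image size, noise level, and star parameters are made up for illustration; the photutils/astropy calls mirror the ones in the patch):

    import numpy as np
    from astropy.convolution import convolve
    from astropy.convolution.kernels import CustomKernel
    from photutils.background import Background2D
    from photutils.segmentation import (SourceCatalog, deblend_sources,
                                        detect_sources, make_2dgaussian_kernel)

    rng = np.random.default_rng(0)
    error = np.full((128, 128), 2.0)            # flat 1-sigma uncertainty map
    data = rng.normal(100.0, error)             # sky level of 100 counts plus noise
    yy, xx = np.mgrid[:128, :128]
    data += 500.0 * np.exp(-((xx - 64) ** 2 + (yy - 64) ** 2) / (2 * 2.0 ** 2))  # one star

    # Background: 32x32 mesh with a 3x3 median filter on the mesh, as in the stage
    bkg = Background2D(data, (32, 32), filter_size=(3, 3))
    data -= bkg.background

    # Matched filter in signal-to-noise-like units, then normalization, as in the stage
    kernel = make_2dgaussian_kernel(1.9, size=3)
    convolved = convolve(data / (error * error), kernel)
    kernel_squared = CustomKernel(kernel.array * kernel.array)
    convolved /= np.sqrt(convolve(1 / (error * error), kernel_squared))

    segm = detect_sources(convolved, 2.5, npixels=9)
    segm = deblend_sources(convolved, segm, npixels=9, nlevels=32,
                           contrast=0.005, progress_bar=False)
    cat = SourceCatalog(data, segm, convolved_data=convolved, error=error,
                        background=bkg.background)
    print(cat.xcentroid, cat.ycentroid)         # both ~64 for the injected star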
@@ -130,45 +165,6 @@ def do_stage(self, image):
                 contour_radii = [radius_of_contour(contour, source) for contour in contours]
                 source[keyword] = 2.0 * np.nanmax(contour_radii)
 
-            # Calculate the windowed positions
-            sig = 2.0 / 2.35 * sources['fwhm']
-            xwin, ywin, flag = sep.winpos(data, sources['x'], sources['y'], sig)
-            sources['flag'] |= flag
-            sources['xwin'] = xwin
-            sources['ywin'] = ywin
-
-            # Calculate the average background at each source
-            bkgflux, fluxerr, flag = sep.sum_ellipse(bkg.back(), sources['x'], sources['y'],
-                                                     sources['a'], sources['b'], np.pi / 2.0,
-                                                     2.5 * sources['kronrad'], subpix=1)
-            # masksum, fluxerr, flag = sep.sum_ellipse(mask, sources['x'], sources['y'],
-            #                                          sources['a'], sources['b'], np.pi / 2.0,
-            #                                          2.5 * kronrad, subpix=1)
-
-            background_area = (2.5 * sources['kronrad']) ** 2.0 * sources['a'] * sources['b'] * np.pi  # - masksum
-            sources['background'] = bkgflux
-            sources['background'][background_area > 0] /= background_area[background_area > 0]
-
-            # Update the catalog to match fits convention instead of python array convention
-            sources['x'] += 1.0
-            sources['y'] += 1.0
-
-            sources['xpeak'] += 1
-            sources['ypeak'] += 1
-
-            sources['xwin'] += 1.0
-            sources['ywin'] += 1.0
-
-            sources['theta'] = np.degrees(sources['theta'])
-
-            catalog = sources['x', 'y', 'xwin', 'ywin', 'xpeak', 'ypeak',
-                              'flux', 'fluxerr', 'peak', 'fluxaper1', 'fluxerr1',
-                              'fluxaper2', 'fluxerr2', 'fluxaper3', 'fluxerr3',
-                              'fluxaper4', 'fluxerr4', 'fluxaper5', 'fluxerr5',
-                              'fluxaper6', 'fluxerr6', 'background', 'fwhm', 'fwtm',
-                              'a', 'b', 'theta', 'kronrad', 'ellipticity',
-                              'fluxrad25', 'fluxrad50', 'fluxrad75',
-                              'x2', 'y2', 'xy', 'flag']
-
             # Add the units and description to the catalogs
             catalog['x'].unit = 'pixel'
             catalog['x'].description = 'X coordinate of the object'
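For reference while reviewing the deletions above, the removed SEP measurements correspond roughly to SourceCatalog properties (continuing from the cat built in the previous sketch; the match is approximate, since Kron apertures and windowed positions are computed slightly differently in the two packages):

    kron_radius = cat.kron_radius                       # sep.kron_radius(...)
    flux_auto = cat.kron_flux                           # sep.sum_ellipse(..., 2.5 * kronrad), i.e. FLUX_AUTO
    flux_auto_err = cat.kron_fluxerr
    half_light_radius = cat.fluxfrac_radius(0.5)        # sep.flux_radius(..., 0.5, normflux=flux)
    xwin, ywin = cat.xcentroid_win, cat.ycentroid_win   # sep.winpos(...)
    mean_local_bkg = cat.background_mean                # sep.sum_ellipse(bkg.back(), ...) / area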
@@ -227,15 +223,15 @@ def do_stage(self, image):
             catalog.reverse()
 
             # Save some background statistics in the header
-            mean_background = stats.sigma_clipped_mean(bkg.back(), 5.0)
+            mean_background = stats.sigma_clipped_mean(bkg.background, 5.0)
             image.meta['L1MEAN'] = (mean_background,
                                     '[counts] Sigma clipped mean of frame background')
 
-            median_background = np.median(bkg.back())
+            median_background = np.median(bkg.background)
             image.meta['L1MEDIAN'] = (median_background,
                                       '[counts] Median of frame background')
 
-            std_background = stats.robust_standard_deviation(bkg.back())
+            std_background = stats.robust_standard_deviation(bkg.background)
             image.meta['L1SIGMA'] = (std_background,
                                      '[counts] Robust std dev of frame background')
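The three L1 header keywords summarize the Background2D model. banzai's stats helpers are internal; assuming a 5-sigma clipped mean and a MAD-scaled standard deviation (which is what robust_standard_deviation appears to be), astropy.stats gives close equivalents:

    import numpy as np
    from astropy.stats import mad_std, sigma_clipped_stats

    # bkg.background is the 2D model from Background2D in the earlier sketch
    mean_bkg, _, _ = sigma_clipped_stats(bkg.background, sigma=5.0)
    median_bkg = np.median(bkg.background)
    std_bkg = mad_std(bkg.background)   # robust, MAD-based alternative to np.std

    header = {'L1MEAN': mean_bkg, 'L1MEDIAN': median_bkg, 'L1SIGMA': std_bkg}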
diff --git a/banzai/settings.py b/banzai/settings.py
index d85197b1..f674657e 100644
--- a/banzai/settings.py
+++ b/banzai/settings.py
@@ -156,3 +156,6 @@
 CELERY_TASK_QUEUE_NAME = os.getenv('CELERY_TASK_QUEUE_NAME', 'celery')
 
 REFERENCE_CATALOG_URL = os.getenv('REFERENCE_CATALOG_URL', 'http://phot-catalog.lco.gtn/')
+
+# Number of cores to use for photometry deblending
+N_PHOT_CORES = 4
diff --git a/docs/index.rst b/docs/index.rst
index ac03b777..79ed3331 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -107,20 +107,18 @@ our models are 94% complete for our training data. Pixels flagged as cosmic-ray
 Source Detection
 ================
-Source detection uses the "Source Extraction in Python" (SEP; https://github.com/kbarbary/sep).
-This is similar to Source Extractor, but is written purely in Python and Cython. This allows more
-customization.
+Source detection uses image segmentation from astropy's photutils package.
+This is similar to Source Extractor, but is written purely in Python, which allows more
+customization than the original SExtractor.
 
 We estimate the background by taking a 3x3 median filter of the image and then doing a 32x32 block average of the image.
 
-We use the default match filter for source detection that is provided by SEP.
+We use the default match filter for source detection that is provided in Source Extractor.
+We include the proper match filter normalization though, so the extraction will be slightly different.
 
 We do aperture photometry using an elliptical aperture that is set by 2.5 times the Kron radius. This produces
 approximately the same results as ``FLUX_AUTO`` from SExtractor.
 
-We set the source detection limit at 3 times the global rms of the image. ``MINAREA`` is set to 5,
-the default. This should minimize false detections, but may miss the faintest sources.
+We set the source detection limit at 2.5 times the per-pixel uncertainty. ``MINAREA`` is set to 9.
+This should minimize false detections, but may miss the faintest sources.
 
 To assess the image quality, we estimate the full-width half maximum (FWHM) of the stars in the image. We reject
 any sources that have a FWHM of less than a pixel to ensure that they do not bias our results. The PSFs for LCO are
@@ -132,6 +130,13 @@ estimate of the FWHM. This ensures that we do not underestimate the FWHM when de
 we take the robust standard deviation (see below) to estimate the overall FWHM of the image. This value is recorded
 in the header under the L1FWHM keyword.
 
+Flags are as follows:
+
+- 1: Source has bad pixels in the image segmentation
+- 2: Object is deblended
+- 4: Source has saturated pixels in the image segmentation
+- 8: Source kron aperture falls off the image
+- 16: Source has cosmic ray pixels in the image segmentation
+
 Astrometry
 ==========
diff --git a/setup.cfg b/setup.cfg
index 3a8e574f..1e54b8bc 100755
--- a/setup.cfg
+++ b/setup.cfg
@@ -109,7 +109,8 @@ install_requires =
     cython
     mysql-connector-python
     lcogt_logging==0.3.2
-    sep
+    photutils
+    bottleneck
     kombu==4.4.0
     amqp==2.6.0
     requests
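A closing note on the FWHM measurement described in docs/index.rst: it is contour-based (via skimage, unchanged by this patch) rather than fit-based. A minimal illustration of the idea on a synthetic Gaussian star, where the diameter of the half-maximum contour should recover the true FWHM of 2.355 * sigma:

    import numpy as np
    from skimage import measure

    sigma = 2.0
    yy, xx = np.mgrid[:25, :25]
    star = np.exp(-((xx - 12) ** 2 + (yy - 12) ** 2) / (2 * sigma ** 2))

    # The contour at half the peak has radius ~1.177 * sigma for a Gaussian,
    # so twice that radius estimates the FWHM
    contours = measure.find_contours(star, 0.5 * star.max())
    radii = [np.max(np.hypot(c[:, 1] - 12, c[:, 0] - 12)) for c in contours]
    print(2.0 * max(radii), 2.355 * sigma)  # both ~4.7 pixels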