From 3e6dc1d758b97651e6a3840cd9a37d06712d5972 Mon Sep 17 00:00:00 2001 From: Akash Date: Wed, 12 Jul 2023 15:58:30 -0500 Subject: [PATCH 01/31] Pulled in Andrew's code for corrections and re-wrote the entire correction code --- vast_post_processing/catalogs.py | 298 +++++++++++++++++ vast_post_processing/cli/correct_vast.py | 158 +++++++-- vast_post_processing/cli/run_corrections.py | 346 ++++++++++++++++++++ vast_post_processing/corrections.py | 117 ++++++- vast_post_processing/crossmatch.py | 153 +++++++++ 5 files changed, 1046 insertions(+), 26 deletions(-) create mode 100644 vast_post_processing/catalogs.py create mode 100644 vast_post_processing/cli/run_corrections.py create mode 100644 vast_post_processing/crossmatch.py diff --git a/vast_post_processing/catalogs.py b/vast_post_processing/catalogs.py new file mode 100644 index 0000000..a75b000 --- /dev/null +++ b/vast_post_processing/catalogs.py @@ -0,0 +1,298 @@ +import logging +from pathlib import Path +from typing import Tuple, Union, Dict, Optional +from urllib.parse import quote + +from astropy.coordinates import SkyCoord +from astropy.table import Table, QTable, join +import astropy.units as u +import numpy as np +import pandas as pd + +logger = logging.getLogger(__name__) + +SELAVY_COLUMN_UNITS = { + "ra_deg_cont": u.deg, + "dec_deg_cont": u.deg, + "ra_err": u.arcsec, + "dec_err": u.arcsec, + "flux_peak": u.mJy / u.beam, + "flux_peak_err": u.mJy / u.beam, + "maj_axis": u.arcsec, + "maj_axis_err": u.arcsec, + "min_axis": u.arcsec, + "min_axis_err": u.arcsec, + "pos_ang": u.deg, + "pos_ang_err": u.deg, + "rms_image": u.mJy / u.beam, +} + + +class UnknownCatalogInputFormat(Exception): + pass + + +class Catalog: + CATALOG_TYPE_TILE = "TILE" + CATALOG_TYPE_COMBINED = "COMBINED" + CATALOG_TYPES = ( + CATALOG_TYPE_TILE, + CATALOG_TYPE_COMBINED, + ) + + def __init__( + self, + path: Path, + psf: Optional[Tuple[float, float]] = None, + input_format: str = "selavy", + condon: bool = False, + positive_fluxes_only: bool = True, + ): + self.path: Path + self.table: QTable + self.field: Optional[str] + self.epoch: Optional[str] + self.sbid: Optional[str] + self.psf_major: Optional[u.Quantity] + self.psf_minor: Optional[u.Quantity] + self.type: str + + # read catalog + if input_format == "selavy": + if path.suffix == ".txt": + logger.debug("Reading %s as a Selavy txt catalog.", path) + read_catalog = read_selavy + else: + logger.debug("Reading %s as a Selavy VOTable catalog.", path) + read_catalog = read_selavy_votable + elif input_format == "aegean": + logger.debug("Reading %s as an Aegean catalog.", path) + read_catalog = read_aegean_csv + else: + logger.error( + "The format of input files is not supported. 
Only selavy and aegean are supported" + ) + raise SystemExit + self.path = path + self.table = read_catalog(path) + + # filter sources with bad sizes and optionally negative/0 fluxes + if positive_fluxes_only: + logger.info( + "Filtering %d sources with fluxes <= 0.", + (self.table["flux_peak"] <= 0).sum(), + ) + self.table = self.table[self.table["flux_peak"] > 0] + logger.info( + "Filtering %d sources with fitted sizes <= 0.", + ((self.table["maj_axis"] <= 0) | (self.table["min_axis"] <= 0)).sum(), + ) + self.table = self.table[ + (self.table["maj_axis"] > 0) & (self.table["min_axis"] > 0) + ] + + # read epoch, field, sbid, psf's + epoch_name = path.parent.name + _, _, field, sbid, *_ = path.name.split(".") + self.epoch = epoch_name + self.field = field.replace("VAST_", "") + self.sbid = sbid + + if psf is not None: + self.psf_major, self.psf_minor = psf * u.arcsec + logger.debug( + "Using user provided PSF for %s: %s, %s.", + self.path, + self.psf_major, + self.psf_minor, + ) + else: + logger.warning( + "PSF is unknown for %s. Condon errors will be unavailable.", self.path + ) + self.psf_major = None + self.psf_minor = None + + if condon and self.psf_major is not None and self.psf_minor is not None: + _ = self.calculate_condon_flux_errors(correct_peak_for_noise=True) + logger.debug("Condon errors computed for %s.", self.path) + + def calculate_condon_flux_errors( + self, + alpha_maj1=2.5, + alpha_min1=0.5, + alpha_maj2=0.5, + alpha_min2=2.5, + alpha_maj3=1.5, + alpha_min3=1.5, + clean_bias=0.0, + clean_bias_error=0.0, + frac_flux_cal_error=0.0, + correct_peak_for_noise=False, + ): + noise = self.table["rms_image"] + snr = self.table["flux_peak"] / noise + + rho_sq3 = ( + ( + self.table["maj_axis"] + * self.table["min_axis"] + / (4.0 * self.psf_major * self.psf_minor) + ) + * (1.0 + (self.psf_major / self.table["maj_axis"]) ** 2) ** alpha_maj3 + * (1.0 + (self.psf_minor / self.table["min_axis"]) ** 2) ** alpha_min3 + * snr**2 + ) + + flux_peak_col = self.table["flux_peak"] + flux_peak_condon = self.table["flux_peak"] + ( + -(noise**2) / self.table["flux_peak"] + clean_bias + ) + if correct_peak_for_noise: + flux_peak_col = flux_peak_condon + + errorpeaksq = ( + (frac_flux_cal_error * flux_peak_col) ** 2 + + clean_bias_error**2 + + 2.0 * flux_peak_col**2 / rho_sq3 + ) + errorpeak = np.sqrt(errorpeaksq) + + self.table["flux_peak_condon"] = flux_peak_condon + self.table["flux_peak_selavy"] = self.table["flux_peak"] + self.table["flux_peak_err_condon"] = errorpeak + self.table["flux_peak_err_selavy"] = self.table["flux_peak_err"] + self.table["flux_peak_err"] = self.table["flux_peak_err_condon"] + if correct_peak_for_noise: + self.table["flux_peak"] = self.table["flux_peak_condon"] + return flux_peak_condon, errorpeak + + +def _convert_selavy_columns_to_quantites( + qt: QTable, units: Dict[str, u.Unit] = SELAVY_COLUMN_UNITS +) -> QTable: + for col, unit in units.items(): + qt[col].unit = unit + return qt + + +def read_selavy(catalog_path: Path) -> QTable: + """Read a Selavy fixed-width component catalog and return a QTable. + Assumed to contain at least the following columns with the given units: + - `ra_deg_cont` and `dec_deg_cont`: degrees. + - `ra_err` and `dec_err`: arcseconds. + - `flux_peak` and `flux_peak_err`: mJy/beam. + - `maj_axis`, `maj_axis_err`, `min_axis`, `min_axis_err`: arcseconds. + - `pos_ang` and `pos_ang_err`: degrees. + - `rms_image`: mJy/beam. + These columns will be converted to Astropy quantites assuming the above units. 
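For reference, a minimal stand-alone sketch of the Condon (1997) peak-flux uncertainty that calculate_condon_flux_errors computes above, with the calibration-error and clean-bias terms left at zero; all input numbers below are invented purely for illustration.

    import astropy.units as u
    import numpy as np

    # Invented example source and beam; real values come from the catalogue and image header.
    flux_peak = 5.0 * u.mJy / u.beam
    rms_image = 0.25 * u.mJy / u.beam
    maj_axis, min_axis = 15.0 * u.arcsec, 12.0 * u.arcsec    # fitted component size
    psf_major, psf_minor = 12.5 * u.arcsec, 10.0 * u.arcsec  # restoring beam

    snr = flux_peak / rms_image
    # Effective signal-to-noise (rho) for the peak flux, alpha = 3/2, 3/2 case
    rho_sq = (
        (maj_axis * min_axis / (4.0 * psf_major * psf_minor))
        * (1.0 + (psf_major / maj_axis) ** 2) ** 1.5
        * (1.0 + (psf_minor / min_axis) ** 2) ** 1.5
        * snr**2
    )
    # With frac_flux_cal_error = clean_bias_error = 0 the error reduces to sqrt(2/rho) * S_peak
    flux_peak_err = np.sqrt(2.0 * flux_peak**2 / rho_sq)
    print(flux_peak_err.to(u.mJy / u.beam))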
+ + Parameters + ---------- + catalog_path : Path + Path to the Selavy catalog file. + + Returns + ------- + QTable + Selavy catalog as a QTable, with extra columns: + - `coord`: `SkyCoord` object of the source coordinate. + - `nn_separation`: separation to the nearest-neighbour source as a Quantity with + angular units. + """ + df = pd.read_fwf(catalog_path, skiprows=[1]).drop(columns="#") + qt = _convert_selavy_columns_to_quantites(QTable.from_pandas(df)) + qt["coord"] = SkyCoord(ra=qt["ra_deg_cont"], dec=qt["dec_deg_cont"]) + _, qt["nn_separation"], _ = qt["coord"].match_to_catalog_sky( + qt["coord"], nthneighbor=2 + ) + return qt + + +def read_selavy_votable(catalog_path: Path) -> QTable: + t = Table.read(catalog_path, format="votable", use_names_over_ids=True) + # remove units from str columns and fix unrecognized flux units + for col in t.itercols(): + if col.dtype.kind == "U": + col.unit = None + elif col.unit == u.UnrecognizedUnit("mJy/beam"): + col.unit = u.Unit("mJy/beam") + qt = QTable(t) + qt["coord"] = SkyCoord(ra=qt["ra_deg_cont"], dec=qt["dec_deg_cont"]) + _, qt["nn_separation"], _ = qt["coord"].match_to_catalog_sky( + qt["coord"], nthneighbor=2 + ) + return qt + + +def read_hdf(catalog_path: Path) -> pd.DataFrame: + df = pd.read_hdf(catalog_path, key="data") + df["field"] = df.field.str.split(".", n=1, expand=True)[0] + qt = _convert_selavy_columns_to_quantites(QTable.from_pandas(df)) + qt["coord"] = SkyCoord(ra=qt["ra_deg_cont"], dec=qt["dec_deg_cont"]) + _, qt["nn_separation"], _ = qt["coord"].match_to_catalog_sky( + qt["coord"], nthneighbor=2 + ) + return qt + + +def read_aegean_csv(catalog_path: Path) -> QTable: + """Read an Aegean CSV component catalog and return a QTable. + Assumed to contain at least the following columns with the given units: + - `ra` and `dec`: degrees. + - `err_ra` and `err_dec`: degrees. + - `peak_flux` and `err_peak_flux`: Jy/beam. + - `a`, `err_a`, `b`, `err_b`: fitted semi-major and -minor axes in arcseconds. + - `pa` and `err_pa`: degrees. + - `local_rms`: Jy/beam. + These columns will be converted to Astropy quantites assuming the above units. + + Parameters + ---------- + catalog_path : Path + Path to the Selavy catalog file. + + Returns + ------- + QTable + Aegean component catalog as a QTable, with extra columns: + - `coord`: `SkyCoord` object of the source coordinate. + - `nn_separation`: separation to the nearest-neighbour source as a Quantity with + angular units. 
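All of the readers in this module attach the nn_separation column the same way: the catalogue is matched against itself with nthneighbor=2 so that the trivial self-match is skipped. A toy illustration with invented coordinates:

    from astropy.coordinates import SkyCoord
    import astropy.units as u

    coords = SkyCoord(ra=[10.0, 10.001, 10.5] * u.deg, dec=[-30.0, -30.0, -30.2] * u.deg)
    # nthneighbor=2: the nearest *other* source, not the source itself
    _, nn_separation, _ = coords.match_to_catalog_sky(coords, nthneighbor=2)
    print(nn_separation.to(u.arcsec))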
+ """ + AEGEAN_COLUMN_MAP = { + # aegean name: (selavy name, aegean unit) + "ra": ("ra_deg_cont", u.deg), + "dec": ("dec_deg_cont", u.deg), + "err_ra": ("ra_err", u.deg), + "err_dec": ("dec_err", u.deg), + "peak_flux": ("flux_peak", u.Jy / u.beam), + "err_peak_flux": ("flux_peak_err", u.Jy / u.beam), + "a": ("maj_axis", u.arcsec), + "b": ("min_axis", u.arcsec), + "pa": ("pos_ang", u.arcsec), + "err_a": ("maj_axis_err", u.arcsec), + "err_b": ("min_axis_err", u.deg), + "err_pa": ("pos_ang_err", u.deg), + "local_rms": ("rms_image", u.Jy / u.beam), + } + qt = QTable.read(catalog_path) + # rename columns to match selavy convention and assign units + for col, (new_col, unit) in AEGEAN_COLUMN_MAP.items(): + qt.rename_column(col, new_col) + qt[new_col].unit = unit + # add has_siblings column + island_source_counts = ( + qt[["island", "source"]].group_by("island").groups.aggregate(np.sum) + ) + island_source_counts.rename_column("source", "has_siblings") + island_source_counts["has_siblings"] = island_source_counts["has_siblings"].astype( + bool + ) + qt = join(qt, island_source_counts, keys="island", join_type="left") + + qt["coord"] = SkyCoord(ra=qt["ra_deg_cont"], dec=qt["dec_deg_cont"]) + _, qt["nn_separation"], _ = qt["coord"].match_to_catalog_sky( + qt["coord"], nthneighbor=2 + ) + return qt diff --git a/vast_post_processing/cli/correct_vast.py b/vast_post_processing/cli/correct_vast.py index 90cb6fe..a54686c 100644 --- a/vast_post_processing/cli/correct_vast.py +++ b/vast_post_processing/cli/correct_vast.py @@ -6,8 +6,59 @@ from loguru import logger import pandas as pd import typer +from astropy.table import QTable +from astropy.io import fits +from astropy import units as u -from vast_post_processing.corrections import shift_and_scale_catalog, shift_and_scale_image +from vast_post_processing.corrections import ( + shift_and_scale_catalog, + shift_and_scale_image, + calculate_positional_offsets, + calculate_flux_offsets, +) + + +def get_correct_correction_file(correction_files_list, epoch, img_field, img_sbid): + count = 0 + for f in chain.from_iterable(correction_files_list): + epoch_name = f.parent.name + if epoch_name in epoch: + filename = f.name + _, _, _, sbid, field, *_ = filename.split("_") + sbid = sbid.replace("-VAST", "") + field = field.replace(".csv", "") + if (sbid in img_sbid) & (field in img_field): + df = QTable.read(f) + flux_shifts = calculate_flux_offsets(df) + pos_shifts = calculate_positional_offsets(df) + count += 1 + return flux_shifts, pos_shifts + else: + continue + if count == 0: + return None, None + + +def get_psf_from_image(image_path: str): + """ + Funtion used to get the point spread function (PSF) extent in major and minor axis. + These will be in the header of the image file + + Parameters + ---------- + image_path: str + Path to the image file + + Returns + ------- + Tuple(psf_major, psf_minor) + Major and minor axes of the PSF. + """ + + hdu = fits.open(image_path) + psf_maj = hdu["BMAJ"] * u.degree + psf_min = hdu["BMIN"] * u.degree + return psf_maj.to(u.arcsec), psf_min.to(u.arcsec) def main( @@ -21,9 +72,13 @@ def main( file_okay=False, dir_okay=True, ), - vast_corrections_csv: Path = typer.Argument( - ..., - help="Path to VAST corrections CSV file produced by vast-xmatch.", + vast_corrections_csv_root: Path = typer.Option( + "/data/vast-survey/VAST/askap-surveys-database/vast/db/", + help=( + "Path to VAST corrections CSV file produced by vast-xmatch. Tries to use" + " the default path of these files. 
If not the user can override this by" + "giving a path to file" + ), exists=True, file_okay=True, dir_okay=False, @@ -49,13 +104,14 @@ def main( logger.add(sys.stderr, level="INFO") # read corrections - corrections_df = ( - pd.read_csv(vast_corrections_csv) - .set_index(["release_epoch", "field", "sbid"]) - .sort_index() - ) + # corrections_df = ( + # pd.read_csv(vast_corrections_csv) + # .set_index(["release_epoch", "field", "sbid"]) + # .sort_index() + # ) image_path_glob_list: list[Generator[Path, None, None]] = [] components_path_glob_list: list[Generator[Path, None, None]] = [] + correction_files_path_glob_list: list[Generator[Path, None, None]] = [] if epoch is None or len(epoch) == 0: image_path_glob_list.append( vast_tile_data_root.glob("STOKESI_IMAGES/epoch_*/*.fits") @@ -63,6 +119,9 @@ def main( components_path_glob_list.append( vast_tile_data_root.glob("STOKESI_SELAVY/epoch_*/*.components.xml") ) + correction_files_path_glob_list.append( + vast_corrections_csv_root.glob("epoch_*/cat_match_RACS0*.csv") + ) else: for n in epoch: image_path_glob_list.append( @@ -71,6 +130,9 @@ def main( components_path_glob_list.append( vast_tile_data_root.glob(f"STOKESI_SELAVY/epoch_{n}/*.components.xml") ) + correction_files_path_glob_list.append( + vast_corrections_csv_root.glob(f"epoch_{n}/cat_match_RACS0*.csv") + ) # correct images for image_path in chain.from_iterable(image_path_glob_list): @@ -92,14 +154,29 @@ def main( ) # get corrections skip = False - try: - corrections = corrections_df.loc[(epoch_dir, field, sbid)] - except KeyError: + # try: + # corrections = corrections_df.loc[(epoch_dir, field, sbid)] + # except KeyError: + # skip = True + # logger.warning( + # f"Corrections not found for {image_path} ({epoch_dir}, {field}," + # f" {sbid})." + # ) + flux_corrections, pos_corrections = get_correct_correction_file( + correction_files_list=correction_files_path_glob_list, + epoch=epoch_dir, + img_field=field, + img_sbid=sbid_str, + ) + if (flux_corrections is None) | (pos_corrections is None): skip = True logger.warning( f"Corrections not found for {image_path} ({epoch_dir}, {field}," f" {sbid})." 
) + else: + scale, offset, scale_err, offset_err = flux_corrections + dra_median, ddec_median, dra_madfm, ddec_madfm = pos_corrections if not rms_path.exists(): logger.warning(f"RMS image not found for {image_path}.") if not bkg_path.exists(): @@ -113,13 +190,22 @@ def main( stokes_dir = f"{path.parent.parent.name}_CORRECTED" output_dir = vast_tile_data_root / stokes_dir / epoch_dir output_dir.mkdir(parents=True, exist_ok=True) + # _ = shift_and_scale_image( + # path, + # output_dir, + # flux_scale=corrections.flux_peak_correction_multiplicative, + # flux_offset_mJy=corrections.flux_peak_correction_additive, + # ra_offset_arcsec=corrections.ra_correction, + # dec_offset_arcsec=corrections.dec_correction, + # overwrite=overwrite, + # ) _ = shift_and_scale_image( path, output_dir, - flux_scale=corrections.flux_peak_correction_multiplicative, - flux_offset_mJy=corrections.flux_peak_correction_additive, - ra_offset_arcsec=corrections.ra_correction, - dec_offset_arcsec=corrections.dec_correction, + flux_scale=scale, + flux_offset_mJy=offset, + ra_offset_arcsec=dra_median, + dec_offset_arcsec=ddec_median, overwrite=overwrite, ) @@ -134,14 +220,29 @@ def main( ) # get corrections skip = False - try: - corrections = corrections_df.loc[(epoch_dir, field, sbid)] - except KeyError: + # try: + # corrections = corrections_df.loc[(epoch_dir, field, sbid)] + # except KeyError: + # skip = True + # logger.warning( + # f"Corrections not found for {image_path} ({epoch_dir}, {field}," + # f" {sbid})." + # ) + flux_corrections, pos_corrections = get_correct_correction_file( + correction_files_list=correction_files_path_glob_list, + epoch=epoch_dir, + img_field=field, + img_sbid=sbid_str, + ) + if (flux_corrections is None) | (pos_corrections is None): skip = True logger.warning( - f"Corrections not found for {components_path} ({epoch_dir}, {field}," + f"Corrections not found for {image_path} ({epoch_dir}, {field}," f" {sbid})." 
) + else: + scale, offset, scale_err, offset_err = flux_corrections + dra_median, ddec_median, dra_madfm, ddec_madfm = pos_corrections if not islands_path.exists(): logger.warning(f"Islands catalogue not found for {components_path}.") skip = not islands_path.exists() or skip @@ -153,13 +254,22 @@ def main( stokes_dir = f"{path.parent.parent.name}_CORRECTED" output_dir = vast_tile_data_root / stokes_dir / epoch_dir output_dir.mkdir(parents=True, exist_ok=True) + # _ = shift_and_scale_catalog( + # path, + # output_dir, + # flux_scale=corrections.flux_peak_correction_multiplicative, + # flux_offset_mJy=corrections.flux_peak_correction_additive, + # ra_offset_arcsec=corrections.ra_correction, + # dec_offset_arcsec=corrections.dec_correction, + # overwrite=overwrite, + # ) _ = shift_and_scale_catalog( path, output_dir, - flux_scale=corrections.flux_peak_correction_multiplicative, - flux_offset_mJy=corrections.flux_peak_correction_additive, - ra_offset_arcsec=corrections.ra_correction, - dec_offset_arcsec=corrections.dec_correction, + flux_scale=scale, + flux_offset_mJy=offset, + ra_offset_arcsec=dra_median, + dec_offset_arcsec=ddec_median, overwrite=overwrite, ) diff --git a/vast_post_processing/cli/run_corrections.py b/vast_post_processing/cli/run_corrections.py new file mode 100644 index 0000000..da4e95f --- /dev/null +++ b/vast_post_processing/cli/run_corrections.py @@ -0,0 +1,346 @@ +from loguru import logger +from pathlib import Path +from typing import Optional, Tuple, Generator +from astropy.coordinates import Angle +import astropy.units as u +import click, sys, os +from uncertainties import ufloat +from itertools import chain +import pandas as pd +import typer +from astropy.table import QTable +from astropy.io import fits +from astropy import units as u +from vast_post_processing.catalogs import Catalog + +from vast_post_processing.corrections import ( + shift_and_scale_catalog, + shift_and_scale_image, + vast_xmatch_qc, +) + + +class _AstropyUnitType(click.ParamType): + def convert(self, value, param, ctx, unit_physical_type): + try: + unit = u.Unit(value) + except ValueError: + self.fail(f"astropy.units.Unit does not understand: {value}.") + if unit.physical_type != unit_physical_type: + self.fail( + f"{unit} is a {unit.physical_type} unit. It must be of type" + f" {unit_physical_type}." + ) + else: + return unit + + +class AngleUnitType(_AstropyUnitType): + name = "angle_unit" + + def convert(self, value, param, ctx): + return super().convert(value, param, ctx, "angle") + + +class FluxUnitType(_AstropyUnitType): + name = "flux_unit" + + def convert(self, value, param, ctx): + return super().convert(value, param, ctx, "spectral flux density") + + +class AngleQuantityType(click.ParamType): + name = "angle_quantity" + + def convert(self, value, param, ctx): + try: + angle = Angle(value) + return angle + except ValueError: + self.fail(f"astropy.coordinates.Angle does not understand: {value}.") + + +ANGLE_UNIT_TYPE = AngleUnitType() +FLUX_UNIT_TYPE = FluxUnitType() +ANGLE_QUANTITY_TYPE = AngleQuantityType() + + +def get_correct_correction_file(correction_files_list, img_field): + count = 0 + for f in chain.from_iterable(correction_files_list): + filename = f.name + _, _, field, *_ = filename.split(".") + field = field.replace("RACS", "VAST") + if field in img_field: + count += 1 + return f + else: + continue + if count == 0: + return None + + +def get_psf_from_image(image_path: str): + """ + Funtion used to get the point spread function (PSF) extent in major and minor axis. 
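A sketch of what this helper is meant to do, assuming the restoring-beam keywords BMAJ and BMIN sit in the primary header and are given in degrees (the path below is a placeholder). Note that the values have to be read via hdul[0].header rather than by indexing the HDUList with the keyword.

    from astropy.io import fits
    from astropy import units as u

    with fits.open("path/to/image.fits") as hdul:
        psf_major = hdul[0].header["BMAJ"] * u.deg  # restoring beam major axis
        psf_minor = hdul[0].header["BMIN"] * u.deg  # restoring beam minor axis
    print(psf_major.to(u.arcsec), psf_minor.to(u.arcsec))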
+ These will be in the header of the image file + + Parameters + ---------- + image_path: str + Path to the image file + + Returns + ------- + Tuple(psf_major, psf_minor) + Major and minor axes of the PSF. + """ + + hdu = fits.open(image_path) + psf_maj = hdu["BMAJ"] * u.degree + psf_min = hdu["BMIN"] * u.degree + hdu.close() + return (psf_maj.to(u.arcsec), psf_min.to(u.arcsec)) + + +def main( + vast_tile_data_root: Path = typer.Argument( + ..., + help=( + "Path to VAST TILES data directory, i.e. the directory that contains the" + " STOKES* directories." + ), + exists=True, + file_okay=False, + dir_okay=True, + ), + vast_corrections_root: Path = typer.Option( + "/data/vast-survey/RACS/release-format/EPOCH00/TILES/STOKESI_SELAVY", + help=( + "Path to RACS data that is can be used to correct VAST data. Tries to use" + " EPOCH00 as the defualt epoch. If not the user can override this by" + " giving a path to a folder that contain the selavy output" + ), + exists=True, + file_okay=False, + dir_okay=True, + ), + epoch: Optional[list[int]] = typer.Option( + None, + help=( + "Only correct the given observation epochs. Can be given multiple times," + " e.g. --epoch 1 --epoch 2. If no epochs are given (the default), then" + " correct all available epochs." + ), + ), + radius: Optional[ANGLE_QUANTITY_TYPE] = typer.Option( + "10 arcsec", + help=( + "Maximum separation limit for nearest-neighbour crossmatch. Accepts any " + "string understood by astropy.coordinates.Angle." + ), + ), + condon: Optional[bool] = typer.Option( + True, + help=( + "Calculate Condon (1997) flux errors and use them instead of the original " + "errors. Will also correct the peak flux values for noise. Requires that the " + "input images follow the VAST naming convention, for TILE images: EPOCH01/" + "TILES/STOKESI_IMAGES/selavy-image.i.SB9667.cont.VAST_0102-06A.linmos.taylor.0" + ".restored.conv.fits. Note that for TILE images, the epoch is determined " + "from the full path. If the input catalogs do not follow this convention, then " + "the PSF sizes must be supplied using --psf-reference and/or --psf. The " + "deafult behaviour is to lookup the PSF sizes from the header of the image" + ), + ), + psf_ref: Optional[list[float]] = typer.Option( + None, + help=( + "If using --condon but want to give the psfs manually, use this specified PSF size in " + "arcsec for `reference_catalog`. First argument is major axis followed by nimor axis." + ), + ), + psf: Optional[list[float]] = typer.Option( + None, + help=( + "If using --condon but want to give the psfs manually, use this specified PSF size in " + "arcsec for `catalof`. First argument is major axis followed by nimor axis." + ), + ), + overwrite: bool = False, + verbose: bool = False, +): + """Read astrometric and flux corrections produced by vast-xmatch and apply them to + VAST images and catalogues in vast-data. See https://github.com/marxide/vast-xmatch. 
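The --radius option above is documented to accept any string understood by astropy.coordinates.Angle, so values such as the following are all equivalent ways of expressing a crossmatch radius:

    from astropy.coordinates import Angle
    import astropy.units as u

    print(Angle("10 arcsec"), Angle("0.5 arcmin").to(u.arcsec), Angle("0.003 deg").to(u.arcsec))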
+ """ + # configure logger + if not verbose: + # replace the default sink + logger.remove() + logger.add(sys.stderr, level="INFO") + + # read corrections + image_path_glob_list: list[Generator[Path, None, None]] = [] + components_path_glob_list: list[Generator[Path, None, None]] = [] + correction_files_path_glob_list: list[Generator[Path, None, None]] = [] + + correction_files_path_glob_list.append(vast_corrections_root.glob("*.xml")) + + if epoch is None or len(epoch) == 0: + image_path_glob_list.append( + vast_tile_data_root.glob("STOKESI_IMAGES/epoch_*/*.fits") + ) + components_path_glob_list.append( + vast_tile_data_root.glob("STOKESI_SELAVY/epoch_*/*.components.xml") + ) + else: + for n in epoch: + image_path_glob_list.append( + vast_tile_data_root.glob(f"STOKESI_IMAGES/epoch_{n}/*.fits") + ) + components_path_glob_list.append( + vast_tile_data_root.glob(f"STOKESI_SELAVY/epoch_{n}/*.components.xml") + ) + + # construct output path to store corrections + corr_dir = vast_tile_data_root / "corr_db" + if not os.path.isdir(corr_dir): + os.mkdir(corr_dir) + + # get corrections for an image and the correct it + for image_path in chain.from_iterable(image_path_glob_list): + epoch_dir = image_path.parent.name + _, _, field, sbid_str, *_ = image_path.name.split(".") + sbid = int(sbid_str[2:]) + + # get rms and background images + rms_path = ( + vast_tile_data_root + / "STOKESI_RMSMAPS" + / epoch_dir + / f"noiseMap.{image_path.name}" + ) + bkg_path = ( + vast_tile_data_root + / "STOKESI_RMSMAPS" + / epoch_dir + / f"meanMap.{image_path.name}" + ) + + # construct output path to store corrections for each epoch + epoch_corr_dir = vast_tile_data_root / "corr_db" / epoch_dir + + if not os.path.isdir(epoch_corr_dir): + os.mkdir(epoch_corr_dir) + + ref_file = get_correct_correction_file( + correction_files_list=correction_files_path_glob_list, + img_field=field, + ) + + skip = False + if not rms_path.exists(): + logger.warning(f"RMS image not found for {image_path}.") + if not bkg_path.exists(): + logger.warning(f"Background image not found for {image_path}.") + skip = ( + not ((rms_path.exists()) and (bkg_path.exists()) and (ref_file is not None)) + or skip + ) + if skip: + if not ((rms_path.exists()) and (bkg_path.exists())): + logger.warning(f"Skipping {image_path}, RMS/BKG maps do not exist") + elif ref_file is None: + logger.warning(f"Skipping {image_path}, no reference field found.") + continue + else: + crossmatch_file = epoch_corr_dir / image_path.replace( + "components.xml", "corrections.csv" + ) + csv_file = epoch_corr_dir / "corrections.csv" + + # Get the psf measurements to estimate errors follwoing Condon 1997 + if psf_ref is not None: + psf_reference = psf_ref + else: + psf_reference = get_psf_from_image(ref_file) + + if psf is not None: + psf_image = psf + else: + psf_image = get_psf_from_image(image_path) + ( + dra_median_value, + ddec_median_value, + flux_corr_mult, + flux_corr_add, + ) = vast_xmatch_qc( + reference_catalog_path=ref_file, + catalog_path=image_path, + radius=Angle(radius), + condon=condon, + psf_reference=psf_reference, + psf=psf_image, + fix_m=False, + fix_b=False, + crossmatch_output=crossmatch_file, + csv_output=csv_file, + ) + + # get corrections + for path in (image_path, rms_path, bkg_path): + stokes_dir = f"{path.parent.parent.name}_CORRECTED" + output_dir = vast_tile_data_root / stokes_dir / epoch_dir + output_dir.mkdir(parents=True, exist_ok=True) + _ = shift_and_scale_image( + path, + output_dir, + flux_scale=flux_corr_mult, + flux_offset_mJy=flux_corr_add, + 
ra_offset_arcsec=dra_median_value, + dec_offset_arcsec=ddec_median_value, + overwrite=overwrite, + ) + + # Do the same for catalog files + # Look for any component and island files correspnding to this image + comp_files = [] + for p in list(components_path_glob_list[0]): + comp_file_name = p.name + comp_file_epoch = p.parent.name + if ( + (epoch_dir in comp_file_epoch) + and (field in comp_file_name) + and (f"SB{sbid}" in comp_file_name) + ): + comp_files.append(p) + + if len(comp_files) == 0: + logger.warning(f"Selavy catalogue not found for the image {image_path}") + elif len(comp_files) == 1: + if ".components" in comp_files[0].name: + logger.warning( + f"Islannd catalogue not found for the image {image_path}" + ) + else: + logger.warning( + f"Islannd catalogue not found for the image {image_path}" + ) + else: + for path in comp_files: + stokes_dir = f"{path.parent.parent.name}_CORRECTED" + output_dir = vast_tile_data_root / stokes_dir / epoch_dir + output_dir.mkdir(parents=True, exist_ok=True) + _ = shift_and_scale_catalog( + path, + output_dir, + flux_scale=flux_corr_mult, + flux_offset_mJy=flux_corr_add, + ra_offset_arcsec=dra_median_value, + dec_offset_arcsec=ddec_median_value, + overwrite=overwrite, + ) + + +if __name__ == "__main__": + typer.run(main) diff --git a/vast_post_processing/corrections.py b/vast_post_processing/corrections.py index 927a8f0..7c1d0f4 100644 --- a/vast_post_processing/corrections.py +++ b/vast_post_processing/corrections.py @@ -1,13 +1,126 @@ from pathlib import Path import warnings - -from astropy.coordinates import SkyCoord +from astropy.coordinates import SkyCoord, Angle from astropy.io import fits from astropy.io.votable import parse import astropy.units as u +from uncertainties import ufloat from astropy.wcs import WCS, FITSFixedWarning from loguru import logger import numpy as np +from typing import Tuple, Optional +from vast_post_processing.catalogs import Catalog +from vast_post_processing.crossmatch import ( + crossmatch_qtables, + calculate_positional_offsets, + calculate_flux_offsets, +) + + +def vast_xmatch_qc( + reference_catalog_path: str, + catalog_path: str, + radius: Angle = Angle("10arcsec"), + condon: bool = False, + psf_reference: Optional[Tuple[float, float]] = None, + psf: Optional[Tuple[float, float]] = None, + fix_m: bool = False, + fix_b: bool = False, + positional_unit: u.Unit = u.Unit("arcsec"), + flux_unit: u.Unit = u.Unit("mJy"), + crossmatch_output: Optional[str] = None, + csv_output: Optional[str] = None, +): + # convert catalog path strings to Path objects + reference_catalog_path = Path(reference_catalog_path) + catalog_path = Path(catalog_path) + flux_unit /= u.beam # add beam divisor as we currently only work with peak fluxes + + reference_catalog = Catalog( + reference_catalog_path, + psf=psf_reference, + condon=condon, + input_format="selavy", + ) + catalog = Catalog( + catalog_path, + psf=psf, + condon=condon, + input_format="selavy", + ) + + # perform the crossmatch + xmatch_qt = crossmatch_qtables(catalog, reference_catalog, radius=radius) + # select xmatches with non-zero flux errors and no siblings + logger.info("Removing crossmatched sources with siblings or flux peak errors = 0.") + mask = xmatch_qt["flux_peak_err"] > 0 + mask &= xmatch_qt["flux_peak_err_reference"] > 0 + mask &= xmatch_qt["has_siblings"] == 0 + mask &= xmatch_qt["has_siblings_reference"] == 0 + data = xmatch_qt[mask] + logger.info( + f"{len(data):.2f} crossmatched sources remaining ({(len(data) / len(xmatch_qt)) * 100:.2f}%).", + ) + + # Write 
the cross-match data into csv + if crossmatch_output is not None: + data.write("crossmatch.csv", overwrite=True) + # calculate positional offsets and flux ratio + dra_median, ddec_median, dra_madfm, ddec_madfm = calculate_positional_offsets(data) + dra_median_value = dra_median.to(positional_unit).value + dra_madfm_value = dra_madfm.to(positional_unit).value + ddec_median_value = ddec_median.to(positional_unit).value + ddec_madfm_value = ddec_madfm.to(positional_unit).value + logger.info( + f"dRA median: {dra_median_value:.2f} MADFM: {dra_madfm_value:.2f} {positional_unit}. dDec median: {ddec_median_value:.2f} MADFM: {ddec_madfm_value:.2f} {positional_unit}.", + ) + + gradient, offset, gradient_err, offset_err = calculate_flux_offsets( + data, fix_m=fix_m, fix_b=fix_b + ) + ugradient = ufloat(gradient, gradient_err) + uoffset = ufloat(offset.to(flux_unit).value, offset_err.to(flux_unit).value) + logger.info( + f"ODR fit parameters: Sp = Sp,ref * {ugradient} + {uoffset} {flux_unit}.", + ) + + flux_corr_mult = 1 / ugradient + flux_corr_add = -1 * uoffset + + if csv_output is not None: + # output has been requested + + if True: # csv_output is not None: + csv_output_path = Path(csv_output) # ensure Path object + sbid = catalog.sbid if catalog.sbid is not None else "" + if not csv_output_path.exists(): + f = open(csv_output_path, "w") + print( + "field,release_epoch,sbid,ra_correction,dec_correction,ra_madfm," + "dec_madfm,flux_peak_correction_multiplicative,flux_peak_correction_additive," + "flux_peak_correction_multiplicative_err,flux_peak_correction_additive_err," + "n_sources", + file=f, + ) + else: + f = open(csv_output_path, "a") + logger.info( + "Writing corrections CSV. To correct positions, add the corrections to" + " the original source positions i.e. RA' = RA + ra_correction /" + " cos(Dec). To correct fluxes, add the additive correction and multiply" + " the result by the multiplicative correction i.e. S' =" + " flux_peak_correction_multiplicative(S +" + " flux_peak_correction_additive)." 
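Applied to a single source, the convention described in that message might look like the sketch below (all numbers invented; the division by 3600 only converts the arcsecond corrections written to the CSV into degrees before adding them to the catalogue positions):

    import numpy as np

    ra_deg, dec_deg, flux_peak_mjy = 187.25, -6.35, 4.2   # original catalogue values
    ra_correction, dec_correction = 0.8, -0.5             # arcsec, as written to the CSV
    m, b = 1.02, -0.15                                    # multiplicative / additive flux terms

    ra_corrected = ra_deg + (ra_correction / 3600.0) / np.cos(np.radians(dec_deg))
    dec_corrected = dec_deg + dec_correction / 3600.0
    flux_corrected = m * (flux_peak_mjy + b)              # S' = m * (S + b)
    print(ra_corrected, dec_corrected, flux_corrected)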
+ ) + print( + f"{catalog.field},{catalog.epoch},{sbid},{dra_median_value * -1}," + f"{ddec_median_value * -1},{dra_madfm_value},{ddec_madfm_value}," + f"{flux_corr_mult.nominal_value},{flux_corr_add.nominal_value}," + f"{flux_corr_mult.std_dev},{flux_corr_add.std_dev},{len(data)}", + file=f, + ) + f.close() + return dra_median_value, ddec_median_value, flux_corr_mult, flux_corr_add def shift_and_scale_image( diff --git a/vast_post_processing/crossmatch.py b/vast_post_processing/crossmatch.py new file mode 100644 index 0000000..bdbdc3c --- /dev/null +++ b/vast_post_processing/crossmatch.py @@ -0,0 +1,153 @@ +import logging +from typing import Tuple + +from astropy.coordinates import SkyCoord, Angle, match_coordinates_sky +from astropy.table import QTable, join, join_skycoord +import astropy.units as u +import numpy as np +from scipy import odr + +from vast_post_processing.catalogs import Catalog + + +logger = logging.getLogger(__name__) + + +def median_abs_deviation(data): + median = np.median(data) + return np.median(np.abs(data - median)) + + +def straight_line(B, x): + m, b = B + return m * x + b + + +def join_match_coordinates_sky( + coords1: SkyCoord, coords2: SkyCoord, seplimit: u.arcsec +): + idx, separation, dist_3d = match_coordinates_sky(coords1, coords2) + mask = separation < seplimit + return np.where(mask)[0], idx[mask], separation[mask], dist_3d[mask] + + +def crossmatch_qtables( + catalog: Catalog, + catalog_reference: Catalog, + radius: Angle = Angle("10 arcsec"), + catalog_coord_cols: Tuple[str, str] = ("ra_deg_cont", "dec_deg_cont"), + catalog_reference_coord_cols: Tuple[str, str] = ("ra_deg_cont", "dec_deg_cont"), +) -> QTable: + catalog_ra, catalog_dec = catalog_coord_cols + catalog_reference_ra, catalog_reference_dec = catalog_reference_coord_cols + + logger.debug("Using crossmatch radius: %s.", radius) + + xmatch = join( + catalog.table, + catalog_reference.table, + keys="coord", + table_names=["", "reference"], + join_funcs={ + "coord": join_skycoord(radius, distance_func=join_match_coordinates_sky) + }, + ) + # remove trailing _ from catalog column names + xmatch.rename_columns( + [col for col in xmatch.colnames if col.endswith("_")], + [col.rstrip("_") for col in xmatch.colnames if col.endswith("_")], + ) + # compute the separations + xmatch["separation"] = xmatch["coord_reference"].separation(xmatch["coord"]) + xmatch["dra"], xmatch["ddec"] = xmatch["coord_reference"].spherical_offsets_to( + xmatch["coord"] + ) + xmatch["flux_peak_ratio"] = ( + xmatch["flux_peak"] / xmatch["flux_peak_reference"] + ).decompose() + + logger.info( + "Num cross-matches: %d. Num cross-matches to unique reference source: %d" + " (%d%%).", + len(xmatch), + len(set(xmatch["coord_id"])), + (len(set(xmatch["coord_id"])) / len(xmatch)) * 100, + ) + + return xmatch + + +def calculate_positional_offsets( + xmatch_qt: QTable, +) -> Tuple[u.Quantity, u.Quantity, u.Quantity, u.Quantity]: + """Calculate the median positional offsets and the median absolute deviation between + matched sources. + + Parameters + ---------- + xmatch_qt : QTable + QTable of crossmatched sources. Must contain columns: dra, ddec. + + Returns + ------- + Tuple[u.Quantity, u.Quantity, u.Quantity, u.Quantity] + Median RA offset, median Dec offset, median absolute deviation of RA offsets, + median absolute deviation of Dec offsets. Units match their inputs and are of + angular type. 
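Numerically this boils down to a median and a median absolute deviation from the median (MADFM) per axis; a toy example with invented offsets standing in for the crossmatch "dra" column:

    import numpy as np
    import astropy.units as u

    dra = np.array([0.6, 0.9, 0.4, 1.1]) * u.arcsec
    dra_median = np.median(dra)
    dra_madfm = np.median(np.abs(dra - dra_median))
    print(dra_median, dra_madfm)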
+ """ + dra_median = np.median(xmatch_qt["dra"]) + dra_madfm = median_abs_deviation(xmatch_qt["dra"]) + ddec_median = np.median(xmatch_qt["ddec"]) + ddec_madfm = median_abs_deviation(xmatch_qt["ddec"]) + + return dra_median, ddec_median, dra_madfm, ddec_madfm + + +def calculate_flux_offsets( + xmatch_qt: QTable, + init_m: float = 1.0, + init_b: float = 0.0, + fix_m: bool = False, + fix_b: bool = False, +) -> Tuple[float, u.Quantity, float, u.Quantity]: + """Calculate the gradient and offset of a straight-line fit to the peak fluxes for + crossmatched sources. The function `y = mx + b` is fit to the reference peak fluxes + vs the peak fluxes using orthogonal distance regression with `scipy.odr`. + + Parameters + ---------- + xmatch_qt : QTable + QTable of crossmatched sources. Must contain columns: flux_peak, + flux_peak_reference, flux_peak_err, flux_peak_err_reference. + init_m : float + Initial gradient parameter passed to the fitting function, default 1.0. + init_b : float + Initial offset parameter passed to the fitting function, default 0.0. + fix_m : bool + If True, do not allow the gradient to vary during fitting, default False. + fix_b : bool + If True, do not allow the offest to vary during fitting, default False. + + Returns + ------- + Tuple[float, u.Quantity, float, u.Quantity] + Model fit parameters: the gradient, intercept (offset), gradient error, and + intercept error. Offset and offset error unit match the reference flux peak + input and are of spectral flux density type. + """ + ifixb = [0 if fix_m else 1, 0 if fix_b else 1] + flux_unit = xmatch_qt["flux_peak_reference"].unit + linear_model = odr.Model(straight_line) + # convert all to reference flux unit as ODR does not preserve Quantity objects + odr_data = odr.RealData( + xmatch_qt["flux_peak_reference"].to(flux_unit).value, + xmatch_qt["flux_peak"].to(flux_unit).value, + sx=xmatch_qt["flux_peak_err_reference"].to(flux_unit).value, + sy=xmatch_qt["flux_peak_err"].to(flux_unit).value, + ) + odr_obj = odr.ODR(odr_data, linear_model, beta0=[init_m, init_b], ifixb=ifixb) + odr_out = odr_obj.run() + gradient, offset = odr_out.beta + gradient_err, offset_err = odr_out.sd_beta + + return gradient, offset * flux_unit, gradient_err, offset_err * flux_unit From ce61320dea60d2bf1baa8305e342db12875bb583 Mon Sep 17 00:00:00 2001 From: Akash Date: Wed, 12 Jul 2023 18:20:47 -0500 Subject: [PATCH 02/31] Cleaned up minor naming issues with variables --- vast_post_processing/cli/run_corrections.py | 92 +++++++++++---------- vast_post_processing/corrections.py | 2 +- 2 files changed, 50 insertions(+), 44 deletions(-) diff --git a/vast_post_processing/cli/run_corrections.py b/vast_post_processing/cli/run_corrections.py index da4e95f..6f1426f 100644 --- a/vast_post_processing/cli/run_corrections.py +++ b/vast_post_processing/cli/run_corrections.py @@ -71,9 +71,9 @@ def get_correct_correction_file(correction_files_list, img_field): filename = f.name _, _, field, *_ = filename.split(".") field = field.replace("RACS", "VAST") - if field in img_field: + if (field in img_field) and ("components" in filename): count += 1 - return f + return f.as_posix() else: continue if count == 0: @@ -95,10 +95,12 @@ def get_psf_from_image(image_path: str): Tuple(psf_major, psf_minor) Major and minor axes of the PSF. 
""" - + image_path = image_path.replace("SELAVY", "IMAGES") + image_path = image_path.replace("selavy-", "") + image_path = image_path.replace(".components.xml", ".fits") hdu = fits.open(image_path) - psf_maj = hdu["BMAJ"] * u.degree - psf_min = hdu["BMIN"] * u.degree + psf_maj = hdu[0].header["BMAJ"] * u.degree + psf_min = hdu[0].header["BMIN"] * u.degree hdu.close() return (psf_maj.to(u.arcsec), psf_min.to(u.arcsec)) @@ -133,8 +135,8 @@ def main( " correct all available epochs." ), ), - radius: Optional[ANGLE_QUANTITY_TYPE] = typer.Option( - "10 arcsec", + radius: Optional[float] = typer.Option( + 10, help=( "Maximum separation limit for nearest-neighbour crossmatch. Accepts any " "string understood by astropy.coordinates.Angle." @@ -164,7 +166,7 @@ def main( None, help=( "If using --condon but want to give the psfs manually, use this specified PSF size in " - "arcsec for `catalof`. First argument is major axis followed by nimor axis." + "arcsec for `catalog`. First argument is major axis followed by nimor axis." ), ), overwrite: bool = False, @@ -228,7 +230,7 @@ def main( ) # construct output path to store corrections for each epoch - epoch_corr_dir = vast_tile_data_root / "corr_db" / epoch_dir + epoch_corr_dir = corr_dir / epoch_dir if not os.path.isdir(epoch_corr_dir): os.mkdir(epoch_corr_dir) @@ -243,8 +245,37 @@ def main( logger.warning(f"RMS image not found for {image_path}.") if not bkg_path.exists(): logger.warning(f"Background image not found for {image_path}.") + + # Look for any component and island files correspnding to this image + comp_files = [] + for p in list(components_path_glob_list[0]): + comp_file_name = p.name + comp_file_epoch = p.parent.name + if ( + (epoch_dir in comp_file_epoch) + and (field in comp_file_name) + and (f"SB{sbid}" in comp_file_name) + ): + comp_files.append(p) + + component_file = None + island_file = None + if len(comp_files) == 0: + logger.warning(f"Selavy catalogue not found for the image {image_path}") + else: + for i in comp_files: + if "components" in i.as_posix(): + component_file = i + elif "islands" in i.as_posix(): + island_file = i + skip = ( - not ((rms_path.exists()) and (bkg_path.exists()) and (ref_file is not None)) + not ( + (rms_path.exists()) + and (bkg_path.exists()) + and (ref_file is not None) + and (component_file is not None) + ) or skip ) if skip: @@ -254,10 +285,9 @@ def main( logger.warning(f"Skipping {image_path}, no reference field found.") continue else: - crossmatch_file = epoch_corr_dir / image_path.replace( - "components.xml", "corrections.csv" - ) - csv_file = epoch_corr_dir / "corrections.csv" + fname = image_path.name.replace(".fits", "corrections.csv") + crossmatch_file = epoch_corr_dir / fname + csv_file = epoch_corr_dir / "all_fields_corrections.csv" # Get the psf measurements to estimate errors follwoing Condon 1997 if psf_ref is not None: @@ -268,7 +298,7 @@ def main( if psf is not None: psf_image = psf else: - psf_image = get_psf_from_image(image_path) + psf_image = get_psf_from_image(image_path.as_posix()) ( dra_median_value, ddec_median_value, @@ -276,8 +306,8 @@ def main( flux_corr_add, ) = vast_xmatch_qc( reference_catalog_path=ref_file, - catalog_path=image_path, - radius=Angle(radius), + catalog_path=component_file.as_posix(), + radius=Angle(radius * u.arcsec), condon=condon, psf_reference=psf_reference, psf=psf_image, @@ -302,32 +332,8 @@ def main( overwrite=overwrite, ) - # Do the same for catalog files - # Look for any component and island files correspnding to this image - comp_files = [] - for p 
in list(components_path_glob_list[0]): - comp_file_name = p.name - comp_file_epoch = p.parent.name - if ( - (epoch_dir in comp_file_epoch) - and (field in comp_file_name) - and (f"SB{sbid}" in comp_file_name) - ): - comp_files.append(p) - - if len(comp_files) == 0: - logger.warning(f"Selavy catalogue not found for the image {image_path}") - elif len(comp_files) == 1: - if ".components" in comp_files[0].name: - logger.warning( - f"Islannd catalogue not found for the image {image_path}" - ) - else: - logger.warning( - f"Islannd catalogue not found for the image {image_path}" - ) - else: - for path in comp_files: + # Do the same for catalog files + for path in (component_file, island_file): stokes_dir = f"{path.parent.parent.name}_CORRECTED" output_dir = vast_tile_data_root / stokes_dir / epoch_dir output_dir.mkdir(parents=True, exist_ok=True) diff --git a/vast_post_processing/corrections.py b/vast_post_processing/corrections.py index 7c1d0f4..95ada94 100644 --- a/vast_post_processing/corrections.py +++ b/vast_post_processing/corrections.py @@ -64,7 +64,7 @@ def vast_xmatch_qc( # Write the cross-match data into csv if crossmatch_output is not None: - data.write("crossmatch.csv", overwrite=True) + data.write(crossmatch_output, overwrite=True) # calculate positional offsets and flux ratio dra_median, ddec_median, dra_madfm, ddec_madfm = calculate_positional_offsets(data) dra_median_value = dra_median.to(positional_unit).value From 270ec3eec7165afde8d539bfd5f1bb18b4279ae1 Mon Sep 17 00:00:00 2001 From: Akash Date: Wed, 12 Jul 2023 18:24:10 -0500 Subject: [PATCH 03/31] Removed redundant code --- vast_post_processing/cli/correct_vast.py | 278 ----------------------- 1 file changed, 278 deletions(-) delete mode 100644 vast_post_processing/cli/correct_vast.py diff --git a/vast_post_processing/cli/correct_vast.py b/vast_post_processing/cli/correct_vast.py deleted file mode 100644 index a54686c..0000000 --- a/vast_post_processing/cli/correct_vast.py +++ /dev/null @@ -1,278 +0,0 @@ -from itertools import chain -from pathlib import Path -import sys -from typing import Optional, Generator - -from loguru import logger -import pandas as pd -import typer -from astropy.table import QTable -from astropy.io import fits -from astropy import units as u - -from vast_post_processing.corrections import ( - shift_and_scale_catalog, - shift_and_scale_image, - calculate_positional_offsets, - calculate_flux_offsets, -) - - -def get_correct_correction_file(correction_files_list, epoch, img_field, img_sbid): - count = 0 - for f in chain.from_iterable(correction_files_list): - epoch_name = f.parent.name - if epoch_name in epoch: - filename = f.name - _, _, _, sbid, field, *_ = filename.split("_") - sbid = sbid.replace("-VAST", "") - field = field.replace(".csv", "") - if (sbid in img_sbid) & (field in img_field): - df = QTable.read(f) - flux_shifts = calculate_flux_offsets(df) - pos_shifts = calculate_positional_offsets(df) - count += 1 - return flux_shifts, pos_shifts - else: - continue - if count == 0: - return None, None - - -def get_psf_from_image(image_path: str): - """ - Funtion used to get the point spread function (PSF) extent in major and minor axis. - These will be in the header of the image file - - Parameters - ---------- - image_path: str - Path to the image file - - Returns - ------- - Tuple(psf_major, psf_minor) - Major and minor axes of the PSF. 
- """ - - hdu = fits.open(image_path) - psf_maj = hdu["BMAJ"] * u.degree - psf_min = hdu["BMIN"] * u.degree - return psf_maj.to(u.arcsec), psf_min.to(u.arcsec) - - -def main( - vast_tile_data_root: Path = typer.Argument( - ..., - help=( - "Path to VAST TILES data directory, i.e. the directory that contains the" - " STOKES* directories." - ), - exists=True, - file_okay=False, - dir_okay=True, - ), - vast_corrections_csv_root: Path = typer.Option( - "/data/vast-survey/VAST/askap-surveys-database/vast/db/", - help=( - "Path to VAST corrections CSV file produced by vast-xmatch. Tries to use" - " the default path of these files. If not the user can override this by" - "giving a path to file" - ), - exists=True, - file_okay=True, - dir_okay=False, - ), - epoch: Optional[list[int]] = typer.Option( - None, - help=( - "Only correct the given observation epochs. Can be given multiple times," - " e.g. --epoch 1 --epoch 2. If no epochs are given (the default), then" - " correct all available epochs." - ), - ), - overwrite: bool = False, - verbose: bool = False, -): - """Read astrometric and flux corrections produced by vast-xmatch and apply them to - VAST images and catalogues in vast-data. See https://github.com/marxide/vast-xmatch. - """ - # configure logger - if not verbose: - # replace the default sink - logger.remove() - logger.add(sys.stderr, level="INFO") - - # read corrections - # corrections_df = ( - # pd.read_csv(vast_corrections_csv) - # .set_index(["release_epoch", "field", "sbid"]) - # .sort_index() - # ) - image_path_glob_list: list[Generator[Path, None, None]] = [] - components_path_glob_list: list[Generator[Path, None, None]] = [] - correction_files_path_glob_list: list[Generator[Path, None, None]] = [] - if epoch is None or len(epoch) == 0: - image_path_glob_list.append( - vast_tile_data_root.glob("STOKESI_IMAGES/epoch_*/*.fits") - ) - components_path_glob_list.append( - vast_tile_data_root.glob("STOKESI_SELAVY/epoch_*/*.components.xml") - ) - correction_files_path_glob_list.append( - vast_corrections_csv_root.glob("epoch_*/cat_match_RACS0*.csv") - ) - else: - for n in epoch: - image_path_glob_list.append( - vast_tile_data_root.glob(f"STOKESI_IMAGES/epoch_{n}/*.fits") - ) - components_path_glob_list.append( - vast_tile_data_root.glob(f"STOKESI_SELAVY/epoch_{n}/*.components.xml") - ) - correction_files_path_glob_list.append( - vast_corrections_csv_root.glob(f"epoch_{n}/cat_match_RACS0*.csv") - ) - - # correct images - for image_path in chain.from_iterable(image_path_glob_list): - epoch_dir = image_path.parent.name - _, _, field, sbid_str, *_ = image_path.name.split(".") - sbid = int(sbid_str[2:]) - # get rms and background images - rms_path = ( - vast_tile_data_root - / "STOKESI_RMSMAPS" - / epoch_dir - / f"noiseMap.{image_path.name}" - ) - bkg_path = ( - vast_tile_data_root - / "STOKESI_RMSMAPS" - / epoch_dir - / f"meanMap.{image_path.name}" - ) - # get corrections - skip = False - # try: - # corrections = corrections_df.loc[(epoch_dir, field, sbid)] - # except KeyError: - # skip = True - # logger.warning( - # f"Corrections not found for {image_path} ({epoch_dir}, {field}," - # f" {sbid})." - # ) - flux_corrections, pos_corrections = get_correct_correction_file( - correction_files_list=correction_files_path_glob_list, - epoch=epoch_dir, - img_field=field, - img_sbid=sbid_str, - ) - if (flux_corrections is None) | (pos_corrections is None): - skip = True - logger.warning( - f"Corrections not found for {image_path} ({epoch_dir}, {field}," - f" {sbid})." 
- ) - else: - scale, offset, scale_err, offset_err = flux_corrections - dra_median, ddec_median, dra_madfm, ddec_madfm = pos_corrections - if not rms_path.exists(): - logger.warning(f"RMS image not found for {image_path}.") - if not bkg_path.exists(): - logger.warning(f"Background image not found for {image_path}.") - skip = not (rms_path.exists() and bkg_path.exists()) or skip - if skip: - logger.warning(f"Skipping {image_path}.") - continue - - for path in (image_path, rms_path, bkg_path): - stokes_dir = f"{path.parent.parent.name}_CORRECTED" - output_dir = vast_tile_data_root / stokes_dir / epoch_dir - output_dir.mkdir(parents=True, exist_ok=True) - # _ = shift_and_scale_image( - # path, - # output_dir, - # flux_scale=corrections.flux_peak_correction_multiplicative, - # flux_offset_mJy=corrections.flux_peak_correction_additive, - # ra_offset_arcsec=corrections.ra_correction, - # dec_offset_arcsec=corrections.dec_correction, - # overwrite=overwrite, - # ) - _ = shift_and_scale_image( - path, - output_dir, - flux_scale=scale, - flux_offset_mJy=offset, - ra_offset_arcsec=dra_median, - dec_offset_arcsec=ddec_median, - overwrite=overwrite, - ) - - # correct catalogs - for components_path in chain.from_iterable(components_path_glob_list): - epoch_dir = components_path.parent.name - _, _, field, sbid_str, *_ = components_path.name.split(".") - sbid = int(sbid_str[2:]) - # get island catalog - islands_path = components_path.with_name( - components_path.name.replace(".components", ".islands") - ) - # get corrections - skip = False - # try: - # corrections = corrections_df.loc[(epoch_dir, field, sbid)] - # except KeyError: - # skip = True - # logger.warning( - # f"Corrections not found for {image_path} ({epoch_dir}, {field}," - # f" {sbid})." - # ) - flux_corrections, pos_corrections = get_correct_correction_file( - correction_files_list=correction_files_path_glob_list, - epoch=epoch_dir, - img_field=field, - img_sbid=sbid_str, - ) - if (flux_corrections is None) | (pos_corrections is None): - skip = True - logger.warning( - f"Corrections not found for {image_path} ({epoch_dir}, {field}," - f" {sbid})." 
- ) - else: - scale, offset, scale_err, offset_err = flux_corrections - dra_median, ddec_median, dra_madfm, ddec_madfm = pos_corrections - if not islands_path.exists(): - logger.warning(f"Islands catalogue not found for {components_path}.") - skip = not islands_path.exists() or skip - if skip: - logger.warning(f"Skipping {components_path}.") - continue - - for path in (components_path, islands_path): - stokes_dir = f"{path.parent.parent.name}_CORRECTED" - output_dir = vast_tile_data_root / stokes_dir / epoch_dir - output_dir.mkdir(parents=True, exist_ok=True) - # _ = shift_and_scale_catalog( - # path, - # output_dir, - # flux_scale=corrections.flux_peak_correction_multiplicative, - # flux_offset_mJy=corrections.flux_peak_correction_additive, - # ra_offset_arcsec=corrections.ra_correction, - # dec_offset_arcsec=corrections.dec_correction, - # overwrite=overwrite, - # ) - _ = shift_and_scale_catalog( - path, - output_dir, - flux_scale=scale, - flux_offset_mJy=offset, - ra_offset_arcsec=dra_median, - dec_offset_arcsec=ddec_median, - overwrite=overwrite, - ) - - -if __name__ == "__main__": - typer.run(main) From 66919e1e078ef68a40a17c7b5663cb21722d54a7 Mon Sep 17 00:00:00 2001 From: Akash Date: Fri, 14 Jul 2023 01:09:46 -0500 Subject: [PATCH 04/31] Fixed quantities with units; component files matching made easy --- vast_post_processing/cli/run_corrections.py | 51 +++++++-------------- 1 file changed, 17 insertions(+), 34 deletions(-) diff --git a/vast_post_processing/cli/run_corrections.py b/vast_post_processing/cli/run_corrections.py index 6f1426f..99bb051 100644 --- a/vast_post_processing/cli/run_corrections.py +++ b/vast_post_processing/cli/run_corrections.py @@ -67,7 +67,7 @@ def convert(self, value, param, ctx): def get_correct_correction_file(correction_files_list, img_field): count = 0 - for f in chain.from_iterable(correction_files_list): + for f in correction_files_list: filename = f.name _, _, field, *_ = filename.split(".") field = field.replace("RACS", "VAST") @@ -187,13 +187,14 @@ def main( correction_files_path_glob_list: list[Generator[Path, None, None]] = [] correction_files_path_glob_list.append(vast_corrections_root.glob("*.xml")) + correction_files_path_glob_list = list(correction_files_path_glob_list[0]) if epoch is None or len(epoch) == 0: image_path_glob_list.append( vast_tile_data_root.glob("STOKESI_IMAGES/epoch_*/*.fits") ) components_path_glob_list.append( - vast_tile_data_root.glob("STOKESI_SELAVY/epoch_*/*.components.xml") + vast_tile_data_root.glob("STOKESI_SELAVY/epoch_*/*.xml") ) else: for n in epoch: @@ -201,7 +202,7 @@ def main( vast_tile_data_root.glob(f"STOKESI_IMAGES/epoch_{n}/*.fits") ) components_path_glob_list.append( - vast_tile_data_root.glob(f"STOKESI_SELAVY/epoch_{n}/*.components.xml") + vast_tile_data_root.glob(f"STOKESI_SELAVY/epoch_{n}/*.xml") ) # construct output path to store corrections @@ -247,27 +248,9 @@ def main( logger.warning(f"Background image not found for {image_path}.") # Look for any component and island files correspnding to this image - comp_files = [] - for p in list(components_path_glob_list[0]): - comp_file_name = p.name - comp_file_epoch = p.parent.name - if ( - (epoch_dir in comp_file_epoch) - and (field in comp_file_name) - and (f"SB{sbid}" in comp_file_name) - ): - comp_files.append(p) - - component_file = None - island_file = None - if len(comp_files) == 0: - logger.warning(f"Selavy catalogue not found for the image {image_path}") - else: - for i in comp_files: - if "components" in i.as_posix(): - component_file = i - elif 
"islands" in i.as_posix(): - island_file = i + + component_file = Path(ref_file) + island_file = Path(ref_file.replace("components", "islands")) skip = ( not ( @@ -290,12 +273,12 @@ def main( csv_file = epoch_corr_dir / "all_fields_corrections.csv" # Get the psf measurements to estimate errors follwoing Condon 1997 - if psf_ref is not None: + if len(psf_ref) > 0: psf_reference = psf_ref else: psf_reference = get_psf_from_image(ref_file) - if psf is not None: + if len(psf) > 0: psf_image = psf else: psf_image = get_psf_from_image(image_path.as_posix()) @@ -325,10 +308,10 @@ def main( _ = shift_and_scale_image( path, output_dir, - flux_scale=flux_corr_mult, - flux_offset_mJy=flux_corr_add, - ra_offset_arcsec=dra_median_value, - dec_offset_arcsec=ddec_median_value, + flux_scale=flux_corr_mult.n, + flux_offset_mJy=flux_corr_add.n, + ra_offset_arcsec=dra_median_value.item(), + dec_offset_arcsec=ddec_median_value.item(), overwrite=overwrite, ) @@ -340,10 +323,10 @@ def main( _ = shift_and_scale_catalog( path, output_dir, - flux_scale=flux_corr_mult, - flux_offset_mJy=flux_corr_add, - ra_offset_arcsec=dra_median_value, - dec_offset_arcsec=ddec_median_value, + flux_scale=flux_corr_mult.n, + flux_offset_mJy=flux_corr_add.n, + ra_offset_arcsec=dra_median_value.item(), + dec_offset_arcsec=ddec_median_value.item(), overwrite=overwrite, ) From 842267dd41c3500d4638da923c99f52dbec1aa9b Mon Sep 17 00:00:00 2001 From: Hansen Date: Sun, 6 Aug 2023 17:15:29 -0400 Subject: [PATCH 05/31] Start of reorg --- vast_post_processing/__init__.py | 8 ++++++++ vast_post_processing/cli/__init__.py | 12 ++++++++++++ vast_post_processing/utils/__init__.py | 6 ++++++ vast_post_processing/utils/fileutils.py | 6 ++++++ vast_post_processing/utils/misc.py | 1 + vast_post_processing/validation.py | 5 +++++ 6 files changed, 38 insertions(+) create mode 100644 vast_post_processing/cli/__init__.py create mode 100644 vast_post_processing/utils/__init__.py create mode 100644 vast_post_processing/utils/fileutils.py create mode 100644 vast_post_processing/utils/misc.py create mode 100644 vast_post_processing/validation.py diff --git a/vast_post_processing/__init__.py b/vast_post_processing/__init__.py index e69de29..a26f4d7 100644 --- a/vast_post_processing/__init__.py +++ b/vast_post_processing/__init__.py @@ -0,0 +1,8 @@ +import combine +import corrections +import crop +import neighbours +import validation + +import utils +import cli diff --git a/vast_post_processing/cli/__init__.py b/vast_post_processing/cli/__init__.py new file mode 100644 index 0000000..9ed0ea9 --- /dev/null +++ b/vast_post_processing/cli/__init__.py @@ -0,0 +1,12 @@ +# +# The CLI bindings for VAST Post-processing +# + +import _util +import cleanup +import convolve_neighbours +import correct_vast +import link_neighbours +import run_crop +import selavy_combined +import swarp diff --git a/vast_post_processing/utils/__init__.py b/vast_post_processing/utils/__init__.py new file mode 100644 index 0000000..8420b50 --- /dev/null +++ b/vast_post_processing/utils/__init__.py @@ -0,0 +1,6 @@ +# +# Utility functions for VAST Post-processing +# + +import misc +import fileutils diff --git a/vast_post_processing/utils/fileutils.py b/vast_post_processing/utils/fileutils.py new file mode 100644 index 0000000..8502dc9 --- /dev/null +++ b/vast_post_processing/utils/fileutils.py @@ -0,0 +1,6 @@ +""" +Utility Functions for Files +""" + + +# Move cleanup functions/logic here diff --git a/vast_post_processing/utils/misc.py b/vast_post_processing/utils/misc.py new file mode 100644 
index 0000000..9058a05 --- /dev/null +++ b/vast_post_processing/utils/misc.py @@ -0,0 +1 @@ +# Miscellaneous Utilities diff --git a/vast_post_processing/validation.py b/vast_post_processing/validation.py new file mode 100644 index 0000000..f7e6bbd --- /dev/null +++ b/vast_post_processing/validation.py @@ -0,0 +1,5 @@ +""" + +Validation generation code + +""" From fcd07cdc5ffde07b2ba1771b95107f69c78c5254 Mon Sep 17 00:00:00 2001 From: Hansen Date: Sun, 6 Aug 2023 19:08:58 -0400 Subject: [PATCH 06/31] created scripts directory --- scripts/combined-fix-timestamps.py | 136 +++++++++++++++++++++++++++++ 1 file changed, 136 insertions(+) create mode 100644 scripts/combined-fix-timestamps.py diff --git a/scripts/combined-fix-timestamps.py b/scripts/combined-fix-timestamps.py new file mode 100644 index 0000000..e5d16de --- /dev/null +++ b/scripts/combined-fix-timestamps.py @@ -0,0 +1,136 @@ +#!/usr/bin/env python +# coding: utf-8 + +from dataclasses import dataclass +from pathlib import Path +from astropy.io import fits +from astropy.time import Time +import astropy.units as u +import pandas as pd +from tqdm import tqdm + + +@dataclass +class VastNeighbour: + field: str + sbid: int + filename: str + date_start_isot: str + date_end_isot: str + combined_field: str + combined_release_epoch: str + combined_main_field: bool + + +def combined_date_obs(df) -> str: + main_field_obs = df.query("combined_main_field").reset_index().set_index("sbid") + idx = main_field_obs.date_start_isot.idxmin() + return main_field_obs.loc[idx][["date_start_isot", "date_end_isot"]] + + +VAST_REPO = Path("/data/VAST/askap-surveys-database/vast/db") +RACS_REPO = Path("/data/VAST/askap-surveys-database/racs/db") + +# read the VAST metadata +vast_df = pd.DataFrame() +for field_data_path in VAST_REPO.glob("epoch_*/field_data.csv"): + vast_df = vast_df.append(pd.read_csv(field_data_path)) +vast_df["DATE-BEG"] = pd.to_datetime( + Time(vast_df["SCAN_START"].values * u.s, format="mjd").isot +) +vast_df["DATE-END"] = vast_df["DATE-BEG"] + pd.to_timedelta( + vast_df["SCAN_LEN"], unit="sec" +) + +# read the RACS metadata +racs_df = pd.DataFrame() +for field_data_path in RACS_REPO.glob("epoch_[01]/field_data.csv"): + racs_df = racs_df.append(pd.read_csv(field_data_path)) +racs_df["DATE-BEG"] = pd.to_datetime( + Time(racs_df["SCAN_START"].values * u.s, format="mjd").isot +) +racs_df["DATE-END"] = racs_df["DATE-BEG"] + pd.to_timedelta( + racs_df["SCAN_LEN"], unit="sec" +) +racs_df["FIELD_NAME"] = racs_df["FIELD_NAME"].str.replace("RACS", "VAST") + +# add racs to vast +vast_df = pd.concat((vast_df, racs_df)) + +# remove duplicates +vast_df = vast_df.drop_duplicates(subset=["FIELD_NAME", "SBID"], keep=False) +vast_df = ( + vast_df[["FIELD_NAME", "SBID", "DATE-BEG", "DATE-END"]] + .set_index(["FIELD_NAME", "SBID"]) + .sort_index() +) + +# get the neighbours from the hard-links generated by ~/vast-post-processing/link_neighbours.py +neighbours_list = [] +for field_dir_path in Path("/data/.staging/convolved/").glob("EPOCH*/VAST_*"): + combined_field = field_dir_path.name + combined_release_epoch = field_dir_path.parent.name + + for input_field_path in (field_dir_path / "inputs").glob("image.i.VAST_*.fits"): + _, _, field, sbid_str, *_ = input_field_path.name.split(".") + sbid = int(sbid_str[2:]) + # use the metadata instead of the imager header, some of the image headers + # appeared incorrect e.g. duplicate field observation had the same DATE-OBS + # which is impossible! 
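+ # vast_df is indexed on (FIELD_NAME, SBID) with duplicated observations dropped above, + # so each lookup below resolves to a single metadata row for this field and SBID.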
+ date_start_meta = Time(vast_df.loc[(field, sbid), "DATE-BEG"].isoformat()) + date_end_meta = Time(vast_df.loc[(field, sbid), "DATE-END"].isoformat()) + neighbours_list.append( + VastNeighbour( + field=field, + sbid=sbid, + filename=input_field_path.name, + date_start_isot=pd.Timestamp(date_start_meta.utc.isot), + date_end_isot=pd.Timestamp(date_end_meta.utc.isot), + combined_field=combined_field, + combined_release_epoch=combined_release_epoch, + combined_main_field=(field == combined_field), + ) + ) +neighbours_df = ( + pd.DataFrame(neighbours_list) + .set_index(["combined_field", "combined_release_epoch"]) + .sort_index() +) + +combined_timestamps = neighbours_df.groupby( + ["combined_field", "combined_release_epoch"] +).apply(combined_date_obs) + +# update headers +epochs = "EPOCH14" # "EPOCH*" +for image_path in tqdm( + list( + Path("/data/VAST/vast-data/COMBINED/STOKESI_IMAGES/").glob( + f"{epochs}/VAST_*.fits" + ) + ) +): + combined_field = image_path.name.split(".")[0] + combined_release_epoch = image_path.parent.name + with fits.open(image_path, mode="update") as hdul: + date_start = Time( + combined_timestamps.loc[ + (combined_field, combined_release_epoch), "date_start_isot" + ] + ) + date_end = Time( + combined_timestamps.loc[ + (combined_field, combined_release_epoch), "date_end_isot" + ] + ) + hdul[0].header["DATE-OBS"] = date_start.utc.isot + hdul[0].header["DATE-BEG"] = date_start.utc.isot + hdul[0].header["DATE-END"] = date_end.utc.isot + hdul[0].header["MJD-OBS"] = date_start.utc.mjd + hdul[0].header["MJD-BEG"] = date_start.utc.mjd + hdul[0].header["MJD-END"] = date_end.utc.mjd + hdul[0].header.add_history( + "Set dates to earliest observation of the main central field. Edges may" + " contain data from other dates." + ) + # closing the file will save the changes, i.e. 
when the "with" block exits From b417e3462ce5476cc51b5dc8a8034258c03d7bcb Mon Sep 17 00:00:00 2001 From: Hansen Date: Sun, 6 Aug 2023 19:09:31 -0400 Subject: [PATCH 07/31] created docker directory --- .dockerignore => docker/.dockerignore | 0 Dockerfile => docker/Dockerfile | 0 build_singularity.sh => docker/build_singularity.sh | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename .dockerignore => docker/.dockerignore (100%) rename Dockerfile => docker/Dockerfile (100%) rename build_singularity.sh => docker/build_singularity.sh (100%) diff --git a/.dockerignore b/docker/.dockerignore similarity index 100% rename from .dockerignore rename to docker/.dockerignore diff --git a/Dockerfile b/docker/Dockerfile similarity index 100% rename from Dockerfile rename to docker/Dockerfile diff --git a/build_singularity.sh b/docker/build_singularity.sh similarity index 100% rename from build_singularity.sh rename to docker/build_singularity.sh From f69668167bc4dbe54a13479443b18c7da35cec0e Mon Sep 17 00:00:00 2001 From: Hansen Date: Sun, 6 Aug 2023 19:13:47 -0400 Subject: [PATCH 08/31] scripts directory --- combined-fix-timestamps.py | 136 ------------------------------------- 1 file changed, 136 deletions(-) delete mode 100644 combined-fix-timestamps.py diff --git a/combined-fix-timestamps.py b/combined-fix-timestamps.py deleted file mode 100644 index e5d16de..0000000 --- a/combined-fix-timestamps.py +++ /dev/null @@ -1,136 +0,0 @@ -#!/usr/bin/env python -# coding: utf-8 - -from dataclasses import dataclass -from pathlib import Path -from astropy.io import fits -from astropy.time import Time -import astropy.units as u -import pandas as pd -from tqdm import tqdm - - -@dataclass -class VastNeighbour: - field: str - sbid: int - filename: str - date_start_isot: str - date_end_isot: str - combined_field: str - combined_release_epoch: str - combined_main_field: bool - - -def combined_date_obs(df) -> str: - main_field_obs = df.query("combined_main_field").reset_index().set_index("sbid") - idx = main_field_obs.date_start_isot.idxmin() - return main_field_obs.loc[idx][["date_start_isot", "date_end_isot"]] - - -VAST_REPO = Path("/data/VAST/askap-surveys-database/vast/db") -RACS_REPO = Path("/data/VAST/askap-surveys-database/racs/db") - -# read the VAST metadata -vast_df = pd.DataFrame() -for field_data_path in VAST_REPO.glob("epoch_*/field_data.csv"): - vast_df = vast_df.append(pd.read_csv(field_data_path)) -vast_df["DATE-BEG"] = pd.to_datetime( - Time(vast_df["SCAN_START"].values * u.s, format="mjd").isot -) -vast_df["DATE-END"] = vast_df["DATE-BEG"] + pd.to_timedelta( - vast_df["SCAN_LEN"], unit="sec" -) - -# read the RACS metadata -racs_df = pd.DataFrame() -for field_data_path in RACS_REPO.glob("epoch_[01]/field_data.csv"): - racs_df = racs_df.append(pd.read_csv(field_data_path)) -racs_df["DATE-BEG"] = pd.to_datetime( - Time(racs_df["SCAN_START"].values * u.s, format="mjd").isot -) -racs_df["DATE-END"] = racs_df["DATE-BEG"] + pd.to_timedelta( - racs_df["SCAN_LEN"], unit="sec" -) -racs_df["FIELD_NAME"] = racs_df["FIELD_NAME"].str.replace("RACS", "VAST") - -# add racs to vast -vast_df = pd.concat((vast_df, racs_df)) - -# remove duplicates -vast_df = vast_df.drop_duplicates(subset=["FIELD_NAME", "SBID"], keep=False) -vast_df = ( - vast_df[["FIELD_NAME", "SBID", "DATE-BEG", "DATE-END"]] - .set_index(["FIELD_NAME", "SBID"]) - .sort_index() -) - -# get the neighbours from the hard-links generated by ~/vast-post-processing/link_neighbours.py -neighbours_list = [] -for field_dir_path in 
Path("/data/.staging/convolved/").glob("EPOCH*/VAST_*"): - combined_field = field_dir_path.name - combined_release_epoch = field_dir_path.parent.name - - for input_field_path in (field_dir_path / "inputs").glob("image.i.VAST_*.fits"): - _, _, field, sbid_str, *_ = input_field_path.name.split(".") - sbid = int(sbid_str[2:]) - # use the metadata instead of the imager header, some of the image headers - # appeared incorrect e.g. duplicate field observation had the same DATE-OBS - # which is impossible! - date_start_meta = Time(vast_df.loc[(field, sbid), "DATE-BEG"].isoformat()) - date_end_meta = Time(vast_df.loc[(field, sbid), "DATE-END"].isoformat()) - neighbours_list.append( - VastNeighbour( - field=field, - sbid=sbid, - filename=input_field_path.name, - date_start_isot=pd.Timestamp(date_start_meta.utc.isot), - date_end_isot=pd.Timestamp(date_end_meta.utc.isot), - combined_field=combined_field, - combined_release_epoch=combined_release_epoch, - combined_main_field=(field == combined_field), - ) - ) -neighbours_df = ( - pd.DataFrame(neighbours_list) - .set_index(["combined_field", "combined_release_epoch"]) - .sort_index() -) - -combined_timestamps = neighbours_df.groupby( - ["combined_field", "combined_release_epoch"] -).apply(combined_date_obs) - -# update headers -epochs = "EPOCH14" # "EPOCH*" -for image_path in tqdm( - list( - Path("/data/VAST/vast-data/COMBINED/STOKESI_IMAGES/").glob( - f"{epochs}/VAST_*.fits" - ) - ) -): - combined_field = image_path.name.split(".")[0] - combined_release_epoch = image_path.parent.name - with fits.open(image_path, mode="update") as hdul: - date_start = Time( - combined_timestamps.loc[ - (combined_field, combined_release_epoch), "date_start_isot" - ] - ) - date_end = Time( - combined_timestamps.loc[ - (combined_field, combined_release_epoch), "date_end_isot" - ] - ) - hdul[0].header["DATE-OBS"] = date_start.utc.isot - hdul[0].header["DATE-BEG"] = date_start.utc.isot - hdul[0].header["DATE-END"] = date_end.utc.isot - hdul[0].header["MJD-OBS"] = date_start.utc.mjd - hdul[0].header["MJD-BEG"] = date_start.utc.mjd - hdul[0].header["MJD-END"] = date_end.utc.mjd - hdul[0].header.add_history( - "Set dates to earliest observation of the main central field. Edges may" - " contain data from other dates." - ) - # closing the file will save the changes, i.e. when the "with" block exits From a9065403d8401a1cb0d2cf3d19e3a068beae8e91 Mon Sep 17 00:00:00 2001 From: Hansen Date: Sun, 6 Aug 2023 19:14:36 -0400 Subject: [PATCH 09/31] added READMEs to explain directories --- docker/README.md | 10 ++++++++++ docs/README.md | 35 +++++++++++++++++++++++++++++++++++ examples/README.md | 15 +++++++++++++++ scripts/README.md | 7 +++++++ 4 files changed, 67 insertions(+) create mode 100644 docker/README.md create mode 100644 docs/README.md create mode 100644 examples/README.md create mode 100644 scripts/README.md diff --git a/docker/README.md b/docker/README.md new file mode 100644 index 0000000..5855ee2 --- /dev/null +++ b/docker/README.md @@ -0,0 +1,10 @@ +# Docker + +This directory contains the relevant Docker files for this project's Docker +services. + +## Included + +1. `.dockerignore` +2. `build_singularity.sh` +3. `Dockerfile` \ No newline at end of file diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 0000000..3b5702c --- /dev/null +++ b/docs/README.md @@ -0,0 +1,35 @@ +# Documentation + +This directory contains the documentation for this project. 
+ +The modules of this project are documented using docstrings +in [numpydoc](https://numpydoc.readthedocs.io/en/latest/format.html) style and +comments, and formatted using code blocking, parentheses, and other +[PEP8](https://peps.python.org/pep-0008/) style guidelines, using [the Black +formatter](https://black.readthedocs.io/en/stable/the_black_code_style/current_style.html). + +They are auto-generated in `HTML` format with [`sphinx`](https://www.sphinx-doc.org/en/master/index.html). + +## Included + +1. `source/` + 1. `conf.py` + 2. `index.rst` + 3. `modules.rst` + 4. `vast_post_processing.rst` +2. `make.bat` +3. `Makefile` + +## Instructions + +To view `sphinx` documentation for this project, navigate to the root of the +package (i.e. `vast-post-processing`), and +run +``` +poetry install +poetry shell +cd docs +make html +``` +The pages are built in `vast-post-processing/docs/build/html`, and you can load +the index page by opening `index.html` in a browser. diff --git a/examples/README.md b/examples/README.md new file mode 100644 index 0000000..b42e8c6 --- /dev/null +++ b/examples/README.md @@ -0,0 +1,15 @@ +# Examples + +This directory contains program runtime examples. + +## Included + +1. `mortimer/` + 1. `01-convolve_neighbours.sbatch` + 2. `02-swarp.sbatch` + 3. `03-selavy_setup.sbatch` + 4. `04-selavy_submit.sbatch` + 5. `05-rsync_outputs.sh` + 6. `field_list.txt` + 7. `selavy_template.in` + 8. `selavy_template.sbatch` \ No newline at end of file diff --git a/scripts/README.md b/scripts/README.md new file mode 100644 index 0000000..bca0fcc --- /dev/null +++ b/scripts/README.md @@ -0,0 +1,7 @@ +# Scripts + +This directory contains standalone scripts. + +## Included + +1. `combined-fix-timestamps` From 852ca0e6ae33e14c5cfa1b6bb34541f1adaef9b8 Mon Sep 17 00:00:00 2001 From: Hansen Date: Sun, 6 Aug 2023 19:14:54 -0400 Subject: [PATCH 10/31] moved documentation instructions to docs --- README.md | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/README.md b/README.md index b668a3c..e75445d 100644 --- a/README.md +++ b/README.md @@ -10,20 +10,6 @@ This repository holds the code of VAST Post Processing. ## Screenshots and Previews -## Documentation - -To view `sphinx` documentation for this project, navigate to the root of the package, and -enter the following commands - -``` -poetry install -poetry shell -cd docs -make html -``` -The pages will be built in `vast-post-processing/docs/build/html`, and you can -load the index page by opening `index.html` in a browser. 
- - ## Contributors * Andrew O'Brien – [Department of Physics, University of Wisconsin-Milwaukee](https://uwm.edu/physics/research/astronomy-gravitation-cosmology/) From d0fb95854f59546a21e59b1af4a68c7c0fc0c5c0 Mon Sep 17 00:00:00 2001 From: Hansen Date: Wed, 9 Aug 2023 13:47:27 -0400 Subject: [PATCH 11/31] separating logic - cleanup module --- vast_post_processing/cli/cleanup.py | 19 ++----------------- vast_post_processing/utils/fileutils.py | 22 +++++++++++++++++++++- 2 files changed, 23 insertions(+), 18 deletions(-) diff --git a/vast_post_processing/cli/cleanup.py b/vast_post_processing/cli/cleanup.py index 3ec0c5c..4b0b4bd 100644 --- a/vast_post_processing/cli/cleanup.py +++ b/vast_post_processing/cli/cleanup.py @@ -8,24 +8,9 @@ from pathlib import Path from shutil import rmtree -from loguru import logger import typer - -def cleanup_directory(directory: Path): - DELETE_EXT = (".fits", ".ann", ".txt", ".xml") - DELETE_DIR = ("inputs", "tmp") - - for path in directory.iterdir(): - if path.is_file(): - if path.suffix in DELETE_EXT: - path.unlink() - logger.info(f"Deleted file {path}.") - elif path.is_dir() and path.name in DELETE_DIR: - rmtree(path) - logger.info(f"Deleted directory {path}.") - else: - logger.debug(f"Leaving {path}.") +from vast_post_processing.utils import fileutils def main(neighbour_data_dir: Path, delete_all: bool = False): @@ -33,7 +18,7 @@ def main(neighbour_data_dir: Path, delete_all: bool = False): rmtree(neighbour_data_dir) else: for field_path in neighbour_data_dir.glob("VAST_*"): - cleanup_directory(field_path) + fileutils.cleanup_directory(field_path) if __name__ == "__main__": diff --git a/vast_post_processing/utils/fileutils.py b/vast_post_processing/utils/fileutils.py index 8502dc9..a6c4ec9 100644 --- a/vast_post_processing/utils/fileutils.py +++ b/vast_post_processing/utils/fileutils.py @@ -2,5 +2,25 @@ Utility Functions for Files """ - # Move cleanup functions/logic here + +from pathlib import Path +from shutil import rmtree + +from loguru import logger + + +def cleanup_directory(directory: Path): + DELETE_EXT = (".fits", ".ann", ".txt", ".xml") + DELETE_DIR = ("inputs", "tmp") + + for path in directory.iterdir(): + if path.is_file(): + if path.suffix in DELETE_EXT: + path.unlink() + logger.info(f"Deleted file {path}.") + elif path.is_dir() and path.name in DELETE_DIR: + rmtree(path) + logger.info(f"Deleted directory {path}.") + else: + logger.debug(f"Leaving {path}.") From 66d50f99b381832e70de39a0d8f799972619d7bc Mon Sep 17 00:00:00 2001 From: Hansen Date: Wed, 9 Aug 2023 17:12:24 -0400 Subject: [PATCH 12/31] moved logic from cli calls into relevant modules --- tests/__init__.py | 0 vast_post_processing/cli/cleanup.py | 7 +- .../cli/convolve_neighbours.py | 85 +----- vast_post_processing/cli/correct_vast.py | 133 +-------- vast_post_processing/cli/link_neighbours.py | 67 +---- vast_post_processing/cli/selavy_combined.py | 69 +---- vast_post_processing/cli/swarp.py | 177 +----------- vast_post_processing/combine.py | 254 ++++++++++++++++++ vast_post_processing/corrections.py | 144 ++++++++++ vast_post_processing/crop.py | 174 ++++++------ vast_post_processing/neighbours.py | 171 ++++++++++++ vast_post_processing/utils/fileutils.py | 8 + 12 files changed, 688 insertions(+), 601 deletions(-) delete mode 100644 tests/__init__.py diff --git a/tests/__init__.py b/tests/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/vast_post_processing/cli/cleanup.py b/vast_post_processing/cli/cleanup.py index 4b0b4bd..bce46b0 100644 --- 
a/vast_post_processing/cli/cleanup.py +++ b/vast_post_processing/cli/cleanup.py @@ -6,7 +6,6 @@ """ from pathlib import Path -from shutil import rmtree import typer @@ -14,11 +13,7 @@ def main(neighbour_data_dir: Path, delete_all: bool = False): - if delete_all: - rmtree(neighbour_data_dir) - else: - for field_path in neighbour_data_dir.glob("VAST_*"): - fileutils.cleanup_directory(field_path) + fileutils.cleanup(neighbour_data_dir, delete_all) if __name__ == "__main__": diff --git a/vast_post_processing/cli/convolve_neighbours.py b/vast_post_processing/cli/convolve_neighbours.py index 2251049..9d2fee3 100644 --- a/vast_post_processing/cli/convolve_neighbours.py +++ b/vast_post_processing/cli/convolve_neighbours.py @@ -1,44 +1,16 @@ """Requires setup_neighbours.py to be run first. """ -from dataclasses import dataclass, fields -from functools import partial from pathlib import Path from typing import Optional, List -from loguru import logger -from racs_tools import beamcon_2D -from radio_beam import Beam import typer -from vast_post_processing.cli._util import get_pool, _get_worker_name -from vast_post_processing.neighbours import convolve_image +from vast_post_processing import neighbours app = typer.Typer() -@dataclass -class WorkerArgs: - image_path: Path - output_dir_path: Path - target_beam: Beam - mode: str - suffix: str = "sm" - prefix: Optional[str] = None - cutoff: Optional[float] = None - dry_run: bool = False - - def __iter__(self): - # Makes the class fields iterable so they can be unpacked - # e.g. func(*args) where args is a WorkerArgs object. - return (getattr(self, field.name) for field in fields(self)) - - -def worker(args: WorkerArgs, mpi: bool = False, n_proc: int = 1): - with logger.contextualize(worker_name=_get_worker_name(mpi=mpi, n_proc=n_proc)): - return convolve_image(*args) - - @app.command() def main( neighbour_data_dir: Path, @@ -48,55 +20,6 @@ def main( racs: bool = False, field_list: Optional[List[str]] = typer.Option(None, "--field"), ): - # neighbour_data_dir has the structure: - # //inputs contains the input FITS images - # to be convolved to a common resolution and their weights FITS images. - - pool = get_pool(mpi=mpi, n_proc=n_proc) - logger.debug(f"pool created, type: {type(pool)}") - - glob_expr = "RACS_*" if racs else "VAST_*" - worker_args_list: list[WorkerArgs] = [] - n_images: int = 0 - for field_dir in neighbour_data_dir.glob(glob_expr): - if field_list and field_dir.name not in field_list: - logger.info( - f"Glob found field {field_dir} but it was not given as a --field option. Skipping." - ) - continue - if max_images is not None and n_images >= max_images: - logger.warning( - f"Reached maximum image limit of {max_images}. Skipping remaining images." - ) - break - if len(list(field_dir.glob("*.sm.fits"))) > 0: - logger.warning(f"Smoothed images already exist in {field_dir}. 
Skipping.") - continue - image_path_list = list(field_dir.glob("inputs/image.*.fits")) - logger.debug( - f"Found {len(image_path_list)} images for {field_dir.name}" - ) - # find the smallest common beam - common_beam, _ = beamcon_2D.getmaxbeam(image_path_list) - logger.debug( - f"{field_dir} common beam major {common_beam.major} type" - f" {type(common_beam)}" - ) - for image_path in image_path_list: - worker_args = WorkerArgs( - image_path=image_path, - output_dir_path=field_dir, - target_beam=common_beam, - mode="robust", - ) - worker_args_list.append(worker_args) - n_images += 1 - if max_images is not None and n_images >= max_images: - logger.warning( - f"Reached maximum image limit of {max_images}. Skipping remaining images." - ) - break - - # start convolutions - _ = list(pool.map(partial(worker, mpi=mpi, n_proc=n_proc), worker_args_list)) - pool.close() + neighbours.convolve_neighbours( + neighbour_data_dir, n_proc, mpi, max_images, racs, field_list + ) diff --git a/vast_post_processing/cli/correct_vast.py b/vast_post_processing/cli/correct_vast.py index 90cb6fe..611208e 100644 --- a/vast_post_processing/cli/correct_vast.py +++ b/vast_post_processing/cli/correct_vast.py @@ -1,13 +1,8 @@ -from itertools import chain from pathlib import Path -import sys -from typing import Optional, Generator - -from loguru import logger -import pandas as pd +from typing import Optional import typer -from vast_post_processing.corrections import shift_and_scale_catalog, shift_and_scale_image +from vast_post_processing import corrections def main( @@ -39,129 +34,9 @@ def main( overwrite: bool = False, verbose: bool = False, ): - """Read astrometric and flux corrections produced by vast-xmatch and apply them to - VAST images and catalogues in vast-data. See https://github.com/marxide/vast-xmatch. - """ - # configure logger - if not verbose: - # replace the default sink - logger.remove() - logger.add(sys.stderr, level="INFO") - - # read corrections - corrections_df = ( - pd.read_csv(vast_corrections_csv) - .set_index(["release_epoch", "field", "sbid"]) - .sort_index() + corrections.correct_vast( + vast_tile_data_root, vast_corrections_csv, epoch, overwrite, verbose ) - image_path_glob_list: list[Generator[Path, None, None]] = [] - components_path_glob_list: list[Generator[Path, None, None]] = [] - if epoch is None or len(epoch) == 0: - image_path_glob_list.append( - vast_tile_data_root.glob("STOKESI_IMAGES/epoch_*/*.fits") - ) - components_path_glob_list.append( - vast_tile_data_root.glob("STOKESI_SELAVY/epoch_*/*.components.xml") - ) - else: - for n in epoch: - image_path_glob_list.append( - vast_tile_data_root.glob(f"STOKESI_IMAGES/epoch_{n}/*.fits") - ) - components_path_glob_list.append( - vast_tile_data_root.glob(f"STOKESI_SELAVY/epoch_{n}/*.components.xml") - ) - - # correct images - for image_path in chain.from_iterable(image_path_glob_list): - epoch_dir = image_path.parent.name - _, _, field, sbid_str, *_ = image_path.name.split(".") - sbid = int(sbid_str[2:]) - # get rms and background images - rms_path = ( - vast_tile_data_root - / "STOKESI_RMSMAPS" - / epoch_dir - / f"noiseMap.{image_path.name}" - ) - bkg_path = ( - vast_tile_data_root - / "STOKESI_RMSMAPS" - / epoch_dir - / f"meanMap.{image_path.name}" - ) - # get corrections - skip = False - try: - corrections = corrections_df.loc[(epoch_dir, field, sbid)] - except KeyError: - skip = True - logger.warning( - f"Corrections not found for {image_path} ({epoch_dir}, {field}," - f" {sbid})." 
- ) - if not rms_path.exists(): - logger.warning(f"RMS image not found for {image_path}.") - if not bkg_path.exists(): - logger.warning(f"Background image not found for {image_path}.") - skip = not (rms_path.exists() and bkg_path.exists()) or skip - if skip: - logger.warning(f"Skipping {image_path}.") - continue - - for path in (image_path, rms_path, bkg_path): - stokes_dir = f"{path.parent.parent.name}_CORRECTED" - output_dir = vast_tile_data_root / stokes_dir / epoch_dir - output_dir.mkdir(parents=True, exist_ok=True) - _ = shift_and_scale_image( - path, - output_dir, - flux_scale=corrections.flux_peak_correction_multiplicative, - flux_offset_mJy=corrections.flux_peak_correction_additive, - ra_offset_arcsec=corrections.ra_correction, - dec_offset_arcsec=corrections.dec_correction, - overwrite=overwrite, - ) - - # correct catalogs - for components_path in chain.from_iterable(components_path_glob_list): - epoch_dir = components_path.parent.name - _, _, field, sbid_str, *_ = components_path.name.split(".") - sbid = int(sbid_str[2:]) - # get island catalog - islands_path = components_path.with_name( - components_path.name.replace(".components", ".islands") - ) - # get corrections - skip = False - try: - corrections = corrections_df.loc[(epoch_dir, field, sbid)] - except KeyError: - skip = True - logger.warning( - f"Corrections not found for {components_path} ({epoch_dir}, {field}," - f" {sbid})." - ) - if not islands_path.exists(): - logger.warning(f"Islands catalogue not found for {components_path}.") - skip = not islands_path.exists() or skip - if skip: - logger.warning(f"Skipping {components_path}.") - continue - - for path in (components_path, islands_path): - stokes_dir = f"{path.parent.parent.name}_CORRECTED" - output_dir = vast_tile_data_root / stokes_dir / epoch_dir - output_dir.mkdir(parents=True, exist_ok=True) - _ = shift_and_scale_catalog( - path, - output_dir, - flux_scale=corrections.flux_peak_correction_multiplicative, - flux_offset_mJy=corrections.flux_peak_correction_additive, - ra_offset_arcsec=corrections.ra_correction, - dec_offset_arcsec=corrections.dec_correction, - overwrite=overwrite, - ) if __name__ == "__main__": diff --git a/vast_post_processing/cli/link_neighbours.py b/vast_post_processing/cli/link_neighbours.py index 7d81fcf..82c6d49 100644 --- a/vast_post_processing/cli/link_neighbours.py +++ b/vast_post_processing/cli/link_neighbours.py @@ -2,10 +2,7 @@ from typing import Optional import typer -from vast_post_processing.neighbours import ( - read_release_epochs, - find_vast_neighbours_by_release_epoch, -) +from vast_post_processing import neighbours app = typer.Typer() @@ -79,61 +76,15 @@ def main( ), ), ): - # get the release epochs - release_epochs = read_release_epochs(release_epochs_csv) - # get the neighbours DataFrame and filter for the requested release epoch and - # overlap area threshold - vast_neighbours_df = find_vast_neighbours_by_release_epoch( + neighbours.link_neighbours( release_epoch, vast_data_root, + release_epochs_csv, + output_root, vast_db_repo, - release_epochs, - racs_db_repo=racs_db_repo, - use_corrected=use_corrected, - ).query( - "release_epoch_a == @release_epoch and overlap_frac >= @overlap_frac_thresh" + racs_db_repo, + overlap_frac_thresh, + use_corrected, + neighbours_output, + make_links, ) - - if neighbours_output is not None: - vast_neighbours_df[ - [ - "field_a", - "sbid_a", - "obs_epoch_a", - "release_epoch_a", - "field_b", - "sbid_b", - "obs_epoch_b", - "release_epoch_b", - "overlap_frac", - "delta_t_days", - ] - 
].to_csv(neighbours_output, index=False) - - # create a directory for each field and create links to the neighbouring images - if make_links: - release_output_path = output_root / release_epoch - release_output_path.mkdir(parents=True, exist_ok=True) - for _, obs_pair in vast_neighbours_df.iterrows(): - # create directories - field_inputs_path_a = release_output_path / obs_pair.field_a / "inputs" - field_inputs_path_a.mkdir(parents=True, exist_ok=True) - field_inputs_path_b = release_output_path / obs_pair.field_b / "inputs" - field_inputs_path_b.mkdir(parents=True, exist_ok=True) - - # create a hard link for each field in the pair in both directions, e.g. - # A/inputs/A.fits, A/inputs/B.fits, B/inputs/A.fits, B/inputs/B.fits (plus weights) - for output_path in (field_inputs_path_a, field_inputs_path_b): - target_image_a = output_path / obs_pair.image_path_a.name - target_weights_a = output_path / obs_pair.weights_path_a.name - if not target_image_a.exists(): - obs_pair.image_path_a.link_to(target_image_a) - if not target_weights_a.exists(): - obs_pair.weights_path_a.link_to(target_weights_a) - - target_image_b = output_path / obs_pair.image_path_b.name - target_weights_b = output_path / obs_pair.weights_path_b.name - if not target_image_b.exists(): - obs_pair.image_path_b.link_to(target_image_b) - if not target_weights_b.exists(): - obs_pair.weights_path_b.link_to(target_weights_b) diff --git a/vast_post_processing/cli/selavy_combined.py b/vast_post_processing/cli/selavy_combined.py index bf2b4c9..0b9bf73 100644 --- a/vast_post_processing/cli/selavy_combined.py +++ b/vast_post_processing/cli/selavy_combined.py @@ -7,47 +7,12 @@ from pathlib import Path from typing import Optional, List -from loguru import logger import typer -app = typer.Typer() - - -def write_selavy_files( - field_name: str, - epoch_name: str, - image_path: Path, - parset_template_path: Path, - sbatch_template_path: Path, - weights_path: Optional[Path] = None, -): - if image_path is None: - raise FileNotFoundError(f"Image {image_path} doesn't exist.") - if weights_path is None: - # try to find the weights file using the combined naming convention - weights_path = image_path.with_name(f"{image_path.stem}.weight.fits") - if not weights_path.exists(): - raise FileNotFoundError(f"Weights image {weights_path} doesn't exist.") +from vast_post_processing import combine - image_name = image_path.stem - weights_name = weights_path.stem - parset_template = parset_template_path.read_text().format( - image_name=image_name, weights_name=weights_name - ) - parset_path = image_path.with_name(f"selavy.{image_name}.in") - parset_path.write_text(parset_template) - - sbatch_template = sbatch_template_path.read_text().format( - job_name=f"selavy-{field_name}-{epoch_name}", - parset_path=parset_path.relative_to(image_path.parent), - log_path=parset_path.with_suffix(".log").relative_to(image_path.parent), - working_dir_path=parset_path.parent, - ) - sbatch_path = image_path.with_name(f"selavy.{image_name}.sbatch") - sbatch_path.write_text(sbatch_template) - - return sbatch_path +app = typer.Typer() @app.command() @@ -59,25 +24,11 @@ def main( racs: bool = False, field_list: Optional[List[str]] = typer.Option(None, "--field"), ): - glob_expr = "RACS_*" if racs else "VAST_*" - for field_path in neighbour_data_dir.glob(glob_expr): - if field_list and field_path.name not in field_list: - logger.info( - f"Glob found field {field_path} but it was not given as a --field option." - " Skipping." 
- ) - continue - field_name = field_path.name - epoch_name = field_path.parent.name - image_path = field_path / f"{field_name}.{epoch_name}.{stokes}.conv.fits" - try: - _ = write_selavy_files( - field_name, - epoch_name, - image_path, - parset_template_path, - sbatch_template_path, - ) - except FileNotFoundError as e: - logger.error(e) - continue + combine.selavy_combined( + neighbour_data_dir, + parset_template_path, + sbatch_template_path, + stokes, + racs, + field_list, + ) diff --git a/vast_post_processing/cli/swarp.py b/vast_post_processing/cli/swarp.py index 110b329..2ba4682 100644 --- a/vast_post_processing/cli/swarp.py +++ b/vast_post_processing/cli/swarp.py @@ -6,93 +6,14 @@ Assumes convolved files are named *.sm.fits and are organized: //*.sm.fits. """ -from functools import partial -import os from pathlib import Path -import subprocess - -from loguru import logger import typer -from vast_post_processing.cli._util import get_pool, _get_worker_name -from vast_post_processing.combine import ( - add_degenerate_axes, - mask_weightless_pixels, - get_image_geometry, - write_swarp_config, - CentralImageNotFound, - COPY_FITS_KEYWORDS, -) - -# configure logging -# logger.remove() # remove default log sink -# logger.add(sys.stderr, level="DEBUG", enqueue=True) - -slurm_job_id = os.environ.get("SLURM_JOB_ID", "no-slurm") +from vast_post_processing import combine app = typer.Typer() -def worker( - args: tuple[list[str], str, Path, Path, Path], mpi: bool = False, n_proc: int = 1 -): - with logger.contextualize(worker_name=_get_worker_name(mpi=mpi, n_proc=n_proc)): - swarp_cmd: list[str] - field_name: str - output_mosaic_path: Path - output_weight_path: Path - central_image_path: Path - - ( - swarp_cmd, - field_name, - output_mosaic_path, - output_weight_path, - central_image_path, - ) = args - logger.debug(f"worker args: {args}") - - config_path = Path(swarp_cmd[2]) - field_name = config_path.parent.name - try: - logger.debug(f"SWarping {field_name} ...") - _ = subprocess.run(swarp_cmd, check=True) - except subprocess.CalledProcessError as e: - logger.error( - f"Error while calling SWarp for {field_name}. Return code: {e.returncode}" - ) - logger.debug(e.cmd) - raise e - add_degenerate_axes(output_mosaic_path, central_image_path) - add_degenerate_axes(output_weight_path, central_image_path) - mask_weightless_pixels(output_mosaic_path, output_weight_path) - logger.info(f"SWarp completed for {field_name}.") - - -def test_worker( - args: tuple[list[str], str, Path, Path, Path], mpi: bool = False, n_proc: int = 1 -): - with logger.contextualize(worker_name=_get_worker_name(mpi=mpi, n_proc=n_proc)): - swarp_cmd: list[str] - field_name: str - output_mosaic_path: Path - output_weight_path: Path - central_image_path: Path - - ( - swarp_cmd, - field_name, - output_mosaic_path, - output_weight_path, - central_image_path, - ) = args - logger.debug(f"worker args: {args}") - - config_path = Path(swarp_cmd[2]) - field_name = config_path.parent.name - logger.debug(f"Would SWarp {field_name}") - - @app.command() def main( neighbour_data_dir: Path, @@ -101,98 +22,4 @@ def main( test: bool = False, racs: bool = False, ): - # neighbour_data_dir has the structure: - # / contain the smoothed images to combine. - # //inputs contain the original images and weights. 
- # setup_logger(mpi=mpi) - # logger.info("checking rank and size") - # pool = schwimmbad.choose_pool(mpi=mpi, processes=n_proc) - pool = get_pool(mpi=mpi, n_proc=n_proc) - # if using MPI, the following is executed only on the main process - epoch_name = neighbour_data_dir.name - arg_list: list[tuple[list[str], str, Path, Path, Path]] = [] - glob_expr = "RACS_*" if racs else "VAST_*" - for field_path in neighbour_data_dir.glob(glob_expr): - field_name = field_path.name - output_mosaic_path = field_path / f"{field_name}.{epoch_name}.I.conv.fits" - output_weight_path = ( - field_path / f"{field_name}.{epoch_name}.I.conv.weight.fits" - ) - if output_mosaic_path.exists(): - logger.debug( - f"COMBINED image {output_mosaic_path} already exists, skipping" - ) - continue - images = list(field_path.glob("*.sm.fits")) - # get the central image - for image in images: - if field_name in image.name: - central_image = image - break - else: - raise CentralImageNotFound( - f"Could not find central image for {field_path}." - ) - weight_path = field_path / "inputs" - weights = [ - weight_path - / image.name.replace("image", "weights") - .replace(".sm", "") - .replace(".restored", "") - .replace(".conv", "") - .replace(".corrected", "") - for image in images - ] - image_geo = get_image_geometry(central_image) - tmp_dir = field_path / "tmp" - tmp_dir.mkdir(exist_ok=True) - swarp_config_dict = { - "VMEM_MAX": 4000, - "MEM_MAX": 4000, - "COMBINE_BUFSIZE": 2000, - "VMEM_DIR": tmp_dir, - "IMAGEOUT_NAME": output_mosaic_path, - "WEIGHTOUT_NAME": output_weight_path, - "COMBINE": "Y", - "COMBINE_TYPE": "WEIGHTED", - "SUBTRACT_BACK": "N", - "WRITE_XML": "N", - "FSCALASTRO_TYPE": "NONE", - "WEIGHT_TYPE": "MAP_WEIGHT", - "RESCALE_WEIGHTS": "Y", - "WEIGHT_IMAGE": " ".join([str(p) for p in weights]), - "PROJECTION_TYPE": "SIN", - "RESAMPLE_DIR": field_path, - "CENTER_TYPE": "MANUAL", - "CENTER": image_geo.center_hmsdms, - "IMAGE_SIZE": f"{image_geo.npix_x},{image_geo.npix_y}", - "PIXELSCALE_TYPE": "MANUAL", - "PIXEL_SCALE": image_geo.pixel_arcsec, - "COPY_KEYWORDS": ",".join(COPY_FITS_KEYWORDS), - } - config_path = write_swarp_config( - swarp_config_dict, output_mosaic_path.with_suffix(".cfg") - ) - swarp_cmd = [ - "SWarp", - "-c", - str(config_path), - ] - swarp_cmd.extend([str(p) for p in images]) - arg_list.append( - ( - swarp_cmd, - field_name, - output_mosaic_path, - output_weight_path, - central_image, - ) - ) - logger.info(f"Added SWarp command for {field_path.name}.") - logger.debug(swarp_cmd) - - # distribute tasks - - worker_func = partial(worker if not test else test_worker, mpi=mpi, n_proc=n_proc) - _ = list(pool.map(worker_func, arg_list)) - pool.close() + combine.swarp(neighbour_data_dir, n_proc, mpi, test, racs) diff --git a/vast_post_processing/combine.py b/vast_post_processing/combine.py index b6dafa9..e3c8da9 100644 --- a/vast_post_processing/combine.py +++ b/vast_post_processing/combine.py @@ -206,3 +206,257 @@ def mask_weightless_pixels(image_path: Path, weights_path: Path): # Output operation to log logger.info(f"Masked weightless pixels in {image_path}.") + + +# Logic separation + +from pathlib import Path +from typing import Optional, List + +from loguru import logger + + +def write_selavy_files( + field_name: str, + epoch_name: str, + image_path: Path, + parset_template_path: Path, + sbatch_template_path: Path, + weights_path: Optional[Path] = None, +): + if image_path is None: + raise FileNotFoundError(f"Image {image_path} doesn't exist.") + if weights_path is None: + # try to find the weights 
file using the combined naming convention + weights_path = image_path.with_name(f"{image_path.stem}.weight.fits") + if not weights_path.exists(): + raise FileNotFoundError(f"Weights image {weights_path} doesn't exist.") + + image_name = image_path.stem + weights_name = weights_path.stem + + parset_template = parset_template_path.read_text().format( + image_name=image_name, weights_name=weights_name + ) + parset_path = image_path.with_name(f"selavy.{image_name}.in") + parset_path.write_text(parset_template) + + sbatch_template = sbatch_template_path.read_text().format( + job_name=f"selavy-{field_name}-{epoch_name}", + parset_path=parset_path.relative_to(image_path.parent), + log_path=parset_path.with_suffix(".log").relative_to(image_path.parent), + working_dir_path=parset_path.parent, + ) + sbatch_path = image_path.with_name(f"selavy.{image_name}.sbatch") + sbatch_path.write_text(sbatch_template) + + return sbatch_path + + +def selavy_combined( + neighbour_data_dir: Path, + parset_template_path: Path, + sbatch_template_path: Path, + stokes: str, + racs: bool, + field_list: Optional[List[str]], +): + glob_expr = "RACS_*" if racs else "VAST_*" + for field_path in neighbour_data_dir.glob(glob_expr): + if field_list and field_path.name not in field_list: + logger.info( + f"Glob found field {field_path} but it was not given as a --field option." + " Skipping." + ) + continue + field_name = field_path.name + epoch_name = field_path.parent.name + image_path = field_path / f"{field_name}.{epoch_name}.{stokes}.conv.fits" + try: + _ = write_selavy_files( + field_name, + epoch_name, + image_path, + parset_template_path, + sbatch_template_path, + ) + except FileNotFoundError as e: + logger.error(e) + continue + + +from functools import partial +import os +import subprocess + +from loguru import logger + +from vast_post_processing.cli._util import get_pool, _get_worker_name + +# configure logging +# logger.remove() # remove default log sink +# logger.add(sys.stderr, level="DEBUG", enqueue=True) + + +def worker( + args: tuple[list[str], str, Path, Path, Path], mpi: bool = False, n_proc: int = 1 +): + with logger.contextualize(worker_name=_get_worker_name(mpi=mpi, n_proc=n_proc)): + swarp_cmd: list[str] + field_name: str + output_mosaic_path: Path + output_weight_path: Path + central_image_path: Path + + ( + swarp_cmd, + field_name, + output_mosaic_path, + output_weight_path, + central_image_path, + ) = args + logger.debug(f"worker args: {args}") + + config_path = Path(swarp_cmd[2]) + field_name = config_path.parent.name + try: + logger.debug(f"SWarping {field_name} ...") + _ = subprocess.run(swarp_cmd, check=True) + except subprocess.CalledProcessError as e: + logger.error( + f"Error while calling SWarp for {field_name}. 
Return code: {e.returncode}" + ) + logger.debug(e.cmd) + raise e + add_degenerate_axes(output_mosaic_path, central_image_path) + add_degenerate_axes(output_weight_path, central_image_path) + mask_weightless_pixels(output_mosaic_path, output_weight_path) + logger.info(f"SWarp completed for {field_name}.") + + +def test_worker( + args: tuple[list[str], str, Path, Path, Path], mpi: bool = False, n_proc: int = 1 +): + with logger.contextualize(worker_name=_get_worker_name(mpi=mpi, n_proc=n_proc)): + swarp_cmd: list[str] + field_name: str + output_mosaic_path: Path + output_weight_path: Path + central_image_path: Path + + ( + swarp_cmd, + field_name, + output_mosaic_path, + output_weight_path, + central_image_path, + ) = args + logger.debug(f"worker args: {args}") + + config_path = Path(swarp_cmd[2]) + field_name = config_path.parent.name + logger.debug(f"Would SWarp {field_name}") + + +def swarp( + neighbour_data_dir: Path, + n_proc: int, + mpi: bool, + test: bool, + racs: bool, +): + # neighbour_data_dir has the structure: + # / contain the smoothed images to combine. + # //inputs contain the original images and weights. + # setup_logger(mpi=mpi) + # logger.info("checking rank and size") + # pool = schwimmbad.choose_pool(mpi=mpi, processes=n_proc) + pool = get_pool(mpi=mpi, n_proc=n_proc) + # if using MPI, the following is executed only on the main process + epoch_name = neighbour_data_dir.name + arg_list: list[tuple[list[str], str, Path, Path, Path]] = [] + glob_expr = "RACS_*" if racs else "VAST_*" + for field_path in neighbour_data_dir.glob(glob_expr): + field_name = field_path.name + output_mosaic_path = field_path / f"{field_name}.{epoch_name}.I.conv.fits" + output_weight_path = ( + field_path / f"{field_name}.{epoch_name}.I.conv.weight.fits" + ) + if output_mosaic_path.exists(): + logger.debug( + f"COMBINED image {output_mosaic_path} already exists, skipping" + ) + continue + images = list(field_path.glob("*.sm.fits")) + # get the central image + for image in images: + if field_name in image.name: + central_image = image + break + else: + raise CentralImageNotFound( + f"Could not find central image for {field_path}." 
+ ) + weight_path = field_path / "inputs" + weights = [ + weight_path + / image.name.replace("image", "weights") + .replace(".sm", "") + .replace(".restored", "") + .replace(".conv", "") + .replace(".corrected", "") + for image in images + ] + image_geo = get_image_geometry(central_image) + tmp_dir = field_path / "tmp" + tmp_dir.mkdir(exist_ok=True) + swarp_config_dict = { + "VMEM_MAX": 4000, + "MEM_MAX": 4000, + "COMBINE_BUFSIZE": 2000, + "VMEM_DIR": tmp_dir, + "IMAGEOUT_NAME": output_mosaic_path, + "WEIGHTOUT_NAME": output_weight_path, + "COMBINE": "Y", + "COMBINE_TYPE": "WEIGHTED", + "SUBTRACT_BACK": "N", + "WRITE_XML": "N", + "FSCALASTRO_TYPE": "NONE", + "WEIGHT_TYPE": "MAP_WEIGHT", + "RESCALE_WEIGHTS": "Y", + "WEIGHT_IMAGE": " ".join([str(p) for p in weights]), + "PROJECTION_TYPE": "SIN", + "RESAMPLE_DIR": field_path, + "CENTER_TYPE": "MANUAL", + "CENTER": image_geo.center_hmsdms, + "IMAGE_SIZE": f"{image_geo.npix_x},{image_geo.npix_y}", + "PIXELSCALE_TYPE": "MANUAL", + "PIXEL_SCALE": image_geo.pixel_arcsec, + "COPY_KEYWORDS": ",".join(COPY_FITS_KEYWORDS), + } + config_path = write_swarp_config( + swarp_config_dict, output_mosaic_path.with_suffix(".cfg") + ) + swarp_cmd = [ + "SWarp", + "-c", + str(config_path), + ] + swarp_cmd.extend([str(p) for p in images]) + arg_list.append( + ( + swarp_cmd, + field_name, + output_mosaic_path, + output_weight_path, + central_image, + ) + ) + logger.info(f"Added SWarp command for {field_path.name}.") + logger.debug(swarp_cmd) + + # distribute tasks + + worker_func = partial(worker if not test else test_worker, mpi=mpi, n_proc=n_proc) + _ = list(pool.map(worker_func, arg_list)) + pool.close() diff --git a/vast_post_processing/corrections.py b/vast_post_processing/corrections.py index 6521736..c5d6ade 100644 --- a/vast_post_processing/corrections.py +++ b/vast_post_processing/corrections.py @@ -211,3 +211,147 @@ def shift_and_scale_catalog( votablefile.to_xml(str(output_path)) logger.success(f"Wrote corrected catalogue: {output_path}.") return output_path + + +# Separated logic + +from itertools import chain +from pathlib import Path +import sys +from typing import Optional, Generator + +from loguru import logger +import pandas as pd + + +def correct_vast( + vast_tile_data_root: Path, + vast_corrections_csv: Path, + epoch: Optional[list[int]], + overwrite: bool = False, + verbose: bool = False, +): + """Read astrometric and flux corrections produced by vast-xmatch and apply them to + VAST images and catalogues in vast-data. See https://github.com/marxide/vast-xmatch. 
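+ + The corrections CSV is expected to contain one row per (release_epoch, field, sbid) with + the columns flux_peak_correction_multiplicative, flux_peak_correction_additive (mJy), + ra_correction and dec_correction (arcsec); these column names are used when applying the + corrections below.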
+ """ + # configure logger + if not verbose: + # replace the default sink + logger.remove() + logger.add(sys.stderr, level="INFO") + + # read corrections + corrections_df = ( + pd.read_csv(vast_corrections_csv) + .set_index(["release_epoch", "field", "sbid"]) + .sort_index() + ) + image_path_glob_list: list[Generator[Path, None, None]] = [] + components_path_glob_list: list[Generator[Path, None, None]] = [] + if epoch is None or len(epoch) == 0: + image_path_glob_list.append( + vast_tile_data_root.glob("STOKESI_IMAGES/epoch_*/*.fits") + ) + components_path_glob_list.append( + vast_tile_data_root.glob("STOKESI_SELAVY/epoch_*/*.components.xml") + ) + else: + for n in epoch: + image_path_glob_list.append( + vast_tile_data_root.glob(f"STOKESI_IMAGES/epoch_{n}/*.fits") + ) + components_path_glob_list.append( + vast_tile_data_root.glob(f"STOKESI_SELAVY/epoch_{n}/*.components.xml") + ) + + # correct images + for image_path in chain.from_iterable(image_path_glob_list): + epoch_dir = image_path.parent.name + _, _, field, sbid_str, *_ = image_path.name.split(".") + sbid = int(sbid_str[2:]) + # get rms and background images + rms_path = ( + vast_tile_data_root + / "STOKESI_RMSMAPS" + / epoch_dir + / f"noiseMap.{image_path.name}" + ) + bkg_path = ( + vast_tile_data_root + / "STOKESI_RMSMAPS" + / epoch_dir + / f"meanMap.{image_path.name}" + ) + # get corrections + skip = False + try: + corrections = corrections_df.loc[(epoch_dir, field, sbid)] + except KeyError: + skip = True + logger.warning( + f"Corrections not found for {image_path} ({epoch_dir}, {field}," + f" {sbid})." + ) + if not rms_path.exists(): + logger.warning(f"RMS image not found for {image_path}.") + if not bkg_path.exists(): + logger.warning(f"Background image not found for {image_path}.") + skip = not (rms_path.exists() and bkg_path.exists()) or skip + if skip: + logger.warning(f"Skipping {image_path}.") + continue + + # TODO determine what these variables are and where they are from + for path in (image_path, rms_path, bkg_path): + stokes_dir = f"{path.parent.parent.name}_CORRECTED" + output_dir = vast_tile_data_root / stokes_dir / epoch_dir + output_dir.mkdir(parents=True, exist_ok=True) + _ = shift_and_scale_image( + path, + output_dir, + flux_scale=corrections.flux_peak_correction_multiplicative, + flux_offset_mJy=corrections.flux_peak_correction_additive, + ra_offset_arcsec=corrections.ra_correction, + dec_offset_arcsec=corrections.dec_correction, + overwrite=overwrite, + ) + + # correct catalogs + for components_path in chain.from_iterable(components_path_glob_list): + epoch_dir = components_path.parent.name + _, _, field, sbid_str, *_ = components_path.name.split(".") + sbid = int(sbid_str[2:]) + # get island catalog + islands_path = components_path.with_name( + components_path.name.replace(".components", ".islands") + ) + # get corrections + skip = False + try: + corrections = corrections_df.loc[(epoch_dir, field, sbid)] + except KeyError: + skip = True + logger.warning( + f"Corrections not found for {components_path} ({epoch_dir}, {field}," + f" {sbid})." 
+ ) + if not islands_path.exists(): + logger.warning(f"Islands catalogue not found for {components_path}.") + skip = not islands_path.exists() or skip + if skip: + logger.warning(f"Skipping {components_path}.") + continue + + for path in (components_path, islands_path): + stokes_dir = f"{path.parent.parent.name}_CORRECTED" + output_dir = vast_tile_data_root / stokes_dir / epoch_dir + output_dir.mkdir(parents=True, exist_ok=True) + _ = shift_and_scale_catalog( + path, + output_dir, + flux_scale=corrections.flux_peak_correction_multiplicative, + flux_offset_mJy=corrections.flux_peak_correction_additive, + ra_offset_arcsec=corrections.ra_correction, + dec_offset_arcsec=corrections.dec_correction, + overwrite=overwrite, + ) diff --git a/vast_post_processing/crop.py b/vast_post_processing/crop.py index 946f86b..44200d9 100644 --- a/vast_post_processing/crop.py +++ b/vast_post_processing/crop.py @@ -20,7 +20,7 @@ from pathlib import Path from itertools import chain -warnings.filterwarnings('ignore', category=FITSFixedWarning) +warnings.filterwarnings("ignore", category=FITSFixedWarning) def get_field_centre(header): @@ -28,14 +28,15 @@ def get_field_centre(header): w = WCS(header, naxis=2) size_x = header["NAXIS1"] size_y = header["NAXIS2"] - field_centre = w.pixel_to_world(size_x/2, size_y/2) - + field_centre = w.pixel_to_world(size_x / 2, size_y / 2) + logger.debug(field_centre) return field_centre -def crop_hdu(hdu, field_centre, size=6.3*u.deg, rotation=0.0*u.deg): - if rotation != 0.0*u.deg: + +def crop_hdu(hdu, field_centre, size=6.3 * u.deg, rotation=0.0 * u.deg): + if rotation != 0.0 * u.deg: raise NotImplementedError("Rotation handling is not yet available") logger.debug("Cropping HDU") wcs = WCS(hdu.header, naxis=2) @@ -43,91 +44,87 @@ def crop_hdu(hdu, field_centre, size=6.3*u.deg, rotation=0.0*u.deg): data = hdu.data if data.ndim == 4: - data = data[0,0,:,:] - - cutout = Cutout2D(data, - position=field_centre, - size=size, - wcs=wcs - ) + data = data[0, 0, :, :] + + cutout = Cutout2D(data, position=field_centre, size=size, wcs=wcs) hdu.data = cutout.data hdu.header.update(cutout.wcs.to_header()) - - coord_str = field_centre.to_string('hmsdms', sep=':') - hdu.header.add_history(f"Cropped to a {size.to(u.deg):.1f} deg square " - f"centered on {coord_str} on {datetime.now()}") + + coord_str = field_centre.to_string("hmsdms", sep=":") + hdu.header.add_history( + f"Cropped to a {size.to(u.deg):.1f} deg square " + f"centered on {coord_str} on {datetime.now()}" + ) return hdu - + + def crop_catalogue(vot, cropped_hdu, field_centre, size): logger.debug("Cropping catalogue") votable = vot.get_first_table() - + cropped_wcs = WCS(cropped_hdu.header, naxis=2) - + ra_deg = votable.array["col_ra_deg_cont"] * u.deg dec_deg = votable.array["col_dec_deg_cont"] * u.deg sc = SkyCoord(ra_deg, dec_deg) - + in_footprint = cropped_wcs.footprint_contains(sc) votable.array = votable.array[in_footprint] - + return votable - + + def wcs_to_moc(cropped_hdu): logger.debug("Creating MOC") - + cropped_wcs = WCS(cropped_hdu.header, naxis=2) - + nx, ny = cropped_wcs._naxis sc1 = wcs.utils.pixel_to_skycoord(0, 0, cropped_wcs) - sc2 = wcs.utils.pixel_to_skycoord(0, ny-1, cropped_wcs) - sc4 = wcs.utils.pixel_to_skycoord(nx-1, 0, cropped_wcs) - sc3 = wcs.utils.pixel_to_skycoord(nx-1, ny-1, cropped_wcs) - - sc = SkyCoord([sc1,sc2,sc3,sc4]) - + sc2 = wcs.utils.pixel_to_skycoord(0, ny - 1, cropped_wcs) + sc4 = wcs.utils.pixel_to_skycoord(nx - 1, 0, cropped_wcs) + sc3 = wcs.utils.pixel_to_skycoord(nx - 1, ny - 1, 
cropped_wcs) + + sc = SkyCoord([sc1, sc2, sc3, sc4]) + return MOC.from_polygon_skycoord(sc) + def moc_to_stmoc(moc, hdu): - start = Time([hdu.header['DATE-BEG']]) - end = Time([hdu.header['DATE-END']]) - + start = Time([hdu.header["DATE-BEG"]]) + end = Time([hdu.header["DATE-END"]]) + stmoc = STMOC.from_spatial_coverages(start, end, [moc]) - + return stmoc -def run_full_crop(data_root: Union[str, Path], - crop_size: u.quantity.Quantity, - epoch: Union[str, int, list], - stokes: str, - out_root: Optional[Union[str, Path]]=None, - create_moc: Optional[bool]=False, - overwrite: Optional[bool]=False, - ): +def run_full_crop( + data_root: Union[str, Path], + crop_size: u.quantity.Quantity, + epoch: Union[str, int, list], + stokes: str, + out_root: Optional[Union[str, Path]] = None, + create_moc: Optional[bool] = False, + overwrite: Optional[bool] = False, +): if out_root is None: out_root = data_root image_path_glob_list: list[Generator[Path, None, None]] = [] - + image_root = data_root / f"STOKES{stokes}_IMAGES" logger.debug(image_root) - - - + if type(epoch) is int: epoch = list(epoch) if epoch is None or len(epoch) == 0: - image_path_glob_list.append( - image_root.glob(f"epoch_*/*.fits") - ) + image_path_glob_list.append(image_root.glob(f"epoch_*/*.fits")) else: for n in epoch: - image_path_glob_list.append( - image_root.glob(f"epoch_{n}/*.fits") - ) - + image_path_glob_list.append(image_root.glob(f"epoch_{n}/*.fits")) + for image_path in chain.from_iterable(image_path_glob_list): logger.info(f"Working on {image_path}...") epoch_dir = image_path.parent.name @@ -140,33 +137,29 @@ def run_full_crop(data_root: Union[str, Path], / epoch_dir / f"noiseMap.{image_path.name}" ) - + bkg_path = ( data_root / f"STOKES{stokes}_RMSMAPS" / epoch_dir / f"meanMap.{image_path.name}" ) - + # get selavy files - components_name = f"selavy-{image_path.name}".replace(".fits", - ".components.xml" - ) - islands_name = components_name.replace("components", "islands") - - selavy_dir = ( - data_root - / f"STOKES{stokes}_SELAVY" - / epoch_dir + components_name = f"selavy-{image_path.name}".replace( + ".fits", ".components.xml" ) + islands_name = components_name.replace("components", "islands") + + selavy_dir = data_root / f"STOKES{stokes}_SELAVY" / epoch_dir components_path = selavy_dir / components_name islands_path = selavy_dir / islands_name - + exists = True if not rms_path.exists(): exists = False logger.warning(f"noisemap file ({rms_path}) is missing.") - + if not bkg_path.exists(): exists = False logger.warning(f"meanmap file ({bkg_path}) is missing.") @@ -178,79 +171,74 @@ def run_full_crop(data_root: Union[str, Path], logger.warning(f"selavy islands file ({islands_path}) is missing.") if not exists: logger.warning(f"Skipping {image_path} due to missing files.") - + for path in (rms_path, bkg_path, image_path): stokes_dir = f"{path.parent.parent.name}_CROPPED" fits_output_dir = out_root / stokes_dir / epoch_dir - + if not fits_output_dir.exists(): fits_output_dir.mkdir(parents=True) - + outfile = fits_output_dir / path.name hdu = fits.open(path)[0] field_centre = get_field_centre(hdu.header) cropped_hdu = crop_hdu(hdu, field_centre, size=crop_size) cropped_hdu.writeto(outfile, overwrite=overwrite) logger.debug(f"Wrote {outfile}") - - + # Crop the catalogues stokes_dir = f"{components_path.parent.parent.name}_CROPPED" cat_output_dir = out_root / stokes_dir / epoch_dir - + if not cat_output_dir.exists(): cat_output_dir.mkdir(parents=True) - + components_outfile = cat_output_dir / components_path.name 
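+ # the cropped component and island catalogues keep their original filenames under the *_CROPPED output tree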
islands_outfile = cat_output_dir / islands_path.name - + components_vot = parse(str(components_path)) islands_vot = parse(str(islands_path)) - + # This uses the last cropped hdu from the above for loop # which should be the image file, but doesn't actually matter - cropped_components_vot = crop_catalogue(components_vot, - cropped_hdu, - field_centre, - size - ) - cropped_islands_vot = crop_catalogue(islands_vot, - cropped_hdu, - field_centre, - size - ) + cropped_components_vot = crop_catalogue( + components_vot, cropped_hdu, field_centre, size + ) + cropped_islands_vot = crop_catalogue( + islands_vot, cropped_hdu, field_centre, size + ) if components_outfile.exists() and not overwrite: logger.critical(f"{components_outfile} exists, not overwriting") else: components_vot.to_xml(str(components_outfile)) logger.debug(f"Wrote {components_outfile}") - + if islands_outfile.exists() and not overwrite: logger.critical(f"{components_outfile} exists, not overwriting") else: components_vot.to_xml(str(islands_outfile)) logger.debug(f"Wrote {islands_outfile}") - + # Create the MOC if not create_moc: continue moc_dir = f"STOKES{stokes}_MOC_CROPPED" moc_output_dir = out_root / moc_dir / epoch_dir - - moc_filename = image_path.name.replace('.fits','.moc.fits') + + moc_filename = image_path.name.replace(".fits", ".moc.fits") moc_outfile = moc_output_dir / moc_filename - + if not moc_output_dir.exists(): moc_output_dir.mkdir(parents=True) moc = vpc.wcs_to_moc(cropped_hdu) moc.write(moc_outfile, overwrite=overwrite) logger.debug(f"Wrote {moc_outfile}") - - stmoc_filename = image_path.name.replace('.fits','.stmoc.fits') + + stmoc_filename = image_path.name.replace(".fits", ".stmoc.fits") stmoc_outfile = moc_output_dir / stmoc_filename - + stmoc = vpc.moc_to_stmoc(moc, cropped_hdu) stmoc.write(stmoc_outfile, overwrite=overwrite) logger.debug("Wrote {stmoc_outfile}") diff --git a/vast_post_processing/neighbours.py b/vast_post_processing/neighbours.py index ebbc0fa..2545213 100644 --- a/vast_post_processing/neighbours.py +++ b/vast_post_processing/neighbours.py @@ -553,3 +553,174 @@ def convolve_image( return output_dir_path / output_filename else: return None + + +# Separated logic + +"""Requires setup_neighbours.py to be run first. +""" +from dataclasses import dataclass, fields +from functools import partial +from pathlib import Path +from typing import Optional, List + +from loguru import logger +from racs_tools import beamcon_2D +from radio_beam import Beam +import typer + +from vast_post_processing.cli._util import get_pool, _get_worker_name + + +@dataclass +class WorkerArgs: + image_path: Path + output_dir_path: Path + target_beam: Beam + mode: str + suffix: str = "sm" + prefix: Optional[str] = None + cutoff: Optional[float] = None + dry_run: bool = False + + def __iter__(self): + # Makes the class fields iterable so they can be unpacked + # e.g. func(*args) where args is a WorkerArgs object. 
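+ # worker() below relies on this to call convolve_image(*args) with a WorkerArgs instance.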
+ return (getattr(self, field.name) for field in fields(self)) + + +def worker(args: WorkerArgs, mpi: bool = False, n_proc: int = 1): + with logger.contextualize(worker_name=_get_worker_name(mpi=mpi, n_proc=n_proc)): + return convolve_image(*args) + + +def convolve_neighbours( + neighbour_data_dir: Path, + n_proc: int = 1, + mpi: bool = False, + max_images: Optional[int] = None, + racs: bool = False, + field_list: Optional[List[str]] = typer.Option(None, "--field"), +): + # neighbour_data_dir has the structure: + # //inputs contains the input FITS images + # to be convolved to a common resolution and their weights FITS images. + + pool = get_pool(mpi=mpi, n_proc=n_proc) + logger.debug(f"pool created, type: {type(pool)}") + + glob_expr = "RACS_*" if racs else "VAST_*" + worker_args_list: list[WorkerArgs] = [] + n_images: int = 0 + for field_dir in neighbour_data_dir.glob(glob_expr): + if field_list and field_dir.name not in field_list: + logger.info( + f"Glob found field {field_dir} but it was not given as a --field option. Skipping." + ) + continue + if max_images is not None and n_images >= max_images: + logger.warning( + f"Reached maximum image limit of {max_images}. Skipping remaining images." + ) + break + if len(list(field_dir.glob("*.sm.fits"))) > 0: + logger.warning(f"Smoothed images already exist in {field_dir}. Skipping.") + continue + image_path_list = list(field_dir.glob("inputs/image.*.fits")) + logger.debug(f"Found {len(image_path_list)} images for {field_dir.name}") + # find the smallest common beam + common_beam, _ = beamcon_2D.getmaxbeam(image_path_list) + logger.debug( + f"{field_dir} common beam major {common_beam.major} type" + f" {type(common_beam)}" + ) + for image_path in image_path_list: + worker_args = WorkerArgs( + image_path=image_path, + output_dir_path=field_dir, + target_beam=common_beam, + mode="robust", + ) + worker_args_list.append(worker_args) + n_images += 1 + if max_images is not None and n_images >= max_images: + logger.warning( + f"Reached maximum image limit of {max_images}. Skipping remaining images." 
+ ) + break + + # start convolutions + _ = list(pool.map(partial(worker, mpi=mpi, n_proc=n_proc), worker_args_list)) + pool.close() + + +def link_neighbours( + release_epoch: str, + vast_data_root: Path, + release_epochs_csv: Path, + output_root: Path, + vast_db_repo: Path, + racs_db_repo: Optional[Path], + overlap_frac_thresh: float, + use_corrected: bool, + neighbours_output: Optional[Path], + make_links: bool, +): + # get the release epochs + release_epochs = read_release_epochs(release_epochs_csv) + # get the neighbours DataFrame and filter for the requested release epoch and + # overlap area threshold + vast_neighbours_df = find_vast_neighbours_by_release_epoch( + release_epoch, + vast_data_root, + vast_db_repo, + release_epochs, + racs_db_repo=racs_db_repo, + use_corrected=use_corrected, + ).query( + "release_epoch_a == @release_epoch and overlap_frac >= @overlap_frac_thresh" + ) + + if neighbours_output is not None: + vast_neighbours_df[ + [ + "field_a", + "sbid_a", + "obs_epoch_a", + "release_epoch_a", + "field_b", + "sbid_b", + "obs_epoch_b", + "release_epoch_b", + "overlap_frac", + "delta_t_days", + ] + ].to_csv(neighbours_output, index=False) + + # create a directory for each field and create links to the neighbouring images + if make_links: + release_output_path = output_root / release_epoch + release_output_path.mkdir(parents=True, exist_ok=True) + for _, obs_pair in vast_neighbours_df.iterrows(): + # create directories + field_inputs_path_a = release_output_path / obs_pair.field_a / "inputs" + field_inputs_path_a.mkdir(parents=True, exist_ok=True) + field_inputs_path_b = release_output_path / obs_pair.field_b / "inputs" + field_inputs_path_b.mkdir(parents=True, exist_ok=True) + + # create a hard link for each field in the pair in both directions, e.g. 
+ # A/inputs/A.fits, A/inputs/B.fits, B/inputs/A.fits, B/inputs/B.fits (plus weights) + for output_path in (field_inputs_path_a, field_inputs_path_b): + target_image_a = output_path / obs_pair.image_path_a.name + target_weights_a = output_path / obs_pair.weights_path_a.name + if not target_image_a.exists(): + obs_pair.image_path_a.link_to(target_image_a) + if not target_weights_a.exists(): + obs_pair.weights_path_a.link_to(target_weights_a) + + target_image_b = output_path / obs_pair.image_path_b.name + target_weights_b = output_path / obs_pair.weights_path_b.name + if not target_image_b.exists(): + obs_pair.image_path_b.link_to(target_image_b) + if not target_weights_b.exists(): + obs_pair.weights_path_b.link_to(target_weights_b) diff --git a/vast_post_processing/utils/fileutils.py b/vast_post_processing/utils/fileutils.py index a6c4ec9..7e037ec 100644 --- a/vast_post_processing/utils/fileutils.py +++ b/vast_post_processing/utils/fileutils.py @@ -24,3 +24,11 @@ def cleanup_directory(directory: Path): logger.info(f"Deleted directory {path}.") else: logger.debug(f"Leaving {path}.") + + +def cleanup(neighbour_data_dir: Path, delete_all: bool = False): + if delete_all: + rmtree(neighbour_data_dir) + else: + for field_path in neighbour_data_dir.glob("VAST_*"): + cleanup_directory(field_path) From 07ae31c3c243c5aff1f1f62d1a744d94ddbe53c5 Mon Sep 17 00:00:00 2001 From: Akash Date: Thu, 10 Aug 2023 13:44:49 -0500 Subject: [PATCH 13/31] Corrected the wrong path for the catalog files --- vast_post_processing/cli/run_corrections.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/vast_post_processing/cli/run_corrections.py b/vast_post_processing/cli/run_corrections.py index 99bb051..9eac01b 100644 --- a/vast_post_processing/cli/run_corrections.py +++ b/vast_post_processing/cli/run_corrections.py @@ -248,22 +248,31 @@ def main( logger.warning(f"Background image not found for {image_path}.") # Look for any component and island files correspnding to this image + image_root = image_path.parent.as_posix() + catalog_root = image_root.replace("IMAGES", "SELAVY") - component_file = Path(ref_file) - island_file = Path(ref_file.replace("components", "islands")) + catalog_filename = image_path.name.replace("image", "selavy-image") + catalog_filename = catalog_filename.replace(".fits", ".components.xml") + + catalog_filepath = f"{catalog_root}/{catalog_filename}" + + component_file = Path(catalog_filepath) + island_file = Path(catalog_filepath.replace("components", "islands")) skip = ( not ( (rms_path.exists()) and (bkg_path.exists()) and (ref_file is not None) - and (component_file is not None) + and (component_file.exists()) ) or skip ) if skip: if not ((rms_path.exists()) and (bkg_path.exists())): logger.warning(f"Skipping {image_path}, RMS/BKG maps do not exist") + elif not (component_file.exists()): + logger.warning(f"Skipping {image_path}, catalog files do not exist") elif ref_file is None: logger.warning(f"Skipping {image_path}, no reference field found.") continue From b2fccaf14e4784b950a01d04ec957323907356e6 Mon Sep 17 00:00:00 2001 From: Akash Date: Mon, 14 Aug 2023 00:36:32 -0500 Subject: [PATCH 14/31] Re-organized code so that this can be passed to cropping, added docstrings, cleaned the redundant parts of the code --- vast_post_processing/catalogs.py | 375 ++++++++++-------- vast_post_processing/cli/run_corrections.py | 283 ++------------ vast_post_processing/corrections.py | 405 ++++++++++++++++++-- vast_post_processing/crossmatch.py | 43 ++- 4 files changed, 643 
insertions(+), 463 deletions(-) diff --git a/vast_post_processing/catalogs.py b/vast_post_processing/catalogs.py index a75b000..a83b094 100644 --- a/vast_post_processing/catalogs.py +++ b/vast_post_processing/catalogs.py @@ -27,150 +27,37 @@ "rms_image": u.mJy / u.beam, } - -class UnknownCatalogInputFormat(Exception): - pass - - -class Catalog: - CATALOG_TYPE_TILE = "TILE" - CATALOG_TYPE_COMBINED = "COMBINED" - CATALOG_TYPES = ( - CATALOG_TYPE_TILE, - CATALOG_TYPE_COMBINED, - ) - - def __init__( - self, - path: Path, - psf: Optional[Tuple[float, float]] = None, - input_format: str = "selavy", - condon: bool = False, - positive_fluxes_only: bool = True, - ): - self.path: Path - self.table: QTable - self.field: Optional[str] - self.epoch: Optional[str] - self.sbid: Optional[str] - self.psf_major: Optional[u.Quantity] - self.psf_minor: Optional[u.Quantity] - self.type: str - - # read catalog - if input_format == "selavy": - if path.suffix == ".txt": - logger.debug("Reading %s as a Selavy txt catalog.", path) - read_catalog = read_selavy - else: - logger.debug("Reading %s as a Selavy VOTable catalog.", path) - read_catalog = read_selavy_votable - elif input_format == "aegean": - logger.debug("Reading %s as an Aegean catalog.", path) - read_catalog = read_aegean_csv - else: - logger.error( - "The format of input files is not supported. Only selavy and aegean are supported" - ) - raise SystemExit - self.path = path - self.table = read_catalog(path) - - # filter sources with bad sizes and optionally negative/0 fluxes - if positive_fluxes_only: - logger.info( - "Filtering %d sources with fluxes <= 0.", - (self.table["flux_peak"] <= 0).sum(), - ) - self.table = self.table[self.table["flux_peak"] > 0] - logger.info( - "Filtering %d sources with fitted sizes <= 0.", - ((self.table["maj_axis"] <= 0) | (self.table["min_axis"] <= 0)).sum(), - ) - self.table = self.table[ - (self.table["maj_axis"] > 0) & (self.table["min_axis"] > 0) - ] - - # read epoch, field, sbid, psf's - epoch_name = path.parent.name - _, _, field, sbid, *_ = path.name.split(".") - self.epoch = epoch_name - self.field = field.replace("VAST_", "") - self.sbid = sbid - - if psf is not None: - self.psf_major, self.psf_minor = psf * u.arcsec - logger.debug( - "Using user provided PSF for %s: %s, %s.", - self.path, - self.psf_major, - self.psf_minor, - ) - else: - logger.warning( - "PSF is unknown for %s. 
Condon errors will be unavailable.", self.path - ) - self.psf_major = None - self.psf_minor = None - - if condon and self.psf_major is not None and self.psf_minor is not None: - _ = self.calculate_condon_flux_errors(correct_peak_for_noise=True) - logger.debug("Condon errors computed for %s.", self.path) - - def calculate_condon_flux_errors( - self, - alpha_maj1=2.5, - alpha_min1=0.5, - alpha_maj2=0.5, - alpha_min2=2.5, - alpha_maj3=1.5, - alpha_min3=1.5, - clean_bias=0.0, - clean_bias_error=0.0, - frac_flux_cal_error=0.0, - correct_peak_for_noise=False, - ): - noise = self.table["rms_image"] - snr = self.table["flux_peak"] / noise - - rho_sq3 = ( - ( - self.table["maj_axis"] - * self.table["min_axis"] - / (4.0 * self.psf_major * self.psf_minor) - ) - * (1.0 + (self.psf_major / self.table["maj_axis"]) ** 2) ** alpha_maj3 - * (1.0 + (self.psf_minor / self.table["min_axis"]) ** 2) ** alpha_min3 - * snr**2 - ) - - flux_peak_col = self.table["flux_peak"] - flux_peak_condon = self.table["flux_peak"] + ( - -(noise**2) / self.table["flux_peak"] + clean_bias - ) - if correct_peak_for_noise: - flux_peak_col = flux_peak_condon - - errorpeaksq = ( - (frac_flux_cal_error * flux_peak_col) ** 2 - + clean_bias_error**2 - + 2.0 * flux_peak_col**2 / rho_sq3 - ) - errorpeak = np.sqrt(errorpeaksq) - - self.table["flux_peak_condon"] = flux_peak_condon - self.table["flux_peak_selavy"] = self.table["flux_peak"] - self.table["flux_peak_err_condon"] = errorpeak - self.table["flux_peak_err_selavy"] = self.table["flux_peak_err"] - self.table["flux_peak_err"] = self.table["flux_peak_err_condon"] - if correct_peak_for_noise: - self.table["flux_peak"] = self.table["flux_peak_condon"] - return flux_peak_condon, errorpeak +AEGEAN_COLUMN_MAP = { + # aegean name: (selavy name, aegean unit) + "ra": ("ra_deg_cont", u.deg), + "dec": ("dec_deg_cont", u.deg), + "err_ra": ("ra_err", u.deg), + "err_dec": ("dec_err", u.deg), + "peak_flux": ("flux_peak", u.Jy / u.beam), + "err_peak_flux": ("flux_peak_err", u.Jy / u.beam), + "a": ("maj_axis", u.arcsec), + "b": ("min_axis", u.arcsec), + "pa": ("pos_ang", u.arcsec), + "err_a": ("maj_axis_err", u.arcsec), + "err_b": ("min_axis_err", u.deg), + "err_pa": ("pos_ang_err", u.deg), + "local_rms": ("rms_image", u.Jy / u.beam), +} def _convert_selavy_columns_to_quantites( qt: QTable, units: Dict[str, u.Unit] = SELAVY_COLUMN_UNITS ) -> QTable: + """Takes in a selavy component table and adds units to respective quantities + + Args: + qt (QTable): the component catalog + units (Dict[str, u.Unit], optional): The dictionary with parameters and + their units. Defaults to SELAVY_COLUMN_UNITS. 
+ + Returns: + QTable: Table with units to the parameters + """ for col, unit in units.items(): qt[col].unit = unit return qt @@ -210,6 +97,14 @@ def read_selavy(catalog_path: Path) -> QTable: def read_selavy_votable(catalog_path: Path) -> QTable: + """Helper function to read the selavy catalog, if the input format is votable + + Args: + catalog_path (Path): Input Path to the catalog file + + Returns: + QTable: The component table + """ t = Table.read(catalog_path, format="votable", use_names_over_ids=True) # remove units from str columns and fix unrecognized flux units for col in t.itercols(): @@ -225,17 +120,6 @@ def read_selavy_votable(catalog_path: Path) -> QTable: return qt -def read_hdf(catalog_path: Path) -> pd.DataFrame: - df = pd.read_hdf(catalog_path, key="data") - df["field"] = df.field.str.split(".", n=1, expand=True)[0] - qt = _convert_selavy_columns_to_quantites(QTable.from_pandas(df)) - qt["coord"] = SkyCoord(ra=qt["ra_deg_cont"], dec=qt["dec_deg_cont"]) - _, qt["nn_separation"], _ = qt["coord"].match_to_catalog_sky( - qt["coord"], nthneighbor=2 - ) - return qt - - def read_aegean_csv(catalog_path: Path) -> QTable: """Read an Aegean CSV component catalog and return a QTable. Assumed to contain at least the following columns with the given units: @@ -260,22 +144,6 @@ def read_aegean_csv(catalog_path: Path) -> QTable: - `nn_separation`: separation to the nearest-neighbour source as a Quantity with angular units. """ - AEGEAN_COLUMN_MAP = { - # aegean name: (selavy name, aegean unit) - "ra": ("ra_deg_cont", u.deg), - "dec": ("dec_deg_cont", u.deg), - "err_ra": ("ra_err", u.deg), - "err_dec": ("dec_err", u.deg), - "peak_flux": ("flux_peak", u.Jy / u.beam), - "err_peak_flux": ("flux_peak_err", u.Jy / u.beam), - "a": ("maj_axis", u.arcsec), - "b": ("min_axis", u.arcsec), - "pa": ("pos_ang", u.arcsec), - "err_a": ("maj_axis_err", u.arcsec), - "err_b": ("min_axis_err", u.deg), - "err_pa": ("pos_ang_err", u.deg), - "local_rms": ("rms_image", u.Jy / u.beam), - } qt = QTable.read(catalog_path) # rename columns to match selavy convention and assign units for col, (new_col, unit) in AEGEAN_COLUMN_MAP.items(): @@ -296,3 +164,178 @@ def read_aegean_csv(catalog_path: Path) -> QTable: qt["coord"], nthneighbor=2 ) return qt + + +class Catalog: + """Class to make a catalog object from the selavy/Aegean files. This + is then used for catalog matching between the referecne catalog and + the current catalog to select for sources and get flux and astrometric + corrections. 
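    Example (illustrative sketch only; the file name and PSF values are
    placeholders, not taken from a real observation):

        cat = Catalog(
            Path("epoch_29/selavy-image.i.VAST_0012+00.SB11234.cont.taylor.0.restored.components.xml"),
            psf=(12.5, 10.2),  # assumed major/minor FWHM in arcsec
            condon=True,
        )
        # cat.table then holds the filtered components with Condon (1997) errors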
+ + Raises: + SystemExit: if the input catalog files are other than Selavy/Aegean + products + """ + + def __init__( + self, + path: Path, + psf: Optional[Tuple[float, float]] = None, + input_format: str = "selavy", + condon: bool = False, + apply_flux_limit: bool = True, + flux_limit: float = 0, + ): + self.path: Path + self.table: QTable + self.input_format: Optional[str] + self.flux_flag: Optional[bool] + self.flux_lim: Optional[float] + self.field: Optional[str] + self.epoch: Optional[str] + self.sbid: Optional[str] + self.psf_major: Optional[u.Quantity] + self.psf_minor: Optional[u.Quantity] + self.type: str + + self.path = path + self.input_format = input_format + self.flux_flag = apply_flux_limit + self.flux_lim = flux_limit + + # Read the catalog + self._read_catalog() + + # Filter sources + self._filter_sources() + + # Get epoch, field, sbid from the file name + epoch_name = path.parent.name + _, _, field, sbid, *_ = path.name.split(".") + self.epoch = epoch_name + self.field = field.replace("VAST_", "") + self.sbid = sbid + + # Parse the psf info + if psf is not None: + self.psf_major, self.psf_minor = psf * u.arcsec + logger.debug( + "Using user provided PSF for %s: %s, %s.", + self.path, + self.psf_major, + self.psf_minor, + ) + else: + logger.warning( + "PSF is unknown for %s. Condon errors will be unavailable.", self.path + ) + self.psf_major = None + self.psf_minor = None + + # Calculate the covariant error using Condon 1997 + if condon and self.psf_major is not None and self.psf_minor is not None: + self.calculate_condon_flux_errors(correct_peak_for_noise=True) + logger.debug("Condon errors computed for %s.", self.path) + + def _read_catalog(self): + """Helper function to read and parse the input files + + Raises: + SystemExit: if the input catalog files are other than Selavy/Aegean + products + """ + path = self.path + if self.input_format == "selavy": + if path.suffix == ".txt": + logger.debug("Reading %s as a Selavy txt catalog.", path) + read_catalog = read_selavy + else: + logger.debug("Reading %s as a Selavy VOTable catalog.", path) + read_catalog = read_selavy_votable + elif self.input_format == "aegean": + logger.debug("Reading %s as an Aegean catalog.", path) + read_catalog = read_aegean_csv + else: + logger.error( + "The format of input files is not supported. Only selavy and aegean are supported" + ) + raise SystemExit + + self.table = read_catalog(path) + + def _filter_sources(self): + """Helper function to filter sources that are used for cross-match; + filter sources with bad sizes and optionally given flux limits""" + if self.flux_flag: + lim = self.flux_lim + logger.info( + f"Filtering %d sources with fluxes <= {lim}", + (self.table["flux_peak"] <= lim).sum(), + ) + self.table = self.table[self.table["flux_peak"] > lim] + logger.info( + "Filtering %d sources with fitted sizes <= 0.", + ((self.table["maj_axis"] <= 0) | (self.table["min_axis"] <= 0)).sum(), + ) + self.table = self.table[ + (self.table["maj_axis"] > 0) & (self.table["min_axis"] > 0) + ] + + def calculate_condon_flux_errors( + self, + alpha_maj: float = 1.5, + alpha_min: float = 1.5, + clean_bias: float = 0.0, + clean_bias_error: float = 0.0, + frac_flux_cal_error: float = 0.0, + correct_peak_for_noise: bool = False, + ): + """Calculates the covariant error using Condon 1997. See equation 41 + of Condon 1997 for reference + + Args: + alpha_maj (float, optional): power for major axis correction. Defaults to 1.5 + alpha_min (float, optional): power for major axis correction. Defaults to 1.5. 
+ clean_bias (float, optional): additive flux bias. Defaults to 0.0. + clean_bias_error (float, optional): error in additive flux bias. Defaults to 0.0. + frac_flux_cal_error (float, optional): multiplicative flux error. Defaults to 0.0. + correct_peak_for_noise (bool, optional): flag to re-write the peak flux from + selavy. Defaults to False. + """ + noise = self.table["rms_image"] + snr = self.table["flux_peak"] / noise + + # See equation 41 of Condon 1997 to calculate the signal to noise + rho_sq3 = ( + ( + self.table["maj_axis"] + * self.table["min_axis"] + / (4.0 * self.psf_major * self.psf_minor) + ) + * (1.0 + (self.psf_major / self.table["maj_axis"]) ** 2) ** alpha_maj + * (1.0 + (self.psf_minor / self.table["min_axis"]) ** 2) ** alpha_min + * snr**2 + ) + + # Correct the peak flux now. + flux_peak_col = self.table["flux_peak"] + flux_peak_condon = self.table["flux_peak"] + ( + -(noise**2) / self.table["flux_peak"] + clean_bias + ) + if correct_peak_for_noise: + flux_peak_col = flux_peak_condon + + errorpeaksq = ( + (frac_flux_cal_error * flux_peak_col) ** 2 + + clean_bias_error**2 + + 2.0 * flux_peak_col**2 / rho_sq3 + ) + errorpeak = np.sqrt(errorpeaksq) + + self.table["flux_peak_condon"] = flux_peak_condon + self.table["flux_peak_selavy"] = self.table["flux_peak"] + self.table["flux_peak_err_condon"] = errorpeak + self.table["flux_peak_err_selavy"] = self.table["flux_peak_err"] + self.table["flux_peak_err"] = self.table["flux_peak_err_condon"] + if correct_peak_for_noise: + self.table["flux_peak"] = self.table["flux_peak_condon"] diff --git a/vast_post_processing/cli/run_corrections.py b/vast_post_processing/cli/run_corrections.py index 9eac01b..811c8ce 100644 --- a/vast_post_processing/cli/run_corrections.py +++ b/vast_post_processing/cli/run_corrections.py @@ -1,108 +1,10 @@ from loguru import logger from pathlib import Path -from typing import Optional, Tuple, Generator -from astropy.coordinates import Angle -import astropy.units as u -import click, sys, os +from typing import Optional from uncertainties import ufloat -from itertools import chain -import pandas as pd -import typer -from astropy.table import QTable -from astropy.io import fits -from astropy import units as u -from vast_post_processing.catalogs import Catalog +import typer, sys -from vast_post_processing.corrections import ( - shift_and_scale_catalog, - shift_and_scale_image, - vast_xmatch_qc, -) - - -class _AstropyUnitType(click.ParamType): - def convert(self, value, param, ctx, unit_physical_type): - try: - unit = u.Unit(value) - except ValueError: - self.fail(f"astropy.units.Unit does not understand: {value}.") - if unit.physical_type != unit_physical_type: - self.fail( - f"{unit} is a {unit.physical_type} unit. It must be of type" - f" {unit_physical_type}." 
- ) - else: - return unit - - -class AngleUnitType(_AstropyUnitType): - name = "angle_unit" - - def convert(self, value, param, ctx): - return super().convert(value, param, ctx, "angle") - - -class FluxUnitType(_AstropyUnitType): - name = "flux_unit" - - def convert(self, value, param, ctx): - return super().convert(value, param, ctx, "spectral flux density") - - -class AngleQuantityType(click.ParamType): - name = "angle_quantity" - - def convert(self, value, param, ctx): - try: - angle = Angle(value) - return angle - except ValueError: - self.fail(f"astropy.coordinates.Angle does not understand: {value}.") - - -ANGLE_UNIT_TYPE = AngleUnitType() -FLUX_UNIT_TYPE = FluxUnitType() -ANGLE_QUANTITY_TYPE = AngleQuantityType() - - -def get_correct_correction_file(correction_files_list, img_field): - count = 0 - for f in correction_files_list: - filename = f.name - _, _, field, *_ = filename.split(".") - field = field.replace("RACS", "VAST") - if (field in img_field) and ("components" in filename): - count += 1 - return f.as_posix() - else: - continue - if count == 0: - return None - - -def get_psf_from_image(image_path: str): - """ - Funtion used to get the point spread function (PSF) extent in major and minor axis. - These will be in the header of the image file - - Parameters - ---------- - image_path: str - Path to the image file - - Returns - ------- - Tuple(psf_major, psf_minor) - Major and minor axes of the PSF. - """ - image_path = image_path.replace("SELAVY", "IMAGES") - image_path = image_path.replace("selavy-", "") - image_path = image_path.replace(".components.xml", ".fits") - hdu = fits.open(image_path) - psf_maj = hdu[0].header["BMAJ"] * u.degree - psf_min = hdu[0].header["BMIN"] * u.degree - hdu.close() - return (psf_maj.to(u.arcsec), psf_min.to(u.arcsec)) +from vast_post_processing.corrections import correct_files def main( @@ -169,10 +71,17 @@ def main( "arcsec for `catalog`. First argument is major axis followed by nimor axis." ), ), + outdir: Optional[str] = typer.Option( + None, + help="Stem of the output directory to store the corrected images and cataloges to. The default" + "way is to construct it from the tile directory, by making folders with _CORRECTED tag attached" + "to them as suffix", + ), overwrite: bool = False, verbose: bool = False, ): - """Read astrometric and flux corrections produced by vast-xmatch and apply them to + """ + Read astrometric and flux corrections produced by vast-xmatch and apply them to VAST images and catalogues in vast-data. See https://github.com/marxide/vast-xmatch. 
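    This command is a thin wrapper around
    vast_post_processing.corrections.correct_files and passes the options above
    straight through to it; that function locates the matching reference
    catalogue for each image, derives the corrections, and writes the corrected
    images and catalogues (see its docstring for a usage sketch).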
""" # configure logger @@ -180,164 +89,18 @@ def main( # replace the default sink logger.remove() logger.add(sys.stderr, level="INFO") - - # read corrections - image_path_glob_list: list[Generator[Path, None, None]] = [] - components_path_glob_list: list[Generator[Path, None, None]] = [] - correction_files_path_glob_list: list[Generator[Path, None, None]] = [] - - correction_files_path_glob_list.append(vast_corrections_root.glob("*.xml")) - correction_files_path_glob_list = list(correction_files_path_glob_list[0]) - - if epoch is None or len(epoch) == 0: - image_path_glob_list.append( - vast_tile_data_root.glob("STOKESI_IMAGES/epoch_*/*.fits") - ) - components_path_glob_list.append( - vast_tile_data_root.glob("STOKESI_SELAVY/epoch_*/*.xml") - ) - else: - for n in epoch: - image_path_glob_list.append( - vast_tile_data_root.glob(f"STOKESI_IMAGES/epoch_{n}/*.fits") - ) - components_path_glob_list.append( - vast_tile_data_root.glob(f"STOKESI_SELAVY/epoch_{n}/*.xml") - ) - - # construct output path to store corrections - corr_dir = vast_tile_data_root / "corr_db" - if not os.path.isdir(corr_dir): - os.mkdir(corr_dir) - - # get corrections for an image and the correct it - for image_path in chain.from_iterable(image_path_glob_list): - epoch_dir = image_path.parent.name - _, _, field, sbid_str, *_ = image_path.name.split(".") - sbid = int(sbid_str[2:]) - - # get rms and background images - rms_path = ( - vast_tile_data_root - / "STOKESI_RMSMAPS" - / epoch_dir - / f"noiseMap.{image_path.name}" - ) - bkg_path = ( - vast_tile_data_root - / "STOKESI_RMSMAPS" - / epoch_dir - / f"meanMap.{image_path.name}" - ) - - # construct output path to store corrections for each epoch - epoch_corr_dir = corr_dir / epoch_dir - - if not os.path.isdir(epoch_corr_dir): - os.mkdir(epoch_corr_dir) - - ref_file = get_correct_correction_file( - correction_files_list=correction_files_path_glob_list, - img_field=field, - ) - - skip = False - if not rms_path.exists(): - logger.warning(f"RMS image not found for {image_path}.") - if not bkg_path.exists(): - logger.warning(f"Background image not found for {image_path}.") - - # Look for any component and island files correspnding to this image - image_root = image_path.parent.as_posix() - catalog_root = image_root.replace("IMAGES", "SELAVY") - - catalog_filename = image_path.name.replace("image", "selavy-image") - catalog_filename = catalog_filename.replace(".fits", ".components.xml") - - catalog_filepath = f"{catalog_root}/{catalog_filename}" - - component_file = Path(catalog_filepath) - island_file = Path(catalog_filepath.replace("components", "islands")) - - skip = ( - not ( - (rms_path.exists()) - and (bkg_path.exists()) - and (ref_file is not None) - and (component_file.exists()) - ) - or skip - ) - if skip: - if not ((rms_path.exists()) and (bkg_path.exists())): - logger.warning(f"Skipping {image_path}, RMS/BKG maps do not exist") - elif not (component_file.exists()): - logger.warning(f"Skipping {image_path}, catalog files do not exist") - elif ref_file is None: - logger.warning(f"Skipping {image_path}, no reference field found.") - continue - else: - fname = image_path.name.replace(".fits", "corrections.csv") - crossmatch_file = epoch_corr_dir / fname - csv_file = epoch_corr_dir / "all_fields_corrections.csv" - - # Get the psf measurements to estimate errors follwoing Condon 1997 - if len(psf_ref) > 0: - psf_reference = psf_ref - else: - psf_reference = get_psf_from_image(ref_file) - - if len(psf) > 0: - psf_image = psf - else: - psf_image = 
get_psf_from_image(image_path.as_posix()) - ( - dra_median_value, - ddec_median_value, - flux_corr_mult, - flux_corr_add, - ) = vast_xmatch_qc( - reference_catalog_path=ref_file, - catalog_path=component_file.as_posix(), - radius=Angle(radius * u.arcsec), - condon=condon, - psf_reference=psf_reference, - psf=psf_image, - fix_m=False, - fix_b=False, - crossmatch_output=crossmatch_file, - csv_output=csv_file, - ) - - # get corrections - for path in (image_path, rms_path, bkg_path): - stokes_dir = f"{path.parent.parent.name}_CORRECTED" - output_dir = vast_tile_data_root / stokes_dir / epoch_dir - output_dir.mkdir(parents=True, exist_ok=True) - _ = shift_and_scale_image( - path, - output_dir, - flux_scale=flux_corr_mult.n, - flux_offset_mJy=flux_corr_add.n, - ra_offset_arcsec=dra_median_value.item(), - dec_offset_arcsec=ddec_median_value.item(), - overwrite=overwrite, - ) - - # Do the same for catalog files - for path in (component_file, island_file): - stokes_dir = f"{path.parent.parent.name}_CORRECTED" - output_dir = vast_tile_data_root / stokes_dir / epoch_dir - output_dir.mkdir(parents=True, exist_ok=True) - _ = shift_and_scale_catalog( - path, - output_dir, - flux_scale=flux_corr_mult.n, - flux_offset_mJy=flux_corr_add.n, - ra_offset_arcsec=dra_median_value.item(), - dec_offset_arcsec=ddec_median_value.item(), - overwrite=overwrite, - ) + correct_files( + vast_tile_data_root=vast_tile_data_root, + vast_corrections_root=vast_corrections_root, + epoch=epoch, + radius=radius, + condon=condon, + psf_ref=psf_ref, + psf=psf, + outdir=outdir, + overwrite=overwrite, + verbose=verbose, + ) if __name__ == "__main__": diff --git a/vast_post_processing/corrections.py b/vast_post_processing/corrections.py index 95ada94..90ca726 100644 --- a/vast_post_processing/corrections.py +++ b/vast_post_processing/corrections.py @@ -1,5 +1,7 @@ from pathlib import Path -import warnings +import warnings, sys, os +from typing import Generator +from itertools import chain from astropy.coordinates import SkyCoord, Angle from astropy.io import fits from astropy.io.votable import parse @@ -31,6 +33,39 @@ def vast_xmatch_qc( crossmatch_output: Optional[str] = None, csv_output: Optional[str] = None, ): + """Function to cross-match two catalogs and filter sources that are within + a given radius + + Args: + reference_catalog_path (str): Path to the reference catalog + catalog_path (str): Path to the catalog that needs flux/astrometric corrections + radius (Angle, optional): Cross-match radius. Defaults to Angle("10arcsec"). + condon (bool, optional): Flag to calculate Condon error. Defaults to False. + psf_reference (Optional[Tuple[float, float]], optional): PSF of the reference catalog. + This includes information about the major/minor axis FWHM. Defaults to None. If None, + Condon errors will not be calculated. + psf (Optional[Tuple[float, float]], optional): PSF of the input catalog. + This includes information about the major/minor axis FWHM. Defaults to None. If None, + Condon errors will not be calculated. + fix_m (bool, optional): Flag to fix the slope. For tge straight line fit, should we fix + the slope to certain value or leave it free to be fit. Defaults to False. + fix_b (bool, optional): Flag to fix the intercept. For tge straight line fit, should we fix + the slope to certain value or leave it free to be fit. Defaults to False. + positional_unit (u.Unit, optional): output unit in which the astrometric offset is given. + Defaults to u.Unit("arcsec"). 
+ flux_unit (u.Unit, optional): output unit in which the flux scale is given. + Defaults to u.Unit("mJy"). + crossmatch_output (Optional[str], optional): File path to write the crossmatch output. + Defaults to None, which means no file is written + csv_output (Optional[str], optional): File path to write the flux/astrometric corrections. + Defaults to None, which means no file is written + + Returns: + dra_median_value: The median offset in RA (arcsec) + ddec_median_value: The median offset in DEC (arcsec) + flux_corr_mult: Multiplicative flux correction + flux_corr_add: Additive flux correction + """ # convert catalog path strings to Path objects reference_catalog_path = Path(reference_catalog_path) catalog_path = Path(catalog_path) @@ -125,20 +160,26 @@ def vast_xmatch_qc( def shift_and_scale_image( image_path: Path, - output_dir_path: Path, flux_scale: float = 1.0, flux_offset_mJy: float = 0.0, ra_offset_arcsec: float = 0.0, dec_offset_arcsec: float = 0.0, replace_nan: bool = False, - overwrite: bool = False, -) -> Path: - """Apply astrometric and flux corrections to a FITS image.""" +): + """Apply astrometric and flux corrections to a FITS image. + + Args: + image_path (Path): Path for the input image + flux_scale (float, optional): Multiplicative flux correction. Defaults to 1.0. + flux_offset_mJy (float, optional): Additive flux correction. Defaults to 0.0. + ra_offset_arcsec (float, optional): RA offset in arcsec. Defaults to 0.0. + dec_offset_arcsec (float, optional): DEC offset in arcsec. Defaults to 0.0. + replace_nan (bool, optional): Replace NAN's in the data with 0. Defaults to False. + + Returns: + astropy.io.fits.hdu.image.PrimaryHDU: the HDU of the corrected image + """ logger.debug(f"Correcting {image_path} ...") - output_path = output_dir_path / image_path.with_suffix(".corrected.fits").name - if output_path.exists() and not overwrite: - logger.warning(f"Will not overwrite existing image: {output_path}.") - return output_path image_hdul = fits.open(image_path) image_hdu = image_hdul[0] @@ -173,26 +214,28 @@ def shift_and_scale_image( image_hdu.header["RAOFF"] = ra_offset_arcsec image_hdu.header["DECOFF"] = dec_offset_arcsec - if output_path.exists() and overwrite: - logger.warning(f"Overwriting existing image: {output_path}.") - image_hdul.writeto(str(output_path), overwrite=True) - else: - image_hdul.writeto(str(output_path)) - logger.success(f"Wrote corrected image: {output_path}.") - image_hdul.close() - return output_path + return image_hdul def shift_and_scale_catalog( catalog_path: Path, - output_dir_path: Path, flux_scale: float = 1.0, flux_offset_mJy: float = 0.0, ra_offset_arcsec: float = 0.0, dec_offset_arcsec: float = 0.0, - overwrite: bool = False, -) -> Path: - """Apply astrometric and flux corrections to a VAST VOTable.""" +): + """Apply astrometric and flux corrections to a catalog. + + Args: + catalog_path (Path): Path for the input catalog + flux_scale (float, optional): Multiplicative flux correction. Defaults to 1.0. + flux_offset_mJy (float, optional): Additive flux correction. Defaults to 0.0. + ra_offset_arcsec (float, optional): RA offset in arcsec. Defaults to 0.0. + dec_offset_arcsec (float, optional): DEC offset in arcsec. Defaults to 0.0. 
+ + Returns: + astropy.io.votable: the corrected catalog + """ # flux-unit columns in all catalogs FLUX_COLS = ( "col_flux_peak", @@ -214,10 +257,6 @@ def shift_and_scale_catalog( ) logger.debug(f"Correcting {catalog_path} ...") is_island = ".islands" in catalog_path.name - output_path = output_dir_path / catalog_path.with_suffix(".corrected.xml").name - if output_path.exists() and not overwrite: - logger.warning(f"Will not overwrite existing catalogue: {output_path}.") - return output_path votablefile = parse(catalog_path) votable = votablefile.get_first_table() @@ -254,12 +293,312 @@ def shift_and_scale_catalog( for col in cols: votable.array[col] = flux_scale * (votable.array[col] + flux_offset_mJy) - # write corrected VOTable - if output_path.exists() and overwrite: - logger.warning(f"Overwriting existing catalogue: {output_path}.") - output_path.unlink() - votablefile.to_xml(str(output_path)) + return votablefile + + +def get_correct_file(correction_files_dir, img_field): + """Helper function to get the file from the reference catalogs which + observed the same field. + + Args: + correction_files_list (list): Path to the correction files directory + img_field (str): The field name of the input catalog + + Returns: + str: the correspoding file with the same field as the one requested. + """ + # we need to string the last A from the field + if img_field[-1] == "A": + img_field = img_field[:-1] + img_field = img_field.replace("VAST", "RACS") + matched_field = list(correction_files_dir.glob(f"*{img_field}*components*")) + if len(matched_field) > 0: + # This means that there are multpile files with the same field, + # possibly with different sbid's corresponding to different observations + return matched_field[0].as_posix() + else: + return None + + +def get_psf_from_image(image_path: str): + """ + Funtion used to get the point spread function (PSF) extent in major and minor axis. + These will be in the header of the image file + + Parameters + ---------- + image_path: str + Path to the image file + + Returns + ------- + Tuple(psf_major, psf_minor) + Major and minor axes of the PSF. + """ + image_path = image_path.replace("SELAVY", "IMAGES") + image_path = image_path.replace("selavy-", "") + image_path = image_path.replace(".components.xml", ".fits") + hdu = fits.open(image_path) + psf_maj = hdu[0].header["BMAJ"] * u.degree + psf_min = hdu[0].header["BMIN"] * u.degree + hdu.close() + return (psf_maj.to(u.arcsec), psf_min.to(u.arcsec)) + + +def correct_field( + image_path: Path, + vast_corrections_root: Path = "/data/vast-survey/RACS/release-format/EPOCH00/TILES/STOKESI_SELAVY", + radius: float = 10, + condon: bool = True, + psf_ref: list[float] = None, + psf: list[float] = None, + write_output: bool = True, + outdir: str = None, + overwrite: bool = False, +): + """Read astrometric and flux corrections produced by vast-xmatch and apply them to + VAST images and catalogues in vast-data. See https://github.com/marxide/vast-xmatch. + + Args: + image path (Path): Path to the image file that needs to be corrected. + vast_corrections_root (Path, optional): Path to the catalogues of referecne catalog. + Defaults to "/data/vast-survey/RACS/release-format/EPOCH00/TILES/STOKESI_SELAVY". + radius (float, optional): Crossmatch radius. Defaults to 10. + condon (bool, optional): Flag to replace errros with Condon errors. Defaults to True. + psf_ref (list[float], optional): PSF information of the reference catalog. Defaults to None. + psf (list[float], optional): PSF information of the input catalog. 
Defaults to None. + write_output (bool, optional): Write the corrected image and catalog files or return the + corrected hdul and the corrected table?. Defaults to True, which means to write + outdir (str, optional): The stem of the output directory to write the files to + overwrite (bool, optional): Overwrite the existing files?. Defaults to False. + """ + epoch_dir = image_path.parent.name + _, _, field, *_ = image_path.name.split(".") + + # get rms and background images + rms_root = Path( + image_path.parent.as_posix().replace("STOKESI_IMAGES", "STOKESI_RMSMAPS") + ) + rms_path = rms_root / f"noiseMap.{image_path.name}" + bkg_path = rms_root / f"meanMap.{image_path.name}" + + correction_files_dir = Path(vast_corrections_root) + ref_file = get_correct_file( + correction_files_dir=correction_files_dir, + img_field=field, + ) + + if outdir is None: + outdir = image_path.parent.parent.parent + + # construct output path to store corrections for each epoch + corr_dir = outdir / "corr_db" + if not os.path.isdir(corr_dir): + os.mkdir(corr_dir) + epoch_corr_dir = corr_dir / epoch_dir + + if not os.path.isdir(epoch_corr_dir): + os.mkdir(epoch_corr_dir) + + skip = False + if not rms_path.exists(): + logger.warning(f"RMS image not found for {image_path}.") + if not bkg_path.exists(): + logger.warning(f"Background image not found for {image_path}.") + + # Look for any component and island files correspnding to this image + image_root = image_path.parent.as_posix() + catalog_root = image_root.replace("IMAGES", "SELAVY") + + catalog_filename = image_path.name.replace("image", "selavy-image") + catalog_filename = catalog_filename.replace(".fits", ".components.xml") + + catalog_filepath = f"{catalog_root}/{catalog_filename}" + + component_file = Path(catalog_filepath) + island_file = Path(catalog_filepath.replace("components", "islands")) + + skip = ( + not ( + (rms_path.exists()) + and (bkg_path.exists()) + and (ref_file is not None) + and (component_file.exists()) + ) + or skip + ) + if skip: + if not ((rms_path.exists()) and (bkg_path.exists())): + logger.warning(f"Skipping {image_path}, RMS/BKG maps do not exist") + elif not (component_file.exists()): + logger.warning(f"Skipping {image_path}, catalog files do not exist") + elif ref_file is None: + logger.warning(f"Skipping {image_path}, no reference field found.") + return None + else: + fname = image_path.name.replace(".fits", "corrections.csv") + crossmatch_file = epoch_corr_dir / fname + csv_file = epoch_corr_dir / "all_fields_corrections.csv" + + # Get the psf measurements to estimate errors follwoing Condon 1997 + if len(psf_ref) > 0: + psf_reference = psf_ref + else: + psf_reference = get_psf_from_image(ref_file) + + if len(psf) > 0: + psf_image = psf + else: + psf_image = get_psf_from_image(image_path.as_posix()) + + ( + dra_median_value, + ddec_median_value, + flux_corr_mult, + flux_corr_add, + ) = vast_xmatch_qc( + reference_catalog_path=ref_file, + catalog_path=component_file.as_posix(), + radius=Angle(radius * u.arcsec), + condon=condon, + psf_reference=psf_reference, + psf=psf_image, + fix_m=False, + fix_b=False, + crossmatch_output=crossmatch_file, + csv_output=csv_file, + ) + + # get corrections + corrected_hdul = [] + for path in (image_path, rms_path, bkg_path): + stokes_dir = f"{path.parent.parent.name}_CORRECTED" + output_dir = outdir / stokes_dir / epoch_dir + output_dir.mkdir(parents=True, exist_ok=True) + output_path = output_dir / path.with_suffix(".corrected.fits").name + if output_path.exists() and not overwrite: + 
logger.warning(f"Will not overwrite existing image: {output_path}.") + else: + corrected_hdu = shift_and_scale_image( + path, + flux_scale=flux_corr_mult.n, + flux_offset_mJy=flux_corr_add.n, + ra_offset_arcsec=dra_median_value.item(), + dec_offset_arcsec=ddec_median_value.item(), + ) + if write_output: + if output_path.exists() and overwrite: + logger.warning(f"Overwriting existing image: {output_path}.") + corrected_hdu.writeto(str(output_path), overwrite=True) + else: + corrected_hdu.writeto(str(output_path)) + logger.success(f"Writing corrected image to: {output_path}.") + corrected_hdu.close() + else: + corrected_hdul.append(corrected_hdu) + + # Do the same for catalog files + corrected_catalogs = [] + for path in (component_file, island_file): + stokes_dir = f"{path.parent.parent.name}_CORRECTED" + output_dir = outdir / stokes_dir / epoch_dir + output_dir.mkdir(parents=True, exist_ok=True) + output_path = output_dir / path.with_suffix(".corrected.xml").name + if output_path.exists() and not overwrite: + logger.warning(f"Will not overwrite existing catalogue: {output_path}.") + else: + corrected_catalog = shift_and_scale_catalog( + path, + flux_scale=flux_corr_mult.n, + flux_offset_mJy=flux_corr_add.n, + ra_offset_arcsec=dra_median_value.item(), + dec_offset_arcsec=ddec_median_value.item(), + ) + if write_output: + # write corrected VOTable + if output_path.exists() and overwrite: + logger.warning( + f"Overwriting existing catalogue: {output_path}." + ) + output_path.unlink() + corrected_catalog.to_xml(output_path.as_posix()) + else: + corrected_catalog.to_xml(output_path.as_posix()) + logger.success(f"Writing corrected catalogue: {output_path}.") + else: + corrected_catalogs.append(corrected_catalog) + return (corrected_hdul, corrected_catalogs) + + +def correct_files( + vast_tile_data_root: Path, + vast_corrections_root: Path = "/data/vast-survey/RACS/release-format/EPOCH00/TILES/STOKESI_SELAVY", + epoch: list[int] = None, + radius: float = 10, + condon: bool = True, + psf_ref: list[float] = None, + psf: list[float] = None, + write_output: bool = True, + outdir: str = None, + overwrite: bool = False, + verbose: bool = False, +): + """Read astrometric and flux corrections produced by vast-xmatch and apply them to + VAST images and catalogues in vast-data. See https://github.com/marxide/vast-xmatch. + + Args: + vast_tile_data_root (Path): Path to the data that needs to be corrected. + Should follow VAST convention, something like + /data/VAST/vast-data/TILES/ that has STOKESI_IMAGES/epoch_xx/ + vast_corrections_root (Path, optional): Path to the catalogues of referecne catalog. + Defaults to "/data/vast-survey/RACS/release-format/EPOCH00/TILES/STOKESI_SELAVY". + epoch (list[int], optional): Epoch to be corrected. Defaults to None. + radius (float, optional): Crossmatch radius. Defaults to 10. + condon (bool, optional): Flag to replace errros with Condon errors. Defaults to True. + psf_ref (list[float], optional): PSF information of the reference catalog. Defaults to None. + psf (list[float], optional): PSF information of the input catalog. Defaults to None. + write_output (bool, optional): Write the corrected image and catalog files or return the + corrected hdul and the corrected table?. Defaults to True, which means to write + outdir (str, optional): The stem of the output directory to write the files to + overwrite (bool, optional): Overwrite the existing files?. Defaults to False. + verbose (bool, optional): Show more log messages. Defaults to False. 
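    Example (a minimal call; the paths and epoch number are illustrative
    placeholders, and empty psf_ref/psf lists mean the PSF is read from the
    image headers):

        correct_files(
            vast_tile_data_root=Path("/data/VAST/vast-data/TILES"),
            epoch=[29],
            radius=10,
            condon=True,
            psf_ref=[],
            psf=[],
            overwrite=True,
        )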
+ """ + # configure logger + if not verbose: + # replace the default sink + logger.remove() + logger.add(sys.stderr, level="INFO") + + # read corrections + image_path_glob_list: list[Generator[Path, None, None]] = [] + components_path_glob_list: list[Generator[Path, None, None]] = [] + + if epoch is None or len(epoch) == 0: + image_path_glob_list.append( + vast_tile_data_root.glob("STOKESI_IMAGES/epoch_*/*.fits") + ) + components_path_glob_list.append( + vast_tile_data_root.glob("STOKESI_SELAVY/epoch_*/*.xml") + ) else: - votablefile.to_xml(str(output_path)) - logger.success(f"Wrote corrected catalogue: {output_path}.") - return output_path + for n in epoch: + image_path_glob_list.append( + vast_tile_data_root.glob(f"STOKESI_IMAGES/epoch_{n}/*.fits") + ) + components_path_glob_list.append( + vast_tile_data_root.glob(f"STOKESI_SELAVY/epoch_{n}/*.xml") + ) + + # get corrections for an image and the correct it + for image_path in chain.from_iterable(image_path_glob_list): + correct_field( + image_path=image_path, + vast_corrections_root=vast_corrections_root, + radius=radius, + condon=condon, + psf_ref=psf_ref, + psf=psf, + write_output=write_output, + outdir=outdir, + overwrite=overwrite, + ) diff --git a/vast_post_processing/crossmatch.py b/vast_post_processing/crossmatch.py index bdbdc3c..c5ee01f 100644 --- a/vast_post_processing/crossmatch.py +++ b/vast_post_processing/crossmatch.py @@ -14,11 +14,28 @@ def median_abs_deviation(data): + """helper function to calculate the median offset + + Args: + data (list): List/array of offsets + + Returns: + float: the median offset + """ median = np.median(data) return np.median(np.abs(data - median)) def straight_line(B, x): + """Helper function for fitting. Defines a straight line + + Args: + B (list): (slope, intercept) of the line + x (list): input X-axis data + + Returns: + list: the straight line + """ m, b = B return m * x + b @@ -26,6 +43,19 @@ def straight_line(B, x): def join_match_coordinates_sky( coords1: SkyCoord, coords2: SkyCoord, seplimit: u.arcsec ): + """Helper function to do the cross match + + Args: + coords1 (SkyCoord): Input coordinates + coords2 (SkyCoord): Reference coordinates + seplimit (u.arcsec): cross-match radius + + Returns: + numpy.ndarray: Array to see which of the input coordinates have a cross match + numpy.ndarray: Indices of the input catalog where there is source in reference + catlog within separation limit + numpy.ndarray: The separation distance for the cross matches + """ idx, separation, dist_3d = match_coordinates_sky(coords1, coords2) mask = separation < seplimit return np.where(mask)[0], idx[mask], separation[mask], dist_3d[mask] @@ -35,12 +65,17 @@ def crossmatch_qtables( catalog: Catalog, catalog_reference: Catalog, radius: Angle = Angle("10 arcsec"), - catalog_coord_cols: Tuple[str, str] = ("ra_deg_cont", "dec_deg_cont"), - catalog_reference_coord_cols: Tuple[str, str] = ("ra_deg_cont", "dec_deg_cont"), ) -> QTable: - catalog_ra, catalog_dec = catalog_coord_cols - catalog_reference_ra, catalog_reference_dec = catalog_reference_coord_cols + """Main function to filter cross-matched sources. + Args: + catalog (Catalog): Input catalog + catalog_reference (Catalog): Reference catalog + radius (Angle, optional): cross-match radius. Defaults to Angle("10 arcsec"). 
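    Example (illustrative; catalog and reference_catalog are assumed to be
    Catalog objects from vast_post_processing.catalogs):

        xmatch = crossmatch_qtables(catalog, reference_catalog, radius=Angle("10 arcsec"))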
+ + Returns: + QTable: filtered table that return the cross matches + """ logger.debug("Using crossmatch radius: %s.", radius) xmatch = join( From c3c020d7388e002734afbf913d5289569cb9982f Mon Sep 17 00:00:00 2001 From: Akash Anumarlapudi Date: Mon, 14 Aug 2023 11:43:55 -0500 Subject: [PATCH 15/31] Fixed typos --- vast_post_processing/cli/run_corrections.py | 4 ++-- vast_post_processing/corrections.py | 5 +++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/vast_post_processing/cli/run_corrections.py b/vast_post_processing/cli/run_corrections.py index 811c8ce..0347ab7 100644 --- a/vast_post_processing/cli/run_corrections.py +++ b/vast_post_processing/cli/run_corrections.py @@ -22,7 +22,7 @@ def main( "/data/vast-survey/RACS/release-format/EPOCH00/TILES/STOKESI_SELAVY", help=( "Path to RACS data that is can be used to correct VAST data. Tries to use" - " EPOCH00 as the defualt epoch. If not the user can override this by" + " EPOCH00 as the default epoch. If not the user can override this by" " giving a path to a folder that contain the selavy output" ), exists=True, @@ -54,7 +54,7 @@ def main( ".restored.conv.fits. Note that for TILE images, the epoch is determined " "from the full path. If the input catalogs do not follow this convention, then " "the PSF sizes must be supplied using --psf-reference and/or --psf. The " - "deafult behaviour is to lookup the PSF sizes from the header of the image" + "default behaviour is to lookup the PSF sizes from the header of the image" ), ), psf_ref: Optional[list[float]] = typer.Option( diff --git a/vast_post_processing/corrections.py b/vast_post_processing/corrections.py index 90ca726..dd2eb51 100644 --- a/vast_post_processing/corrections.py +++ b/vast_post_processing/corrections.py @@ -323,12 +323,13 @@ def get_correct_file(correction_files_dir, img_field): def get_psf_from_image(image_path: str): """ Funtion used to get the point spread function (PSF) extent in major and minor axis. - These will be in the header of the image file + These will be in the header of the image file. 
If a component file is give, it will + construct the image path from this and then gets the psf information Parameters ---------- image_path: str - Path to the image file + Path to the image file or a component file Returns ------- From cd14713098a798d23ade82596f3beae45638a9b5 Mon Sep 17 00:00:00 2001 From: Akash Anumarlapudi Date: Mon, 14 Aug 2023 14:04:32 -0500 Subject: [PATCH 16/31] New log message --- vast_post_processing/corrections.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/vast_post_processing/corrections.py b/vast_post_processing/corrections.py index dd2eb51..821b638 100644 --- a/vast_post_processing/corrections.py +++ b/vast_post_processing/corrections.py @@ -603,3 +603,6 @@ def correct_files( outdir=outdir, overwrite=overwrite, ) + logger.info( + f"Successfully corrected the images and catalogs for {image_path.as_posix()}" + ) From a4d316de79d62e8f05693ea0e966d98525c724da Mon Sep 17 00:00:00 2001 From: Akash Date: Wed, 12 Jul 2023 15:58:30 -0500 Subject: [PATCH 17/31] Pulled in Andrew's code for corrections and re-wrote the entire correction code --- vast_post_processing/catalogs.py | 298 +++++++++++++++++ vast_post_processing/cli/correct_vast.py | 244 +++++++++++++- vast_post_processing/cli/run_corrections.py | 346 ++++++++++++++++++++ vast_post_processing/corrections.py | 117 ++++++- vast_post_processing/crossmatch.py | 153 +++++++++ 5 files changed, 1149 insertions(+), 9 deletions(-) create mode 100644 vast_post_processing/catalogs.py create mode 100644 vast_post_processing/cli/run_corrections.py create mode 100644 vast_post_processing/crossmatch.py diff --git a/vast_post_processing/catalogs.py b/vast_post_processing/catalogs.py new file mode 100644 index 0000000..a75b000 --- /dev/null +++ b/vast_post_processing/catalogs.py @@ -0,0 +1,298 @@ +import logging +from pathlib import Path +from typing import Tuple, Union, Dict, Optional +from urllib.parse import quote + +from astropy.coordinates import SkyCoord +from astropy.table import Table, QTable, join +import astropy.units as u +import numpy as np +import pandas as pd + +logger = logging.getLogger(__name__) + +SELAVY_COLUMN_UNITS = { + "ra_deg_cont": u.deg, + "dec_deg_cont": u.deg, + "ra_err": u.arcsec, + "dec_err": u.arcsec, + "flux_peak": u.mJy / u.beam, + "flux_peak_err": u.mJy / u.beam, + "maj_axis": u.arcsec, + "maj_axis_err": u.arcsec, + "min_axis": u.arcsec, + "min_axis_err": u.arcsec, + "pos_ang": u.deg, + "pos_ang_err": u.deg, + "rms_image": u.mJy / u.beam, +} + + +class UnknownCatalogInputFormat(Exception): + pass + + +class Catalog: + CATALOG_TYPE_TILE = "TILE" + CATALOG_TYPE_COMBINED = "COMBINED" + CATALOG_TYPES = ( + CATALOG_TYPE_TILE, + CATALOG_TYPE_COMBINED, + ) + + def __init__( + self, + path: Path, + psf: Optional[Tuple[float, float]] = None, + input_format: str = "selavy", + condon: bool = False, + positive_fluxes_only: bool = True, + ): + self.path: Path + self.table: QTable + self.field: Optional[str] + self.epoch: Optional[str] + self.sbid: Optional[str] + self.psf_major: Optional[u.Quantity] + self.psf_minor: Optional[u.Quantity] + self.type: str + + # read catalog + if input_format == "selavy": + if path.suffix == ".txt": + logger.debug("Reading %s as a Selavy txt catalog.", path) + read_catalog = read_selavy + else: + logger.debug("Reading %s as a Selavy VOTable catalog.", path) + read_catalog = read_selavy_votable + elif input_format == "aegean": + logger.debug("Reading %s as an Aegean catalog.", path) + read_catalog = read_aegean_csv + else: + logger.error( + "The format of input files 
is not supported. Only selavy and aegean are supported" + ) + raise SystemExit + self.path = path + self.table = read_catalog(path) + + # filter sources with bad sizes and optionally negative/0 fluxes + if positive_fluxes_only: + logger.info( + "Filtering %d sources with fluxes <= 0.", + (self.table["flux_peak"] <= 0).sum(), + ) + self.table = self.table[self.table["flux_peak"] > 0] + logger.info( + "Filtering %d sources with fitted sizes <= 0.", + ((self.table["maj_axis"] <= 0) | (self.table["min_axis"] <= 0)).sum(), + ) + self.table = self.table[ + (self.table["maj_axis"] > 0) & (self.table["min_axis"] > 0) + ] + + # read epoch, field, sbid, psf's + epoch_name = path.parent.name + _, _, field, sbid, *_ = path.name.split(".") + self.epoch = epoch_name + self.field = field.replace("VAST_", "") + self.sbid = sbid + + if psf is not None: + self.psf_major, self.psf_minor = psf * u.arcsec + logger.debug( + "Using user provided PSF for %s: %s, %s.", + self.path, + self.psf_major, + self.psf_minor, + ) + else: + logger.warning( + "PSF is unknown for %s. Condon errors will be unavailable.", self.path + ) + self.psf_major = None + self.psf_minor = None + + if condon and self.psf_major is not None and self.psf_minor is not None: + _ = self.calculate_condon_flux_errors(correct_peak_for_noise=True) + logger.debug("Condon errors computed for %s.", self.path) + + def calculate_condon_flux_errors( + self, + alpha_maj1=2.5, + alpha_min1=0.5, + alpha_maj2=0.5, + alpha_min2=2.5, + alpha_maj3=1.5, + alpha_min3=1.5, + clean_bias=0.0, + clean_bias_error=0.0, + frac_flux_cal_error=0.0, + correct_peak_for_noise=False, + ): + noise = self.table["rms_image"] + snr = self.table["flux_peak"] / noise + + rho_sq3 = ( + ( + self.table["maj_axis"] + * self.table["min_axis"] + / (4.0 * self.psf_major * self.psf_minor) + ) + * (1.0 + (self.psf_major / self.table["maj_axis"]) ** 2) ** alpha_maj3 + * (1.0 + (self.psf_minor / self.table["min_axis"]) ** 2) ** alpha_min3 + * snr**2 + ) + + flux_peak_col = self.table["flux_peak"] + flux_peak_condon = self.table["flux_peak"] + ( + -(noise**2) / self.table["flux_peak"] + clean_bias + ) + if correct_peak_for_noise: + flux_peak_col = flux_peak_condon + + errorpeaksq = ( + (frac_flux_cal_error * flux_peak_col) ** 2 + + clean_bias_error**2 + + 2.0 * flux_peak_col**2 / rho_sq3 + ) + errorpeak = np.sqrt(errorpeaksq) + + self.table["flux_peak_condon"] = flux_peak_condon + self.table["flux_peak_selavy"] = self.table["flux_peak"] + self.table["flux_peak_err_condon"] = errorpeak + self.table["flux_peak_err_selavy"] = self.table["flux_peak_err"] + self.table["flux_peak_err"] = self.table["flux_peak_err_condon"] + if correct_peak_for_noise: + self.table["flux_peak"] = self.table["flux_peak_condon"] + return flux_peak_condon, errorpeak + + +def _convert_selavy_columns_to_quantites( + qt: QTable, units: Dict[str, u.Unit] = SELAVY_COLUMN_UNITS +) -> QTable: + for col, unit in units.items(): + qt[col].unit = unit + return qt + + +def read_selavy(catalog_path: Path) -> QTable: + """Read a Selavy fixed-width component catalog and return a QTable. + Assumed to contain at least the following columns with the given units: + - `ra_deg_cont` and `dec_deg_cont`: degrees. + - `ra_err` and `dec_err`: arcseconds. + - `flux_peak` and `flux_peak_err`: mJy/beam. + - `maj_axis`, `maj_axis_err`, `min_axis`, `min_axis_err`: arcseconds. + - `pos_ang` and `pos_ang_err`: degrees. + - `rms_image`: mJy/beam. + These columns will be converted to Astropy quantites assuming the above units. 
+ + Parameters + ---------- + catalog_path : Path + Path to the Selavy catalog file. + + Returns + ------- + QTable + Selavy catalog as a QTable, with extra columns: + - `coord`: `SkyCoord` object of the source coordinate. + - `nn_separation`: separation to the nearest-neighbour source as a Quantity with + angular units. + """ + df = pd.read_fwf(catalog_path, skiprows=[1]).drop(columns="#") + qt = _convert_selavy_columns_to_quantites(QTable.from_pandas(df)) + qt["coord"] = SkyCoord(ra=qt["ra_deg_cont"], dec=qt["dec_deg_cont"]) + _, qt["nn_separation"], _ = qt["coord"].match_to_catalog_sky( + qt["coord"], nthneighbor=2 + ) + return qt + + +def read_selavy_votable(catalog_path: Path) -> QTable: + t = Table.read(catalog_path, format="votable", use_names_over_ids=True) + # remove units from str columns and fix unrecognized flux units + for col in t.itercols(): + if col.dtype.kind == "U": + col.unit = None + elif col.unit == u.UnrecognizedUnit("mJy/beam"): + col.unit = u.Unit("mJy/beam") + qt = QTable(t) + qt["coord"] = SkyCoord(ra=qt["ra_deg_cont"], dec=qt["dec_deg_cont"]) + _, qt["nn_separation"], _ = qt["coord"].match_to_catalog_sky( + qt["coord"], nthneighbor=2 + ) + return qt + + +def read_hdf(catalog_path: Path) -> pd.DataFrame: + df = pd.read_hdf(catalog_path, key="data") + df["field"] = df.field.str.split(".", n=1, expand=True)[0] + qt = _convert_selavy_columns_to_quantites(QTable.from_pandas(df)) + qt["coord"] = SkyCoord(ra=qt["ra_deg_cont"], dec=qt["dec_deg_cont"]) + _, qt["nn_separation"], _ = qt["coord"].match_to_catalog_sky( + qt["coord"], nthneighbor=2 + ) + return qt + + +def read_aegean_csv(catalog_path: Path) -> QTable: + """Read an Aegean CSV component catalog and return a QTable. + Assumed to contain at least the following columns with the given units: + - `ra` and `dec`: degrees. + - `err_ra` and `err_dec`: degrees. + - `peak_flux` and `err_peak_flux`: Jy/beam. + - `a`, `err_a`, `b`, `err_b`: fitted semi-major and -minor axes in arcseconds. + - `pa` and `err_pa`: degrees. + - `local_rms`: Jy/beam. + These columns will be converted to Astropy quantites assuming the above units. + + Parameters + ---------- + catalog_path : Path + Path to the Selavy catalog file. + + Returns + ------- + QTable + Aegean component catalog as a QTable, with extra columns: + - `coord`: `SkyCoord` object of the source coordinate. + - `nn_separation`: separation to the nearest-neighbour source as a Quantity with + angular units. 
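A minimal sketch (illustrative only, with made-up values) of the rename-and-attach-units step that this function applies via AEGEAN_COLUMN_MAP below; only two of the mapped columns are shown here.

from astropy.table import Table, QTable
import astropy.units as u

# Hypothetical Aegean-style columns.
t = Table({"peak_flux": [0.012, 0.034], "a": [15.0, 18.0]})
column_map = {"peak_flux": ("flux_peak", u.Jy / u.beam), "a": ("maj_axis", u.arcsec)}
for old, (new, unit) in column_map.items():
    t.rename_column(old, new)
    t[new].unit = unit
qt = QTable(t)  # unit-bearing columns become Quantity columns
print(qt["flux_peak"].to(u.mJy / u.beam))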
+ """ + AEGEAN_COLUMN_MAP = { + # aegean name: (selavy name, aegean unit) + "ra": ("ra_deg_cont", u.deg), + "dec": ("dec_deg_cont", u.deg), + "err_ra": ("ra_err", u.deg), + "err_dec": ("dec_err", u.deg), + "peak_flux": ("flux_peak", u.Jy / u.beam), + "err_peak_flux": ("flux_peak_err", u.Jy / u.beam), + "a": ("maj_axis", u.arcsec), + "b": ("min_axis", u.arcsec), + "pa": ("pos_ang", u.arcsec), + "err_a": ("maj_axis_err", u.arcsec), + "err_b": ("min_axis_err", u.deg), + "err_pa": ("pos_ang_err", u.deg), + "local_rms": ("rms_image", u.Jy / u.beam), + } + qt = QTable.read(catalog_path) + # rename columns to match selavy convention and assign units + for col, (new_col, unit) in AEGEAN_COLUMN_MAP.items(): + qt.rename_column(col, new_col) + qt[new_col].unit = unit + # add has_siblings column + island_source_counts = ( + qt[["island", "source"]].group_by("island").groups.aggregate(np.sum) + ) + island_source_counts.rename_column("source", "has_siblings") + island_source_counts["has_siblings"] = island_source_counts["has_siblings"].astype( + bool + ) + qt = join(qt, island_source_counts, keys="island", join_type="left") + + qt["coord"] = SkyCoord(ra=qt["ra_deg_cont"], dec=qt["dec_deg_cont"]) + _, qt["nn_separation"], _ = qt["coord"].match_to_catalog_sky( + qt["coord"], nthneighbor=2 + ) + return qt diff --git a/vast_post_processing/cli/correct_vast.py b/vast_post_processing/cli/correct_vast.py index 611208e..ebd5276 100644 --- a/vast_post_processing/cli/correct_vast.py +++ b/vast_post_processing/cli/correct_vast.py @@ -1,8 +1,59 @@ from pathlib import Path from typing import Optional import typer +from astropy.table import QTable +from astropy.io import fits +from astropy import units as u -from vast_post_processing import corrections +from vast_post_processing.corrections import ( + shift_and_scale_catalog, + shift_and_scale_image, + calculate_positional_offsets, + calculate_flux_offsets, +) + + +def get_correct_correction_file(correction_files_list, epoch, img_field, img_sbid): + count = 0 + for f in chain.from_iterable(correction_files_list): + epoch_name = f.parent.name + if epoch_name in epoch: + filename = f.name + _, _, _, sbid, field, *_ = filename.split("_") + sbid = sbid.replace("-VAST", "") + field = field.replace(".csv", "") + if (sbid in img_sbid) & (field in img_field): + df = QTable.read(f) + flux_shifts = calculate_flux_offsets(df) + pos_shifts = calculate_positional_offsets(df) + count += 1 + return flux_shifts, pos_shifts + else: + continue + if count == 0: + return None, None + + +def get_psf_from_image(image_path: str): + """ + Funtion used to get the point spread function (PSF) extent in major and minor axis. + These will be in the header of the image file + + Parameters + ---------- + image_path: str + Path to the image file + + Returns + ------- + Tuple(psf_major, psf_minor) + Major and minor axes of the PSF. + """ + + hdu = fits.open(image_path) + psf_maj = hdu["BMAJ"] * u.degree + psf_min = hdu["BMIN"] * u.degree + return psf_maj.to(u.arcsec), psf_min.to(u.arcsec) def main( @@ -16,9 +67,13 @@ def main( file_okay=False, dir_okay=True, ), - vast_corrections_csv: Path = typer.Argument( - ..., - help="Path to VAST corrections CSV file produced by vast-xmatch.", + vast_corrections_csv_root: Path = typer.Option( + "/data/vast-survey/VAST/askap-surveys-database/vast/db/", + help=( + "Path to VAST corrections CSV file produced by vast-xmatch. Tries to use" + " the default path of these files. 
If not the user can override this by" + "giving a path to file" + ), exists=True, file_okay=True, dir_okay=False, @@ -34,9 +89,184 @@ def main( overwrite: bool = False, verbose: bool = False, ): - corrections.correct_vast( - vast_tile_data_root, vast_corrections_csv, epoch, overwrite, verbose - ) + """Read astrometric and flux corrections produced by vast-xmatch and apply them to + VAST images and catalogues in vast-data. See https://github.com/marxide/vast-xmatch. + """ + # configure logger + if not verbose: + # replace the default sink + logger.remove() + logger.add(sys.stderr, level="INFO") + + # read corrections + # corrections_df = ( + # pd.read_csv(vast_corrections_csv) + # .set_index(["release_epoch", "field", "sbid"]) + # .sort_index() + # ) + image_path_glob_list: list[Generator[Path, None, None]] = [] + components_path_glob_list: list[Generator[Path, None, None]] = [] + correction_files_path_glob_list: list[Generator[Path, None, None]] = [] + if epoch is None or len(epoch) == 0: + image_path_glob_list.append( + vast_tile_data_root.glob("STOKESI_IMAGES/epoch_*/*.fits") + ) + components_path_glob_list.append( + vast_tile_data_root.glob("STOKESI_SELAVY/epoch_*/*.components.xml") + ) + correction_files_path_glob_list.append( + vast_corrections_csv_root.glob("epoch_*/cat_match_RACS0*.csv") + ) + else: + for n in epoch: + image_path_glob_list.append( + vast_tile_data_root.glob(f"STOKESI_IMAGES/epoch_{n}/*.fits") + ) + components_path_glob_list.append( + vast_tile_data_root.glob(f"STOKESI_SELAVY/epoch_{n}/*.components.xml") + ) + correction_files_path_glob_list.append( + vast_corrections_csv_root.glob(f"epoch_{n}/cat_match_RACS0*.csv") + ) + + # correct images + for image_path in chain.from_iterable(image_path_glob_list): + epoch_dir = image_path.parent.name + _, _, field, sbid_str, *_ = image_path.name.split(".") + sbid = int(sbid_str[2:]) + # get rms and background images + rms_path = ( + vast_tile_data_root + / "STOKESI_RMSMAPS" + / epoch_dir + / f"noiseMap.{image_path.name}" + ) + bkg_path = ( + vast_tile_data_root + / "STOKESI_RMSMAPS" + / epoch_dir + / f"meanMap.{image_path.name}" + ) + # get corrections + skip = False + # try: + # corrections = corrections_df.loc[(epoch_dir, field, sbid)] + # except KeyError: + # skip = True + # logger.warning( + # f"Corrections not found for {image_path} ({epoch_dir}, {field}," + # f" {sbid})." + # ) + flux_corrections, pos_corrections = get_correct_correction_file( + correction_files_list=correction_files_path_glob_list, + epoch=epoch_dir, + img_field=field, + img_sbid=sbid_str, + ) + if (flux_corrections is None) | (pos_corrections is None): + skip = True + logger.warning( + f"Corrections not found for {image_path} ({epoch_dir}, {field}," + f" {sbid})." 
+ ) + else: + scale, offset, scale_err, offset_err = flux_corrections + dra_median, ddec_median, dra_madfm, ddec_madfm = pos_corrections + if not rms_path.exists(): + logger.warning(f"RMS image not found for {image_path}.") + if not bkg_path.exists(): + logger.warning(f"Background image not found for {image_path}.") + skip = not (rms_path.exists() and bkg_path.exists()) or skip + if skip: + logger.warning(f"Skipping {image_path}.") + continue + + for path in (image_path, rms_path, bkg_path): + stokes_dir = f"{path.parent.parent.name}_CORRECTED" + output_dir = vast_tile_data_root / stokes_dir / epoch_dir + output_dir.mkdir(parents=True, exist_ok=True) + # _ = shift_and_scale_image( + # path, + # output_dir, + # flux_scale=corrections.flux_peak_correction_multiplicative, + # flux_offset_mJy=corrections.flux_peak_correction_additive, + # ra_offset_arcsec=corrections.ra_correction, + # dec_offset_arcsec=corrections.dec_correction, + # overwrite=overwrite, + # ) + _ = shift_and_scale_image( + path, + output_dir, + flux_scale=scale, + flux_offset_mJy=offset, + ra_offset_arcsec=dra_median, + dec_offset_arcsec=ddec_median, + overwrite=overwrite, + ) + + # correct catalogs + for components_path in chain.from_iterable(components_path_glob_list): + epoch_dir = components_path.parent.name + _, _, field, sbid_str, *_ = components_path.name.split(".") + sbid = int(sbid_str[2:]) + # get island catalog + islands_path = components_path.with_name( + components_path.name.replace(".components", ".islands") + ) + # get corrections + skip = False + # try: + # corrections = corrections_df.loc[(epoch_dir, field, sbid)] + # except KeyError: + # skip = True + # logger.warning( + # f"Corrections not found for {image_path} ({epoch_dir}, {field}," + # f" {sbid})." + # ) + flux_corrections, pos_corrections = get_correct_correction_file( + correction_files_list=correction_files_path_glob_list, + epoch=epoch_dir, + img_field=field, + img_sbid=sbid_str, + ) + if (flux_corrections is None) | (pos_corrections is None): + skip = True + logger.warning( + f"Corrections not found for {image_path} ({epoch_dir}, {field}," + f" {sbid})." 
+ ) + else: + scale, offset, scale_err, offset_err = flux_corrections + dra_median, ddec_median, dra_madfm, ddec_madfm = pos_corrections + if not islands_path.exists(): + logger.warning(f"Islands catalogue not found for {components_path}.") + skip = not islands_path.exists() or skip + if skip: + logger.warning(f"Skipping {components_path}.") + continue + + for path in (components_path, islands_path): + stokes_dir = f"{path.parent.parent.name}_CORRECTED" + output_dir = vast_tile_data_root / stokes_dir / epoch_dir + output_dir.mkdir(parents=True, exist_ok=True) + # _ = shift_and_scale_catalog( + # path, + # output_dir, + # flux_scale=corrections.flux_peak_correction_multiplicative, + # flux_offset_mJy=corrections.flux_peak_correction_additive, + # ra_offset_arcsec=corrections.ra_correction, + # dec_offset_arcsec=corrections.dec_correction, + # overwrite=overwrite, + # ) + _ = shift_and_scale_catalog( + path, + output_dir, + flux_scale=scale, + flux_offset_mJy=offset, + ra_offset_arcsec=dra_median, + dec_offset_arcsec=ddec_median, + overwrite=overwrite, + ) if __name__ == "__main__": diff --git a/vast_post_processing/cli/run_corrections.py b/vast_post_processing/cli/run_corrections.py new file mode 100644 index 0000000..da4e95f --- /dev/null +++ b/vast_post_processing/cli/run_corrections.py @@ -0,0 +1,346 @@ +from loguru import logger +from pathlib import Path +from typing import Optional, Tuple, Generator +from astropy.coordinates import Angle +import astropy.units as u +import click, sys, os +from uncertainties import ufloat +from itertools import chain +import pandas as pd +import typer +from astropy.table import QTable +from astropy.io import fits +from astropy import units as u +from vast_post_processing.catalogs import Catalog + +from vast_post_processing.corrections import ( + shift_and_scale_catalog, + shift_and_scale_image, + vast_xmatch_qc, +) + + +class _AstropyUnitType(click.ParamType): + def convert(self, value, param, ctx, unit_physical_type): + try: + unit = u.Unit(value) + except ValueError: + self.fail(f"astropy.units.Unit does not understand: {value}.") + if unit.physical_type != unit_physical_type: + self.fail( + f"{unit} is a {unit.physical_type} unit. It must be of type" + f" {unit_physical_type}." + ) + else: + return unit + + +class AngleUnitType(_AstropyUnitType): + name = "angle_unit" + + def convert(self, value, param, ctx): + return super().convert(value, param, ctx, "angle") + + +class FluxUnitType(_AstropyUnitType): + name = "flux_unit" + + def convert(self, value, param, ctx): + return super().convert(value, param, ctx, "spectral flux density") + + +class AngleQuantityType(click.ParamType): + name = "angle_quantity" + + def convert(self, value, param, ctx): + try: + angle = Angle(value) + return angle + except ValueError: + self.fail(f"astropy.coordinates.Angle does not understand: {value}.") + + +ANGLE_UNIT_TYPE = AngleUnitType() +FLUX_UNIT_TYPE = FluxUnitType() +ANGLE_QUANTITY_TYPE = AngleQuantityType() + + +def get_correct_correction_file(correction_files_list, img_field): + count = 0 + for f in chain.from_iterable(correction_files_list): + filename = f.name + _, _, field, *_ = filename.split(".") + field = field.replace("RACS", "VAST") + if field in img_field: + count += 1 + return f + else: + continue + if count == 0: + return None + + +def get_psf_from_image(image_path: str): + """ + Funtion used to get the point spread function (PSF) extent in major and minor axis. 
+ These will be in the header of the image file + + Parameters + ---------- + image_path: str + Path to the image file + + Returns + ------- + Tuple(psf_major, psf_minor) + Major and minor axes of the PSF. + """ + + hdu = fits.open(image_path) + psf_maj = hdu["BMAJ"] * u.degree + psf_min = hdu["BMIN"] * u.degree + hdu.close() + return (psf_maj.to(u.arcsec), psf_min.to(u.arcsec)) + + +def main( + vast_tile_data_root: Path = typer.Argument( + ..., + help=( + "Path to VAST TILES data directory, i.e. the directory that contains the" + " STOKES* directories." + ), + exists=True, + file_okay=False, + dir_okay=True, + ), + vast_corrections_root: Path = typer.Option( + "/data/vast-survey/RACS/release-format/EPOCH00/TILES/STOKESI_SELAVY", + help=( + "Path to RACS data that is can be used to correct VAST data. Tries to use" + " EPOCH00 as the defualt epoch. If not the user can override this by" + " giving a path to a folder that contain the selavy output" + ), + exists=True, + file_okay=False, + dir_okay=True, + ), + epoch: Optional[list[int]] = typer.Option( + None, + help=( + "Only correct the given observation epochs. Can be given multiple times," + " e.g. --epoch 1 --epoch 2. If no epochs are given (the default), then" + " correct all available epochs." + ), + ), + radius: Optional[ANGLE_QUANTITY_TYPE] = typer.Option( + "10 arcsec", + help=( + "Maximum separation limit for nearest-neighbour crossmatch. Accepts any " + "string understood by astropy.coordinates.Angle." + ), + ), + condon: Optional[bool] = typer.Option( + True, + help=( + "Calculate Condon (1997) flux errors and use them instead of the original " + "errors. Will also correct the peak flux values for noise. Requires that the " + "input images follow the VAST naming convention, for TILE images: EPOCH01/" + "TILES/STOKESI_IMAGES/selavy-image.i.SB9667.cont.VAST_0102-06A.linmos.taylor.0" + ".restored.conv.fits. Note that for TILE images, the epoch is determined " + "from the full path. If the input catalogs do not follow this convention, then " + "the PSF sizes must be supplied using --psf-reference and/or --psf. The " + "deafult behaviour is to lookup the PSF sizes from the header of the image" + ), + ), + psf_ref: Optional[list[float]] = typer.Option( + None, + help=( + "If using --condon but want to give the psfs manually, use this specified PSF size in " + "arcsec for `reference_catalog`. First argument is major axis followed by nimor axis." + ), + ), + psf: Optional[list[float]] = typer.Option( + None, + help=( + "If using --condon but want to give the psfs manually, use this specified PSF size in " + "arcsec for `catalof`. First argument is major axis followed by nimor axis." + ), + ), + overwrite: bool = False, + verbose: bool = False, +): + """Read astrometric and flux corrections produced by vast-xmatch and apply them to + VAST images and catalogues in vast-data. See https://github.com/marxide/vast-xmatch. 
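A minimal sketch (illustrative only, hypothetical numbers) of the Condon (1997) peak-flux error that the --condon option enables. It follows the same rho-squared expression used by Catalog.calculate_condon_flux_errors with alpha_maj = alpha_min = 1.5; clean-bias and calibration terms are set to zero.

import numpy as np

# All values are hypothetical; fluxes in mJy/beam, sizes in arcsec.
flux_peak, rms = 5.0, 0.25
maj, minr, psf_maj, psf_min = 18.0, 14.0, 15.0, 12.0
snr = flux_peak / rms
rho_sq = (
    (maj * minr / (4.0 * psf_maj * psf_min))
    * (1.0 + (psf_maj / maj) ** 2) ** 1.5
    * (1.0 + (psf_min / minr) ** 2) ** 1.5
    * snr**2
)
flux_peak_err = np.sqrt(2.0 * flux_peak**2 / rho_sq)
print(flux_peak_err)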
+ """ + # configure logger + if not verbose: + # replace the default sink + logger.remove() + logger.add(sys.stderr, level="INFO") + + # read corrections + image_path_glob_list: list[Generator[Path, None, None]] = [] + components_path_glob_list: list[Generator[Path, None, None]] = [] + correction_files_path_glob_list: list[Generator[Path, None, None]] = [] + + correction_files_path_glob_list.append(vast_corrections_root.glob("*.xml")) + + if epoch is None or len(epoch) == 0: + image_path_glob_list.append( + vast_tile_data_root.glob("STOKESI_IMAGES/epoch_*/*.fits") + ) + components_path_glob_list.append( + vast_tile_data_root.glob("STOKESI_SELAVY/epoch_*/*.components.xml") + ) + else: + for n in epoch: + image_path_glob_list.append( + vast_tile_data_root.glob(f"STOKESI_IMAGES/epoch_{n}/*.fits") + ) + components_path_glob_list.append( + vast_tile_data_root.glob(f"STOKESI_SELAVY/epoch_{n}/*.components.xml") + ) + + # construct output path to store corrections + corr_dir = vast_tile_data_root / "corr_db" + if not os.path.isdir(corr_dir): + os.mkdir(corr_dir) + + # get corrections for an image and the correct it + for image_path in chain.from_iterable(image_path_glob_list): + epoch_dir = image_path.parent.name + _, _, field, sbid_str, *_ = image_path.name.split(".") + sbid = int(sbid_str[2:]) + + # get rms and background images + rms_path = ( + vast_tile_data_root + / "STOKESI_RMSMAPS" + / epoch_dir + / f"noiseMap.{image_path.name}" + ) + bkg_path = ( + vast_tile_data_root + / "STOKESI_RMSMAPS" + / epoch_dir + / f"meanMap.{image_path.name}" + ) + + # construct output path to store corrections for each epoch + epoch_corr_dir = vast_tile_data_root / "corr_db" / epoch_dir + + if not os.path.isdir(epoch_corr_dir): + os.mkdir(epoch_corr_dir) + + ref_file = get_correct_correction_file( + correction_files_list=correction_files_path_glob_list, + img_field=field, + ) + + skip = False + if not rms_path.exists(): + logger.warning(f"RMS image not found for {image_path}.") + if not bkg_path.exists(): + logger.warning(f"Background image not found for {image_path}.") + skip = ( + not ((rms_path.exists()) and (bkg_path.exists()) and (ref_file is not None)) + or skip + ) + if skip: + if not ((rms_path.exists()) and (bkg_path.exists())): + logger.warning(f"Skipping {image_path}, RMS/BKG maps do not exist") + elif ref_file is None: + logger.warning(f"Skipping {image_path}, no reference field found.") + continue + else: + crossmatch_file = epoch_corr_dir / image_path.replace( + "components.xml", "corrections.csv" + ) + csv_file = epoch_corr_dir / "corrections.csv" + + # Get the psf measurements to estimate errors follwoing Condon 1997 + if psf_ref is not None: + psf_reference = psf_ref + else: + psf_reference = get_psf_from_image(ref_file) + + if psf is not None: + psf_image = psf + else: + psf_image = get_psf_from_image(image_path) + ( + dra_median_value, + ddec_median_value, + flux_corr_mult, + flux_corr_add, + ) = vast_xmatch_qc( + reference_catalog_path=ref_file, + catalog_path=image_path, + radius=Angle(radius), + condon=condon, + psf_reference=psf_reference, + psf=psf_image, + fix_m=False, + fix_b=False, + crossmatch_output=crossmatch_file, + csv_output=csv_file, + ) + + # get corrections + for path in (image_path, rms_path, bkg_path): + stokes_dir = f"{path.parent.parent.name}_CORRECTED" + output_dir = vast_tile_data_root / stokes_dir / epoch_dir + output_dir.mkdir(parents=True, exist_ok=True) + _ = shift_and_scale_image( + path, + output_dir, + flux_scale=flux_corr_mult, + flux_offset_mJy=flux_corr_add, + 
ra_offset_arcsec=dra_median_value, + dec_offset_arcsec=ddec_median_value, + overwrite=overwrite, + ) + + # Do the same for catalog files + # Look for any component and island files correspnding to this image + comp_files = [] + for p in list(components_path_glob_list[0]): + comp_file_name = p.name + comp_file_epoch = p.parent.name + if ( + (epoch_dir in comp_file_epoch) + and (field in comp_file_name) + and (f"SB{sbid}" in comp_file_name) + ): + comp_files.append(p) + + if len(comp_files) == 0: + logger.warning(f"Selavy catalogue not found for the image {image_path}") + elif len(comp_files) == 1: + if ".components" in comp_files[0].name: + logger.warning( + f"Islannd catalogue not found for the image {image_path}" + ) + else: + logger.warning( + f"Islannd catalogue not found for the image {image_path}" + ) + else: + for path in comp_files: + stokes_dir = f"{path.parent.parent.name}_CORRECTED" + output_dir = vast_tile_data_root / stokes_dir / epoch_dir + output_dir.mkdir(parents=True, exist_ok=True) + _ = shift_and_scale_catalog( + path, + output_dir, + flux_scale=flux_corr_mult, + flux_offset_mJy=flux_corr_add, + ra_offset_arcsec=dra_median_value, + dec_offset_arcsec=ddec_median_value, + overwrite=overwrite, + ) + + +if __name__ == "__main__": + typer.run(main) diff --git a/vast_post_processing/corrections.py b/vast_post_processing/corrections.py index c5d6ade..8603d63 100644 --- a/vast_post_processing/corrections.py +++ b/vast_post_processing/corrections.py @@ -3,14 +3,127 @@ from pathlib import Path import warnings - -from astropy.coordinates import SkyCoord +from astropy.coordinates import SkyCoord, Angle from astropy.io import fits from astropy.io.votable import parse import astropy.units as u +from uncertainties import ufloat from astropy.wcs import WCS, FITSFixedWarning from loguru import logger import numpy as np +from typing import Tuple, Optional +from vast_post_processing.catalogs import Catalog +from vast_post_processing.crossmatch import ( + crossmatch_qtables, + calculate_positional_offsets, + calculate_flux_offsets, +) + + +def vast_xmatch_qc( + reference_catalog_path: str, + catalog_path: str, + radius: Angle = Angle("10arcsec"), + condon: bool = False, + psf_reference: Optional[Tuple[float, float]] = None, + psf: Optional[Tuple[float, float]] = None, + fix_m: bool = False, + fix_b: bool = False, + positional_unit: u.Unit = u.Unit("arcsec"), + flux_unit: u.Unit = u.Unit("mJy"), + crossmatch_output: Optional[str] = None, + csv_output: Optional[str] = None, +): + # convert catalog path strings to Path objects + reference_catalog_path = Path(reference_catalog_path) + catalog_path = Path(catalog_path) + flux_unit /= u.beam # add beam divisor as we currently only work with peak fluxes + + reference_catalog = Catalog( + reference_catalog_path, + psf=psf_reference, + condon=condon, + input_format="selavy", + ) + catalog = Catalog( + catalog_path, + psf=psf, + condon=condon, + input_format="selavy", + ) + + # perform the crossmatch + xmatch_qt = crossmatch_qtables(catalog, reference_catalog, radius=radius) + # select xmatches with non-zero flux errors and no siblings + logger.info("Removing crossmatched sources with siblings or flux peak errors = 0.") + mask = xmatch_qt["flux_peak_err"] > 0 + mask &= xmatch_qt["flux_peak_err_reference"] > 0 + mask &= xmatch_qt["has_siblings"] == 0 + mask &= xmatch_qt["has_siblings_reference"] == 0 + data = xmatch_qt[mask] + logger.info( + f"{len(data):.2f} crossmatched sources remaining ({(len(data) / len(xmatch_qt)) * 100:.2f}%).", + ) + + # Write 
the cross-match data into csv + if crossmatch_output is not None: + data.write("crossmatch.csv", overwrite=True) + # calculate positional offsets and flux ratio + dra_median, ddec_median, dra_madfm, ddec_madfm = calculate_positional_offsets(data) + dra_median_value = dra_median.to(positional_unit).value + dra_madfm_value = dra_madfm.to(positional_unit).value + ddec_median_value = ddec_median.to(positional_unit).value + ddec_madfm_value = ddec_madfm.to(positional_unit).value + logger.info( + f"dRA median: {dra_median_value:.2f} MADFM: {dra_madfm_value:.2f} {positional_unit}. dDec median: {ddec_median_value:.2f} MADFM: {ddec_madfm_value:.2f} {positional_unit}.", + ) + + gradient, offset, gradient_err, offset_err = calculate_flux_offsets( + data, fix_m=fix_m, fix_b=fix_b + ) + ugradient = ufloat(gradient, gradient_err) + uoffset = ufloat(offset.to(flux_unit).value, offset_err.to(flux_unit).value) + logger.info( + f"ODR fit parameters: Sp = Sp,ref * {ugradient} + {uoffset} {flux_unit}.", + ) + + flux_corr_mult = 1 / ugradient + flux_corr_add = -1 * uoffset + + if csv_output is not None: + # output has been requested + + if True: # csv_output is not None: + csv_output_path = Path(csv_output) # ensure Path object + sbid = catalog.sbid if catalog.sbid is not None else "" + if not csv_output_path.exists(): + f = open(csv_output_path, "w") + print( + "field,release_epoch,sbid,ra_correction,dec_correction,ra_madfm," + "dec_madfm,flux_peak_correction_multiplicative,flux_peak_correction_additive," + "flux_peak_correction_multiplicative_err,flux_peak_correction_additive_err," + "n_sources", + file=f, + ) + else: + f = open(csv_output_path, "a") + logger.info( + "Writing corrections CSV. To correct positions, add the corrections to" + " the original source positions i.e. RA' = RA + ra_correction /" + " cos(Dec). To correct fluxes, add the additive correction and multiply" + " the result by the multiplicative correction i.e. S' =" + " flux_peak_correction_multiplicative(S +" + " flux_peak_correction_additive)." 
+ ) + print( + f"{catalog.field},{catalog.epoch},{sbid},{dra_median_value * -1}," + f"{ddec_median_value * -1},{dra_madfm_value},{ddec_madfm_value}," + f"{flux_corr_mult.nominal_value},{flux_corr_add.nominal_value}," + f"{flux_corr_mult.std_dev},{flux_corr_add.std_dev},{len(data)}", + file=f, + ) + f.close() + return dra_median_value, ddec_median_value, flux_corr_mult, flux_corr_add def shift_and_scale_image( diff --git a/vast_post_processing/crossmatch.py b/vast_post_processing/crossmatch.py new file mode 100644 index 0000000..bdbdc3c --- /dev/null +++ b/vast_post_processing/crossmatch.py @@ -0,0 +1,153 @@ +import logging +from typing import Tuple + +from astropy.coordinates import SkyCoord, Angle, match_coordinates_sky +from astropy.table import QTable, join, join_skycoord +import astropy.units as u +import numpy as np +from scipy import odr + +from vast_post_processing.catalogs import Catalog + + +logger = logging.getLogger(__name__) + + +def median_abs_deviation(data): + median = np.median(data) + return np.median(np.abs(data - median)) + + +def straight_line(B, x): + m, b = B + return m * x + b + + +def join_match_coordinates_sky( + coords1: SkyCoord, coords2: SkyCoord, seplimit: u.arcsec +): + idx, separation, dist_3d = match_coordinates_sky(coords1, coords2) + mask = separation < seplimit + return np.where(mask)[0], idx[mask], separation[mask], dist_3d[mask] + + +def crossmatch_qtables( + catalog: Catalog, + catalog_reference: Catalog, + radius: Angle = Angle("10 arcsec"), + catalog_coord_cols: Tuple[str, str] = ("ra_deg_cont", "dec_deg_cont"), + catalog_reference_coord_cols: Tuple[str, str] = ("ra_deg_cont", "dec_deg_cont"), +) -> QTable: + catalog_ra, catalog_dec = catalog_coord_cols + catalog_reference_ra, catalog_reference_dec = catalog_reference_coord_cols + + logger.debug("Using crossmatch radius: %s.", radius) + + xmatch = join( + catalog.table, + catalog_reference.table, + keys="coord", + table_names=["", "reference"], + join_funcs={ + "coord": join_skycoord(radius, distance_func=join_match_coordinates_sky) + }, + ) + # remove trailing _ from catalog column names + xmatch.rename_columns( + [col for col in xmatch.colnames if col.endswith("_")], + [col.rstrip("_") for col in xmatch.colnames if col.endswith("_")], + ) + # compute the separations + xmatch["separation"] = xmatch["coord_reference"].separation(xmatch["coord"]) + xmatch["dra"], xmatch["ddec"] = xmatch["coord_reference"].spherical_offsets_to( + xmatch["coord"] + ) + xmatch["flux_peak_ratio"] = ( + xmatch["flux_peak"] / xmatch["flux_peak_reference"] + ).decompose() + + logger.info( + "Num cross-matches: %d. Num cross-matches to unique reference source: %d" + " (%d%%).", + len(xmatch), + len(set(xmatch["coord_id"])), + (len(set(xmatch["coord_id"])) / len(xmatch)) * 100, + ) + + return xmatch + + +def calculate_positional_offsets( + xmatch_qt: QTable, +) -> Tuple[u.Quantity, u.Quantity, u.Quantity, u.Quantity]: + """Calculate the median positional offsets and the median absolute deviation between + matched sources. + + Parameters + ---------- + xmatch_qt : QTable + QTable of crossmatched sources. Must contain columns: dra, ddec. + + Returns + ------- + Tuple[u.Quantity, u.Quantity, u.Quantity, u.Quantity] + Median RA offset, median Dec offset, median absolute deviation of RA offsets, + median absolute deviation of Dec offsets. Units match their inputs and are of + angular type. 
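A minimal sketch (illustrative only, hypothetical offsets) of the median and MADFM statistics this function returns; only numpy and astropy.units are assumed.

import numpy as np
import astropy.units as u

dra = np.array([0.3, 0.5, 0.4, 1.2]) * u.arcsec  # hypothetical crossmatch RA offsets
dra_median = np.median(dra)
dra_madfm = np.median(np.abs(dra - dra_median))  # median absolute deviation from the median
print(dra_median, dra_madfm)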
+ """ + dra_median = np.median(xmatch_qt["dra"]) + dra_madfm = median_abs_deviation(xmatch_qt["dra"]) + ddec_median = np.median(xmatch_qt["ddec"]) + ddec_madfm = median_abs_deviation(xmatch_qt["ddec"]) + + return dra_median, ddec_median, dra_madfm, ddec_madfm + + +def calculate_flux_offsets( + xmatch_qt: QTable, + init_m: float = 1.0, + init_b: float = 0.0, + fix_m: bool = False, + fix_b: bool = False, +) -> Tuple[float, u.Quantity, float, u.Quantity]: + """Calculate the gradient and offset of a straight-line fit to the peak fluxes for + crossmatched sources. The function `y = mx + b` is fit to the reference peak fluxes + vs the peak fluxes using orthogonal distance regression with `scipy.odr`. + + Parameters + ---------- + xmatch_qt : QTable + QTable of crossmatched sources. Must contain columns: flux_peak, + flux_peak_reference, flux_peak_err, flux_peak_err_reference. + init_m : float + Initial gradient parameter passed to the fitting function, default 1.0. + init_b : float + Initial offset parameter passed to the fitting function, default 0.0. + fix_m : bool + If True, do not allow the gradient to vary during fitting, default False. + fix_b : bool + If True, do not allow the offest to vary during fitting, default False. + + Returns + ------- + Tuple[float, u.Quantity, float, u.Quantity] + Model fit parameters: the gradient, intercept (offset), gradient error, and + intercept error. Offset and offset error unit match the reference flux peak + input and are of spectral flux density type. + """ + ifixb = [0 if fix_m else 1, 0 if fix_b else 1] + flux_unit = xmatch_qt["flux_peak_reference"].unit + linear_model = odr.Model(straight_line) + # convert all to reference flux unit as ODR does not preserve Quantity objects + odr_data = odr.RealData( + xmatch_qt["flux_peak_reference"].to(flux_unit).value, + xmatch_qt["flux_peak"].to(flux_unit).value, + sx=xmatch_qt["flux_peak_err_reference"].to(flux_unit).value, + sy=xmatch_qt["flux_peak_err"].to(flux_unit).value, + ) + odr_obj = odr.ODR(odr_data, linear_model, beta0=[init_m, init_b], ifixb=ifixb) + odr_out = odr_obj.run() + gradient, offset = odr_out.beta + gradient_err, offset_err = odr_out.sd_beta + + return gradient, offset * flux_unit, gradient_err, offset_err * flux_unit From 77e49b18e747bcf7bcf4fa7f62d3e005f7f169ed Mon Sep 17 00:00:00 2001 From: Akash Date: Wed, 12 Jul 2023 18:20:47 -0500 Subject: [PATCH 18/31] Cleaned up minor naming issues with variables --- vast_post_processing/cli/run_corrections.py | 92 +++++++++++---------- vast_post_processing/corrections.py | 2 +- 2 files changed, 50 insertions(+), 44 deletions(-) diff --git a/vast_post_processing/cli/run_corrections.py b/vast_post_processing/cli/run_corrections.py index da4e95f..6f1426f 100644 --- a/vast_post_processing/cli/run_corrections.py +++ b/vast_post_processing/cli/run_corrections.py @@ -71,9 +71,9 @@ def get_correct_correction_file(correction_files_list, img_field): filename = f.name _, _, field, *_ = filename.split(".") field = field.replace("RACS", "VAST") - if field in img_field: + if (field in img_field) and ("components" in filename): count += 1 - return f + return f.as_posix() else: continue if count == 0: @@ -95,10 +95,12 @@ def get_psf_from_image(image_path: str): Tuple(psf_major, psf_minor) Major and minor axes of the PSF. 
""" - + image_path = image_path.replace("SELAVY", "IMAGES") + image_path = image_path.replace("selavy-", "") + image_path = image_path.replace(".components.xml", ".fits") hdu = fits.open(image_path) - psf_maj = hdu["BMAJ"] * u.degree - psf_min = hdu["BMIN"] * u.degree + psf_maj = hdu[0].header["BMAJ"] * u.degree + psf_min = hdu[0].header["BMIN"] * u.degree hdu.close() return (psf_maj.to(u.arcsec), psf_min.to(u.arcsec)) @@ -133,8 +135,8 @@ def main( " correct all available epochs." ), ), - radius: Optional[ANGLE_QUANTITY_TYPE] = typer.Option( - "10 arcsec", + radius: Optional[float] = typer.Option( + 10, help=( "Maximum separation limit for nearest-neighbour crossmatch. Accepts any " "string understood by astropy.coordinates.Angle." @@ -164,7 +166,7 @@ def main( None, help=( "If using --condon but want to give the psfs manually, use this specified PSF size in " - "arcsec for `catalof`. First argument is major axis followed by nimor axis." + "arcsec for `catalog`. First argument is major axis followed by nimor axis." ), ), overwrite: bool = False, @@ -228,7 +230,7 @@ def main( ) # construct output path to store corrections for each epoch - epoch_corr_dir = vast_tile_data_root / "corr_db" / epoch_dir + epoch_corr_dir = corr_dir / epoch_dir if not os.path.isdir(epoch_corr_dir): os.mkdir(epoch_corr_dir) @@ -243,8 +245,37 @@ def main( logger.warning(f"RMS image not found for {image_path}.") if not bkg_path.exists(): logger.warning(f"Background image not found for {image_path}.") + + # Look for any component and island files correspnding to this image + comp_files = [] + for p in list(components_path_glob_list[0]): + comp_file_name = p.name + comp_file_epoch = p.parent.name + if ( + (epoch_dir in comp_file_epoch) + and (field in comp_file_name) + and (f"SB{sbid}" in comp_file_name) + ): + comp_files.append(p) + + component_file = None + island_file = None + if len(comp_files) == 0: + logger.warning(f"Selavy catalogue not found for the image {image_path}") + else: + for i in comp_files: + if "components" in i.as_posix(): + component_file = i + elif "islands" in i.as_posix(): + island_file = i + skip = ( - not ((rms_path.exists()) and (bkg_path.exists()) and (ref_file is not None)) + not ( + (rms_path.exists()) + and (bkg_path.exists()) + and (ref_file is not None) + and (component_file is not None) + ) or skip ) if skip: @@ -254,10 +285,9 @@ def main( logger.warning(f"Skipping {image_path}, no reference field found.") continue else: - crossmatch_file = epoch_corr_dir / image_path.replace( - "components.xml", "corrections.csv" - ) - csv_file = epoch_corr_dir / "corrections.csv" + fname = image_path.name.replace(".fits", "corrections.csv") + crossmatch_file = epoch_corr_dir / fname + csv_file = epoch_corr_dir / "all_fields_corrections.csv" # Get the psf measurements to estimate errors follwoing Condon 1997 if psf_ref is not None: @@ -268,7 +298,7 @@ def main( if psf is not None: psf_image = psf else: - psf_image = get_psf_from_image(image_path) + psf_image = get_psf_from_image(image_path.as_posix()) ( dra_median_value, ddec_median_value, @@ -276,8 +306,8 @@ def main( flux_corr_add, ) = vast_xmatch_qc( reference_catalog_path=ref_file, - catalog_path=image_path, - radius=Angle(radius), + catalog_path=component_file.as_posix(), + radius=Angle(radius * u.arcsec), condon=condon, psf_reference=psf_reference, psf=psf_image, @@ -302,32 +332,8 @@ def main( overwrite=overwrite, ) - # Do the same for catalog files - # Look for any component and island files correspnding to this image - comp_files = [] - for p 
in list(components_path_glob_list[0]): - comp_file_name = p.name - comp_file_epoch = p.parent.name - if ( - (epoch_dir in comp_file_epoch) - and (field in comp_file_name) - and (f"SB{sbid}" in comp_file_name) - ): - comp_files.append(p) - - if len(comp_files) == 0: - logger.warning(f"Selavy catalogue not found for the image {image_path}") - elif len(comp_files) == 1: - if ".components" in comp_files[0].name: - logger.warning( - f"Islannd catalogue not found for the image {image_path}" - ) - else: - logger.warning( - f"Islannd catalogue not found for the image {image_path}" - ) - else: - for path in comp_files: + # Do the same for catalog files + for path in (component_file, island_file): stokes_dir = f"{path.parent.parent.name}_CORRECTED" output_dir = vast_tile_data_root / stokes_dir / epoch_dir output_dir.mkdir(parents=True, exist_ok=True) diff --git a/vast_post_processing/corrections.py b/vast_post_processing/corrections.py index 8603d63..7467195 100644 --- a/vast_post_processing/corrections.py +++ b/vast_post_processing/corrections.py @@ -67,7 +67,7 @@ def vast_xmatch_qc( # Write the cross-match data into csv if crossmatch_output is not None: - data.write("crossmatch.csv", overwrite=True) + data.write(crossmatch_output, overwrite=True) # calculate positional offsets and flux ratio dra_median, ddec_median, dra_madfm, ddec_madfm = calculate_positional_offsets(data) dra_median_value = dra_median.to(positional_unit).value From efaf3697ca8ad4e5baabd7796124e7e4de9e956f Mon Sep 17 00:00:00 2001 From: Akash Date: Fri, 14 Jul 2023 01:09:46 -0500 Subject: [PATCH 19/31] Fixed quantities with units; component files matching made easy --- vast_post_processing/cli/run_corrections.py | 51 +++++++-------------- 1 file changed, 17 insertions(+), 34 deletions(-) diff --git a/vast_post_processing/cli/run_corrections.py b/vast_post_processing/cli/run_corrections.py index 6f1426f..99bb051 100644 --- a/vast_post_processing/cli/run_corrections.py +++ b/vast_post_processing/cli/run_corrections.py @@ -67,7 +67,7 @@ def convert(self, value, param, ctx): def get_correct_correction_file(correction_files_list, img_field): count = 0 - for f in chain.from_iterable(correction_files_list): + for f in correction_files_list: filename = f.name _, _, field, *_ = filename.split(".") field = field.replace("RACS", "VAST") @@ -187,13 +187,14 @@ def main( correction_files_path_glob_list: list[Generator[Path, None, None]] = [] correction_files_path_glob_list.append(vast_corrections_root.glob("*.xml")) + correction_files_path_glob_list = list(correction_files_path_glob_list[0]) if epoch is None or len(epoch) == 0: image_path_glob_list.append( vast_tile_data_root.glob("STOKESI_IMAGES/epoch_*/*.fits") ) components_path_glob_list.append( - vast_tile_data_root.glob("STOKESI_SELAVY/epoch_*/*.components.xml") + vast_tile_data_root.glob("STOKESI_SELAVY/epoch_*/*.xml") ) else: for n in epoch: @@ -201,7 +202,7 @@ def main( vast_tile_data_root.glob(f"STOKESI_IMAGES/epoch_{n}/*.fits") ) components_path_glob_list.append( - vast_tile_data_root.glob(f"STOKESI_SELAVY/epoch_{n}/*.components.xml") + vast_tile_data_root.glob(f"STOKESI_SELAVY/epoch_{n}/*.xml") ) # construct output path to store corrections @@ -247,27 +248,9 @@ def main( logger.warning(f"Background image not found for {image_path}.") # Look for any component and island files correspnding to this image - comp_files = [] - for p in list(components_path_glob_list[0]): - comp_file_name = p.name - comp_file_epoch = p.parent.name - if ( - (epoch_dir in comp_file_epoch) - and (field in 
comp_file_name) - and (f"SB{sbid}" in comp_file_name) - ): - comp_files.append(p) - - component_file = None - island_file = None - if len(comp_files) == 0: - logger.warning(f"Selavy catalogue not found for the image {image_path}") - else: - for i in comp_files: - if "components" in i.as_posix(): - component_file = i - elif "islands" in i.as_posix(): - island_file = i + + component_file = Path(ref_file) + island_file = Path(ref_file.replace("components", "islands")) skip = ( not ( @@ -290,12 +273,12 @@ def main( csv_file = epoch_corr_dir / "all_fields_corrections.csv" # Get the psf measurements to estimate errors follwoing Condon 1997 - if psf_ref is not None: + if len(psf_ref) > 0: psf_reference = psf_ref else: psf_reference = get_psf_from_image(ref_file) - if psf is not None: + if len(psf) > 0: psf_image = psf else: psf_image = get_psf_from_image(image_path.as_posix()) @@ -325,10 +308,10 @@ def main( _ = shift_and_scale_image( path, output_dir, - flux_scale=flux_corr_mult, - flux_offset_mJy=flux_corr_add, - ra_offset_arcsec=dra_median_value, - dec_offset_arcsec=ddec_median_value, + flux_scale=flux_corr_mult.n, + flux_offset_mJy=flux_corr_add.n, + ra_offset_arcsec=dra_median_value.item(), + dec_offset_arcsec=ddec_median_value.item(), overwrite=overwrite, ) @@ -340,10 +323,10 @@ def main( _ = shift_and_scale_catalog( path, output_dir, - flux_scale=flux_corr_mult, - flux_offset_mJy=flux_corr_add, - ra_offset_arcsec=dra_median_value, - dec_offset_arcsec=ddec_median_value, + flux_scale=flux_corr_mult.n, + flux_offset_mJy=flux_corr_add.n, + ra_offset_arcsec=dra_median_value.item(), + dec_offset_arcsec=ddec_median_value.item(), overwrite=overwrite, ) From 7e1be371db89f4055081f3998e8c01216040a2db Mon Sep 17 00:00:00 2001 From: Akash Date: Thu, 10 Aug 2023 13:44:49 -0500 Subject: [PATCH 20/31] Corrected the wrong path for the catalog files --- vast_post_processing/cli/run_corrections.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/vast_post_processing/cli/run_corrections.py b/vast_post_processing/cli/run_corrections.py index 99bb051..9eac01b 100644 --- a/vast_post_processing/cli/run_corrections.py +++ b/vast_post_processing/cli/run_corrections.py @@ -248,22 +248,31 @@ def main( logger.warning(f"Background image not found for {image_path}.") # Look for any component and island files correspnding to this image + image_root = image_path.parent.as_posix() + catalog_root = image_root.replace("IMAGES", "SELAVY") - component_file = Path(ref_file) - island_file = Path(ref_file.replace("components", "islands")) + catalog_filename = image_path.name.replace("image", "selavy-image") + catalog_filename = catalog_filename.replace(".fits", ".components.xml") + + catalog_filepath = f"{catalog_root}/{catalog_filename}" + + component_file = Path(catalog_filepath) + island_file = Path(catalog_filepath.replace("components", "islands")) skip = ( not ( (rms_path.exists()) and (bkg_path.exists()) and (ref_file is not None) - and (component_file is not None) + and (component_file.exists()) ) or skip ) if skip: if not ((rms_path.exists()) and (bkg_path.exists())): logger.warning(f"Skipping {image_path}, RMS/BKG maps do not exist") + elif not (component_file.exists()): + logger.warning(f"Skipping {image_path}, catalog files do not exist") elif ref_file is None: logger.warning(f"Skipping {image_path}, no reference field found.") continue From 1d3bd393514148a6df7ff591f0a5c503718c7e74 Mon Sep 17 00:00:00 2001 From: Akash Date: Mon, 14 Aug 2023 00:36:32 -0500 Subject: [PATCH 21/31] 
Re-organized code so that this can be passed to cropping, added docstrings, cleaned the redundant parts of the code --- vast_post_processing/catalogs.py | 375 +++++++------- vast_post_processing/cli/run_corrections.py | 283 +---------- vast_post_processing/corrections.py | 512 +++++++++++++------- vast_post_processing/crossmatch.py | 43 +- 4 files changed, 599 insertions(+), 614 deletions(-) diff --git a/vast_post_processing/catalogs.py b/vast_post_processing/catalogs.py index a75b000..a83b094 100644 --- a/vast_post_processing/catalogs.py +++ b/vast_post_processing/catalogs.py @@ -27,150 +27,37 @@ "rms_image": u.mJy / u.beam, } - -class UnknownCatalogInputFormat(Exception): - pass - - -class Catalog: - CATALOG_TYPE_TILE = "TILE" - CATALOG_TYPE_COMBINED = "COMBINED" - CATALOG_TYPES = ( - CATALOG_TYPE_TILE, - CATALOG_TYPE_COMBINED, - ) - - def __init__( - self, - path: Path, - psf: Optional[Tuple[float, float]] = None, - input_format: str = "selavy", - condon: bool = False, - positive_fluxes_only: bool = True, - ): - self.path: Path - self.table: QTable - self.field: Optional[str] - self.epoch: Optional[str] - self.sbid: Optional[str] - self.psf_major: Optional[u.Quantity] - self.psf_minor: Optional[u.Quantity] - self.type: str - - # read catalog - if input_format == "selavy": - if path.suffix == ".txt": - logger.debug("Reading %s as a Selavy txt catalog.", path) - read_catalog = read_selavy - else: - logger.debug("Reading %s as a Selavy VOTable catalog.", path) - read_catalog = read_selavy_votable - elif input_format == "aegean": - logger.debug("Reading %s as an Aegean catalog.", path) - read_catalog = read_aegean_csv - else: - logger.error( - "The format of input files is not supported. Only selavy and aegean are supported" - ) - raise SystemExit - self.path = path - self.table = read_catalog(path) - - # filter sources with bad sizes and optionally negative/0 fluxes - if positive_fluxes_only: - logger.info( - "Filtering %d sources with fluxes <= 0.", - (self.table["flux_peak"] <= 0).sum(), - ) - self.table = self.table[self.table["flux_peak"] > 0] - logger.info( - "Filtering %d sources with fitted sizes <= 0.", - ((self.table["maj_axis"] <= 0) | (self.table["min_axis"] <= 0)).sum(), - ) - self.table = self.table[ - (self.table["maj_axis"] > 0) & (self.table["min_axis"] > 0) - ] - - # read epoch, field, sbid, psf's - epoch_name = path.parent.name - _, _, field, sbid, *_ = path.name.split(".") - self.epoch = epoch_name - self.field = field.replace("VAST_", "") - self.sbid = sbid - - if psf is not None: - self.psf_major, self.psf_minor = psf * u.arcsec - logger.debug( - "Using user provided PSF for %s: %s, %s.", - self.path, - self.psf_major, - self.psf_minor, - ) - else: - logger.warning( - "PSF is unknown for %s. 
Condon errors will be unavailable.", self.path - ) - self.psf_major = None - self.psf_minor = None - - if condon and self.psf_major is not None and self.psf_minor is not None: - _ = self.calculate_condon_flux_errors(correct_peak_for_noise=True) - logger.debug("Condon errors computed for %s.", self.path) - - def calculate_condon_flux_errors( - self, - alpha_maj1=2.5, - alpha_min1=0.5, - alpha_maj2=0.5, - alpha_min2=2.5, - alpha_maj3=1.5, - alpha_min3=1.5, - clean_bias=0.0, - clean_bias_error=0.0, - frac_flux_cal_error=0.0, - correct_peak_for_noise=False, - ): - noise = self.table["rms_image"] - snr = self.table["flux_peak"] / noise - - rho_sq3 = ( - ( - self.table["maj_axis"] - * self.table["min_axis"] - / (4.0 * self.psf_major * self.psf_minor) - ) - * (1.0 + (self.psf_major / self.table["maj_axis"]) ** 2) ** alpha_maj3 - * (1.0 + (self.psf_minor / self.table["min_axis"]) ** 2) ** alpha_min3 - * snr**2 - ) - - flux_peak_col = self.table["flux_peak"] - flux_peak_condon = self.table["flux_peak"] + ( - -(noise**2) / self.table["flux_peak"] + clean_bias - ) - if correct_peak_for_noise: - flux_peak_col = flux_peak_condon - - errorpeaksq = ( - (frac_flux_cal_error * flux_peak_col) ** 2 - + clean_bias_error**2 - + 2.0 * flux_peak_col**2 / rho_sq3 - ) - errorpeak = np.sqrt(errorpeaksq) - - self.table["flux_peak_condon"] = flux_peak_condon - self.table["flux_peak_selavy"] = self.table["flux_peak"] - self.table["flux_peak_err_condon"] = errorpeak - self.table["flux_peak_err_selavy"] = self.table["flux_peak_err"] - self.table["flux_peak_err"] = self.table["flux_peak_err_condon"] - if correct_peak_for_noise: - self.table["flux_peak"] = self.table["flux_peak_condon"] - return flux_peak_condon, errorpeak +AEGEAN_COLUMN_MAP = { + # aegean name: (selavy name, aegean unit) + "ra": ("ra_deg_cont", u.deg), + "dec": ("dec_deg_cont", u.deg), + "err_ra": ("ra_err", u.deg), + "err_dec": ("dec_err", u.deg), + "peak_flux": ("flux_peak", u.Jy / u.beam), + "err_peak_flux": ("flux_peak_err", u.Jy / u.beam), + "a": ("maj_axis", u.arcsec), + "b": ("min_axis", u.arcsec), + "pa": ("pos_ang", u.arcsec), + "err_a": ("maj_axis_err", u.arcsec), + "err_b": ("min_axis_err", u.deg), + "err_pa": ("pos_ang_err", u.deg), + "local_rms": ("rms_image", u.Jy / u.beam), +} def _convert_selavy_columns_to_quantites( qt: QTable, units: Dict[str, u.Unit] = SELAVY_COLUMN_UNITS ) -> QTable: + """Takes in a selavy component table and adds units to respective quantities + + Args: + qt (QTable): the component catalog + units (Dict[str, u.Unit], optional): The dictionary with parameters and + their units. Defaults to SELAVY_COLUMN_UNITS. 
+ + Returns: + QTable: Table with units to the parameters + """ for col, unit in units.items(): qt[col].unit = unit return qt @@ -210,6 +97,14 @@ def read_selavy(catalog_path: Path) -> QTable: def read_selavy_votable(catalog_path: Path) -> QTable: + """Helper function to read the selavy catalog, if the input format is votable + + Args: + catalog_path (Path): Input Path to the catalog file + + Returns: + QTable: The component table + """ t = Table.read(catalog_path, format="votable", use_names_over_ids=True) # remove units from str columns and fix unrecognized flux units for col in t.itercols(): @@ -225,17 +120,6 @@ def read_selavy_votable(catalog_path: Path) -> QTable: return qt -def read_hdf(catalog_path: Path) -> pd.DataFrame: - df = pd.read_hdf(catalog_path, key="data") - df["field"] = df.field.str.split(".", n=1, expand=True)[0] - qt = _convert_selavy_columns_to_quantites(QTable.from_pandas(df)) - qt["coord"] = SkyCoord(ra=qt["ra_deg_cont"], dec=qt["dec_deg_cont"]) - _, qt["nn_separation"], _ = qt["coord"].match_to_catalog_sky( - qt["coord"], nthneighbor=2 - ) - return qt - - def read_aegean_csv(catalog_path: Path) -> QTable: """Read an Aegean CSV component catalog and return a QTable. Assumed to contain at least the following columns with the given units: @@ -260,22 +144,6 @@ def read_aegean_csv(catalog_path: Path) -> QTable: - `nn_separation`: separation to the nearest-neighbour source as a Quantity with angular units. """ - AEGEAN_COLUMN_MAP = { - # aegean name: (selavy name, aegean unit) - "ra": ("ra_deg_cont", u.deg), - "dec": ("dec_deg_cont", u.deg), - "err_ra": ("ra_err", u.deg), - "err_dec": ("dec_err", u.deg), - "peak_flux": ("flux_peak", u.Jy / u.beam), - "err_peak_flux": ("flux_peak_err", u.Jy / u.beam), - "a": ("maj_axis", u.arcsec), - "b": ("min_axis", u.arcsec), - "pa": ("pos_ang", u.arcsec), - "err_a": ("maj_axis_err", u.arcsec), - "err_b": ("min_axis_err", u.deg), - "err_pa": ("pos_ang_err", u.deg), - "local_rms": ("rms_image", u.Jy / u.beam), - } qt = QTable.read(catalog_path) # rename columns to match selavy convention and assign units for col, (new_col, unit) in AEGEAN_COLUMN_MAP.items(): @@ -296,3 +164,178 @@ def read_aegean_csv(catalog_path: Path) -> QTable: qt["coord"], nthneighbor=2 ) return qt + + +class Catalog: + """Class to make a catalog object from the selavy/Aegean files. This + is then used for catalog matching between the referecne catalog and + the current catalog to select for sources and get flux and astrometric + corrections. 
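A minimal usage sketch (illustrative only; the file path, naming pattern, and PSF values are hypothetical, the constructor arguments are the ones defined in this class):

from pathlib import Path
from vast_post_processing.catalogs import Catalog

cat = Catalog(
    Path("epoch_01/selavy-image.i.VAST_0102-06A.SB9667.components.xml"),  # hypothetical
    psf=(12.5, 10.2),      # PSF major and minor axes in arcsec
    input_format="selavy",
    condon=True,           # derive Condon (1997) peak-flux errors from the PSF
)
print(cat.field, cat.epoch, cat.sbid)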
+ + Raises: + SystemExit: if the input catalog files are other than Selavy/Aegean + products + """ + + def __init__( + self, + path: Path, + psf: Optional[Tuple[float, float]] = None, + input_format: str = "selavy", + condon: bool = False, + apply_flux_limit: bool = True, + flux_limit: float = 0, + ): + self.path: Path + self.table: QTable + self.input_format: Optional[str] + self.flux_flag: Optional[bool] + self.flux_lim: Optional[float] + self.field: Optional[str] + self.epoch: Optional[str] + self.sbid: Optional[str] + self.psf_major: Optional[u.Quantity] + self.psf_minor: Optional[u.Quantity] + self.type: str + + self.path = path + self.input_format = input_format + self.flux_flag = apply_flux_limit + self.flux_lim = flux_limit + + # Read the catalog + self._read_catalog() + + # Filter sources + self._filter_sources() + + # Get epoch, field, sbid from the file name + epoch_name = path.parent.name + _, _, field, sbid, *_ = path.name.split(".") + self.epoch = epoch_name + self.field = field.replace("VAST_", "") + self.sbid = sbid + + # Parse the psf info + if psf is not None: + self.psf_major, self.psf_minor = psf * u.arcsec + logger.debug( + "Using user provided PSF for %s: %s, %s.", + self.path, + self.psf_major, + self.psf_minor, + ) + else: + logger.warning( + "PSF is unknown for %s. Condon errors will be unavailable.", self.path + ) + self.psf_major = None + self.psf_minor = None + + # Calculate the covariant error using Condon 1997 + if condon and self.psf_major is not None and self.psf_minor is not None: + self.calculate_condon_flux_errors(correct_peak_for_noise=True) + logger.debug("Condon errors computed for %s.", self.path) + + def _read_catalog(self): + """Helper function to read and parse the input files + + Raises: + SystemExit: if the input catalog files are other than Selavy/Aegean + products + """ + path = self.path + if self.input_format == "selavy": + if path.suffix == ".txt": + logger.debug("Reading %s as a Selavy txt catalog.", path) + read_catalog = read_selavy + else: + logger.debug("Reading %s as a Selavy VOTable catalog.", path) + read_catalog = read_selavy_votable + elif self.input_format == "aegean": + logger.debug("Reading %s as an Aegean catalog.", path) + read_catalog = read_aegean_csv + else: + logger.error( + "The format of input files is not supported. Only selavy and aegean are supported" + ) + raise SystemExit + + self.table = read_catalog(path) + + def _filter_sources(self): + """Helper function to filter sources that are used for cross-match; + filter sources with bad sizes and optionally given flux limits""" + if self.flux_flag: + lim = self.flux_lim + logger.info( + f"Filtering %d sources with fluxes <= {lim}", + (self.table["flux_peak"] <= lim).sum(), + ) + self.table = self.table[self.table["flux_peak"] > lim] + logger.info( + "Filtering %d sources with fitted sizes <= 0.", + ((self.table["maj_axis"] <= 0) | (self.table["min_axis"] <= 0)).sum(), + ) + self.table = self.table[ + (self.table["maj_axis"] > 0) & (self.table["min_axis"] > 0) + ] + + def calculate_condon_flux_errors( + self, + alpha_maj: float = 1.5, + alpha_min: float = 1.5, + clean_bias: float = 0.0, + clean_bias_error: float = 0.0, + frac_flux_cal_error: float = 0.0, + correct_peak_for_noise: bool = False, + ): + """Calculates the covariant error using Condon 1997. See equation 41 + of Condon 1997 for reference + + Args: + alpha_maj (float, optional): power for major axis correction. Defaults to 1.5 + alpha_min (float, optional): power for major axis correction. Defaults to 1.5. 
+ clean_bias (float, optional): additive flux bias. Defaults to 0.0. + clean_bias_error (float, optional): error in additive flux bias. Defaults to 0.0. + frac_flux_cal_error (float, optional): multiplicative flux error. Defaults to 0.0. + correct_peak_for_noise (bool, optional): flag to re-write the peak flux from + selavy. Defaults to False. + """ + noise = self.table["rms_image"] + snr = self.table["flux_peak"] / noise + + # See equation 41 of Condon 1997 to calculate the signal to noise + rho_sq3 = ( + ( + self.table["maj_axis"] + * self.table["min_axis"] + / (4.0 * self.psf_major * self.psf_minor) + ) + * (1.0 + (self.psf_major / self.table["maj_axis"]) ** 2) ** alpha_maj + * (1.0 + (self.psf_minor / self.table["min_axis"]) ** 2) ** alpha_min + * snr**2 + ) + + # Correct the peak flux now. + flux_peak_col = self.table["flux_peak"] + flux_peak_condon = self.table["flux_peak"] + ( + -(noise**2) / self.table["flux_peak"] + clean_bias + ) + if correct_peak_for_noise: + flux_peak_col = flux_peak_condon + + errorpeaksq = ( + (frac_flux_cal_error * flux_peak_col) ** 2 + + clean_bias_error**2 + + 2.0 * flux_peak_col**2 / rho_sq3 + ) + errorpeak = np.sqrt(errorpeaksq) + + self.table["flux_peak_condon"] = flux_peak_condon + self.table["flux_peak_selavy"] = self.table["flux_peak"] + self.table["flux_peak_err_condon"] = errorpeak + self.table["flux_peak_err_selavy"] = self.table["flux_peak_err"] + self.table["flux_peak_err"] = self.table["flux_peak_err_condon"] + if correct_peak_for_noise: + self.table["flux_peak"] = self.table["flux_peak_condon"] diff --git a/vast_post_processing/cli/run_corrections.py b/vast_post_processing/cli/run_corrections.py index 9eac01b..811c8ce 100644 --- a/vast_post_processing/cli/run_corrections.py +++ b/vast_post_processing/cli/run_corrections.py @@ -1,108 +1,10 @@ from loguru import logger from pathlib import Path -from typing import Optional, Tuple, Generator -from astropy.coordinates import Angle -import astropy.units as u -import click, sys, os +from typing import Optional from uncertainties import ufloat -from itertools import chain -import pandas as pd -import typer -from astropy.table import QTable -from astropy.io import fits -from astropy import units as u -from vast_post_processing.catalogs import Catalog +import typer, sys -from vast_post_processing.corrections import ( - shift_and_scale_catalog, - shift_and_scale_image, - vast_xmatch_qc, -) - - -class _AstropyUnitType(click.ParamType): - def convert(self, value, param, ctx, unit_physical_type): - try: - unit = u.Unit(value) - except ValueError: - self.fail(f"astropy.units.Unit does not understand: {value}.") - if unit.physical_type != unit_physical_type: - self.fail( - f"{unit} is a {unit.physical_type} unit. It must be of type" - f" {unit_physical_type}." 
- ) - else: - return unit - - -class AngleUnitType(_AstropyUnitType): - name = "angle_unit" - - def convert(self, value, param, ctx): - return super().convert(value, param, ctx, "angle") - - -class FluxUnitType(_AstropyUnitType): - name = "flux_unit" - - def convert(self, value, param, ctx): - return super().convert(value, param, ctx, "spectral flux density") - - -class AngleQuantityType(click.ParamType): - name = "angle_quantity" - - def convert(self, value, param, ctx): - try: - angle = Angle(value) - return angle - except ValueError: - self.fail(f"astropy.coordinates.Angle does not understand: {value}.") - - -ANGLE_UNIT_TYPE = AngleUnitType() -FLUX_UNIT_TYPE = FluxUnitType() -ANGLE_QUANTITY_TYPE = AngleQuantityType() - - -def get_correct_correction_file(correction_files_list, img_field): - count = 0 - for f in correction_files_list: - filename = f.name - _, _, field, *_ = filename.split(".") - field = field.replace("RACS", "VAST") - if (field in img_field) and ("components" in filename): - count += 1 - return f.as_posix() - else: - continue - if count == 0: - return None - - -def get_psf_from_image(image_path: str): - """ - Funtion used to get the point spread function (PSF) extent in major and minor axis. - These will be in the header of the image file - - Parameters - ---------- - image_path: str - Path to the image file - - Returns - ------- - Tuple(psf_major, psf_minor) - Major and minor axes of the PSF. - """ - image_path = image_path.replace("SELAVY", "IMAGES") - image_path = image_path.replace("selavy-", "") - image_path = image_path.replace(".components.xml", ".fits") - hdu = fits.open(image_path) - psf_maj = hdu[0].header["BMAJ"] * u.degree - psf_min = hdu[0].header["BMIN"] * u.degree - hdu.close() - return (psf_maj.to(u.arcsec), psf_min.to(u.arcsec)) +from vast_post_processing.corrections import correct_files def main( @@ -169,10 +71,17 @@ def main( "arcsec for `catalog`. First argument is major axis followed by nimor axis." ), ), + outdir: Optional[str] = typer.Option( + None, + help="Stem of the output directory to store the corrected images and cataloges to. The default" + "way is to construct it from the tile directory, by making folders with _CORRECTED tag attached" + "to them as suffix", + ), overwrite: bool = False, verbose: bool = False, ): - """Read astrometric and flux corrections produced by vast-xmatch and apply them to + """ + Read astrometric and flux corrections produced by vast-xmatch and apply them to VAST images and catalogues in vast-data. See https://github.com/marxide/vast-xmatch. 
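For orientation, an invocation of this entry point might look like the sketch below. The tile-data path, epochs and output directory are placeholders, and the option names assume the Typer parameters defined in this module.

    python -m vast_post_processing.cli.run_corrections \
        /data/VAST/vast-data/TILES \
        --epoch 1 --epoch 2 \
        --radius 10 \
        --outdir /data/VAST/vast-data/TILES_CORRECTED \
        --overwrite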
""" # configure logger @@ -180,164 +89,18 @@ def main( # replace the default sink logger.remove() logger.add(sys.stderr, level="INFO") - - # read corrections - image_path_glob_list: list[Generator[Path, None, None]] = [] - components_path_glob_list: list[Generator[Path, None, None]] = [] - correction_files_path_glob_list: list[Generator[Path, None, None]] = [] - - correction_files_path_glob_list.append(vast_corrections_root.glob("*.xml")) - correction_files_path_glob_list = list(correction_files_path_glob_list[0]) - - if epoch is None or len(epoch) == 0: - image_path_glob_list.append( - vast_tile_data_root.glob("STOKESI_IMAGES/epoch_*/*.fits") - ) - components_path_glob_list.append( - vast_tile_data_root.glob("STOKESI_SELAVY/epoch_*/*.xml") - ) - else: - for n in epoch: - image_path_glob_list.append( - vast_tile_data_root.glob(f"STOKESI_IMAGES/epoch_{n}/*.fits") - ) - components_path_glob_list.append( - vast_tile_data_root.glob(f"STOKESI_SELAVY/epoch_{n}/*.xml") - ) - - # construct output path to store corrections - corr_dir = vast_tile_data_root / "corr_db" - if not os.path.isdir(corr_dir): - os.mkdir(corr_dir) - - # get corrections for an image and the correct it - for image_path in chain.from_iterable(image_path_glob_list): - epoch_dir = image_path.parent.name - _, _, field, sbid_str, *_ = image_path.name.split(".") - sbid = int(sbid_str[2:]) - - # get rms and background images - rms_path = ( - vast_tile_data_root - / "STOKESI_RMSMAPS" - / epoch_dir - / f"noiseMap.{image_path.name}" - ) - bkg_path = ( - vast_tile_data_root - / "STOKESI_RMSMAPS" - / epoch_dir - / f"meanMap.{image_path.name}" - ) - - # construct output path to store corrections for each epoch - epoch_corr_dir = corr_dir / epoch_dir - - if not os.path.isdir(epoch_corr_dir): - os.mkdir(epoch_corr_dir) - - ref_file = get_correct_correction_file( - correction_files_list=correction_files_path_glob_list, - img_field=field, - ) - - skip = False - if not rms_path.exists(): - logger.warning(f"RMS image not found for {image_path}.") - if not bkg_path.exists(): - logger.warning(f"Background image not found for {image_path}.") - - # Look for any component and island files correspnding to this image - image_root = image_path.parent.as_posix() - catalog_root = image_root.replace("IMAGES", "SELAVY") - - catalog_filename = image_path.name.replace("image", "selavy-image") - catalog_filename = catalog_filename.replace(".fits", ".components.xml") - - catalog_filepath = f"{catalog_root}/{catalog_filename}" - - component_file = Path(catalog_filepath) - island_file = Path(catalog_filepath.replace("components", "islands")) - - skip = ( - not ( - (rms_path.exists()) - and (bkg_path.exists()) - and (ref_file is not None) - and (component_file.exists()) - ) - or skip - ) - if skip: - if not ((rms_path.exists()) and (bkg_path.exists())): - logger.warning(f"Skipping {image_path}, RMS/BKG maps do not exist") - elif not (component_file.exists()): - logger.warning(f"Skipping {image_path}, catalog files do not exist") - elif ref_file is None: - logger.warning(f"Skipping {image_path}, no reference field found.") - continue - else: - fname = image_path.name.replace(".fits", "corrections.csv") - crossmatch_file = epoch_corr_dir / fname - csv_file = epoch_corr_dir / "all_fields_corrections.csv" - - # Get the psf measurements to estimate errors follwoing Condon 1997 - if len(psf_ref) > 0: - psf_reference = psf_ref - else: - psf_reference = get_psf_from_image(ref_file) - - if len(psf) > 0: - psf_image = psf - else: - psf_image = 
get_psf_from_image(image_path.as_posix()) - ( - dra_median_value, - ddec_median_value, - flux_corr_mult, - flux_corr_add, - ) = vast_xmatch_qc( - reference_catalog_path=ref_file, - catalog_path=component_file.as_posix(), - radius=Angle(radius * u.arcsec), - condon=condon, - psf_reference=psf_reference, - psf=psf_image, - fix_m=False, - fix_b=False, - crossmatch_output=crossmatch_file, - csv_output=csv_file, - ) - - # get corrections - for path in (image_path, rms_path, bkg_path): - stokes_dir = f"{path.parent.parent.name}_CORRECTED" - output_dir = vast_tile_data_root / stokes_dir / epoch_dir - output_dir.mkdir(parents=True, exist_ok=True) - _ = shift_and_scale_image( - path, - output_dir, - flux_scale=flux_corr_mult.n, - flux_offset_mJy=flux_corr_add.n, - ra_offset_arcsec=dra_median_value.item(), - dec_offset_arcsec=ddec_median_value.item(), - overwrite=overwrite, - ) - - # Do the same for catalog files - for path in (component_file, island_file): - stokes_dir = f"{path.parent.parent.name}_CORRECTED" - output_dir = vast_tile_data_root / stokes_dir / epoch_dir - output_dir.mkdir(parents=True, exist_ok=True) - _ = shift_and_scale_catalog( - path, - output_dir, - flux_scale=flux_corr_mult.n, - flux_offset_mJy=flux_corr_add.n, - ra_offset_arcsec=dra_median_value.item(), - dec_offset_arcsec=ddec_median_value.item(), - overwrite=overwrite, - ) + correct_files( + vast_tile_data_root=vast_tile_data_root, + vast_corrections_root=vast_corrections_root, + epoch=epoch, + radius=radius, + condon=condon, + psf_ref=psf_ref, + psf=psf, + outdir=outdir, + overwrite=overwrite, + verbose=verbose, + ) if __name__ == "__main__": diff --git a/vast_post_processing/corrections.py b/vast_post_processing/corrections.py index 7467195..171550e 100644 --- a/vast_post_processing/corrections.py +++ b/vast_post_processing/corrections.py @@ -2,7 +2,9 @@ """ from pathlib import Path -import warnings +import warnings, sys, os +from typing import Generator +from itertools import chain from astropy.coordinates import SkyCoord, Angle from astropy.io import fits from astropy.io.votable import parse @@ -34,6 +36,39 @@ def vast_xmatch_qc( crossmatch_output: Optional[str] = None, csv_output: Optional[str] = None, ): + """Function to cross-match two catalogs and filter sources that are within + a given radius + + Args: + reference_catalog_path (str): Path to the reference catalog + catalog_path (str): Path to the catalog that needs flux/astrometric corrections + radius (Angle, optional): Cross-match radius. Defaults to Angle("10arcsec"). + condon (bool, optional): Flag to calculate Condon error. Defaults to False. + psf_reference (Optional[Tuple[float, float]], optional): PSF of the reference catalog. + This includes information about the major/minor axis FWHM. Defaults to None. If None, + Condon errors will not be calculated. + psf (Optional[Tuple[float, float]], optional): PSF of the input catalog. + This includes information about the major/minor axis FWHM. Defaults to None. If None, + Condon errors will not be calculated. + fix_m (bool, optional): Flag to fix the slope. For tge straight line fit, should we fix + the slope to certain value or leave it free to be fit. Defaults to False. + fix_b (bool, optional): Flag to fix the intercept. For tge straight line fit, should we fix + the slope to certain value or leave it free to be fit. Defaults to False. + positional_unit (u.Unit, optional): output unit in which the astrometric offset is given. + Defaults to u.Unit("arcsec"). 
+ flux_unit (u.Unit, optional): output unit in which the flux scale is given. + Defaults to u.Unit("mJy"). + crossmatch_output (Optional[str], optional): File path to write the crossmatch output. + Defaults to None, which means no file is written + csv_output (Optional[str], optional): File path to write the flux/astrometric corrections. + Defaults to None, which means no file is written + + Returns: + dra_median_value: The median offset in RA (arcsec) + ddec_median_value: The median offset in DEC (arcsec) + flux_corr_mult: Multiplicative flux correction + flux_corr_add: Additive flux correction + """ # convert catalog path strings to Path objects reference_catalog_path = Path(reference_catalog_path) catalog_path = Path(catalog_path) @@ -128,46 +163,26 @@ def vast_xmatch_qc( def shift_and_scale_image( image_path: Path, - output_dir_path: Path, flux_scale: float = 1.0, flux_offset_mJy: float = 0.0, ra_offset_arcsec: float = 0.0, dec_offset_arcsec: float = 0.0, replace_nan: bool = False, - overwrite: bool = False, -) -> Path: +): """Apply astrometric and flux corrections to a FITS image. - Parameters - ---------- - image_path : Path - Path to image. - output_dir_path : Path - Path to write corrected image. - flux_scale : float, optional - Flux scale, by default 1.0 - flux_offset_mJy : float, optional - Flux offset in mJy, by default 0.0 - ra_offset_arcsec : float, optional - Right ascension offset in arcsec, by default 0.0 - dec_offset_arcsec : float, optional - Declination offset in arcsec, by default 0.0 - replace_nan : bool, optional - Whether to replace `NaN` pixels with 0, by default False - overwrite : bool, optional - Whether to write over existing image, by default False + Args: + image_path (Path): Path for the input image + flux_scale (float, optional): Multiplicative flux correction. Defaults to 1.0. + flux_offset_mJy (float, optional): Additive flux correction. Defaults to 0.0. + ra_offset_arcsec (float, optional): RA offset in arcsec. Defaults to 0.0. + dec_offset_arcsec (float, optional): DEC offset in arcsec. Defaults to 0.0. + replace_nan (bool, optional): Replace NAN's in the data with 0. Defaults to False. - Returns - ------- - output_path : Path - Path to corrected image. + Returns: + astropy.io.fits.hdu.image.PrimaryHDU: the HDU of the corrected image """ - # Create new output path and check for existing image at path logger.debug(f"Correcting {image_path} ...") - output_path = output_dir_path / image_path.with_suffix(".corrected.fits").name - if output_path.exists() and not overwrite: - logger.warning(f"Will not overwrite existing image: {output_path}.") - return output_path # Open image image_hdul = fits.open(image_path) @@ -205,51 +220,29 @@ def shift_and_scale_image( image_hdu.header["RAOFF"] = ra_offset_arcsec image_hdu.header["DECOFF"] = dec_offset_arcsec - # Safely write image to file and return path to corrected image - if output_path.exists() and overwrite: - logger.warning(f"Overwriting existing image: {output_path}.") - image_hdul.writeto(str(output_path), overwrite=True) - else: - image_hdul.writeto(str(output_path)) - logger.success(f"Wrote corrected image: {output_path}.") - image_hdul.close() - return output_path + return image_hdul def shift_and_scale_catalog( catalog_path: Path, - output_dir_path: Path, flux_scale: float = 1.0, flux_offset_mJy: float = 0.0, ra_offset_arcsec: float = 0.0, dec_offset_arcsec: float = 0.0, - overwrite: bool = False, -) -> Path: - """Apply astrometric and flux corrections to a VAST VOTable. 
+): + """Apply astrometric and flux corrections to a catalog. - Parameters - ---------- - catalog_path : Path - Path to catalog. - output_dir_path : Path - Path to write corrected catalog to. - flux_scale : float, optional - Flux scale, by default 1.0 - flux_offset_mJy : float, optional - Flux offset in mJy, by default 0.0 - ra_offset_arcsec : float, optional - Right ascension offset in arcsec, by default 0.0 - dec_offset_arcsec : float, optional - Declination offset in arcsec, by default 0.0 - overwrite : bool, optional - Whether to write over existing catalog, by default False + Args: + catalog_path (Path): Path for the input catalog + flux_scale (float, optional): Multiplicative flux correction. Defaults to 1.0. + flux_offset_mJy (float, optional): Additive flux correction. Defaults to 0.0. + ra_offset_arcsec (float, optional): RA offset in arcsec. Defaults to 0.0. + dec_offset_arcsec (float, optional): DEC offset in arcsec. Defaults to 0.0. - Returns - ------- - output_path : Path - Path to corrected catalog. + Returns: + astropy.io.votable: the corrected catalog """ - # Flux-unit columns in all catalogs + # flux-unit columns in all catalogs FLUX_COLS = ( "col_flux_peak", "col_flux_int", @@ -273,10 +266,6 @@ def shift_and_scale_catalog( # Create new output path and check for existing catalog at path logger.debug(f"Correcting {catalog_path} ...") is_island = ".islands" in catalog_path.name - output_path = output_dir_path / catalog_path.with_suffix(".corrected.xml").name - if output_path.exists() and not overwrite: - logger.warning(f"Will not overwrite existing catalogue: {output_path}.") - return output_path # Open catalog votablefile = parse(catalog_path) @@ -314,38 +303,275 @@ def shift_and_scale_catalog( for col in cols: votable.array[col] = flux_scale * (votable.array[col] + flux_offset_mJy) - # Safely write corrected VOTable to file and return path to corrected - # catalog - if output_path.exists() and overwrite: - logger.warning(f"Overwriting existing catalogue: {output_path}.") - output_path.unlink() - votablefile.to_xml(str(output_path)) + return votablefile + + +def get_correct_file(correction_files_dir, img_field): + """Helper function to get the file from the reference catalogs which + observed the same field. + + Args: + correction_files_list (list): Path to the correction files directory + img_field (str): The field name of the input catalog + + Returns: + str: the correspoding file with the same field as the one requested. + """ + # we need to string the last A from the field + if img_field[-1] == "A": + img_field = img_field[:-1] + img_field = img_field.replace("VAST", "RACS") + matched_field = list(correction_files_dir.glob(f"*{img_field}*components*")) + if len(matched_field) > 0: + # This means that there are multpile files with the same field, + # possibly with different sbid's corresponding to different observations + return matched_field[0].as_posix() else: - votablefile.to_xml(str(output_path)) - logger.success(f"Wrote corrected catalogue: {output_path}.") - return output_path + return None -# Separated logic +def get_psf_from_image(image_path: str): + """ + Funtion used to get the point spread function (PSF) extent in major and minor axis. 
+ These will be in the header of the image file -from itertools import chain -from pathlib import Path -import sys -from typing import Optional, Generator + Parameters + ---------- + image_path: str + Path to the image file -from loguru import logger -import pandas as pd + Returns + ------- + Tuple(psf_major, psf_minor) + Major and minor axes of the PSF. + """ + image_path = image_path.replace("SELAVY", "IMAGES") + image_path = image_path.replace("selavy-", "") + image_path = image_path.replace(".components.xml", ".fits") + hdu = fits.open(image_path) + psf_maj = hdu[0].header["BMAJ"] * u.degree + psf_min = hdu[0].header["BMIN"] * u.degree + hdu.close() + return (psf_maj.to(u.arcsec), psf_min.to(u.arcsec)) + + +def correct_field( + image_path: Path, + vast_corrections_root: Path = "/data/vast-survey/RACS/release-format/EPOCH00/TILES/STOKESI_SELAVY", + radius: float = 10, + condon: bool = True, + psf_ref: list[float] = None, + psf: list[float] = None, + write_output: bool = True, + outdir: str = None, + overwrite: bool = False, +): + """Read astrometric and flux corrections produced by vast-xmatch and apply them to + VAST images and catalogues in vast-data. See https://github.com/marxide/vast-xmatch. + Args: + image path (Path): Path to the image file that needs to be corrected. + vast_corrections_root (Path, optional): Path to the catalogues of referecne catalog. + Defaults to "/data/vast-survey/RACS/release-format/EPOCH00/TILES/STOKESI_SELAVY". + radius (float, optional): Crossmatch radius. Defaults to 10. + condon (bool, optional): Flag to replace errros with Condon errors. Defaults to True. + psf_ref (list[float], optional): PSF information of the reference catalog. Defaults to None. + psf (list[float], optional): PSF information of the input catalog. Defaults to None. + write_output (bool, optional): Write the corrected image and catalog files or return the + corrected hdul and the corrected table?. Defaults to True, which means to write + outdir (str, optional): The stem of the output directory to write the files to + overwrite (bool, optional): Overwrite the existing files?. Defaults to False. 
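To make the path handling below concrete, the function assumes a tile layout along these lines; the field and SBID tokens are illustrative placeholders, and corrected products are written to sibling *_CORRECTED directories under outdir:

    <vast_tile_data_root>/
        STOKESI_IMAGES/epoch_N/image.i.<FIELD>.<SBID>. ... .fits
        STOKESI_RMSMAPS/epoch_N/noiseMap.image.i.<FIELD>.<SBID>. ... .fits
        STOKESI_RMSMAPS/epoch_N/meanMap.image.i.<FIELD>.<SBID>. ... .fits
        STOKESI_SELAVY/epoch_N/selavy-image.i.<FIELD>.<SBID>. ... .components.xml
        STOKESI_SELAVY/epoch_N/selavy-image.i.<FIELD>.<SBID>. ... .islands.xml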
+ """ + epoch_dir = image_path.parent.name + _, _, field, *_ = image_path.name.split(".") + + # get rms and background images + rms_root = Path( + image_path.parent.as_posix().replace("STOKESI_IMAGES", "STOKESI_RMSMAPS") + ) + rms_path = rms_root / f"noiseMap.{image_path.name}" + bkg_path = rms_root / f"meanMap.{image_path.name}" + + correction_files_dir = Path(vast_corrections_root) + ref_file = get_correct_file( + correction_files_dir=correction_files_dir, + img_field=field, + ) -def correct_vast( + if outdir is None: + outdir = image_path.parent.parent.parent + + # construct output path to store corrections for each epoch + corr_dir = outdir / "corr_db" + if not os.path.isdir(corr_dir): + os.mkdir(corr_dir) + epoch_corr_dir = corr_dir / epoch_dir + + if not os.path.isdir(epoch_corr_dir): + os.mkdir(epoch_corr_dir) + + skip = False + if not rms_path.exists(): + logger.warning(f"RMS image not found for {image_path}.") + if not bkg_path.exists(): + logger.warning(f"Background image not found for {image_path}.") + + # Look for any component and island files correspnding to this image + image_root = image_path.parent.as_posix() + catalog_root = image_root.replace("IMAGES", "SELAVY") + + catalog_filename = image_path.name.replace("image", "selavy-image") + catalog_filename = catalog_filename.replace(".fits", ".components.xml") + + catalog_filepath = f"{catalog_root}/{catalog_filename}" + + component_file = Path(catalog_filepath) + island_file = Path(catalog_filepath.replace("components", "islands")) + + skip = ( + not ( + (rms_path.exists()) + and (bkg_path.exists()) + and (ref_file is not None) + and (component_file.exists()) + ) + or skip + ) + if skip: + if not ((rms_path.exists()) and (bkg_path.exists())): + logger.warning(f"Skipping {image_path}, RMS/BKG maps do not exist") + elif not (component_file.exists()): + logger.warning(f"Skipping {image_path}, catalog files do not exist") + elif ref_file is None: + logger.warning(f"Skipping {image_path}, no reference field found.") + return None + else: + fname = image_path.name.replace(".fits", "corrections.csv") + crossmatch_file = epoch_corr_dir / fname + csv_file = epoch_corr_dir / "all_fields_corrections.csv" + + # Get the psf measurements to estimate errors follwoing Condon 1997 + if len(psf_ref) > 0: + psf_reference = psf_ref + else: + psf_reference = get_psf_from_image(ref_file) + + if len(psf) > 0: + psf_image = psf + else: + psf_image = get_psf_from_image(image_path.as_posix()) + + ( + dra_median_value, + ddec_median_value, + flux_corr_mult, + flux_corr_add, + ) = vast_xmatch_qc( + reference_catalog_path=ref_file, + catalog_path=component_file.as_posix(), + radius=Angle(radius * u.arcsec), + condon=condon, + psf_reference=psf_reference, + psf=psf_image, + fix_m=False, + fix_b=False, + crossmatch_output=crossmatch_file, + csv_output=csv_file, + ) + + # get corrections + corrected_hdul = [] + for path in (image_path, rms_path, bkg_path): + stokes_dir = f"{path.parent.parent.name}_CORRECTED" + output_dir = outdir / stokes_dir / epoch_dir + output_dir.mkdir(parents=True, exist_ok=True) + output_path = output_dir / path.with_suffix(".corrected.fits").name + if output_path.exists() and not overwrite: + logger.warning(f"Will not overwrite existing image: {output_path}.") + else: + corrected_hdu = shift_and_scale_image( + path, + flux_scale=flux_corr_mult.n, + flux_offset_mJy=flux_corr_add.n, + ra_offset_arcsec=dra_median_value.item(), + dec_offset_arcsec=ddec_median_value.item(), + ) + if write_output: + if output_path.exists() and overwrite: 
+ logger.warning(f"Overwriting existing image: {output_path}.") + corrected_hdu.writeto(str(output_path), overwrite=True) + else: + corrected_hdu.writeto(str(output_path)) + logger.success(f"Writing corrected image to: {output_path}.") + corrected_hdu.close() + else: + corrected_hdul.append(corrected_hdu) + + # Do the same for catalog files + corrected_catalogs = [] + for path in (component_file, island_file): + stokes_dir = f"{path.parent.parent.name}_CORRECTED" + output_dir = outdir / stokes_dir / epoch_dir + output_dir.mkdir(parents=True, exist_ok=True) + output_path = output_dir / path.with_suffix(".corrected.xml").name + if output_path.exists() and not overwrite: + logger.warning(f"Will not overwrite existing catalogue: {output_path}.") + else: + corrected_catalog = shift_and_scale_catalog( + path, + flux_scale=flux_corr_mult.n, + flux_offset_mJy=flux_corr_add.n, + ra_offset_arcsec=dra_median_value.item(), + dec_offset_arcsec=ddec_median_value.item(), + ) + if write_output: + # write corrected VOTable + if output_path.exists() and overwrite: + logger.warning( + f"Overwriting existing catalogue: {output_path}." + ) + output_path.unlink() + corrected_catalog.to_xml(output_path.as_posix()) + else: + corrected_catalog.to_xml(output_path.as_posix()) + logger.success(f"Writing corrected catalogue: {output_path}.") + else: + corrected_catalogs.append(corrected_catalog) + return (corrected_hdul, corrected_catalogs) + + +def correct_files( vast_tile_data_root: Path, - vast_corrections_csv: Path, - epoch: Optional[list[int]], + vast_corrections_root: Path = "/data/vast-survey/RACS/release-format/EPOCH00/TILES/STOKESI_SELAVY", + epoch: list[int] = None, + radius: float = 10, + condon: bool = True, + psf_ref: list[float] = None, + psf: list[float] = None, + write_output: bool = True, + outdir: str = None, overwrite: bool = False, verbose: bool = False, ): """Read astrometric and flux corrections produced by vast-xmatch and apply them to VAST images and catalogues in vast-data. See https://github.com/marxide/vast-xmatch. + + Args: + vast_tile_data_root (Path): Path to the data that needs to be corrected. + Should follow VAST convention, something like + /data/VAST/vast-data/TILES/ that has STOKESI_IMAGES/epoch_xx/ + vast_corrections_root (Path, optional): Path to the catalogues of referecne catalog. + Defaults to "/data/vast-survey/RACS/release-format/EPOCH00/TILES/STOKESI_SELAVY". + epoch (list[int], optional): Epoch to be corrected. Defaults to None. + radius (float, optional): Crossmatch radius. Defaults to 10. + condon (bool, optional): Flag to replace errros with Condon errors. Defaults to True. + psf_ref (list[float], optional): PSF information of the reference catalog. Defaults to None. + psf (list[float], optional): PSF information of the input catalog. Defaults to None. + write_output (bool, optional): Write the corrected image and catalog files or return the + corrected hdul and the corrected table?. Defaults to True, which means to write + outdir (str, optional): The stem of the output directory to write the files to + overwrite (bool, optional): Overwrite the existing files?. Defaults to False. + verbose (bool, optional): Show more log messages. Defaults to False. 
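A library-level call, as opposed to the CLI wrapper, might look like this sketch. The paths are placeholders; empty psf lists mean the PSF sizes are read from the image headers as described above.

    from pathlib import Path
    from vast_post_processing.corrections import correct_files

    correct_files(
        vast_tile_data_root=Path("/data/VAST/vast-data/TILES"),
        vast_corrections_root=Path(
            "/data/vast-survey/RACS/release-format/EPOCH00/TILES/STOKESI_SELAVY"
        ),
        epoch=[1],
        radius=10,
        condon=True,
        psf_ref=[],       # empty: read the reference PSF from its image header
        psf=[],           # empty: read each image PSF from its header
        write_output=True,
        outdir=None,      # default: constructed from the tile directory
        overwrite=False,
    )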
""" # configure logger if not verbose: @@ -354,19 +580,15 @@ def correct_vast( logger.add(sys.stderr, level="INFO") # read corrections - corrections_df = ( - pd.read_csv(vast_corrections_csv) - .set_index(["release_epoch", "field", "sbid"]) - .sort_index() - ) image_path_glob_list: list[Generator[Path, None, None]] = [] components_path_glob_list: list[Generator[Path, None, None]] = [] + if epoch is None or len(epoch) == 0: image_path_glob_list.append( vast_tile_data_root.glob("STOKESI_IMAGES/epoch_*/*.fits") ) components_path_glob_list.append( - vast_tile_data_root.glob("STOKESI_SELAVY/epoch_*/*.components.xml") + vast_tile_data_root.glob("STOKESI_SELAVY/epoch_*/*.xml") ) else: for n in epoch: @@ -374,97 +596,19 @@ def correct_vast( vast_tile_data_root.glob(f"STOKESI_IMAGES/epoch_{n}/*.fits") ) components_path_glob_list.append( - vast_tile_data_root.glob(f"STOKESI_SELAVY/epoch_{n}/*.components.xml") + vast_tile_data_root.glob(f"STOKESI_SELAVY/epoch_{n}/*.xml") ) - # correct images + # get corrections for an image and the correct it for image_path in chain.from_iterable(image_path_glob_list): - epoch_dir = image_path.parent.name - _, _, field, sbid_str, *_ = image_path.name.split(".") - sbid = int(sbid_str[2:]) - # get rms and background images - rms_path = ( - vast_tile_data_root - / "STOKESI_RMSMAPS" - / epoch_dir - / f"noiseMap.{image_path.name}" - ) - bkg_path = ( - vast_tile_data_root - / "STOKESI_RMSMAPS" - / epoch_dir - / f"meanMap.{image_path.name}" - ) - # get corrections - skip = False - try: - corrections = corrections_df.loc[(epoch_dir, field, sbid)] - except KeyError: - skip = True - logger.warning( - f"Corrections not found for {image_path} ({epoch_dir}, {field}," - f" {sbid})." - ) - if not rms_path.exists(): - logger.warning(f"RMS image not found for {image_path}.") - if not bkg_path.exists(): - logger.warning(f"Background image not found for {image_path}.") - skip = not (rms_path.exists() and bkg_path.exists()) or skip - if skip: - logger.warning(f"Skipping {image_path}.") - continue - - # TODO determine what these variables are and where they are from - for path in (image_path, rms_path, bkg_path): - stokes_dir = f"{path.parent.parent.name}_CORRECTED" - output_dir = vast_tile_data_root / stokes_dir / epoch_dir - output_dir.mkdir(parents=True, exist_ok=True) - _ = shift_and_scale_image( - path, - output_dir, - flux_scale=corrections.flux_peak_correction_multiplicative, - flux_offset_mJy=corrections.flux_peak_correction_additive, - ra_offset_arcsec=corrections.ra_correction, - dec_offset_arcsec=corrections.dec_correction, - overwrite=overwrite, - ) - - # correct catalogs - for components_path in chain.from_iterable(components_path_glob_list): - epoch_dir = components_path.parent.name - _, _, field, sbid_str, *_ = components_path.name.split(".") - sbid = int(sbid_str[2:]) - # get island catalog - islands_path = components_path.with_name( - components_path.name.replace(".components", ".islands") + correct_field( + image_path=image_path, + vast_corrections_root=vast_corrections_root, + radius=radius, + condon=condon, + psf_ref=psf_ref, + psf=psf, + write_output=write_output, + outdir=outdir, + overwrite=overwrite, ) - # get corrections - skip = False - try: - corrections = corrections_df.loc[(epoch_dir, field, sbid)] - except KeyError: - skip = True - logger.warning( - f"Corrections not found for {components_path} ({epoch_dir}, {field}," - f" {sbid})." 
- ) - if not islands_path.exists(): - logger.warning(f"Islands catalogue not found for {components_path}.") - skip = not islands_path.exists() or skip - if skip: - logger.warning(f"Skipping {components_path}.") - continue - - for path in (components_path, islands_path): - stokes_dir = f"{path.parent.parent.name}_CORRECTED" - output_dir = vast_tile_data_root / stokes_dir / epoch_dir - output_dir.mkdir(parents=True, exist_ok=True) - _ = shift_and_scale_catalog( - path, - output_dir, - flux_scale=corrections.flux_peak_correction_multiplicative, - flux_offset_mJy=corrections.flux_peak_correction_additive, - ra_offset_arcsec=corrections.ra_correction, - dec_offset_arcsec=corrections.dec_correction, - overwrite=overwrite, - ) diff --git a/vast_post_processing/crossmatch.py b/vast_post_processing/crossmatch.py index bdbdc3c..c5ee01f 100644 --- a/vast_post_processing/crossmatch.py +++ b/vast_post_processing/crossmatch.py @@ -14,11 +14,28 @@ def median_abs_deviation(data): + """helper function to calculate the median offset + + Args: + data (list): List/array of offsets + + Returns: + float: the median offset + """ median = np.median(data) return np.median(np.abs(data - median)) def straight_line(B, x): + """Helper function for fitting. Defines a straight line + + Args: + B (list): (slope, intercept) of the line + x (list): input X-axis data + + Returns: + list: the straight line + """ m, b = B return m * x + b @@ -26,6 +43,19 @@ def straight_line(B, x): def join_match_coordinates_sky( coords1: SkyCoord, coords2: SkyCoord, seplimit: u.arcsec ): + """Helper function to do the cross match + + Args: + coords1 (SkyCoord): Input coordinates + coords2 (SkyCoord): Reference coordinates + seplimit (u.arcsec): cross-match radius + + Returns: + numpy.ndarray: Array to see which of the input coordinates have a cross match + numpy.ndarray: Indices of the input catalog where there is source in reference + catlog within separation limit + numpy.ndarray: The separation distance for the cross matches + """ idx, separation, dist_3d = match_coordinates_sky(coords1, coords2) mask = separation < seplimit return np.where(mask)[0], idx[mask], separation[mask], dist_3d[mask] @@ -35,12 +65,17 @@ def crossmatch_qtables( catalog: Catalog, catalog_reference: Catalog, radius: Angle = Angle("10 arcsec"), - catalog_coord_cols: Tuple[str, str] = ("ra_deg_cont", "dec_deg_cont"), - catalog_reference_coord_cols: Tuple[str, str] = ("ra_deg_cont", "dec_deg_cont"), ) -> QTable: - catalog_ra, catalog_dec = catalog_coord_cols - catalog_reference_ra, catalog_reference_dec = catalog_reference_coord_cols + """Main function to filter cross-matched sources. + Args: + catalog (Catalog): Input catalog + catalog_reference (Catalog): Reference catalog + radius (Angle, optional): cross-match radius. Defaults to Angle("10 arcsec"). 
+ + Returns: + QTable: filtered table that return the cross matches + """ logger.debug("Using crossmatch radius: %s.", radius) xmatch = join( From d4477a8b3d5410fbab61511c1c2aab7de06f3b34 Mon Sep 17 00:00:00 2001 From: Akash Anumarlapudi Date: Mon, 14 Aug 2023 11:43:55 -0500 Subject: [PATCH 22/31] Fixed typos --- vast_post_processing/cli/run_corrections.py | 4 ++-- vast_post_processing/corrections.py | 5 +++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/vast_post_processing/cli/run_corrections.py b/vast_post_processing/cli/run_corrections.py index 811c8ce..0347ab7 100644 --- a/vast_post_processing/cli/run_corrections.py +++ b/vast_post_processing/cli/run_corrections.py @@ -22,7 +22,7 @@ def main( "/data/vast-survey/RACS/release-format/EPOCH00/TILES/STOKESI_SELAVY", help=( "Path to RACS data that is can be used to correct VAST data. Tries to use" - " EPOCH00 as the defualt epoch. If not the user can override this by" + " EPOCH00 as the default epoch. If not the user can override this by" " giving a path to a folder that contain the selavy output" ), exists=True, @@ -54,7 +54,7 @@ def main( ".restored.conv.fits. Note that for TILE images, the epoch is determined " "from the full path. If the input catalogs do not follow this convention, then " "the PSF sizes must be supplied using --psf-reference and/or --psf. The " - "deafult behaviour is to lookup the PSF sizes from the header of the image" + "default behaviour is to lookup the PSF sizes from the header of the image" ), ), psf_ref: Optional[list[float]] = typer.Option( diff --git a/vast_post_processing/corrections.py b/vast_post_processing/corrections.py index 171550e..84d86cf 100644 --- a/vast_post_processing/corrections.py +++ b/vast_post_processing/corrections.py @@ -333,12 +333,13 @@ def get_correct_file(correction_files_dir, img_field): def get_psf_from_image(image_path: str): """ Funtion used to get the point spread function (PSF) extent in major and minor axis. - These will be in the header of the image file + These will be in the header of the image file. 
If a component file is give, it will + construct the image path from this and then gets the psf information Parameters ---------- image_path: str - Path to the image file + Path to the image file or a component file Returns ------- From 513d2c35484e40b64be559ac5f05d38ac38eaef7 Mon Sep 17 00:00:00 2001 From: Akash Anumarlapudi Date: Mon, 14 Aug 2023 14:04:32 -0500 Subject: [PATCH 23/31] New log message --- vast_post_processing/corrections.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/vast_post_processing/corrections.py b/vast_post_processing/corrections.py index 84d86cf..bc3be5b 100644 --- a/vast_post_processing/corrections.py +++ b/vast_post_processing/corrections.py @@ -613,3 +613,6 @@ def correct_files( outdir=outdir, overwrite=overwrite, ) + logger.info( + f"Successfully corrected the images and catalogs for {image_path.as_posix()}" + ) From 005f45f2a0943e5e2b84c73da7edf009d4a3f84b Mon Sep 17 00:00:00 2001 From: Akash Anumarlapudi Date: Mon, 14 Aug 2023 14:40:01 -0500 Subject: [PATCH 24/31] Deleted older corrections cli file --- vast_post_processing/cli/correct_vast.py | 273 ----------------------- 1 file changed, 273 deletions(-) delete mode 100644 vast_post_processing/cli/correct_vast.py diff --git a/vast_post_processing/cli/correct_vast.py b/vast_post_processing/cli/correct_vast.py deleted file mode 100644 index ebd5276..0000000 --- a/vast_post_processing/cli/correct_vast.py +++ /dev/null @@ -1,273 +0,0 @@ -from pathlib import Path -from typing import Optional -import typer -from astropy.table import QTable -from astropy.io import fits -from astropy import units as u - -from vast_post_processing.corrections import ( - shift_and_scale_catalog, - shift_and_scale_image, - calculate_positional_offsets, - calculate_flux_offsets, -) - - -def get_correct_correction_file(correction_files_list, epoch, img_field, img_sbid): - count = 0 - for f in chain.from_iterable(correction_files_list): - epoch_name = f.parent.name - if epoch_name in epoch: - filename = f.name - _, _, _, sbid, field, *_ = filename.split("_") - sbid = sbid.replace("-VAST", "") - field = field.replace(".csv", "") - if (sbid in img_sbid) & (field in img_field): - df = QTable.read(f) - flux_shifts = calculate_flux_offsets(df) - pos_shifts = calculate_positional_offsets(df) - count += 1 - return flux_shifts, pos_shifts - else: - continue - if count == 0: - return None, None - - -def get_psf_from_image(image_path: str): - """ - Funtion used to get the point spread function (PSF) extent in major and minor axis. - These will be in the header of the image file - - Parameters - ---------- - image_path: str - Path to the image file - - Returns - ------- - Tuple(psf_major, psf_minor) - Major and minor axes of the PSF. - """ - - hdu = fits.open(image_path) - psf_maj = hdu["BMAJ"] * u.degree - psf_min = hdu["BMIN"] * u.degree - return psf_maj.to(u.arcsec), psf_min.to(u.arcsec) - - -def main( - vast_tile_data_root: Path = typer.Argument( - ..., - help=( - "Path to VAST TILES data directory, i.e. the directory that contains the" - " STOKES* directories." - ), - exists=True, - file_okay=False, - dir_okay=True, - ), - vast_corrections_csv_root: Path = typer.Option( - "/data/vast-survey/VAST/askap-surveys-database/vast/db/", - help=( - "Path to VAST corrections CSV file produced by vast-xmatch. Tries to use" - " the default path of these files. 
If not the user can override this by" - "giving a path to file" - ), - exists=True, - file_okay=True, - dir_okay=False, - ), - epoch: Optional[list[int]] = typer.Option( - None, - help=( - "Only correct the given observation epochs. Can be given multiple times," - " e.g. --epoch 1 --epoch 2. If no epochs are given (the default), then" - " correct all available epochs." - ), - ), - overwrite: bool = False, - verbose: bool = False, -): - """Read astrometric and flux corrections produced by vast-xmatch and apply them to - VAST images and catalogues in vast-data. See https://github.com/marxide/vast-xmatch. - """ - # configure logger - if not verbose: - # replace the default sink - logger.remove() - logger.add(sys.stderr, level="INFO") - - # read corrections - # corrections_df = ( - # pd.read_csv(vast_corrections_csv) - # .set_index(["release_epoch", "field", "sbid"]) - # .sort_index() - # ) - image_path_glob_list: list[Generator[Path, None, None]] = [] - components_path_glob_list: list[Generator[Path, None, None]] = [] - correction_files_path_glob_list: list[Generator[Path, None, None]] = [] - if epoch is None or len(epoch) == 0: - image_path_glob_list.append( - vast_tile_data_root.glob("STOKESI_IMAGES/epoch_*/*.fits") - ) - components_path_glob_list.append( - vast_tile_data_root.glob("STOKESI_SELAVY/epoch_*/*.components.xml") - ) - correction_files_path_glob_list.append( - vast_corrections_csv_root.glob("epoch_*/cat_match_RACS0*.csv") - ) - else: - for n in epoch: - image_path_glob_list.append( - vast_tile_data_root.glob(f"STOKESI_IMAGES/epoch_{n}/*.fits") - ) - components_path_glob_list.append( - vast_tile_data_root.glob(f"STOKESI_SELAVY/epoch_{n}/*.components.xml") - ) - correction_files_path_glob_list.append( - vast_corrections_csv_root.glob(f"epoch_{n}/cat_match_RACS0*.csv") - ) - - # correct images - for image_path in chain.from_iterable(image_path_glob_list): - epoch_dir = image_path.parent.name - _, _, field, sbid_str, *_ = image_path.name.split(".") - sbid = int(sbid_str[2:]) - # get rms and background images - rms_path = ( - vast_tile_data_root - / "STOKESI_RMSMAPS" - / epoch_dir - / f"noiseMap.{image_path.name}" - ) - bkg_path = ( - vast_tile_data_root - / "STOKESI_RMSMAPS" - / epoch_dir - / f"meanMap.{image_path.name}" - ) - # get corrections - skip = False - # try: - # corrections = corrections_df.loc[(epoch_dir, field, sbid)] - # except KeyError: - # skip = True - # logger.warning( - # f"Corrections not found for {image_path} ({epoch_dir}, {field}," - # f" {sbid})." - # ) - flux_corrections, pos_corrections = get_correct_correction_file( - correction_files_list=correction_files_path_glob_list, - epoch=epoch_dir, - img_field=field, - img_sbid=sbid_str, - ) - if (flux_corrections is None) | (pos_corrections is None): - skip = True - logger.warning( - f"Corrections not found for {image_path} ({epoch_dir}, {field}," - f" {sbid})." 
- ) - else: - scale, offset, scale_err, offset_err = flux_corrections - dra_median, ddec_median, dra_madfm, ddec_madfm = pos_corrections - if not rms_path.exists(): - logger.warning(f"RMS image not found for {image_path}.") - if not bkg_path.exists(): - logger.warning(f"Background image not found for {image_path}.") - skip = not (rms_path.exists() and bkg_path.exists()) or skip - if skip: - logger.warning(f"Skipping {image_path}.") - continue - - for path in (image_path, rms_path, bkg_path): - stokes_dir = f"{path.parent.parent.name}_CORRECTED" - output_dir = vast_tile_data_root / stokes_dir / epoch_dir - output_dir.mkdir(parents=True, exist_ok=True) - # _ = shift_and_scale_image( - # path, - # output_dir, - # flux_scale=corrections.flux_peak_correction_multiplicative, - # flux_offset_mJy=corrections.flux_peak_correction_additive, - # ra_offset_arcsec=corrections.ra_correction, - # dec_offset_arcsec=corrections.dec_correction, - # overwrite=overwrite, - # ) - _ = shift_and_scale_image( - path, - output_dir, - flux_scale=scale, - flux_offset_mJy=offset, - ra_offset_arcsec=dra_median, - dec_offset_arcsec=ddec_median, - overwrite=overwrite, - ) - - # correct catalogs - for components_path in chain.from_iterable(components_path_glob_list): - epoch_dir = components_path.parent.name - _, _, field, sbid_str, *_ = components_path.name.split(".") - sbid = int(sbid_str[2:]) - # get island catalog - islands_path = components_path.with_name( - components_path.name.replace(".components", ".islands") - ) - # get corrections - skip = False - # try: - # corrections = corrections_df.loc[(epoch_dir, field, sbid)] - # except KeyError: - # skip = True - # logger.warning( - # f"Corrections not found for {image_path} ({epoch_dir}, {field}," - # f" {sbid})." - # ) - flux_corrections, pos_corrections = get_correct_correction_file( - correction_files_list=correction_files_path_glob_list, - epoch=epoch_dir, - img_field=field, - img_sbid=sbid_str, - ) - if (flux_corrections is None) | (pos_corrections is None): - skip = True - logger.warning( - f"Corrections not found for {image_path} ({epoch_dir}, {field}," - f" {sbid})." 
- ) - else: - scale, offset, scale_err, offset_err = flux_corrections - dra_median, ddec_median, dra_madfm, ddec_madfm = pos_corrections - if not islands_path.exists(): - logger.warning(f"Islands catalogue not found for {components_path}.") - skip = not islands_path.exists() or skip - if skip: - logger.warning(f"Skipping {components_path}.") - continue - - for path in (components_path, islands_path): - stokes_dir = f"{path.parent.parent.name}_CORRECTED" - output_dir = vast_tile_data_root / stokes_dir / epoch_dir - output_dir.mkdir(parents=True, exist_ok=True) - # _ = shift_and_scale_catalog( - # path, - # output_dir, - # flux_scale=corrections.flux_peak_correction_multiplicative, - # flux_offset_mJy=corrections.flux_peak_correction_additive, - # ra_offset_arcsec=corrections.ra_correction, - # dec_offset_arcsec=corrections.dec_correction, - # overwrite=overwrite, - # ) - _ = shift_and_scale_catalog( - path, - output_dir, - flux_scale=scale, - flux_offset_mJy=offset, - ra_offset_arcsec=dra_median, - dec_offset_arcsec=ddec_median, - overwrite=overwrite, - ) - - -if __name__ == "__main__": - typer.run(main) From 27395f2c0b4c3cd4e95c59b2520de292d20139a8 Mon Sep 17 00:00:00 2001 From: Akash Anumarlapudi Date: Tue, 15 Aug 2023 11:16:08 -0500 Subject: [PATCH 25/31] Make new directories only when write_output=True --- vast_post_processing/corrections.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vast_post_processing/corrections.py b/vast_post_processing/corrections.py index 821b638..eba68f2 100644 --- a/vast_post_processing/corrections.py +++ b/vast_post_processing/corrections.py @@ -475,7 +475,6 @@ def correct_field( for path in (image_path, rms_path, bkg_path): stokes_dir = f"{path.parent.parent.name}_CORRECTED" output_dir = outdir / stokes_dir / epoch_dir - output_dir.mkdir(parents=True, exist_ok=True) output_path = output_dir / path.with_suffix(".corrected.fits").name if output_path.exists() and not overwrite: logger.warning(f"Will not overwrite existing image: {output_path}.") @@ -488,6 +487,7 @@ def correct_field( dec_offset_arcsec=ddec_median_value.item(), ) if write_output: + output_dir.mkdir(parents=True, exist_ok=True) if output_path.exists() and overwrite: logger.warning(f"Overwriting existing image: {output_path}.") corrected_hdu.writeto(str(output_path), overwrite=True) @@ -503,7 +503,6 @@ def correct_field( for path in (component_file, island_file): stokes_dir = f"{path.parent.parent.name}_CORRECTED" output_dir = outdir / stokes_dir / epoch_dir - output_dir.mkdir(parents=True, exist_ok=True) output_path = output_dir / path.with_suffix(".corrected.xml").name if output_path.exists() and not overwrite: logger.warning(f"Will not overwrite existing catalogue: {output_path}.") @@ -516,6 +515,7 @@ def correct_field( dec_offset_arcsec=ddec_median_value.item(), ) if write_output: + output_dir.mkdir(parents=True, exist_ok=True) # write corrected VOTable if output_path.exists() and overwrite: logger.warning( From ef8698efb30d7b488175220ad04e94be1b0163f0 Mon Sep 17 00:00:00 2001 From: Akash Anumarlapudi Date: Wed, 16 Aug 2023 17:21:33 -0500 Subject: [PATCH 26/31] Updated the filtering function for catalogs --- vast_post_processing/catalogs.py | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/vast_post_processing/catalogs.py b/vast_post_processing/catalogs.py index a83b094..28a10a8 100644 --- a/vast_post_processing/catalogs.py +++ b/vast_post_processing/catalogs.py @@ -266,20 +266,35 @@ def _read_catalog(self): def 
_filter_sources(self): """Helper function to filter sources that are used for cross-match; filter sources with bad sizes and optionally given flux limits""" + + # Add a flux threshold flag if self.flux_flag: lim = self.flux_lim logger.info( f"Filtering %d sources with fluxes <= {lim}", (self.table["flux_peak"] <= lim).sum(), ) - self.table = self.table[self.table["flux_peak"] > lim] + flux_mask = self.table["flux_peak"] > lim + # self.table = self.table[self.table["flux_peak"] > lim] + # Add good psf flag logger.info( "Filtering %d sources with fitted sizes <= 0.", ((self.table["maj_axis"] <= 0) | (self.table["min_axis"] <= 0)).sum(), ) - self.table = self.table[ - (self.table["maj_axis"] > 0) & (self.table["min_axis"] > 0) - ] + psf_mask = (self.table["maj_axis"] > 0) & (self.table["min_axis"] > 0) + + # point source flag + ps_metric = self.table["flux_peak"] / self.table["flux_int"] + ps_mask = ps_metric < 1.5 + + # Add snr flag + snr_mask = self.table["flux_peak"] / self.table["rms_image"] > 20 + + # Select distant sources + dist_mask = self.table["nn_separation"].to(u.arcsec).value > 60 + + mask = (flux_mask) & (psf_mask) & (ps_mask) & (snr_mask) & (dist_mask) + self.table = self.table[mask] def calculate_condon_flux_errors( self, From b495dba62a523c245eb4297cf6dacfded8ab6f3a Mon Sep 17 00:00:00 2001 From: Akash Anumarlapudi Date: Wed, 16 Aug 2023 18:26:06 -0500 Subject: [PATCH 27/31] Deal with all epochs or single epoch the same way --- vast_post_processing/corrections.py | 66 +++++++++++++++-------------- 1 file changed, 34 insertions(+), 32 deletions(-) diff --git a/vast_post_processing/corrections.py b/vast_post_processing/corrections.py index eba68f2..a16de44 100644 --- a/vast_post_processing/corrections.py +++ b/vast_post_processing/corrections.py @@ -570,39 +570,41 @@ def correct_files( logger.remove() logger.add(sys.stderr, level="INFO") - # read corrections - image_path_glob_list: list[Generator[Path, None, None]] = [] - components_path_glob_list: list[Generator[Path, None, None]] = [] - + # Read all the epochs if epoch is None or len(epoch) == 0: - image_path_glob_list.append( - vast_tile_data_root.glob("STOKESI_IMAGES/epoch_*/*.fits") - ) - components_path_glob_list.append( - vast_tile_data_root.glob("STOKESI_SELAVY/epoch_*/*.xml") - ) + epoch_dirs = list(vast_tile_data_root.glob("STOKESI_IMAGES/epoch_*")) else: - for n in epoch: - image_path_glob_list.append( - vast_tile_data_root.glob(f"STOKESI_IMAGES/epoch_{n}/*.fits") + epoch_dirs = [] + epoch_dirs = [ + vast_tile_data_root / "STOKESI_IMAGES" / f"epoch_{e}" for e in epoch + ] + + logger.info( + f"Corrections requested of these epochs: {[i.name for i in epoch_dirs]}" + ) + + # Work on individual epochs + for e in epoch_dirs: + # read fits/xml files + image_path_glob_list: list[Generator[Path, None, None]] = [] + components_path_glob_list: list[Generator[Path, None, None]] = [] + + image_path_glob_list.append(e.glob("*.fits")) + components_path_glob_list.append(e.glob("*.xml")) + + # get corrections for every image and the correct it + for image_path in chain.from_iterable(image_path_glob_list): + correct_field( + image_path=image_path, + vast_corrections_root=vast_corrections_root, + radius=radius, + condon=condon, + psf_ref=psf_ref, + psf=psf, + write_output=write_output, + outdir=outdir, + overwrite=overwrite, ) - components_path_glob_list.append( - vast_tile_data_root.glob(f"STOKESI_SELAVY/epoch_{n}/*.xml") + logger.info( + f"Successfully corrected the images and catalogs for {image_path.as_posix()}" ) - - # get 
corrections for an image and the correct it - for image_path in chain.from_iterable(image_path_glob_list): - correct_field( - image_path=image_path, - vast_corrections_root=vast_corrections_root, - radius=radius, - condon=condon, - psf_ref=psf_ref, - psf=psf, - write_output=write_output, - outdir=outdir, - overwrite=overwrite, - ) - logger.info( - f"Successfully corrected the images and catalogs for {image_path.as_posix()}" - ) From 3450e474344c7e53aa05b73c7265311ef195dd7b Mon Sep 17 00:00:00 2001 From: Akash Anumarlapudi Date: Thu, 17 Aug 2023 18:09:01 -0500 Subject: [PATCH 28/31] User decides whether to skip entire epoch or a single file --- vast_post_processing/corrections.py | 227 ++++++++++++++++++++-------- 1 file changed, 160 insertions(+), 67 deletions(-) diff --git a/vast_post_processing/corrections.py b/vast_post_processing/corrections.py index a16de44..d3f42fb 100644 --- a/vast_post_processing/corrections.py +++ b/vast_post_processing/corrections.py @@ -5,6 +5,7 @@ from astropy.coordinates import SkyCoord, Angle from astropy.io import fits from astropy.io.votable import parse +from astropy.io.votable.tree import Param import astropy.units as u from uncertainties import ufloat from astropy.wcs import WCS, FITSFixedWarning @@ -184,10 +185,23 @@ def shift_and_scale_image( image_hdul = fits.open(image_path) image_hdu = image_hdul[0] - # do the flux scaling - image_hdu.data = flux_scale * (image_hdu.data + (flux_offset_mJy * 1e-3)) - image_hdu.header["FLUXOFF"] = flux_offset_mJy * 1e-3 + # do the flux scaling, but check that the data is in Jy + if image_hdu.header["BUNIT"] == "Jy/beam": + data_unit = u.Jy + else: + data_unit = u.mJy + image_hdu.data = flux_scale * ( + image_hdu.data + (flux_offset_mJy * (u.mJy.to(data_unit))) + ) + image_hdu.header["FLUXOFF"] = flux_offset_mJy * (u.mJy.to(data_unit)) image_hdu.header["FLUXSCL"] = flux_scale + + image_hdu[ + "HISTORY" + ] = """ + Image has been corrected for flux by a scaling factor and an offset given by + FLUXSCL and FLUXOFF. + """ # check for NaN if replace_nan: if np.any(np.isnan(image_hdu.data)): @@ -214,6 +228,13 @@ def shift_and_scale_image( image_hdu.header["RAOFF"] = ra_offset_arcsec image_hdu.header["DECOFF"] = dec_offset_arcsec + image_hdu[ + "HISTORY" + ] = """ + Image has been corrected for astrometric position by a an offset in both directions + given by RAOFF and DECOFF using a model RA=RA+RAOFF/COS(DEC), DEC=DEC+DECOFF. 
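Put another way, the header keywords record this correction model (a sketch only; data, offsets and coordinates must be in consistent units, with numpy imported as np):

    # Flux: additive offset followed by a multiplicative scale (FLUXOFF, FLUXSCL).
    data_corrected = flux_scale * (data + flux_offset)
    # Astrometry: offsets in arcsec (RAOFF, DECOFF), the RA term scaled by cos(dec).
    ra_corrected = ra + (ra_offset_arcsec / 3600.0) / np.cos(np.deg2rad(dec))
    dec_corrected = dec + (dec_offset_arcsec / 3600.0)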
+ """ + return image_hdul @@ -293,10 +314,50 @@ def shift_and_scale_catalog( for col in cols: votable.array[col] = flux_scale * (votable.array[col] + flux_offset_mJy) + # Add in the corrections to the votable + flux_scl_param = Param( + votable=votablefile, + ID="flux_scl", + name="flux_scl", + value=flux_scale, + datatype="float", + unit=None, + ) + flux_off_param = Param( + votable=votablefile, + ID="flux_offset", + name="flux_offset", + value=flux_offset_mJy, + datatype="float", + unit=u.mJy, + ) + + ra_offset_param = Param( + votable=votablefile, + ID="ra_offset", + name="ra_offset", + value=ra_offset_arcsec, + datatype="float", + unit=u.arcsec, + ) + + dec_offset_param = Param( + votable=votablefile, + ID="dec_offset", + name="dec_offset", + value=dec_offset_arcsec, + datatype="float", + unit=u.arcsec, + ) + + votablefile.params.extend( + [ra_offset_param, dec_offset_param, flux_scl_param, flux_off_param] + ) + return votablefile -def get_correct_file(correction_files_dir, img_field): +def get_correct_file(correction_files_dir: list, img_field: str): """Helper function to get the file from the reference catalogs which observed the same field. @@ -339,13 +400,57 @@ def get_psf_from_image(image_path: str): image_path = image_path.replace("SELAVY", "IMAGES") image_path = image_path.replace("selavy-", "") image_path = image_path.replace(".components.xml", ".fits") - hdu = fits.open(image_path) + hdu = fits.getheader(image_path) psf_maj = hdu[0].header["BMAJ"] * u.degree psf_min = hdu[0].header["BMIN"] * u.degree - hdu.close() + # hdu.close() return (psf_maj.to(u.arcsec), psf_min.to(u.arcsec)) +def check_for_files(image_path: str): + """Helper function to cehck for bkg/noise maps and the component/island + catalogs given the image file + + Args: + image_path (str): Path to the image file + """ + # get rms and background images + rms_root = Path( + image_path.parent.as_posix().replace("STOKESI_IMAGES", "STOKESI_RMSMAPS") + ) + rms_path = rms_root / f"noiseMap.{image_path.name}" + bkg_path = rms_root / f"meanMap.{image_path.name}" + + skip = False + if not rms_path.exists(): + logger.warning(f"RMS image not found for {image_path}.") + if not bkg_path.exists(): + logger.warning(f"Background image not found for {image_path}.") + + # Look for any component and island files correspnding to this image + image_root = image_path.parent.as_posix() + catalog_root = image_root.replace("IMAGES", "SELAVY") + + catalog_filename = image_path.name.replace("image", "selavy-image") + catalog_filename = catalog_filename.replace(".fits", ".components.xml") + + catalog_filepath = f"{catalog_root}/{catalog_filename}" + + component_file = Path(catalog_filepath) + island_file = Path(catalog_filepath.replace("components", "islands")) + + skip = ( + not ( + (rms_path.exists()) + and (bkg_path.exists()) + and (island_file.exists()) + and (component_file.exists()) + ) + or skip + ) + return skip, (bkg_path, rms_path, component_file, island_file) + + def correct_field( image_path: Path, vast_corrections_root: Path = "/data/vast-survey/RACS/release-format/EPOCH00/TILES/STOKESI_SELAVY", @@ -376,13 +481,7 @@ def correct_field( epoch_dir = image_path.parent.name _, _, field, *_ = image_path.name.split(".") - # get rms and background images - rms_root = Path( - image_path.parent.as_posix().replace("STOKESI_IMAGES", "STOKESI_RMSMAPS") - ) - rms_path = rms_root / f"noiseMap.{image_path.name}" - bkg_path = rms_root / f"meanMap.{image_path.name}" - + # get the correction file correction_files_dir = Path(vast_corrections_root) 
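Because the corrections are attached to the output VOTable as PARAM elements, a downstream reader could recover what was applied along the lines of this sketch; the file name is illustrative:

    from astropy.io.votable import parse

    vot = parse("selavy-image.i.FIELD.SBID.components.corrected.xml")
    applied = {p.ID: p.value for p in vot.params}
    # e.g. applied["flux_scl"], applied["flux_offset"],
    #      applied["ra_offset"], applied["dec_offset"]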
ref_file = get_correct_file( correction_files_dir=correction_files_dir, @@ -394,40 +493,17 @@ def correct_field( # construct output path to store corrections for each epoch corr_dir = outdir / "corr_db" - if not os.path.isdir(corr_dir): - os.mkdir(corr_dir) + if not corr_dir.isdir(): + corr_dir.mkdir() epoch_corr_dir = corr_dir / epoch_dir - if not os.path.isdir(epoch_corr_dir): - os.mkdir(epoch_corr_dir) + if not epoch_corr_dir.isdir(): + epoch_corr_dir.mkdir() - skip = False - if not rms_path.exists(): - logger.warning(f"RMS image not found for {image_path}.") - if not bkg_path.exists(): - logger.warning(f"Background image not found for {image_path}.") - - # Look for any component and island files correspnding to this image - image_root = image_path.parent.as_posix() - catalog_root = image_root.replace("IMAGES", "SELAVY") - - catalog_filename = image_path.name.replace("image", "selavy-image") - catalog_filename = catalog_filename.replace(".fits", ".components.xml") - - catalog_filepath = f"{catalog_root}/{catalog_filename}" - - component_file = Path(catalog_filepath) - island_file = Path(catalog_filepath.replace("components", "islands")) - - skip = ( - not ( - (rms_path.exists()) - and (bkg_path.exists()) - and (ref_file is not None) - and (component_file.exists()) - ) - or skip - ) + # check for auxiliary files + skip, aux_files = check_for_files(image_path=image_path) + skip |= ref_file is None + bkg_path, rms_path, component_file, island_file = aux_files if skip: if not ((rms_path.exists()) and (bkg_path.exists())): logger.warning(f"Skipping {image_path}, RMS/BKG maps do not exist") @@ -471,7 +547,7 @@ def correct_field( ) # get corrections - corrected_hdul = [] + corrected_hdus = [] for path in (image_path, rms_path, bkg_path): stokes_dir = f"{path.parent.parent.name}_CORRECTED" output_dir = outdir / stokes_dir / epoch_dir @@ -496,7 +572,7 @@ def correct_field( logger.success(f"Writing corrected image to: {output_path}.") corrected_hdu.close() else: - corrected_hdul.append(corrected_hdu) + corrected_hdus.append(corrected_hdu) # Do the same for catalog files corrected_catalogs = [] @@ -506,6 +582,7 @@ def correct_field( output_path = output_dir / path.with_suffix(".corrected.xml").name if output_path.exists() and not overwrite: logger.warning(f"Will not overwrite existing catalogue: {output_path}.") + continue else: corrected_catalog = shift_and_scale_catalog( path, @@ -528,7 +605,7 @@ def correct_field( logger.success(f"Writing corrected catalogue: {output_path}.") else: corrected_catalogs.append(corrected_catalog) - return (corrected_hdul, corrected_catalogs) + return (corrected_hdus, corrected_catalogs) def correct_files( @@ -542,6 +619,7 @@ def correct_files( write_output: bool = True, outdir: str = None, overwrite: bool = False, + skip_on_missing=False, verbose: bool = False, ): """Read astrometric and flux corrections produced by vast-xmatch and apply them to @@ -587,24 +665,39 @@ def correct_files( for e in epoch_dirs: # read fits/xml files image_path_glob_list: list[Generator[Path, None, None]] = [] - components_path_glob_list: list[Generator[Path, None, None]] = [] - image_path_glob_list.append(e.glob("*.fits")) - components_path_glob_list.append(e.glob("*.xml")) - - # get corrections for every image and the correct it - for image_path in chain.from_iterable(image_path_glob_list): - correct_field( - image_path=image_path, - vast_corrections_root=vast_corrections_root, - radius=radius, - condon=condon, - psf_ref=psf_ref, - psf=psf, - write_output=write_output, - outdir=outdir, 
- overwrite=overwrite, - ) - logger.info( - f"Successfully corrected the images and catalogs for {image_path.as_posix()}" - ) + image_files = list(image_path_glob_list) + + skip_epoch = False + for img in image_files: + skip_file, _ = check_for_files(image_path=img) + skip_epoch |= skip_file + if skip_epoch: + logger.warning( + f"One/Some of the bkg/rms/catlogues is are missing for {img}" + ) + break + if skip_on_missing: + if skip_epoch: + logger.warning( + "User input is to skip the entire epoch if one of the images \ + have missing bkg/rms/catalog files, so skipping epoch {e}" + ) + break + else: + # get corrections for every image and the correct it + for image_path in image_files: + correct_field( + image_path=image_path, + vast_corrections_root=vast_corrections_root, + radius=radius, + condon=condon, + psf_ref=psf_ref, + psf=psf, + write_output=write_output, + outdir=outdir, + overwrite=overwrite, + ) + logger.info( + f"Successfully corrected the images and catalogs for {image_path.as_posix()}" + ) From 903b04b4c6bc2f14497e56597324e142a624132b Mon Sep 17 00:00:00 2001 From: Akash Anumarlapudi Date: Sun, 20 Aug 2023 20:06:43 -0500 Subject: [PATCH 29/31] changed variables to function arguments for filtering sources --- vast_post_processing/catalogs.py | 40 ++++++++++++++++++++++++----- vast_post_processing/corrections.py | 4 +-- 2 files changed, 36 insertions(+), 8 deletions(-) diff --git a/vast_post_processing/catalogs.py b/vast_post_processing/catalogs.py index 28a10a8..3f1acb8 100644 --- a/vast_post_processing/catalogs.py +++ b/vast_post_processing/catalogs.py @@ -182,10 +182,32 @@ def __init__( path: Path, psf: Optional[Tuple[float, float]] = None, input_format: str = "selavy", - condon: bool = False, - apply_flux_limit: bool = True, + condon: bool = True, flux_limit: float = 0, + snr_limit: float = 20, + nneighbor: float = 1, + apply_flux_limit: bool = True, + select_point_sources: bool = True, ): + """Defines a catalog class to read the component files + + Args: + path (Path): path to the component file (selavy/aegean supported right now) + psf (Optional[Tuple[float, float]], optional): The major and minor axis dimensions + in arcsec. Defaults to None. Used to calculate condon errors + input_format (str, optional): are the component files selavy or aegean generated?. + Defaults to "selavy". + condon (bool, optional): Apply condon corrections. Defaults to True. + flux_limit (float, optional): Flux limit to select sources (sources with peak flux + > this will be selected). Defaults to 0. + snr_limit (float, optional): SNR limit to select sources (sources with SNR > this + will be selected). Defaults to 20. + nneighbor (float, optional): Distance to nearest neighbor (in arcmin). Sources with + neighbors < this will be removed. Defaults to 1. + apply_flux_limit (bool, optional): Flag to decide to apply flux limit. Defaults to True. + select_point_sources (bool, optional): Flag to decide to select point sources. 
+ Defaults to True + """ self.path: Path self.table: QTable self.input_format: Optional[str] @@ -202,6 +224,9 @@ def __init__( self.input_format = input_format self.flux_flag = apply_flux_limit self.flux_lim = flux_limit + self.snr_lim = snr_limit + self.sep_lim = nneighbor # In arcmin + self.point_sources = select_point_sources # Read the catalog self._read_catalog() @@ -284,14 +309,17 @@ def _filter_sources(self): psf_mask = (self.table["maj_axis"] > 0) & (self.table["min_axis"] > 0) # point source flag - ps_metric = self.table["flux_peak"] / self.table["flux_int"] - ps_mask = ps_metric < 1.5 + if self.point_sources: + ps_metric = self.table["flux_peak"] / self.table["flux_int"] + ps_mask = ps_metric < 1.5 + else: + ps_mask = np.ones(len(self.table)).astype(bool) # Add snr flag - snr_mask = self.table["flux_peak"] / self.table["rms_image"] > 20 + snr_mask = self.table["flux_peak"] / self.table["rms_image"] > self.snr_lim # Select distant sources - dist_mask = self.table["nn_separation"].to(u.arcsec).value > 60 + dist_mask = self.table["nn_separation"].to(u.arcsec).value > 60 * self.sep_lim mask = (flux_mask) & (psf_mask) & (ps_mask) & (snr_mask) & (dist_mask) self.table = self.table[mask] diff --git a/vast_post_processing/corrections.py b/vast_post_processing/corrections.py index d3f42fb..635792e 100644 --- a/vast_post_processing/corrections.py +++ b/vast_post_processing/corrections.py @@ -493,11 +493,11 @@ def correct_field( # construct output path to store corrections for each epoch corr_dir = outdir / "corr_db" - if not corr_dir.isdir(): + if not corr_dir.is_dir(): corr_dir.mkdir() epoch_corr_dir = corr_dir / epoch_dir - if not epoch_corr_dir.isdir(): + if not epoch_corr_dir.is_dir(): epoch_corr_dir.mkdir() # check for auxiliary files From ffeae45107deb1f1a731f287a5ce49a4e7c874ff Mon Sep 17 00:00:00 2001 From: Akash Date: Mon, 21 Aug 2023 11:02:28 -0500 Subject: [PATCH 30/31] Tested it on a couple of epoch, changed code to use f-strings --- vast_post_processing/catalogs.py | 54 ++++++++++++--------- vast_post_processing/cli/run_corrections.py | 6 +++ vast_post_processing/corrections.py | 46 ++++++++---------- vast_post_processing/crossmatch.py | 16 ++---- 4 files changed, 63 insertions(+), 59 deletions(-) diff --git a/vast_post_processing/catalogs.py b/vast_post_processing/catalogs.py index 3f1acb8..a2d72f2 100644 --- a/vast_post_processing/catalogs.py +++ b/vast_post_processing/catalogs.py @@ -1,4 +1,4 @@ -import logging +from loguru import logger from pathlib import Path from typing import Tuple, Union, Dict, Optional from urllib.parse import quote @@ -9,8 +9,6 @@ import numpy as np import pandas as pd -logger = logging.getLogger(__name__) - SELAVY_COLUMN_UNITS = { "ra_deg_cont": u.deg, "dec_deg_cont": u.deg, @@ -245,14 +243,11 @@ def __init__( if psf is not None: self.psf_major, self.psf_minor = psf * u.arcsec logger.debug( - "Using user provided PSF for %s: %s, %s.", - self.path, - self.psf_major, - self.psf_minor, + f"Using user provided PSF for {self.path}: {self.psf_major}, {self.psf_minor}." ) else: logger.warning( - "PSF is unknown for %s. Condon errors will be unavailable.", self.path + f"PSF is unknown for {self.path}. Condon errors will be unavailable." 
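For orientation, the filtering knobs documented in the docstring above can be exercised directly when constructing a Catalog. A minimal sketch under stated assumptions: the component-file path and PSF values are illustrative placeholders, not part of this patch, and the keyword names follow the signature introduced in this commit.

from pathlib import Path

from vast_post_processing.catalogs import Catalog

# Illustrative placeholder; any Selavy component catalogue laid out in the
# usual VAST directory structure should work here.
component_file = Path(
    "STOKESI_SELAVY/epoch_20/selavy-image.i.VAST_0012-06.SB9876.cont.components.xml"
)

# Keep sources brighter than 1 mJy/beam with SNR > 10, drop anything with a
# neighbour within 2 arcmin, and restrict the crossmatch list to point sources.
cat = Catalog(
    component_file,
    psf=(12.5, 10.0),        # image PSF major/minor axes in arcsec, used for Condon errors
    input_format="selavy",
    condon=True,
    flux_limit=1.0,
    snr_limit=10,
    nneighbor=2,
    apply_flux_limit=True,
    select_point_sources=True,
)
print(f"{len(cat.table)} sources retained for crossmatching")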
) self.psf_major = None self.psf_minor = None @@ -260,7 +255,7 @@ def __init__( # Calculate the covariant error using Condon 1997 if condon and self.psf_major is not None and self.psf_minor is not None: self.calculate_condon_flux_errors(correct_peak_for_noise=True) - logger.debug("Condon errors computed for %s.", self.path) + logger.debug(f"Condon errors computed for {self.path}.") def _read_catalog(self): """Helper function to read and parse the input files @@ -272,13 +267,13 @@ def _read_catalog(self): path = self.path if self.input_format == "selavy": if path.suffix == ".txt": - logger.debug("Reading %s as a Selavy txt catalog.", path) + logger.debug(f"Reading {path} as a Selavy txt catalog.") read_catalog = read_selavy else: - logger.debug("Reading %s as a Selavy VOTable catalog.", path) + logger.debug(f"Reading {path} as a Selavy VOTable catalog.") read_catalog = read_selavy_votable elif self.input_format == "aegean": - logger.debug("Reading %s as an Aegean catalog.", path) + logger.debug(f"Reading {path} as an Aegean catalog.") read_catalog = read_aegean_csv else: logger.error( @@ -292,37 +287,52 @@ def _filter_sources(self): """Helper function to filter sources that are used for cross-match; filter sources with bad sizes and optionally given flux limits""" + sources = self.table + flux_peak = (self.table["flux_peak"].to(u.mJy / u.beam)).value + flux_int = (self.table["flux_int"].to(u.mJy)).value + rms = (self.table["rms_image"].to(u.mJy / u.beam)).value + # Add a flux threshold flag if self.flux_flag: lim = self.flux_lim + flux_mask = flux_peak > lim logger.info( - f"Filtering %d sources with fluxes <= {lim}", - (self.table["flux_peak"] <= lim).sum(), + f"Filtering {len(sources[~flux_mask])} sources with fluxes <= {lim}" ) - flux_mask = self.table["flux_peak"] > lim - # self.table = self.table[self.table["flux_peak"] > lim] + # Add good psf flag + psf_mask = (self.table["maj_axis"] > 0) & (self.table["min_axis"] > 0) logger.info( - "Filtering %d sources with fitted sizes <= 0.", - ((self.table["maj_axis"] <= 0) | (self.table["min_axis"] <= 0)).sum(), + f"Filtering {len(sources[~psf_mask])} sources with fitted sizes <= 0." ) - psf_mask = (self.table["maj_axis"] > 0) & (self.table["min_axis"] > 0) - # point source flag if self.point_sources: - ps_metric = self.table["flux_peak"] / self.table["flux_int"] + ps_metric = np.divide( + flux_peak, flux_int, where=flux_int != 0, out=np.zeros_like(flux_int) + ) ps_mask = ps_metric < 1.5 + logger.info( + f"Filtering {len(sources[~ps_mask])} sources that are not point sources." + ) else: ps_mask = np.ones(len(self.table)).astype(bool) # Add snr flag - snr_mask = self.table["flux_peak"] / self.table["rms_image"] > self.snr_lim + snr = np.divide(flux_peak, rms, where=rms != 0, out=np.zeros_like(rms)) + snr_mask = snr > self.snr_lim + logger.info( + f"Filtering {len(sources[~snr_mask])} sources with SNR <= {self.snr_lim}" + ) # Select distant sources dist_mask = self.table["nn_separation"].to(u.arcsec).value > 60 * self.sep_lim + logger.info( + f"Filtering {len(sources[~dist_mask])} sources that have neighbors within {self.sep_lim} arcmin." 
+ ) mask = (flux_mask) & (psf_mask) & (ps_mask) & (snr_mask) & (dist_mask) self.table = self.table[mask] + logger.info(f"Filtering {len(sources[~mask])} sources in total.") def calculate_condon_flux_errors( self, diff --git a/vast_post_processing/cli/run_corrections.py b/vast_post_processing/cli/run_corrections.py index 0347ab7..f4790ed 100644 --- a/vast_post_processing/cli/run_corrections.py +++ b/vast_post_processing/cli/run_corrections.py @@ -78,6 +78,11 @@ def main( "to them as suffix", ), overwrite: bool = False, + skip_on_missing: Optional[bool] = typer.Option( + False, + help="If there are missing files (noise/bkg/catalogs) corresponding to an image file, should" + "we skip the entire epoch or just that one files? Defaults to skipping just that file.", + ), verbose: bool = False, ): """ @@ -99,6 +104,7 @@ def main( psf=psf, outdir=outdir, overwrite=overwrite, + skip_on_missing=skip_on_missing, verbose=verbose, ) diff --git a/vast_post_processing/corrections.py b/vast_post_processing/corrections.py index 635792e..31f0c9f 100644 --- a/vast_post_processing/corrections.py +++ b/vast_post_processing/corrections.py @@ -196,12 +196,9 @@ def shift_and_scale_image( image_hdu.header["FLUXOFF"] = flux_offset_mJy * (u.mJy.to(data_unit)) image_hdu.header["FLUXSCL"] = flux_scale - image_hdu[ + image_hdu.header[ "HISTORY" - ] = """ - Image has been corrected for flux by a scaling factor and an offset given by - FLUXSCL and FLUXOFF. - """ + ] = "Image has been corrected for flux by a scaling factor and an offset given by FLUXSCL and FLUXOFF." # check for NaN if replace_nan: if np.any(np.isnan(image_hdu.data)): @@ -228,12 +225,9 @@ def shift_and_scale_image( image_hdu.header["RAOFF"] = ra_offset_arcsec image_hdu.header["DECOFF"] = dec_offset_arcsec - image_hdu[ + image_hdu.header[ "HISTORY" - ] = """ - Image has been corrected for astrometric position by a an offset in both directions - given by RAOFF and DECOFF using a model RA=RA+RAOFF/COS(DEC), DEC=DEC+DECOFF. 
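For downstream users, the positional model recorded in this HISTORY entry (RA' = RA + RAOFF/cos(DEC), DEC' = DEC + DECOFF, with the offsets written to the header in arcsec) is straightforward to apply or invert by hand. A rough sketch, assuming a corrected image produced by shift_and_scale_image; the file name and the example position are placeholders.

import numpy as np
from astropy.io import fits
import astropy.units as u

# Placeholder file name for an image written by shift_and_scale_image.
with fits.open("image.i.VAST_0012-06.SB9876.cont.corrected.fits") as hdul:
    hdr = hdul[0].header
    ra_off = hdr["RAOFF"] * u.arcsec    # offsets are stored in arcsec
    dec_off = hdr["DECOFF"] * u.arcsec

# Forward model from the HISTORY entry: RA' = RA + RAOFF / cos(DEC), DEC' = DEC + DECOFF.
ra, dec = 12.3456 * u.deg, -6.7890 * u.deg   # an uncorrected position, for illustration
ra_corrected = ra + ra_off.to(u.deg) / np.cos(dec.to(u.rad).value)
dec_corrected = dec + dec_off.to(u.deg)
# Subtracting the same two terms undoes the shift on an already-corrected position.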
- """ + ] = "Image has been corrected for astrometric position by a an offset in both directions given by RAOFF and DECOFF using a model RA=RA+RAOFF/COS(DEC), DEC=DEC+DECOFF" return image_hdul @@ -400,9 +394,9 @@ def get_psf_from_image(image_path: str): image_path = image_path.replace("SELAVY", "IMAGES") image_path = image_path.replace("selavy-", "") image_path = image_path.replace(".components.xml", ".fits") - hdu = fits.getheader(image_path) - psf_maj = hdu[0].header["BMAJ"] * u.degree - psf_min = hdu[0].header["BMIN"] * u.degree + hdr = fits.getheader(image_path) + psf_maj = hdr["BMAJ"] * u.degree + psf_min = hdr["BMIN"] * u.degree # hdu.close() return (psf_maj.to(u.arcsec), psf_min.to(u.arcsec)) @@ -605,6 +599,9 @@ def correct_field( logger.success(f"Writing corrected catalogue: {output_path}.") else: corrected_catalogs.append(corrected_catalog) + logger.info( + f"Successfully corrected the images and catalogs for {image_path.as_posix()}" + ) return (corrected_hdus, corrected_catalogs) @@ -666,8 +663,7 @@ def correct_files( # read fits/xml files image_path_glob_list: list[Generator[Path, None, None]] = [] image_path_glob_list.append(e.glob("*.fits")) - image_files = list(image_path_glob_list) - + image_files = list(image_path_glob_list[0]) skip_epoch = False for img in image_files: skip_file, _ = check_for_files(image_path=img) @@ -677,17 +673,16 @@ def correct_files( f"One/Some of the bkg/rms/catlogues is are missing for {img}" ) break - if skip_on_missing: - if skip_epoch: - logger.warning( - "User input is to skip the entire epoch if one of the images \ - have missing bkg/rms/catalog files, so skipping epoch {e}" - ) - break + if skip_on_missing & skip_epoch: + logger.warning( + "User input is to skip the entire epoch if one of the images" + f"have missing bkg/rms/catalog files, so skipping epoch {e}" + ) + else: # get corrections for every image and the correct it for image_path in image_files: - correct_field( + products = correct_field( image_path=image_path, vast_corrections_root=vast_corrections_root, radius=radius, @@ -698,6 +693,5 @@ def correct_files( outdir=outdir, overwrite=overwrite, ) - logger.info( - f"Successfully corrected the images and catalogs for {image_path.as_posix()}" - ) + if products is not None: + hdus, catalogs = products diff --git a/vast_post_processing/crossmatch.py b/vast_post_processing/crossmatch.py index c5ee01f..11061bd 100644 --- a/vast_post_processing/crossmatch.py +++ b/vast_post_processing/crossmatch.py @@ -1,6 +1,5 @@ -import logging from typing import Tuple - +from loguru import logger from astropy.coordinates import SkyCoord, Angle, match_coordinates_sky from astropy.table import QTable, join, join_skycoord import astropy.units as u @@ -10,9 +9,6 @@ from vast_post_processing.catalogs import Catalog -logger = logging.getLogger(__name__) - - def median_abs_deviation(data): """helper function to calculate the median offset @@ -76,7 +72,7 @@ def crossmatch_qtables( Returns: QTable: filtered table that return the cross matches """ - logger.debug("Using crossmatch radius: %s.", radius) + logger.debug(f"Using crossmatch radius: {radius}.") xmatch = join( catalog.table, @@ -102,11 +98,9 @@ def crossmatch_qtables( ).decompose() logger.info( - "Num cross-matches: %d. Num cross-matches to unique reference source: %d" - " (%d%%).", - len(xmatch), - len(set(xmatch["coord_id"])), - (len(set(xmatch["coord_id"])) / len(xmatch)) * 100, + f"Num cross-matches: {len(xmatch)}. 
Num cross-matches to unique reference " + f"source: {len(set(xmatch['coord_id']))} -- " + f" ({(len(set(xmatch['coord_id'])) / len(xmatch)) * 100})." ) return xmatch From f3a5ae858bfb103ca26f3cfbffe937aebed3c864 Mon Sep 17 00:00:00 2001 From: Akash Date: Mon, 28 Aug 2023 12:49:45 +1000 Subject: [PATCH 31/31] Added catalog filtering parameters as CLI arguments. --- vast_post_processing/cli/run_corrections.py | 28 ++++ vast_post_processing/corrections.py | 136 +++++++++++++------- 2 files changed, 114 insertions(+), 50 deletions(-) diff --git a/vast_post_processing/cli/run_corrections.py b/vast_post_processing/cli/run_corrections.py index f4790ed..5b2c202 100644 --- a/vast_post_processing/cli/run_corrections.py +++ b/vast_post_processing/cli/run_corrections.py @@ -71,6 +71,29 @@ def main( "arcsec for `catalog`. First argument is major axis followed by nimor axis." ), ), + flux_limit: Optional[float] = typer.Option( + 0, + help="Flux limit to select sources (sources with peak flux" + "> this will be selected). Defaults to 0.", + ), + snr_limit: Optional[float] = typer.Option( + 20, + help="SNR limit to select sources (sources with SNR > this" + "will be selected). Defaults to 20.", + ), + nneighbor: Optional[float] = typer.Option( + 1, + help="Distance to nearest neighbor (in arcmin). Sources with" + "neighbors < this will be removed. Defaults to 1.", + ), + apply_flux_limit: Optional[bool] = typer.Option( + True, + help="Flag to decide to apply flux limit. Defaults to True", + ), + select_point_sources: Optional[bool] = typer.Option( + True, + help="Flag to decide to select point sources. Defaults to True", + ), outdir: Optional[str] = typer.Option( None, help="Stem of the output directory to store the corrected images and cataloges to. The default" @@ -102,6 +125,11 @@ def main( condon=condon, psf_ref=psf_ref, psf=psf, + flux_limit=flux_limit, + snr_limit=snr_limit, + nneighbor=nneighbor, + apply_flux_limit=apply_flux_limit, + select_point_sources=select_point_sources, outdir=outdir, overwrite=overwrite, skip_on_missing=skip_on_missing, diff --git a/vast_post_processing/corrections.py b/vast_post_processing/corrections.py index 31f0c9f..0b10720 100644 --- a/vast_post_processing/corrections.py +++ b/vast_post_processing/corrections.py @@ -31,6 +31,11 @@ def vast_xmatch_qc( fix_b: bool = False, positional_unit: u.Unit = u.Unit("arcsec"), flux_unit: u.Unit = u.Unit("mJy"), + flux_limit: float = 0, + snr_limit: float = 20, + nneighbor: float = 1, + apply_flux_limit: bool = True, + select_point_sources: bool = True, crossmatch_output: Optional[str] = None, csv_output: Optional[str] = None, ): @@ -56,6 +61,15 @@ def vast_xmatch_qc( Defaults to u.Unit("arcsec"). flux_unit (u.Unit, optional): output unit in which the flux scale is given. Defaults to u.Unit("mJy"). + flux_limit (float, optional): Flux limit to select sources (sources with peak flux + > this will be selected). Defaults to 0. + snr_limit (float, optional): SNR limit to select sources (sources with SNR > this + will be selected). Defaults to 20. + nneighbor (float, optional): Distance to nearest neighbor (in arcmin). Sources with + neighbors < this will be removed. Defaults to 1. + apply_flux_limit (bool, optional): Flag to decide to apply flux limit. Defaults to True. + select_point_sources (bool, optional): Flag to decide to select point sources. + Defaults to True crossmatch_output (Optional[str], optional): File path to write the crossmatch output. 
Defaults to None, which means no file is written csv_output (Optional[str], optional): File path to write the flux/astrometric corrections. @@ -77,12 +91,22 @@ def vast_xmatch_qc( psf=psf_reference, condon=condon, input_format="selavy", + flux_limit=flux_limit, + snr_limit=snr_limit, + nneighbor=nneighbor, + apply_flux_limit=apply_flux_limit, + select_point_sources=select_point_sources, ) catalog = Catalog( catalog_path, psf=psf, condon=condon, input_format="selavy", + flux_limit=flux_limit, + snr_limit=snr_limit, + nneighbor=nneighbor, + apply_flux_limit=apply_flux_limit, + select_point_sources=select_point_sources, ) # perform the crossmatch @@ -125,37 +149,28 @@ def vast_xmatch_qc( if csv_output is not None: # output has been requested - - if True: # csv_output is not None: - csv_output_path = Path(csv_output) # ensure Path object - sbid = catalog.sbid if catalog.sbid is not None else "" - if not csv_output_path.exists(): - f = open(csv_output_path, "w") - print( - "field,release_epoch,sbid,ra_correction,dec_correction,ra_madfm," - "dec_madfm,flux_peak_correction_multiplicative,flux_peak_correction_additive," - "flux_peak_correction_multiplicative_err,flux_peak_correction_additive_err," - "n_sources", - file=f, - ) - else: - f = open(csv_output_path, "a") - logger.info( - "Writing corrections CSV. To correct positions, add the corrections to" - " the original source positions i.e. RA' = RA + ra_correction /" - " cos(Dec). To correct fluxes, add the additive correction and multiply" - " the result by the multiplicative correction i.e. S' =" - " flux_peak_correction_multiplicative(S +" - " flux_peak_correction_additive)." - ) - print( - f"{catalog.field},{catalog.epoch},{sbid},{dra_median_value * -1}," - f"{ddec_median_value * -1},{dra_madfm_value},{ddec_madfm_value}," - f"{flux_corr_mult.nominal_value},{flux_corr_add.nominal_value}," - f"{flux_corr_mult.std_dev},{flux_corr_add.std_dev},{len(data)}", - file=f, - ) - f.close() + csv_output_path = Path(csv_output) # ensure Path object + sbid = catalog.sbid if catalog.sbid is not None else "" + if not csv_output_path.exists(): + f = open(csv_output_path, "w") + else: + f = open(csv_output_path, "a") + logger.info( + "Writing corrections CSV. To correct positions, add the corrections to" + " the original source positions i.e. RA' = RA + ra_correction /" + " cos(Dec). To correct fluxes, add the additive correction and multiply" + " the result by the multiplicative correction i.e. S' =" + " flux_peak_correction_multiplicative(S +" + " flux_peak_correction_additive)." + ) + print( + f"{catalog.field},{catalog.epoch},{sbid},{dra_median_value * -1}," + f"{ddec_median_value * -1},{dra_madfm_value},{ddec_madfm_value}," + f"{flux_corr_mult.nominal_value},{flux_corr_add.nominal_value}," + f"{flux_corr_mult.std_dev},{flux_corr_add.std_dev},{len(data)}", + file=f, + ) + f.close() return dra_median_value, ddec_median_value, flux_corr_mult, flux_corr_add @@ -193,12 +208,13 @@ def shift_and_scale_image( image_hdu.data = flux_scale * ( image_hdu.data + (flux_offset_mJy * (u.mJy.to(data_unit))) ) - image_hdu.header["FLUXOFF"] = flux_offset_mJy * (u.mJy.to(data_unit)) - image_hdu.header["FLUXSCL"] = flux_scale + image_hdu.header["FLUXOFFSET"] = flux_offset_mJy * (u.mJy.to(data_unit)) + image_hdu.header["FLUXSCALE"] = flux_scale - image_hdu.header[ - "HISTORY" - ] = "Image has been corrected for flux by a scaling factor and an offset given by FLUXSCL and FLUXOFF." 
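The corrections CSV written above can be consumed with pandas. The column order below follows the header string printed by earlier revisions of this function, and the formulas are the ones spelled out in the accompanying log message (RA' = RA + ra_correction / cos(Dec); S' = flux_peak_correction_multiplicative * (S + flux_peak_correction_additive)). A sketch under those assumptions: the CSV path is a placeholder, the positional corrections are assumed to be in arcsec (the default positional unit), and the column names are supplied explicitly because the current revision no longer writes a header row.

import numpy as np
import pandas as pd

columns = [
    "field", "release_epoch", "sbid", "ra_correction", "dec_correction",
    "ra_madfm", "dec_madfm", "flux_peak_correction_multiplicative",
    "flux_peak_correction_additive", "flux_peak_correction_multiplicative_err",
    "flux_peak_correction_additive_err", "n_sources",
]
# Placeholder path to the per-epoch corrections CSV.
corr = pd.read_csv("corr_db/epoch_20/corrections.csv", names=columns)
row = corr.iloc[0]

# An example source position (deg) and peak flux (mJy/beam), for illustration.
ra_deg, dec_deg, flux_peak = 12.3456, -6.7890, 5.0

# Positions: add the tabulated offsets, assumed to be in arcsec.
ra_corrected = ra_deg + row["ra_correction"] / 3600.0 / np.cos(np.deg2rad(dec_deg))
dec_corrected = dec_deg + row["dec_correction"] / 3600.0
# Fluxes: apply the additive term first, then the multiplicative factor.
flux_corrected = row["flux_peak_correction_multiplicative"] * (
    flux_peak + row["flux_peak_correction_additive"]
)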
+ image_hdu.header.add_history( + "Image has been corrected for flux by a scaling factor and\ + an offset given by FLUXSCALE and FLUXOFFSET." + ) # check for NaN if replace_nan: if np.any(np.isnan(image_hdu.data)): @@ -225,9 +241,11 @@ def shift_and_scale_image( image_hdu.header["RAOFF"] = ra_offset_arcsec image_hdu.header["DECOFF"] = dec_offset_arcsec - image_hdu.header[ - "HISTORY" - ] = "Image has been corrected for astrometric position by a an offset in both directions given by RAOFF and DECOFF using a model RA=RA+RAOFF/COS(DEC), DEC=DEC+DECOFF" + image_hdu.header.add_history( + "Image has been corrected for astrometric position by a an offset\ + in both directions given by RAOFF and DECOFF using a model\ + RA=RA+RAOFF/COS(DEC), DEC=DEC+DECOFF" + ) return image_hdul @@ -311,16 +329,16 @@ def shift_and_scale_catalog( # Add in the corrections to the votable flux_scl_param = Param( votable=votablefile, - ID="flux_scl", - name="flux_scl", + ID="FluxScale", + name="FluxScale", value=flux_scale, datatype="float", unit=None, ) flux_off_param = Param( votable=votablefile, - ID="flux_offset", - name="flux_offset", + ID="FluxOffset", + name="FluxOffset", value=flux_offset_mJy, datatype="float", unit=u.mJy, @@ -328,8 +346,8 @@ def shift_and_scale_catalog( ra_offset_param = Param( votable=votablefile, - ID="ra_offset", - name="ra_offset", + ID="RAOffset", + name="RAOffset", value=ra_offset_arcsec, datatype="float", unit=u.arcsec, @@ -337,8 +355,8 @@ def shift_and_scale_catalog( dec_offset_param = Param( votable=votablefile, - ID="dec_offset", - name="dec_offset", + ID="DECOffset", + name="DECOffset", value=dec_offset_arcsec, datatype="float", unit=u.arcsec, @@ -452,6 +470,11 @@ def correct_field( condon: bool = True, psf_ref: list[float] = None, psf: list[float] = None, + flux_limit: float = 0, + snr_limit: float = 20, + nneighbor: float = 1, + apply_flux_limit: bool = True, + select_point_sources: bool = True, write_output: bool = True, outdir: str = None, overwrite: bool = False, @@ -536,6 +559,11 @@ def correct_field( psf=psf_image, fix_m=False, fix_b=False, + flux_limit=flux_limit, + snr_limit=snr_limit, + nneighbor=nneighbor, + apply_flux_limit=apply_flux_limit, + select_point_sources=select_point_sources, crossmatch_output=crossmatch_file, csv_output=csv_file, ) @@ -613,6 +641,11 @@ def correct_files( condon: bool = True, psf_ref: list[float] = None, psf: list[float] = None, + flux_limit: float = 0, + snr_limit: float = 20, + nneighbor: float = 1, + apply_flux_limit: bool = True, + select_point_sources: bool = True, write_output: bool = True, outdir: str = None, overwrite: bool = False, @@ -682,16 +715,19 @@ def correct_files( else: # get corrections for every image and the correct it for image_path in image_files: - products = correct_field( + _ = correct_field( image_path=image_path, vast_corrections_root=vast_corrections_root, radius=radius, condon=condon, psf_ref=psf_ref, psf=psf, + flux_limit=flux_limit, + snr_limit=snr_limit, + nneighbor=nneighbor, + apply_flux_limit=apply_flux_limit, + select_point_sources=select_point_sources, write_output=write_output, outdir=outdir, overwrite=overwrite, ) - if products is not None: - hdus, catalogs = products
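Putting the new arguments together, a single field can be corrected by calling correct_field with the same keywords that correct_files now forwards to it. A sketch under stated assumptions: the paths, crossmatch radius, and PSF values are illustrative placeholders, and the return value is checked before unpacking because correct_field may return nothing when the auxiliary RMS/background/catalogue files are missing, mirroring the handling in correct_files.

from pathlib import Path

from astropy.coordinates import Angle

from vast_post_processing.corrections import correct_field

# All paths, the radius, and the PSF values here are illustrative placeholders.
products = correct_field(
    image_path=Path(
        "STOKESI_IMAGES/epoch_20/image.i.VAST_0012-06.SB9876.cont.taylor.0.restored.fits"
    ),
    vast_corrections_root=Path("path/to/reference/catalogues"),
    radius=Angle("10 arcsec"),
    condon=True,
    psf_ref=[15.0, 12.0],   # reference-catalogue PSF (major, minor) in arcsec
    psf=[12.5, 10.0],       # image PSF (major, minor) in arcsec
    flux_limit=1.0,
    snr_limit=10,
    nneighbor=2,
    apply_flux_limit=True,
    select_point_sources=True,
    write_output=False,     # collect the corrected HDUs/catalogues instead of writing them
)
if products is not None:
    corrected_hdus, corrected_catalogs = products

With write_output=False the corrected HDU lists and catalogues are returned for inspection rather than written to the CORRECTED output tree, which is convenient when tuning the filtering thresholds before a full batch run.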