From 3e6dc1d758b97651e6a3840cd9a37d06712d5972 Mon Sep 17 00:00:00 2001 From: Akash Date: Wed, 12 Jul 2023 15:58:30 -0500 Subject: [PATCH 01/31] Pulled in Andrew's code for corrections and re-wrote the entire correction code --- vast_post_processing/catalogs.py | 298 +++++++++++++++++ vast_post_processing/cli/correct_vast.py | 158 +++++++-- vast_post_processing/cli/run_corrections.py | 346 ++++++++++++++++++++ vast_post_processing/corrections.py | 117 ++++++- vast_post_processing/crossmatch.py | 153 +++++++++ 5 files changed, 1046 insertions(+), 26 deletions(-) create mode 100644 vast_post_processing/catalogs.py create mode 100644 vast_post_processing/cli/run_corrections.py create mode 100644 vast_post_processing/crossmatch.py diff --git a/vast_post_processing/catalogs.py b/vast_post_processing/catalogs.py new file mode 100644 index 0000000..a75b000 --- /dev/null +++ b/vast_post_processing/catalogs.py @@ -0,0 +1,298 @@ +import logging +from pathlib import Path +from typing import Tuple, Union, Dict, Optional +from urllib.parse import quote + +from astropy.coordinates import SkyCoord +from astropy.table import Table, QTable, join +import astropy.units as u +import numpy as np +import pandas as pd + +logger = logging.getLogger(__name__) + +SELAVY_COLUMN_UNITS = { + "ra_deg_cont": u.deg, + "dec_deg_cont": u.deg, + "ra_err": u.arcsec, + "dec_err": u.arcsec, + "flux_peak": u.mJy / u.beam, + "flux_peak_err": u.mJy / u.beam, + "maj_axis": u.arcsec, + "maj_axis_err": u.arcsec, + "min_axis": u.arcsec, + "min_axis_err": u.arcsec, + "pos_ang": u.deg, + "pos_ang_err": u.deg, + "rms_image": u.mJy / u.beam, +} + + +class UnknownCatalogInputFormat(Exception): + pass + + +class Catalog: + CATALOG_TYPE_TILE = "TILE" + CATALOG_TYPE_COMBINED = "COMBINED" + CATALOG_TYPES = ( + CATALOG_TYPE_TILE, + CATALOG_TYPE_COMBINED, + ) + + def __init__( + self, + path: Path, + psf: Optional[Tuple[float, float]] = None, + input_format: str = "selavy", + condon: bool = False, + positive_fluxes_only: bool = True, + ): + self.path: Path + self.table: QTable + self.field: Optional[str] + self.epoch: Optional[str] + self.sbid: Optional[str] + self.psf_major: Optional[u.Quantity] + self.psf_minor: Optional[u.Quantity] + self.type: str + + # read catalog + if input_format == "selavy": + if path.suffix == ".txt": + logger.debug("Reading %s as a Selavy txt catalog.", path) + read_catalog = read_selavy + else: + logger.debug("Reading %s as a Selavy VOTable catalog.", path) + read_catalog = read_selavy_votable + elif input_format == "aegean": + logger.debug("Reading %s as an Aegean catalog.", path) + read_catalog = read_aegean_csv + else: + logger.error( + "The format of input files is not supported. 
Only selavy and aegean are supported" + ) + raise SystemExit + self.path = path + self.table = read_catalog(path) + + # filter sources with bad sizes and optionally negative/0 fluxes + if positive_fluxes_only: + logger.info( + "Filtering %d sources with fluxes <= 0.", + (self.table["flux_peak"] <= 0).sum(), + ) + self.table = self.table[self.table["flux_peak"] > 0] + logger.info( + "Filtering %d sources with fitted sizes <= 0.", + ((self.table["maj_axis"] <= 0) | (self.table["min_axis"] <= 0)).sum(), + ) + self.table = self.table[ + (self.table["maj_axis"] > 0) & (self.table["min_axis"] > 0) + ] + + # read epoch, field, sbid, psf's + epoch_name = path.parent.name + _, _, field, sbid, *_ = path.name.split(".") + self.epoch = epoch_name + self.field = field.replace("VAST_", "") + self.sbid = sbid + + if psf is not None: + self.psf_major, self.psf_minor = psf * u.arcsec + logger.debug( + "Using user provided PSF for %s: %s, %s.", + self.path, + self.psf_major, + self.psf_minor, + ) + else: + logger.warning( + "PSF is unknown for %s. Condon errors will be unavailable.", self.path + ) + self.psf_major = None + self.psf_minor = None + + if condon and self.psf_major is not None and self.psf_minor is not None: + _ = self.calculate_condon_flux_errors(correct_peak_for_noise=True) + logger.debug("Condon errors computed for %s.", self.path) + + def calculate_condon_flux_errors( + self, + alpha_maj1=2.5, + alpha_min1=0.5, + alpha_maj2=0.5, + alpha_min2=2.5, + alpha_maj3=1.5, + alpha_min3=1.5, + clean_bias=0.0, + clean_bias_error=0.0, + frac_flux_cal_error=0.0, + correct_peak_for_noise=False, + ): + noise = self.table["rms_image"] + snr = self.table["flux_peak"] / noise + + rho_sq3 = ( + ( + self.table["maj_axis"] + * self.table["min_axis"] + / (4.0 * self.psf_major * self.psf_minor) + ) + * (1.0 + (self.psf_major / self.table["maj_axis"]) ** 2) ** alpha_maj3 + * (1.0 + (self.psf_minor / self.table["min_axis"]) ** 2) ** alpha_min3 + * snr**2 + ) + + flux_peak_col = self.table["flux_peak"] + flux_peak_condon = self.table["flux_peak"] + ( + -(noise**2) / self.table["flux_peak"] + clean_bias + ) + if correct_peak_for_noise: + flux_peak_col = flux_peak_condon + + errorpeaksq = ( + (frac_flux_cal_error * flux_peak_col) ** 2 + + clean_bias_error**2 + + 2.0 * flux_peak_col**2 / rho_sq3 + ) + errorpeak = np.sqrt(errorpeaksq) + + self.table["flux_peak_condon"] = flux_peak_condon + self.table["flux_peak_selavy"] = self.table["flux_peak"] + self.table["flux_peak_err_condon"] = errorpeak + self.table["flux_peak_err_selavy"] = self.table["flux_peak_err"] + self.table["flux_peak_err"] = self.table["flux_peak_err_condon"] + if correct_peak_for_noise: + self.table["flux_peak"] = self.table["flux_peak_condon"] + return flux_peak_condon, errorpeak + + +def _convert_selavy_columns_to_quantites( + qt: QTable, units: Dict[str, u.Unit] = SELAVY_COLUMN_UNITS +) -> QTable: + for col, unit in units.items(): + qt[col].unit = unit + return qt + + +def read_selavy(catalog_path: Path) -> QTable: + """Read a Selavy fixed-width component catalog and return a QTable. + Assumed to contain at least the following columns with the given units: + - `ra_deg_cont` and `dec_deg_cont`: degrees. + - `ra_err` and `dec_err`: arcseconds. + - `flux_peak` and `flux_peak_err`: mJy/beam. + - `maj_axis`, `maj_axis_err`, `min_axis`, `min_axis_err`: arcseconds. + - `pos_ang` and `pos_ang_err`: degrees. + - `rms_image`: mJy/beam. + These columns will be converted to Astropy quantites assuming the above units. 
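For reference, a minimal stand-alone sketch of the Condon (1997) peak-flux uncertainty that calculate_condon_flux_errors computes above, with the calibration-error and clean-bias terms left at zero; all input numbers below are invented purely for illustration.

    import astropy.units as u
    import numpy as np

    # Invented example source and beam; real values come from the catalogue and image header.
    flux_peak = 5.0 * u.mJy / u.beam
    rms_image = 0.25 * u.mJy / u.beam
    maj_axis, min_axis = 15.0 * u.arcsec, 12.0 * u.arcsec    # fitted component size
    psf_major, psf_minor = 12.5 * u.arcsec, 10.0 * u.arcsec  # restoring beam

    snr = flux_peak / rms_image
    # Effective signal-to-noise (rho) for the peak flux, alpha = 3/2, 3/2 case
    rho_sq = (
        (maj_axis * min_axis / (4.0 * psf_major * psf_minor))
        * (1.0 + (psf_major / maj_axis) ** 2) ** 1.5
        * (1.0 + (psf_minor / min_axis) ** 2) ** 1.5
        * snr**2
    )
    # With frac_flux_cal_error = clean_bias_error = 0 the error reduces to sqrt(2/rho) * S_peak
    flux_peak_err = np.sqrt(2.0 * flux_peak**2 / rho_sq)
    print(flux_peak_err.to(u.mJy / u.beam))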
+ + Parameters + ---------- + catalog_path : Path + Path to the Selavy catalog file. + + Returns + ------- + QTable + Selavy catalog as a QTable, with extra columns: + - `coord`: `SkyCoord` object of the source coordinate. + - `nn_separation`: separation to the nearest-neighbour source as a Quantity with + angular units. + """ + df = pd.read_fwf(catalog_path, skiprows=[1]).drop(columns="#") + qt = _convert_selavy_columns_to_quantites(QTable.from_pandas(df)) + qt["coord"] = SkyCoord(ra=qt["ra_deg_cont"], dec=qt["dec_deg_cont"]) + _, qt["nn_separation"], _ = qt["coord"].match_to_catalog_sky( + qt["coord"], nthneighbor=2 + ) + return qt + + +def read_selavy_votable(catalog_path: Path) -> QTable: + t = Table.read(catalog_path, format="votable", use_names_over_ids=True) + # remove units from str columns and fix unrecognized flux units + for col in t.itercols(): + if col.dtype.kind == "U": + col.unit = None + elif col.unit == u.UnrecognizedUnit("mJy/beam"): + col.unit = u.Unit("mJy/beam") + qt = QTable(t) + qt["coord"] = SkyCoord(ra=qt["ra_deg_cont"], dec=qt["dec_deg_cont"]) + _, qt["nn_separation"], _ = qt["coord"].match_to_catalog_sky( + qt["coord"], nthneighbor=2 + ) + return qt + + +def read_hdf(catalog_path: Path) -> pd.DataFrame: + df = pd.read_hdf(catalog_path, key="data") + df["field"] = df.field.str.split(".", n=1, expand=True)[0] + qt = _convert_selavy_columns_to_quantites(QTable.from_pandas(df)) + qt["coord"] = SkyCoord(ra=qt["ra_deg_cont"], dec=qt["dec_deg_cont"]) + _, qt["nn_separation"], _ = qt["coord"].match_to_catalog_sky( + qt["coord"], nthneighbor=2 + ) + return qt + + +def read_aegean_csv(catalog_path: Path) -> QTable: + """Read an Aegean CSV component catalog and return a QTable. + Assumed to contain at least the following columns with the given units: + - `ra` and `dec`: degrees. + - `err_ra` and `err_dec`: degrees. + - `peak_flux` and `err_peak_flux`: Jy/beam. + - `a`, `err_a`, `b`, `err_b`: fitted semi-major and -minor axes in arcseconds. + - `pa` and `err_pa`: degrees. + - `local_rms`: Jy/beam. + These columns will be converted to Astropy quantites assuming the above units. + + Parameters + ---------- + catalog_path : Path + Path to the Selavy catalog file. + + Returns + ------- + QTable + Aegean component catalog as a QTable, with extra columns: + - `coord`: `SkyCoord` object of the source coordinate. + - `nn_separation`: separation to the nearest-neighbour source as a Quantity with + angular units. 
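All of the readers in this module attach the nn_separation column the same way: the catalogue is matched against itself with nthneighbor=2 so that the trivial self-match is skipped. A toy illustration with invented coordinates:

    from astropy.coordinates import SkyCoord
    import astropy.units as u

    coords = SkyCoord(ra=[10.0, 10.001, 10.5] * u.deg, dec=[-30.0, -30.0, -30.2] * u.deg)
    # nthneighbor=2: the nearest *other* source, not the source itself
    _, nn_separation, _ = coords.match_to_catalog_sky(coords, nthneighbor=2)
    print(nn_separation.to(u.arcsec))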
+ """ + AEGEAN_COLUMN_MAP = { + # aegean name: (selavy name, aegean unit) + "ra": ("ra_deg_cont", u.deg), + "dec": ("dec_deg_cont", u.deg), + "err_ra": ("ra_err", u.deg), + "err_dec": ("dec_err", u.deg), + "peak_flux": ("flux_peak", u.Jy / u.beam), + "err_peak_flux": ("flux_peak_err", u.Jy / u.beam), + "a": ("maj_axis", u.arcsec), + "b": ("min_axis", u.arcsec), + "pa": ("pos_ang", u.arcsec), + "err_a": ("maj_axis_err", u.arcsec), + "err_b": ("min_axis_err", u.deg), + "err_pa": ("pos_ang_err", u.deg), + "local_rms": ("rms_image", u.Jy / u.beam), + } + qt = QTable.read(catalog_path) + # rename columns to match selavy convention and assign units + for col, (new_col, unit) in AEGEAN_COLUMN_MAP.items(): + qt.rename_column(col, new_col) + qt[new_col].unit = unit + # add has_siblings column + island_source_counts = ( + qt[["island", "source"]].group_by("island").groups.aggregate(np.sum) + ) + island_source_counts.rename_column("source", "has_siblings") + island_source_counts["has_siblings"] = island_source_counts["has_siblings"].astype( + bool + ) + qt = join(qt, island_source_counts, keys="island", join_type="left") + + qt["coord"] = SkyCoord(ra=qt["ra_deg_cont"], dec=qt["dec_deg_cont"]) + _, qt["nn_separation"], _ = qt["coord"].match_to_catalog_sky( + qt["coord"], nthneighbor=2 + ) + return qt diff --git a/vast_post_processing/cli/correct_vast.py b/vast_post_processing/cli/correct_vast.py index 90cb6fe..a54686c 100644 --- a/vast_post_processing/cli/correct_vast.py +++ b/vast_post_processing/cli/correct_vast.py @@ -6,8 +6,59 @@ from loguru import logger import pandas as pd import typer +from astropy.table import QTable +from astropy.io import fits +from astropy import units as u -from vast_post_processing.corrections import shift_and_scale_catalog, shift_and_scale_image +from vast_post_processing.corrections import ( + shift_and_scale_catalog, + shift_and_scale_image, + calculate_positional_offsets, + calculate_flux_offsets, +) + + +def get_correct_correction_file(correction_files_list, epoch, img_field, img_sbid): + count = 0 + for f in chain.from_iterable(correction_files_list): + epoch_name = f.parent.name + if epoch_name in epoch: + filename = f.name + _, _, _, sbid, field, *_ = filename.split("_") + sbid = sbid.replace("-VAST", "") + field = field.replace(".csv", "") + if (sbid in img_sbid) & (field in img_field): + df = QTable.read(f) + flux_shifts = calculate_flux_offsets(df) + pos_shifts = calculate_positional_offsets(df) + count += 1 + return flux_shifts, pos_shifts + else: + continue + if count == 0: + return None, None + + +def get_psf_from_image(image_path: str): + """ + Funtion used to get the point spread function (PSF) extent in major and minor axis. + These will be in the header of the image file + + Parameters + ---------- + image_path: str + Path to the image file + + Returns + ------- + Tuple(psf_major, psf_minor) + Major and minor axes of the PSF. + """ + + hdu = fits.open(image_path) + psf_maj = hdu["BMAJ"] * u.degree + psf_min = hdu["BMIN"] * u.degree + return psf_maj.to(u.arcsec), psf_min.to(u.arcsec) def main( @@ -21,9 +72,13 @@ def main( file_okay=False, dir_okay=True, ), - vast_corrections_csv: Path = typer.Argument( - ..., - help="Path to VAST corrections CSV file produced by vast-xmatch.", + vast_corrections_csv_root: Path = typer.Option( + "/data/vast-survey/VAST/askap-surveys-database/vast/db/", + help=( + "Path to VAST corrections CSV file produced by vast-xmatch. Tries to use" + " the default path of these files. 
If not the user can override this by" + "giving a path to file" + ), exists=True, file_okay=True, dir_okay=False, @@ -49,13 +104,14 @@ def main( logger.add(sys.stderr, level="INFO") # read corrections - corrections_df = ( - pd.read_csv(vast_corrections_csv) - .set_index(["release_epoch", "field", "sbid"]) - .sort_index() - ) + # corrections_df = ( + # pd.read_csv(vast_corrections_csv) + # .set_index(["release_epoch", "field", "sbid"]) + # .sort_index() + # ) image_path_glob_list: list[Generator[Path, None, None]] = [] components_path_glob_list: list[Generator[Path, None, None]] = [] + correction_files_path_glob_list: list[Generator[Path, None, None]] = [] if epoch is None or len(epoch) == 0: image_path_glob_list.append( vast_tile_data_root.glob("STOKESI_IMAGES/epoch_*/*.fits") @@ -63,6 +119,9 @@ def main( components_path_glob_list.append( vast_tile_data_root.glob("STOKESI_SELAVY/epoch_*/*.components.xml") ) + correction_files_path_glob_list.append( + vast_corrections_csv_root.glob("epoch_*/cat_match_RACS0*.csv") + ) else: for n in epoch: image_path_glob_list.append( @@ -71,6 +130,9 @@ def main( components_path_glob_list.append( vast_tile_data_root.glob(f"STOKESI_SELAVY/epoch_{n}/*.components.xml") ) + correction_files_path_glob_list.append( + vast_corrections_csv_root.glob(f"epoch_{n}/cat_match_RACS0*.csv") + ) # correct images for image_path in chain.from_iterable(image_path_glob_list): @@ -92,14 +154,29 @@ def main( ) # get corrections skip = False - try: - corrections = corrections_df.loc[(epoch_dir, field, sbid)] - except KeyError: + # try: + # corrections = corrections_df.loc[(epoch_dir, field, sbid)] + # except KeyError: + # skip = True + # logger.warning( + # f"Corrections not found for {image_path} ({epoch_dir}, {field}," + # f" {sbid})." + # ) + flux_corrections, pos_corrections = get_correct_correction_file( + correction_files_list=correction_files_path_glob_list, + epoch=epoch_dir, + img_field=field, + img_sbid=sbid_str, + ) + if (flux_corrections is None) | (pos_corrections is None): skip = True logger.warning( f"Corrections not found for {image_path} ({epoch_dir}, {field}," f" {sbid})." 
) + else: + scale, offset, scale_err, offset_err = flux_corrections + dra_median, ddec_median, dra_madfm, ddec_madfm = pos_corrections if not rms_path.exists(): logger.warning(f"RMS image not found for {image_path}.") if not bkg_path.exists(): @@ -113,13 +190,22 @@ def main( stokes_dir = f"{path.parent.parent.name}_CORRECTED" output_dir = vast_tile_data_root / stokes_dir / epoch_dir output_dir.mkdir(parents=True, exist_ok=True) + # _ = shift_and_scale_image( + # path, + # output_dir, + # flux_scale=corrections.flux_peak_correction_multiplicative, + # flux_offset_mJy=corrections.flux_peak_correction_additive, + # ra_offset_arcsec=corrections.ra_correction, + # dec_offset_arcsec=corrections.dec_correction, + # overwrite=overwrite, + # ) _ = shift_and_scale_image( path, output_dir, - flux_scale=corrections.flux_peak_correction_multiplicative, - flux_offset_mJy=corrections.flux_peak_correction_additive, - ra_offset_arcsec=corrections.ra_correction, - dec_offset_arcsec=corrections.dec_correction, + flux_scale=scale, + flux_offset_mJy=offset, + ra_offset_arcsec=dra_median, + dec_offset_arcsec=ddec_median, overwrite=overwrite, ) @@ -134,14 +220,29 @@ def main( ) # get corrections skip = False - try: - corrections = corrections_df.loc[(epoch_dir, field, sbid)] - except KeyError: + # try: + # corrections = corrections_df.loc[(epoch_dir, field, sbid)] + # except KeyError: + # skip = True + # logger.warning( + # f"Corrections not found for {image_path} ({epoch_dir}, {field}," + # f" {sbid})." + # ) + flux_corrections, pos_corrections = get_correct_correction_file( + correction_files_list=correction_files_path_glob_list, + epoch=epoch_dir, + img_field=field, + img_sbid=sbid_str, + ) + if (flux_corrections is None) | (pos_corrections is None): skip = True logger.warning( - f"Corrections not found for {components_path} ({epoch_dir}, {field}," + f"Corrections not found for {image_path} ({epoch_dir}, {field}," f" {sbid})." 
) + else: + scale, offset, scale_err, offset_err = flux_corrections + dra_median, ddec_median, dra_madfm, ddec_madfm = pos_corrections if not islands_path.exists(): logger.warning(f"Islands catalogue not found for {components_path}.") skip = not islands_path.exists() or skip @@ -153,13 +254,22 @@ def main( stokes_dir = f"{path.parent.parent.name}_CORRECTED" output_dir = vast_tile_data_root / stokes_dir / epoch_dir output_dir.mkdir(parents=True, exist_ok=True) + # _ = shift_and_scale_catalog( + # path, + # output_dir, + # flux_scale=corrections.flux_peak_correction_multiplicative, + # flux_offset_mJy=corrections.flux_peak_correction_additive, + # ra_offset_arcsec=corrections.ra_correction, + # dec_offset_arcsec=corrections.dec_correction, + # overwrite=overwrite, + # ) _ = shift_and_scale_catalog( path, output_dir, - flux_scale=corrections.flux_peak_correction_multiplicative, - flux_offset_mJy=corrections.flux_peak_correction_additive, - ra_offset_arcsec=corrections.ra_correction, - dec_offset_arcsec=corrections.dec_correction, + flux_scale=scale, + flux_offset_mJy=offset, + ra_offset_arcsec=dra_median, + dec_offset_arcsec=ddec_median, overwrite=overwrite, ) diff --git a/vast_post_processing/cli/run_corrections.py b/vast_post_processing/cli/run_corrections.py new file mode 100644 index 0000000..da4e95f --- /dev/null +++ b/vast_post_processing/cli/run_corrections.py @@ -0,0 +1,346 @@ +from loguru import logger +from pathlib import Path +from typing import Optional, Tuple, Generator +from astropy.coordinates import Angle +import astropy.units as u +import click, sys, os +from uncertainties import ufloat +from itertools import chain +import pandas as pd +import typer +from astropy.table import QTable +from astropy.io import fits +from astropy import units as u +from vast_post_processing.catalogs import Catalog + +from vast_post_processing.corrections import ( + shift_and_scale_catalog, + shift_and_scale_image, + vast_xmatch_qc, +) + + +class _AstropyUnitType(click.ParamType): + def convert(self, value, param, ctx, unit_physical_type): + try: + unit = u.Unit(value) + except ValueError: + self.fail(f"astropy.units.Unit does not understand: {value}.") + if unit.physical_type != unit_physical_type: + self.fail( + f"{unit} is a {unit.physical_type} unit. It must be of type" + f" {unit_physical_type}." + ) + else: + return unit + + +class AngleUnitType(_AstropyUnitType): + name = "angle_unit" + + def convert(self, value, param, ctx): + return super().convert(value, param, ctx, "angle") + + +class FluxUnitType(_AstropyUnitType): + name = "flux_unit" + + def convert(self, value, param, ctx): + return super().convert(value, param, ctx, "spectral flux density") + + +class AngleQuantityType(click.ParamType): + name = "angle_quantity" + + def convert(self, value, param, ctx): + try: + angle = Angle(value) + return angle + except ValueError: + self.fail(f"astropy.coordinates.Angle does not understand: {value}.") + + +ANGLE_UNIT_TYPE = AngleUnitType() +FLUX_UNIT_TYPE = FluxUnitType() +ANGLE_QUANTITY_TYPE = AngleQuantityType() + + +def get_correct_correction_file(correction_files_list, img_field): + count = 0 + for f in chain.from_iterable(correction_files_list): + filename = f.name + _, _, field, *_ = filename.split(".") + field = field.replace("RACS", "VAST") + if field in img_field: + count += 1 + return f + else: + continue + if count == 0: + return None + + +def get_psf_from_image(image_path: str): + """ + Funtion used to get the point spread function (PSF) extent in major and minor axis. 
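A sketch of what this helper is meant to do, assuming the restoring-beam keywords BMAJ and BMIN sit in the primary header and are given in degrees (the path below is a placeholder). Note that the values have to be read via hdul[0].header rather than by indexing the HDUList with the keyword.

    from astropy.io import fits
    from astropy import units as u

    with fits.open("path/to/image.fits") as hdul:
        psf_major = hdul[0].header["BMAJ"] * u.deg  # restoring beam major axis
        psf_minor = hdul[0].header["BMIN"] * u.deg  # restoring beam minor axis
    print(psf_major.to(u.arcsec), psf_minor.to(u.arcsec))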
+ These will be in the header of the image file + + Parameters + ---------- + image_path: str + Path to the image file + + Returns + ------- + Tuple(psf_major, psf_minor) + Major and minor axes of the PSF. + """ + + hdu = fits.open(image_path) + psf_maj = hdu["BMAJ"] * u.degree + psf_min = hdu["BMIN"] * u.degree + hdu.close() + return (psf_maj.to(u.arcsec), psf_min.to(u.arcsec)) + + +def main( + vast_tile_data_root: Path = typer.Argument( + ..., + help=( + "Path to VAST TILES data directory, i.e. the directory that contains the" + " STOKES* directories." + ), + exists=True, + file_okay=False, + dir_okay=True, + ), + vast_corrections_root: Path = typer.Option( + "/data/vast-survey/RACS/release-format/EPOCH00/TILES/STOKESI_SELAVY", + help=( + "Path to RACS data that is can be used to correct VAST data. Tries to use" + " EPOCH00 as the defualt epoch. If not the user can override this by" + " giving a path to a folder that contain the selavy output" + ), + exists=True, + file_okay=False, + dir_okay=True, + ), + epoch: Optional[list[int]] = typer.Option( + None, + help=( + "Only correct the given observation epochs. Can be given multiple times," + " e.g. --epoch 1 --epoch 2. If no epochs are given (the default), then" + " correct all available epochs." + ), + ), + radius: Optional[ANGLE_QUANTITY_TYPE] = typer.Option( + "10 arcsec", + help=( + "Maximum separation limit for nearest-neighbour crossmatch. Accepts any " + "string understood by astropy.coordinates.Angle." + ), + ), + condon: Optional[bool] = typer.Option( + True, + help=( + "Calculate Condon (1997) flux errors and use them instead of the original " + "errors. Will also correct the peak flux values for noise. Requires that the " + "input images follow the VAST naming convention, for TILE images: EPOCH01/" + "TILES/STOKESI_IMAGES/selavy-image.i.SB9667.cont.VAST_0102-06A.linmos.taylor.0" + ".restored.conv.fits. Note that for TILE images, the epoch is determined " + "from the full path. If the input catalogs do not follow this convention, then " + "the PSF sizes must be supplied using --psf-reference and/or --psf. The " + "deafult behaviour is to lookup the PSF sizes from the header of the image" + ), + ), + psf_ref: Optional[list[float]] = typer.Option( + None, + help=( + "If using --condon but want to give the psfs manually, use this specified PSF size in " + "arcsec for `reference_catalog`. First argument is major axis followed by nimor axis." + ), + ), + psf: Optional[list[float]] = typer.Option( + None, + help=( + "If using --condon but want to give the psfs manually, use this specified PSF size in " + "arcsec for `catalof`. First argument is major axis followed by nimor axis." + ), + ), + overwrite: bool = False, + verbose: bool = False, +): + """Read astrometric and flux corrections produced by vast-xmatch and apply them to + VAST images and catalogues in vast-data. See https://github.com/marxide/vast-xmatch. 
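The --radius option above is documented to accept any string understood by astropy.coordinates.Angle, so values such as the following are all equivalent ways of expressing a crossmatch radius:

    from astropy.coordinates import Angle
    import astropy.units as u

    print(Angle("10 arcsec"), Angle("0.5 arcmin").to(u.arcsec), Angle("0.003 deg").to(u.arcsec))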
+ """ + # configure logger + if not verbose: + # replace the default sink + logger.remove() + logger.add(sys.stderr, level="INFO") + + # read corrections + image_path_glob_list: list[Generator[Path, None, None]] = [] + components_path_glob_list: list[Generator[Path, None, None]] = [] + correction_files_path_glob_list: list[Generator[Path, None, None]] = [] + + correction_files_path_glob_list.append(vast_corrections_root.glob("*.xml")) + + if epoch is None or len(epoch) == 0: + image_path_glob_list.append( + vast_tile_data_root.glob("STOKESI_IMAGES/epoch_*/*.fits") + ) + components_path_glob_list.append( + vast_tile_data_root.glob("STOKESI_SELAVY/epoch_*/*.components.xml") + ) + else: + for n in epoch: + image_path_glob_list.append( + vast_tile_data_root.glob(f"STOKESI_IMAGES/epoch_{n}/*.fits") + ) + components_path_glob_list.append( + vast_tile_data_root.glob(f"STOKESI_SELAVY/epoch_{n}/*.components.xml") + ) + + # construct output path to store corrections + corr_dir = vast_tile_data_root / "corr_db" + if not os.path.isdir(corr_dir): + os.mkdir(corr_dir) + + # get corrections for an image and the correct it + for image_path in chain.from_iterable(image_path_glob_list): + epoch_dir = image_path.parent.name + _, _, field, sbid_str, *_ = image_path.name.split(".") + sbid = int(sbid_str[2:]) + + # get rms and background images + rms_path = ( + vast_tile_data_root + / "STOKESI_RMSMAPS" + / epoch_dir + / f"noiseMap.{image_path.name}" + ) + bkg_path = ( + vast_tile_data_root + / "STOKESI_RMSMAPS" + / epoch_dir + / f"meanMap.{image_path.name}" + ) + + # construct output path to store corrections for each epoch + epoch_corr_dir = vast_tile_data_root / "corr_db" / epoch_dir + + if not os.path.isdir(epoch_corr_dir): + os.mkdir(epoch_corr_dir) + + ref_file = get_correct_correction_file( + correction_files_list=correction_files_path_glob_list, + img_field=field, + ) + + skip = False + if not rms_path.exists(): + logger.warning(f"RMS image not found for {image_path}.") + if not bkg_path.exists(): + logger.warning(f"Background image not found for {image_path}.") + skip = ( + not ((rms_path.exists()) and (bkg_path.exists()) and (ref_file is not None)) + or skip + ) + if skip: + if not ((rms_path.exists()) and (bkg_path.exists())): + logger.warning(f"Skipping {image_path}, RMS/BKG maps do not exist") + elif ref_file is None: + logger.warning(f"Skipping {image_path}, no reference field found.") + continue + else: + crossmatch_file = epoch_corr_dir / image_path.replace( + "components.xml", "corrections.csv" + ) + csv_file = epoch_corr_dir / "corrections.csv" + + # Get the psf measurements to estimate errors follwoing Condon 1997 + if psf_ref is not None: + psf_reference = psf_ref + else: + psf_reference = get_psf_from_image(ref_file) + + if psf is not None: + psf_image = psf + else: + psf_image = get_psf_from_image(image_path) + ( + dra_median_value, + ddec_median_value, + flux_corr_mult, + flux_corr_add, + ) = vast_xmatch_qc( + reference_catalog_path=ref_file, + catalog_path=image_path, + radius=Angle(radius), + condon=condon, + psf_reference=psf_reference, + psf=psf_image, + fix_m=False, + fix_b=False, + crossmatch_output=crossmatch_file, + csv_output=csv_file, + ) + + # get corrections + for path in (image_path, rms_path, bkg_path): + stokes_dir = f"{path.parent.parent.name}_CORRECTED" + output_dir = vast_tile_data_root / stokes_dir / epoch_dir + output_dir.mkdir(parents=True, exist_ok=True) + _ = shift_and_scale_image( + path, + output_dir, + flux_scale=flux_corr_mult, + flux_offset_mJy=flux_corr_add, + 
ra_offset_arcsec=dra_median_value, + dec_offset_arcsec=ddec_median_value, + overwrite=overwrite, + ) + + # Do the same for catalog files + # Look for any component and island files correspnding to this image + comp_files = [] + for p in list(components_path_glob_list[0]): + comp_file_name = p.name + comp_file_epoch = p.parent.name + if ( + (epoch_dir in comp_file_epoch) + and (field in comp_file_name) + and (f"SB{sbid}" in comp_file_name) + ): + comp_files.append(p) + + if len(comp_files) == 0: + logger.warning(f"Selavy catalogue not found for the image {image_path}") + elif len(comp_files) == 1: + if ".components" in comp_files[0].name: + logger.warning( + f"Islannd catalogue not found for the image {image_path}" + ) + else: + logger.warning( + f"Islannd catalogue not found for the image {image_path}" + ) + else: + for path in comp_files: + stokes_dir = f"{path.parent.parent.name}_CORRECTED" + output_dir = vast_tile_data_root / stokes_dir / epoch_dir + output_dir.mkdir(parents=True, exist_ok=True) + _ = shift_and_scale_catalog( + path, + output_dir, + flux_scale=flux_corr_mult, + flux_offset_mJy=flux_corr_add, + ra_offset_arcsec=dra_median_value, + dec_offset_arcsec=ddec_median_value, + overwrite=overwrite, + ) + + +if __name__ == "__main__": + typer.run(main) diff --git a/vast_post_processing/corrections.py b/vast_post_processing/corrections.py index 927a8f0..7c1d0f4 100644 --- a/vast_post_processing/corrections.py +++ b/vast_post_processing/corrections.py @@ -1,13 +1,126 @@ from pathlib import Path import warnings - -from astropy.coordinates import SkyCoord +from astropy.coordinates import SkyCoord, Angle from astropy.io import fits from astropy.io.votable import parse import astropy.units as u +from uncertainties import ufloat from astropy.wcs import WCS, FITSFixedWarning from loguru import logger import numpy as np +from typing import Tuple, Optional +from vast_post_processing.catalogs import Catalog +from vast_post_processing.crossmatch import ( + crossmatch_qtables, + calculate_positional_offsets, + calculate_flux_offsets, +) + + +def vast_xmatch_qc( + reference_catalog_path: str, + catalog_path: str, + radius: Angle = Angle("10arcsec"), + condon: bool = False, + psf_reference: Optional[Tuple[float, float]] = None, + psf: Optional[Tuple[float, float]] = None, + fix_m: bool = False, + fix_b: bool = False, + positional_unit: u.Unit = u.Unit("arcsec"), + flux_unit: u.Unit = u.Unit("mJy"), + crossmatch_output: Optional[str] = None, + csv_output: Optional[str] = None, +): + # convert catalog path strings to Path objects + reference_catalog_path = Path(reference_catalog_path) + catalog_path = Path(catalog_path) + flux_unit /= u.beam # add beam divisor as we currently only work with peak fluxes + + reference_catalog = Catalog( + reference_catalog_path, + psf=psf_reference, + condon=condon, + input_format="selavy", + ) + catalog = Catalog( + catalog_path, + psf=psf, + condon=condon, + input_format="selavy", + ) + + # perform the crossmatch + xmatch_qt = crossmatch_qtables(catalog, reference_catalog, radius=radius) + # select xmatches with non-zero flux errors and no siblings + logger.info("Removing crossmatched sources with siblings or flux peak errors = 0.") + mask = xmatch_qt["flux_peak_err"] > 0 + mask &= xmatch_qt["flux_peak_err_reference"] > 0 + mask &= xmatch_qt["has_siblings"] == 0 + mask &= xmatch_qt["has_siblings_reference"] == 0 + data = xmatch_qt[mask] + logger.info( + f"{len(data):.2f} crossmatched sources remaining ({(len(data) / len(xmatch_qt)) * 100:.2f}%).", + ) + + # Write 
the cross-match data into csv + if crossmatch_output is not None: + data.write("crossmatch.csv", overwrite=True) + # calculate positional offsets and flux ratio + dra_median, ddec_median, dra_madfm, ddec_madfm = calculate_positional_offsets(data) + dra_median_value = dra_median.to(positional_unit).value + dra_madfm_value = dra_madfm.to(positional_unit).value + ddec_median_value = ddec_median.to(positional_unit).value + ddec_madfm_value = ddec_madfm.to(positional_unit).value + logger.info( + f"dRA median: {dra_median_value:.2f} MADFM: {dra_madfm_value:.2f} {positional_unit}. dDec median: {ddec_median_value:.2f} MADFM: {ddec_madfm_value:.2f} {positional_unit}.", + ) + + gradient, offset, gradient_err, offset_err = calculate_flux_offsets( + data, fix_m=fix_m, fix_b=fix_b + ) + ugradient = ufloat(gradient, gradient_err) + uoffset = ufloat(offset.to(flux_unit).value, offset_err.to(flux_unit).value) + logger.info( + f"ODR fit parameters: Sp = Sp,ref * {ugradient} + {uoffset} {flux_unit}.", + ) + + flux_corr_mult = 1 / ugradient + flux_corr_add = -1 * uoffset + + if csv_output is not None: + # output has been requested + + if True: # csv_output is not None: + csv_output_path = Path(csv_output) # ensure Path object + sbid = catalog.sbid if catalog.sbid is not None else "" + if not csv_output_path.exists(): + f = open(csv_output_path, "w") + print( + "field,release_epoch,sbid,ra_correction,dec_correction,ra_madfm," + "dec_madfm,flux_peak_correction_multiplicative,flux_peak_correction_additive," + "flux_peak_correction_multiplicative_err,flux_peak_correction_additive_err," + "n_sources", + file=f, + ) + else: + f = open(csv_output_path, "a") + logger.info( + "Writing corrections CSV. To correct positions, add the corrections to" + " the original source positions i.e. RA' = RA + ra_correction /" + " cos(Dec). To correct fluxes, add the additive correction and multiply" + " the result by the multiplicative correction i.e. S' =" + " flux_peak_correction_multiplicative(S +" + " flux_peak_correction_additive)." 
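Applied to a single source, the convention described in that message might look like the sketch below (all numbers invented; the division by 3600 only converts the arcsecond corrections written to the CSV into degrees before adding them to the catalogue positions):

    import numpy as np

    ra_deg, dec_deg, flux_peak_mjy = 187.25, -6.35, 4.2   # original catalogue values
    ra_correction, dec_correction = 0.8, -0.5             # arcsec, as written to the CSV
    m, b = 1.02, -0.15                                    # multiplicative / additive flux terms

    ra_corrected = ra_deg + (ra_correction / 3600.0) / np.cos(np.radians(dec_deg))
    dec_corrected = dec_deg + dec_correction / 3600.0
    flux_corrected = m * (flux_peak_mjy + b)              # S' = m * (S + b)
    print(ra_corrected, dec_corrected, flux_corrected)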
+ ) + print( + f"{catalog.field},{catalog.epoch},{sbid},{dra_median_value * -1}," + f"{ddec_median_value * -1},{dra_madfm_value},{ddec_madfm_value}," + f"{flux_corr_mult.nominal_value},{flux_corr_add.nominal_value}," + f"{flux_corr_mult.std_dev},{flux_corr_add.std_dev},{len(data)}", + file=f, + ) + f.close() + return dra_median_value, ddec_median_value, flux_corr_mult, flux_corr_add def shift_and_scale_image( diff --git a/vast_post_processing/crossmatch.py b/vast_post_processing/crossmatch.py new file mode 100644 index 0000000..bdbdc3c --- /dev/null +++ b/vast_post_processing/crossmatch.py @@ -0,0 +1,153 @@ +import logging +from typing import Tuple + +from astropy.coordinates import SkyCoord, Angle, match_coordinates_sky +from astropy.table import QTable, join, join_skycoord +import astropy.units as u +import numpy as np +from scipy import odr + +from vast_post_processing.catalogs import Catalog + + +logger = logging.getLogger(__name__) + + +def median_abs_deviation(data): + median = np.median(data) + return np.median(np.abs(data - median)) + + +def straight_line(B, x): + m, b = B + return m * x + b + + +def join_match_coordinates_sky( + coords1: SkyCoord, coords2: SkyCoord, seplimit: u.arcsec +): + idx, separation, dist_3d = match_coordinates_sky(coords1, coords2) + mask = separation < seplimit + return np.where(mask)[0], idx[mask], separation[mask], dist_3d[mask] + + +def crossmatch_qtables( + catalog: Catalog, + catalog_reference: Catalog, + radius: Angle = Angle("10 arcsec"), + catalog_coord_cols: Tuple[str, str] = ("ra_deg_cont", "dec_deg_cont"), + catalog_reference_coord_cols: Tuple[str, str] = ("ra_deg_cont", "dec_deg_cont"), +) -> QTable: + catalog_ra, catalog_dec = catalog_coord_cols + catalog_reference_ra, catalog_reference_dec = catalog_reference_coord_cols + + logger.debug("Using crossmatch radius: %s.", radius) + + xmatch = join( + catalog.table, + catalog_reference.table, + keys="coord", + table_names=["", "reference"], + join_funcs={ + "coord": join_skycoord(radius, distance_func=join_match_coordinates_sky) + }, + ) + # remove trailing _ from catalog column names + xmatch.rename_columns( + [col for col in xmatch.colnames if col.endswith("_")], + [col.rstrip("_") for col in xmatch.colnames if col.endswith("_")], + ) + # compute the separations + xmatch["separation"] = xmatch["coord_reference"].separation(xmatch["coord"]) + xmatch["dra"], xmatch["ddec"] = xmatch["coord_reference"].spherical_offsets_to( + xmatch["coord"] + ) + xmatch["flux_peak_ratio"] = ( + xmatch["flux_peak"] / xmatch["flux_peak_reference"] + ).decompose() + + logger.info( + "Num cross-matches: %d. Num cross-matches to unique reference source: %d" + " (%d%%).", + len(xmatch), + len(set(xmatch["coord_id"])), + (len(set(xmatch["coord_id"])) / len(xmatch)) * 100, + ) + + return xmatch + + +def calculate_positional_offsets( + xmatch_qt: QTable, +) -> Tuple[u.Quantity, u.Quantity, u.Quantity, u.Quantity]: + """Calculate the median positional offsets and the median absolute deviation between + matched sources. + + Parameters + ---------- + xmatch_qt : QTable + QTable of crossmatched sources. Must contain columns: dra, ddec. + + Returns + ------- + Tuple[u.Quantity, u.Quantity, u.Quantity, u.Quantity] + Median RA offset, median Dec offset, median absolute deviation of RA offsets, + median absolute deviation of Dec offsets. Units match their inputs and are of + angular type. 
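Numerically this boils down to a median and a median absolute deviation from the median (MADFM) per axis; a toy example with invented offsets standing in for the crossmatch "dra" column:

    import numpy as np
    import astropy.units as u

    dra = np.array([0.6, 0.9, 0.4, 1.1]) * u.arcsec
    dra_median = np.median(dra)
    dra_madfm = np.median(np.abs(dra - dra_median))
    print(dra_median, dra_madfm)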
+ """ + dra_median = np.median(xmatch_qt["dra"]) + dra_madfm = median_abs_deviation(xmatch_qt["dra"]) + ddec_median = np.median(xmatch_qt["ddec"]) + ddec_madfm = median_abs_deviation(xmatch_qt["ddec"]) + + return dra_median, ddec_median, dra_madfm, ddec_madfm + + +def calculate_flux_offsets( + xmatch_qt: QTable, + init_m: float = 1.0, + init_b: float = 0.0, + fix_m: bool = False, + fix_b: bool = False, +) -> Tuple[float, u.Quantity, float, u.Quantity]: + """Calculate the gradient and offset of a straight-line fit to the peak fluxes for + crossmatched sources. The function `y = mx + b` is fit to the reference peak fluxes + vs the peak fluxes using orthogonal distance regression with `scipy.odr`. + + Parameters + ---------- + xmatch_qt : QTable + QTable of crossmatched sources. Must contain columns: flux_peak, + flux_peak_reference, flux_peak_err, flux_peak_err_reference. + init_m : float + Initial gradient parameter passed to the fitting function, default 1.0. + init_b : float + Initial offset parameter passed to the fitting function, default 0.0. + fix_m : bool + If True, do not allow the gradient to vary during fitting, default False. + fix_b : bool + If True, do not allow the offest to vary during fitting, default False. + + Returns + ------- + Tuple[float, u.Quantity, float, u.Quantity] + Model fit parameters: the gradient, intercept (offset), gradient error, and + intercept error. Offset and offset error unit match the reference flux peak + input and are of spectral flux density type. + """ + ifixb = [0 if fix_m else 1, 0 if fix_b else 1] + flux_unit = xmatch_qt["flux_peak_reference"].unit + linear_model = odr.Model(straight_line) + # convert all to reference flux unit as ODR does not preserve Quantity objects + odr_data = odr.RealData( + xmatch_qt["flux_peak_reference"].to(flux_unit).value, + xmatch_qt["flux_peak"].to(flux_unit).value, + sx=xmatch_qt["flux_peak_err_reference"].to(flux_unit).value, + sy=xmatch_qt["flux_peak_err"].to(flux_unit).value, + ) + odr_obj = odr.ODR(odr_data, linear_model, beta0=[init_m, init_b], ifixb=ifixb) + odr_out = odr_obj.run() + gradient, offset = odr_out.beta + gradient_err, offset_err = odr_out.sd_beta + + return gradient, offset * flux_unit, gradient_err, offset_err * flux_unit From ce61320dea60d2bf1baa8305e342db12875bb583 Mon Sep 17 00:00:00 2001 From: Akash Date: Wed, 12 Jul 2023 18:20:47 -0500 Subject: [PATCH 02/31] Cleaned up minor naming issues with variables --- vast_post_processing/cli/run_corrections.py | 92 +++++++++++---------- vast_post_processing/corrections.py | 2 +- 2 files changed, 50 insertions(+), 44 deletions(-) diff --git a/vast_post_processing/cli/run_corrections.py b/vast_post_processing/cli/run_corrections.py index da4e95f..6f1426f 100644 --- a/vast_post_processing/cli/run_corrections.py +++ b/vast_post_processing/cli/run_corrections.py @@ -71,9 +71,9 @@ def get_correct_correction_file(correction_files_list, img_field): filename = f.name _, _, field, *_ = filename.split(".") field = field.replace("RACS", "VAST") - if field in img_field: + if (field in img_field) and ("components" in filename): count += 1 - return f + return f.as_posix() else: continue if count == 0: @@ -95,10 +95,12 @@ def get_psf_from_image(image_path: str): Tuple(psf_major, psf_minor) Major and minor axes of the PSF. 
""" - + image_path = image_path.replace("SELAVY", "IMAGES") + image_path = image_path.replace("selavy-", "") + image_path = image_path.replace(".components.xml", ".fits") hdu = fits.open(image_path) - psf_maj = hdu["BMAJ"] * u.degree - psf_min = hdu["BMIN"] * u.degree + psf_maj = hdu[0].header["BMAJ"] * u.degree + psf_min = hdu[0].header["BMIN"] * u.degree hdu.close() return (psf_maj.to(u.arcsec), psf_min.to(u.arcsec)) @@ -133,8 +135,8 @@ def main( " correct all available epochs." ), ), - radius: Optional[ANGLE_QUANTITY_TYPE] = typer.Option( - "10 arcsec", + radius: Optional[float] = typer.Option( + 10, help=( "Maximum separation limit for nearest-neighbour crossmatch. Accepts any " "string understood by astropy.coordinates.Angle." @@ -164,7 +166,7 @@ def main( None, help=( "If using --condon but want to give the psfs manually, use this specified PSF size in " - "arcsec for `catalof`. First argument is major axis followed by nimor axis." + "arcsec for `catalog`. First argument is major axis followed by nimor axis." ), ), overwrite: bool = False, @@ -228,7 +230,7 @@ def main( ) # construct output path to store corrections for each epoch - epoch_corr_dir = vast_tile_data_root / "corr_db" / epoch_dir + epoch_corr_dir = corr_dir / epoch_dir if not os.path.isdir(epoch_corr_dir): os.mkdir(epoch_corr_dir) @@ -243,8 +245,37 @@ def main( logger.warning(f"RMS image not found for {image_path}.") if not bkg_path.exists(): logger.warning(f"Background image not found for {image_path}.") + + # Look for any component and island files correspnding to this image + comp_files = [] + for p in list(components_path_glob_list[0]): + comp_file_name = p.name + comp_file_epoch = p.parent.name + if ( + (epoch_dir in comp_file_epoch) + and (field in comp_file_name) + and (f"SB{sbid}" in comp_file_name) + ): + comp_files.append(p) + + component_file = None + island_file = None + if len(comp_files) == 0: + logger.warning(f"Selavy catalogue not found for the image {image_path}") + else: + for i in comp_files: + if "components" in i.as_posix(): + component_file = i + elif "islands" in i.as_posix(): + island_file = i + skip = ( - not ((rms_path.exists()) and (bkg_path.exists()) and (ref_file is not None)) + not ( + (rms_path.exists()) + and (bkg_path.exists()) + and (ref_file is not None) + and (component_file is not None) + ) or skip ) if skip: @@ -254,10 +285,9 @@ def main( logger.warning(f"Skipping {image_path}, no reference field found.") continue else: - crossmatch_file = epoch_corr_dir / image_path.replace( - "components.xml", "corrections.csv" - ) - csv_file = epoch_corr_dir / "corrections.csv" + fname = image_path.name.replace(".fits", "corrections.csv") + crossmatch_file = epoch_corr_dir / fname + csv_file = epoch_corr_dir / "all_fields_corrections.csv" # Get the psf measurements to estimate errors follwoing Condon 1997 if psf_ref is not None: @@ -268,7 +298,7 @@ def main( if psf is not None: psf_image = psf else: - psf_image = get_psf_from_image(image_path) + psf_image = get_psf_from_image(image_path.as_posix()) ( dra_median_value, ddec_median_value, @@ -276,8 +306,8 @@ def main( flux_corr_add, ) = vast_xmatch_qc( reference_catalog_path=ref_file, - catalog_path=image_path, - radius=Angle(radius), + catalog_path=component_file.as_posix(), + radius=Angle(radius * u.arcsec), condon=condon, psf_reference=psf_reference, psf=psf_image, @@ -302,32 +332,8 @@ def main( overwrite=overwrite, ) - # Do the same for catalog files - # Look for any component and island files correspnding to this image - comp_files = [] - for p 
in list(components_path_glob_list[0]): - comp_file_name = p.name - comp_file_epoch = p.parent.name - if ( - (epoch_dir in comp_file_epoch) - and (field in comp_file_name) - and (f"SB{sbid}" in comp_file_name) - ): - comp_files.append(p) - - if len(comp_files) == 0: - logger.warning(f"Selavy catalogue not found for the image {image_path}") - elif len(comp_files) == 1: - if ".components" in comp_files[0].name: - logger.warning( - f"Islannd catalogue not found for the image {image_path}" - ) - else: - logger.warning( - f"Islannd catalogue not found for the image {image_path}" - ) - else: - for path in comp_files: + # Do the same for catalog files + for path in (component_file, island_file): stokes_dir = f"{path.parent.parent.name}_CORRECTED" output_dir = vast_tile_data_root / stokes_dir / epoch_dir output_dir.mkdir(parents=True, exist_ok=True) diff --git a/vast_post_processing/corrections.py b/vast_post_processing/corrections.py index 7c1d0f4..95ada94 100644 --- a/vast_post_processing/corrections.py +++ b/vast_post_processing/corrections.py @@ -64,7 +64,7 @@ def vast_xmatch_qc( # Write the cross-match data into csv if crossmatch_output is not None: - data.write("crossmatch.csv", overwrite=True) + data.write(crossmatch_output, overwrite=True) # calculate positional offsets and flux ratio dra_median, ddec_median, dra_madfm, ddec_madfm = calculate_positional_offsets(data) dra_median_value = dra_median.to(positional_unit).value From 270ec3eec7165afde8d539bfd5f1bb18b4279ae1 Mon Sep 17 00:00:00 2001 From: Akash Date: Wed, 12 Jul 2023 18:24:10 -0500 Subject: [PATCH 03/31] Removed redundant code --- vast_post_processing/cli/correct_vast.py | 278 ----------------------- 1 file changed, 278 deletions(-) delete mode 100644 vast_post_processing/cli/correct_vast.py diff --git a/vast_post_processing/cli/correct_vast.py b/vast_post_processing/cli/correct_vast.py deleted file mode 100644 index a54686c..0000000 --- a/vast_post_processing/cli/correct_vast.py +++ /dev/null @@ -1,278 +0,0 @@ -from itertools import chain -from pathlib import Path -import sys -from typing import Optional, Generator - -from loguru import logger -import pandas as pd -import typer -from astropy.table import QTable -from astropy.io import fits -from astropy import units as u - -from vast_post_processing.corrections import ( - shift_and_scale_catalog, - shift_and_scale_image, - calculate_positional_offsets, - calculate_flux_offsets, -) - - -def get_correct_correction_file(correction_files_list, epoch, img_field, img_sbid): - count = 0 - for f in chain.from_iterable(correction_files_list): - epoch_name = f.parent.name - if epoch_name in epoch: - filename = f.name - _, _, _, sbid, field, *_ = filename.split("_") - sbid = sbid.replace("-VAST", "") - field = field.replace(".csv", "") - if (sbid in img_sbid) & (field in img_field): - df = QTable.read(f) - flux_shifts = calculate_flux_offsets(df) - pos_shifts = calculate_positional_offsets(df) - count += 1 - return flux_shifts, pos_shifts - else: - continue - if count == 0: - return None, None - - -def get_psf_from_image(image_path: str): - """ - Funtion used to get the point spread function (PSF) extent in major and minor axis. - These will be in the header of the image file - - Parameters - ---------- - image_path: str - Path to the image file - - Returns - ------- - Tuple(psf_major, psf_minor) - Major and minor axes of the PSF. 
- """ - - hdu = fits.open(image_path) - psf_maj = hdu["BMAJ"] * u.degree - psf_min = hdu["BMIN"] * u.degree - return psf_maj.to(u.arcsec), psf_min.to(u.arcsec) - - -def main( - vast_tile_data_root: Path = typer.Argument( - ..., - help=( - "Path to VAST TILES data directory, i.e. the directory that contains the" - " STOKES* directories." - ), - exists=True, - file_okay=False, - dir_okay=True, - ), - vast_corrections_csv_root: Path = typer.Option( - "/data/vast-survey/VAST/askap-surveys-database/vast/db/", - help=( - "Path to VAST corrections CSV file produced by vast-xmatch. Tries to use" - " the default path of these files. If not the user can override this by" - "giving a path to file" - ), - exists=True, - file_okay=True, - dir_okay=False, - ), - epoch: Optional[list[int]] = typer.Option( - None, - help=( - "Only correct the given observation epochs. Can be given multiple times," - " e.g. --epoch 1 --epoch 2. If no epochs are given (the default), then" - " correct all available epochs." - ), - ), - overwrite: bool = False, - verbose: bool = False, -): - """Read astrometric and flux corrections produced by vast-xmatch and apply them to - VAST images and catalogues in vast-data. See https://github.com/marxide/vast-xmatch. - """ - # configure logger - if not verbose: - # replace the default sink - logger.remove() - logger.add(sys.stderr, level="INFO") - - # read corrections - # corrections_df = ( - # pd.read_csv(vast_corrections_csv) - # .set_index(["release_epoch", "field", "sbid"]) - # .sort_index() - # ) - image_path_glob_list: list[Generator[Path, None, None]] = [] - components_path_glob_list: list[Generator[Path, None, None]] = [] - correction_files_path_glob_list: list[Generator[Path, None, None]] = [] - if epoch is None or len(epoch) == 0: - image_path_glob_list.append( - vast_tile_data_root.glob("STOKESI_IMAGES/epoch_*/*.fits") - ) - components_path_glob_list.append( - vast_tile_data_root.glob("STOKESI_SELAVY/epoch_*/*.components.xml") - ) - correction_files_path_glob_list.append( - vast_corrections_csv_root.glob("epoch_*/cat_match_RACS0*.csv") - ) - else: - for n in epoch: - image_path_glob_list.append( - vast_tile_data_root.glob(f"STOKESI_IMAGES/epoch_{n}/*.fits") - ) - components_path_glob_list.append( - vast_tile_data_root.glob(f"STOKESI_SELAVY/epoch_{n}/*.components.xml") - ) - correction_files_path_glob_list.append( - vast_corrections_csv_root.glob(f"epoch_{n}/cat_match_RACS0*.csv") - ) - - # correct images - for image_path in chain.from_iterable(image_path_glob_list): - epoch_dir = image_path.parent.name - _, _, field, sbid_str, *_ = image_path.name.split(".") - sbid = int(sbid_str[2:]) - # get rms and background images - rms_path = ( - vast_tile_data_root - / "STOKESI_RMSMAPS" - / epoch_dir - / f"noiseMap.{image_path.name}" - ) - bkg_path = ( - vast_tile_data_root - / "STOKESI_RMSMAPS" - / epoch_dir - / f"meanMap.{image_path.name}" - ) - # get corrections - skip = False - # try: - # corrections = corrections_df.loc[(epoch_dir, field, sbid)] - # except KeyError: - # skip = True - # logger.warning( - # f"Corrections not found for {image_path} ({epoch_dir}, {field}," - # f" {sbid})." - # ) - flux_corrections, pos_corrections = get_correct_correction_file( - correction_files_list=correction_files_path_glob_list, - epoch=epoch_dir, - img_field=field, - img_sbid=sbid_str, - ) - if (flux_corrections is None) | (pos_corrections is None): - skip = True - logger.warning( - f"Corrections not found for {image_path} ({epoch_dir}, {field}," - f" {sbid})." 
- ) - else: - scale, offset, scale_err, offset_err = flux_corrections - dra_median, ddec_median, dra_madfm, ddec_madfm = pos_corrections - if not rms_path.exists(): - logger.warning(f"RMS image not found for {image_path}.") - if not bkg_path.exists(): - logger.warning(f"Background image not found for {image_path}.") - skip = not (rms_path.exists() and bkg_path.exists()) or skip - if skip: - logger.warning(f"Skipping {image_path}.") - continue - - for path in (image_path, rms_path, bkg_path): - stokes_dir = f"{path.parent.parent.name}_CORRECTED" - output_dir = vast_tile_data_root / stokes_dir / epoch_dir - output_dir.mkdir(parents=True, exist_ok=True) - # _ = shift_and_scale_image( - # path, - # output_dir, - # flux_scale=corrections.flux_peak_correction_multiplicative, - # flux_offset_mJy=corrections.flux_peak_correction_additive, - # ra_offset_arcsec=corrections.ra_correction, - # dec_offset_arcsec=corrections.dec_correction, - # overwrite=overwrite, - # ) - _ = shift_and_scale_image( - path, - output_dir, - flux_scale=scale, - flux_offset_mJy=offset, - ra_offset_arcsec=dra_median, - dec_offset_arcsec=ddec_median, - overwrite=overwrite, - ) - - # correct catalogs - for components_path in chain.from_iterable(components_path_glob_list): - epoch_dir = components_path.parent.name - _, _, field, sbid_str, *_ = components_path.name.split(".") - sbid = int(sbid_str[2:]) - # get island catalog - islands_path = components_path.with_name( - components_path.name.replace(".components", ".islands") - ) - # get corrections - skip = False - # try: - # corrections = corrections_df.loc[(epoch_dir, field, sbid)] - # except KeyError: - # skip = True - # logger.warning( - # f"Corrections not found for {image_path} ({epoch_dir}, {field}," - # f" {sbid})." - # ) - flux_corrections, pos_corrections = get_correct_correction_file( - correction_files_list=correction_files_path_glob_list, - epoch=epoch_dir, - img_field=field, - img_sbid=sbid_str, - ) - if (flux_corrections is None) | (pos_corrections is None): - skip = True - logger.warning( - f"Corrections not found for {image_path} ({epoch_dir}, {field}," - f" {sbid})." 
- ) - else: - scale, offset, scale_err, offset_err = flux_corrections - dra_median, ddec_median, dra_madfm, ddec_madfm = pos_corrections - if not islands_path.exists(): - logger.warning(f"Islands catalogue not found for {components_path}.") - skip = not islands_path.exists() or skip - if skip: - logger.warning(f"Skipping {components_path}.") - continue - - for path in (components_path, islands_path): - stokes_dir = f"{path.parent.parent.name}_CORRECTED" - output_dir = vast_tile_data_root / stokes_dir / epoch_dir - output_dir.mkdir(parents=True, exist_ok=True) - # _ = shift_and_scale_catalog( - # path, - # output_dir, - # flux_scale=corrections.flux_peak_correction_multiplicative, - # flux_offset_mJy=corrections.flux_peak_correction_additive, - # ra_offset_arcsec=corrections.ra_correction, - # dec_offset_arcsec=corrections.dec_correction, - # overwrite=overwrite, - # ) - _ = shift_and_scale_catalog( - path, - output_dir, - flux_scale=scale, - flux_offset_mJy=offset, - ra_offset_arcsec=dra_median, - dec_offset_arcsec=ddec_median, - overwrite=overwrite, - ) - - -if __name__ == "__main__": - typer.run(main) From 66919e1e078ef68a40a17c7b5663cb21722d54a7 Mon Sep 17 00:00:00 2001 From: Akash Date: Fri, 14 Jul 2023 01:09:46 -0500 Subject: [PATCH 04/31] Fixed quantities with units; component files matching made easy --- vast_post_processing/cli/run_corrections.py | 51 +++++++-------------- 1 file changed, 17 insertions(+), 34 deletions(-) diff --git a/vast_post_processing/cli/run_corrections.py b/vast_post_processing/cli/run_corrections.py index 6f1426f..99bb051 100644 --- a/vast_post_processing/cli/run_corrections.py +++ b/vast_post_processing/cli/run_corrections.py @@ -67,7 +67,7 @@ def convert(self, value, param, ctx): def get_correct_correction_file(correction_files_list, img_field): count = 0 - for f in chain.from_iterable(correction_files_list): + for f in correction_files_list: filename = f.name _, _, field, *_ = filename.split(".") field = field.replace("RACS", "VAST") @@ -187,13 +187,14 @@ def main( correction_files_path_glob_list: list[Generator[Path, None, None]] = [] correction_files_path_glob_list.append(vast_corrections_root.glob("*.xml")) + correction_files_path_glob_list = list(correction_files_path_glob_list[0]) if epoch is None or len(epoch) == 0: image_path_glob_list.append( vast_tile_data_root.glob("STOKESI_IMAGES/epoch_*/*.fits") ) components_path_glob_list.append( - vast_tile_data_root.glob("STOKESI_SELAVY/epoch_*/*.components.xml") + vast_tile_data_root.glob("STOKESI_SELAVY/epoch_*/*.xml") ) else: for n in epoch: @@ -201,7 +202,7 @@ def main( vast_tile_data_root.glob(f"STOKESI_IMAGES/epoch_{n}/*.fits") ) components_path_glob_list.append( - vast_tile_data_root.glob(f"STOKESI_SELAVY/epoch_{n}/*.components.xml") + vast_tile_data_root.glob(f"STOKESI_SELAVY/epoch_{n}/*.xml") ) # construct output path to store corrections @@ -247,27 +248,9 @@ def main( logger.warning(f"Background image not found for {image_path}.") # Look for any component and island files correspnding to this image - comp_files = [] - for p in list(components_path_glob_list[0]): - comp_file_name = p.name - comp_file_epoch = p.parent.name - if ( - (epoch_dir in comp_file_epoch) - and (field in comp_file_name) - and (f"SB{sbid}" in comp_file_name) - ): - comp_files.append(p) - - component_file = None - island_file = None - if len(comp_files) == 0: - logger.warning(f"Selavy catalogue not found for the image {image_path}") - else: - for i in comp_files: - if "components" in i.as_posix(): - component_file = i - elif 
"islands" in i.as_posix(): - island_file = i + + component_file = Path(ref_file) + island_file = Path(ref_file.replace("components", "islands")) skip = ( not ( @@ -290,12 +273,12 @@ def main( csv_file = epoch_corr_dir / "all_fields_corrections.csv" # Get the psf measurements to estimate errors follwoing Condon 1997 - if psf_ref is not None: + if len(psf_ref) > 0: psf_reference = psf_ref else: psf_reference = get_psf_from_image(ref_file) - if psf is not None: + if len(psf) > 0: psf_image = psf else: psf_image = get_psf_from_image(image_path.as_posix()) @@ -325,10 +308,10 @@ def main( _ = shift_and_scale_image( path, output_dir, - flux_scale=flux_corr_mult, - flux_offset_mJy=flux_corr_add, - ra_offset_arcsec=dra_median_value, - dec_offset_arcsec=ddec_median_value, + flux_scale=flux_corr_mult.n, + flux_offset_mJy=flux_corr_add.n, + ra_offset_arcsec=dra_median_value.item(), + dec_offset_arcsec=ddec_median_value.item(), overwrite=overwrite, ) @@ -340,10 +323,10 @@ def main( _ = shift_and_scale_catalog( path, output_dir, - flux_scale=flux_corr_mult, - flux_offset_mJy=flux_corr_add, - ra_offset_arcsec=dra_median_value, - dec_offset_arcsec=ddec_median_value, + flux_scale=flux_corr_mult.n, + flux_offset_mJy=flux_corr_add.n, + ra_offset_arcsec=dra_median_value.item(), + dec_offset_arcsec=ddec_median_value.item(), overwrite=overwrite, ) From 842267dd41c3500d4638da923c99f52dbec1aa9b Mon Sep 17 00:00:00 2001 From: Hansen Date: Sun, 6 Aug 2023 17:15:29 -0400 Subject: [PATCH 05/31] Start of reorg --- vast_post_processing/__init__.py | 8 ++++++++ vast_post_processing/cli/__init__.py | 12 ++++++++++++ vast_post_processing/utils/__init__.py | 6 ++++++ vast_post_processing/utils/fileutils.py | 6 ++++++ vast_post_processing/utils/misc.py | 1 + vast_post_processing/validation.py | 5 +++++ 6 files changed, 38 insertions(+) create mode 100644 vast_post_processing/cli/__init__.py create mode 100644 vast_post_processing/utils/__init__.py create mode 100644 vast_post_processing/utils/fileutils.py create mode 100644 vast_post_processing/utils/misc.py create mode 100644 vast_post_processing/validation.py diff --git a/vast_post_processing/__init__.py b/vast_post_processing/__init__.py index e69de29..a26f4d7 100644 --- a/vast_post_processing/__init__.py +++ b/vast_post_processing/__init__.py @@ -0,0 +1,8 @@ +import combine +import corrections +import crop +import neighbours +import validation + +import utils +import cli diff --git a/vast_post_processing/cli/__init__.py b/vast_post_processing/cli/__init__.py new file mode 100644 index 0000000..9ed0ea9 --- /dev/null +++ b/vast_post_processing/cli/__init__.py @@ -0,0 +1,12 @@ +# +# The CLI bindings for VAST Post-processing +# + +import _util +import cleanup +import convolve_neighbours +import correct_vast +import link_neighbours +import run_crop +import selavy_combined +import swarp diff --git a/vast_post_processing/utils/__init__.py b/vast_post_processing/utils/__init__.py new file mode 100644 index 0000000..8420b50 --- /dev/null +++ b/vast_post_processing/utils/__init__.py @@ -0,0 +1,6 @@ +# +# Utility functions for VAST Post-processing +# + +import misc +import fileutils diff --git a/vast_post_processing/utils/fileutils.py b/vast_post_processing/utils/fileutils.py new file mode 100644 index 0000000..8502dc9 --- /dev/null +++ b/vast_post_processing/utils/fileutils.py @@ -0,0 +1,6 @@ +""" +Utility Functions for Files +""" + + +# Move cleanup functions/logic here diff --git a/vast_post_processing/utils/misc.py b/vast_post_processing/utils/misc.py new file mode 100644 
index 0000000..9058a05 --- /dev/null +++ b/vast_post_processing/utils/misc.py @@ -0,0 +1 @@ +# Miscellaneous Utilities diff --git a/vast_post_processing/validation.py b/vast_post_processing/validation.py new file mode 100644 index 0000000..f7e6bbd --- /dev/null +++ b/vast_post_processing/validation.py @@ -0,0 +1,5 @@ +""" + +Validation generation code + +""" From fcd07cdc5ffde07b2ba1771b95107f69c78c5254 Mon Sep 17 00:00:00 2001 From: Hansen Date: Sun, 6 Aug 2023 19:08:58 -0400 Subject: [PATCH 06/31] created scripts directory --- scripts/combined-fix-timestamps.py | 136 +++++++++++++++++++++++++++++ 1 file changed, 136 insertions(+) create mode 100644 scripts/combined-fix-timestamps.py diff --git a/scripts/combined-fix-timestamps.py b/scripts/combined-fix-timestamps.py new file mode 100644 index 0000000..e5d16de --- /dev/null +++ b/scripts/combined-fix-timestamps.py @@ -0,0 +1,136 @@ +#!/usr/bin/env python +# coding: utf-8 + +from dataclasses import dataclass +from pathlib import Path +from astropy.io import fits +from astropy.time import Time +import astropy.units as u +import pandas as pd +from tqdm import tqdm + + +@dataclass +class VastNeighbour: + field: str + sbid: int + filename: str + date_start_isot: str + date_end_isot: str + combined_field: str + combined_release_epoch: str + combined_main_field: bool + + +def combined_date_obs(df) -> str: + main_field_obs = df.query("combined_main_field").reset_index().set_index("sbid") + idx = main_field_obs.date_start_isot.idxmin() + return main_field_obs.loc[idx][["date_start_isot", "date_end_isot"]] + + +VAST_REPO = Path("/data/VAST/askap-surveys-database/vast/db") +RACS_REPO = Path("/data/VAST/askap-surveys-database/racs/db") + +# read the VAST metadata +vast_df = pd.DataFrame() +for field_data_path in VAST_REPO.glob("epoch_*/field_data.csv"): + vast_df = vast_df.append(pd.read_csv(field_data_path)) +vast_df["DATE-BEG"] = pd.to_datetime( + Time(vast_df["SCAN_START"].values * u.s, format="mjd").isot +) +vast_df["DATE-END"] = vast_df["DATE-BEG"] + pd.to_timedelta( + vast_df["SCAN_LEN"], unit="sec" +) + +# read the RACS metadata +racs_df = pd.DataFrame() +for field_data_path in RACS_REPO.glob("epoch_[01]/field_data.csv"): + racs_df = racs_df.append(pd.read_csv(field_data_path)) +racs_df["DATE-BEG"] = pd.to_datetime( + Time(racs_df["SCAN_START"].values * u.s, format="mjd").isot +) +racs_df["DATE-END"] = racs_df["DATE-BEG"] + pd.to_timedelta( + racs_df["SCAN_LEN"], unit="sec" +) +racs_df["FIELD_NAME"] = racs_df["FIELD_NAME"].str.replace("RACS", "VAST") + +# add racs to vast +vast_df = pd.concat((vast_df, racs_df)) + +# remove duplicates +vast_df = vast_df.drop_duplicates(subset=["FIELD_NAME", "SBID"], keep=False) +vast_df = ( + vast_df[["FIELD_NAME", "SBID", "DATE-BEG", "DATE-END"]] + .set_index(["FIELD_NAME", "SBID"]) + .sort_index() +) + +# get the neighbours from the hard-links generated by ~/vast-post-processing/link_neighbours.py +neighbours_list = [] +for field_dir_path in Path("/data/.staging/convolved/").glob("EPOCH*/VAST_*"): + combined_field = field_dir_path.name + combined_release_epoch = field_dir_path.parent.name + + for input_field_path in (field_dir_path / "inputs").glob("image.i.VAST_*.fits"): + _, _, field, sbid_str, *_ = input_field_path.name.split(".") + sbid = int(sbid_str[2:]) + # use the metadata instead of the imager header, some of the image headers + # appeared incorrect e.g. duplicate field observation had the same DATE-OBS + # which is impossible! 
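+ # vast_df is indexed on (FIELD_NAME, SBID) with duplicated observations dropped above, + # so each lookup below resolves to a single metadata row for this field and SBID.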
+ date_start_meta = Time(vast_df.loc[(field, sbid), "DATE-BEG"].isoformat()) + date_end_meta = Time(vast_df.loc[(field, sbid), "DATE-END"].isoformat()) + neighbours_list.append( + VastNeighbour( + field=field, + sbid=sbid, + filename=input_field_path.name, + date_start_isot=pd.Timestamp(date_start_meta.utc.isot), + date_end_isot=pd.Timestamp(date_end_meta.utc.isot), + combined_field=combined_field, + combined_release_epoch=combined_release_epoch, + combined_main_field=(field == combined_field), + ) + ) +neighbours_df = ( + pd.DataFrame(neighbours_list) + .set_index(["combined_field", "combined_release_epoch"]) + .sort_index() +) + +combined_timestamps = neighbours_df.groupby( + ["combined_field", "combined_release_epoch"] +).apply(combined_date_obs) + +# update headers +epochs = "EPOCH14" # "EPOCH*" +for image_path in tqdm( + list( + Path("/data/VAST/vast-data/COMBINED/STOKESI_IMAGES/").glob( + f"{epochs}/VAST_*.fits" + ) + ) +): + combined_field = image_path.name.split(".")[0] + combined_release_epoch = image_path.parent.name + with fits.open(image_path, mode="update") as hdul: + date_start = Time( + combined_timestamps.loc[ + (combined_field, combined_release_epoch), "date_start_isot" + ] + ) + date_end = Time( + combined_timestamps.loc[ + (combined_field, combined_release_epoch), "date_end_isot" + ] + ) + hdul[0].header["DATE-OBS"] = date_start.utc.isot + hdul[0].header["DATE-BEG"] = date_start.utc.isot + hdul[0].header["DATE-END"] = date_end.utc.isot + hdul[0].header["MJD-OBS"] = date_start.utc.mjd + hdul[0].header["MJD-BEG"] = date_start.utc.mjd + hdul[0].header["MJD-END"] = date_end.utc.mjd + hdul[0].header.add_history( + "Set dates to earliest observation of the main central field. Edges may" + " contain data from other dates." + ) + # closing the file will save the changes, i.e. 
when the "with" block exits From b417e3462ce5476cc51b5dc8a8034258c03d7bcb Mon Sep 17 00:00:00 2001 From: Hansen Date: Sun, 6 Aug 2023 19:09:31 -0400 Subject: [PATCH 07/31] created docker directory --- .dockerignore => docker/.dockerignore | 0 Dockerfile => docker/Dockerfile | 0 build_singularity.sh => docker/build_singularity.sh | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename .dockerignore => docker/.dockerignore (100%) rename Dockerfile => docker/Dockerfile (100%) rename build_singularity.sh => docker/build_singularity.sh (100%) diff --git a/.dockerignore b/docker/.dockerignore similarity index 100% rename from .dockerignore rename to docker/.dockerignore diff --git a/Dockerfile b/docker/Dockerfile similarity index 100% rename from Dockerfile rename to docker/Dockerfile diff --git a/build_singularity.sh b/docker/build_singularity.sh similarity index 100% rename from build_singularity.sh rename to docker/build_singularity.sh From f69668167bc4dbe54a13479443b18c7da35cec0e Mon Sep 17 00:00:00 2001 From: Hansen Date: Sun, 6 Aug 2023 19:13:47 -0400 Subject: [PATCH 08/31] scripts directory --- combined-fix-timestamps.py | 136 ------------------------------------- 1 file changed, 136 deletions(-) delete mode 100644 combined-fix-timestamps.py diff --git a/combined-fix-timestamps.py b/combined-fix-timestamps.py deleted file mode 100644 index e5d16de..0000000 --- a/combined-fix-timestamps.py +++ /dev/null @@ -1,136 +0,0 @@ -#!/usr/bin/env python -# coding: utf-8 - -from dataclasses import dataclass -from pathlib import Path -from astropy.io import fits -from astropy.time import Time -import astropy.units as u -import pandas as pd -from tqdm import tqdm - - -@dataclass -class VastNeighbour: - field: str - sbid: int - filename: str - date_start_isot: str - date_end_isot: str - combined_field: str - combined_release_epoch: str - combined_main_field: bool - - -def combined_date_obs(df) -> str: - main_field_obs = df.query("combined_main_field").reset_index().set_index("sbid") - idx = main_field_obs.date_start_isot.idxmin() - return main_field_obs.loc[idx][["date_start_isot", "date_end_isot"]] - - -VAST_REPO = Path("/data/VAST/askap-surveys-database/vast/db") -RACS_REPO = Path("/data/VAST/askap-surveys-database/racs/db") - -# read the VAST metadata -vast_df = pd.DataFrame() -for field_data_path in VAST_REPO.glob("epoch_*/field_data.csv"): - vast_df = vast_df.append(pd.read_csv(field_data_path)) -vast_df["DATE-BEG"] = pd.to_datetime( - Time(vast_df["SCAN_START"].values * u.s, format="mjd").isot -) -vast_df["DATE-END"] = vast_df["DATE-BEG"] + pd.to_timedelta( - vast_df["SCAN_LEN"], unit="sec" -) - -# read the RACS metadata -racs_df = pd.DataFrame() -for field_data_path in RACS_REPO.glob("epoch_[01]/field_data.csv"): - racs_df = racs_df.append(pd.read_csv(field_data_path)) -racs_df["DATE-BEG"] = pd.to_datetime( - Time(racs_df["SCAN_START"].values * u.s, format="mjd").isot -) -racs_df["DATE-END"] = racs_df["DATE-BEG"] + pd.to_timedelta( - racs_df["SCAN_LEN"], unit="sec" -) -racs_df["FIELD_NAME"] = racs_df["FIELD_NAME"].str.replace("RACS", "VAST") - -# add racs to vast -vast_df = pd.concat((vast_df, racs_df)) - -# remove duplicates -vast_df = vast_df.drop_duplicates(subset=["FIELD_NAME", "SBID"], keep=False) -vast_df = ( - vast_df[["FIELD_NAME", "SBID", "DATE-BEG", "DATE-END"]] - .set_index(["FIELD_NAME", "SBID"]) - .sort_index() -) - -# get the neighbours from the hard-links generated by ~/vast-post-processing/link_neighbours.py -neighbours_list = [] -for field_dir_path in 
Path("/data/.staging/convolved/").glob("EPOCH*/VAST_*"): - combined_field = field_dir_path.name - combined_release_epoch = field_dir_path.parent.name - - for input_field_path in (field_dir_path / "inputs").glob("image.i.VAST_*.fits"): - _, _, field, sbid_str, *_ = input_field_path.name.split(".") - sbid = int(sbid_str[2:]) - # use the metadata instead of the imager header, some of the image headers - # appeared incorrect e.g. duplicate field observation had the same DATE-OBS - # which is impossible! - date_start_meta = Time(vast_df.loc[(field, sbid), "DATE-BEG"].isoformat()) - date_end_meta = Time(vast_df.loc[(field, sbid), "DATE-END"].isoformat()) - neighbours_list.append( - VastNeighbour( - field=field, - sbid=sbid, - filename=input_field_path.name, - date_start_isot=pd.Timestamp(date_start_meta.utc.isot), - date_end_isot=pd.Timestamp(date_end_meta.utc.isot), - combined_field=combined_field, - combined_release_epoch=combined_release_epoch, - combined_main_field=(field == combined_field), - ) - ) -neighbours_df = ( - pd.DataFrame(neighbours_list) - .set_index(["combined_field", "combined_release_epoch"]) - .sort_index() -) - -combined_timestamps = neighbours_df.groupby( - ["combined_field", "combined_release_epoch"] -).apply(combined_date_obs) - -# update headers -epochs = "EPOCH14" # "EPOCH*" -for image_path in tqdm( - list( - Path("/data/VAST/vast-data/COMBINED/STOKESI_IMAGES/").glob( - f"{epochs}/VAST_*.fits" - ) - ) -): - combined_field = image_path.name.split(".")[0] - combined_release_epoch = image_path.parent.name - with fits.open(image_path, mode="update") as hdul: - date_start = Time( - combined_timestamps.loc[ - (combined_field, combined_release_epoch), "date_start_isot" - ] - ) - date_end = Time( - combined_timestamps.loc[ - (combined_field, combined_release_epoch), "date_end_isot" - ] - ) - hdul[0].header["DATE-OBS"] = date_start.utc.isot - hdul[0].header["DATE-BEG"] = date_start.utc.isot - hdul[0].header["DATE-END"] = date_end.utc.isot - hdul[0].header["MJD-OBS"] = date_start.utc.mjd - hdul[0].header["MJD-BEG"] = date_start.utc.mjd - hdul[0].header["MJD-END"] = date_end.utc.mjd - hdul[0].header.add_history( - "Set dates to earliest observation of the main central field. Edges may" - " contain data from other dates." - ) - # closing the file will save the changes, i.e. when the "with" block exits From a9065403d8401a1cb0d2cf3d19e3a068beae8e91 Mon Sep 17 00:00:00 2001 From: Hansen Date: Sun, 6 Aug 2023 19:14:36 -0400 Subject: [PATCH 09/31] added READMEs to explain directories --- docker/README.md | 10 ++++++++++ docs/README.md | 35 +++++++++++++++++++++++++++++++++++ examples/README.md | 15 +++++++++++++++ scripts/README.md | 7 +++++++ 4 files changed, 67 insertions(+) create mode 100644 docker/README.md create mode 100644 docs/README.md create mode 100644 examples/README.md create mode 100644 scripts/README.md diff --git a/docker/README.md b/docker/README.md new file mode 100644 index 0000000..5855ee2 --- /dev/null +++ b/docker/README.md @@ -0,0 +1,10 @@ +# Docker + +This directory contains the relevant Docker files for this project's Docker +services. + +## Included + +1. `.dockerignore` +2. `build_singularity.sh` +3. `Dockerfile` \ No newline at end of file diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 0000000..3b5702c --- /dev/null +++ b/docs/README.md @@ -0,0 +1,35 @@ +# Documentation + +This directory contains the documentation for this project. 
+ +The modules of this project are documented using docstrings +in [numpydoc](https://numpydoc.readthedocs.io/en/latest/format.html) style and +comments, and formatted using code blocking, parentheses, and other +[PEP8](https://peps.python.org/pep-0008/) style guidelines, using [the Black +formatter](https://black.readthedocs.io/en/stable/the_black_code_style/current_style.html). + +They are auto-generated in `HTML` format with [`sphinx`](https://www.sphinx-doc.org/en/master/index.html). + +## Included + +1. `source/` + 1. `conf.py` + 2. `index.rst` + 3. `modules.rst` + 4. `vast_post_processing.rst` +2. `make.bat` +3. `Makefile` + +## Instructions + +To view `sphinx` documentation for this project, navigate to the root of the +package (i.e. `vast-post-processing`), and +run +``` +poetry install +poetry shell +cd docs +make html +``` +The pages are built in `vast-post-processing/docs/build/html`, and you can load +the index page by opening `index.html` in a browser. diff --git a/examples/README.md b/examples/README.md new file mode 100644 index 0000000..b42e8c6 --- /dev/null +++ b/examples/README.md @@ -0,0 +1,15 @@ +# Examples + +This directory contains program runtime examples. + +## Included + +1. `mortimer/` + 1. `01-convolve_neighbours.sbatch` + 2. `02-swarp.sbatch` + 3. `03-selavy_setup.sbatch` + 4. `04-selavy_submit.sbatch` + 5. `05-rsync_outputs.sh` + 6. `field_list.txt` + 7. `selavy_template.in` + 8. `selavy_template.sbatch` \ No newline at end of file diff --git a/scripts/README.md b/scripts/README.md new file mode 100644 index 0000000..bca0fcc --- /dev/null +++ b/scripts/README.md @@ -0,0 +1,7 @@ +# Scripts + +This directory contains standalone scripts. + +## Included + +1. `combined-fix-timestamps` From 852ca0e6ae33e14c5cfa1b6bb34541f1adaef9b8 Mon Sep 17 00:00:00 2001 From: Hansen Date: Sun, 6 Aug 2023 19:14:54 -0400 Subject: [PATCH 10/31] moved documentation instructions to docs --- README.md | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/README.md b/README.md index b668a3c..e75445d 100644 --- a/README.md +++ b/README.md @@ -10,20 +10,6 @@ This repository holds the code of VAST Post Processing. ## Screenshots and Previews -## Documentation - -To view `sphinx` documentation for this project, navigate to the root of the package, and -enter the following commands - -``` -poetry install -poetry shell -cd docs -make html -``` -The pages will be built in `vast-post-processing/docs/build/html`, and you can -load the index page by opening `index.html` in a browser. 
- - ## Contributors * Andrew O'Brien – [Department of Physics, University of Wisconsin-Milwaukee](https://uwm.edu/physics/research/astronomy-gravitation-cosmology/) From d0fb95854f59546a21e59b1af4a68c7c0fc0c5c0 Mon Sep 17 00:00:00 2001 From: Hansen Date: Wed, 9 Aug 2023 13:47:27 -0400 Subject: [PATCH 11/31] separating logic - cleanup module --- vast_post_processing/cli/cleanup.py | 19 ++----------------- vast_post_processing/utils/fileutils.py | 22 +++++++++++++++++++++- 2 files changed, 23 insertions(+), 18 deletions(-) diff --git a/vast_post_processing/cli/cleanup.py b/vast_post_processing/cli/cleanup.py index 3ec0c5c..4b0b4bd 100644 --- a/vast_post_processing/cli/cleanup.py +++ b/vast_post_processing/cli/cleanup.py @@ -8,24 +8,9 @@ from pathlib import Path from shutil import rmtree -from loguru import logger import typer - -def cleanup_directory(directory: Path): - DELETE_EXT = (".fits", ".ann", ".txt", ".xml") - DELETE_DIR = ("inputs", "tmp") - - for path in directory.iterdir(): - if path.is_file(): - if path.suffix in DELETE_EXT: - path.unlink() - logger.info(f"Deleted file {path}.") - elif path.is_dir() and path.name in DELETE_DIR: - rmtree(path) - logger.info(f"Deleted directory {path}.") - else: - logger.debug(f"Leaving {path}.") +from vast_post_processing.utils import fileutils def main(neighbour_data_dir: Path, delete_all: bool = False): @@ -33,7 +18,7 @@ def main(neighbour_data_dir: Path, delete_all: bool = False): rmtree(neighbour_data_dir) else: for field_path in neighbour_data_dir.glob("VAST_*"): - cleanup_directory(field_path) + fileutils.cleanup_directory(field_path) if __name__ == "__main__": diff --git a/vast_post_processing/utils/fileutils.py b/vast_post_processing/utils/fileutils.py index 8502dc9..a6c4ec9 100644 --- a/vast_post_processing/utils/fileutils.py +++ b/vast_post_processing/utils/fileutils.py @@ -2,5 +2,25 @@ Utility Functions for Files """ - # Move cleanup functions/logic here + +from pathlib import Path +from shutil import rmtree + +from loguru import logger + + +def cleanup_directory(directory: Path): + DELETE_EXT = (".fits", ".ann", ".txt", ".xml") + DELETE_DIR = ("inputs", "tmp") + + for path in directory.iterdir(): + if path.is_file(): + if path.suffix in DELETE_EXT: + path.unlink() + logger.info(f"Deleted file {path}.") + elif path.is_dir() and path.name in DELETE_DIR: + rmtree(path) + logger.info(f"Deleted directory {path}.") + else: + logger.debug(f"Leaving {path}.") From 66d50f99b381832e70de39a0d8f799972619d7bc Mon Sep 17 00:00:00 2001 From: Hansen Date: Wed, 9 Aug 2023 17:12:24 -0400 Subject: [PATCH 12/31] moved logic from cli calls into relevant modules --- tests/__init__.py | 0 vast_post_processing/cli/cleanup.py | 7 +- .../cli/convolve_neighbours.py | 85 +----- vast_post_processing/cli/correct_vast.py | 133 +-------- vast_post_processing/cli/link_neighbours.py | 67 +---- vast_post_processing/cli/selavy_combined.py | 69 +---- vast_post_processing/cli/swarp.py | 177 +----------- vast_post_processing/combine.py | 254 ++++++++++++++++++ vast_post_processing/corrections.py | 144 ++++++++++ vast_post_processing/crop.py | 174 ++++++------ vast_post_processing/neighbours.py | 171 ++++++++++++ vast_post_processing/utils/fileutils.py | 8 + 12 files changed, 688 insertions(+), 601 deletions(-) delete mode 100644 tests/__init__.py diff --git a/tests/__init__.py b/tests/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/vast_post_processing/cli/cleanup.py b/vast_post_processing/cli/cleanup.py index 4b0b4bd..bce46b0 100644 --- 
a/vast_post_processing/cli/cleanup.py +++ b/vast_post_processing/cli/cleanup.py @@ -6,7 +6,6 @@ """ from pathlib import Path -from shutil import rmtree import typer @@ -14,11 +13,7 @@ def main(neighbour_data_dir: Path, delete_all: bool = False): - if delete_all: - rmtree(neighbour_data_dir) - else: - for field_path in neighbour_data_dir.glob("VAST_*"): - fileutils.cleanup_directory(field_path) + fileutils.cleanup(neighbour_data_dir, delete_all) if __name__ == "__main__": diff --git a/vast_post_processing/cli/convolve_neighbours.py b/vast_post_processing/cli/convolve_neighbours.py index 2251049..9d2fee3 100644 --- a/vast_post_processing/cli/convolve_neighbours.py +++ b/vast_post_processing/cli/convolve_neighbours.py @@ -1,44 +1,16 @@ """Requires setup_neighbours.py to be run first. """ -from dataclasses import dataclass, fields -from functools import partial from pathlib import Path from typing import Optional, List -from loguru import logger -from racs_tools import beamcon_2D -from radio_beam import Beam import typer -from vast_post_processing.cli._util import get_pool, _get_worker_name -from vast_post_processing.neighbours import convolve_image +from vast_post_processing import neighbours app = typer.Typer() -@dataclass -class WorkerArgs: - image_path: Path - output_dir_path: Path - target_beam: Beam - mode: str - suffix: str = "sm" - prefix: Optional[str] = None - cutoff: Optional[float] = None - dry_run: bool = False - - def __iter__(self): - # Makes the class fields iterable so they can be unpacked - # e.g. func(*args) where args is a WorkerArgs object. - return (getattr(self, field.name) for field in fields(self)) - - -def worker(args: WorkerArgs, mpi: bool = False, n_proc: int = 1): - with logger.contextualize(worker_name=_get_worker_name(mpi=mpi, n_proc=n_proc)): - return convolve_image(*args) - - @app.command() def main( neighbour_data_dir: Path, @@ -48,55 +20,6 @@ def main( racs: bool = False, field_list: Optional[List[str]] = typer.Option(None, "--field"), ): - # neighbour_data_dir has the structure: - # //inputs contains the input FITS images - # to be convolved to a common resolution and their weights FITS images. - - pool = get_pool(mpi=mpi, n_proc=n_proc) - logger.debug(f"pool created, type: {type(pool)}") - - glob_expr = "RACS_*" if racs else "VAST_*" - worker_args_list: list[WorkerArgs] = [] - n_images: int = 0 - for field_dir in neighbour_data_dir.glob(glob_expr): - if field_list and field_dir.name not in field_list: - logger.info( - f"Glob found field {field_dir} but it was not given as a --field option. Skipping." - ) - continue - if max_images is not None and n_images >= max_images: - logger.warning( - f"Reached maximum image limit of {max_images}. Skipping remaining images." - ) - break - if len(list(field_dir.glob("*.sm.fits"))) > 0: - logger.warning(f"Smoothed images already exist in {field_dir}. 
Skipping.") - continue - image_path_list = list(field_dir.glob("inputs/image.*.fits")) - logger.debug( - f"Found {len(image_path_list)} images for {field_dir.name}" - ) - # find the smallest common beam - common_beam, _ = beamcon_2D.getmaxbeam(image_path_list) - logger.debug( - f"{field_dir} common beam major {common_beam.major} type" - f" {type(common_beam)}" - ) - for image_path in image_path_list: - worker_args = WorkerArgs( - image_path=image_path, - output_dir_path=field_dir, - target_beam=common_beam, - mode="robust", - ) - worker_args_list.append(worker_args) - n_images += 1 - if max_images is not None and n_images >= max_images: - logger.warning( - f"Reached maximum image limit of {max_images}. Skipping remaining images." - ) - break - - # start convolutions - _ = list(pool.map(partial(worker, mpi=mpi, n_proc=n_proc), worker_args_list)) - pool.close() + neighbours.convolve_neighbours( + neighbour_data_dir, n_proc, mpi, max_images, racs, field_list + ) diff --git a/vast_post_processing/cli/correct_vast.py b/vast_post_processing/cli/correct_vast.py index 90cb6fe..611208e 100644 --- a/vast_post_processing/cli/correct_vast.py +++ b/vast_post_processing/cli/correct_vast.py @@ -1,13 +1,8 @@ -from itertools import chain from pathlib import Path -import sys -from typing import Optional, Generator - -from loguru import logger -import pandas as pd +from typing import Optional import typer -from vast_post_processing.corrections import shift_and_scale_catalog, shift_and_scale_image +from vast_post_processing import corrections def main( @@ -39,129 +34,9 @@ def main( overwrite: bool = False, verbose: bool = False, ): - """Read astrometric and flux corrections produced by vast-xmatch and apply them to - VAST images and catalogues in vast-data. See https://github.com/marxide/vast-xmatch. - """ - # configure logger - if not verbose: - # replace the default sink - logger.remove() - logger.add(sys.stderr, level="INFO") - - # read corrections - corrections_df = ( - pd.read_csv(vast_corrections_csv) - .set_index(["release_epoch", "field", "sbid"]) - .sort_index() + corrections.correct_vast( + vast_tile_data_root, vast_corrections_csv, epoch, overwrite, verbose ) - image_path_glob_list: list[Generator[Path, None, None]] = [] - components_path_glob_list: list[Generator[Path, None, None]] = [] - if epoch is None or len(epoch) == 0: - image_path_glob_list.append( - vast_tile_data_root.glob("STOKESI_IMAGES/epoch_*/*.fits") - ) - components_path_glob_list.append( - vast_tile_data_root.glob("STOKESI_SELAVY/epoch_*/*.components.xml") - ) - else: - for n in epoch: - image_path_glob_list.append( - vast_tile_data_root.glob(f"STOKESI_IMAGES/epoch_{n}/*.fits") - ) - components_path_glob_list.append( - vast_tile_data_root.glob(f"STOKESI_SELAVY/epoch_{n}/*.components.xml") - ) - - # correct images - for image_path in chain.from_iterable(image_path_glob_list): - epoch_dir = image_path.parent.name - _, _, field, sbid_str, *_ = image_path.name.split(".") - sbid = int(sbid_str[2:]) - # get rms and background images - rms_path = ( - vast_tile_data_root - / "STOKESI_RMSMAPS" - / epoch_dir - / f"noiseMap.{image_path.name}" - ) - bkg_path = ( - vast_tile_data_root - / "STOKESI_RMSMAPS" - / epoch_dir - / f"meanMap.{image_path.name}" - ) - # get corrections - skip = False - try: - corrections = corrections_df.loc[(epoch_dir, field, sbid)] - except KeyError: - skip = True - logger.warning( - f"Corrections not found for {image_path} ({epoch_dir}, {field}," - f" {sbid})." 
- ) - if not rms_path.exists(): - logger.warning(f"RMS image not found for {image_path}.") - if not bkg_path.exists(): - logger.warning(f"Background image not found for {image_path}.") - skip = not (rms_path.exists() and bkg_path.exists()) or skip - if skip: - logger.warning(f"Skipping {image_path}.") - continue - - for path in (image_path, rms_path, bkg_path): - stokes_dir = f"{path.parent.parent.name}_CORRECTED" - output_dir = vast_tile_data_root / stokes_dir / epoch_dir - output_dir.mkdir(parents=True, exist_ok=True) - _ = shift_and_scale_image( - path, - output_dir, - flux_scale=corrections.flux_peak_correction_multiplicative, - flux_offset_mJy=corrections.flux_peak_correction_additive, - ra_offset_arcsec=corrections.ra_correction, - dec_offset_arcsec=corrections.dec_correction, - overwrite=overwrite, - ) - - # correct catalogs - for components_path in chain.from_iterable(components_path_glob_list): - epoch_dir = components_path.parent.name - _, _, field, sbid_str, *_ = components_path.name.split(".") - sbid = int(sbid_str[2:]) - # get island catalog - islands_path = components_path.with_name( - components_path.name.replace(".components", ".islands") - ) - # get corrections - skip = False - try: - corrections = corrections_df.loc[(epoch_dir, field, sbid)] - except KeyError: - skip = True - logger.warning( - f"Corrections not found for {components_path} ({epoch_dir}, {field}," - f" {sbid})." - ) - if not islands_path.exists(): - logger.warning(f"Islands catalogue not found for {components_path}.") - skip = not islands_path.exists() or skip - if skip: - logger.warning(f"Skipping {components_path}.") - continue - - for path in (components_path, islands_path): - stokes_dir = f"{path.parent.parent.name}_CORRECTED" - output_dir = vast_tile_data_root / stokes_dir / epoch_dir - output_dir.mkdir(parents=True, exist_ok=True) - _ = shift_and_scale_catalog( - path, - output_dir, - flux_scale=corrections.flux_peak_correction_multiplicative, - flux_offset_mJy=corrections.flux_peak_correction_additive, - ra_offset_arcsec=corrections.ra_correction, - dec_offset_arcsec=corrections.dec_correction, - overwrite=overwrite, - ) if __name__ == "__main__": diff --git a/vast_post_processing/cli/link_neighbours.py b/vast_post_processing/cli/link_neighbours.py index 7d81fcf..82c6d49 100644 --- a/vast_post_processing/cli/link_neighbours.py +++ b/vast_post_processing/cli/link_neighbours.py @@ -2,10 +2,7 @@ from typing import Optional import typer -from vast_post_processing.neighbours import ( - read_release_epochs, - find_vast_neighbours_by_release_epoch, -) +from vast_post_processing import neighbours app = typer.Typer() @@ -79,61 +76,15 @@ def main( ), ), ): - # get the release epochs - release_epochs = read_release_epochs(release_epochs_csv) - # get the neighbours DataFrame and filter for the requested release epoch and - # overlap area threshold - vast_neighbours_df = find_vast_neighbours_by_release_epoch( + neighbours.link_neighbours( release_epoch, vast_data_root, + release_epochs_csv, + output_root, vast_db_repo, - release_epochs, - racs_db_repo=racs_db_repo, - use_corrected=use_corrected, - ).query( - "release_epoch_a == @release_epoch and overlap_frac >= @overlap_frac_thresh" + racs_db_repo, + overlap_frac_thresh, + use_corrected, + neighbours_output, + make_links, ) - - if neighbours_output is not None: - vast_neighbours_df[ - [ - "field_a", - "sbid_a", - "obs_epoch_a", - "release_epoch_a", - "field_b", - "sbid_b", - "obs_epoch_b", - "release_epoch_b", - "overlap_frac", - "delta_t_days", - ] - 
].to_csv(neighbours_output, index=False) - - # create a directory for each field and create links to the neighbouring images - if make_links: - release_output_path = output_root / release_epoch - release_output_path.mkdir(parents=True, exist_ok=True) - for _, obs_pair in vast_neighbours_df.iterrows(): - # create directories - field_inputs_path_a = release_output_path / obs_pair.field_a / "inputs" - field_inputs_path_a.mkdir(parents=True, exist_ok=True) - field_inputs_path_b = release_output_path / obs_pair.field_b / "inputs" - field_inputs_path_b.mkdir(parents=True, exist_ok=True) - - # create a hard link for each field in the pair in both directions, e.g. - # A/inputs/A.fits, A/inputs/B.fits, B/inputs/A.fits, B/inputs/B.fits (plus weights) - for output_path in (field_inputs_path_a, field_inputs_path_b): - target_image_a = output_path / obs_pair.image_path_a.name - target_weights_a = output_path / obs_pair.weights_path_a.name - if not target_image_a.exists(): - obs_pair.image_path_a.link_to(target_image_a) - if not target_weights_a.exists(): - obs_pair.weights_path_a.link_to(target_weights_a) - - target_image_b = output_path / obs_pair.image_path_b.name - target_weights_b = output_path / obs_pair.weights_path_b.name - if not target_image_b.exists(): - obs_pair.image_path_b.link_to(target_image_b) - if not target_weights_b.exists(): - obs_pair.weights_path_b.link_to(target_weights_b) diff --git a/vast_post_processing/cli/selavy_combined.py b/vast_post_processing/cli/selavy_combined.py index bf2b4c9..0b9bf73 100644 --- a/vast_post_processing/cli/selavy_combined.py +++ b/vast_post_processing/cli/selavy_combined.py @@ -7,47 +7,12 @@ from pathlib import Path from typing import Optional, List -from loguru import logger import typer -app = typer.Typer() - - -def write_selavy_files( - field_name: str, - epoch_name: str, - image_path: Path, - parset_template_path: Path, - sbatch_template_path: Path, - weights_path: Optional[Path] = None, -): - if image_path is None: - raise FileNotFoundError(f"Image {image_path} doesn't exist.") - if weights_path is None: - # try to find the weights file using the combined naming convention - weights_path = image_path.with_name(f"{image_path.stem}.weight.fits") - if not weights_path.exists(): - raise FileNotFoundError(f"Weights image {weights_path} doesn't exist.") +from vast_post_processing import combine - image_name = image_path.stem - weights_name = weights_path.stem - parset_template = parset_template_path.read_text().format( - image_name=image_name, weights_name=weights_name - ) - parset_path = image_path.with_name(f"selavy.{image_name}.in") - parset_path.write_text(parset_template) - - sbatch_template = sbatch_template_path.read_text().format( - job_name=f"selavy-{field_name}-{epoch_name}", - parset_path=parset_path.relative_to(image_path.parent), - log_path=parset_path.with_suffix(".log").relative_to(image_path.parent), - working_dir_path=parset_path.parent, - ) - sbatch_path = image_path.with_name(f"selavy.{image_name}.sbatch") - sbatch_path.write_text(sbatch_template) - - return sbatch_path +app = typer.Typer() @app.command() @@ -59,25 +24,11 @@ def main( racs: bool = False, field_list: Optional[List[str]] = typer.Option(None, "--field"), ): - glob_expr = "RACS_*" if racs else "VAST_*" - for field_path in neighbour_data_dir.glob(glob_expr): - if field_list and field_path.name not in field_list: - logger.info( - f"Glob found field {field_path} but it was not given as a --field option." - " Skipping." 
- ) - continue - field_name = field_path.name - epoch_name = field_path.parent.name - image_path = field_path / f"{field_name}.{epoch_name}.{stokes}.conv.fits" - try: - _ = write_selavy_files( - field_name, - epoch_name, - image_path, - parset_template_path, - sbatch_template_path, - ) - except FileNotFoundError as e: - logger.error(e) - continue + combine.selavy_combined( + neighbour_data_dir, + parset_template_path, + sbatch_template_path, + stokes, + racs, + field_list, + ) diff --git a/vast_post_processing/cli/swarp.py b/vast_post_processing/cli/swarp.py index 110b329..2ba4682 100644 --- a/vast_post_processing/cli/swarp.py +++ b/vast_post_processing/cli/swarp.py @@ -6,93 +6,14 @@ Assumes convolved files are named *.sm.fits and are organized: //*.sm.fits. """ -from functools import partial -import os from pathlib import Path -import subprocess - -from loguru import logger import typer -from vast_post_processing.cli._util import get_pool, _get_worker_name -from vast_post_processing.combine import ( - add_degenerate_axes, - mask_weightless_pixels, - get_image_geometry, - write_swarp_config, - CentralImageNotFound, - COPY_FITS_KEYWORDS, -) - -# configure logging -# logger.remove() # remove default log sink -# logger.add(sys.stderr, level="DEBUG", enqueue=True) - -slurm_job_id = os.environ.get("SLURM_JOB_ID", "no-slurm") +from vast_post_processing import combine app = typer.Typer() -def worker( - args: tuple[list[str], str, Path, Path, Path], mpi: bool = False, n_proc: int = 1 -): - with logger.contextualize(worker_name=_get_worker_name(mpi=mpi, n_proc=n_proc)): - swarp_cmd: list[str] - field_name: str - output_mosaic_path: Path - output_weight_path: Path - central_image_path: Path - - ( - swarp_cmd, - field_name, - output_mosaic_path, - output_weight_path, - central_image_path, - ) = args - logger.debug(f"worker args: {args}") - - config_path = Path(swarp_cmd[2]) - field_name = config_path.parent.name - try: - logger.debug(f"SWarping {field_name} ...") - _ = subprocess.run(swarp_cmd, check=True) - except subprocess.CalledProcessError as e: - logger.error( - f"Error while calling SWarp for {field_name}. Return code: {e.returncode}" - ) - logger.debug(e.cmd) - raise e - add_degenerate_axes(output_mosaic_path, central_image_path) - add_degenerate_axes(output_weight_path, central_image_path) - mask_weightless_pixels(output_mosaic_path, output_weight_path) - logger.info(f"SWarp completed for {field_name}.") - - -def test_worker( - args: tuple[list[str], str, Path, Path, Path], mpi: bool = False, n_proc: int = 1 -): - with logger.contextualize(worker_name=_get_worker_name(mpi=mpi, n_proc=n_proc)): - swarp_cmd: list[str] - field_name: str - output_mosaic_path: Path - output_weight_path: Path - central_image_path: Path - - ( - swarp_cmd, - field_name, - output_mosaic_path, - output_weight_path, - central_image_path, - ) = args - logger.debug(f"worker args: {args}") - - config_path = Path(swarp_cmd[2]) - field_name = config_path.parent.name - logger.debug(f"Would SWarp {field_name}") - - @app.command() def main( neighbour_data_dir: Path, @@ -101,98 +22,4 @@ def main( test: bool = False, racs: bool = False, ): - # neighbour_data_dir has the structure: - # / contain the smoothed images to combine. - # //inputs contain the original images and weights. 
- # setup_logger(mpi=mpi) - # logger.info("checking rank and size") - # pool = schwimmbad.choose_pool(mpi=mpi, processes=n_proc) - pool = get_pool(mpi=mpi, n_proc=n_proc) - # if using MPI, the following is executed only on the main process - epoch_name = neighbour_data_dir.name - arg_list: list[tuple[list[str], str, Path, Path, Path]] = [] - glob_expr = "RACS_*" if racs else "VAST_*" - for field_path in neighbour_data_dir.glob(glob_expr): - field_name = field_path.name - output_mosaic_path = field_path / f"{field_name}.{epoch_name}.I.conv.fits" - output_weight_path = ( - field_path / f"{field_name}.{epoch_name}.I.conv.weight.fits" - ) - if output_mosaic_path.exists(): - logger.debug( - f"COMBINED image {output_mosaic_path} already exists, skipping" - ) - continue - images = list(field_path.glob("*.sm.fits")) - # get the central image - for image in images: - if field_name in image.name: - central_image = image - break - else: - raise CentralImageNotFound( - f"Could not find central image for {field_path}." - ) - weight_path = field_path / "inputs" - weights = [ - weight_path - / image.name.replace("image", "weights") - .replace(".sm", "") - .replace(".restored", "") - .replace(".conv", "") - .replace(".corrected", "") - for image in images - ] - image_geo = get_image_geometry(central_image) - tmp_dir = field_path / "tmp" - tmp_dir.mkdir(exist_ok=True) - swarp_config_dict = { - "VMEM_MAX": 4000, - "MEM_MAX": 4000, - "COMBINE_BUFSIZE": 2000, - "VMEM_DIR": tmp_dir, - "IMAGEOUT_NAME": output_mosaic_path, - "WEIGHTOUT_NAME": output_weight_path, - "COMBINE": "Y", - "COMBINE_TYPE": "WEIGHTED", - "SUBTRACT_BACK": "N", - "WRITE_XML": "N", - "FSCALASTRO_TYPE": "NONE", - "WEIGHT_TYPE": "MAP_WEIGHT", - "RESCALE_WEIGHTS": "Y", - "WEIGHT_IMAGE": " ".join([str(p) for p in weights]), - "PROJECTION_TYPE": "SIN", - "RESAMPLE_DIR": field_path, - "CENTER_TYPE": "MANUAL", - "CENTER": image_geo.center_hmsdms, - "IMAGE_SIZE": f"{image_geo.npix_x},{image_geo.npix_y}", - "PIXELSCALE_TYPE": "MANUAL", - "PIXEL_SCALE": image_geo.pixel_arcsec, - "COPY_KEYWORDS": ",".join(COPY_FITS_KEYWORDS), - } - config_path = write_swarp_config( - swarp_config_dict, output_mosaic_path.with_suffix(".cfg") - ) - swarp_cmd = [ - "SWarp", - "-c", - str(config_path), - ] - swarp_cmd.extend([str(p) for p in images]) - arg_list.append( - ( - swarp_cmd, - field_name, - output_mosaic_path, - output_weight_path, - central_image, - ) - ) - logger.info(f"Added SWarp command for {field_path.name}.") - logger.debug(swarp_cmd) - - # distribute tasks - - worker_func = partial(worker if not test else test_worker, mpi=mpi, n_proc=n_proc) - _ = list(pool.map(worker_func, arg_list)) - pool.close() + combine.swarp(neighbour_data_dir, n_proc, mpi, test, racs) diff --git a/vast_post_processing/combine.py b/vast_post_processing/combine.py index b6dafa9..e3c8da9 100644 --- a/vast_post_processing/combine.py +++ b/vast_post_processing/combine.py @@ -206,3 +206,257 @@ def mask_weightless_pixels(image_path: Path, weights_path: Path): # Output operation to log logger.info(f"Masked weightless pixels in {image_path}.") + + +# Logic separation + +from pathlib import Path +from typing import Optional, List + +from loguru import logger + + +def write_selavy_files( + field_name: str, + epoch_name: str, + image_path: Path, + parset_template_path: Path, + sbatch_template_path: Path, + weights_path: Optional[Path] = None, +): + if image_path is None: + raise FileNotFoundError(f"Image {image_path} doesn't exist.") + if weights_path is None: + # try to find the weights 
file using the combined naming convention + weights_path = image_path.with_name(f"{image_path.stem}.weight.fits") + if not weights_path.exists(): + raise FileNotFoundError(f"Weights image {weights_path} doesn't exist.") + + image_name = image_path.stem + weights_name = weights_path.stem + + parset_template = parset_template_path.read_text().format( + image_name=image_name, weights_name=weights_name + ) + parset_path = image_path.with_name(f"selavy.{image_name}.in") + parset_path.write_text(parset_template) + + sbatch_template = sbatch_template_path.read_text().format( + job_name=f"selavy-{field_name}-{epoch_name}", + parset_path=parset_path.relative_to(image_path.parent), + log_path=parset_path.with_suffix(".log").relative_to(image_path.parent), + working_dir_path=parset_path.parent, + ) + sbatch_path = image_path.with_name(f"selavy.{image_name}.sbatch") + sbatch_path.write_text(sbatch_template) + + return sbatch_path + + +def selavy_combined( + neighbour_data_dir: Path, + parset_template_path: Path, + sbatch_template_path: Path, + stokes: str, + racs: bool, + field_list: Optional[List[str]], +): + glob_expr = "RACS_*" if racs else "VAST_*" + for field_path in neighbour_data_dir.glob(glob_expr): + if field_list and field_path.name not in field_list: + logger.info( + f"Glob found field {field_path} but it was not given as a --field option." + " Skipping." + ) + continue + field_name = field_path.name + epoch_name = field_path.parent.name + image_path = field_path / f"{field_name}.{epoch_name}.{stokes}.conv.fits" + try: + _ = write_selavy_files( + field_name, + epoch_name, + image_path, + parset_template_path, + sbatch_template_path, + ) + except FileNotFoundError as e: + logger.error(e) + continue + + +from functools import partial +import os +import subprocess + +from loguru import logger + +from vast_post_processing.cli._util import get_pool, _get_worker_name + +# configure logging +# logger.remove() # remove default log sink +# logger.add(sys.stderr, level="DEBUG", enqueue=True) + + +def worker( + args: tuple[list[str], str, Path, Path, Path], mpi: bool = False, n_proc: int = 1 +): + with logger.contextualize(worker_name=_get_worker_name(mpi=mpi, n_proc=n_proc)): + swarp_cmd: list[str] + field_name: str + output_mosaic_path: Path + output_weight_path: Path + central_image_path: Path + + ( + swarp_cmd, + field_name, + output_mosaic_path, + output_weight_path, + central_image_path, + ) = args + logger.debug(f"worker args: {args}") + + config_path = Path(swarp_cmd[2]) + field_name = config_path.parent.name + try: + logger.debug(f"SWarping {field_name} ...") + _ = subprocess.run(swarp_cmd, check=True) + except subprocess.CalledProcessError as e: + logger.error( + f"Error while calling SWarp for {field_name}. 
Return code: {e.returncode}" + ) + logger.debug(e.cmd) + raise e + add_degenerate_axes(output_mosaic_path, central_image_path) + add_degenerate_axes(output_weight_path, central_image_path) + mask_weightless_pixels(output_mosaic_path, output_weight_path) + logger.info(f"SWarp completed for {field_name}.") + + +def test_worker( + args: tuple[list[str], str, Path, Path, Path], mpi: bool = False, n_proc: int = 1 +): + with logger.contextualize(worker_name=_get_worker_name(mpi=mpi, n_proc=n_proc)): + swarp_cmd: list[str] + field_name: str + output_mosaic_path: Path + output_weight_path: Path + central_image_path: Path + + ( + swarp_cmd, + field_name, + output_mosaic_path, + output_weight_path, + central_image_path, + ) = args + logger.debug(f"worker args: {args}") + + config_path = Path(swarp_cmd[2]) + field_name = config_path.parent.name + logger.debug(f"Would SWarp {field_name}") + + +def swarp( + neighbour_data_dir: Path, + n_proc: int, + mpi: bool, + test: bool, + racs: bool, +): + # neighbour_data_dir has the structure: + # / contain the smoothed images to combine. + # //inputs contain the original images and weights. + # setup_logger(mpi=mpi) + # logger.info("checking rank and size") + # pool = schwimmbad.choose_pool(mpi=mpi, processes=n_proc) + pool = get_pool(mpi=mpi, n_proc=n_proc) + # if using MPI, the following is executed only on the main process + epoch_name = neighbour_data_dir.name + arg_list: list[tuple[list[str], str, Path, Path, Path]] = [] + glob_expr = "RACS_*" if racs else "VAST_*" + for field_path in neighbour_data_dir.glob(glob_expr): + field_name = field_path.name + output_mosaic_path = field_path / f"{field_name}.{epoch_name}.I.conv.fits" + output_weight_path = ( + field_path / f"{field_name}.{epoch_name}.I.conv.weight.fits" + ) + if output_mosaic_path.exists(): + logger.debug( + f"COMBINED image {output_mosaic_path} already exists, skipping" + ) + continue + images = list(field_path.glob("*.sm.fits")) + # get the central image + for image in images: + if field_name in image.name: + central_image = image + break + else: + raise CentralImageNotFound( + f"Could not find central image for {field_path}." 
+ ) + weight_path = field_path / "inputs" + weights = [ + weight_path + / image.name.replace("image", "weights") + .replace(".sm", "") + .replace(".restored", "") + .replace(".conv", "") + .replace(".corrected", "") + for image in images + ] + image_geo = get_image_geometry(central_image) + tmp_dir = field_path / "tmp" + tmp_dir.mkdir(exist_ok=True) + swarp_config_dict = { + "VMEM_MAX": 4000, + "MEM_MAX": 4000, + "COMBINE_BUFSIZE": 2000, + "VMEM_DIR": tmp_dir, + "IMAGEOUT_NAME": output_mosaic_path, + "WEIGHTOUT_NAME": output_weight_path, + "COMBINE": "Y", + "COMBINE_TYPE": "WEIGHTED", + "SUBTRACT_BACK": "N", + "WRITE_XML": "N", + "FSCALASTRO_TYPE": "NONE", + "WEIGHT_TYPE": "MAP_WEIGHT", + "RESCALE_WEIGHTS": "Y", + "WEIGHT_IMAGE": " ".join([str(p) for p in weights]), + "PROJECTION_TYPE": "SIN", + "RESAMPLE_DIR": field_path, + "CENTER_TYPE": "MANUAL", + "CENTER": image_geo.center_hmsdms, + "IMAGE_SIZE": f"{image_geo.npix_x},{image_geo.npix_y}", + "PIXELSCALE_TYPE": "MANUAL", + "PIXEL_SCALE": image_geo.pixel_arcsec, + "COPY_KEYWORDS": ",".join(COPY_FITS_KEYWORDS), + } + config_path = write_swarp_config( + swarp_config_dict, output_mosaic_path.with_suffix(".cfg") + ) + swarp_cmd = [ + "SWarp", + "-c", + str(config_path), + ] + swarp_cmd.extend([str(p) for p in images]) + arg_list.append( + ( + swarp_cmd, + field_name, + output_mosaic_path, + output_weight_path, + central_image, + ) + ) + logger.info(f"Added SWarp command for {field_path.name}.") + logger.debug(swarp_cmd) + + # distribute tasks + + worker_func = partial(worker if not test else test_worker, mpi=mpi, n_proc=n_proc) + _ = list(pool.map(worker_func, arg_list)) + pool.close() diff --git a/vast_post_processing/corrections.py b/vast_post_processing/corrections.py index 6521736..c5d6ade 100644 --- a/vast_post_processing/corrections.py +++ b/vast_post_processing/corrections.py @@ -211,3 +211,147 @@ def shift_and_scale_catalog( votablefile.to_xml(str(output_path)) logger.success(f"Wrote corrected catalogue: {output_path}.") return output_path + + +# Separated logic + +from itertools import chain +from pathlib import Path +import sys +from typing import Optional, Generator + +from loguru import logger +import pandas as pd + + +def correct_vast( + vast_tile_data_root: Path, + vast_corrections_csv: Path, + epoch: Optional[list[int]], + overwrite: bool = False, + verbose: bool = False, +): + """Read astrometric and flux corrections produced by vast-xmatch and apply them to + VAST images and catalogues in vast-data. See https://github.com/marxide/vast-xmatch. 
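+ + The corrections CSV is expected to contain one row per (release_epoch, field, sbid) with + the columns flux_peak_correction_multiplicative, flux_peak_correction_additive (mJy), + ra_correction and dec_correction (arcsec); these column names are used when applying the + corrections below.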
+ """ + # configure logger + if not verbose: + # replace the default sink + logger.remove() + logger.add(sys.stderr, level="INFO") + + # read corrections + corrections_df = ( + pd.read_csv(vast_corrections_csv) + .set_index(["release_epoch", "field", "sbid"]) + .sort_index() + ) + image_path_glob_list: list[Generator[Path, None, None]] = [] + components_path_glob_list: list[Generator[Path, None, None]] = [] + if epoch is None or len(epoch) == 0: + image_path_glob_list.append( + vast_tile_data_root.glob("STOKESI_IMAGES/epoch_*/*.fits") + ) + components_path_glob_list.append( + vast_tile_data_root.glob("STOKESI_SELAVY/epoch_*/*.components.xml") + ) + else: + for n in epoch: + image_path_glob_list.append( + vast_tile_data_root.glob(f"STOKESI_IMAGES/epoch_{n}/*.fits") + ) + components_path_glob_list.append( + vast_tile_data_root.glob(f"STOKESI_SELAVY/epoch_{n}/*.components.xml") + ) + + # correct images + for image_path in chain.from_iterable(image_path_glob_list): + epoch_dir = image_path.parent.name + _, _, field, sbid_str, *_ = image_path.name.split(".") + sbid = int(sbid_str[2:]) + # get rms and background images + rms_path = ( + vast_tile_data_root + / "STOKESI_RMSMAPS" + / epoch_dir + / f"noiseMap.{image_path.name}" + ) + bkg_path = ( + vast_tile_data_root + / "STOKESI_RMSMAPS" + / epoch_dir + / f"meanMap.{image_path.name}" + ) + # get corrections + skip = False + try: + corrections = corrections_df.loc[(epoch_dir, field, sbid)] + except KeyError: + skip = True + logger.warning( + f"Corrections not found for {image_path} ({epoch_dir}, {field}," + f" {sbid})." + ) + if not rms_path.exists(): + logger.warning(f"RMS image not found for {image_path}.") + if not bkg_path.exists(): + logger.warning(f"Background image not found for {image_path}.") + skip = not (rms_path.exists() and bkg_path.exists()) or skip + if skip: + logger.warning(f"Skipping {image_path}.") + continue + + # TODO determine what these variables are and where they are from + for path in (image_path, rms_path, bkg_path): + stokes_dir = f"{path.parent.parent.name}_CORRECTED" + output_dir = vast_tile_data_root / stokes_dir / epoch_dir + output_dir.mkdir(parents=True, exist_ok=True) + _ = shift_and_scale_image( + path, + output_dir, + flux_scale=corrections.flux_peak_correction_multiplicative, + flux_offset_mJy=corrections.flux_peak_correction_additive, + ra_offset_arcsec=corrections.ra_correction, + dec_offset_arcsec=corrections.dec_correction, + overwrite=overwrite, + ) + + # correct catalogs + for components_path in chain.from_iterable(components_path_glob_list): + epoch_dir = components_path.parent.name + _, _, field, sbid_str, *_ = components_path.name.split(".") + sbid = int(sbid_str[2:]) + # get island catalog + islands_path = components_path.with_name( + components_path.name.replace(".components", ".islands") + ) + # get corrections + skip = False + try: + corrections = corrections_df.loc[(epoch_dir, field, sbid)] + except KeyError: + skip = True + logger.warning( + f"Corrections not found for {components_path} ({epoch_dir}, {field}," + f" {sbid})." 
+ ) + if not islands_path.exists(): + logger.warning(f"Islands catalogue not found for {components_path}.") + skip = not islands_path.exists() or skip + if skip: + logger.warning(f"Skipping {components_path}.") + continue + + for path in (components_path, islands_path): + stokes_dir = f"{path.parent.parent.name}_CORRECTED" + output_dir = vast_tile_data_root / stokes_dir / epoch_dir + output_dir.mkdir(parents=True, exist_ok=True) + _ = shift_and_scale_catalog( + path, + output_dir, + flux_scale=corrections.flux_peak_correction_multiplicative, + flux_offset_mJy=corrections.flux_peak_correction_additive, + ra_offset_arcsec=corrections.ra_correction, + dec_offset_arcsec=corrections.dec_correction, + overwrite=overwrite, + ) diff --git a/vast_post_processing/crop.py b/vast_post_processing/crop.py index 946f86b..44200d9 100644 --- a/vast_post_processing/crop.py +++ b/vast_post_processing/crop.py @@ -20,7 +20,7 @@ from pathlib import Path from itertools import chain -warnings.filterwarnings('ignore', category=FITSFixedWarning) +warnings.filterwarnings("ignore", category=FITSFixedWarning) def get_field_centre(header): @@ -28,14 +28,15 @@ def get_field_centre(header): w = WCS(header, naxis=2) size_x = header["NAXIS1"] size_y = header["NAXIS2"] - field_centre = w.pixel_to_world(size_x/2, size_y/2) - + field_centre = w.pixel_to_world(size_x / 2, size_y / 2) + logger.debug(field_centre) return field_centre -def crop_hdu(hdu, field_centre, size=6.3*u.deg, rotation=0.0*u.deg): - if rotation != 0.0*u.deg: + +def crop_hdu(hdu, field_centre, size=6.3 * u.deg, rotation=0.0 * u.deg): + if rotation != 0.0 * u.deg: raise NotImplementedError("Rotation handling is not yet available") logger.debug("Cropping HDU") wcs = WCS(hdu.header, naxis=2) @@ -43,91 +44,87 @@ def crop_hdu(hdu, field_centre, size=6.3*u.deg, rotation=0.0*u.deg): data = hdu.data if data.ndim == 4: - data = data[0,0,:,:] - - cutout = Cutout2D(data, - position=field_centre, - size=size, - wcs=wcs - ) + data = data[0, 0, :, :] + + cutout = Cutout2D(data, position=field_centre, size=size, wcs=wcs) hdu.data = cutout.data hdu.header.update(cutout.wcs.to_header()) - - coord_str = field_centre.to_string('hmsdms', sep=':') - hdu.header.add_history(f"Cropped to a {size.to(u.deg):.1f} deg square " - f"centered on {coord_str} on {datetime.now()}") + + coord_str = field_centre.to_string("hmsdms", sep=":") + hdu.header.add_history( + f"Cropped to a {size.to(u.deg):.1f} deg square " + f"centered on {coord_str} on {datetime.now()}" + ) return hdu - + + def crop_catalogue(vot, cropped_hdu, field_centre, size): logger.debug("Cropping catalogue") votable = vot.get_first_table() - + cropped_wcs = WCS(cropped_hdu.header, naxis=2) - + ra_deg = votable.array["col_ra_deg_cont"] * u.deg dec_deg = votable.array["col_dec_deg_cont"] * u.deg sc = SkyCoord(ra_deg, dec_deg) - + in_footprint = cropped_wcs.footprint_contains(sc) votable.array = votable.array[in_footprint] - + return votable - + + def wcs_to_moc(cropped_hdu): logger.debug("Creating MOC") - + cropped_wcs = WCS(cropped_hdu.header, naxis=2) - + nx, ny = cropped_wcs._naxis sc1 = wcs.utils.pixel_to_skycoord(0, 0, cropped_wcs) - sc2 = wcs.utils.pixel_to_skycoord(0, ny-1, cropped_wcs) - sc4 = wcs.utils.pixel_to_skycoord(nx-1, 0, cropped_wcs) - sc3 = wcs.utils.pixel_to_skycoord(nx-1, ny-1, cropped_wcs) - - sc = SkyCoord([sc1,sc2,sc3,sc4]) - + sc2 = wcs.utils.pixel_to_skycoord(0, ny - 1, cropped_wcs) + sc4 = wcs.utils.pixel_to_skycoord(nx - 1, 0, cropped_wcs) + sc3 = wcs.utils.pixel_to_skycoord(nx - 1, ny - 1, 
cropped_wcs) + + sc = SkyCoord([sc1, sc2, sc3, sc4]) + return MOC.from_polygon_skycoord(sc) + def moc_to_stmoc(moc, hdu): - start = Time([hdu.header['DATE-BEG']]) - end = Time([hdu.header['DATE-END']]) - + start = Time([hdu.header["DATE-BEG"]]) + end = Time([hdu.header["DATE-END"]]) + stmoc = STMOC.from_spatial_coverages(start, end, [moc]) - + return stmoc -def run_full_crop(data_root: Union[str, Path], - crop_size: u.quantity.Quantity, - epoch: Union[str, int, list], - stokes: str, - out_root: Optional[Union[str, Path]]=None, - create_moc: Optional[bool]=False, - overwrite: Optional[bool]=False, - ): +def run_full_crop( + data_root: Union[str, Path], + crop_size: u.quantity.Quantity, + epoch: Union[str, int, list], + stokes: str, + out_root: Optional[Union[str, Path]] = None, + create_moc: Optional[bool] = False, + overwrite: Optional[bool] = False, +): if out_root is None: out_root = data_root image_path_glob_list: list[Generator[Path, None, None]] = [] - + image_root = data_root / f"STOKES{stokes}_IMAGES" logger.debug(image_root) - - - + if type(epoch) is int: epoch = list(epoch) if epoch is None or len(epoch) == 0: - image_path_glob_list.append( - image_root.glob(f"epoch_*/*.fits") - ) + image_path_glob_list.append(image_root.glob(f"epoch_*/*.fits")) else: for n in epoch: - image_path_glob_list.append( - image_root.glob(f"epoch_{n}/*.fits") - ) - + image_path_glob_list.append(image_root.glob(f"epoch_{n}/*.fits")) + for image_path in chain.from_iterable(image_path_glob_list): logger.info(f"Working on {image_path}...") epoch_dir = image_path.parent.name @@ -140,33 +137,29 @@ def run_full_crop(data_root: Union[str, Path], / epoch_dir / f"noiseMap.{image_path.name}" ) - + bkg_path = ( data_root / f"STOKES{stokes}_RMSMAPS" / epoch_dir / f"meanMap.{image_path.name}" ) - + # get selavy files - components_name = f"selavy-{image_path.name}".replace(".fits", - ".components.xml" - ) - islands_name = components_name.replace("components", "islands") - - selavy_dir = ( - data_root - / f"STOKES{stokes}_SELAVY" - / epoch_dir + components_name = f"selavy-{image_path.name}".replace( + ".fits", ".components.xml" ) + islands_name = components_name.replace("components", "islands") + + selavy_dir = data_root / f"STOKES{stokes}_SELAVY" / epoch_dir components_path = selavy_dir / components_name islands_path = selavy_dir / islands_name - + exists = True if not rms_path.exists(): exists = False logger.warning(f"noisemap file ({rms_path}) is missing.") - + if not bkg_path.exists(): exists = False logger.warning(f"meanmap file ({bkg_path}) is missing.") @@ -178,79 +171,74 @@ def run_full_crop(data_root: Union[str, Path], logger.warning(f"selavy islands file ({islands_path}) is missing.") if not exists: logger.warning(f"Skipping {image_path} due to missing files.") - + for path in (rms_path, bkg_path, image_path): stokes_dir = f"{path.parent.parent.name}_CROPPED" fits_output_dir = out_root / stokes_dir / epoch_dir - + if not fits_output_dir.exists(): fits_output_dir.mkdir(parents=True) - + outfile = fits_output_dir / path.name hdu = fits.open(path)[0] field_centre = get_field_centre(hdu.header) cropped_hdu = crop_hdu(hdu, field_centre, size=crop_size) cropped_hdu.writeto(outfile, overwrite=overwrite) logger.debug(f"Wrote {outfile}") - - + # Crop the catalogues stokes_dir = f"{components_path.parent.parent.name}_CROPPED" cat_output_dir = out_root / stokes_dir / epoch_dir - + if not cat_output_dir.exists(): cat_output_dir.mkdir(parents=True) - + components_outfile = cat_output_dir / components_path.name 
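+ # the cropped component and island catalogues keep their original filenames under the *_CROPPED output tree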
islands_outfile = cat_output_dir / islands_path.name - + components_vot = parse(str(components_path)) islands_vot = parse(str(islands_path)) - + # This uses the last cropped hdu from the above for loop # which should be the image file, but doesn't actually matter - cropped_components_vot = crop_catalogue(components_vot, - cropped_hdu, - field_centre, - size - ) - cropped_islands_vot = crop_catalogue(islands_vot, - cropped_hdu, - field_centre, - size - ) + cropped_components_vot = crop_catalogue( + components_vot, cropped_hdu, field_centre, size + ) + cropped_islands_vot = crop_catalogue( + islands_vot, cropped_hdu, field_centre, size + ) if components_outfile.exists() and not overwrite: logger.critical(f"{components_outfile} exists, not overwriting") else: components_vot.to_xml(str(components_outfile)) logger.debug(f"Wrote {components_outfile}") - + if islands_outfile.exists() and not overwrite: logger.critical(f"{components_outfile} exists, not overwriting") else: components_vot.to_xml(str(islands_outfile)) logger.debug(f"Wrote {islands_outfile}") - + # Create the MOC if not create_moc: continue moc_dir = f"STOKES{stokes}_MOC_CROPPED" moc_output_dir = out_root / moc_dir / epoch_dir - - moc_filename = image_path.name.replace('.fits','.moc.fits') + + moc_filename = image_path.name.replace(".fits", ".moc.fits") moc_outfile = moc_output_dir / moc_filename - + if not moc_output_dir.exists(): moc_output_dir.mkdir(parents=True) moc = vpc.wcs_to_moc(cropped_hdu) moc.write(moc_outfile, overwrite=overwrite) logger.debug(f"Wrote {moc_outfile}") - - stmoc_filename = image_path.name.replace('.fits','.stmoc.fits') + + stmoc_filename = image_path.name.replace(".fits", ".stmoc.fits") stmoc_outfile = moc_output_dir / stmoc_filename - + stmoc = vpc.moc_to_stmoc(moc, cropped_hdu) stmoc.write(stmoc_outfile, overwrite=overwrite) logger.debug("Wrote {stmoc_outfile}") diff --git a/vast_post_processing/neighbours.py b/vast_post_processing/neighbours.py index ebbc0fa..2545213 100644 --- a/vast_post_processing/neighbours.py +++ b/vast_post_processing/neighbours.py @@ -553,3 +553,174 @@ def convolve_image( return output_dir_path / output_filename else: return None + + +# Separated logic + +"""Requires setup_neighbours.py to be run first. +""" +from dataclasses import dataclass, fields +from functools import partial +from pathlib import Path +from typing import Optional, List + +from loguru import logger +from racs_tools import beamcon_2D +from radio_beam import Beam +import typer + +from vast_post_processing.cli._util import get_pool, _get_worker_name + + +@dataclass +class WorkerArgs: + image_path: Path + output_dir_path: Path + target_beam: Beam + mode: str + suffix: str = "sm" + prefix: Optional[str] = None + cutoff: Optional[float] = None + dry_run: bool = False + + def __iter__(self): + # Makes the class fields iterable so they can be unpacked + # e.g. func(*args) where args is a WorkerArgs object. 
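+ # worker() below relies on this to call convolve_image(*args) with a WorkerArgs instance.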
+ return (getattr(self, field.name) for field in fields(self)) + + +def worker(args: WorkerArgs, mpi: bool = False, n_proc: int = 1): + with logger.contextualize(worker_name=_get_worker_name(mpi=mpi, n_proc=n_proc)): + return convolve_image(*args) + + +def convolve_neighbours( + neighbour_data_dir: Path, + n_proc: int = 1, + mpi: bool = False, + max_images: Optional[int] = None, + racs: bool = False, + field_list: Optional[List[str]] = typer.Option(None, "--field"), +): + # neighbour_data_dir has the structure: + # //inputs contains the input FITS images + # to be convolved to a common resolution and their weights FITS images. + + pool = get_pool(mpi=mpi, n_proc=n_proc) + logger.debug(f"pool created, type: {type(pool)}") + + glob_expr = "RACS_*" if racs else "VAST_*" + worker_args_list: list[WorkerArgs] = [] + n_images: int = 0 + for field_dir in neighbour_data_dir.glob(glob_expr): + if field_list and field_dir.name not in field_list: + logger.info( + f"Glob found field {field_dir} but it was not given as a --field option. Skipping." + ) + continue + if max_images is not None and n_images >= max_images: + logger.warning( + f"Reached maximum image limit of {max_images}. Skipping remaining images." + ) + break + if len(list(field_dir.glob("*.sm.fits"))) > 0: + logger.warning(f"Smoothed images already exist in {field_dir}. Skipping.") + continue + image_path_list = list(field_dir.glob("inputs/image.*.fits")) + logger.debug(f"Found {len(image_path_list)} images for {field_dir.name}") + # find the smallest common beam + common_beam, _ = beamcon_2D.getmaxbeam(image_path_list) + logger.debug( + f"{field_dir} common beam major {common_beam.major} type" + f" {type(common_beam)}" + ) + for image_path in image_path_list: + worker_args = WorkerArgs( + image_path=image_path, + output_dir_path=field_dir, + target_beam=common_beam, + mode="robust", + ) + worker_args_list.append(worker_args) + n_images += 1 + if max_images is not None and n_images >= max_images: + logger.warning( + f"Reached maximum image limit of {max_images}. Skipping remaining images." 
+ ) + break + + # start convolutions + _ = list(pool.map(partial(worker, mpi=mpi, n_proc=n_proc), worker_args_list)) + pool.close() + + +def link_neighbours( + release_epoch: str, + vast_data_root: Path, + release_epochs_csv: Path, + output_root: Path, + vast_db_repo: Path, + racs_db_repo: Optional[Path], + overlap_frac_thresh: float, + use_corrected: bool, + neighbours_output: Optional[Path], + make_links: bool, +): + # get the release epochs + release_epochs = read_release_epochs(release_epochs_csv) + # get the neighbours DataFrame and filter for the requested release epoch and + # overlap area threshold + vast_neighbours_df = find_vast_neighbours_by_release_epoch( + release_epoch, + vast_data_root, + vast_db_repo, + release_epochs, + racs_db_repo=racs_db_repo, + use_corrected=use_corrected, + ).query( + "release_epoch_a == @release_epoch and overlap_frac >= @overlap_frac_thresh" + ) + + if neighbours_output is not None: + vast_neighbours_df[ + [ + "field_a", + "sbid_a", + "obs_epoch_a", + "release_epoch_a", + "field_b", + "sbid_b", + "obs_epoch_b", + "release_epoch_b", + "overlap_frac", + "delta_t_days", + ] + ].to_csv(neighbours_output, index=False) + + # create a directory for each field and create links to the neighbouring images + if make_links: + release_output_path = output_root / release_epoch + release_output_path.mkdir(parents=True, exist_ok=True) + for _, obs_pair in vast_neighbours_df.iterrows(): + # create directories + field_inputs_path_a = release_output_path / obs_pair.field_a / "inputs" + field_inputs_path_a.mkdir(parents=True, exist_ok=True) + field_inputs_path_b = release_output_path / obs_pair.field_b / "inputs" + field_inputs_path_b.mkdir(parents=True, exist_ok=True) + + # create a hard link for each field in the pair in both directions, e.g. 
+ # A/inputs/A.fits, A/inputs/B.fits, B/inputs/A.fits, B/inputs/B.fits (plus weights) + for output_path in (field_inputs_path_a, field_inputs_path_b): + target_image_a = output_path / obs_pair.image_path_a.name + target_weights_a = output_path / obs_pair.weights_path_a.name + if not target_image_a.exists(): + obs_pair.image_path_a.link_to(target_image_a) + if not target_weights_a.exists(): + obs_pair.weights_path_a.link_to(target_weights_a) + + target_image_b = output_path / obs_pair.image_path_b.name + target_weights_b = output_path / obs_pair.weights_path_b.name + if not target_image_b.exists(): + obs_pair.image_path_b.link_to(target_image_b) + if not target_weights_b.exists(): + obs_pair.weights_path_b.link_to(target_weights_b) diff --git a/vast_post_processing/utils/fileutils.py b/vast_post_processing/utils/fileutils.py index a6c4ec9..7e037ec 100644 --- a/vast_post_processing/utils/fileutils.py +++ b/vast_post_processing/utils/fileutils.py @@ -24,3 +24,11 @@ def cleanup_directory(directory: Path): logger.info(f"Deleted directory {path}.") else: logger.debug(f"Leaving {path}.") + + +def cleanup(neighbour_data_dir: Path, delete_all: bool = False): + if delete_all: + rmtree(neighbour_data_dir) + else: + for field_path in neighbour_data_dir.glob("VAST_*"): + cleanup_directory(field_path) From 07ae31c3c243c5aff1f1f62d1a744d94ddbe53c5 Mon Sep 17 00:00:00 2001 From: Akash Date: Thu, 10 Aug 2023 13:44:49 -0500 Subject: [PATCH 13/31] Corrected the wrong path for the catalog files --- vast_post_processing/cli/run_corrections.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/vast_post_processing/cli/run_corrections.py b/vast_post_processing/cli/run_corrections.py index 99bb051..9eac01b 100644 --- a/vast_post_processing/cli/run_corrections.py +++ b/vast_post_processing/cli/run_corrections.py @@ -248,22 +248,31 @@ def main( logger.warning(f"Background image not found for {image_path}.") # Look for any component and island files correspnding to this image + image_root = image_path.parent.as_posix() + catalog_root = image_root.replace("IMAGES", "SELAVY") - component_file = Path(ref_file) - island_file = Path(ref_file.replace("components", "islands")) + catalog_filename = image_path.name.replace("image", "selavy-image") + catalog_filename = catalog_filename.replace(".fits", ".components.xml") + + catalog_filepath = f"{catalog_root}/{catalog_filename}" + + component_file = Path(catalog_filepath) + island_file = Path(catalog_filepath.replace("components", "islands")) skip = ( not ( (rms_path.exists()) and (bkg_path.exists()) and (ref_file is not None) - and (component_file is not None) + and (component_file.exists()) ) or skip ) if skip: if not ((rms_path.exists()) and (bkg_path.exists())): logger.warning(f"Skipping {image_path}, RMS/BKG maps do not exist") + elif not (component_file.exists()): + logger.warning(f"Skipping {image_path}, catalog files do not exist") elif ref_file is None: logger.warning(f"Skipping {image_path}, no reference field found.") continue From b2fccaf14e4784b950a01d04ec957323907356e6 Mon Sep 17 00:00:00 2001 From: Akash Date: Mon, 14 Aug 2023 00:36:32 -0500 Subject: [PATCH 14/31] Re-organized code so that this can be passed to cropping, added docstrings, cleaned the redundant parts of the code --- vast_post_processing/catalogs.py | 375 ++++++++++-------- vast_post_processing/cli/run_corrections.py | 283 ++------------ vast_post_processing/corrections.py | 405 ++++++++++++++++++-- vast_post_processing/crossmatch.py | 43 ++- 4 files changed, 643 
insertions(+), 463 deletions(-) diff --git a/vast_post_processing/catalogs.py b/vast_post_processing/catalogs.py index a75b000..a83b094 100644 --- a/vast_post_processing/catalogs.py +++ b/vast_post_processing/catalogs.py @@ -27,150 +27,37 @@ "rms_image": u.mJy / u.beam, } - -class UnknownCatalogInputFormat(Exception): - pass - - -class Catalog: - CATALOG_TYPE_TILE = "TILE" - CATALOG_TYPE_COMBINED = "COMBINED" - CATALOG_TYPES = ( - CATALOG_TYPE_TILE, - CATALOG_TYPE_COMBINED, - ) - - def __init__( - self, - path: Path, - psf: Optional[Tuple[float, float]] = None, - input_format: str = "selavy", - condon: bool = False, - positive_fluxes_only: bool = True, - ): - self.path: Path - self.table: QTable - self.field: Optional[str] - self.epoch: Optional[str] - self.sbid: Optional[str] - self.psf_major: Optional[u.Quantity] - self.psf_minor: Optional[u.Quantity] - self.type: str - - # read catalog - if input_format == "selavy": - if path.suffix == ".txt": - logger.debug("Reading %s as a Selavy txt catalog.", path) - read_catalog = read_selavy - else: - logger.debug("Reading %s as a Selavy VOTable catalog.", path) - read_catalog = read_selavy_votable - elif input_format == "aegean": - logger.debug("Reading %s as an Aegean catalog.", path) - read_catalog = read_aegean_csv - else: - logger.error( - "The format of input files is not supported. Only selavy and aegean are supported" - ) - raise SystemExit - self.path = path - self.table = read_catalog(path) - - # filter sources with bad sizes and optionally negative/0 fluxes - if positive_fluxes_only: - logger.info( - "Filtering %d sources with fluxes <= 0.", - (self.table["flux_peak"] <= 0).sum(), - ) - self.table = self.table[self.table["flux_peak"] > 0] - logger.info( - "Filtering %d sources with fitted sizes <= 0.", - ((self.table["maj_axis"] <= 0) | (self.table["min_axis"] <= 0)).sum(), - ) - self.table = self.table[ - (self.table["maj_axis"] > 0) & (self.table["min_axis"] > 0) - ] - - # read epoch, field, sbid, psf's - epoch_name = path.parent.name - _, _, field, sbid, *_ = path.name.split(".") - self.epoch = epoch_name - self.field = field.replace("VAST_", "") - self.sbid = sbid - - if psf is not None: - self.psf_major, self.psf_minor = psf * u.arcsec - logger.debug( - "Using user provided PSF for %s: %s, %s.", - self.path, - self.psf_major, - self.psf_minor, - ) - else: - logger.warning( - "PSF is unknown for %s. 
Condon errors will be unavailable.", self.path - ) - self.psf_major = None - self.psf_minor = None - - if condon and self.psf_major is not None and self.psf_minor is not None: - _ = self.calculate_condon_flux_errors(correct_peak_for_noise=True) - logger.debug("Condon errors computed for %s.", self.path) - - def calculate_condon_flux_errors( - self, - alpha_maj1=2.5, - alpha_min1=0.5, - alpha_maj2=0.5, - alpha_min2=2.5, - alpha_maj3=1.5, - alpha_min3=1.5, - clean_bias=0.0, - clean_bias_error=0.0, - frac_flux_cal_error=0.0, - correct_peak_for_noise=False, - ): - noise = self.table["rms_image"] - snr = self.table["flux_peak"] / noise - - rho_sq3 = ( - ( - self.table["maj_axis"] - * self.table["min_axis"] - / (4.0 * self.psf_major * self.psf_minor) - ) - * (1.0 + (self.psf_major / self.table["maj_axis"]) ** 2) ** alpha_maj3 - * (1.0 + (self.psf_minor / self.table["min_axis"]) ** 2) ** alpha_min3 - * snr**2 - ) - - flux_peak_col = self.table["flux_peak"] - flux_peak_condon = self.table["flux_peak"] + ( - -(noise**2) / self.table["flux_peak"] + clean_bias - ) - if correct_peak_for_noise: - flux_peak_col = flux_peak_condon - - errorpeaksq = ( - (frac_flux_cal_error * flux_peak_col) ** 2 - + clean_bias_error**2 - + 2.0 * flux_peak_col**2 / rho_sq3 - ) - errorpeak = np.sqrt(errorpeaksq) - - self.table["flux_peak_condon"] = flux_peak_condon - self.table["flux_peak_selavy"] = self.table["flux_peak"] - self.table["flux_peak_err_condon"] = errorpeak - self.table["flux_peak_err_selavy"] = self.table["flux_peak_err"] - self.table["flux_peak_err"] = self.table["flux_peak_err_condon"] - if correct_peak_for_noise: - self.table["flux_peak"] = self.table["flux_peak_condon"] - return flux_peak_condon, errorpeak +AEGEAN_COLUMN_MAP = { + # aegean name: (selavy name, aegean unit) + "ra": ("ra_deg_cont", u.deg), + "dec": ("dec_deg_cont", u.deg), + "err_ra": ("ra_err", u.deg), + "err_dec": ("dec_err", u.deg), + "peak_flux": ("flux_peak", u.Jy / u.beam), + "err_peak_flux": ("flux_peak_err", u.Jy / u.beam), + "a": ("maj_axis", u.arcsec), + "b": ("min_axis", u.arcsec), + "pa": ("pos_ang", u.arcsec), + "err_a": ("maj_axis_err", u.arcsec), + "err_b": ("min_axis_err", u.deg), + "err_pa": ("pos_ang_err", u.deg), + "local_rms": ("rms_image", u.Jy / u.beam), +} def _convert_selavy_columns_to_quantites( qt: QTable, units: Dict[str, u.Unit] = SELAVY_COLUMN_UNITS ) -> QTable: + """Takes in a selavy component table and adds units to respective quantities + + Args: + qt (QTable): the component catalog + units (Dict[str, u.Unit], optional): The dictionary with parameters and + their units. Defaults to SELAVY_COLUMN_UNITS. 
+ + Returns: + QTable: Table with units to the parameters + """ for col, unit in units.items(): qt[col].unit = unit return qt @@ -210,6 +97,14 @@ def read_selavy(catalog_path: Path) -> QTable: def read_selavy_votable(catalog_path: Path) -> QTable: + """Helper function to read the selavy catalog, if the input format is votable + + Args: + catalog_path (Path): Input Path to the catalog file + + Returns: + QTable: The component table + """ t = Table.read(catalog_path, format="votable", use_names_over_ids=True) # remove units from str columns and fix unrecognized flux units for col in t.itercols(): @@ -225,17 +120,6 @@ def read_selavy_votable(catalog_path: Path) -> QTable: return qt -def read_hdf(catalog_path: Path) -> pd.DataFrame: - df = pd.read_hdf(catalog_path, key="data") - df["field"] = df.field.str.split(".", n=1, expand=True)[0] - qt = _convert_selavy_columns_to_quantites(QTable.from_pandas(df)) - qt["coord"] = SkyCoord(ra=qt["ra_deg_cont"], dec=qt["dec_deg_cont"]) - _, qt["nn_separation"], _ = qt["coord"].match_to_catalog_sky( - qt["coord"], nthneighbor=2 - ) - return qt - - def read_aegean_csv(catalog_path: Path) -> QTable: """Read an Aegean CSV component catalog and return a QTable. Assumed to contain at least the following columns with the given units: @@ -260,22 +144,6 @@ def read_aegean_csv(catalog_path: Path) -> QTable: - `nn_separation`: separation to the nearest-neighbour source as a Quantity with angular units. """ - AEGEAN_COLUMN_MAP = { - # aegean name: (selavy name, aegean unit) - "ra": ("ra_deg_cont", u.deg), - "dec": ("dec_deg_cont", u.deg), - "err_ra": ("ra_err", u.deg), - "err_dec": ("dec_err", u.deg), - "peak_flux": ("flux_peak", u.Jy / u.beam), - "err_peak_flux": ("flux_peak_err", u.Jy / u.beam), - "a": ("maj_axis", u.arcsec), - "b": ("min_axis", u.arcsec), - "pa": ("pos_ang", u.arcsec), - "err_a": ("maj_axis_err", u.arcsec), - "err_b": ("min_axis_err", u.deg), - "err_pa": ("pos_ang_err", u.deg), - "local_rms": ("rms_image", u.Jy / u.beam), - } qt = QTable.read(catalog_path) # rename columns to match selavy convention and assign units for col, (new_col, unit) in AEGEAN_COLUMN_MAP.items(): @@ -296,3 +164,178 @@ def read_aegean_csv(catalog_path: Path) -> QTable: qt["coord"], nthneighbor=2 ) return qt + + +class Catalog: + """Class to make a catalog object from the selavy/Aegean files. This + is then used for catalog matching between the referecne catalog and + the current catalog to select for sources and get flux and astrometric + corrections. 
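    Example (illustrative sketch only; the file name and PSF values are
    placeholders, not taken from a real observation):

        cat = Catalog(
            Path("epoch_29/selavy-image.i.VAST_0012+00.SB11234.cont.taylor.0.restored.components.xml"),
            psf=(12.5, 10.2),  # assumed major/minor FWHM in arcsec
            condon=True,
        )
        # cat.table then holds the filtered components with Condon (1997) errors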
+ + Raises: + SystemExit: if the input catalog files are other than Selavy/Aegean + products + """ + + def __init__( + self, + path: Path, + psf: Optional[Tuple[float, float]] = None, + input_format: str = "selavy", + condon: bool = False, + apply_flux_limit: bool = True, + flux_limit: float = 0, + ): + self.path: Path + self.table: QTable + self.input_format: Optional[str] + self.flux_flag: Optional[bool] + self.flux_lim: Optional[float] + self.field: Optional[str] + self.epoch: Optional[str] + self.sbid: Optional[str] + self.psf_major: Optional[u.Quantity] + self.psf_minor: Optional[u.Quantity] + self.type: str + + self.path = path + self.input_format = input_format + self.flux_flag = apply_flux_limit + self.flux_lim = flux_limit + + # Read the catalog + self._read_catalog() + + # Filter sources + self._filter_sources() + + # Get epoch, field, sbid from the file name + epoch_name = path.parent.name + _, _, field, sbid, *_ = path.name.split(".") + self.epoch = epoch_name + self.field = field.replace("VAST_", "") + self.sbid = sbid + + # Parse the psf info + if psf is not None: + self.psf_major, self.psf_minor = psf * u.arcsec + logger.debug( + "Using user provided PSF for %s: %s, %s.", + self.path, + self.psf_major, + self.psf_minor, + ) + else: + logger.warning( + "PSF is unknown for %s. Condon errors will be unavailable.", self.path + ) + self.psf_major = None + self.psf_minor = None + + # Calculate the covariant error using Condon 1997 + if condon and self.psf_major is not None and self.psf_minor is not None: + self.calculate_condon_flux_errors(correct_peak_for_noise=True) + logger.debug("Condon errors computed for %s.", self.path) + + def _read_catalog(self): + """Helper function to read and parse the input files + + Raises: + SystemExit: if the input catalog files are other than Selavy/Aegean + products + """ + path = self.path + if self.input_format == "selavy": + if path.suffix == ".txt": + logger.debug("Reading %s as a Selavy txt catalog.", path) + read_catalog = read_selavy + else: + logger.debug("Reading %s as a Selavy VOTable catalog.", path) + read_catalog = read_selavy_votable + elif self.input_format == "aegean": + logger.debug("Reading %s as an Aegean catalog.", path) + read_catalog = read_aegean_csv + else: + logger.error( + "The format of input files is not supported. Only selavy and aegean are supported" + ) + raise SystemExit + + self.table = read_catalog(path) + + def _filter_sources(self): + """Helper function to filter sources that are used for cross-match; + filter sources with bad sizes and optionally given flux limits""" + if self.flux_flag: + lim = self.flux_lim + logger.info( + f"Filtering %d sources with fluxes <= {lim}", + (self.table["flux_peak"] <= lim).sum(), + ) + self.table = self.table[self.table["flux_peak"] > lim] + logger.info( + "Filtering %d sources with fitted sizes <= 0.", + ((self.table["maj_axis"] <= 0) | (self.table["min_axis"] <= 0)).sum(), + ) + self.table = self.table[ + (self.table["maj_axis"] > 0) & (self.table["min_axis"] > 0) + ] + + def calculate_condon_flux_errors( + self, + alpha_maj: float = 1.5, + alpha_min: float = 1.5, + clean_bias: float = 0.0, + clean_bias_error: float = 0.0, + frac_flux_cal_error: float = 0.0, + correct_peak_for_noise: bool = False, + ): + """Calculates the covariant error using Condon 1997. See equation 41 + of Condon 1997 for reference + + Args: + alpha_maj (float, optional): power for major axis correction. Defaults to 1.5 + alpha_min (float, optional): power for major axis correction. Defaults to 1.5. 
+ clean_bias (float, optional): additive flux bias. Defaults to 0.0. + clean_bias_error (float, optional): error in additive flux bias. Defaults to 0.0. + frac_flux_cal_error (float, optional): multiplicative flux error. Defaults to 0.0. + correct_peak_for_noise (bool, optional): flag to re-write the peak flux from + selavy. Defaults to False. + """ + noise = self.table["rms_image"] + snr = self.table["flux_peak"] / noise + + # See equation 41 of Condon 1997 to calculate the signal to noise + rho_sq3 = ( + ( + self.table["maj_axis"] + * self.table["min_axis"] + / (4.0 * self.psf_major * self.psf_minor) + ) + * (1.0 + (self.psf_major / self.table["maj_axis"]) ** 2) ** alpha_maj + * (1.0 + (self.psf_minor / self.table["min_axis"]) ** 2) ** alpha_min + * snr**2 + ) + + # Correct the peak flux now. + flux_peak_col = self.table["flux_peak"] + flux_peak_condon = self.table["flux_peak"] + ( + -(noise**2) / self.table["flux_peak"] + clean_bias + ) + if correct_peak_for_noise: + flux_peak_col = flux_peak_condon + + errorpeaksq = ( + (frac_flux_cal_error * flux_peak_col) ** 2 + + clean_bias_error**2 + + 2.0 * flux_peak_col**2 / rho_sq3 + ) + errorpeak = np.sqrt(errorpeaksq) + + self.table["flux_peak_condon"] = flux_peak_condon + self.table["flux_peak_selavy"] = self.table["flux_peak"] + self.table["flux_peak_err_condon"] = errorpeak + self.table["flux_peak_err_selavy"] = self.table["flux_peak_err"] + self.table["flux_peak_err"] = self.table["flux_peak_err_condon"] + if correct_peak_for_noise: + self.table["flux_peak"] = self.table["flux_peak_condon"] diff --git a/vast_post_processing/cli/run_corrections.py b/vast_post_processing/cli/run_corrections.py index 9eac01b..811c8ce 100644 --- a/vast_post_processing/cli/run_corrections.py +++ b/vast_post_processing/cli/run_corrections.py @@ -1,108 +1,10 @@ from loguru import logger from pathlib import Path -from typing import Optional, Tuple, Generator -from astropy.coordinates import Angle -import astropy.units as u -import click, sys, os +from typing import Optional from uncertainties import ufloat -from itertools import chain -import pandas as pd -import typer -from astropy.table import QTable -from astropy.io import fits -from astropy import units as u -from vast_post_processing.catalogs import Catalog +import typer, sys -from vast_post_processing.corrections import ( - shift_and_scale_catalog, - shift_and_scale_image, - vast_xmatch_qc, -) - - -class _AstropyUnitType(click.ParamType): - def convert(self, value, param, ctx, unit_physical_type): - try: - unit = u.Unit(value) - except ValueError: - self.fail(f"astropy.units.Unit does not understand: {value}.") - if unit.physical_type != unit_physical_type: - self.fail( - f"{unit} is a {unit.physical_type} unit. It must be of type" - f" {unit_physical_type}." 
- ) - else: - return unit - - -class AngleUnitType(_AstropyUnitType): - name = "angle_unit" - - def convert(self, value, param, ctx): - return super().convert(value, param, ctx, "angle") - - -class FluxUnitType(_AstropyUnitType): - name = "flux_unit" - - def convert(self, value, param, ctx): - return super().convert(value, param, ctx, "spectral flux density") - - -class AngleQuantityType(click.ParamType): - name = "angle_quantity" - - def convert(self, value, param, ctx): - try: - angle = Angle(value) - return angle - except ValueError: - self.fail(f"astropy.coordinates.Angle does not understand: {value}.") - - -ANGLE_UNIT_TYPE = AngleUnitType() -FLUX_UNIT_TYPE = FluxUnitType() -ANGLE_QUANTITY_TYPE = AngleQuantityType() - - -def get_correct_correction_file(correction_files_list, img_field): - count = 0 - for f in correction_files_list: - filename = f.name - _, _, field, *_ = filename.split(".") - field = field.replace("RACS", "VAST") - if (field in img_field) and ("components" in filename): - count += 1 - return f.as_posix() - else: - continue - if count == 0: - return None - - -def get_psf_from_image(image_path: str): - """ - Funtion used to get the point spread function (PSF) extent in major and minor axis. - These will be in the header of the image file - - Parameters - ---------- - image_path: str - Path to the image file - - Returns - ------- - Tuple(psf_major, psf_minor) - Major and minor axes of the PSF. - """ - image_path = image_path.replace("SELAVY", "IMAGES") - image_path = image_path.replace("selavy-", "") - image_path = image_path.replace(".components.xml", ".fits") - hdu = fits.open(image_path) - psf_maj = hdu[0].header["BMAJ"] * u.degree - psf_min = hdu[0].header["BMIN"] * u.degree - hdu.close() - return (psf_maj.to(u.arcsec), psf_min.to(u.arcsec)) +from vast_post_processing.corrections import correct_files def main( @@ -169,10 +71,17 @@ def main( "arcsec for `catalog`. First argument is major axis followed by nimor axis." ), ), + outdir: Optional[str] = typer.Option( + None, + help="Stem of the output directory to store the corrected images and cataloges to. The default" + "way is to construct it from the tile directory, by making folders with _CORRECTED tag attached" + "to them as suffix", + ), overwrite: bool = False, verbose: bool = False, ): - """Read astrometric and flux corrections produced by vast-xmatch and apply them to + """ + Read astrometric and flux corrections produced by vast-xmatch and apply them to VAST images and catalogues in vast-data. See https://github.com/marxide/vast-xmatch. 
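    This command is a thin wrapper around
    vast_post_processing.corrections.correct_files and passes the options above
    straight through to it; that function locates the matching reference
    catalogue for each image, derives the corrections, and writes the corrected
    images and catalogues (see its docstring for a usage sketch).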
""" # configure logger @@ -180,164 +89,18 @@ def main( # replace the default sink logger.remove() logger.add(sys.stderr, level="INFO") - - # read corrections - image_path_glob_list: list[Generator[Path, None, None]] = [] - components_path_glob_list: list[Generator[Path, None, None]] = [] - correction_files_path_glob_list: list[Generator[Path, None, None]] = [] - - correction_files_path_glob_list.append(vast_corrections_root.glob("*.xml")) - correction_files_path_glob_list = list(correction_files_path_glob_list[0]) - - if epoch is None or len(epoch) == 0: - image_path_glob_list.append( - vast_tile_data_root.glob("STOKESI_IMAGES/epoch_*/*.fits") - ) - components_path_glob_list.append( - vast_tile_data_root.glob("STOKESI_SELAVY/epoch_*/*.xml") - ) - else: - for n in epoch: - image_path_glob_list.append( - vast_tile_data_root.glob(f"STOKESI_IMAGES/epoch_{n}/*.fits") - ) - components_path_glob_list.append( - vast_tile_data_root.glob(f"STOKESI_SELAVY/epoch_{n}/*.xml") - ) - - # construct output path to store corrections - corr_dir = vast_tile_data_root / "corr_db" - if not os.path.isdir(corr_dir): - os.mkdir(corr_dir) - - # get corrections for an image and the correct it - for image_path in chain.from_iterable(image_path_glob_list): - epoch_dir = image_path.parent.name - _, _, field, sbid_str, *_ = image_path.name.split(".") - sbid = int(sbid_str[2:]) - - # get rms and background images - rms_path = ( - vast_tile_data_root - / "STOKESI_RMSMAPS" - / epoch_dir - / f"noiseMap.{image_path.name}" - ) - bkg_path = ( - vast_tile_data_root - / "STOKESI_RMSMAPS" - / epoch_dir - / f"meanMap.{image_path.name}" - ) - - # construct output path to store corrections for each epoch - epoch_corr_dir = corr_dir / epoch_dir - - if not os.path.isdir(epoch_corr_dir): - os.mkdir(epoch_corr_dir) - - ref_file = get_correct_correction_file( - correction_files_list=correction_files_path_glob_list, - img_field=field, - ) - - skip = False - if not rms_path.exists(): - logger.warning(f"RMS image not found for {image_path}.") - if not bkg_path.exists(): - logger.warning(f"Background image not found for {image_path}.") - - # Look for any component and island files correspnding to this image - image_root = image_path.parent.as_posix() - catalog_root = image_root.replace("IMAGES", "SELAVY") - - catalog_filename = image_path.name.replace("image", "selavy-image") - catalog_filename = catalog_filename.replace(".fits", ".components.xml") - - catalog_filepath = f"{catalog_root}/{catalog_filename}" - - component_file = Path(catalog_filepath) - island_file = Path(catalog_filepath.replace("components", "islands")) - - skip = ( - not ( - (rms_path.exists()) - and (bkg_path.exists()) - and (ref_file is not None) - and (component_file.exists()) - ) - or skip - ) - if skip: - if not ((rms_path.exists()) and (bkg_path.exists())): - logger.warning(f"Skipping {image_path}, RMS/BKG maps do not exist") - elif not (component_file.exists()): - logger.warning(f"Skipping {image_path}, catalog files do not exist") - elif ref_file is None: - logger.warning(f"Skipping {image_path}, no reference field found.") - continue - else: - fname = image_path.name.replace(".fits", "corrections.csv") - crossmatch_file = epoch_corr_dir / fname - csv_file = epoch_corr_dir / "all_fields_corrections.csv" - - # Get the psf measurements to estimate errors follwoing Condon 1997 - if len(psf_ref) > 0: - psf_reference = psf_ref - else: - psf_reference = get_psf_from_image(ref_file) - - if len(psf) > 0: - psf_image = psf - else: - psf_image = 
get_psf_from_image(image_path.as_posix()) - ( - dra_median_value, - ddec_median_value, - flux_corr_mult, - flux_corr_add, - ) = vast_xmatch_qc( - reference_catalog_path=ref_file, - catalog_path=component_file.as_posix(), - radius=Angle(radius * u.arcsec), - condon=condon, - psf_reference=psf_reference, - psf=psf_image, - fix_m=False, - fix_b=False, - crossmatch_output=crossmatch_file, - csv_output=csv_file, - ) - - # get corrections - for path in (image_path, rms_path, bkg_path): - stokes_dir = f"{path.parent.parent.name}_CORRECTED" - output_dir = vast_tile_data_root / stokes_dir / epoch_dir - output_dir.mkdir(parents=True, exist_ok=True) - _ = shift_and_scale_image( - path, - output_dir, - flux_scale=flux_corr_mult.n, - flux_offset_mJy=flux_corr_add.n, - ra_offset_arcsec=dra_median_value.item(), - dec_offset_arcsec=ddec_median_value.item(), - overwrite=overwrite, - ) - - # Do the same for catalog files - for path in (component_file, island_file): - stokes_dir = f"{path.parent.parent.name}_CORRECTED" - output_dir = vast_tile_data_root / stokes_dir / epoch_dir - output_dir.mkdir(parents=True, exist_ok=True) - _ = shift_and_scale_catalog( - path, - output_dir, - flux_scale=flux_corr_mult.n, - flux_offset_mJy=flux_corr_add.n, - ra_offset_arcsec=dra_median_value.item(), - dec_offset_arcsec=ddec_median_value.item(), - overwrite=overwrite, - ) + correct_files( + vast_tile_data_root=vast_tile_data_root, + vast_corrections_root=vast_corrections_root, + epoch=epoch, + radius=radius, + condon=condon, + psf_ref=psf_ref, + psf=psf, + outdir=outdir, + overwrite=overwrite, + verbose=verbose, + ) if __name__ == "__main__": diff --git a/vast_post_processing/corrections.py b/vast_post_processing/corrections.py index 95ada94..90ca726 100644 --- a/vast_post_processing/corrections.py +++ b/vast_post_processing/corrections.py @@ -1,5 +1,7 @@ from pathlib import Path -import warnings +import warnings, sys, os +from typing import Generator +from itertools import chain from astropy.coordinates import SkyCoord, Angle from astropy.io import fits from astropy.io.votable import parse @@ -31,6 +33,39 @@ def vast_xmatch_qc( crossmatch_output: Optional[str] = None, csv_output: Optional[str] = None, ): + """Function to cross-match two catalogs and filter sources that are within + a given radius + + Args: + reference_catalog_path (str): Path to the reference catalog + catalog_path (str): Path to the catalog that needs flux/astrometric corrections + radius (Angle, optional): Cross-match radius. Defaults to Angle("10arcsec"). + condon (bool, optional): Flag to calculate Condon error. Defaults to False. + psf_reference (Optional[Tuple[float, float]], optional): PSF of the reference catalog. + This includes information about the major/minor axis FWHM. Defaults to None. If None, + Condon errors will not be calculated. + psf (Optional[Tuple[float, float]], optional): PSF of the input catalog. + This includes information about the major/minor axis FWHM. Defaults to None. If None, + Condon errors will not be calculated. + fix_m (bool, optional): Flag to fix the slope. For tge straight line fit, should we fix + the slope to certain value or leave it free to be fit. Defaults to False. + fix_b (bool, optional): Flag to fix the intercept. For tge straight line fit, should we fix + the slope to certain value or leave it free to be fit. Defaults to False. + positional_unit (u.Unit, optional): output unit in which the astrometric offset is given. + Defaults to u.Unit("arcsec"). 
+ flux_unit (u.Unit, optional): output unit in which the flux scale is given. + Defaults to u.Unit("mJy"). + crossmatch_output (Optional[str], optional): File path to write the crossmatch output. + Defaults to None, which means no file is written + csv_output (Optional[str], optional): File path to write the flux/astrometric corrections. + Defaults to None, which means no file is written + + Returns: + dra_median_value: The median offset in RA (arcsec) + ddec_median_value: The median offset in DEC (arcsec) + flux_corr_mult: Multiplicative flux correction + flux_corr_add: Additive flux correction + """ # convert catalog path strings to Path objects reference_catalog_path = Path(reference_catalog_path) catalog_path = Path(catalog_path) @@ -125,20 +160,26 @@ def vast_xmatch_qc( def shift_and_scale_image( image_path: Path, - output_dir_path: Path, flux_scale: float = 1.0, flux_offset_mJy: float = 0.0, ra_offset_arcsec: float = 0.0, dec_offset_arcsec: float = 0.0, replace_nan: bool = False, - overwrite: bool = False, -) -> Path: - """Apply astrometric and flux corrections to a FITS image.""" +): + """Apply astrometric and flux corrections to a FITS image. + + Args: + image_path (Path): Path for the input image + flux_scale (float, optional): Multiplicative flux correction. Defaults to 1.0. + flux_offset_mJy (float, optional): Additive flux correction. Defaults to 0.0. + ra_offset_arcsec (float, optional): RA offset in arcsec. Defaults to 0.0. + dec_offset_arcsec (float, optional): DEC offset in arcsec. Defaults to 0.0. + replace_nan (bool, optional): Replace NAN's in the data with 0. Defaults to False. + + Returns: + astropy.io.fits.hdu.image.PrimaryHDU: the HDU of the corrected image + """ logger.debug(f"Correcting {image_path} ...") - output_path = output_dir_path / image_path.with_suffix(".corrected.fits").name - if output_path.exists() and not overwrite: - logger.warning(f"Will not overwrite existing image: {output_path}.") - return output_path image_hdul = fits.open(image_path) image_hdu = image_hdul[0] @@ -173,26 +214,28 @@ def shift_and_scale_image( image_hdu.header["RAOFF"] = ra_offset_arcsec image_hdu.header["DECOFF"] = dec_offset_arcsec - if output_path.exists() and overwrite: - logger.warning(f"Overwriting existing image: {output_path}.") - image_hdul.writeto(str(output_path), overwrite=True) - else: - image_hdul.writeto(str(output_path)) - logger.success(f"Wrote corrected image: {output_path}.") - image_hdul.close() - return output_path + return image_hdul def shift_and_scale_catalog( catalog_path: Path, - output_dir_path: Path, flux_scale: float = 1.0, flux_offset_mJy: float = 0.0, ra_offset_arcsec: float = 0.0, dec_offset_arcsec: float = 0.0, - overwrite: bool = False, -) -> Path: - """Apply astrometric and flux corrections to a VAST VOTable.""" +): + """Apply astrometric and flux corrections to a catalog. + + Args: + catalog_path (Path): Path for the input catalog + flux_scale (float, optional): Multiplicative flux correction. Defaults to 1.0. + flux_offset_mJy (float, optional): Additive flux correction. Defaults to 0.0. + ra_offset_arcsec (float, optional): RA offset in arcsec. Defaults to 0.0. + dec_offset_arcsec (float, optional): DEC offset in arcsec. Defaults to 0.0. 
+ + Returns: + astropy.io.votable: the corrected catalog + """ # flux-unit columns in all catalogs FLUX_COLS = ( "col_flux_peak", @@ -214,10 +257,6 @@ def shift_and_scale_catalog( ) logger.debug(f"Correcting {catalog_path} ...") is_island = ".islands" in catalog_path.name - output_path = output_dir_path / catalog_path.with_suffix(".corrected.xml").name - if output_path.exists() and not overwrite: - logger.warning(f"Will not overwrite existing catalogue: {output_path}.") - return output_path votablefile = parse(catalog_path) votable = votablefile.get_first_table() @@ -254,12 +293,312 @@ def shift_and_scale_catalog( for col in cols: votable.array[col] = flux_scale * (votable.array[col] + flux_offset_mJy) - # write corrected VOTable - if output_path.exists() and overwrite: - logger.warning(f"Overwriting existing catalogue: {output_path}.") - output_path.unlink() - votablefile.to_xml(str(output_path)) + return votablefile + + +def get_correct_file(correction_files_dir, img_field): + """Helper function to get the file from the reference catalogs which + observed the same field. + + Args: + correction_files_list (list): Path to the correction files directory + img_field (str): The field name of the input catalog + + Returns: + str: the correspoding file with the same field as the one requested. + """ + # we need to string the last A from the field + if img_field[-1] == "A": + img_field = img_field[:-1] + img_field = img_field.replace("VAST", "RACS") + matched_field = list(correction_files_dir.glob(f"*{img_field}*components*")) + if len(matched_field) > 0: + # This means that there are multpile files with the same field, + # possibly with different sbid's corresponding to different observations + return matched_field[0].as_posix() + else: + return None + + +def get_psf_from_image(image_path: str): + """ + Funtion used to get the point spread function (PSF) extent in major and minor axis. + These will be in the header of the image file + + Parameters + ---------- + image_path: str + Path to the image file + + Returns + ------- + Tuple(psf_major, psf_minor) + Major and minor axes of the PSF. + """ + image_path = image_path.replace("SELAVY", "IMAGES") + image_path = image_path.replace("selavy-", "") + image_path = image_path.replace(".components.xml", ".fits") + hdu = fits.open(image_path) + psf_maj = hdu[0].header["BMAJ"] * u.degree + psf_min = hdu[0].header["BMIN"] * u.degree + hdu.close() + return (psf_maj.to(u.arcsec), psf_min.to(u.arcsec)) + + +def correct_field( + image_path: Path, + vast_corrections_root: Path = "/data/vast-survey/RACS/release-format/EPOCH00/TILES/STOKESI_SELAVY", + radius: float = 10, + condon: bool = True, + psf_ref: list[float] = None, + psf: list[float] = None, + write_output: bool = True, + outdir: str = None, + overwrite: bool = False, +): + """Read astrometric and flux corrections produced by vast-xmatch and apply them to + VAST images and catalogues in vast-data. See https://github.com/marxide/vast-xmatch. + + Args: + image path (Path): Path to the image file that needs to be corrected. + vast_corrections_root (Path, optional): Path to the catalogues of referecne catalog. + Defaults to "/data/vast-survey/RACS/release-format/EPOCH00/TILES/STOKESI_SELAVY". + radius (float, optional): Crossmatch radius. Defaults to 10. + condon (bool, optional): Flag to replace errros with Condon errors. Defaults to True. + psf_ref (list[float], optional): PSF information of the reference catalog. Defaults to None. + psf (list[float], optional): PSF information of the input catalog. 
Defaults to None. + write_output (bool, optional): Write the corrected image and catalog files or return the + corrected hdul and the corrected table?. Defaults to True, which means to write + outdir (str, optional): The stem of the output directory to write the files to + overwrite (bool, optional): Overwrite the existing files?. Defaults to False. + """ + epoch_dir = image_path.parent.name + _, _, field, *_ = image_path.name.split(".") + + # get rms and background images + rms_root = Path( + image_path.parent.as_posix().replace("STOKESI_IMAGES", "STOKESI_RMSMAPS") + ) + rms_path = rms_root / f"noiseMap.{image_path.name}" + bkg_path = rms_root / f"meanMap.{image_path.name}" + + correction_files_dir = Path(vast_corrections_root) + ref_file = get_correct_file( + correction_files_dir=correction_files_dir, + img_field=field, + ) + + if outdir is None: + outdir = image_path.parent.parent.parent + + # construct output path to store corrections for each epoch + corr_dir = outdir / "corr_db" + if not os.path.isdir(corr_dir): + os.mkdir(corr_dir) + epoch_corr_dir = corr_dir / epoch_dir + + if not os.path.isdir(epoch_corr_dir): + os.mkdir(epoch_corr_dir) + + skip = False + if not rms_path.exists(): + logger.warning(f"RMS image not found for {image_path}.") + if not bkg_path.exists(): + logger.warning(f"Background image not found for {image_path}.") + + # Look for any component and island files correspnding to this image + image_root = image_path.parent.as_posix() + catalog_root = image_root.replace("IMAGES", "SELAVY") + + catalog_filename = image_path.name.replace("image", "selavy-image") + catalog_filename = catalog_filename.replace(".fits", ".components.xml") + + catalog_filepath = f"{catalog_root}/{catalog_filename}" + + component_file = Path(catalog_filepath) + island_file = Path(catalog_filepath.replace("components", "islands")) + + skip = ( + not ( + (rms_path.exists()) + and (bkg_path.exists()) + and (ref_file is not None) + and (component_file.exists()) + ) + or skip + ) + if skip: + if not ((rms_path.exists()) and (bkg_path.exists())): + logger.warning(f"Skipping {image_path}, RMS/BKG maps do not exist") + elif not (component_file.exists()): + logger.warning(f"Skipping {image_path}, catalog files do not exist") + elif ref_file is None: + logger.warning(f"Skipping {image_path}, no reference field found.") + return None + else: + fname = image_path.name.replace(".fits", "corrections.csv") + crossmatch_file = epoch_corr_dir / fname + csv_file = epoch_corr_dir / "all_fields_corrections.csv" + + # Get the psf measurements to estimate errors follwoing Condon 1997 + if len(psf_ref) > 0: + psf_reference = psf_ref + else: + psf_reference = get_psf_from_image(ref_file) + + if len(psf) > 0: + psf_image = psf + else: + psf_image = get_psf_from_image(image_path.as_posix()) + + ( + dra_median_value, + ddec_median_value, + flux_corr_mult, + flux_corr_add, + ) = vast_xmatch_qc( + reference_catalog_path=ref_file, + catalog_path=component_file.as_posix(), + radius=Angle(radius * u.arcsec), + condon=condon, + psf_reference=psf_reference, + psf=psf_image, + fix_m=False, + fix_b=False, + crossmatch_output=crossmatch_file, + csv_output=csv_file, + ) + + # get corrections + corrected_hdul = [] + for path in (image_path, rms_path, bkg_path): + stokes_dir = f"{path.parent.parent.name}_CORRECTED" + output_dir = outdir / stokes_dir / epoch_dir + output_dir.mkdir(parents=True, exist_ok=True) + output_path = output_dir / path.with_suffix(".corrected.fits").name + if output_path.exists() and not overwrite: + 
logger.warning(f"Will not overwrite existing image: {output_path}.") + else: + corrected_hdu = shift_and_scale_image( + path, + flux_scale=flux_corr_mult.n, + flux_offset_mJy=flux_corr_add.n, + ra_offset_arcsec=dra_median_value.item(), + dec_offset_arcsec=ddec_median_value.item(), + ) + if write_output: + if output_path.exists() and overwrite: + logger.warning(f"Overwriting existing image: {output_path}.") + corrected_hdu.writeto(str(output_path), overwrite=True) + else: + corrected_hdu.writeto(str(output_path)) + logger.success(f"Writing corrected image to: {output_path}.") + corrected_hdu.close() + else: + corrected_hdul.append(corrected_hdu) + + # Do the same for catalog files + corrected_catalogs = [] + for path in (component_file, island_file): + stokes_dir = f"{path.parent.parent.name}_CORRECTED" + output_dir = outdir / stokes_dir / epoch_dir + output_dir.mkdir(parents=True, exist_ok=True) + output_path = output_dir / path.with_suffix(".corrected.xml").name + if output_path.exists() and not overwrite: + logger.warning(f"Will not overwrite existing catalogue: {output_path}.") + else: + corrected_catalog = shift_and_scale_catalog( + path, + flux_scale=flux_corr_mult.n, + flux_offset_mJy=flux_corr_add.n, + ra_offset_arcsec=dra_median_value.item(), + dec_offset_arcsec=ddec_median_value.item(), + ) + if write_output: + # write corrected VOTable + if output_path.exists() and overwrite: + logger.warning( + f"Overwriting existing catalogue: {output_path}." + ) + output_path.unlink() + corrected_catalog.to_xml(output_path.as_posix()) + else: + corrected_catalog.to_xml(output_path.as_posix()) + logger.success(f"Writing corrected catalogue: {output_path}.") + else: + corrected_catalogs.append(corrected_catalog) + return (corrected_hdul, corrected_catalogs) + + +def correct_files( + vast_tile_data_root: Path, + vast_corrections_root: Path = "/data/vast-survey/RACS/release-format/EPOCH00/TILES/STOKESI_SELAVY", + epoch: list[int] = None, + radius: float = 10, + condon: bool = True, + psf_ref: list[float] = None, + psf: list[float] = None, + write_output: bool = True, + outdir: str = None, + overwrite: bool = False, + verbose: bool = False, +): + """Read astrometric and flux corrections produced by vast-xmatch and apply them to + VAST images and catalogues in vast-data. See https://github.com/marxide/vast-xmatch. + + Args: + vast_tile_data_root (Path): Path to the data that needs to be corrected. + Should follow VAST convention, something like + /data/VAST/vast-data/TILES/ that has STOKESI_IMAGES/epoch_xx/ + vast_corrections_root (Path, optional): Path to the catalogues of referecne catalog. + Defaults to "/data/vast-survey/RACS/release-format/EPOCH00/TILES/STOKESI_SELAVY". + epoch (list[int], optional): Epoch to be corrected. Defaults to None. + radius (float, optional): Crossmatch radius. Defaults to 10. + condon (bool, optional): Flag to replace errros with Condon errors. Defaults to True. + psf_ref (list[float], optional): PSF information of the reference catalog. Defaults to None. + psf (list[float], optional): PSF information of the input catalog. Defaults to None. + write_output (bool, optional): Write the corrected image and catalog files or return the + corrected hdul and the corrected table?. Defaults to True, which means to write + outdir (str, optional): The stem of the output directory to write the files to + overwrite (bool, optional): Overwrite the existing files?. Defaults to False. + verbose (bool, optional): Show more log messages. Defaults to False. 
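    Example (a minimal call; the paths and epoch number are illustrative
    placeholders, and empty psf_ref/psf lists mean the PSF is read from the
    image headers):

        correct_files(
            vast_tile_data_root=Path("/data/VAST/vast-data/TILES"),
            epoch=[29],
            radius=10,
            condon=True,
            psf_ref=[],
            psf=[],
            overwrite=True,
        )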
+ """ + # configure logger + if not verbose: + # replace the default sink + logger.remove() + logger.add(sys.stderr, level="INFO") + + # read corrections + image_path_glob_list: list[Generator[Path, None, None]] = [] + components_path_glob_list: list[Generator[Path, None, None]] = [] + + if epoch is None or len(epoch) == 0: + image_path_glob_list.append( + vast_tile_data_root.glob("STOKESI_IMAGES/epoch_*/*.fits") + ) + components_path_glob_list.append( + vast_tile_data_root.glob("STOKESI_SELAVY/epoch_*/*.xml") + ) else: - votablefile.to_xml(str(output_path)) - logger.success(f"Wrote corrected catalogue: {output_path}.") - return output_path + for n in epoch: + image_path_glob_list.append( + vast_tile_data_root.glob(f"STOKESI_IMAGES/epoch_{n}/*.fits") + ) + components_path_glob_list.append( + vast_tile_data_root.glob(f"STOKESI_SELAVY/epoch_{n}/*.xml") + ) + + # get corrections for an image and the correct it + for image_path in chain.from_iterable(image_path_glob_list): + correct_field( + image_path=image_path, + vast_corrections_root=vast_corrections_root, + radius=radius, + condon=condon, + psf_ref=psf_ref, + psf=psf, + write_output=write_output, + outdir=outdir, + overwrite=overwrite, + ) diff --git a/vast_post_processing/crossmatch.py b/vast_post_processing/crossmatch.py index bdbdc3c..c5ee01f 100644 --- a/vast_post_processing/crossmatch.py +++ b/vast_post_processing/crossmatch.py @@ -14,11 +14,28 @@ def median_abs_deviation(data): + """helper function to calculate the median offset + + Args: + data (list): List/array of offsets + + Returns: + float: the median offset + """ median = np.median(data) return np.median(np.abs(data - median)) def straight_line(B, x): + """Helper function for fitting. Defines a straight line + + Args: + B (list): (slope, intercept) of the line + x (list): input X-axis data + + Returns: + list: the straight line + """ m, b = B return m * x + b @@ -26,6 +43,19 @@ def straight_line(B, x): def join_match_coordinates_sky( coords1: SkyCoord, coords2: SkyCoord, seplimit: u.arcsec ): + """Helper function to do the cross match + + Args: + coords1 (SkyCoord): Input coordinates + coords2 (SkyCoord): Reference coordinates + seplimit (u.arcsec): cross-match radius + + Returns: + numpy.ndarray: Array to see which of the input coordinates have a cross match + numpy.ndarray: Indices of the input catalog where there is source in reference + catlog within separation limit + numpy.ndarray: The separation distance for the cross matches + """ idx, separation, dist_3d = match_coordinates_sky(coords1, coords2) mask = separation < seplimit return np.where(mask)[0], idx[mask], separation[mask], dist_3d[mask] @@ -35,12 +65,17 @@ def crossmatch_qtables( catalog: Catalog, catalog_reference: Catalog, radius: Angle = Angle("10 arcsec"), - catalog_coord_cols: Tuple[str, str] = ("ra_deg_cont", "dec_deg_cont"), - catalog_reference_coord_cols: Tuple[str, str] = ("ra_deg_cont", "dec_deg_cont"), ) -> QTable: - catalog_ra, catalog_dec = catalog_coord_cols - catalog_reference_ra, catalog_reference_dec = catalog_reference_coord_cols + """Main function to filter cross-matched sources. + Args: + catalog (Catalog): Input catalog + catalog_reference (Catalog): Reference catalog + radius (Angle, optional): cross-match radius. Defaults to Angle("10 arcsec"). 
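    Example (illustrative; catalog and reference_catalog are assumed to be
    Catalog objects from vast_post_processing.catalogs):

        xmatch = crossmatch_qtables(catalog, reference_catalog, radius=Angle("10 arcsec"))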
+ + Returns: + QTable: filtered table that return the cross matches + """ logger.debug("Using crossmatch radius: %s.", radius) xmatch = join( From c3c020d7388e002734afbf913d5289569cb9982f Mon Sep 17 00:00:00 2001 From: Akash Anumarlapudi Date: Mon, 14 Aug 2023 11:43:55 -0500 Subject: [PATCH 15/31] Fixed typos --- vast_post_processing/cli/run_corrections.py | 4 ++-- vast_post_processing/corrections.py | 5 +++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/vast_post_processing/cli/run_corrections.py b/vast_post_processing/cli/run_corrections.py index 811c8ce..0347ab7 100644 --- a/vast_post_processing/cli/run_corrections.py +++ b/vast_post_processing/cli/run_corrections.py @@ -22,7 +22,7 @@ def main( "/data/vast-survey/RACS/release-format/EPOCH00/TILES/STOKESI_SELAVY", help=( "Path to RACS data that is can be used to correct VAST data. Tries to use" - " EPOCH00 as the defualt epoch. If not the user can override this by" + " EPOCH00 as the default epoch. If not the user can override this by" " giving a path to a folder that contain the selavy output" ), exists=True, @@ -54,7 +54,7 @@ def main( ".restored.conv.fits. Note that for TILE images, the epoch is determined " "from the full path. If the input catalogs do not follow this convention, then " "the PSF sizes must be supplied using --psf-reference and/or --psf. The " - "deafult behaviour is to lookup the PSF sizes from the header of the image" + "default behaviour is to lookup the PSF sizes from the header of the image" ), ), psf_ref: Optional[list[float]] = typer.Option( diff --git a/vast_post_processing/corrections.py b/vast_post_processing/corrections.py index 90ca726..dd2eb51 100644 --- a/vast_post_processing/corrections.py +++ b/vast_post_processing/corrections.py @@ -323,12 +323,13 @@ def get_correct_file(correction_files_dir, img_field): def get_psf_from_image(image_path: str): """ Funtion used to get the point spread function (PSF) extent in major and minor axis. - These will be in the header of the image file + These will be in the header of the image file. 
If a component file is give, it will + construct the image path from this and then gets the psf information Parameters ---------- image_path: str - Path to the image file + Path to the image file or a component file Returns ------- From cd14713098a798d23ade82596f3beae45638a9b5 Mon Sep 17 00:00:00 2001 From: Akash Anumarlapudi Date: Mon, 14 Aug 2023 14:04:32 -0500 Subject: [PATCH 16/31] New log message --- vast_post_processing/corrections.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/vast_post_processing/corrections.py b/vast_post_processing/corrections.py index dd2eb51..821b638 100644 --- a/vast_post_processing/corrections.py +++ b/vast_post_processing/corrections.py @@ -603,3 +603,6 @@ def correct_files( outdir=outdir, overwrite=overwrite, ) + logger.info( + f"Successfully corrected the images and catalogs for {image_path.as_posix()}" + ) From a4d316de79d62e8f05693ea0e966d98525c724da Mon Sep 17 00:00:00 2001 From: Akash Date: Wed, 12 Jul 2023 15:58:30 -0500 Subject: [PATCH 17/31] Pulled in Andrew's code for corrections and re-wrote the entire correction code --- vast_post_processing/catalogs.py | 298 +++++++++++++++++ vast_post_processing/cli/correct_vast.py | 244 +++++++++++++- vast_post_processing/cli/run_corrections.py | 346 ++++++++++++++++++++ vast_post_processing/corrections.py | 117 ++++++- vast_post_processing/crossmatch.py | 153 +++++++++ 5 files changed, 1149 insertions(+), 9 deletions(-) create mode 100644 vast_post_processing/catalogs.py create mode 100644 vast_post_processing/cli/run_corrections.py create mode 100644 vast_post_processing/crossmatch.py diff --git a/vast_post_processing/catalogs.py b/vast_post_processing/catalogs.py new file mode 100644 index 0000000..a75b000 --- /dev/null +++ b/vast_post_processing/catalogs.py @@ -0,0 +1,298 @@ +import logging +from pathlib import Path +from typing import Tuple, Union, Dict, Optional +from urllib.parse import quote + +from astropy.coordinates import SkyCoord +from astropy.table import Table, QTable, join +import astropy.units as u +import numpy as np +import pandas as pd + +logger = logging.getLogger(__name__) + +SELAVY_COLUMN_UNITS = { + "ra_deg_cont": u.deg, + "dec_deg_cont": u.deg, + "ra_err": u.arcsec, + "dec_err": u.arcsec, + "flux_peak": u.mJy / u.beam, + "flux_peak_err": u.mJy / u.beam, + "maj_axis": u.arcsec, + "maj_axis_err": u.arcsec, + "min_axis": u.arcsec, + "min_axis_err": u.arcsec, + "pos_ang": u.deg, + "pos_ang_err": u.deg, + "rms_image": u.mJy / u.beam, +} + + +class UnknownCatalogInputFormat(Exception): + pass + + +class Catalog: + CATALOG_TYPE_TILE = "TILE" + CATALOG_TYPE_COMBINED = "COMBINED" + CATALOG_TYPES = ( + CATALOG_TYPE_TILE, + CATALOG_TYPE_COMBINED, + ) + + def __init__( + self, + path: Path, + psf: Optional[Tuple[float, float]] = None, + input_format: str = "selavy", + condon: bool = False, + positive_fluxes_only: bool = True, + ): + self.path: Path + self.table: QTable + self.field: Optional[str] + self.epoch: Optional[str] + self.sbid: Optional[str] + self.psf_major: Optional[u.Quantity] + self.psf_minor: Optional[u.Quantity] + self.type: str + + # read catalog + if input_format == "selavy": + if path.suffix == ".txt": + logger.debug("Reading %s as a Selavy txt catalog.", path) + read_catalog = read_selavy + else: + logger.debug("Reading %s as a Selavy VOTable catalog.", path) + read_catalog = read_selavy_votable + elif input_format == "aegean": + logger.debug("Reading %s as an Aegean catalog.", path) + read_catalog = read_aegean_csv + else: + logger.error( + "The format of input files 
is not supported. Only selavy and aegean are supported" + ) + raise SystemExit + self.path = path + self.table = read_catalog(path) + + # filter sources with bad sizes and optionally negative/0 fluxes + if positive_fluxes_only: + logger.info( + "Filtering %d sources with fluxes <= 0.", + (self.table["flux_peak"] <= 0).sum(), + ) + self.table = self.table[self.table["flux_peak"] > 0] + logger.info( + "Filtering %d sources with fitted sizes <= 0.", + ((self.table["maj_axis"] <= 0) | (self.table["min_axis"] <= 0)).sum(), + ) + self.table = self.table[ + (self.table["maj_axis"] > 0) & (self.table["min_axis"] > 0) + ] + + # read epoch, field, sbid, psf's + epoch_name = path.parent.name + _, _, field, sbid, *_ = path.name.split(".") + self.epoch = epoch_name + self.field = field.replace("VAST_", "") + self.sbid = sbid + + if psf is not None: + self.psf_major, self.psf_minor = psf * u.arcsec + logger.debug( + "Using user provided PSF for %s: %s, %s.", + self.path, + self.psf_major, + self.psf_minor, + ) + else: + logger.warning( + "PSF is unknown for %s. Condon errors will be unavailable.", self.path + ) + self.psf_major = None + self.psf_minor = None + + if condon and self.psf_major is not None and self.psf_minor is not None: + _ = self.calculate_condon_flux_errors(correct_peak_for_noise=True) + logger.debug("Condon errors computed for %s.", self.path) + + def calculate_condon_flux_errors( + self, + alpha_maj1=2.5, + alpha_min1=0.5, + alpha_maj2=0.5, + alpha_min2=2.5, + alpha_maj3=1.5, + alpha_min3=1.5, + clean_bias=0.0, + clean_bias_error=0.0, + frac_flux_cal_error=0.0, + correct_peak_for_noise=False, + ): + noise = self.table["rms_image"] + snr = self.table["flux_peak"] / noise + + rho_sq3 = ( + ( + self.table["maj_axis"] + * self.table["min_axis"] + / (4.0 * self.psf_major * self.psf_minor) + ) + * (1.0 + (self.psf_major / self.table["maj_axis"]) ** 2) ** alpha_maj3 + * (1.0 + (self.psf_minor / self.table["min_axis"]) ** 2) ** alpha_min3 + * snr**2 + ) + + flux_peak_col = self.table["flux_peak"] + flux_peak_condon = self.table["flux_peak"] + ( + -(noise**2) / self.table["flux_peak"] + clean_bias + ) + if correct_peak_for_noise: + flux_peak_col = flux_peak_condon + + errorpeaksq = ( + (frac_flux_cal_error * flux_peak_col) ** 2 + + clean_bias_error**2 + + 2.0 * flux_peak_col**2 / rho_sq3 + ) + errorpeak = np.sqrt(errorpeaksq) + + self.table["flux_peak_condon"] = flux_peak_condon + self.table["flux_peak_selavy"] = self.table["flux_peak"] + self.table["flux_peak_err_condon"] = errorpeak + self.table["flux_peak_err_selavy"] = self.table["flux_peak_err"] + self.table["flux_peak_err"] = self.table["flux_peak_err_condon"] + if correct_peak_for_noise: + self.table["flux_peak"] = self.table["flux_peak_condon"] + return flux_peak_condon, errorpeak + + +def _convert_selavy_columns_to_quantites( + qt: QTable, units: Dict[str, u.Unit] = SELAVY_COLUMN_UNITS +) -> QTable: + for col, unit in units.items(): + qt[col].unit = unit + return qt + + +def read_selavy(catalog_path: Path) -> QTable: + """Read a Selavy fixed-width component catalog and return a QTable. + Assumed to contain at least the following columns with the given units: + - `ra_deg_cont` and `dec_deg_cont`: degrees. + - `ra_err` and `dec_err`: arcseconds. + - `flux_peak` and `flux_peak_err`: mJy/beam. + - `maj_axis`, `maj_axis_err`, `min_axis`, `min_axis_err`: arcseconds. + - `pos_ang` and `pos_ang_err`: degrees. + - `rms_image`: mJy/beam. + These columns will be converted to Astropy quantites assuming the above units. 
+ + Parameters + ---------- + catalog_path : Path + Path to the Selavy catalog file. + + Returns + ------- + QTable + Selavy catalog as a QTable, with extra columns: + - `coord`: `SkyCoord` object of the source coordinate. + - `nn_separation`: separation to the nearest-neighbour source as a Quantity with + angular units. + """ + df = pd.read_fwf(catalog_path, skiprows=[1]).drop(columns="#") + qt = _convert_selavy_columns_to_quantites(QTable.from_pandas(df)) + qt["coord"] = SkyCoord(ra=qt["ra_deg_cont"], dec=qt["dec_deg_cont"]) + _, qt["nn_separation"], _ = qt["coord"].match_to_catalog_sky( + qt["coord"], nthneighbor=2 + ) + return qt + + +def read_selavy_votable(catalog_path: Path) -> QTable: + t = Table.read(catalog_path, format="votable", use_names_over_ids=True) + # remove units from str columns and fix unrecognized flux units + for col in t.itercols(): + if col.dtype.kind == "U": + col.unit = None + elif col.unit == u.UnrecognizedUnit("mJy/beam"): + col.unit = u.Unit("mJy/beam") + qt = QTable(t) + qt["coord"] = SkyCoord(ra=qt["ra_deg_cont"], dec=qt["dec_deg_cont"]) + _, qt["nn_separation"], _ = qt["coord"].match_to_catalog_sky( + qt["coord"], nthneighbor=2 + ) + return qt + + +def read_hdf(catalog_path: Path) -> pd.DataFrame: + df = pd.read_hdf(catalog_path, key="data") + df["field"] = df.field.str.split(".", n=1, expand=True)[0] + qt = _convert_selavy_columns_to_quantites(QTable.from_pandas(df)) + qt["coord"] = SkyCoord(ra=qt["ra_deg_cont"], dec=qt["dec_deg_cont"]) + _, qt["nn_separation"], _ = qt["coord"].match_to_catalog_sky( + qt["coord"], nthneighbor=2 + ) + return qt + + +def read_aegean_csv(catalog_path: Path) -> QTable: + """Read an Aegean CSV component catalog and return a QTable. + Assumed to contain at least the following columns with the given units: + - `ra` and `dec`: degrees. + - `err_ra` and `err_dec`: degrees. + - `peak_flux` and `err_peak_flux`: Jy/beam. + - `a`, `err_a`, `b`, `err_b`: fitted semi-major and -minor axes in arcseconds. + - `pa` and `err_pa`: degrees. + - `local_rms`: Jy/beam. + These columns will be converted to Astropy quantites assuming the above units. + + Parameters + ---------- + catalog_path : Path + Path to the Selavy catalog file. + + Returns + ------- + QTable + Aegean component catalog as a QTable, with extra columns: + - `coord`: `SkyCoord` object of the source coordinate. + - `nn_separation`: separation to the nearest-neighbour source as a Quantity with + angular units. 
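A minimal sketch (illustrative only, with made-up values) of the rename-and-attach-units step that this function applies via AEGEAN_COLUMN_MAP below; only two of the mapped columns are shown here.

from astropy.table import Table, QTable
import astropy.units as u

# Hypothetical Aegean-style columns.
t = Table({"peak_flux": [0.012, 0.034], "a": [15.0, 18.0]})
column_map = {"peak_flux": ("flux_peak", u.Jy / u.beam), "a": ("maj_axis", u.arcsec)}
for old, (new, unit) in column_map.items():
    t.rename_column(old, new)
    t[new].unit = unit
qt = QTable(t)  # unit-bearing columns become Quantity columns
print(qt["flux_peak"].to(u.mJy / u.beam))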
+ """ + AEGEAN_COLUMN_MAP = { + # aegean name: (selavy name, aegean unit) + "ra": ("ra_deg_cont", u.deg), + "dec": ("dec_deg_cont", u.deg), + "err_ra": ("ra_err", u.deg), + "err_dec": ("dec_err", u.deg), + "peak_flux": ("flux_peak", u.Jy / u.beam), + "err_peak_flux": ("flux_peak_err", u.Jy / u.beam), + "a": ("maj_axis", u.arcsec), + "b": ("min_axis", u.arcsec), + "pa": ("pos_ang", u.arcsec), + "err_a": ("maj_axis_err", u.arcsec), + "err_b": ("min_axis_err", u.deg), + "err_pa": ("pos_ang_err", u.deg), + "local_rms": ("rms_image", u.Jy / u.beam), + } + qt = QTable.read(catalog_path) + # rename columns to match selavy convention and assign units + for col, (new_col, unit) in AEGEAN_COLUMN_MAP.items(): + qt.rename_column(col, new_col) + qt[new_col].unit = unit + # add has_siblings column + island_source_counts = ( + qt[["island", "source"]].group_by("island").groups.aggregate(np.sum) + ) + island_source_counts.rename_column("source", "has_siblings") + island_source_counts["has_siblings"] = island_source_counts["has_siblings"].astype( + bool + ) + qt = join(qt, island_source_counts, keys="island", join_type="left") + + qt["coord"] = SkyCoord(ra=qt["ra_deg_cont"], dec=qt["dec_deg_cont"]) + _, qt["nn_separation"], _ = qt["coord"].match_to_catalog_sky( + qt["coord"], nthneighbor=2 + ) + return qt diff --git a/vast_post_processing/cli/correct_vast.py b/vast_post_processing/cli/correct_vast.py index 611208e..ebd5276 100644 --- a/vast_post_processing/cli/correct_vast.py +++ b/vast_post_processing/cli/correct_vast.py @@ -1,8 +1,59 @@ from pathlib import Path from typing import Optional import typer +from astropy.table import QTable +from astropy.io import fits +from astropy import units as u -from vast_post_processing import corrections +from vast_post_processing.corrections import ( + shift_and_scale_catalog, + shift_and_scale_image, + calculate_positional_offsets, + calculate_flux_offsets, +) + + +def get_correct_correction_file(correction_files_list, epoch, img_field, img_sbid): + count = 0 + for f in chain.from_iterable(correction_files_list): + epoch_name = f.parent.name + if epoch_name in epoch: + filename = f.name + _, _, _, sbid, field, *_ = filename.split("_") + sbid = sbid.replace("-VAST", "") + field = field.replace(".csv", "") + if (sbid in img_sbid) & (field in img_field): + df = QTable.read(f) + flux_shifts = calculate_flux_offsets(df) + pos_shifts = calculate_positional_offsets(df) + count += 1 + return flux_shifts, pos_shifts + else: + continue + if count == 0: + return None, None + + +def get_psf_from_image(image_path: str): + """ + Funtion used to get the point spread function (PSF) extent in major and minor axis. + These will be in the header of the image file + + Parameters + ---------- + image_path: str + Path to the image file + + Returns + ------- + Tuple(psf_major, psf_minor) + Major and minor axes of the PSF. + """ + + hdu = fits.open(image_path) + psf_maj = hdu["BMAJ"] * u.degree + psf_min = hdu["BMIN"] * u.degree + return psf_maj.to(u.arcsec), psf_min.to(u.arcsec) def main( @@ -16,9 +67,13 @@ def main( file_okay=False, dir_okay=True, ), - vast_corrections_csv: Path = typer.Argument( - ..., - help="Path to VAST corrections CSV file produced by vast-xmatch.", + vast_corrections_csv_root: Path = typer.Option( + "/data/vast-survey/VAST/askap-surveys-database/vast/db/", + help=( + "Path to VAST corrections CSV file produced by vast-xmatch. Tries to use" + " the default path of these files. 
If not the user can override this by" + "giving a path to file" + ), exists=True, file_okay=True, dir_okay=False, @@ -34,9 +89,184 @@ def main( overwrite: bool = False, verbose: bool = False, ): - corrections.correct_vast( - vast_tile_data_root, vast_corrections_csv, epoch, overwrite, verbose - ) + """Read astrometric and flux corrections produced by vast-xmatch and apply them to + VAST images and catalogues in vast-data. See https://github.com/marxide/vast-xmatch. + """ + # configure logger + if not verbose: + # replace the default sink + logger.remove() + logger.add(sys.stderr, level="INFO") + + # read corrections + # corrections_df = ( + # pd.read_csv(vast_corrections_csv) + # .set_index(["release_epoch", "field", "sbid"]) + # .sort_index() + # ) + image_path_glob_list: list[Generator[Path, None, None]] = [] + components_path_glob_list: list[Generator[Path, None, None]] = [] + correction_files_path_glob_list: list[Generator[Path, None, None]] = [] + if epoch is None or len(epoch) == 0: + image_path_glob_list.append( + vast_tile_data_root.glob("STOKESI_IMAGES/epoch_*/*.fits") + ) + components_path_glob_list.append( + vast_tile_data_root.glob("STOKESI_SELAVY/epoch_*/*.components.xml") + ) + correction_files_path_glob_list.append( + vast_corrections_csv_root.glob("epoch_*/cat_match_RACS0*.csv") + ) + else: + for n in epoch: + image_path_glob_list.append( + vast_tile_data_root.glob(f"STOKESI_IMAGES/epoch_{n}/*.fits") + ) + components_path_glob_list.append( + vast_tile_data_root.glob(f"STOKESI_SELAVY/epoch_{n}/*.components.xml") + ) + correction_files_path_glob_list.append( + vast_corrections_csv_root.glob(f"epoch_{n}/cat_match_RACS0*.csv") + ) + + # correct images + for image_path in chain.from_iterable(image_path_glob_list): + epoch_dir = image_path.parent.name + _, _, field, sbid_str, *_ = image_path.name.split(".") + sbid = int(sbid_str[2:]) + # get rms and background images + rms_path = ( + vast_tile_data_root + / "STOKESI_RMSMAPS" + / epoch_dir + / f"noiseMap.{image_path.name}" + ) + bkg_path = ( + vast_tile_data_root + / "STOKESI_RMSMAPS" + / epoch_dir + / f"meanMap.{image_path.name}" + ) + # get corrections + skip = False + # try: + # corrections = corrections_df.loc[(epoch_dir, field, sbid)] + # except KeyError: + # skip = True + # logger.warning( + # f"Corrections not found for {image_path} ({epoch_dir}, {field}," + # f" {sbid})." + # ) + flux_corrections, pos_corrections = get_correct_correction_file( + correction_files_list=correction_files_path_glob_list, + epoch=epoch_dir, + img_field=field, + img_sbid=sbid_str, + ) + if (flux_corrections is None) | (pos_corrections is None): + skip = True + logger.warning( + f"Corrections not found for {image_path} ({epoch_dir}, {field}," + f" {sbid})." 
+ ) + else: + scale, offset, scale_err, offset_err = flux_corrections + dra_median, ddec_median, dra_madfm, ddec_madfm = pos_corrections + if not rms_path.exists(): + logger.warning(f"RMS image not found for {image_path}.") + if not bkg_path.exists(): + logger.warning(f"Background image not found for {image_path}.") + skip = not (rms_path.exists() and bkg_path.exists()) or skip + if skip: + logger.warning(f"Skipping {image_path}.") + continue + + for path in (image_path, rms_path, bkg_path): + stokes_dir = f"{path.parent.parent.name}_CORRECTED" + output_dir = vast_tile_data_root / stokes_dir / epoch_dir + output_dir.mkdir(parents=True, exist_ok=True) + # _ = shift_and_scale_image( + # path, + # output_dir, + # flux_scale=corrections.flux_peak_correction_multiplicative, + # flux_offset_mJy=corrections.flux_peak_correction_additive, + # ra_offset_arcsec=corrections.ra_correction, + # dec_offset_arcsec=corrections.dec_correction, + # overwrite=overwrite, + # ) + _ = shift_and_scale_image( + path, + output_dir, + flux_scale=scale, + flux_offset_mJy=offset, + ra_offset_arcsec=dra_median, + dec_offset_arcsec=ddec_median, + overwrite=overwrite, + ) + + # correct catalogs + for components_path in chain.from_iterable(components_path_glob_list): + epoch_dir = components_path.parent.name + _, _, field, sbid_str, *_ = components_path.name.split(".") + sbid = int(sbid_str[2:]) + # get island catalog + islands_path = components_path.with_name( + components_path.name.replace(".components", ".islands") + ) + # get corrections + skip = False + # try: + # corrections = corrections_df.loc[(epoch_dir, field, sbid)] + # except KeyError: + # skip = True + # logger.warning( + # f"Corrections not found for {image_path} ({epoch_dir}, {field}," + # f" {sbid})." + # ) + flux_corrections, pos_corrections = get_correct_correction_file( + correction_files_list=correction_files_path_glob_list, + epoch=epoch_dir, + img_field=field, + img_sbid=sbid_str, + ) + if (flux_corrections is None) | (pos_corrections is None): + skip = True + logger.warning( + f"Corrections not found for {image_path} ({epoch_dir}, {field}," + f" {sbid})." 
+ ) + else: + scale, offset, scale_err, offset_err = flux_corrections + dra_median, ddec_median, dra_madfm, ddec_madfm = pos_corrections + if not islands_path.exists(): + logger.warning(f"Islands catalogue not found for {components_path}.") + skip = not islands_path.exists() or skip + if skip: + logger.warning(f"Skipping {components_path}.") + continue + + for path in (components_path, islands_path): + stokes_dir = f"{path.parent.parent.name}_CORRECTED" + output_dir = vast_tile_data_root / stokes_dir / epoch_dir + output_dir.mkdir(parents=True, exist_ok=True) + # _ = shift_and_scale_catalog( + # path, + # output_dir, + # flux_scale=corrections.flux_peak_correction_multiplicative, + # flux_offset_mJy=corrections.flux_peak_correction_additive, + # ra_offset_arcsec=corrections.ra_correction, + # dec_offset_arcsec=corrections.dec_correction, + # overwrite=overwrite, + # ) + _ = shift_and_scale_catalog( + path, + output_dir, + flux_scale=scale, + flux_offset_mJy=offset, + ra_offset_arcsec=dra_median, + dec_offset_arcsec=ddec_median, + overwrite=overwrite, + ) if __name__ == "__main__": diff --git a/vast_post_processing/cli/run_corrections.py b/vast_post_processing/cli/run_corrections.py new file mode 100644 index 0000000..da4e95f --- /dev/null +++ b/vast_post_processing/cli/run_corrections.py @@ -0,0 +1,346 @@ +from loguru import logger +from pathlib import Path +from typing import Optional, Tuple, Generator +from astropy.coordinates import Angle +import astropy.units as u +import click, sys, os +from uncertainties import ufloat +from itertools import chain +import pandas as pd +import typer +from astropy.table import QTable +from astropy.io import fits +from astropy import units as u +from vast_post_processing.catalogs import Catalog + +from vast_post_processing.corrections import ( + shift_and_scale_catalog, + shift_and_scale_image, + vast_xmatch_qc, +) + + +class _AstropyUnitType(click.ParamType): + def convert(self, value, param, ctx, unit_physical_type): + try: + unit = u.Unit(value) + except ValueError: + self.fail(f"astropy.units.Unit does not understand: {value}.") + if unit.physical_type != unit_physical_type: + self.fail( + f"{unit} is a {unit.physical_type} unit. It must be of type" + f" {unit_physical_type}." + ) + else: + return unit + + +class AngleUnitType(_AstropyUnitType): + name = "angle_unit" + + def convert(self, value, param, ctx): + return super().convert(value, param, ctx, "angle") + + +class FluxUnitType(_AstropyUnitType): + name = "flux_unit" + + def convert(self, value, param, ctx): + return super().convert(value, param, ctx, "spectral flux density") + + +class AngleQuantityType(click.ParamType): + name = "angle_quantity" + + def convert(self, value, param, ctx): + try: + angle = Angle(value) + return angle + except ValueError: + self.fail(f"astropy.coordinates.Angle does not understand: {value}.") + + +ANGLE_UNIT_TYPE = AngleUnitType() +FLUX_UNIT_TYPE = FluxUnitType() +ANGLE_QUANTITY_TYPE = AngleQuantityType() + + +def get_correct_correction_file(correction_files_list, img_field): + count = 0 + for f in chain.from_iterable(correction_files_list): + filename = f.name + _, _, field, *_ = filename.split(".") + field = field.replace("RACS", "VAST") + if field in img_field: + count += 1 + return f + else: + continue + if count == 0: + return None + + +def get_psf_from_image(image_path: str): + """ + Funtion used to get the point spread function (PSF) extent in major and minor axis. 
+ These will be in the header of the image file + + Parameters + ---------- + image_path: str + Path to the image file + + Returns + ------- + Tuple(psf_major, psf_minor) + Major and minor axes of the PSF. + """ + + hdu = fits.open(image_path) + psf_maj = hdu["BMAJ"] * u.degree + psf_min = hdu["BMIN"] * u.degree + hdu.close() + return (psf_maj.to(u.arcsec), psf_min.to(u.arcsec)) + + +def main( + vast_tile_data_root: Path = typer.Argument( + ..., + help=( + "Path to VAST TILES data directory, i.e. the directory that contains the" + " STOKES* directories." + ), + exists=True, + file_okay=False, + dir_okay=True, + ), + vast_corrections_root: Path = typer.Option( + "/data/vast-survey/RACS/release-format/EPOCH00/TILES/STOKESI_SELAVY", + help=( + "Path to RACS data that is can be used to correct VAST data. Tries to use" + " EPOCH00 as the defualt epoch. If not the user can override this by" + " giving a path to a folder that contain the selavy output" + ), + exists=True, + file_okay=False, + dir_okay=True, + ), + epoch: Optional[list[int]] = typer.Option( + None, + help=( + "Only correct the given observation epochs. Can be given multiple times," + " e.g. --epoch 1 --epoch 2. If no epochs are given (the default), then" + " correct all available epochs." + ), + ), + radius: Optional[ANGLE_QUANTITY_TYPE] = typer.Option( + "10 arcsec", + help=( + "Maximum separation limit for nearest-neighbour crossmatch. Accepts any " + "string understood by astropy.coordinates.Angle." + ), + ), + condon: Optional[bool] = typer.Option( + True, + help=( + "Calculate Condon (1997) flux errors and use them instead of the original " + "errors. Will also correct the peak flux values for noise. Requires that the " + "input images follow the VAST naming convention, for TILE images: EPOCH01/" + "TILES/STOKESI_IMAGES/selavy-image.i.SB9667.cont.VAST_0102-06A.linmos.taylor.0" + ".restored.conv.fits. Note that for TILE images, the epoch is determined " + "from the full path. If the input catalogs do not follow this convention, then " + "the PSF sizes must be supplied using --psf-reference and/or --psf. The " + "deafult behaviour is to lookup the PSF sizes from the header of the image" + ), + ), + psf_ref: Optional[list[float]] = typer.Option( + None, + help=( + "If using --condon but want to give the psfs manually, use this specified PSF size in " + "arcsec for `reference_catalog`. First argument is major axis followed by nimor axis." + ), + ), + psf: Optional[list[float]] = typer.Option( + None, + help=( + "If using --condon but want to give the psfs manually, use this specified PSF size in " + "arcsec for `catalof`. First argument is major axis followed by nimor axis." + ), + ), + overwrite: bool = False, + verbose: bool = False, +): + """Read astrometric and flux corrections produced by vast-xmatch and apply them to + VAST images and catalogues in vast-data. See https://github.com/marxide/vast-xmatch. 
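A minimal sketch (illustrative only, hypothetical numbers) of the Condon (1997) peak-flux error that the --condon option enables. It follows the same rho-squared expression used by Catalog.calculate_condon_flux_errors with alpha_maj = alpha_min = 1.5; clean-bias and calibration terms are set to zero.

import numpy as np

# All values are hypothetical; fluxes in mJy/beam, sizes in arcsec.
flux_peak, rms = 5.0, 0.25
maj, minr, psf_maj, psf_min = 18.0, 14.0, 15.0, 12.0
snr = flux_peak / rms
rho_sq = (
    (maj * minr / (4.0 * psf_maj * psf_min))
    * (1.0 + (psf_maj / maj) ** 2) ** 1.5
    * (1.0 + (psf_min / minr) ** 2) ** 1.5
    * snr**2
)
flux_peak_err = np.sqrt(2.0 * flux_peak**2 / rho_sq)
print(flux_peak_err)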
+ """ + # configure logger + if not verbose: + # replace the default sink + logger.remove() + logger.add(sys.stderr, level="INFO") + + # read corrections + image_path_glob_list: list[Generator[Path, None, None]] = [] + components_path_glob_list: list[Generator[Path, None, None]] = [] + correction_files_path_glob_list: list[Generator[Path, None, None]] = [] + + correction_files_path_glob_list.append(vast_corrections_root.glob("*.xml")) + + if epoch is None or len(epoch) == 0: + image_path_glob_list.append( + vast_tile_data_root.glob("STOKESI_IMAGES/epoch_*/*.fits") + ) + components_path_glob_list.append( + vast_tile_data_root.glob("STOKESI_SELAVY/epoch_*/*.components.xml") + ) + else: + for n in epoch: + image_path_glob_list.append( + vast_tile_data_root.glob(f"STOKESI_IMAGES/epoch_{n}/*.fits") + ) + components_path_glob_list.append( + vast_tile_data_root.glob(f"STOKESI_SELAVY/epoch_{n}/*.components.xml") + ) + + # construct output path to store corrections + corr_dir = vast_tile_data_root / "corr_db" + if not os.path.isdir(corr_dir): + os.mkdir(corr_dir) + + # get corrections for an image and the correct it + for image_path in chain.from_iterable(image_path_glob_list): + epoch_dir = image_path.parent.name + _, _, field, sbid_str, *_ = image_path.name.split(".") + sbid = int(sbid_str[2:]) + + # get rms and background images + rms_path = ( + vast_tile_data_root + / "STOKESI_RMSMAPS" + / epoch_dir + / f"noiseMap.{image_path.name}" + ) + bkg_path = ( + vast_tile_data_root + / "STOKESI_RMSMAPS" + / epoch_dir + / f"meanMap.{image_path.name}" + ) + + # construct output path to store corrections for each epoch + epoch_corr_dir = vast_tile_data_root / "corr_db" / epoch_dir + + if not os.path.isdir(epoch_corr_dir): + os.mkdir(epoch_corr_dir) + + ref_file = get_correct_correction_file( + correction_files_list=correction_files_path_glob_list, + img_field=field, + ) + + skip = False + if not rms_path.exists(): + logger.warning(f"RMS image not found for {image_path}.") + if not bkg_path.exists(): + logger.warning(f"Background image not found for {image_path}.") + skip = ( + not ((rms_path.exists()) and (bkg_path.exists()) and (ref_file is not None)) + or skip + ) + if skip: + if not ((rms_path.exists()) and (bkg_path.exists())): + logger.warning(f"Skipping {image_path}, RMS/BKG maps do not exist") + elif ref_file is None: + logger.warning(f"Skipping {image_path}, no reference field found.") + continue + else: + crossmatch_file = epoch_corr_dir / image_path.replace( + "components.xml", "corrections.csv" + ) + csv_file = epoch_corr_dir / "corrections.csv" + + # Get the psf measurements to estimate errors follwoing Condon 1997 + if psf_ref is not None: + psf_reference = psf_ref + else: + psf_reference = get_psf_from_image(ref_file) + + if psf is not None: + psf_image = psf + else: + psf_image = get_psf_from_image(image_path) + ( + dra_median_value, + ddec_median_value, + flux_corr_mult, + flux_corr_add, + ) = vast_xmatch_qc( + reference_catalog_path=ref_file, + catalog_path=image_path, + radius=Angle(radius), + condon=condon, + psf_reference=psf_reference, + psf=psf_image, + fix_m=False, + fix_b=False, + crossmatch_output=crossmatch_file, + csv_output=csv_file, + ) + + # get corrections + for path in (image_path, rms_path, bkg_path): + stokes_dir = f"{path.parent.parent.name}_CORRECTED" + output_dir = vast_tile_data_root / stokes_dir / epoch_dir + output_dir.mkdir(parents=True, exist_ok=True) + _ = shift_and_scale_image( + path, + output_dir, + flux_scale=flux_corr_mult, + flux_offset_mJy=flux_corr_add, + 
ra_offset_arcsec=dra_median_value, + dec_offset_arcsec=ddec_median_value, + overwrite=overwrite, + ) + + # Do the same for catalog files + # Look for any component and island files correspnding to this image + comp_files = [] + for p in list(components_path_glob_list[0]): + comp_file_name = p.name + comp_file_epoch = p.parent.name + if ( + (epoch_dir in comp_file_epoch) + and (field in comp_file_name) + and (f"SB{sbid}" in comp_file_name) + ): + comp_files.append(p) + + if len(comp_files) == 0: + logger.warning(f"Selavy catalogue not found for the image {image_path}") + elif len(comp_files) == 1: + if ".components" in comp_files[0].name: + logger.warning( + f"Islannd catalogue not found for the image {image_path}" + ) + else: + logger.warning( + f"Islannd catalogue not found for the image {image_path}" + ) + else: + for path in comp_files: + stokes_dir = f"{path.parent.parent.name}_CORRECTED" + output_dir = vast_tile_data_root / stokes_dir / epoch_dir + output_dir.mkdir(parents=True, exist_ok=True) + _ = shift_and_scale_catalog( + path, + output_dir, + flux_scale=flux_corr_mult, + flux_offset_mJy=flux_corr_add, + ra_offset_arcsec=dra_median_value, + dec_offset_arcsec=ddec_median_value, + overwrite=overwrite, + ) + + +if __name__ == "__main__": + typer.run(main) diff --git a/vast_post_processing/corrections.py b/vast_post_processing/corrections.py index c5d6ade..8603d63 100644 --- a/vast_post_processing/corrections.py +++ b/vast_post_processing/corrections.py @@ -3,14 +3,127 @@ from pathlib import Path import warnings - -from astropy.coordinates import SkyCoord +from astropy.coordinates import SkyCoord, Angle from astropy.io import fits from astropy.io.votable import parse import astropy.units as u +from uncertainties import ufloat from astropy.wcs import WCS, FITSFixedWarning from loguru import logger import numpy as np +from typing import Tuple, Optional +from vast_post_processing.catalogs import Catalog +from vast_post_processing.crossmatch import ( + crossmatch_qtables, + calculate_positional_offsets, + calculate_flux_offsets, +) + + +def vast_xmatch_qc( + reference_catalog_path: str, + catalog_path: str, + radius: Angle = Angle("10arcsec"), + condon: bool = False, + psf_reference: Optional[Tuple[float, float]] = None, + psf: Optional[Tuple[float, float]] = None, + fix_m: bool = False, + fix_b: bool = False, + positional_unit: u.Unit = u.Unit("arcsec"), + flux_unit: u.Unit = u.Unit("mJy"), + crossmatch_output: Optional[str] = None, + csv_output: Optional[str] = None, +): + # convert catalog path strings to Path objects + reference_catalog_path = Path(reference_catalog_path) + catalog_path = Path(catalog_path) + flux_unit /= u.beam # add beam divisor as we currently only work with peak fluxes + + reference_catalog = Catalog( + reference_catalog_path, + psf=psf_reference, + condon=condon, + input_format="selavy", + ) + catalog = Catalog( + catalog_path, + psf=psf, + condon=condon, + input_format="selavy", + ) + + # perform the crossmatch + xmatch_qt = crossmatch_qtables(catalog, reference_catalog, radius=radius) + # select xmatches with non-zero flux errors and no siblings + logger.info("Removing crossmatched sources with siblings or flux peak errors = 0.") + mask = xmatch_qt["flux_peak_err"] > 0 + mask &= xmatch_qt["flux_peak_err_reference"] > 0 + mask &= xmatch_qt["has_siblings"] == 0 + mask &= xmatch_qt["has_siblings_reference"] == 0 + data = xmatch_qt[mask] + logger.info( + f"{len(data):.2f} crossmatched sources remaining ({(len(data) / len(xmatch_qt)) * 100:.2f}%).", + ) + + # Write 
the cross-match data into csv + if crossmatch_output is not None: + data.write("crossmatch.csv", overwrite=True) + # calculate positional offsets and flux ratio + dra_median, ddec_median, dra_madfm, ddec_madfm = calculate_positional_offsets(data) + dra_median_value = dra_median.to(positional_unit).value + dra_madfm_value = dra_madfm.to(positional_unit).value + ddec_median_value = ddec_median.to(positional_unit).value + ddec_madfm_value = ddec_madfm.to(positional_unit).value + logger.info( + f"dRA median: {dra_median_value:.2f} MADFM: {dra_madfm_value:.2f} {positional_unit}. dDec median: {ddec_median_value:.2f} MADFM: {ddec_madfm_value:.2f} {positional_unit}.", + ) + + gradient, offset, gradient_err, offset_err = calculate_flux_offsets( + data, fix_m=fix_m, fix_b=fix_b + ) + ugradient = ufloat(gradient, gradient_err) + uoffset = ufloat(offset.to(flux_unit).value, offset_err.to(flux_unit).value) + logger.info( + f"ODR fit parameters: Sp = Sp,ref * {ugradient} + {uoffset} {flux_unit}.", + ) + + flux_corr_mult = 1 / ugradient + flux_corr_add = -1 * uoffset + + if csv_output is not None: + # output has been requested + + if True: # csv_output is not None: + csv_output_path = Path(csv_output) # ensure Path object + sbid = catalog.sbid if catalog.sbid is not None else "" + if not csv_output_path.exists(): + f = open(csv_output_path, "w") + print( + "field,release_epoch,sbid,ra_correction,dec_correction,ra_madfm," + "dec_madfm,flux_peak_correction_multiplicative,flux_peak_correction_additive," + "flux_peak_correction_multiplicative_err,flux_peak_correction_additive_err," + "n_sources", + file=f, + ) + else: + f = open(csv_output_path, "a") + logger.info( + "Writing corrections CSV. To correct positions, add the corrections to" + " the original source positions i.e. RA' = RA + ra_correction /" + " cos(Dec). To correct fluxes, add the additive correction and multiply" + " the result by the multiplicative correction i.e. S' =" + " flux_peak_correction_multiplicative(S +" + " flux_peak_correction_additive)." 
+ ) + print( + f"{catalog.field},{catalog.epoch},{sbid},{dra_median_value * -1}," + f"{ddec_median_value * -1},{dra_madfm_value},{ddec_madfm_value}," + f"{flux_corr_mult.nominal_value},{flux_corr_add.nominal_value}," + f"{flux_corr_mult.std_dev},{flux_corr_add.std_dev},{len(data)}", + file=f, + ) + f.close() + return dra_median_value, ddec_median_value, flux_corr_mult, flux_corr_add def shift_and_scale_image( diff --git a/vast_post_processing/crossmatch.py b/vast_post_processing/crossmatch.py new file mode 100644 index 0000000..bdbdc3c --- /dev/null +++ b/vast_post_processing/crossmatch.py @@ -0,0 +1,153 @@ +import logging +from typing import Tuple + +from astropy.coordinates import SkyCoord, Angle, match_coordinates_sky +from astropy.table import QTable, join, join_skycoord +import astropy.units as u +import numpy as np +from scipy import odr + +from vast_post_processing.catalogs import Catalog + + +logger = logging.getLogger(__name__) + + +def median_abs_deviation(data): + median = np.median(data) + return np.median(np.abs(data - median)) + + +def straight_line(B, x): + m, b = B + return m * x + b + + +def join_match_coordinates_sky( + coords1: SkyCoord, coords2: SkyCoord, seplimit: u.arcsec +): + idx, separation, dist_3d = match_coordinates_sky(coords1, coords2) + mask = separation < seplimit + return np.where(mask)[0], idx[mask], separation[mask], dist_3d[mask] + + +def crossmatch_qtables( + catalog: Catalog, + catalog_reference: Catalog, + radius: Angle = Angle("10 arcsec"), + catalog_coord_cols: Tuple[str, str] = ("ra_deg_cont", "dec_deg_cont"), + catalog_reference_coord_cols: Tuple[str, str] = ("ra_deg_cont", "dec_deg_cont"), +) -> QTable: + catalog_ra, catalog_dec = catalog_coord_cols + catalog_reference_ra, catalog_reference_dec = catalog_reference_coord_cols + + logger.debug("Using crossmatch radius: %s.", radius) + + xmatch = join( + catalog.table, + catalog_reference.table, + keys="coord", + table_names=["", "reference"], + join_funcs={ + "coord": join_skycoord(radius, distance_func=join_match_coordinates_sky) + }, + ) + # remove trailing _ from catalog column names + xmatch.rename_columns( + [col for col in xmatch.colnames if col.endswith("_")], + [col.rstrip("_") for col in xmatch.colnames if col.endswith("_")], + ) + # compute the separations + xmatch["separation"] = xmatch["coord_reference"].separation(xmatch["coord"]) + xmatch["dra"], xmatch["ddec"] = xmatch["coord_reference"].spherical_offsets_to( + xmatch["coord"] + ) + xmatch["flux_peak_ratio"] = ( + xmatch["flux_peak"] / xmatch["flux_peak_reference"] + ).decompose() + + logger.info( + "Num cross-matches: %d. Num cross-matches to unique reference source: %d" + " (%d%%).", + len(xmatch), + len(set(xmatch["coord_id"])), + (len(set(xmatch["coord_id"])) / len(xmatch)) * 100, + ) + + return xmatch + + +def calculate_positional_offsets( + xmatch_qt: QTable, +) -> Tuple[u.Quantity, u.Quantity, u.Quantity, u.Quantity]: + """Calculate the median positional offsets and the median absolute deviation between + matched sources. + + Parameters + ---------- + xmatch_qt : QTable + QTable of crossmatched sources. Must contain columns: dra, ddec. + + Returns + ------- + Tuple[u.Quantity, u.Quantity, u.Quantity, u.Quantity] + Median RA offset, median Dec offset, median absolute deviation of RA offsets, + median absolute deviation of Dec offsets. Units match their inputs and are of + angular type. 
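A minimal sketch (illustrative only, hypothetical offsets) of the median and MADFM statistics this function returns; only numpy and astropy.units are assumed.

import numpy as np
import astropy.units as u

dra = np.array([0.3, 0.5, 0.4, 1.2]) * u.arcsec  # hypothetical crossmatch RA offsets
dra_median = np.median(dra)
dra_madfm = np.median(np.abs(dra - dra_median))  # median absolute deviation from the median
print(dra_median, dra_madfm)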
+ """ + dra_median = np.median(xmatch_qt["dra"]) + dra_madfm = median_abs_deviation(xmatch_qt["dra"]) + ddec_median = np.median(xmatch_qt["ddec"]) + ddec_madfm = median_abs_deviation(xmatch_qt["ddec"]) + + return dra_median, ddec_median, dra_madfm, ddec_madfm + + +def calculate_flux_offsets( + xmatch_qt: QTable, + init_m: float = 1.0, + init_b: float = 0.0, + fix_m: bool = False, + fix_b: bool = False, +) -> Tuple[float, u.Quantity, float, u.Quantity]: + """Calculate the gradient and offset of a straight-line fit to the peak fluxes for + crossmatched sources. The function `y = mx + b` is fit to the reference peak fluxes + vs the peak fluxes using orthogonal distance regression with `scipy.odr`. + + Parameters + ---------- + xmatch_qt : QTable + QTable of crossmatched sources. Must contain columns: flux_peak, + flux_peak_reference, flux_peak_err, flux_peak_err_reference. + init_m : float + Initial gradient parameter passed to the fitting function, default 1.0. + init_b : float + Initial offset parameter passed to the fitting function, default 0.0. + fix_m : bool + If True, do not allow the gradient to vary during fitting, default False. + fix_b : bool + If True, do not allow the offest to vary during fitting, default False. + + Returns + ------- + Tuple[float, u.Quantity, float, u.Quantity] + Model fit parameters: the gradient, intercept (offset), gradient error, and + intercept error. Offset and offset error unit match the reference flux peak + input and are of spectral flux density type. + """ + ifixb = [0 if fix_m else 1, 0 if fix_b else 1] + flux_unit = xmatch_qt["flux_peak_reference"].unit + linear_model = odr.Model(straight_line) + # convert all to reference flux unit as ODR does not preserve Quantity objects + odr_data = odr.RealData( + xmatch_qt["flux_peak_reference"].to(flux_unit).value, + xmatch_qt["flux_peak"].to(flux_unit).value, + sx=xmatch_qt["flux_peak_err_reference"].to(flux_unit).value, + sy=xmatch_qt["flux_peak_err"].to(flux_unit).value, + ) + odr_obj = odr.ODR(odr_data, linear_model, beta0=[init_m, init_b], ifixb=ifixb) + odr_out = odr_obj.run() + gradient, offset = odr_out.beta + gradient_err, offset_err = odr_out.sd_beta + + return gradient, offset * flux_unit, gradient_err, offset_err * flux_unit From 77e49b18e747bcf7bcf4fa7f62d3e005f7f169ed Mon Sep 17 00:00:00 2001 From: Akash Date: Wed, 12 Jul 2023 18:20:47 -0500 Subject: [PATCH 18/31] Cleaned up minor naming issues with variables --- vast_post_processing/cli/run_corrections.py | 92 +++++++++++---------- vast_post_processing/corrections.py | 2 +- 2 files changed, 50 insertions(+), 44 deletions(-) diff --git a/vast_post_processing/cli/run_corrections.py b/vast_post_processing/cli/run_corrections.py index da4e95f..6f1426f 100644 --- a/vast_post_processing/cli/run_corrections.py +++ b/vast_post_processing/cli/run_corrections.py @@ -71,9 +71,9 @@ def get_correct_correction_file(correction_files_list, img_field): filename = f.name _, _, field, *_ = filename.split(".") field = field.replace("RACS", "VAST") - if field in img_field: + if (field in img_field) and ("components" in filename): count += 1 - return f + return f.as_posix() else: continue if count == 0: @@ -95,10 +95,12 @@ def get_psf_from_image(image_path: str): Tuple(psf_major, psf_minor) Major and minor axes of the PSF. 
""" - + image_path = image_path.replace("SELAVY", "IMAGES") + image_path = image_path.replace("selavy-", "") + image_path = image_path.replace(".components.xml", ".fits") hdu = fits.open(image_path) - psf_maj = hdu["BMAJ"] * u.degree - psf_min = hdu["BMIN"] * u.degree + psf_maj = hdu[0].header["BMAJ"] * u.degree + psf_min = hdu[0].header["BMIN"] * u.degree hdu.close() return (psf_maj.to(u.arcsec), psf_min.to(u.arcsec)) @@ -133,8 +135,8 @@ def main( " correct all available epochs." ), ), - radius: Optional[ANGLE_QUANTITY_TYPE] = typer.Option( - "10 arcsec", + radius: Optional[float] = typer.Option( + 10, help=( "Maximum separation limit for nearest-neighbour crossmatch. Accepts any " "string understood by astropy.coordinates.Angle." @@ -164,7 +166,7 @@ def main( None, help=( "If using --condon but want to give the psfs manually, use this specified PSF size in " - "arcsec for `catalof`. First argument is major axis followed by nimor axis." + "arcsec for `catalog`. First argument is major axis followed by nimor axis." ), ), overwrite: bool = False, @@ -228,7 +230,7 @@ def main( ) # construct output path to store corrections for each epoch - epoch_corr_dir = vast_tile_data_root / "corr_db" / epoch_dir + epoch_corr_dir = corr_dir / epoch_dir if not os.path.isdir(epoch_corr_dir): os.mkdir(epoch_corr_dir) @@ -243,8 +245,37 @@ def main( logger.warning(f"RMS image not found for {image_path}.") if not bkg_path.exists(): logger.warning(f"Background image not found for {image_path}.") + + # Look for any component and island files correspnding to this image + comp_files = [] + for p in list(components_path_glob_list[0]): + comp_file_name = p.name + comp_file_epoch = p.parent.name + if ( + (epoch_dir in comp_file_epoch) + and (field in comp_file_name) + and (f"SB{sbid}" in comp_file_name) + ): + comp_files.append(p) + + component_file = None + island_file = None + if len(comp_files) == 0: + logger.warning(f"Selavy catalogue not found for the image {image_path}") + else: + for i in comp_files: + if "components" in i.as_posix(): + component_file = i + elif "islands" in i.as_posix(): + island_file = i + skip = ( - not ((rms_path.exists()) and (bkg_path.exists()) and (ref_file is not None)) + not ( + (rms_path.exists()) + and (bkg_path.exists()) + and (ref_file is not None) + and (component_file is not None) + ) or skip ) if skip: @@ -254,10 +285,9 @@ def main( logger.warning(f"Skipping {image_path}, no reference field found.") continue else: - crossmatch_file = epoch_corr_dir / image_path.replace( - "components.xml", "corrections.csv" - ) - csv_file = epoch_corr_dir / "corrections.csv" + fname = image_path.name.replace(".fits", "corrections.csv") + crossmatch_file = epoch_corr_dir / fname + csv_file = epoch_corr_dir / "all_fields_corrections.csv" # Get the psf measurements to estimate errors follwoing Condon 1997 if psf_ref is not None: @@ -268,7 +298,7 @@ def main( if psf is not None: psf_image = psf else: - psf_image = get_psf_from_image(image_path) + psf_image = get_psf_from_image(image_path.as_posix()) ( dra_median_value, ddec_median_value, @@ -276,8 +306,8 @@ def main( flux_corr_add, ) = vast_xmatch_qc( reference_catalog_path=ref_file, - catalog_path=image_path, - radius=Angle(radius), + catalog_path=component_file.as_posix(), + radius=Angle(radius * u.arcsec), condon=condon, psf_reference=psf_reference, psf=psf_image, @@ -302,32 +332,8 @@ def main( overwrite=overwrite, ) - # Do the same for catalog files - # Look for any component and island files correspnding to this image - comp_files = [] - for p 
in list(components_path_glob_list[0]): - comp_file_name = p.name - comp_file_epoch = p.parent.name - if ( - (epoch_dir in comp_file_epoch) - and (field in comp_file_name) - and (f"SB{sbid}" in comp_file_name) - ): - comp_files.append(p) - - if len(comp_files) == 0: - logger.warning(f"Selavy catalogue not found for the image {image_path}") - elif len(comp_files) == 1: - if ".components" in comp_files[0].name: - logger.warning( - f"Islannd catalogue not found for the image {image_path}" - ) - else: - logger.warning( - f"Islannd catalogue not found for the image {image_path}" - ) - else: - for path in comp_files: + # Do the same for catalog files + for path in (component_file, island_file): stokes_dir = f"{path.parent.parent.name}_CORRECTED" output_dir = vast_tile_data_root / stokes_dir / epoch_dir output_dir.mkdir(parents=True, exist_ok=True) diff --git a/vast_post_processing/corrections.py b/vast_post_processing/corrections.py index 8603d63..7467195 100644 --- a/vast_post_processing/corrections.py +++ b/vast_post_processing/corrections.py @@ -67,7 +67,7 @@ def vast_xmatch_qc( # Write the cross-match data into csv if crossmatch_output is not None: - data.write("crossmatch.csv", overwrite=True) + data.write(crossmatch_output, overwrite=True) # calculate positional offsets and flux ratio dra_median, ddec_median, dra_madfm, ddec_madfm = calculate_positional_offsets(data) dra_median_value = dra_median.to(positional_unit).value From efaf3697ca8ad4e5baabd7796124e7e4de9e956f Mon Sep 17 00:00:00 2001 From: Akash Date: Fri, 14 Jul 2023 01:09:46 -0500 Subject: [PATCH 19/31] Fixed quantities with units; component files matching made easy --- vast_post_processing/cli/run_corrections.py | 51 +++++++-------------- 1 file changed, 17 insertions(+), 34 deletions(-) diff --git a/vast_post_processing/cli/run_corrections.py b/vast_post_processing/cli/run_corrections.py index 6f1426f..99bb051 100644 --- a/vast_post_processing/cli/run_corrections.py +++ b/vast_post_processing/cli/run_corrections.py @@ -67,7 +67,7 @@ def convert(self, value, param, ctx): def get_correct_correction_file(correction_files_list, img_field): count = 0 - for f in chain.from_iterable(correction_files_list): + for f in correction_files_list: filename = f.name _, _, field, *_ = filename.split(".") field = field.replace("RACS", "VAST") @@ -187,13 +187,14 @@ def main( correction_files_path_glob_list: list[Generator[Path, None, None]] = [] correction_files_path_glob_list.append(vast_corrections_root.glob("*.xml")) + correction_files_path_glob_list = list(correction_files_path_glob_list[0]) if epoch is None or len(epoch) == 0: image_path_glob_list.append( vast_tile_data_root.glob("STOKESI_IMAGES/epoch_*/*.fits") ) components_path_glob_list.append( - vast_tile_data_root.glob("STOKESI_SELAVY/epoch_*/*.components.xml") + vast_tile_data_root.glob("STOKESI_SELAVY/epoch_*/*.xml") ) else: for n in epoch: @@ -201,7 +202,7 @@ def main( vast_tile_data_root.glob(f"STOKESI_IMAGES/epoch_{n}/*.fits") ) components_path_glob_list.append( - vast_tile_data_root.glob(f"STOKESI_SELAVY/epoch_{n}/*.components.xml") + vast_tile_data_root.glob(f"STOKESI_SELAVY/epoch_{n}/*.xml") ) # construct output path to store corrections @@ -247,27 +248,9 @@ def main( logger.warning(f"Background image not found for {image_path}.") # Look for any component and island files correspnding to this image - comp_files = [] - for p in list(components_path_glob_list[0]): - comp_file_name = p.name - comp_file_epoch = p.parent.name - if ( - (epoch_dir in comp_file_epoch) - and (field in 
comp_file_name) - and (f"SB{sbid}" in comp_file_name) - ): - comp_files.append(p) - - component_file = None - island_file = None - if len(comp_files) == 0: - logger.warning(f"Selavy catalogue not found for the image {image_path}") - else: - for i in comp_files: - if "components" in i.as_posix(): - component_file = i - elif "islands" in i.as_posix(): - island_file = i + + component_file = Path(ref_file) + island_file = Path(ref_file.replace("components", "islands")) skip = ( not ( @@ -290,12 +273,12 @@ def main( csv_file = epoch_corr_dir / "all_fields_corrections.csv" # Get the psf measurements to estimate errors follwoing Condon 1997 - if psf_ref is not None: + if len(psf_ref) > 0: psf_reference = psf_ref else: psf_reference = get_psf_from_image(ref_file) - if psf is not None: + if len(psf) > 0: psf_image = psf else: psf_image = get_psf_from_image(image_path.as_posix()) @@ -325,10 +308,10 @@ def main( _ = shift_and_scale_image( path, output_dir, - flux_scale=flux_corr_mult, - flux_offset_mJy=flux_corr_add, - ra_offset_arcsec=dra_median_value, - dec_offset_arcsec=ddec_median_value, + flux_scale=flux_corr_mult.n, + flux_offset_mJy=flux_corr_add.n, + ra_offset_arcsec=dra_median_value.item(), + dec_offset_arcsec=ddec_median_value.item(), overwrite=overwrite, ) @@ -340,10 +323,10 @@ def main( _ = shift_and_scale_catalog( path, output_dir, - flux_scale=flux_corr_mult, - flux_offset_mJy=flux_corr_add, - ra_offset_arcsec=dra_median_value, - dec_offset_arcsec=ddec_median_value, + flux_scale=flux_corr_mult.n, + flux_offset_mJy=flux_corr_add.n, + ra_offset_arcsec=dra_median_value.item(), + dec_offset_arcsec=ddec_median_value.item(), overwrite=overwrite, ) From 7e1be371db89f4055081f3998e8c01216040a2db Mon Sep 17 00:00:00 2001 From: Akash Date: Thu, 10 Aug 2023 13:44:49 -0500 Subject: [PATCH 20/31] Corrected the wrong path for the catalog files --- vast_post_processing/cli/run_corrections.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/vast_post_processing/cli/run_corrections.py b/vast_post_processing/cli/run_corrections.py index 99bb051..9eac01b 100644 --- a/vast_post_processing/cli/run_corrections.py +++ b/vast_post_processing/cli/run_corrections.py @@ -248,22 +248,31 @@ def main( logger.warning(f"Background image not found for {image_path}.") # Look for any component and island files correspnding to this image + image_root = image_path.parent.as_posix() + catalog_root = image_root.replace("IMAGES", "SELAVY") - component_file = Path(ref_file) - island_file = Path(ref_file.replace("components", "islands")) + catalog_filename = image_path.name.replace("image", "selavy-image") + catalog_filename = catalog_filename.replace(".fits", ".components.xml") + + catalog_filepath = f"{catalog_root}/{catalog_filename}" + + component_file = Path(catalog_filepath) + island_file = Path(catalog_filepath.replace("components", "islands")) skip = ( not ( (rms_path.exists()) and (bkg_path.exists()) and (ref_file is not None) - and (component_file is not None) + and (component_file.exists()) ) or skip ) if skip: if not ((rms_path.exists()) and (bkg_path.exists())): logger.warning(f"Skipping {image_path}, RMS/BKG maps do not exist") + elif not (component_file.exists()): + logger.warning(f"Skipping {image_path}, catalog files do not exist") elif ref_file is None: logger.warning(f"Skipping {image_path}, no reference field found.") continue From 1d3bd393514148a6df7ff591f0a5c503718c7e74 Mon Sep 17 00:00:00 2001 From: Akash Date: Mon, 14 Aug 2023 00:36:32 -0500 Subject: [PATCH 21/31] 
Re-organized code so that this can be passed to cropping, added docstrings, cleaned the redundant parts of the code --- vast_post_processing/catalogs.py | 375 +++++++------- vast_post_processing/cli/run_corrections.py | 283 +---------- vast_post_processing/corrections.py | 512 +++++++++++++------- vast_post_processing/crossmatch.py | 43 +- 4 files changed, 599 insertions(+), 614 deletions(-) diff --git a/vast_post_processing/catalogs.py b/vast_post_processing/catalogs.py index a75b000..a83b094 100644 --- a/vast_post_processing/catalogs.py +++ b/vast_post_processing/catalogs.py @@ -27,150 +27,37 @@ "rms_image": u.mJy / u.beam, } - -class UnknownCatalogInputFormat(Exception): - pass - - -class Catalog: - CATALOG_TYPE_TILE = "TILE" - CATALOG_TYPE_COMBINED = "COMBINED" - CATALOG_TYPES = ( - CATALOG_TYPE_TILE, - CATALOG_TYPE_COMBINED, - ) - - def __init__( - self, - path: Path, - psf: Optional[Tuple[float, float]] = None, - input_format: str = "selavy", - condon: bool = False, - positive_fluxes_only: bool = True, - ): - self.path: Path - self.table: QTable - self.field: Optional[str] - self.epoch: Optional[str] - self.sbid: Optional[str] - self.psf_major: Optional[u.Quantity] - self.psf_minor: Optional[u.Quantity] - self.type: str - - # read catalog - if input_format == "selavy": - if path.suffix == ".txt": - logger.debug("Reading %s as a Selavy txt catalog.", path) - read_catalog = read_selavy - else: - logger.debug("Reading %s as a Selavy VOTable catalog.", path) - read_catalog = read_selavy_votable - elif input_format == "aegean": - logger.debug("Reading %s as an Aegean catalog.", path) - read_catalog = read_aegean_csv - else: - logger.error( - "The format of input files is not supported. Only selavy and aegean are supported" - ) - raise SystemExit - self.path = path - self.table = read_catalog(path) - - # filter sources with bad sizes and optionally negative/0 fluxes - if positive_fluxes_only: - logger.info( - "Filtering %d sources with fluxes <= 0.", - (self.table["flux_peak"] <= 0).sum(), - ) - self.table = self.table[self.table["flux_peak"] > 0] - logger.info( - "Filtering %d sources with fitted sizes <= 0.", - ((self.table["maj_axis"] <= 0) | (self.table["min_axis"] <= 0)).sum(), - ) - self.table = self.table[ - (self.table["maj_axis"] > 0) & (self.table["min_axis"] > 0) - ] - - # read epoch, field, sbid, psf's - epoch_name = path.parent.name - _, _, field, sbid, *_ = path.name.split(".") - self.epoch = epoch_name - self.field = field.replace("VAST_", "") - self.sbid = sbid - - if psf is not None: - self.psf_major, self.psf_minor = psf * u.arcsec - logger.debug( - "Using user provided PSF for %s: %s, %s.", - self.path, - self.psf_major, - self.psf_minor, - ) - else: - logger.warning( - "PSF is unknown for %s. 
Condon errors will be unavailable.", self.path - ) - self.psf_major = None - self.psf_minor = None - - if condon and self.psf_major is not None and self.psf_minor is not None: - _ = self.calculate_condon_flux_errors(correct_peak_for_noise=True) - logger.debug("Condon errors computed for %s.", self.path) - - def calculate_condon_flux_errors( - self, - alpha_maj1=2.5, - alpha_min1=0.5, - alpha_maj2=0.5, - alpha_min2=2.5, - alpha_maj3=1.5, - alpha_min3=1.5, - clean_bias=0.0, - clean_bias_error=0.0, - frac_flux_cal_error=0.0, - correct_peak_for_noise=False, - ): - noise = self.table["rms_image"] - snr = self.table["flux_peak"] / noise - - rho_sq3 = ( - ( - self.table["maj_axis"] - * self.table["min_axis"] - / (4.0 * self.psf_major * self.psf_minor) - ) - * (1.0 + (self.psf_major / self.table["maj_axis"]) ** 2) ** alpha_maj3 - * (1.0 + (self.psf_minor / self.table["min_axis"]) ** 2) ** alpha_min3 - * snr**2 - ) - - flux_peak_col = self.table["flux_peak"] - flux_peak_condon = self.table["flux_peak"] + ( - -(noise**2) / self.table["flux_peak"] + clean_bias - ) - if correct_peak_for_noise: - flux_peak_col = flux_peak_condon - - errorpeaksq = ( - (frac_flux_cal_error * flux_peak_col) ** 2 - + clean_bias_error**2 - + 2.0 * flux_peak_col**2 / rho_sq3 - ) - errorpeak = np.sqrt(errorpeaksq) - - self.table["flux_peak_condon"] = flux_peak_condon - self.table["flux_peak_selavy"] = self.table["flux_peak"] - self.table["flux_peak_err_condon"] = errorpeak - self.table["flux_peak_err_selavy"] = self.table["flux_peak_err"] - self.table["flux_peak_err"] = self.table["flux_peak_err_condon"] - if correct_peak_for_noise: - self.table["flux_peak"] = self.table["flux_peak_condon"] - return flux_peak_condon, errorpeak +AEGEAN_COLUMN_MAP = { + # aegean name: (selavy name, aegean unit) + "ra": ("ra_deg_cont", u.deg), + "dec": ("dec_deg_cont", u.deg), + "err_ra": ("ra_err", u.deg), + "err_dec": ("dec_err", u.deg), + "peak_flux": ("flux_peak", u.Jy / u.beam), + "err_peak_flux": ("flux_peak_err", u.Jy / u.beam), + "a": ("maj_axis", u.arcsec), + "b": ("min_axis", u.arcsec), + "pa": ("pos_ang", u.arcsec), + "err_a": ("maj_axis_err", u.arcsec), + "err_b": ("min_axis_err", u.deg), + "err_pa": ("pos_ang_err", u.deg), + "local_rms": ("rms_image", u.Jy / u.beam), +} def _convert_selavy_columns_to_quantites( qt: QTable, units: Dict[str, u.Unit] = SELAVY_COLUMN_UNITS ) -> QTable: + """Takes in a selavy component table and adds units to respective quantities + + Args: + qt (QTable): the component catalog + units (Dict[str, u.Unit], optional): The dictionary with parameters and + their units. Defaults to SELAVY_COLUMN_UNITS. 
+ + Returns: + QTable: Table with units to the parameters + """ for col, unit in units.items(): qt[col].unit = unit return qt @@ -210,6 +97,14 @@ def read_selavy(catalog_path: Path) -> QTable: def read_selavy_votable(catalog_path: Path) -> QTable: + """Helper function to read the selavy catalog, if the input format is votable + + Args: + catalog_path (Path): Input Path to the catalog file + + Returns: + QTable: The component table + """ t = Table.read(catalog_path, format="votable", use_names_over_ids=True) # remove units from str columns and fix unrecognized flux units for col in t.itercols(): @@ -225,17 +120,6 @@ def read_selavy_votable(catalog_path: Path) -> QTable: return qt -def read_hdf(catalog_path: Path) -> pd.DataFrame: - df = pd.read_hdf(catalog_path, key="data") - df["field"] = df.field.str.split(".", n=1, expand=True)[0] - qt = _convert_selavy_columns_to_quantites(QTable.from_pandas(df)) - qt["coord"] = SkyCoord(ra=qt["ra_deg_cont"], dec=qt["dec_deg_cont"]) - _, qt["nn_separation"], _ = qt["coord"].match_to_catalog_sky( - qt["coord"], nthneighbor=2 - ) - return qt - - def read_aegean_csv(catalog_path: Path) -> QTable: """Read an Aegean CSV component catalog and return a QTable. Assumed to contain at least the following columns with the given units: @@ -260,22 +144,6 @@ def read_aegean_csv(catalog_path: Path) -> QTable: - `nn_separation`: separation to the nearest-neighbour source as a Quantity with angular units. """ - AEGEAN_COLUMN_MAP = { - # aegean name: (selavy name, aegean unit) - "ra": ("ra_deg_cont", u.deg), - "dec": ("dec_deg_cont", u.deg), - "err_ra": ("ra_err", u.deg), - "err_dec": ("dec_err", u.deg), - "peak_flux": ("flux_peak", u.Jy / u.beam), - "err_peak_flux": ("flux_peak_err", u.Jy / u.beam), - "a": ("maj_axis", u.arcsec), - "b": ("min_axis", u.arcsec), - "pa": ("pos_ang", u.arcsec), - "err_a": ("maj_axis_err", u.arcsec), - "err_b": ("min_axis_err", u.deg), - "err_pa": ("pos_ang_err", u.deg), - "local_rms": ("rms_image", u.Jy / u.beam), - } qt = QTable.read(catalog_path) # rename columns to match selavy convention and assign units for col, (new_col, unit) in AEGEAN_COLUMN_MAP.items(): @@ -296,3 +164,178 @@ def read_aegean_csv(catalog_path: Path) -> QTable: qt["coord"], nthneighbor=2 ) return qt + + +class Catalog: + """Class to make a catalog object from the selavy/Aegean files. This + is then used for catalog matching between the referecne catalog and + the current catalog to select for sources and get flux and astrometric + corrections. 
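A minimal usage sketch (illustrative only; the file path, naming pattern, and PSF values are hypothetical, the constructor arguments are the ones defined in this class):

from pathlib import Path
from vast_post_processing.catalogs import Catalog

cat = Catalog(
    Path("epoch_01/selavy-image.i.VAST_0102-06A.SB9667.components.xml"),  # hypothetical
    psf=(12.5, 10.2),      # PSF major and minor axes in arcsec
    input_format="selavy",
    condon=True,           # derive Condon (1997) peak-flux errors from the PSF
)
print(cat.field, cat.epoch, cat.sbid)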
+ + Raises: + SystemExit: if the input catalog files are other than Selavy/Aegean + products + """ + + def __init__( + self, + path: Path, + psf: Optional[Tuple[float, float]] = None, + input_format: str = "selavy", + condon: bool = False, + apply_flux_limit: bool = True, + flux_limit: float = 0, + ): + self.path: Path + self.table: QTable + self.input_format: Optional[str] + self.flux_flag: Optional[bool] + self.flux_lim: Optional[float] + self.field: Optional[str] + self.epoch: Optional[str] + self.sbid: Optional[str] + self.psf_major: Optional[u.Quantity] + self.psf_minor: Optional[u.Quantity] + self.type: str + + self.path = path + self.input_format = input_format + self.flux_flag = apply_flux_limit + self.flux_lim = flux_limit + + # Read the catalog + self._read_catalog() + + # Filter sources + self._filter_sources() + + # Get epoch, field, sbid from the file name + epoch_name = path.parent.name + _, _, field, sbid, *_ = path.name.split(".") + self.epoch = epoch_name + self.field = field.replace("VAST_", "") + self.sbid = sbid + + # Parse the psf info + if psf is not None: + self.psf_major, self.psf_minor = psf * u.arcsec + logger.debug( + "Using user provided PSF for %s: %s, %s.", + self.path, + self.psf_major, + self.psf_minor, + ) + else: + logger.warning( + "PSF is unknown for %s. Condon errors will be unavailable.", self.path + ) + self.psf_major = None + self.psf_minor = None + + # Calculate the covariant error using Condon 1997 + if condon and self.psf_major is not None and self.psf_minor is not None: + self.calculate_condon_flux_errors(correct_peak_for_noise=True) + logger.debug("Condon errors computed for %s.", self.path) + + def _read_catalog(self): + """Helper function to read and parse the input files + + Raises: + SystemExit: if the input catalog files are other than Selavy/Aegean + products + """ + path = self.path + if self.input_format == "selavy": + if path.suffix == ".txt": + logger.debug("Reading %s as a Selavy txt catalog.", path) + read_catalog = read_selavy + else: + logger.debug("Reading %s as a Selavy VOTable catalog.", path) + read_catalog = read_selavy_votable + elif self.input_format == "aegean": + logger.debug("Reading %s as an Aegean catalog.", path) + read_catalog = read_aegean_csv + else: + logger.error( + "The format of input files is not supported. Only selavy and aegean are supported" + ) + raise SystemExit + + self.table = read_catalog(path) + + def _filter_sources(self): + """Helper function to filter sources that are used for cross-match; + filter sources with bad sizes and optionally given flux limits""" + if self.flux_flag: + lim = self.flux_lim + logger.info( + f"Filtering %d sources with fluxes <= {lim}", + (self.table["flux_peak"] <= lim).sum(), + ) + self.table = self.table[self.table["flux_peak"] > lim] + logger.info( + "Filtering %d sources with fitted sizes <= 0.", + ((self.table["maj_axis"] <= 0) | (self.table["min_axis"] <= 0)).sum(), + ) + self.table = self.table[ + (self.table["maj_axis"] > 0) & (self.table["min_axis"] > 0) + ] + + def calculate_condon_flux_errors( + self, + alpha_maj: float = 1.5, + alpha_min: float = 1.5, + clean_bias: float = 0.0, + clean_bias_error: float = 0.0, + frac_flux_cal_error: float = 0.0, + correct_peak_for_noise: bool = False, + ): + """Calculates the covariant error using Condon 1997. See equation 41 + of Condon 1997 for reference + + Args: + alpha_maj (float, optional): power for major axis correction. Defaults to 1.5 + alpha_min (float, optional): power for major axis correction. Defaults to 1.5. 
+ clean_bias (float, optional): additive flux bias. Defaults to 0.0. + clean_bias_error (float, optional): error in additive flux bias. Defaults to 0.0. + frac_flux_cal_error (float, optional): multiplicative flux error. Defaults to 0.0. + correct_peak_for_noise (bool, optional): flag to re-write the peak flux from + selavy. Defaults to False. + """ + noise = self.table["rms_image"] + snr = self.table["flux_peak"] / noise + + # See equation 41 of Condon 1997 to calculate the signal to noise + rho_sq3 = ( + ( + self.table["maj_axis"] + * self.table["min_axis"] + / (4.0 * self.psf_major * self.psf_minor) + ) + * (1.0 + (self.psf_major / self.table["maj_axis"]) ** 2) ** alpha_maj + * (1.0 + (self.psf_minor / self.table["min_axis"]) ** 2) ** alpha_min + * snr**2 + ) + + # Correct the peak flux now. + flux_peak_col = self.table["flux_peak"] + flux_peak_condon = self.table["flux_peak"] + ( + -(noise**2) / self.table["flux_peak"] + clean_bias + ) + if correct_peak_for_noise: + flux_peak_col = flux_peak_condon + + errorpeaksq = ( + (frac_flux_cal_error * flux_peak_col) ** 2 + + clean_bias_error**2 + + 2.0 * flux_peak_col**2 / rho_sq3 + ) + errorpeak = np.sqrt(errorpeaksq) + + self.table["flux_peak_condon"] = flux_peak_condon + self.table["flux_peak_selavy"] = self.table["flux_peak"] + self.table["flux_peak_err_condon"] = errorpeak + self.table["flux_peak_err_selavy"] = self.table["flux_peak_err"] + self.table["flux_peak_err"] = self.table["flux_peak_err_condon"] + if correct_peak_for_noise: + self.table["flux_peak"] = self.table["flux_peak_condon"] diff --git a/vast_post_processing/cli/run_corrections.py b/vast_post_processing/cli/run_corrections.py index 9eac01b..811c8ce 100644 --- a/vast_post_processing/cli/run_corrections.py +++ b/vast_post_processing/cli/run_corrections.py @@ -1,108 +1,10 @@ from loguru import logger from pathlib import Path -from typing import Optional, Tuple, Generator -from astropy.coordinates import Angle -import astropy.units as u -import click, sys, os +from typing import Optional from uncertainties import ufloat -from itertools import chain -import pandas as pd -import typer -from astropy.table import QTable -from astropy.io import fits -from astropy import units as u -from vast_post_processing.catalogs import Catalog +import typer, sys -from vast_post_processing.corrections import ( - shift_and_scale_catalog, - shift_and_scale_image, - vast_xmatch_qc, -) - - -class _AstropyUnitType(click.ParamType): - def convert(self, value, param, ctx, unit_physical_type): - try: - unit = u.Unit(value) - except ValueError: - self.fail(f"astropy.units.Unit does not understand: {value}.") - if unit.physical_type != unit_physical_type: - self.fail( - f"{unit} is a {unit.physical_type} unit. It must be of type" - f" {unit_physical_type}." 
- ) - else: - return unit - - -class AngleUnitType(_AstropyUnitType): - name = "angle_unit" - - def convert(self, value, param, ctx): - return super().convert(value, param, ctx, "angle") - - -class FluxUnitType(_AstropyUnitType): - name = "flux_unit" - - def convert(self, value, param, ctx): - return super().convert(value, param, ctx, "spectral flux density") - - -class AngleQuantityType(click.ParamType): - name = "angle_quantity" - - def convert(self, value, param, ctx): - try: - angle = Angle(value) - return angle - except ValueError: - self.fail(f"astropy.coordinates.Angle does not understand: {value}.") - - -ANGLE_UNIT_TYPE = AngleUnitType() -FLUX_UNIT_TYPE = FluxUnitType() -ANGLE_QUANTITY_TYPE = AngleQuantityType() - - -def get_correct_correction_file(correction_files_list, img_field): - count = 0 - for f in correction_files_list: - filename = f.name - _, _, field, *_ = filename.split(".") - field = field.replace("RACS", "VAST") - if (field in img_field) and ("components" in filename): - count += 1 - return f.as_posix() - else: - continue - if count == 0: - return None - - -def get_psf_from_image(image_path: str): - """ - Funtion used to get the point spread function (PSF) extent in major and minor axis. - These will be in the header of the image file - - Parameters - ---------- - image_path: str - Path to the image file - - Returns - ------- - Tuple(psf_major, psf_minor) - Major and minor axes of the PSF. - """ - image_path = image_path.replace("SELAVY", "IMAGES") - image_path = image_path.replace("selavy-", "") - image_path = image_path.replace(".components.xml", ".fits") - hdu = fits.open(image_path) - psf_maj = hdu[0].header["BMAJ"] * u.degree - psf_min = hdu[0].header["BMIN"] * u.degree - hdu.close() - return (psf_maj.to(u.arcsec), psf_min.to(u.arcsec)) +from vast_post_processing.corrections import correct_files def main( @@ -169,10 +71,17 @@ def main( "arcsec for `catalog`. First argument is major axis followed by nimor axis." ), ), + outdir: Optional[str] = typer.Option( + None, + help="Stem of the output directory to store the corrected images and cataloges to. The default" + "way is to construct it from the tile directory, by making folders with _CORRECTED tag attached" + "to them as suffix", + ), overwrite: bool = False, verbose: bool = False, ): - """Read astrometric and flux corrections produced by vast-xmatch and apply them to + """ + Read astrometric and flux corrections produced by vast-xmatch and apply them to VAST images and catalogues in vast-data. See https://github.com/marxide/vast-xmatch. 
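For orientation, an invocation of this entry point might look like the sketch below. The tile-data path, epochs and output directory are placeholders, and the option names assume the Typer parameters defined in this module.

    python -m vast_post_processing.cli.run_corrections \
        /data/VAST/vast-data/TILES \
        --epoch 1 --epoch 2 \
        --radius 10 \
        --outdir /data/VAST/vast-data/TILES_CORRECTED \
        --overwrite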
""" # configure logger @@ -180,164 +89,18 @@ def main( # replace the default sink logger.remove() logger.add(sys.stderr, level="INFO") - - # read corrections - image_path_glob_list: list[Generator[Path, None, None]] = [] - components_path_glob_list: list[Generator[Path, None, None]] = [] - correction_files_path_glob_list: list[Generator[Path, None, None]] = [] - - correction_files_path_glob_list.append(vast_corrections_root.glob("*.xml")) - correction_files_path_glob_list = list(correction_files_path_glob_list[0]) - - if epoch is None or len(epoch) == 0: - image_path_glob_list.append( - vast_tile_data_root.glob("STOKESI_IMAGES/epoch_*/*.fits") - ) - components_path_glob_list.append( - vast_tile_data_root.glob("STOKESI_SELAVY/epoch_*/*.xml") - ) - else: - for n in epoch: - image_path_glob_list.append( - vast_tile_data_root.glob(f"STOKESI_IMAGES/epoch_{n}/*.fits") - ) - components_path_glob_list.append( - vast_tile_data_root.glob(f"STOKESI_SELAVY/epoch_{n}/*.xml") - ) - - # construct output path to store corrections - corr_dir = vast_tile_data_root / "corr_db" - if not os.path.isdir(corr_dir): - os.mkdir(corr_dir) - - # get corrections for an image and the correct it - for image_path in chain.from_iterable(image_path_glob_list): - epoch_dir = image_path.parent.name - _, _, field, sbid_str, *_ = image_path.name.split(".") - sbid = int(sbid_str[2:]) - - # get rms and background images - rms_path = ( - vast_tile_data_root - / "STOKESI_RMSMAPS" - / epoch_dir - / f"noiseMap.{image_path.name}" - ) - bkg_path = ( - vast_tile_data_root - / "STOKESI_RMSMAPS" - / epoch_dir - / f"meanMap.{image_path.name}" - ) - - # construct output path to store corrections for each epoch - epoch_corr_dir = corr_dir / epoch_dir - - if not os.path.isdir(epoch_corr_dir): - os.mkdir(epoch_corr_dir) - - ref_file = get_correct_correction_file( - correction_files_list=correction_files_path_glob_list, - img_field=field, - ) - - skip = False - if not rms_path.exists(): - logger.warning(f"RMS image not found for {image_path}.") - if not bkg_path.exists(): - logger.warning(f"Background image not found for {image_path}.") - - # Look for any component and island files correspnding to this image - image_root = image_path.parent.as_posix() - catalog_root = image_root.replace("IMAGES", "SELAVY") - - catalog_filename = image_path.name.replace("image", "selavy-image") - catalog_filename = catalog_filename.replace(".fits", ".components.xml") - - catalog_filepath = f"{catalog_root}/{catalog_filename}" - - component_file = Path(catalog_filepath) - island_file = Path(catalog_filepath.replace("components", "islands")) - - skip = ( - not ( - (rms_path.exists()) - and (bkg_path.exists()) - and (ref_file is not None) - and (component_file.exists()) - ) - or skip - ) - if skip: - if not ((rms_path.exists()) and (bkg_path.exists())): - logger.warning(f"Skipping {image_path}, RMS/BKG maps do not exist") - elif not (component_file.exists()): - logger.warning(f"Skipping {image_path}, catalog files do not exist") - elif ref_file is None: - logger.warning(f"Skipping {image_path}, no reference field found.") - continue - else: - fname = image_path.name.replace(".fits", "corrections.csv") - crossmatch_file = epoch_corr_dir / fname - csv_file = epoch_corr_dir / "all_fields_corrections.csv" - - # Get the psf measurements to estimate errors follwoing Condon 1997 - if len(psf_ref) > 0: - psf_reference = psf_ref - else: - psf_reference = get_psf_from_image(ref_file) - - if len(psf) > 0: - psf_image = psf - else: - psf_image = 
get_psf_from_image(image_path.as_posix()) - ( - dra_median_value, - ddec_median_value, - flux_corr_mult, - flux_corr_add, - ) = vast_xmatch_qc( - reference_catalog_path=ref_file, - catalog_path=component_file.as_posix(), - radius=Angle(radius * u.arcsec), - condon=condon, - psf_reference=psf_reference, - psf=psf_image, - fix_m=False, - fix_b=False, - crossmatch_output=crossmatch_file, - csv_output=csv_file, - ) - - # get corrections - for path in (image_path, rms_path, bkg_path): - stokes_dir = f"{path.parent.parent.name}_CORRECTED" - output_dir = vast_tile_data_root / stokes_dir / epoch_dir - output_dir.mkdir(parents=True, exist_ok=True) - _ = shift_and_scale_image( - path, - output_dir, - flux_scale=flux_corr_mult.n, - flux_offset_mJy=flux_corr_add.n, - ra_offset_arcsec=dra_median_value.item(), - dec_offset_arcsec=ddec_median_value.item(), - overwrite=overwrite, - ) - - # Do the same for catalog files - for path in (component_file, island_file): - stokes_dir = f"{path.parent.parent.name}_CORRECTED" - output_dir = vast_tile_data_root / stokes_dir / epoch_dir - output_dir.mkdir(parents=True, exist_ok=True) - _ = shift_and_scale_catalog( - path, - output_dir, - flux_scale=flux_corr_mult.n, - flux_offset_mJy=flux_corr_add.n, - ra_offset_arcsec=dra_median_value.item(), - dec_offset_arcsec=ddec_median_value.item(), - overwrite=overwrite, - ) + correct_files( + vast_tile_data_root=vast_tile_data_root, + vast_corrections_root=vast_corrections_root, + epoch=epoch, + radius=radius, + condon=condon, + psf_ref=psf_ref, + psf=psf, + outdir=outdir, + overwrite=overwrite, + verbose=verbose, + ) if __name__ == "__main__": diff --git a/vast_post_processing/corrections.py b/vast_post_processing/corrections.py index 7467195..171550e 100644 --- a/vast_post_processing/corrections.py +++ b/vast_post_processing/corrections.py @@ -2,7 +2,9 @@ """ from pathlib import Path -import warnings +import warnings, sys, os +from typing import Generator +from itertools import chain from astropy.coordinates import SkyCoord, Angle from astropy.io import fits from astropy.io.votable import parse @@ -34,6 +36,39 @@ def vast_xmatch_qc( crossmatch_output: Optional[str] = None, csv_output: Optional[str] = None, ): + """Function to cross-match two catalogs and filter sources that are within + a given radius + + Args: + reference_catalog_path (str): Path to the reference catalog + catalog_path (str): Path to the catalog that needs flux/astrometric corrections + radius (Angle, optional): Cross-match radius. Defaults to Angle("10arcsec"). + condon (bool, optional): Flag to calculate Condon error. Defaults to False. + psf_reference (Optional[Tuple[float, float]], optional): PSF of the reference catalog. + This includes information about the major/minor axis FWHM. Defaults to None. If None, + Condon errors will not be calculated. + psf (Optional[Tuple[float, float]], optional): PSF of the input catalog. + This includes information about the major/minor axis FWHM. Defaults to None. If None, + Condon errors will not be calculated. + fix_m (bool, optional): Flag to fix the slope. For tge straight line fit, should we fix + the slope to certain value or leave it free to be fit. Defaults to False. + fix_b (bool, optional): Flag to fix the intercept. For tge straight line fit, should we fix + the slope to certain value or leave it free to be fit. Defaults to False. + positional_unit (u.Unit, optional): output unit in which the astrometric offset is given. + Defaults to u.Unit("arcsec"). 
+ flux_unit (u.Unit, optional): output unit in which the flux scale is given. + Defaults to u.Unit("mJy"). + crossmatch_output (Optional[str], optional): File path to write the crossmatch output. + Defaults to None, which means no file is written + csv_output (Optional[str], optional): File path to write the flux/astrometric corrections. + Defaults to None, which means no file is written + + Returns: + dra_median_value: The median offset in RA (arcsec) + ddec_median_value: The median offset in DEC (arcsec) + flux_corr_mult: Multiplicative flux correction + flux_corr_add: Additive flux correction + """ # convert catalog path strings to Path objects reference_catalog_path = Path(reference_catalog_path) catalog_path = Path(catalog_path) @@ -128,46 +163,26 @@ def vast_xmatch_qc( def shift_and_scale_image( image_path: Path, - output_dir_path: Path, flux_scale: float = 1.0, flux_offset_mJy: float = 0.0, ra_offset_arcsec: float = 0.0, dec_offset_arcsec: float = 0.0, replace_nan: bool = False, - overwrite: bool = False, -) -> Path: +): """Apply astrometric and flux corrections to a FITS image. - Parameters - ---------- - image_path : Path - Path to image. - output_dir_path : Path - Path to write corrected image. - flux_scale : float, optional - Flux scale, by default 1.0 - flux_offset_mJy : float, optional - Flux offset in mJy, by default 0.0 - ra_offset_arcsec : float, optional - Right ascension offset in arcsec, by default 0.0 - dec_offset_arcsec : float, optional - Declination offset in arcsec, by default 0.0 - replace_nan : bool, optional - Whether to replace `NaN` pixels with 0, by default False - overwrite : bool, optional - Whether to write over existing image, by default False + Args: + image_path (Path): Path for the input image + flux_scale (float, optional): Multiplicative flux correction. Defaults to 1.0. + flux_offset_mJy (float, optional): Additive flux correction. Defaults to 0.0. + ra_offset_arcsec (float, optional): RA offset in arcsec. Defaults to 0.0. + dec_offset_arcsec (float, optional): DEC offset in arcsec. Defaults to 0.0. + replace_nan (bool, optional): Replace NAN's in the data with 0. Defaults to False. - Returns - ------- - output_path : Path - Path to corrected image. + Returns: + astropy.io.fits.hdu.image.PrimaryHDU: the HDU of the corrected image """ - # Create new output path and check for existing image at path logger.debug(f"Correcting {image_path} ...") - output_path = output_dir_path / image_path.with_suffix(".corrected.fits").name - if output_path.exists() and not overwrite: - logger.warning(f"Will not overwrite existing image: {output_path}.") - return output_path # Open image image_hdul = fits.open(image_path) @@ -205,51 +220,29 @@ def shift_and_scale_image( image_hdu.header["RAOFF"] = ra_offset_arcsec image_hdu.header["DECOFF"] = dec_offset_arcsec - # Safely write image to file and return path to corrected image - if output_path.exists() and overwrite: - logger.warning(f"Overwriting existing image: {output_path}.") - image_hdul.writeto(str(output_path), overwrite=True) - else: - image_hdul.writeto(str(output_path)) - logger.success(f"Wrote corrected image: {output_path}.") - image_hdul.close() - return output_path + return image_hdul def shift_and_scale_catalog( catalog_path: Path, - output_dir_path: Path, flux_scale: float = 1.0, flux_offset_mJy: float = 0.0, ra_offset_arcsec: float = 0.0, dec_offset_arcsec: float = 0.0, - overwrite: bool = False, -) -> Path: - """Apply astrometric and flux corrections to a VAST VOTable. 
+): + """Apply astrometric and flux corrections to a catalog. - Parameters - ---------- - catalog_path : Path - Path to catalog. - output_dir_path : Path - Path to write corrected catalog to. - flux_scale : float, optional - Flux scale, by default 1.0 - flux_offset_mJy : float, optional - Flux offset in mJy, by default 0.0 - ra_offset_arcsec : float, optional - Right ascension offset in arcsec, by default 0.0 - dec_offset_arcsec : float, optional - Declination offset in arcsec, by default 0.0 - overwrite : bool, optional - Whether to write over existing catalog, by default False + Args: + catalog_path (Path): Path for the input catalog + flux_scale (float, optional): Multiplicative flux correction. Defaults to 1.0. + flux_offset_mJy (float, optional): Additive flux correction. Defaults to 0.0. + ra_offset_arcsec (float, optional): RA offset in arcsec. Defaults to 0.0. + dec_offset_arcsec (float, optional): DEC offset in arcsec. Defaults to 0.0. - Returns - ------- - output_path : Path - Path to corrected catalog. + Returns: + astropy.io.votable: the corrected catalog """ - # Flux-unit columns in all catalogs + # flux-unit columns in all catalogs FLUX_COLS = ( "col_flux_peak", "col_flux_int", @@ -273,10 +266,6 @@ def shift_and_scale_catalog( # Create new output path and check for existing catalog at path logger.debug(f"Correcting {catalog_path} ...") is_island = ".islands" in catalog_path.name - output_path = output_dir_path / catalog_path.with_suffix(".corrected.xml").name - if output_path.exists() and not overwrite: - logger.warning(f"Will not overwrite existing catalogue: {output_path}.") - return output_path # Open catalog votablefile = parse(catalog_path) @@ -314,38 +303,275 @@ def shift_and_scale_catalog( for col in cols: votable.array[col] = flux_scale * (votable.array[col] + flux_offset_mJy) - # Safely write corrected VOTable to file and return path to corrected - # catalog - if output_path.exists() and overwrite: - logger.warning(f"Overwriting existing catalogue: {output_path}.") - output_path.unlink() - votablefile.to_xml(str(output_path)) + return votablefile + + +def get_correct_file(correction_files_dir, img_field): + """Helper function to get the file from the reference catalogs which + observed the same field. + + Args: + correction_files_list (list): Path to the correction files directory + img_field (str): The field name of the input catalog + + Returns: + str: the correspoding file with the same field as the one requested. + """ + # we need to string the last A from the field + if img_field[-1] == "A": + img_field = img_field[:-1] + img_field = img_field.replace("VAST", "RACS") + matched_field = list(correction_files_dir.glob(f"*{img_field}*components*")) + if len(matched_field) > 0: + # This means that there are multpile files with the same field, + # possibly with different sbid's corresponding to different observations + return matched_field[0].as_posix() else: - votablefile.to_xml(str(output_path)) - logger.success(f"Wrote corrected catalogue: {output_path}.") - return output_path + return None -# Separated logic +def get_psf_from_image(image_path: str): + """ + Funtion used to get the point spread function (PSF) extent in major and minor axis. 
+ These will be in the header of the image file -from itertools import chain -from pathlib import Path -import sys -from typing import Optional, Generator + Parameters + ---------- + image_path: str + Path to the image file -from loguru import logger -import pandas as pd + Returns + ------- + Tuple(psf_major, psf_minor) + Major and minor axes of the PSF. + """ + image_path = image_path.replace("SELAVY", "IMAGES") + image_path = image_path.replace("selavy-", "") + image_path = image_path.replace(".components.xml", ".fits") + hdu = fits.open(image_path) + psf_maj = hdu[0].header["BMAJ"] * u.degree + psf_min = hdu[0].header["BMIN"] * u.degree + hdu.close() + return (psf_maj.to(u.arcsec), psf_min.to(u.arcsec)) + + +def correct_field( + image_path: Path, + vast_corrections_root: Path = "/data/vast-survey/RACS/release-format/EPOCH00/TILES/STOKESI_SELAVY", + radius: float = 10, + condon: bool = True, + psf_ref: list[float] = None, + psf: list[float] = None, + write_output: bool = True, + outdir: str = None, + overwrite: bool = False, +): + """Read astrometric and flux corrections produced by vast-xmatch and apply them to + VAST images and catalogues in vast-data. See https://github.com/marxide/vast-xmatch. + Args: + image path (Path): Path to the image file that needs to be corrected. + vast_corrections_root (Path, optional): Path to the catalogues of referecne catalog. + Defaults to "/data/vast-survey/RACS/release-format/EPOCH00/TILES/STOKESI_SELAVY". + radius (float, optional): Crossmatch radius. Defaults to 10. + condon (bool, optional): Flag to replace errros with Condon errors. Defaults to True. + psf_ref (list[float], optional): PSF information of the reference catalog. Defaults to None. + psf (list[float], optional): PSF information of the input catalog. Defaults to None. + write_output (bool, optional): Write the corrected image and catalog files or return the + corrected hdul and the corrected table?. Defaults to True, which means to write + outdir (str, optional): The stem of the output directory to write the files to + overwrite (bool, optional): Overwrite the existing files?. Defaults to False. 
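To make the path handling below concrete, the function assumes a tile layout along these lines; the field and SBID tokens are illustrative placeholders, and corrected products are written to sibling *_CORRECTED directories under outdir:

    <vast_tile_data_root>/
        STOKESI_IMAGES/epoch_N/image.i.<FIELD>.<SBID>. ... .fits
        STOKESI_RMSMAPS/epoch_N/noiseMap.image.i.<FIELD>.<SBID>. ... .fits
        STOKESI_RMSMAPS/epoch_N/meanMap.image.i.<FIELD>.<SBID>. ... .fits
        STOKESI_SELAVY/epoch_N/selavy-image.i.<FIELD>.<SBID>. ... .components.xml
        STOKESI_SELAVY/epoch_N/selavy-image.i.<FIELD>.<SBID>. ... .islands.xml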
+ """ + epoch_dir = image_path.parent.name + _, _, field, *_ = image_path.name.split(".") + + # get rms and background images + rms_root = Path( + image_path.parent.as_posix().replace("STOKESI_IMAGES", "STOKESI_RMSMAPS") + ) + rms_path = rms_root / f"noiseMap.{image_path.name}" + bkg_path = rms_root / f"meanMap.{image_path.name}" + + correction_files_dir = Path(vast_corrections_root) + ref_file = get_correct_file( + correction_files_dir=correction_files_dir, + img_field=field, + ) -def correct_vast( + if outdir is None: + outdir = image_path.parent.parent.parent + + # construct output path to store corrections for each epoch + corr_dir = outdir / "corr_db" + if not os.path.isdir(corr_dir): + os.mkdir(corr_dir) + epoch_corr_dir = corr_dir / epoch_dir + + if not os.path.isdir(epoch_corr_dir): + os.mkdir(epoch_corr_dir) + + skip = False + if not rms_path.exists(): + logger.warning(f"RMS image not found for {image_path}.") + if not bkg_path.exists(): + logger.warning(f"Background image not found for {image_path}.") + + # Look for any component and island files correspnding to this image + image_root = image_path.parent.as_posix() + catalog_root = image_root.replace("IMAGES", "SELAVY") + + catalog_filename = image_path.name.replace("image", "selavy-image") + catalog_filename = catalog_filename.replace(".fits", ".components.xml") + + catalog_filepath = f"{catalog_root}/{catalog_filename}" + + component_file = Path(catalog_filepath) + island_file = Path(catalog_filepath.replace("components", "islands")) + + skip = ( + not ( + (rms_path.exists()) + and (bkg_path.exists()) + and (ref_file is not None) + and (component_file.exists()) + ) + or skip + ) + if skip: + if not ((rms_path.exists()) and (bkg_path.exists())): + logger.warning(f"Skipping {image_path}, RMS/BKG maps do not exist") + elif not (component_file.exists()): + logger.warning(f"Skipping {image_path}, catalog files do not exist") + elif ref_file is None: + logger.warning(f"Skipping {image_path}, no reference field found.") + return None + else: + fname = image_path.name.replace(".fits", "corrections.csv") + crossmatch_file = epoch_corr_dir / fname + csv_file = epoch_corr_dir / "all_fields_corrections.csv" + + # Get the psf measurements to estimate errors follwoing Condon 1997 + if len(psf_ref) > 0: + psf_reference = psf_ref + else: + psf_reference = get_psf_from_image(ref_file) + + if len(psf) > 0: + psf_image = psf + else: + psf_image = get_psf_from_image(image_path.as_posix()) + + ( + dra_median_value, + ddec_median_value, + flux_corr_mult, + flux_corr_add, + ) = vast_xmatch_qc( + reference_catalog_path=ref_file, + catalog_path=component_file.as_posix(), + radius=Angle(radius * u.arcsec), + condon=condon, + psf_reference=psf_reference, + psf=psf_image, + fix_m=False, + fix_b=False, + crossmatch_output=crossmatch_file, + csv_output=csv_file, + ) + + # get corrections + corrected_hdul = [] + for path in (image_path, rms_path, bkg_path): + stokes_dir = f"{path.parent.parent.name}_CORRECTED" + output_dir = outdir / stokes_dir / epoch_dir + output_dir.mkdir(parents=True, exist_ok=True) + output_path = output_dir / path.with_suffix(".corrected.fits").name + if output_path.exists() and not overwrite: + logger.warning(f"Will not overwrite existing image: {output_path}.") + else: + corrected_hdu = shift_and_scale_image( + path, + flux_scale=flux_corr_mult.n, + flux_offset_mJy=flux_corr_add.n, + ra_offset_arcsec=dra_median_value.item(), + dec_offset_arcsec=ddec_median_value.item(), + ) + if write_output: + if output_path.exists() and overwrite: 
+ logger.warning(f"Overwriting existing image: {output_path}.") + corrected_hdu.writeto(str(output_path), overwrite=True) + else: + corrected_hdu.writeto(str(output_path)) + logger.success(f"Writing corrected image to: {output_path}.") + corrected_hdu.close() + else: + corrected_hdul.append(corrected_hdu) + + # Do the same for catalog files + corrected_catalogs = [] + for path in (component_file, island_file): + stokes_dir = f"{path.parent.parent.name}_CORRECTED" + output_dir = outdir / stokes_dir / epoch_dir + output_dir.mkdir(parents=True, exist_ok=True) + output_path = output_dir / path.with_suffix(".corrected.xml").name + if output_path.exists() and not overwrite: + logger.warning(f"Will not overwrite existing catalogue: {output_path}.") + else: + corrected_catalog = shift_and_scale_catalog( + path, + flux_scale=flux_corr_mult.n, + flux_offset_mJy=flux_corr_add.n, + ra_offset_arcsec=dra_median_value.item(), + dec_offset_arcsec=ddec_median_value.item(), + ) + if write_output: + # write corrected VOTable + if output_path.exists() and overwrite: + logger.warning( + f"Overwriting existing catalogue: {output_path}." + ) + output_path.unlink() + corrected_catalog.to_xml(output_path.as_posix()) + else: + corrected_catalog.to_xml(output_path.as_posix()) + logger.success(f"Writing corrected catalogue: {output_path}.") + else: + corrected_catalogs.append(corrected_catalog) + return (corrected_hdul, corrected_catalogs) + + +def correct_files( vast_tile_data_root: Path, - vast_corrections_csv: Path, - epoch: Optional[list[int]], + vast_corrections_root: Path = "/data/vast-survey/RACS/release-format/EPOCH00/TILES/STOKESI_SELAVY", + epoch: list[int] = None, + radius: float = 10, + condon: bool = True, + psf_ref: list[float] = None, + psf: list[float] = None, + write_output: bool = True, + outdir: str = None, overwrite: bool = False, verbose: bool = False, ): """Read astrometric and flux corrections produced by vast-xmatch and apply them to VAST images and catalogues in vast-data. See https://github.com/marxide/vast-xmatch. + + Args: + vast_tile_data_root (Path): Path to the data that needs to be corrected. + Should follow VAST convention, something like + /data/VAST/vast-data/TILES/ that has STOKESI_IMAGES/epoch_xx/ + vast_corrections_root (Path, optional): Path to the catalogues of referecne catalog. + Defaults to "/data/vast-survey/RACS/release-format/EPOCH00/TILES/STOKESI_SELAVY". + epoch (list[int], optional): Epoch to be corrected. Defaults to None. + radius (float, optional): Crossmatch radius. Defaults to 10. + condon (bool, optional): Flag to replace errros with Condon errors. Defaults to True. + psf_ref (list[float], optional): PSF information of the reference catalog. Defaults to None. + psf (list[float], optional): PSF information of the input catalog. Defaults to None. + write_output (bool, optional): Write the corrected image and catalog files or return the + corrected hdul and the corrected table?. Defaults to True, which means to write + outdir (str, optional): The stem of the output directory to write the files to + overwrite (bool, optional): Overwrite the existing files?. Defaults to False. + verbose (bool, optional): Show more log messages. Defaults to False. 
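A library-level call, as opposed to the CLI wrapper, might look like this sketch. The paths are placeholders; empty psf lists mean the PSF sizes are read from the image headers as described above.

    from pathlib import Path
    from vast_post_processing.corrections import correct_files

    correct_files(
        vast_tile_data_root=Path("/data/VAST/vast-data/TILES"),
        vast_corrections_root=Path(
            "/data/vast-survey/RACS/release-format/EPOCH00/TILES/STOKESI_SELAVY"
        ),
        epoch=[1],
        radius=10,
        condon=True,
        psf_ref=[],       # empty: read the reference PSF from its image header
        psf=[],           # empty: read each image PSF from its header
        write_output=True,
        outdir=None,      # default: constructed from the tile directory
        overwrite=False,
    )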
""" # configure logger if not verbose: @@ -354,19 +580,15 @@ def correct_vast( logger.add(sys.stderr, level="INFO") # read corrections - corrections_df = ( - pd.read_csv(vast_corrections_csv) - .set_index(["release_epoch", "field", "sbid"]) - .sort_index() - ) image_path_glob_list: list[Generator[Path, None, None]] = [] components_path_glob_list: list[Generator[Path, None, None]] = [] + if epoch is None or len(epoch) == 0: image_path_glob_list.append( vast_tile_data_root.glob("STOKESI_IMAGES/epoch_*/*.fits") ) components_path_glob_list.append( - vast_tile_data_root.glob("STOKESI_SELAVY/epoch_*/*.components.xml") + vast_tile_data_root.glob("STOKESI_SELAVY/epoch_*/*.xml") ) else: for n in epoch: @@ -374,97 +596,19 @@ def correct_vast( vast_tile_data_root.glob(f"STOKESI_IMAGES/epoch_{n}/*.fits") ) components_path_glob_list.append( - vast_tile_data_root.glob(f"STOKESI_SELAVY/epoch_{n}/*.components.xml") + vast_tile_data_root.glob(f"STOKESI_SELAVY/epoch_{n}/*.xml") ) - # correct images + # get corrections for an image and the correct it for image_path in chain.from_iterable(image_path_glob_list): - epoch_dir = image_path.parent.name - _, _, field, sbid_str, *_ = image_path.name.split(".") - sbid = int(sbid_str[2:]) - # get rms and background images - rms_path = ( - vast_tile_data_root - / "STOKESI_RMSMAPS" - / epoch_dir - / f"noiseMap.{image_path.name}" - ) - bkg_path = ( - vast_tile_data_root - / "STOKESI_RMSMAPS" - / epoch_dir - / f"meanMap.{image_path.name}" - ) - # get corrections - skip = False - try: - corrections = corrections_df.loc[(epoch_dir, field, sbid)] - except KeyError: - skip = True - logger.warning( - f"Corrections not found for {image_path} ({epoch_dir}, {field}," - f" {sbid})." - ) - if not rms_path.exists(): - logger.warning(f"RMS image not found for {image_path}.") - if not bkg_path.exists(): - logger.warning(f"Background image not found for {image_path}.") - skip = not (rms_path.exists() and bkg_path.exists()) or skip - if skip: - logger.warning(f"Skipping {image_path}.") - continue - - # TODO determine what these variables are and where they are from - for path in (image_path, rms_path, bkg_path): - stokes_dir = f"{path.parent.parent.name}_CORRECTED" - output_dir = vast_tile_data_root / stokes_dir / epoch_dir - output_dir.mkdir(parents=True, exist_ok=True) - _ = shift_and_scale_image( - path, - output_dir, - flux_scale=corrections.flux_peak_correction_multiplicative, - flux_offset_mJy=corrections.flux_peak_correction_additive, - ra_offset_arcsec=corrections.ra_correction, - dec_offset_arcsec=corrections.dec_correction, - overwrite=overwrite, - ) - - # correct catalogs - for components_path in chain.from_iterable(components_path_glob_list): - epoch_dir = components_path.parent.name - _, _, field, sbid_str, *_ = components_path.name.split(".") - sbid = int(sbid_str[2:]) - # get island catalog - islands_path = components_path.with_name( - components_path.name.replace(".components", ".islands") + correct_field( + image_path=image_path, + vast_corrections_root=vast_corrections_root, + radius=radius, + condon=condon, + psf_ref=psf_ref, + psf=psf, + write_output=write_output, + outdir=outdir, + overwrite=overwrite, ) - # get corrections - skip = False - try: - corrections = corrections_df.loc[(epoch_dir, field, sbid)] - except KeyError: - skip = True - logger.warning( - f"Corrections not found for {components_path} ({epoch_dir}, {field}," - f" {sbid})." 
- ) - if not islands_path.exists(): - logger.warning(f"Islands catalogue not found for {components_path}.") - skip = not islands_path.exists() or skip - if skip: - logger.warning(f"Skipping {components_path}.") - continue - - for path in (components_path, islands_path): - stokes_dir = f"{path.parent.parent.name}_CORRECTED" - output_dir = vast_tile_data_root / stokes_dir / epoch_dir - output_dir.mkdir(parents=True, exist_ok=True) - _ = shift_and_scale_catalog( - path, - output_dir, - flux_scale=corrections.flux_peak_correction_multiplicative, - flux_offset_mJy=corrections.flux_peak_correction_additive, - ra_offset_arcsec=corrections.ra_correction, - dec_offset_arcsec=corrections.dec_correction, - overwrite=overwrite, - ) diff --git a/vast_post_processing/crossmatch.py b/vast_post_processing/crossmatch.py index bdbdc3c..c5ee01f 100644 --- a/vast_post_processing/crossmatch.py +++ b/vast_post_processing/crossmatch.py @@ -14,11 +14,28 @@ def median_abs_deviation(data): + """helper function to calculate the median offset + + Args: + data (list): List/array of offsets + + Returns: + float: the median offset + """ median = np.median(data) return np.median(np.abs(data - median)) def straight_line(B, x): + """Helper function for fitting. Defines a straight line + + Args: + B (list): (slope, intercept) of the line + x (list): input X-axis data + + Returns: + list: the straight line + """ m, b = B return m * x + b @@ -26,6 +43,19 @@ def straight_line(B, x): def join_match_coordinates_sky( coords1: SkyCoord, coords2: SkyCoord, seplimit: u.arcsec ): + """Helper function to do the cross match + + Args: + coords1 (SkyCoord): Input coordinates + coords2 (SkyCoord): Reference coordinates + seplimit (u.arcsec): cross-match radius + + Returns: + numpy.ndarray: Array to see which of the input coordinates have a cross match + numpy.ndarray: Indices of the input catalog where there is source in reference + catlog within separation limit + numpy.ndarray: The separation distance for the cross matches + """ idx, separation, dist_3d = match_coordinates_sky(coords1, coords2) mask = separation < seplimit return np.where(mask)[0], idx[mask], separation[mask], dist_3d[mask] @@ -35,12 +65,17 @@ def crossmatch_qtables( catalog: Catalog, catalog_reference: Catalog, radius: Angle = Angle("10 arcsec"), - catalog_coord_cols: Tuple[str, str] = ("ra_deg_cont", "dec_deg_cont"), - catalog_reference_coord_cols: Tuple[str, str] = ("ra_deg_cont", "dec_deg_cont"), ) -> QTable: - catalog_ra, catalog_dec = catalog_coord_cols - catalog_reference_ra, catalog_reference_dec = catalog_reference_coord_cols + """Main function to filter cross-matched sources. + Args: + catalog (Catalog): Input catalog + catalog_reference (Catalog): Reference catalog + radius (Angle, optional): cross-match radius. Defaults to Angle("10 arcsec"). 
+ + Returns: + QTable: filtered table that return the cross matches + """ logger.debug("Using crossmatch radius: %s.", radius) xmatch = join( From d4477a8b3d5410fbab61511c1c2aab7de06f3b34 Mon Sep 17 00:00:00 2001 From: Akash Anumarlapudi Date: Mon, 14 Aug 2023 11:43:55 -0500 Subject: [PATCH 22/31] Fixed typos --- vast_post_processing/cli/run_corrections.py | 4 ++-- vast_post_processing/corrections.py | 5 +++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/vast_post_processing/cli/run_corrections.py b/vast_post_processing/cli/run_corrections.py index 811c8ce..0347ab7 100644 --- a/vast_post_processing/cli/run_corrections.py +++ b/vast_post_processing/cli/run_corrections.py @@ -22,7 +22,7 @@ def main( "/data/vast-survey/RACS/release-format/EPOCH00/TILES/STOKESI_SELAVY", help=( "Path to RACS data that is can be used to correct VAST data. Tries to use" - " EPOCH00 as the defualt epoch. If not the user can override this by" + " EPOCH00 as the default epoch. If not the user can override this by" " giving a path to a folder that contain the selavy output" ), exists=True, @@ -54,7 +54,7 @@ def main( ".restored.conv.fits. Note that for TILE images, the epoch is determined " "from the full path. If the input catalogs do not follow this convention, then " "the PSF sizes must be supplied using --psf-reference and/or --psf. The " - "deafult behaviour is to lookup the PSF sizes from the header of the image" + "default behaviour is to lookup the PSF sizes from the header of the image" ), ), psf_ref: Optional[list[float]] = typer.Option( diff --git a/vast_post_processing/corrections.py b/vast_post_processing/corrections.py index 171550e..84d86cf 100644 --- a/vast_post_processing/corrections.py +++ b/vast_post_processing/corrections.py @@ -333,12 +333,13 @@ def get_correct_file(correction_files_dir, img_field): def get_psf_from_image(image_path: str): """ Funtion used to get the point spread function (PSF) extent in major and minor axis. - These will be in the header of the image file + These will be in the header of the image file. 
If a component file is give, it will + construct the image path from this and then gets the psf information Parameters ---------- image_path: str - Path to the image file + Path to the image file or a component file Returns ------- From 513d2c35484e40b64be559ac5f05d38ac38eaef7 Mon Sep 17 00:00:00 2001 From: Akash Anumarlapudi Date: Mon, 14 Aug 2023 14:04:32 -0500 Subject: [PATCH 23/31] New log message --- vast_post_processing/corrections.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/vast_post_processing/corrections.py b/vast_post_processing/corrections.py index 84d86cf..bc3be5b 100644 --- a/vast_post_processing/corrections.py +++ b/vast_post_processing/corrections.py @@ -613,3 +613,6 @@ def correct_files( outdir=outdir, overwrite=overwrite, ) + logger.info( + f"Successfully corrected the images and catalogs for {image_path.as_posix()}" + ) From 005f45f2a0943e5e2b84c73da7edf009d4a3f84b Mon Sep 17 00:00:00 2001 From: Akash Anumarlapudi Date: Mon, 14 Aug 2023 14:40:01 -0500 Subject: [PATCH 24/31] Deleted older corrections cli file --- vast_post_processing/cli/correct_vast.py | 273 ----------------------- 1 file changed, 273 deletions(-) delete mode 100644 vast_post_processing/cli/correct_vast.py diff --git a/vast_post_processing/cli/correct_vast.py b/vast_post_processing/cli/correct_vast.py deleted file mode 100644 index ebd5276..0000000 --- a/vast_post_processing/cli/correct_vast.py +++ /dev/null @@ -1,273 +0,0 @@ -from pathlib import Path -from typing import Optional -import typer -from astropy.table import QTable -from astropy.io import fits -from astropy import units as u - -from vast_post_processing.corrections import ( - shift_and_scale_catalog, - shift_and_scale_image, - calculate_positional_offsets, - calculate_flux_offsets, -) - - -def get_correct_correction_file(correction_files_list, epoch, img_field, img_sbid): - count = 0 - for f in chain.from_iterable(correction_files_list): - epoch_name = f.parent.name - if epoch_name in epoch: - filename = f.name - _, _, _, sbid, field, *_ = filename.split("_") - sbid = sbid.replace("-VAST", "") - field = field.replace(".csv", "") - if (sbid in img_sbid) & (field in img_field): - df = QTable.read(f) - flux_shifts = calculate_flux_offsets(df) - pos_shifts = calculate_positional_offsets(df) - count += 1 - return flux_shifts, pos_shifts - else: - continue - if count == 0: - return None, None - - -def get_psf_from_image(image_path: str): - """ - Funtion used to get the point spread function (PSF) extent in major and minor axis. - These will be in the header of the image file - - Parameters - ---------- - image_path: str - Path to the image file - - Returns - ------- - Tuple(psf_major, psf_minor) - Major and minor axes of the PSF. - """ - - hdu = fits.open(image_path) - psf_maj = hdu["BMAJ"] * u.degree - psf_min = hdu["BMIN"] * u.degree - return psf_maj.to(u.arcsec), psf_min.to(u.arcsec) - - -def main( - vast_tile_data_root: Path = typer.Argument( - ..., - help=( - "Path to VAST TILES data directory, i.e. the directory that contains the" - " STOKES* directories." - ), - exists=True, - file_okay=False, - dir_okay=True, - ), - vast_corrections_csv_root: Path = typer.Option( - "/data/vast-survey/VAST/askap-surveys-database/vast/db/", - help=( - "Path to VAST corrections CSV file produced by vast-xmatch. Tries to use" - " the default path of these files. 
If not the user can override this by" - "giving a path to file" - ), - exists=True, - file_okay=True, - dir_okay=False, - ), - epoch: Optional[list[int]] = typer.Option( - None, - help=( - "Only correct the given observation epochs. Can be given multiple times," - " e.g. --epoch 1 --epoch 2. If no epochs are given (the default), then" - " correct all available epochs." - ), - ), - overwrite: bool = False, - verbose: bool = False, -): - """Read astrometric and flux corrections produced by vast-xmatch and apply them to - VAST images and catalogues in vast-data. See https://github.com/marxide/vast-xmatch. - """ - # configure logger - if not verbose: - # replace the default sink - logger.remove() - logger.add(sys.stderr, level="INFO") - - # read corrections - # corrections_df = ( - # pd.read_csv(vast_corrections_csv) - # .set_index(["release_epoch", "field", "sbid"]) - # .sort_index() - # ) - image_path_glob_list: list[Generator[Path, None, None]] = [] - components_path_glob_list: list[Generator[Path, None, None]] = [] - correction_files_path_glob_list: list[Generator[Path, None, None]] = [] - if epoch is None or len(epoch) == 0: - image_path_glob_list.append( - vast_tile_data_root.glob("STOKESI_IMAGES/epoch_*/*.fits") - ) - components_path_glob_list.append( - vast_tile_data_root.glob("STOKESI_SELAVY/epoch_*/*.components.xml") - ) - correction_files_path_glob_list.append( - vast_corrections_csv_root.glob("epoch_*/cat_match_RACS0*.csv") - ) - else: - for n in epoch: - image_path_glob_list.append( - vast_tile_data_root.glob(f"STOKESI_IMAGES/epoch_{n}/*.fits") - ) - components_path_glob_list.append( - vast_tile_data_root.glob(f"STOKESI_SELAVY/epoch_{n}/*.components.xml") - ) - correction_files_path_glob_list.append( - vast_corrections_csv_root.glob(f"epoch_{n}/cat_match_RACS0*.csv") - ) - - # correct images - for image_path in chain.from_iterable(image_path_glob_list): - epoch_dir = image_path.parent.name - _, _, field, sbid_str, *_ = image_path.name.split(".") - sbid = int(sbid_str[2:]) - # get rms and background images - rms_path = ( - vast_tile_data_root - / "STOKESI_RMSMAPS" - / epoch_dir - / f"noiseMap.{image_path.name}" - ) - bkg_path = ( - vast_tile_data_root - / "STOKESI_RMSMAPS" - / epoch_dir - / f"meanMap.{image_path.name}" - ) - # get corrections - skip = False - # try: - # corrections = corrections_df.loc[(epoch_dir, field, sbid)] - # except KeyError: - # skip = True - # logger.warning( - # f"Corrections not found for {image_path} ({epoch_dir}, {field}," - # f" {sbid})." - # ) - flux_corrections, pos_corrections = get_correct_correction_file( - correction_files_list=correction_files_path_glob_list, - epoch=epoch_dir, - img_field=field, - img_sbid=sbid_str, - ) - if (flux_corrections is None) | (pos_corrections is None): - skip = True - logger.warning( - f"Corrections not found for {image_path} ({epoch_dir}, {field}," - f" {sbid})." 
- ) - else: - scale, offset, scale_err, offset_err = flux_corrections - dra_median, ddec_median, dra_madfm, ddec_madfm = pos_corrections - if not rms_path.exists(): - logger.warning(f"RMS image not found for {image_path}.") - if not bkg_path.exists(): - logger.warning(f"Background image not found for {image_path}.") - skip = not (rms_path.exists() and bkg_path.exists()) or skip - if skip: - logger.warning(f"Skipping {image_path}.") - continue - - for path in (image_path, rms_path, bkg_path): - stokes_dir = f"{path.parent.parent.name}_CORRECTED" - output_dir = vast_tile_data_root / stokes_dir / epoch_dir - output_dir.mkdir(parents=True, exist_ok=True) - # _ = shift_and_scale_image( - # path, - # output_dir, - # flux_scale=corrections.flux_peak_correction_multiplicative, - # flux_offset_mJy=corrections.flux_peak_correction_additive, - # ra_offset_arcsec=corrections.ra_correction, - # dec_offset_arcsec=corrections.dec_correction, - # overwrite=overwrite, - # ) - _ = shift_and_scale_image( - path, - output_dir, - flux_scale=scale, - flux_offset_mJy=offset, - ra_offset_arcsec=dra_median, - dec_offset_arcsec=ddec_median, - overwrite=overwrite, - ) - - # correct catalogs - for components_path in chain.from_iterable(components_path_glob_list): - epoch_dir = components_path.parent.name - _, _, field, sbid_str, *_ = components_path.name.split(".") - sbid = int(sbid_str[2:]) - # get island catalog - islands_path = components_path.with_name( - components_path.name.replace(".components", ".islands") - ) - # get corrections - skip = False - # try: - # corrections = corrections_df.loc[(epoch_dir, field, sbid)] - # except KeyError: - # skip = True - # logger.warning( - # f"Corrections not found for {image_path} ({epoch_dir}, {field}," - # f" {sbid})." - # ) - flux_corrections, pos_corrections = get_correct_correction_file( - correction_files_list=correction_files_path_glob_list, - epoch=epoch_dir, - img_field=field, - img_sbid=sbid_str, - ) - if (flux_corrections is None) | (pos_corrections is None): - skip = True - logger.warning( - f"Corrections not found for {image_path} ({epoch_dir}, {field}," - f" {sbid})." 
- ) - else: - scale, offset, scale_err, offset_err = flux_corrections - dra_median, ddec_median, dra_madfm, ddec_madfm = pos_corrections - if not islands_path.exists(): - logger.warning(f"Islands catalogue not found for {components_path}.") - skip = not islands_path.exists() or skip - if skip: - logger.warning(f"Skipping {components_path}.") - continue - - for path in (components_path, islands_path): - stokes_dir = f"{path.parent.parent.name}_CORRECTED" - output_dir = vast_tile_data_root / stokes_dir / epoch_dir - output_dir.mkdir(parents=True, exist_ok=True) - # _ = shift_and_scale_catalog( - # path, - # output_dir, - # flux_scale=corrections.flux_peak_correction_multiplicative, - # flux_offset_mJy=corrections.flux_peak_correction_additive, - # ra_offset_arcsec=corrections.ra_correction, - # dec_offset_arcsec=corrections.dec_correction, - # overwrite=overwrite, - # ) - _ = shift_and_scale_catalog( - path, - output_dir, - flux_scale=scale, - flux_offset_mJy=offset, - ra_offset_arcsec=dra_median, - dec_offset_arcsec=ddec_median, - overwrite=overwrite, - ) - - -if __name__ == "__main__": - typer.run(main) From 27395f2c0b4c3cd4e95c59b2520de292d20139a8 Mon Sep 17 00:00:00 2001 From: Akash Anumarlapudi Date: Tue, 15 Aug 2023 11:16:08 -0500 Subject: [PATCH 25/31] Make new directories only when write_output=True --- vast_post_processing/corrections.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vast_post_processing/corrections.py b/vast_post_processing/corrections.py index 821b638..eba68f2 100644 --- a/vast_post_processing/corrections.py +++ b/vast_post_processing/corrections.py @@ -475,7 +475,6 @@ def correct_field( for path in (image_path, rms_path, bkg_path): stokes_dir = f"{path.parent.parent.name}_CORRECTED" output_dir = outdir / stokes_dir / epoch_dir - output_dir.mkdir(parents=True, exist_ok=True) output_path = output_dir / path.with_suffix(".corrected.fits").name if output_path.exists() and not overwrite: logger.warning(f"Will not overwrite existing image: {output_path}.") @@ -488,6 +487,7 @@ def correct_field( dec_offset_arcsec=ddec_median_value.item(), ) if write_output: + output_dir.mkdir(parents=True, exist_ok=True) if output_path.exists() and overwrite: logger.warning(f"Overwriting existing image: {output_path}.") corrected_hdu.writeto(str(output_path), overwrite=True) @@ -503,7 +503,6 @@ def correct_field( for path in (component_file, island_file): stokes_dir = f"{path.parent.parent.name}_CORRECTED" output_dir = outdir / stokes_dir / epoch_dir - output_dir.mkdir(parents=True, exist_ok=True) output_path = output_dir / path.with_suffix(".corrected.xml").name if output_path.exists() and not overwrite: logger.warning(f"Will not overwrite existing catalogue: {output_path}.") @@ -516,6 +515,7 @@ def correct_field( dec_offset_arcsec=ddec_median_value.item(), ) if write_output: + output_dir.mkdir(parents=True, exist_ok=True) # write corrected VOTable if output_path.exists() and overwrite: logger.warning( From ef8698efb30d7b488175220ad04e94be1b0163f0 Mon Sep 17 00:00:00 2001 From: Akash Anumarlapudi Date: Wed, 16 Aug 2023 17:21:33 -0500 Subject: [PATCH 26/31] Updated the filtering function for catalogs --- vast_post_processing/catalogs.py | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/vast_post_processing/catalogs.py b/vast_post_processing/catalogs.py index a83b094..28a10a8 100644 --- a/vast_post_processing/catalogs.py +++ b/vast_post_processing/catalogs.py @@ -266,20 +266,35 @@ def _read_catalog(self): def 
_filter_sources(self): """Helper function to filter sources that are used for cross-match; filter sources with bad sizes and optionally given flux limits""" + + # Add a flux threshold flag if self.flux_flag: lim = self.flux_lim logger.info( f"Filtering %d sources with fluxes <= {lim}", (self.table["flux_peak"] <= lim).sum(), ) - self.table = self.table[self.table["flux_peak"] > lim] + flux_mask = self.table["flux_peak"] > lim + # self.table = self.table[self.table["flux_peak"] > lim] + # Add good psf flag logger.info( "Filtering %d sources with fitted sizes <= 0.", ((self.table["maj_axis"] <= 0) | (self.table["min_axis"] <= 0)).sum(), ) - self.table = self.table[ - (self.table["maj_axis"] > 0) & (self.table["min_axis"] > 0) - ] + psf_mask = (self.table["maj_axis"] > 0) & (self.table["min_axis"] > 0) + + # point source flag + ps_metric = self.table["flux_peak"] / self.table["flux_int"] + ps_mask = ps_metric < 1.5 + + # Add snr flag + snr_mask = self.table["flux_peak"] / self.table["rms_image"] > 20 + + # Select distant sources + dist_mask = self.table["nn_separation"].to(u.arcsec).value > 60 + + mask = (flux_mask) & (psf_mask) & (ps_mask) & (snr_mask) & (dist_mask) + self.table = self.table[mask] def calculate_condon_flux_errors( self, From b495dba62a523c245eb4297cf6dacfded8ab6f3a Mon Sep 17 00:00:00 2001 From: Akash Anumarlapudi Date: Wed, 16 Aug 2023 18:26:06 -0500 Subject: [PATCH 27/31] Deal with all epochs or single epoch the same way --- vast_post_processing/corrections.py | 66 +++++++++++++++-------------- 1 file changed, 34 insertions(+), 32 deletions(-) diff --git a/vast_post_processing/corrections.py b/vast_post_processing/corrections.py index eba68f2..a16de44 100644 --- a/vast_post_processing/corrections.py +++ b/vast_post_processing/corrections.py @@ -570,39 +570,41 @@ def correct_files( logger.remove() logger.add(sys.stderr, level="INFO") - # read corrections - image_path_glob_list: list[Generator[Path, None, None]] = [] - components_path_glob_list: list[Generator[Path, None, None]] = [] - + # Read all the epochs if epoch is None or len(epoch) == 0: - image_path_glob_list.append( - vast_tile_data_root.glob("STOKESI_IMAGES/epoch_*/*.fits") - ) - components_path_glob_list.append( - vast_tile_data_root.glob("STOKESI_SELAVY/epoch_*/*.xml") - ) + epoch_dirs = list(vast_tile_data_root.glob("STOKESI_IMAGES/epoch_*")) else: - for n in epoch: - image_path_glob_list.append( - vast_tile_data_root.glob(f"STOKESI_IMAGES/epoch_{n}/*.fits") + epoch_dirs = [] + epoch_dirs = [ + vast_tile_data_root / "STOKESI_IMAGES" / f"epoch_{e}" for e in epoch + ] + + logger.info( + f"Corrections requested of these epochs: {[i.name for i in epoch_dirs]}" + ) + + # Work on individual epochs + for e in epoch_dirs: + # read fits/xml files + image_path_glob_list: list[Generator[Path, None, None]] = [] + components_path_glob_list: list[Generator[Path, None, None]] = [] + + image_path_glob_list.append(e.glob("*.fits")) + components_path_glob_list.append(e.glob("*.xml")) + + # get corrections for every image and the correct it + for image_path in chain.from_iterable(image_path_glob_list): + correct_field( + image_path=image_path, + vast_corrections_root=vast_corrections_root, + radius=radius, + condon=condon, + psf_ref=psf_ref, + psf=psf, + write_output=write_output, + outdir=outdir, + overwrite=overwrite, ) - components_path_glob_list.append( - vast_tile_data_root.glob(f"STOKESI_SELAVY/epoch_{n}/*.xml") + logger.info( + f"Successfully corrected the images and catalogs for {image_path.as_posix()}" ) - - # get 
corrections for an image and the correct it - for image_path in chain.from_iterable(image_path_glob_list): - correct_field( - image_path=image_path, - vast_corrections_root=vast_corrections_root, - radius=radius, - condon=condon, - psf_ref=psf_ref, - psf=psf, - write_output=write_output, - outdir=outdir, - overwrite=overwrite, - ) - logger.info( - f"Successfully corrected the images and catalogs for {image_path.as_posix()}" - ) From 3450e474344c7e53aa05b73c7265311ef195dd7b Mon Sep 17 00:00:00 2001 From: Akash Anumarlapudi Date: Thu, 17 Aug 2023 18:09:01 -0500 Subject: [PATCH 28/31] User decides whether to skip entire epoch or a single file --- vast_post_processing/corrections.py | 227 ++++++++++++++++++++-------- 1 file changed, 160 insertions(+), 67 deletions(-) diff --git a/vast_post_processing/corrections.py b/vast_post_processing/corrections.py index a16de44..d3f42fb 100644 --- a/vast_post_processing/corrections.py +++ b/vast_post_processing/corrections.py @@ -5,6 +5,7 @@ from astropy.coordinates import SkyCoord, Angle from astropy.io import fits from astropy.io.votable import parse +from astropy.io.votable.tree import Param import astropy.units as u from uncertainties import ufloat from astropy.wcs import WCS, FITSFixedWarning @@ -184,10 +185,23 @@ def shift_and_scale_image( image_hdul = fits.open(image_path) image_hdu = image_hdul[0] - # do the flux scaling - image_hdu.data = flux_scale * (image_hdu.data + (flux_offset_mJy * 1e-3)) - image_hdu.header["FLUXOFF"] = flux_offset_mJy * 1e-3 + # do the flux scaling, but check that the data is in Jy + if image_hdu.header["BUNIT"] == "Jy/beam": + data_unit = u.Jy + else: + data_unit = u.mJy + image_hdu.data = flux_scale * ( + image_hdu.data + (flux_offset_mJy * (u.mJy.to(data_unit))) + ) + image_hdu.header["FLUXOFF"] = flux_offset_mJy * (u.mJy.to(data_unit)) image_hdu.header["FLUXSCL"] = flux_scale + + image_hdu[ + "HISTORY" + ] = """ + Image has been corrected for flux by a scaling factor and an offset given by + FLUXSCL and FLUXOFF. + """ # check for NaN if replace_nan: if np.any(np.isnan(image_hdu.data)): @@ -214,6 +228,13 @@ def shift_and_scale_image( image_hdu.header["RAOFF"] = ra_offset_arcsec image_hdu.header["DECOFF"] = dec_offset_arcsec + image_hdu[ + "HISTORY" + ] = """ + Image has been corrected for astrometric position by a an offset in both directions + given by RAOFF and DECOFF using a model RA=RA+RAOFF/COS(DEC), DEC=DEC+DECOFF. 
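Put another way, the header keywords record this correction model (a sketch only; data, offsets and coordinates must be in consistent units, with numpy imported as np):

    # Flux: additive offset followed by a multiplicative scale (FLUXOFF, FLUXSCL).
    data_corrected = flux_scale * (data + flux_offset)
    # Astrometry: offsets in arcsec (RAOFF, DECOFF), the RA term scaled by cos(dec).
    ra_corrected = ra + (ra_offset_arcsec / 3600.0) / np.cos(np.deg2rad(dec))
    dec_corrected = dec + (dec_offset_arcsec / 3600.0)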
+ """ + return image_hdul @@ -293,10 +314,50 @@ def shift_and_scale_catalog( for col in cols: votable.array[col] = flux_scale * (votable.array[col] + flux_offset_mJy) + # Add in the corrections to the votable + flux_scl_param = Param( + votable=votablefile, + ID="flux_scl", + name="flux_scl", + value=flux_scale, + datatype="float", + unit=None, + ) + flux_off_param = Param( + votable=votablefile, + ID="flux_offset", + name="flux_offset", + value=flux_offset_mJy, + datatype="float", + unit=u.mJy, + ) + + ra_offset_param = Param( + votable=votablefile, + ID="ra_offset", + name="ra_offset", + value=ra_offset_arcsec, + datatype="float", + unit=u.arcsec, + ) + + dec_offset_param = Param( + votable=votablefile, + ID="dec_offset", + name="dec_offset", + value=dec_offset_arcsec, + datatype="float", + unit=u.arcsec, + ) + + votablefile.params.extend( + [ra_offset_param, dec_offset_param, flux_scl_param, flux_off_param] + ) + return votablefile -def get_correct_file(correction_files_dir, img_field): +def get_correct_file(correction_files_dir: list, img_field: str): """Helper function to get the file from the reference catalogs which observed the same field. @@ -339,13 +400,57 @@ def get_psf_from_image(image_path: str): image_path = image_path.replace("SELAVY", "IMAGES") image_path = image_path.replace("selavy-", "") image_path = image_path.replace(".components.xml", ".fits") - hdu = fits.open(image_path) + hdu = fits.getheader(image_path) psf_maj = hdu[0].header["BMAJ"] * u.degree psf_min = hdu[0].header["BMIN"] * u.degree - hdu.close() + # hdu.close() return (psf_maj.to(u.arcsec), psf_min.to(u.arcsec)) +def check_for_files(image_path: str): + """Helper function to cehck for bkg/noise maps and the component/island + catalogs given the image file + + Args: + image_path (str): Path to the image file + """ + # get rms and background images + rms_root = Path( + image_path.parent.as_posix().replace("STOKESI_IMAGES", "STOKESI_RMSMAPS") + ) + rms_path = rms_root / f"noiseMap.{image_path.name}" + bkg_path = rms_root / f"meanMap.{image_path.name}" + + skip = False + if not rms_path.exists(): + logger.warning(f"RMS image not found for {image_path}.") + if not bkg_path.exists(): + logger.warning(f"Background image not found for {image_path}.") + + # Look for any component and island files correspnding to this image + image_root = image_path.parent.as_posix() + catalog_root = image_root.replace("IMAGES", "SELAVY") + + catalog_filename = image_path.name.replace("image", "selavy-image") + catalog_filename = catalog_filename.replace(".fits", ".components.xml") + + catalog_filepath = f"{catalog_root}/{catalog_filename}" + + component_file = Path(catalog_filepath) + island_file = Path(catalog_filepath.replace("components", "islands")) + + skip = ( + not ( + (rms_path.exists()) + and (bkg_path.exists()) + and (island_file.exists()) + and (component_file.exists()) + ) + or skip + ) + return skip, (bkg_path, rms_path, component_file, island_file) + + def correct_field( image_path: Path, vast_corrections_root: Path = "/data/vast-survey/RACS/release-format/EPOCH00/TILES/STOKESI_SELAVY", @@ -376,13 +481,7 @@ def correct_field( epoch_dir = image_path.parent.name _, _, field, *_ = image_path.name.split(".") - # get rms and background images - rms_root = Path( - image_path.parent.as_posix().replace("STOKESI_IMAGES", "STOKESI_RMSMAPS") - ) - rms_path = rms_root / f"noiseMap.{image_path.name}" - bkg_path = rms_root / f"meanMap.{image_path.name}" - + # get the correction file correction_files_dir = Path(vast_corrections_root) 
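Because the corrections are attached to the output VOTable as PARAM elements, a downstream reader could recover what was applied along the lines of this sketch; the file name is illustrative:

    from astropy.io.votable import parse

    vot = parse("selavy-image.i.FIELD.SBID.components.corrected.xml")
    applied = {p.ID: p.value for p in vot.params}
    # e.g. applied["flux_scl"], applied["flux_offset"],
    #      applied["ra_offset"], applied["dec_offset"]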
ref_file = get_correct_file( correction_files_dir=correction_files_dir, @@ -394,40 +493,17 @@ def correct_field( # construct output path to store corrections for each epoch corr_dir = outdir / "corr_db" - if not os.path.isdir(corr_dir): - os.mkdir(corr_dir) + if not corr_dir.isdir(): + corr_dir.mkdir() epoch_corr_dir = corr_dir / epoch_dir - if not os.path.isdir(epoch_corr_dir): - os.mkdir(epoch_corr_dir) + if not epoch_corr_dir.isdir(): + epoch_corr_dir.mkdir() - skip = False - if not rms_path.exists(): - logger.warning(f"RMS image not found for {image_path}.") - if not bkg_path.exists(): - logger.warning(f"Background image not found for {image_path}.") - - # Look for any component and island files correspnding to this image - image_root = image_path.parent.as_posix() - catalog_root = image_root.replace("IMAGES", "SELAVY") - - catalog_filename = image_path.name.replace("image", "selavy-image") - catalog_filename = catalog_filename.replace(".fits", ".components.xml") - - catalog_filepath = f"{catalog_root}/{catalog_filename}" - - component_file = Path(catalog_filepath) - island_file = Path(catalog_filepath.replace("components", "islands")) - - skip = ( - not ( - (rms_path.exists()) - and (bkg_path.exists()) - and (ref_file is not None) - and (component_file.exists()) - ) - or skip - ) + # check for auxiliary files + skip, aux_files = check_for_files(image_path=image_path) + skip |= ref_file is None + bkg_path, rms_path, component_file, island_file = aux_files if skip: if not ((rms_path.exists()) and (bkg_path.exists())): logger.warning(f"Skipping {image_path}, RMS/BKG maps do not exist") @@ -471,7 +547,7 @@ def correct_field( ) # get corrections - corrected_hdul = [] + corrected_hdus = [] for path in (image_path, rms_path, bkg_path): stokes_dir = f"{path.parent.parent.name}_CORRECTED" output_dir = outdir / stokes_dir / epoch_dir @@ -496,7 +572,7 @@ def correct_field( logger.success(f"Writing corrected image to: {output_path}.") corrected_hdu.close() else: - corrected_hdul.append(corrected_hdu) + corrected_hdus.append(corrected_hdu) # Do the same for catalog files corrected_catalogs = [] @@ -506,6 +582,7 @@ def correct_field( output_path = output_dir / path.with_suffix(".corrected.xml").name if output_path.exists() and not overwrite: logger.warning(f"Will not overwrite existing catalogue: {output_path}.") + continue else: corrected_catalog = shift_and_scale_catalog( path, @@ -528,7 +605,7 @@ def correct_field( logger.success(f"Writing corrected catalogue: {output_path}.") else: corrected_catalogs.append(corrected_catalog) - return (corrected_hdul, corrected_catalogs) + return (corrected_hdus, corrected_catalogs) def correct_files( @@ -542,6 +619,7 @@ def correct_files( write_output: bool = True, outdir: str = None, overwrite: bool = False, + skip_on_missing=False, verbose: bool = False, ): """Read astrometric and flux corrections produced by vast-xmatch and apply them to @@ -587,24 +665,39 @@ def correct_files( for e in epoch_dirs: # read fits/xml files image_path_glob_list: list[Generator[Path, None, None]] = [] - components_path_glob_list: list[Generator[Path, None, None]] = [] - image_path_glob_list.append(e.glob("*.fits")) - components_path_glob_list.append(e.glob("*.xml")) - - # get corrections for every image and the correct it - for image_path in chain.from_iterable(image_path_glob_list): - correct_field( - image_path=image_path, - vast_corrections_root=vast_corrections_root, - radius=radius, - condon=condon, - psf_ref=psf_ref, - psf=psf, - write_output=write_output, - outdir=outdir, 
- overwrite=overwrite, - ) - logger.info( - f"Successfully corrected the images and catalogs for {image_path.as_posix()}" - ) + image_files = list(image_path_glob_list) + + skip_epoch = False + for img in image_files: + skip_file, _ = check_for_files(image_path=img) + skip_epoch |= skip_file + if skip_epoch: + logger.warning( + f"One/Some of the bkg/rms/catlogues is are missing for {img}" + ) + break + if skip_on_missing: + if skip_epoch: + logger.warning( + "User input is to skip the entire epoch if one of the images \ + have missing bkg/rms/catalog files, so skipping epoch {e}" + ) + break + else: + # get corrections for every image and the correct it + for image_path in image_files: + correct_field( + image_path=image_path, + vast_corrections_root=vast_corrections_root, + radius=radius, + condon=condon, + psf_ref=psf_ref, + psf=psf, + write_output=write_output, + outdir=outdir, + overwrite=overwrite, + ) + logger.info( + f"Successfully corrected the images and catalogs for {image_path.as_posix()}" + ) From 903b04b4c6bc2f14497e56597324e142a624132b Mon Sep 17 00:00:00 2001 From: Akash Anumarlapudi Date: Sun, 20 Aug 2023 20:06:43 -0500 Subject: [PATCH 29/31] changed variables to function arguments for filtering sources --- vast_post_processing/catalogs.py | 40 ++++++++++++++++++++++++----- vast_post_processing/corrections.py | 4 +-- 2 files changed, 36 insertions(+), 8 deletions(-) diff --git a/vast_post_processing/catalogs.py b/vast_post_processing/catalogs.py index 28a10a8..3f1acb8 100644 --- a/vast_post_processing/catalogs.py +++ b/vast_post_processing/catalogs.py @@ -182,10 +182,32 @@ def __init__( path: Path, psf: Optional[Tuple[float, float]] = None, input_format: str = "selavy", - condon: bool = False, - apply_flux_limit: bool = True, + condon: bool = True, flux_limit: float = 0, + snr_limit: float = 20, + nneighbor: float = 1, + apply_flux_limit: bool = True, + select_point_sources: bool = True, ): + """Defines a catalog class to read the component files + + Args: + path (Path): path to the component file (selavy/aegean supported right now) + psf (Optional[Tuple[float, float]], optional): The major and minor axis dimensions + in arcsec. Defaults to None. Used to calculate condon errors + input_format (str, optional): are the component files selavy or aegean generated?. + Defaults to "selavy". + condon (bool, optional): Apply condon corrections. Defaults to True. + flux_limit (float, optional): Flux limit to select sources (sources with peak flux + > this will be selected). Defaults to 0. + snr_limit (float, optional): SNR limit to select sources (sources with SNR > this + will be selected). Defaults to 20. + nneighbor (float, optional): Distance to nearest neighbor (in arcmin). Sources with + neighbors < this will be removed. Defaults to 1. + apply_flux_limit (bool, optional): Flag to decide to apply flux limit. Defaults to True. + select_point_sources (bool, optional): Flag to decide to select point sources. 
+ Defaults to True + """ self.path: Path self.table: QTable self.input_format: Optional[str] @@ -202,6 +224,9 @@ def __init__( self.input_format = input_format self.flux_flag = apply_flux_limit self.flux_lim = flux_limit + self.snr_lim = snr_limit + self.sep_lim = nneighbor # In arcmin + self.point_sources = select_point_sources # Read the catalog self._read_catalog() @@ -284,14 +309,17 @@ def _filter_sources(self): psf_mask = (self.table["maj_axis"] > 0) & (self.table["min_axis"] > 0) # point source flag - ps_metric = self.table["flux_peak"] / self.table["flux_int"] - ps_mask = ps_metric < 1.5 + if self.point_sources: + ps_metric = self.table["flux_peak"] / self.table["flux_int"] + ps_mask = ps_metric < 1.5 + else: + ps_mask = np.ones(len(self.table)).astype(bool) # Add snr flag - snr_mask = self.table["flux_peak"] / self.table["rms_image"] > 20 + snr_mask = self.table["flux_peak"] / self.table["rms_image"] > self.snr_lim # Select distant sources - dist_mask = self.table["nn_separation"].to(u.arcsec).value > 60 + dist_mask = self.table["nn_separation"].to(u.arcsec).value > 60 * self.sep_lim mask = (flux_mask) & (psf_mask) & (ps_mask) & (snr_mask) & (dist_mask) self.table = self.table[mask] diff --git a/vast_post_processing/corrections.py b/vast_post_processing/corrections.py index d3f42fb..635792e 100644 --- a/vast_post_processing/corrections.py +++ b/vast_post_processing/corrections.py @@ -493,11 +493,11 @@ def correct_field( # construct output path to store corrections for each epoch corr_dir = outdir / "corr_db" - if not corr_dir.isdir(): + if not corr_dir.is_dir(): corr_dir.mkdir() epoch_corr_dir = corr_dir / epoch_dir - if not epoch_corr_dir.isdir(): + if not epoch_corr_dir.is_dir(): epoch_corr_dir.mkdir() # check for auxiliary files From ffeae45107deb1f1a731f287a5ce49a4e7c874ff Mon Sep 17 00:00:00 2001 From: Akash Date: Mon, 21 Aug 2023 11:02:28 -0500 Subject: [PATCH 30/31] Tested it on a couple of epoch, changed code to use f-strings --- vast_post_processing/catalogs.py | 54 ++++++++++++--------- vast_post_processing/cli/run_corrections.py | 6 +++ vast_post_processing/corrections.py | 46 ++++++++---------- vast_post_processing/crossmatch.py | 16 ++---- 4 files changed, 63 insertions(+), 59 deletions(-) diff --git a/vast_post_processing/catalogs.py b/vast_post_processing/catalogs.py index 3f1acb8..a2d72f2 100644 --- a/vast_post_processing/catalogs.py +++ b/vast_post_processing/catalogs.py @@ -1,4 +1,4 @@ -import logging +from loguru import logger from pathlib import Path from typing import Tuple, Union, Dict, Optional from urllib.parse import quote @@ -9,8 +9,6 @@ import numpy as np import pandas as pd -logger = logging.getLogger(__name__) - SELAVY_COLUMN_UNITS = { "ra_deg_cont": u.deg, "dec_deg_cont": u.deg, @@ -245,14 +243,11 @@ def __init__( if psf is not None: self.psf_major, self.psf_minor = psf * u.arcsec logger.debug( - "Using user provided PSF for %s: %s, %s.", - self.path, - self.psf_major, - self.psf_minor, + f"Using user provided PSF for {self.path}: {self.psf_major}, {self.psf_minor}." ) else: logger.warning( - "PSF is unknown for %s. Condon errors will be unavailable.", self.path + f"PSF is unknown for {self.path}. Condon errors will be unavailable." 
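For orientation, the filtering knobs documented in the docstring above can be exercised directly when constructing a Catalog. A minimal sketch under stated assumptions: the component-file path and PSF values are illustrative placeholders, not part of this patch, and the keyword names follow the signature introduced in this commit.

from pathlib import Path

from vast_post_processing.catalogs import Catalog

# Illustrative placeholder; any Selavy component catalogue laid out in the
# usual VAST directory structure should work here.
component_file = Path(
    "STOKESI_SELAVY/epoch_20/selavy-image.i.VAST_0012-06.SB9876.cont.components.xml"
)

# Keep sources brighter than 1 mJy/beam with SNR > 10, drop anything with a
# neighbour within 2 arcmin, and restrict the crossmatch list to point sources.
cat = Catalog(
    component_file,
    psf=(12.5, 10.0),        # image PSF major/minor axes in arcsec, used for Condon errors
    input_format="selavy",
    condon=True,
    flux_limit=1.0,
    snr_limit=10,
    nneighbor=2,
    apply_flux_limit=True,
    select_point_sources=True,
)
print(f"{len(cat.table)} sources retained for crossmatching")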
) self.psf_major = None self.psf_minor = None @@ -260,7 +255,7 @@ def __init__( # Calculate the covariant error using Condon 1997 if condon and self.psf_major is not None and self.psf_minor is not None: self.calculate_condon_flux_errors(correct_peak_for_noise=True) - logger.debug("Condon errors computed for %s.", self.path) + logger.debug(f"Condon errors computed for {self.path}.") def _read_catalog(self): """Helper function to read and parse the input files @@ -272,13 +267,13 @@ def _read_catalog(self): path = self.path if self.input_format == "selavy": if path.suffix == ".txt": - logger.debug("Reading %s as a Selavy txt catalog.", path) + logger.debug(f"Reading {path} as a Selavy txt catalog.") read_catalog = read_selavy else: - logger.debug("Reading %s as a Selavy VOTable catalog.", path) + logger.debug(f"Reading {path} as a Selavy VOTable catalog.") read_catalog = read_selavy_votable elif self.input_format == "aegean": - logger.debug("Reading %s as an Aegean catalog.", path) + logger.debug(f"Reading {path} as an Aegean catalog.") read_catalog = read_aegean_csv else: logger.error( @@ -292,37 +287,52 @@ def _filter_sources(self): """Helper function to filter sources that are used for cross-match; filter sources with bad sizes and optionally given flux limits""" + sources = self.table + flux_peak = (self.table["flux_peak"].to(u.mJy / u.beam)).value + flux_int = (self.table["flux_int"].to(u.mJy)).value + rms = (self.table["rms_image"].to(u.mJy / u.beam)).value + # Add a flux threshold flag if self.flux_flag: lim = self.flux_lim + flux_mask = flux_peak > lim logger.info( - f"Filtering %d sources with fluxes <= {lim}", - (self.table["flux_peak"] <= lim).sum(), + f"Filtering {len(sources[~flux_mask])} sources with fluxes <= {lim}" ) - flux_mask = self.table["flux_peak"] > lim - # self.table = self.table[self.table["flux_peak"] > lim] + # Add good psf flag + psf_mask = (self.table["maj_axis"] > 0) & (self.table["min_axis"] > 0) logger.info( - "Filtering %d sources with fitted sizes <= 0.", - ((self.table["maj_axis"] <= 0) | (self.table["min_axis"] <= 0)).sum(), + f"Filtering {len(sources[~psf_mask])} sources with fitted sizes <= 0." ) - psf_mask = (self.table["maj_axis"] > 0) & (self.table["min_axis"] > 0) - # point source flag if self.point_sources: - ps_metric = self.table["flux_peak"] / self.table["flux_int"] + ps_metric = np.divide( + flux_peak, flux_int, where=flux_int != 0, out=np.zeros_like(flux_int) + ) ps_mask = ps_metric < 1.5 + logger.info( + f"Filtering {len(sources[~ps_mask])} sources that are not point sources." + ) else: ps_mask = np.ones(len(self.table)).astype(bool) # Add snr flag - snr_mask = self.table["flux_peak"] / self.table["rms_image"] > self.snr_lim + snr = np.divide(flux_peak, rms, where=rms != 0, out=np.zeros_like(rms)) + snr_mask = snr > self.snr_lim + logger.info( + f"Filtering {len(sources[~snr_mask])} sources with SNR <= {self.snr_lim}" + ) # Select distant sources dist_mask = self.table["nn_separation"].to(u.arcsec).value > 60 * self.sep_lim + logger.info( + f"Filtering {len(sources[~dist_mask])} sources that have neighbors within {self.sep_lim} arcmin." 
+ ) mask = (flux_mask) & (psf_mask) & (ps_mask) & (snr_mask) & (dist_mask) self.table = self.table[mask] + logger.info(f"Filtering {len(sources[~mask])} sources in total.") def calculate_condon_flux_errors( self, diff --git a/vast_post_processing/cli/run_corrections.py b/vast_post_processing/cli/run_corrections.py index 0347ab7..f4790ed 100644 --- a/vast_post_processing/cli/run_corrections.py +++ b/vast_post_processing/cli/run_corrections.py @@ -78,6 +78,11 @@ def main( "to them as suffix", ), overwrite: bool = False, + skip_on_missing: Optional[bool] = typer.Option( + False, + help="If there are missing files (noise/bkg/catalogs) corresponding to an image file, should" + "we skip the entire epoch or just that one files? Defaults to skipping just that file.", + ), verbose: bool = False, ): """ @@ -99,6 +104,7 @@ def main( psf=psf, outdir=outdir, overwrite=overwrite, + skip_on_missing=skip_on_missing, verbose=verbose, ) diff --git a/vast_post_processing/corrections.py b/vast_post_processing/corrections.py index 635792e..31f0c9f 100644 --- a/vast_post_processing/corrections.py +++ b/vast_post_processing/corrections.py @@ -196,12 +196,9 @@ def shift_and_scale_image( image_hdu.header["FLUXOFF"] = flux_offset_mJy * (u.mJy.to(data_unit)) image_hdu.header["FLUXSCL"] = flux_scale - image_hdu[ + image_hdu.header[ "HISTORY" - ] = """ - Image has been corrected for flux by a scaling factor and an offset given by - FLUXSCL and FLUXOFF. - """ + ] = "Image has been corrected for flux by a scaling factor and an offset given by FLUXSCL and FLUXOFF." # check for NaN if replace_nan: if np.any(np.isnan(image_hdu.data)): @@ -228,12 +225,9 @@ def shift_and_scale_image( image_hdu.header["RAOFF"] = ra_offset_arcsec image_hdu.header["DECOFF"] = dec_offset_arcsec - image_hdu[ + image_hdu.header[ "HISTORY" - ] = """ - Image has been corrected for astrometric position by a an offset in both directions - given by RAOFF and DECOFF using a model RA=RA+RAOFF/COS(DEC), DEC=DEC+DECOFF. 
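For downstream users, the positional model recorded in this HISTORY entry (RA' = RA + RAOFF/cos(DEC), DEC' = DEC + DECOFF, with the offsets written to the header in arcsec) is straightforward to apply or invert by hand. A rough sketch, assuming a corrected image produced by shift_and_scale_image; the file name and the example position are placeholders.

import numpy as np
from astropy.io import fits
import astropy.units as u

# Placeholder file name for an image written by shift_and_scale_image.
with fits.open("image.i.VAST_0012-06.SB9876.cont.corrected.fits") as hdul:
    hdr = hdul[0].header
    ra_off = hdr["RAOFF"] * u.arcsec    # offsets are stored in arcsec
    dec_off = hdr["DECOFF"] * u.arcsec

# Forward model from the HISTORY entry: RA' = RA + RAOFF / cos(DEC), DEC' = DEC + DECOFF.
ra, dec = 12.3456 * u.deg, -6.7890 * u.deg   # an uncorrected position, for illustration
ra_corrected = ra + ra_off.to(u.deg) / np.cos(dec.to(u.rad).value)
dec_corrected = dec + dec_off.to(u.deg)
# Subtracting the same two terms undoes the shift on an already-corrected position.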
- """ + ] = "Image has been corrected for astrometric position by a an offset in both directions given by RAOFF and DECOFF using a model RA=RA+RAOFF/COS(DEC), DEC=DEC+DECOFF" return image_hdul @@ -400,9 +394,9 @@ def get_psf_from_image(image_path: str): image_path = image_path.replace("SELAVY", "IMAGES") image_path = image_path.replace("selavy-", "") image_path = image_path.replace(".components.xml", ".fits") - hdu = fits.getheader(image_path) - psf_maj = hdu[0].header["BMAJ"] * u.degree - psf_min = hdu[0].header["BMIN"] * u.degree + hdr = fits.getheader(image_path) + psf_maj = hdr["BMAJ"] * u.degree + psf_min = hdr["BMIN"] * u.degree # hdu.close() return (psf_maj.to(u.arcsec), psf_min.to(u.arcsec)) @@ -605,6 +599,9 @@ def correct_field( logger.success(f"Writing corrected catalogue: {output_path}.") else: corrected_catalogs.append(corrected_catalog) + logger.info( + f"Successfully corrected the images and catalogs for {image_path.as_posix()}" + ) return (corrected_hdus, corrected_catalogs) @@ -666,8 +663,7 @@ def correct_files( # read fits/xml files image_path_glob_list: list[Generator[Path, None, None]] = [] image_path_glob_list.append(e.glob("*.fits")) - image_files = list(image_path_glob_list) - + image_files = list(image_path_glob_list[0]) skip_epoch = False for img in image_files: skip_file, _ = check_for_files(image_path=img) @@ -677,17 +673,16 @@ def correct_files( f"One/Some of the bkg/rms/catlogues is are missing for {img}" ) break - if skip_on_missing: - if skip_epoch: - logger.warning( - "User input is to skip the entire epoch if one of the images \ - have missing bkg/rms/catalog files, so skipping epoch {e}" - ) - break + if skip_on_missing & skip_epoch: + logger.warning( + "User input is to skip the entire epoch if one of the images" + f"have missing bkg/rms/catalog files, so skipping epoch {e}" + ) + else: # get corrections for every image and the correct it for image_path in image_files: - correct_field( + products = correct_field( image_path=image_path, vast_corrections_root=vast_corrections_root, radius=radius, @@ -698,6 +693,5 @@ def correct_files( outdir=outdir, overwrite=overwrite, ) - logger.info( - f"Successfully corrected the images and catalogs for {image_path.as_posix()}" - ) + if products is not None: + hdus, catalogs = products diff --git a/vast_post_processing/crossmatch.py b/vast_post_processing/crossmatch.py index c5ee01f..11061bd 100644 --- a/vast_post_processing/crossmatch.py +++ b/vast_post_processing/crossmatch.py @@ -1,6 +1,5 @@ -import logging from typing import Tuple - +from loguru import logger from astropy.coordinates import SkyCoord, Angle, match_coordinates_sky from astropy.table import QTable, join, join_skycoord import astropy.units as u @@ -10,9 +9,6 @@ from vast_post_processing.catalogs import Catalog -logger = logging.getLogger(__name__) - - def median_abs_deviation(data): """helper function to calculate the median offset @@ -76,7 +72,7 @@ def crossmatch_qtables( Returns: QTable: filtered table that return the cross matches """ - logger.debug("Using crossmatch radius: %s.", radius) + logger.debug(f"Using crossmatch radius: {radius}.") xmatch = join( catalog.table, @@ -102,11 +98,9 @@ def crossmatch_qtables( ).decompose() logger.info( - "Num cross-matches: %d. Num cross-matches to unique reference source: %d" - " (%d%%).", - len(xmatch), - len(set(xmatch["coord_id"])), - (len(set(xmatch["coord_id"])) / len(xmatch)) * 100, + f"Num cross-matches: {len(xmatch)}. 
Num cross-matches to unique reference " + f"source: {len(set(xmatch['coord_id']))} -- " + f" ({(len(set(xmatch['coord_id'])) / len(xmatch)) * 100})." ) return xmatch From f3a5ae858bfb103ca26f3cfbffe937aebed3c864 Mon Sep 17 00:00:00 2001 From: Akash Date: Mon, 28 Aug 2023 12:49:45 +1000 Subject: [PATCH 31/31] Added catalog filtering parameters as CLI arguments. --- vast_post_processing/cli/run_corrections.py | 28 ++++ vast_post_processing/corrections.py | 136 +++++++++++++------- 2 files changed, 114 insertions(+), 50 deletions(-) diff --git a/vast_post_processing/cli/run_corrections.py b/vast_post_processing/cli/run_corrections.py index f4790ed..5b2c202 100644 --- a/vast_post_processing/cli/run_corrections.py +++ b/vast_post_processing/cli/run_corrections.py @@ -71,6 +71,29 @@ def main( "arcsec for `catalog`. First argument is major axis followed by nimor axis." ), ), + flux_limit: Optional[float] = typer.Option( + 0, + help="Flux limit to select sources (sources with peak flux" + "> this will be selected). Defaults to 0.", + ), + snr_limit: Optional[float] = typer.Option( + 20, + help="SNR limit to select sources (sources with SNR > this" + "will be selected). Defaults to 20.", + ), + nneighbor: Optional[float] = typer.Option( + 1, + help="Distance to nearest neighbor (in arcmin). Sources with" + "neighbors < this will be removed. Defaults to 1.", + ), + apply_flux_limit: Optional[bool] = typer.Option( + True, + help="Flag to decide to apply flux limit. Defaults to True", + ), + select_point_sources: Optional[bool] = typer.Option( + True, + help="Flag to decide to select point sources. Defaults to True", + ), outdir: Optional[str] = typer.Option( None, help="Stem of the output directory to store the corrected images and cataloges to. The default" @@ -102,6 +125,11 @@ def main( condon=condon, psf_ref=psf_ref, psf=psf, + flux_limit=flux_limit, + snr_limit=snr_limit, + nneighbor=nneighbor, + apply_flux_limit=apply_flux_limit, + select_point_sources=select_point_sources, outdir=outdir, overwrite=overwrite, skip_on_missing=skip_on_missing, diff --git a/vast_post_processing/corrections.py b/vast_post_processing/corrections.py index 31f0c9f..0b10720 100644 --- a/vast_post_processing/corrections.py +++ b/vast_post_processing/corrections.py @@ -31,6 +31,11 @@ def vast_xmatch_qc( fix_b: bool = False, positional_unit: u.Unit = u.Unit("arcsec"), flux_unit: u.Unit = u.Unit("mJy"), + flux_limit: float = 0, + snr_limit: float = 20, + nneighbor: float = 1, + apply_flux_limit: bool = True, + select_point_sources: bool = True, crossmatch_output: Optional[str] = None, csv_output: Optional[str] = None, ): @@ -56,6 +61,15 @@ def vast_xmatch_qc( Defaults to u.Unit("arcsec"). flux_unit (u.Unit, optional): output unit in which the flux scale is given. Defaults to u.Unit("mJy"). + flux_limit (float, optional): Flux limit to select sources (sources with peak flux + > this will be selected). Defaults to 0. + snr_limit (float, optional): SNR limit to select sources (sources with SNR > this + will be selected). Defaults to 20. + nneighbor (float, optional): Distance to nearest neighbor (in arcmin). Sources with + neighbors < this will be removed. Defaults to 1. + apply_flux_limit (bool, optional): Flag to decide to apply flux limit. Defaults to True. + select_point_sources (bool, optional): Flag to decide to select point sources. + Defaults to True crossmatch_output (Optional[str], optional): File path to write the crossmatch output. 
Defaults to None, which means no file is written csv_output (Optional[str], optional): File path to write the flux/astrometric corrections. @@ -77,12 +91,22 @@ def vast_xmatch_qc( psf=psf_reference, condon=condon, input_format="selavy", + flux_limit=flux_limit, + snr_limit=snr_limit, + nneighbor=nneighbor, + apply_flux_limit=apply_flux_limit, + select_point_sources=select_point_sources, ) catalog = Catalog( catalog_path, psf=psf, condon=condon, input_format="selavy", + flux_limit=flux_limit, + snr_limit=snr_limit, + nneighbor=nneighbor, + apply_flux_limit=apply_flux_limit, + select_point_sources=select_point_sources, ) # perform the crossmatch @@ -125,37 +149,28 @@ def vast_xmatch_qc( if csv_output is not None: # output has been requested - - if True: # csv_output is not None: - csv_output_path = Path(csv_output) # ensure Path object - sbid = catalog.sbid if catalog.sbid is not None else "" - if not csv_output_path.exists(): - f = open(csv_output_path, "w") - print( - "field,release_epoch,sbid,ra_correction,dec_correction,ra_madfm," - "dec_madfm,flux_peak_correction_multiplicative,flux_peak_correction_additive," - "flux_peak_correction_multiplicative_err,flux_peak_correction_additive_err," - "n_sources", - file=f, - ) - else: - f = open(csv_output_path, "a") - logger.info( - "Writing corrections CSV. To correct positions, add the corrections to" - " the original source positions i.e. RA' = RA + ra_correction /" - " cos(Dec). To correct fluxes, add the additive correction and multiply" - " the result by the multiplicative correction i.e. S' =" - " flux_peak_correction_multiplicative(S +" - " flux_peak_correction_additive)." - ) - print( - f"{catalog.field},{catalog.epoch},{sbid},{dra_median_value * -1}," - f"{ddec_median_value * -1},{dra_madfm_value},{ddec_madfm_value}," - f"{flux_corr_mult.nominal_value},{flux_corr_add.nominal_value}," - f"{flux_corr_mult.std_dev},{flux_corr_add.std_dev},{len(data)}", - file=f, - ) - f.close() + csv_output_path = Path(csv_output) # ensure Path object + sbid = catalog.sbid if catalog.sbid is not None else "" + if not csv_output_path.exists(): + f = open(csv_output_path, "w") + else: + f = open(csv_output_path, "a") + logger.info( + "Writing corrections CSV. To correct positions, add the corrections to" + " the original source positions i.e. RA' = RA + ra_correction /" + " cos(Dec). To correct fluxes, add the additive correction and multiply" + " the result by the multiplicative correction i.e. S' =" + " flux_peak_correction_multiplicative(S +" + " flux_peak_correction_additive)." + ) + print( + f"{catalog.field},{catalog.epoch},{sbid},{dra_median_value * -1}," + f"{ddec_median_value * -1},{dra_madfm_value},{ddec_madfm_value}," + f"{flux_corr_mult.nominal_value},{flux_corr_add.nominal_value}," + f"{flux_corr_mult.std_dev},{flux_corr_add.std_dev},{len(data)}", + file=f, + ) + f.close() return dra_median_value, ddec_median_value, flux_corr_mult, flux_corr_add @@ -193,12 +208,13 @@ def shift_and_scale_image( image_hdu.data = flux_scale * ( image_hdu.data + (flux_offset_mJy * (u.mJy.to(data_unit))) ) - image_hdu.header["FLUXOFF"] = flux_offset_mJy * (u.mJy.to(data_unit)) - image_hdu.header["FLUXSCL"] = flux_scale + image_hdu.header["FLUXOFFSET"] = flux_offset_mJy * (u.mJy.to(data_unit)) + image_hdu.header["FLUXSCALE"] = flux_scale - image_hdu.header[ - "HISTORY" - ] = "Image has been corrected for flux by a scaling factor and an offset given by FLUXSCL and FLUXOFF." 
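The corrections CSV written above can be consumed with pandas. The column order below follows the header string printed by earlier revisions of this function, and the formulas are the ones spelled out in the accompanying log message (RA' = RA + ra_correction / cos(Dec); S' = flux_peak_correction_multiplicative * (S + flux_peak_correction_additive)). A sketch under those assumptions: the CSV path is a placeholder, the positional corrections are assumed to be in arcsec (the default positional unit), and the column names are supplied explicitly because the current revision no longer writes a header row.

import numpy as np
import pandas as pd

columns = [
    "field", "release_epoch", "sbid", "ra_correction", "dec_correction",
    "ra_madfm", "dec_madfm", "flux_peak_correction_multiplicative",
    "flux_peak_correction_additive", "flux_peak_correction_multiplicative_err",
    "flux_peak_correction_additive_err", "n_sources",
]
# Placeholder path to the per-epoch corrections CSV.
corr = pd.read_csv("corr_db/epoch_20/corrections.csv", names=columns)
row = corr.iloc[0]

# An example source position (deg) and peak flux (mJy/beam), for illustration.
ra_deg, dec_deg, flux_peak = 12.3456, -6.7890, 5.0

# Positions: add the tabulated offsets, assumed to be in arcsec.
ra_corrected = ra_deg + row["ra_correction"] / 3600.0 / np.cos(np.deg2rad(dec_deg))
dec_corrected = dec_deg + row["dec_correction"] / 3600.0
# Fluxes: apply the additive term first, then the multiplicative factor.
flux_corrected = row["flux_peak_correction_multiplicative"] * (
    flux_peak + row["flux_peak_correction_additive"]
)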
+ image_hdu.header.add_history( + "Image has been corrected for flux by a scaling factor and\ + an offset given by FLUXSCALE and FLUXOFFSET." + ) # check for NaN if replace_nan: if np.any(np.isnan(image_hdu.data)): @@ -225,9 +241,11 @@ def shift_and_scale_image( image_hdu.header["RAOFF"] = ra_offset_arcsec image_hdu.header["DECOFF"] = dec_offset_arcsec - image_hdu.header[ - "HISTORY" - ] = "Image has been corrected for astrometric position by a an offset in both directions given by RAOFF and DECOFF using a model RA=RA+RAOFF/COS(DEC), DEC=DEC+DECOFF" + image_hdu.header.add_history( + "Image has been corrected for astrometric position by a an offset\ + in both directions given by RAOFF and DECOFF using a model\ + RA=RA+RAOFF/COS(DEC), DEC=DEC+DECOFF" + ) return image_hdul @@ -311,16 +329,16 @@ def shift_and_scale_catalog( # Add in the corrections to the votable flux_scl_param = Param( votable=votablefile, - ID="flux_scl", - name="flux_scl", + ID="FluxScale", + name="FluxScale", value=flux_scale, datatype="float", unit=None, ) flux_off_param = Param( votable=votablefile, - ID="flux_offset", - name="flux_offset", + ID="FluxOffset", + name="FluxOffset", value=flux_offset_mJy, datatype="float", unit=u.mJy, @@ -328,8 +346,8 @@ def shift_and_scale_catalog( ra_offset_param = Param( votable=votablefile, - ID="ra_offset", - name="ra_offset", + ID="RAOffset", + name="RAOffset", value=ra_offset_arcsec, datatype="float", unit=u.arcsec, @@ -337,8 +355,8 @@ def shift_and_scale_catalog( dec_offset_param = Param( votable=votablefile, - ID="dec_offset", - name="dec_offset", + ID="DECOffset", + name="DECOffset", value=dec_offset_arcsec, datatype="float", unit=u.arcsec, @@ -452,6 +470,11 @@ def correct_field( condon: bool = True, psf_ref: list[float] = None, psf: list[float] = None, + flux_limit: float = 0, + snr_limit: float = 20, + nneighbor: float = 1, + apply_flux_limit: bool = True, + select_point_sources: bool = True, write_output: bool = True, outdir: str = None, overwrite: bool = False, @@ -536,6 +559,11 @@ def correct_field( psf=psf_image, fix_m=False, fix_b=False, + flux_limit=flux_limit, + snr_limit=snr_limit, + nneighbor=nneighbor, + apply_flux_limit=apply_flux_limit, + select_point_sources=select_point_sources, crossmatch_output=crossmatch_file, csv_output=csv_file, ) @@ -613,6 +641,11 @@ def correct_files( condon: bool = True, psf_ref: list[float] = None, psf: list[float] = None, + flux_limit: float = 0, + snr_limit: float = 20, + nneighbor: float = 1, + apply_flux_limit: bool = True, + select_point_sources: bool = True, write_output: bool = True, outdir: str = None, overwrite: bool = False, @@ -682,16 +715,19 @@ def correct_files( else: # get corrections for every image and the correct it for image_path in image_files: - products = correct_field( + _ = correct_field( image_path=image_path, vast_corrections_root=vast_corrections_root, radius=radius, condon=condon, psf_ref=psf_ref, psf=psf, + flux_limit=flux_limit, + snr_limit=snr_limit, + nneighbor=nneighbor, + apply_flux_limit=apply_flux_limit, + select_point_sources=select_point_sources, write_output=write_output, outdir=outdir, overwrite=overwrite, ) - if products is not None: - hdus, catalogs = products
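Putting the new arguments together, a single field can be corrected by calling correct_field with the same keywords that correct_files now forwards to it. A sketch under stated assumptions: the paths, crossmatch radius, and PSF values are illustrative placeholders, and the return value is checked before unpacking because correct_field may return nothing when the auxiliary RMS/background/catalogue files are missing, mirroring the handling in correct_files.

from pathlib import Path

from astropy.coordinates import Angle

from vast_post_processing.corrections import correct_field

# All paths, the radius, and the PSF values here are illustrative placeholders.
products = correct_field(
    image_path=Path(
        "STOKESI_IMAGES/epoch_20/image.i.VAST_0012-06.SB9876.cont.taylor.0.restored.fits"
    ),
    vast_corrections_root=Path("path/to/reference/catalogues"),
    radius=Angle("10 arcsec"),
    condon=True,
    psf_ref=[15.0, 12.0],   # reference-catalogue PSF (major, minor) in arcsec
    psf=[12.5, 10.0],       # image PSF (major, minor) in arcsec
    flux_limit=1.0,
    snr_limit=10,
    nneighbor=2,
    apply_flux_limit=True,
    select_point_sources=True,
    write_output=False,     # collect the corrected HDUs/catalogues instead of writing them
)
if products is not None:
    corrected_hdus, corrected_catalogs = products

With write_output=False the corrected HDU lists and catalogues are returned for inspection rather than written to the CORRECTED output tree, which is convenient when tuning the filtering thresholds before a full batch run.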