From 6b2dffe9d0dec5a30ecabda92ee89ec0e5589893 Mon Sep 17 00:00:00 2001 From: TjarkMiener Date: Fri, 25 Oct 2024 17:52:33 +0200 Subject: [PATCH 01/19] added stats calc tool --- pyproject.toml | 1 + src/ctapipe/tools/stats_calculation.py | 171 +++++++++++++++++++++++++ 2 files changed, 172 insertions(+) create mode 100644 src/ctapipe/tools/stats_calculation.py diff --git a/pyproject.toml b/pyproject.toml index 27c6cf0b7fc..6382a94dedc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -107,6 +107,7 @@ ctapipe-process = "ctapipe.tools.process:main" ctapipe-merge = "ctapipe.tools.merge:main" ctapipe-fileinfo = "ctapipe.tools.fileinfo:main" ctapipe-quickstart = "ctapipe.tools.quickstart:main" +ctapipe-stats-calculation = "ctapipe.tools.stats_calculation:main" ctapipe-train-energy-regressor = "ctapipe.tools.train_energy_regressor:main" ctapipe-train-particle-classifier = "ctapipe.tools.train_particle_classifier:main" ctapipe-train-disp-reconstructor = "ctapipe.tools.train_disp_reconstructor:main" diff --git a/src/ctapipe/tools/stats_calculation.py b/src/ctapipe/tools/stats_calculation.py new file mode 100644 index 00000000000..080d1b74c45 --- /dev/null +++ b/src/ctapipe/tools/stats_calculation.py @@ -0,0 +1,171 @@ +""" +Perform statistics calculation from DL1 data +""" + +import pathlib + +import numpy as np +from astropy.table import vstack + +from ctapipe.core import Tool +from ctapipe.core.tool import ToolConfigurationError +from ctapipe.core.traits import ( + Bool, + CaselessStrEnum, + Path, + Unicode, + classes_with_traits, +) +from ctapipe.instrument import SubarrayDescription +from ctapipe.io import write_table +from ctapipe.io.tableloader import TableLoader +from ctapipe.monitoring.calculator import PixelStatisticsCalculator + + +class StatisticsCalculatorTool(Tool): + """ + Perform statistics calculation for DL1 data + """ + + name = "StatisticsCalculatorTool" + description = "Perform statistics calculation for DL1 data" + + examples = """ + To calculate statistics of DL1 data files: + + > ctapipe-stats-calculation --input_url input.dl1.h5 --output_path /path/monitoring.h5 --overwrite + + """ + + input_url = Path( + help="Input CTA HDF5 files for DL1 data", + allow_none=True, + exists=True, + directory_ok=False, + file_ok=True, + ).tag(config=True) + + dl1_column_name = CaselessStrEnum( + ["image", "peak_time", "variance"], + default_value="image", + allow_none=False, + help="Column name of the DL1 data to calculate statistics", + ).tag(config=True) + + output_column_name = Unicode( + default_value="statistics", + allow_none=False, + help="Column name of the output statistics", + ).tag(config=True) + + output_path = Path( + help="Output filename", default_value=pathlib.Path("monitoring.h5") + ).tag(config=True) + + overwrite = Bool(help="Overwrite output file if it exists").tag(config=True) + + aliases = { + ("i", "input_url"): "StatisticsCalculatorTool.input_url", + ("o", "output_path"): "StatisticsCalculatorTool.output_path", + } + + flags = { + "overwrite": ( + {"StatisticsCalculatorTool": {"overwrite": True}}, + "Overwrite existing files", + ), + } + + classes = classes_with_traits(PixelStatisticsCalculator) + + def setup(self): + # Check that the input and output files are not the same + if self.input_url == self.output_path: + raise ToolConfigurationError( + "Input and output files are same. Fix your configuration / cli arguments." + ) + + # Load the subarray description from the input file + subarray = SubarrayDescription.from_hdf(self.input_url) + if subarray.n_tels != 1: + raise IOError( + "This tool is only intended for single telescope data." + "Please provide a file with only one telescope." + ) + self.tel_id = subarray.tel_ids[0] + # Set the table name for the output file + self.table_name = ( + f"/dl1/monitoring/telescope/{self.output_column_name}/tel_{self.tel_id:03d}" + ) + # Initialization of the statistics calculator + self.stats_calculator = PixelStatisticsCalculator( + parent=self, subarray=subarray + ) + # Read the whole dl1 images with the 'TableLoader' + input_data = TableLoader(input_url=self.input_url) + self.dl1_table = input_data.read_telescope_events_by_id( + dl1_images=True, + dl1_parameters=False, + dl1_muons=False, + dl2=False, + simulated=False, + true_images=False, + true_parameters=False, + instrument=False, + pointing=False, + )[self.tel_id] + + def start(self): + # Perform the first pass of the statistics calculation + aggregated_stats = self.stats_calculator.first_pass( + table=self.dl1_table, + tel_id=self.tel_id, + col_name=self.dl1_column_name, + ) + # Check if 'chunk_shift' is selected + if self.stats_calculator.chunk_shift is not None: + # Check if there are any faulty chunks to perform a second pass over the data + if np.any(~aggregated_stats["is_valid"].data): + # Perform the second pass of the statistics calculation + aggregated_stats_secondpass = self.stats_calculator.second_pass( + table=self.dl1_table, + valid_chunks=aggregated_stats["is_valid"].data, + tel_id=self.tel_id, + col_name=self.dl1_column_name, + ) + # Stack the statistic values from the first and second pass + aggregated_stats = vstack( + [aggregated_stats, aggregated_stats_secondpass] + ) + # Sort the stacked aggregated statistic values by starting time + aggregated_stats.sort(["time_start"]) + else: + self.log.info( + "No faulty chunks found for telescope 'tel_id=%d'. Skipping second pass.", + self.tel_id, + ) + # Write the aggregated statistics and their outlier mask to the output file + write_table( + aggregated_stats, + self.output_path, + self.table_name, + overwrite=self.overwrite, + ) + + def finish(self): + self.log.info( + "DL1 monitoring data was stored in '%s' under '%s'", + self.output_path, + self.table_name, + ) + self.log.info("Tool is shutting down") + + +def main(): + # Run the tool + tool = StatisticsCalculatorTool() + tool.run() + + +if __name__ == "main": + main() From d8f63bbec32292807a82185b84688263066cc36e Mon Sep 17 00:00:00 2001 From: TjarkMiener Date: Fri, 25 Oct 2024 18:02:19 +0200 Subject: [PATCH 02/19] added example config for stats calc --- src/ctapipe/resources/stats_calc_config.yaml | 36 ++++++++++++++++++++ src/ctapipe/tools/quickstart.py | 1 + 2 files changed, 37 insertions(+) create mode 100644 src/ctapipe/resources/stats_calc_config.yaml diff --git a/src/ctapipe/resources/stats_calc_config.yaml b/src/ctapipe/resources/stats_calc_config.yaml new file mode 100644 index 00000000000..59cc601e468 --- /dev/null +++ b/src/ctapipe/resources/stats_calc_config.yaml @@ -0,0 +1,36 @@ +StatisticsCalculatorTool: + dl1_column_name: "image" + output_column_name: "statistics" + +PixelStatisticsCalculator: + stats_aggregator_type: [["id", 1, "SigmaClippingAggregator"]] + chunk_shift: 1000 + faulty_pixels_fraction: 0.1 + outlier_detector_list: [ + { + "apply_to": "median", + "name": "MedianOutlierDetector", + "config": { + "median_range_factors": [-15, 15], + }, + }, + { + "apply_to": "median", + "name": "RangeOutlierDetector", + "config": { + "validity_range": [-20, 120], + } + } + { + "apply_to": "std", + "name": "StdOutlierDetector", + "config": { + "std_range_factors": [-15, 15], + }, + } + ] + +SigmaClippingAggregator: + chunk_size: 2500 + max_sigma: 4 + iterations: 5 diff --git a/src/ctapipe/tools/quickstart.py b/src/ctapipe/tools/quickstart.py index 67410458d95..9cab9c97d99 100644 --- a/src/ctapipe/tools/quickstart.py +++ b/src/ctapipe/tools/quickstart.py @@ -15,6 +15,7 @@ "stage1_config.yaml", "stage2_config.yaml", "ml_preprocessing_config.yaml", + "stats_calc_config.yaml", "train_energy_regressor.yaml", "train_particle_classifier.yaml", "train_disp_reconstructor.yaml", From 6bd9dde82619a2d12f3ee261b040f8dab216ea3e Mon Sep 17 00:00:00 2001 From: TjarkMiener Date: Mon, 28 Oct 2024 09:55:45 +0100 Subject: [PATCH 03/19] allow to process multiple tels Since we should also support the processing of MCs, we might want to run the stats calc tool over multiple tels. --- src/ctapipe/resources/stats_calc_config.yaml | 7 +- src/ctapipe/tools/stats_calculation.py | 120 ++++++++++--------- 2 files changed, 68 insertions(+), 59 deletions(-) diff --git a/src/ctapipe/resources/stats_calc_config.yaml b/src/ctapipe/resources/stats_calc_config.yaml index 59cc601e468..965bd6dae68 100644 --- a/src/ctapipe/resources/stats_calc_config.yaml +++ b/src/ctapipe/resources/stats_calc_config.yaml @@ -1,9 +1,10 @@ StatisticsCalculatorTool: - dl1_column_name: "image" + allowed_tels: [1,2,3,4] + dl1a_column_name: "image" output_column_name: "statistics" PixelStatisticsCalculator: - stats_aggregator_type: [["id", 1, "SigmaClippingAggregator"]] + stats_aggregator_type: [["type", "*", "SigmaClippingAggregator"]] chunk_shift: 1000 faulty_pixels_fraction: 0.1 outlier_detector_list: [ @@ -20,7 +21,7 @@ PixelStatisticsCalculator: "config": { "validity_range": [-20, 120], } - } + }, { "apply_to": "std", "name": "StdOutlierDetector", diff --git a/src/ctapipe/tools/stats_calculation.py b/src/ctapipe/tools/stats_calculation.py index 080d1b74c45..b472ba461ae 100644 --- a/src/ctapipe/tools/stats_calculation.py +++ b/src/ctapipe/tools/stats_calculation.py @@ -1,5 +1,5 @@ """ -Perform statistics calculation from DL1 data +Perform statistics calculation from DL1a image data """ import pathlib @@ -12,7 +12,9 @@ from ctapipe.core.traits import ( Bool, CaselessStrEnum, + CInt, Path, + Set, Unicode, classes_with_traits, ) @@ -24,32 +26,42 @@ class StatisticsCalculatorTool(Tool): """ - Perform statistics calculation for DL1 data + Perform statistics calculation for DL1a image data """ name = "StatisticsCalculatorTool" - description = "Perform statistics calculation for DL1 data" + description = "Perform statistics calculation for DL1a image data" examples = """ - To calculate statistics of DL1 data files: + To calculate statistics of DL1a image data files: > ctapipe-stats-calculation --input_url input.dl1.h5 --output_path /path/monitoring.h5 --overwrite """ input_url = Path( - help="Input CTA HDF5 files for DL1 data", + help="Input CTA HDF5 files including DL1a image data", allow_none=True, exists=True, directory_ok=False, file_ok=True, ).tag(config=True) - dl1_column_name = CaselessStrEnum( + allowed_tels = Set( + trait=CInt(), + default_value=None, + allow_none=True, + help=( + "List of allowed tel_ids, others will be ignored. " + "If None, all telescopes in the input stream will be included." + ), + ).tag(config=True) + + dl1a_column_name = CaselessStrEnum( ["image", "peak_time", "variance"], default_value="image", allow_none=False, - help="Column name of the DL1 data to calculate statistics", + help="Column name of the DL1a image data to calculate statistics", ).tag(config=True) output_column_name = Unicode( @@ -87,23 +99,17 @@ def setup(self): # Load the subarray description from the input file subarray = SubarrayDescription.from_hdf(self.input_url) - if subarray.n_tels != 1: - raise IOError( - "This tool is only intended for single telescope data." - "Please provide a file with only one telescope." - ) - self.tel_id = subarray.tel_ids[0] - # Set the table name for the output file - self.table_name = ( - f"/dl1/monitoring/telescope/{self.output_column_name}/tel_{self.tel_id:03d}" - ) # Initialization of the statistics calculator self.stats_calculator = PixelStatisticsCalculator( parent=self, subarray=subarray ) - # Read the whole dl1 images with the 'TableLoader' + # Read the input data with the 'TableLoader' input_data = TableLoader(input_url=self.input_url) - self.dl1_table = input_data.read_telescope_events_by_id( + # Get the telescope ids from the input data or use the allowed_tels configuration + tel_ids = subarray.tel_ids if self.allowed_tels is None else self.allowed_tels + # Read the whole dl1 images + self.dl1_tables = input_data.read_telescope_events_by_id( + telescopes=tel_ids, dl1_images=True, dl1_parameters=False, dl1_muons=False, @@ -113,50 +119,52 @@ def setup(self): true_parameters=False, instrument=False, pointing=False, - )[self.tel_id] + ) def start(self): - # Perform the first pass of the statistics calculation - aggregated_stats = self.stats_calculator.first_pass( - table=self.dl1_table, - tel_id=self.tel_id, - col_name=self.dl1_column_name, - ) - # Check if 'chunk_shift' is selected - if self.stats_calculator.chunk_shift is not None: - # Check if there are any faulty chunks to perform a second pass over the data - if np.any(~aggregated_stats["is_valid"].data): - # Perform the second pass of the statistics calculation - aggregated_stats_secondpass = self.stats_calculator.second_pass( - table=self.dl1_table, - valid_chunks=aggregated_stats["is_valid"].data, - tel_id=self.tel_id, - col_name=self.dl1_column_name, - ) - # Stack the statistic values from the first and second pass - aggregated_stats = vstack( - [aggregated_stats, aggregated_stats_secondpass] - ) - # Sort the stacked aggregated statistic values by starting time - aggregated_stats.sort(["time_start"]) - else: - self.log.info( - "No faulty chunks found for telescope 'tel_id=%d'. Skipping second pass.", - self.tel_id, - ) - # Write the aggregated statistics and their outlier mask to the output file - write_table( - aggregated_stats, - self.output_path, - self.table_name, - overwrite=self.overwrite, - ) + # Iterate over the telescope ids and their corresponding dl1 tables + for tel_id, dl1_table in self.dl1_tables.items(): + # Perform the first pass of the statistics calculation + aggregated_stats = self.stats_calculator.first_pass( + table=dl1_table, + tel_id=tel_id, + col_name=self.dl1a_column_name, + ) + # Check if 'chunk_shift' is selected + if self.stats_calculator.chunk_shift is not None: + # Check if there are any faulty chunks to perform a second pass over the data + if np.any(~aggregated_stats["is_valid"].data): + # Perform the second pass of the statistics calculation + aggregated_stats_secondpass = self.stats_calculator.second_pass( + table=dl1_table, + valid_chunks=aggregated_stats["is_valid"].data, + tel_id=tel_id, + col_name=self.dl1a_column_name, + ) + # Stack the statistic values from the first and second pass + aggregated_stats = vstack( + [aggregated_stats, aggregated_stats_secondpass] + ) + # Sort the stacked aggregated statistic values by starting time + aggregated_stats.sort(["time_start"]) + else: + self.log.info( + "No faulty chunks found for telescope 'tel_id=%d'. Skipping second pass.", + tel_id, + ) + # Write the aggregated statistics and their outlier mask to the output file + write_table( + aggregated_stats, + self.output_path, + f"/dl1/monitoring/telescope/{self.output_column_name}/tel_{tel_id:03d}", + overwrite=self.overwrite, + ) def finish(self): self.log.info( "DL1 monitoring data was stored in '%s' under '%s'", self.output_path, - self.table_name, + f"/dl1/monitoring/telescope/{self.output_column_name}", ) self.log.info("Tool is shutting down") From c8ab02ed11b44d8ef648f5056a656f6458f5d5c3 Mon Sep 17 00:00:00 2001 From: TjarkMiener Date: Mon, 28 Oct 2024 09:58:54 +0100 Subject: [PATCH 04/19] added unit test for stats calc tool --- src/ctapipe/tools/tests/test_stats_calc.py | 57 ++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 src/ctapipe/tools/tests/test_stats_calc.py diff --git a/src/ctapipe/tools/tests/test_stats_calc.py b/src/ctapipe/tools/tests/test_stats_calc.py new file mode 100644 index 00000000000..97157ace955 --- /dev/null +++ b/src/ctapipe/tools/tests/test_stats_calc.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python3 +""" +Test ctapipe-stats-calculation tool +""" + +from traitlets.config.loader import Config + +from ctapipe.core import run_tool +from ctapipe.io import read_table +from ctapipe.tools.stats_calculation import StatisticsCalculatorTool + + +def test_stats_calc_tool(tmp_path, dl1_image_file): + """check statistics calculation from DL1a files""" + + # Create a configuration suitable for the test + tel_id = 3 + config = Config( + { + "StatisticsCalculatorTool": { + "allowed_tels": [tel_id], + "dl1a_column_name": "image", + "output_column_name": "statistics", + }, + "PixelStatisticsCalculator": { + "stats_aggregator_type": [ + ("id", tel_id, "PlainAggregator"), + ], + }, + "PlainAggregator": { + "chunk_size": 1, + }, + } + ) + # Set the output file path + monitoring_file = tmp_path / "monitoring.dl1.h5" + # Run the tool with the configuration and the input file + run_tool( + StatisticsCalculatorTool(config=config), + argv=[ + f"--input_url={dl1_image_file}", + f"--output_path={monitoring_file}", + "--overwrite", + ], + cwd=tmp_path, + raises=True, + ) + # Check that the output file has been created + assert monitoring_file.exists() + # Check that the output file is not empty + assert ( + read_table( + monitoring_file, + path=f"/dl1/monitoring/telescope/statistics/tel_{tel_id:03d}", + )["mean"] + is not None + ) From c2da337ff15b2e66d6236e58b79bf2a48514e6ea Mon Sep 17 00:00:00 2001 From: TjarkMiener Date: Mon, 28 Oct 2024 10:05:23 +0100 Subject: [PATCH 05/19] add changelog --- docs/changes/2628.features.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 docs/changes/2628.features.rst diff --git a/docs/changes/2628.features.rst b/docs/changes/2628.features.rst new file mode 100644 index 00000000000..f57b32854af --- /dev/null +++ b/docs/changes/2628.features.rst @@ -0,0 +1 @@ +Add a generic stats-calculation tool utilizing the PixelStatisticsCalculator. From e902d6ad545f3c30ce3371cfe829c7b1a34f8659 Mon Sep 17 00:00:00 2001 From: TjarkMiener Date: Mon, 28 Oct 2024 10:26:04 +0100 Subject: [PATCH 06/19] polish docs --- docs/user-guide/tools.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/user-guide/tools.rst b/docs/user-guide/tools.rst index 619b08bf793..f9c6c1b36cd 100644 --- a/docs/user-guide/tools.rst +++ b/docs/user-guide/tools.rst @@ -17,6 +17,7 @@ Data Processing Tools * ``ctapipe-quickstart``: create some default analysis configurations and a working directory * ``ctapipe-process``: Process event data in any supported format from R0/R1/DL0 to DL1 or DL2 HDF5 files. * ``ctapipe-apply-models``: Tool to apply machine learning models in bulk (as opposed to event by event). +* ``ctapipe-stats-calculation``: Tool to aggregate statistics and detect outliers from DL1a image data. * ``ctapipe-train-disp-reconstructor`` : Train the ML models for the `ctapipe.reco.DispReconstructor` (monoscopic reconstruction) * ``ctapipe-train-energy-regressor``: Train the ML models for the `ctapipe.reco.EnergyRegressor` (energy estimation) * ``ctapipe-train-particle-classifier``: Train the ML models for the `ctapipe.reco.ParticleClassifier` (gamma-hadron separation) From 259569b1eae624e69496ece8a85c70602b6b795a Mon Sep 17 00:00:00 2001 From: TjarkMiener Date: Mon, 28 Oct 2024 14:44:35 +0100 Subject: [PATCH 07/19] include first round of comments rename the tool and file name only keep dl1 table of the particular telescope into RAM added tests for tool config errors rename input col name adopt yaml syntax in example config for stats calculation --- docs/user-guide/tools.rst | 2 +- pyproject.toml | 2 +- .../resources/calculate_pixel_stats.yaml | 32 ++++++ src/ctapipe/resources/stats_calc_config.yaml | 37 ------- ...alculation.py => calculate_pixel_stats.py} | 62 ++++++----- .../tools/tests/test_calculate_pixel_stats.py | 102 ++++++++++++++++++ src/ctapipe/tools/tests/test_stats_calc.py | 57 ---------- 7 files changed, 170 insertions(+), 124 deletions(-) create mode 100644 src/ctapipe/resources/calculate_pixel_stats.yaml delete mode 100644 src/ctapipe/resources/stats_calc_config.yaml rename src/ctapipe/tools/{stats_calculation.py => calculate_pixel_stats.py} (72%) create mode 100644 src/ctapipe/tools/tests/test_calculate_pixel_stats.py delete mode 100644 src/ctapipe/tools/tests/test_stats_calc.py diff --git a/docs/user-guide/tools.rst b/docs/user-guide/tools.rst index f9c6c1b36cd..1a0b2320d9b 100644 --- a/docs/user-guide/tools.rst +++ b/docs/user-guide/tools.rst @@ -17,7 +17,7 @@ Data Processing Tools * ``ctapipe-quickstart``: create some default analysis configurations and a working directory * ``ctapipe-process``: Process event data in any supported format from R0/R1/DL0 to DL1 or DL2 HDF5 files. * ``ctapipe-apply-models``: Tool to apply machine learning models in bulk (as opposed to event by event). -* ``ctapipe-stats-calculation``: Tool to aggregate statistics and detect outliers from DL1a image data. +* ``ctapipe-calculate-pixel-statistics``: Tool to aggregate statistics and detect outliers from pixel-wise image data. * ``ctapipe-train-disp-reconstructor`` : Train the ML models for the `ctapipe.reco.DispReconstructor` (monoscopic reconstruction) * ``ctapipe-train-energy-regressor``: Train the ML models for the `ctapipe.reco.EnergyRegressor` (energy estimation) * ``ctapipe-train-particle-classifier``: Train the ML models for the `ctapipe.reco.ParticleClassifier` (gamma-hadron separation) diff --git a/pyproject.toml b/pyproject.toml index 6382a94dedc..fc0fbbc4bab 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -107,7 +107,7 @@ ctapipe-process = "ctapipe.tools.process:main" ctapipe-merge = "ctapipe.tools.merge:main" ctapipe-fileinfo = "ctapipe.tools.fileinfo:main" ctapipe-quickstart = "ctapipe.tools.quickstart:main" -ctapipe-stats-calculation = "ctapipe.tools.stats_calculation:main" +ctapipe-calculate-pixel-statistics = "ctapipe.tools.calculate_pixel_stats:main" ctapipe-train-energy-regressor = "ctapipe.tools.train_energy_regressor:main" ctapipe-train-particle-classifier = "ctapipe.tools.train_particle_classifier:main" ctapipe-train-disp-reconstructor = "ctapipe.tools.train_disp_reconstructor:main" diff --git a/src/ctapipe/resources/calculate_pixel_stats.yaml b/src/ctapipe/resources/calculate_pixel_stats.yaml new file mode 100644 index 00000000000..e4d1f0b3866 --- /dev/null +++ b/src/ctapipe/resources/calculate_pixel_stats.yaml @@ -0,0 +1,32 @@ +StatisticsCalculatorTool: + allowed_tels: [1,2,3,4] + input_column_name: image + output_column_name: statistics + +PixelStatisticsCalculator: + stats_aggregator_type: + - ["type", "LST*", "SigmaClippingAggregator"], + - ["type", "MST*", "PlainAggregator"], + + chunk_shift: 1000 + faulty_pixels_fraction: 0.1 + outlier_detector_list: + - name: MedianOutlierDetector + apply_to: median + config: + median_range_factors: [-15, 15] + - name: RangeOutlierDetector + apply_to: median + config: + validity_range: [-20, 120] + - name: StdOutlierDetector + apply_to: std + config: + std_range_factors: [-15, 15] + +SigmaClippingAggregator: + chunk_size: 2500 + max_sigma: 4 + iterations: 5 +PlainAggregator: + chunk_size: 2500 diff --git a/src/ctapipe/resources/stats_calc_config.yaml b/src/ctapipe/resources/stats_calc_config.yaml deleted file mode 100644 index 965bd6dae68..00000000000 --- a/src/ctapipe/resources/stats_calc_config.yaml +++ /dev/null @@ -1,37 +0,0 @@ -StatisticsCalculatorTool: - allowed_tels: [1,2,3,4] - dl1a_column_name: "image" - output_column_name: "statistics" - -PixelStatisticsCalculator: - stats_aggregator_type: [["type", "*", "SigmaClippingAggregator"]] - chunk_shift: 1000 - faulty_pixels_fraction: 0.1 - outlier_detector_list: [ - { - "apply_to": "median", - "name": "MedianOutlierDetector", - "config": { - "median_range_factors": [-15, 15], - }, - }, - { - "apply_to": "median", - "name": "RangeOutlierDetector", - "config": { - "validity_range": [-20, 120], - } - }, - { - "apply_to": "std", - "name": "StdOutlierDetector", - "config": { - "std_range_factors": [-15, 15], - }, - } - ] - -SigmaClippingAggregator: - chunk_size: 2500 - max_sigma: 4 - iterations: 5 diff --git a/src/ctapipe/tools/stats_calculation.py b/src/ctapipe/tools/calculate_pixel_stats.py similarity index 72% rename from src/ctapipe/tools/stats_calculation.py rename to src/ctapipe/tools/calculate_pixel_stats.py index b472ba461ae..4616c70b4e9 100644 --- a/src/ctapipe/tools/stats_calculation.py +++ b/src/ctapipe/tools/calculate_pixel_stats.py @@ -1,5 +1,5 @@ """ -Perform statistics calculation from DL1a image data +Perform statistics calculation from pixel-wise image data """ import pathlib @@ -11,7 +11,6 @@ from ctapipe.core.tool import ToolConfigurationError from ctapipe.core.traits import ( Bool, - CaselessStrEnum, CInt, Path, Set, @@ -26,21 +25,21 @@ class StatisticsCalculatorTool(Tool): """ - Perform statistics calculation for DL1a image data + Perform statistics calculation for pixel-wise image data """ name = "StatisticsCalculatorTool" - description = "Perform statistics calculation for DL1a image data" + description = "Perform statistics calculation for pixel-wise image data" examples = """ - To calculate statistics of DL1a image data files: + To calculate statistics of pixel-wise image data files: - > ctapipe-stats-calculation --input_url input.dl1.h5 --output_path /path/monitoring.h5 --overwrite + > ctapipe-calculate-pixel-statistics --input_url input.dl1.h5 --output_path /path/monitoring.h5 --overwrite """ input_url = Path( - help="Input CTA HDF5 files including DL1a image data", + help="Input CTA HDF5 files including pixel-wise image data", allow_none=True, exists=True, directory_ok=False, @@ -57,11 +56,10 @@ class StatisticsCalculatorTool(Tool): ), ).tag(config=True) - dl1a_column_name = CaselessStrEnum( - ["image", "peak_time", "variance"], + input_column_name = Unicode( default_value="image", allow_none=False, - help="Column name of the DL1a image data to calculate statistics", + help="Column name of the pixel-wise image data to calculate statistics", ).tag(config=True) output_column_name = Unicode( @@ -104,31 +102,39 @@ def setup(self): parent=self, subarray=subarray ) # Read the input data with the 'TableLoader' - input_data = TableLoader(input_url=self.input_url) + self.input_data = TableLoader(input_url=self.input_url) # Get the telescope ids from the input data or use the allowed_tels configuration - tel_ids = subarray.tel_ids if self.allowed_tels is None else self.allowed_tels - # Read the whole dl1 images - self.dl1_tables = input_data.read_telescope_events_by_id( - telescopes=tel_ids, - dl1_images=True, - dl1_parameters=False, - dl1_muons=False, - dl2=False, - simulated=False, - true_images=False, - true_parameters=False, - instrument=False, - pointing=False, + self.tel_ids = ( + subarray.tel_ids if self.allowed_tels is None else self.allowed_tels ) def start(self): - # Iterate over the telescope ids and their corresponding dl1 tables - for tel_id, dl1_table in self.dl1_tables.items(): + # Iterate over the telescope ids and calculate the statistics + for tel_id in self.tel_ids: + # Read the whole dl1 images for one particular telescope + dl1_table = self.input_data.read_telescope_events_by_id( + telescopes=tel_id, + dl1_images=True, + dl1_parameters=False, + dl1_muons=False, + dl2=False, + simulated=False, + true_images=False, + true_parameters=False, + instrument=False, + pointing=False, + )[tel_id] + # Check if the input column name is in the table + if self.input_column_name not in dl1_table.colnames: + raise ToolConfigurationError( + f"Column '{self.input_column_name}' not found " + f"in the input data for telescope 'tel_id={tel_id}'." + ) # Perform the first pass of the statistics calculation aggregated_stats = self.stats_calculator.first_pass( table=dl1_table, tel_id=tel_id, - col_name=self.dl1a_column_name, + col_name=self.input_column_name, ) # Check if 'chunk_shift' is selected if self.stats_calculator.chunk_shift is not None: @@ -139,7 +145,7 @@ def start(self): table=dl1_table, valid_chunks=aggregated_stats["is_valid"].data, tel_id=tel_id, - col_name=self.dl1a_column_name, + col_name=self.input_column_name, ) # Stack the statistic values from the first and second pass aggregated_stats = vstack( diff --git a/src/ctapipe/tools/tests/test_calculate_pixel_stats.py b/src/ctapipe/tools/tests/test_calculate_pixel_stats.py new file mode 100644 index 00000000000..38e61354b65 --- /dev/null +++ b/src/ctapipe/tools/tests/test_calculate_pixel_stats.py @@ -0,0 +1,102 @@ +#!/usr/bin/env python3 +""" +Test ctapipe-calculate-pixel-statistics tool +""" + +import pytest +from traitlets.config.loader import Config + +from ctapipe.core import run_tool +from ctapipe.core.tool import ToolConfigurationError +from ctapipe.io import read_table +from ctapipe.tools.calculate_pixel_stats import StatisticsCalculatorTool + + +def test_calculate_pixel_stats_tool(tmp_path, dl1_image_file): + """check statistics calculation from pixel-wise image data files""" + + # Create a configuration suitable for the test + tel_id = 3 + config = Config( + { + "StatisticsCalculatorTool": { + "allowed_tels": [tel_id], + "input_column_name": "image", + "output_column_name": "statistics", + }, + "PixelStatisticsCalculator": { + "stats_aggregator_type": [ + ("id", tel_id, "PlainAggregator"), + ], + }, + "PlainAggregator": { + "chunk_size": 1, + }, + } + ) + # Set the output file path + monitoring_file = tmp_path / "monitoring.dl1.h5" + # Run the tool with the configuration and the input file + run_tool( + StatisticsCalculatorTool(config=config), + argv=[ + f"--input_url={dl1_image_file}", + f"--output_path={monitoring_file}", + "--overwrite", + ], + cwd=tmp_path, + raises=True, + ) + # Check that the output file has been created + assert monitoring_file.exists() + # Check that the output file is not empty + assert ( + read_table( + monitoring_file, + path=f"/dl1/monitoring/telescope/statistics/tel_{tel_id:03d}", + )["mean"] + is not None + ) + + +def test_tool_config_error(tmp_path, dl1_image_file): + """check tool configuration error""" + + # Run the tool with the configuration and the input file + config = Config( + { + "StatisticsCalculatorTool": { + "allowed_tels": [3], + "input_column_name": "image_charges", + "output_column_name": "statistics", + } + } + ) + # Set the output file path + monitoring_file = tmp_path / "monitoring.dl1.h5" + # Check if ToolConfigurationError is raised + # when the column name of the pixel-wise image data is not correct + with pytest.raises(ToolConfigurationError): + run_tool( + StatisticsCalculatorTool(config=config), + argv=[ + f"--input_url={dl1_image_file}", + f"--output_path={monitoring_file}", + "--overwrite", + ], + cwd=tmp_path, + raises=True, + ) + # Check if ToolConfigurationError is raised + # when the input and output files are the same + with pytest.raises(ToolConfigurationError): + run_tool( + StatisticsCalculatorTool(), + argv=[ + f"--input_url={dl1_image_file}", + f"--output_path={dl1_image_file}", + "--overwrite", + ], + cwd=tmp_path, + raises=True, + ) diff --git a/src/ctapipe/tools/tests/test_stats_calc.py b/src/ctapipe/tools/tests/test_stats_calc.py deleted file mode 100644 index 97157ace955..00000000000 --- a/src/ctapipe/tools/tests/test_stats_calc.py +++ /dev/null @@ -1,57 +0,0 @@ -#!/usr/bin/env python3 -""" -Test ctapipe-stats-calculation tool -""" - -from traitlets.config.loader import Config - -from ctapipe.core import run_tool -from ctapipe.io import read_table -from ctapipe.tools.stats_calculation import StatisticsCalculatorTool - - -def test_stats_calc_tool(tmp_path, dl1_image_file): - """check statistics calculation from DL1a files""" - - # Create a configuration suitable for the test - tel_id = 3 - config = Config( - { - "StatisticsCalculatorTool": { - "allowed_tels": [tel_id], - "dl1a_column_name": "image", - "output_column_name": "statistics", - }, - "PixelStatisticsCalculator": { - "stats_aggregator_type": [ - ("id", tel_id, "PlainAggregator"), - ], - }, - "PlainAggregator": { - "chunk_size": 1, - }, - } - ) - # Set the output file path - monitoring_file = tmp_path / "monitoring.dl1.h5" - # Run the tool with the configuration and the input file - run_tool( - StatisticsCalculatorTool(config=config), - argv=[ - f"--input_url={dl1_image_file}", - f"--output_path={monitoring_file}", - "--overwrite", - ], - cwd=tmp_path, - raises=True, - ) - # Check that the output file has been created - assert monitoring_file.exists() - # Check that the output file is not empty - assert ( - read_table( - monitoring_file, - path=f"/dl1/monitoring/telescope/statistics/tel_{tel_id:03d}", - )["mean"] - is not None - ) From 2d677aea5c21fa20f2c2e3207ea54eca03cc79f7 Mon Sep 17 00:00:00 2001 From: TjarkMiener Date: Mon, 28 Oct 2024 14:59:48 +0100 Subject: [PATCH 08/19] rename config file also in quickstart tool --- src/ctapipe/tools/quickstart.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ctapipe/tools/quickstart.py b/src/ctapipe/tools/quickstart.py index 9cab9c97d99..f8dfaff0d3c 100644 --- a/src/ctapipe/tools/quickstart.py +++ b/src/ctapipe/tools/quickstart.py @@ -12,10 +12,10 @@ CONFIGS_TO_WRITE = [ "base_config.yaml", + "calculate_pixel_stats.yaml", "stage1_config.yaml", "stage2_config.yaml", "ml_preprocessing_config.yaml", - "stats_calc_config.yaml", "train_energy_regressor.yaml", "train_particle_classifier.yaml", "train_disp_reconstructor.yaml", From e380666e90a8c24b1c8a10a7eea59db50672a03b Mon Sep 17 00:00:00 2001 From: TjarkMiener Date: Mon, 28 Oct 2024 16:28:16 +0100 Subject: [PATCH 09/19] remove redundant , in stats calc example config --- src/ctapipe/resources/calculate_pixel_stats.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ctapipe/resources/calculate_pixel_stats.yaml b/src/ctapipe/resources/calculate_pixel_stats.yaml index e4d1f0b3866..48e262d3ab2 100644 --- a/src/ctapipe/resources/calculate_pixel_stats.yaml +++ b/src/ctapipe/resources/calculate_pixel_stats.yaml @@ -5,8 +5,8 @@ StatisticsCalculatorTool: PixelStatisticsCalculator: stats_aggregator_type: - - ["type", "LST*", "SigmaClippingAggregator"], - - ["type", "MST*", "PlainAggregator"], + - ["type", "LST*", "SigmaClippingAggregator"] + - ["type", "MST*", "PlainAggregator"] chunk_shift: 1000 faulty_pixels_fraction: 0.1 From 0f681c59d7c2e72e8f3ca7f960c3bdd18cb57b4f Mon Sep 17 00:00:00 2001 From: TjarkMiener Date: Thu, 14 Nov 2024 13:11:21 +0100 Subject: [PATCH 10/19] use TableLoader for input handling --- src/ctapipe/tools/calculate_pixel_stats.py | 39 ++++++++++------------ 1 file changed, 18 insertions(+), 21 deletions(-) diff --git a/src/ctapipe/tools/calculate_pixel_stats.py b/src/ctapipe/tools/calculate_pixel_stats.py index 4616c70b4e9..89bdcb40065 100644 --- a/src/ctapipe/tools/calculate_pixel_stats.py +++ b/src/ctapipe/tools/calculate_pixel_stats.py @@ -34,18 +34,10 @@ class StatisticsCalculatorTool(Tool): examples = """ To calculate statistics of pixel-wise image data files: - > ctapipe-calculate-pixel-statistics --input_url input.dl1.h5 --output_path /path/monitoring.h5 --overwrite + > ctapipe-calculate-pixel-statistics --TableLoader.input_url input.dl1.h5 --output_path /path/monitoring.h5 --overwrite """ - input_url = Path( - help="Input CTA HDF5 files including pixel-wise image data", - allow_none=True, - exists=True, - directory_ok=False, - file_ok=True, - ).tag(config=True) - allowed_tels = Set( trait=CInt(), default_value=None, @@ -75,7 +67,7 @@ class StatisticsCalculatorTool(Tool): overwrite = Bool(help="Overwrite output file if it exists").tag(config=True) aliases = { - ("i", "input_url"): "StatisticsCalculatorTool.input_url", + ("i", "input_url"): "TableLoader.input_url", ("o", "output_path"): "StatisticsCalculatorTool.output_path", } @@ -86,34 +78,39 @@ class StatisticsCalculatorTool(Tool): ), } - classes = classes_with_traits(PixelStatisticsCalculator) + classes = [ + TableLoader, + ] + classes_with_traits(PixelStatisticsCalculator) def setup(self): + # Read the input data with the 'TableLoader' + self.input_data = TableLoader( + parent=self, + ) # Check that the input and output files are not the same - if self.input_url == self.output_path: + if self.input_data.input_url == self.output_path: raise ToolConfigurationError( "Input and output files are same. Fix your configuration / cli arguments." ) - # Load the subarray description from the input file - subarray = SubarrayDescription.from_hdf(self.input_url) - # Initialization of the statistics calculator - self.stats_calculator = PixelStatisticsCalculator( - parent=self, subarray=subarray - ) - # Read the input data with the 'TableLoader' - self.input_data = TableLoader(input_url=self.input_url) + subarray = SubarrayDescription.from_hdf(self.input_data.input_url) # Get the telescope ids from the input data or use the allowed_tels configuration self.tel_ids = ( subarray.tel_ids if self.allowed_tels is None else self.allowed_tels ) + # Initialization of the statistics calculator + self.stats_calculator = PixelStatisticsCalculator( + parent=self, subarray=subarray + ) def start(self): # Iterate over the telescope ids and calculate the statistics for tel_id in self.tel_ids: # Read the whole dl1 images for one particular telescope dl1_table = self.input_data.read_telescope_events_by_id( - telescopes=tel_id, + telescopes=[ + tel_id, + ], dl1_images=True, dl1_parameters=False, dl1_muons=False, From bb71a4f130647a615eac720f0f8ff69ff8dea284 Mon Sep 17 00:00:00 2001 From: TjarkMiener Date: Thu, 14 Nov 2024 13:17:03 +0100 Subject: [PATCH 11/19] fix changelog filename --- docs/changes/2628.features.rst | 1 - 1 file changed, 1 deletion(-) delete mode 100644 docs/changes/2628.features.rst diff --git a/docs/changes/2628.features.rst b/docs/changes/2628.features.rst deleted file mode 100644 index f57b32854af..00000000000 --- a/docs/changes/2628.features.rst +++ /dev/null @@ -1 +0,0 @@ -Add a generic stats-calculation tool utilizing the PixelStatisticsCalculator. From d3ea92546b912b94b4ac664009170d34c66c0749 Mon Sep 17 00:00:00 2001 From: TjarkMiener Date: Thu, 14 Nov 2024 13:19:46 +0100 Subject: [PATCH 12/19] add changelog file --- docs/changes/2628.feature.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 docs/changes/2628.feature.rst diff --git a/docs/changes/2628.feature.rst b/docs/changes/2628.feature.rst new file mode 100644 index 00000000000..f57b32854af --- /dev/null +++ b/docs/changes/2628.feature.rst @@ -0,0 +1 @@ +Add a generic stats-calculation tool utilizing the PixelStatisticsCalculator. From bc36037326d8b5f9625eb9f05765b4844e6d67e6 Mon Sep 17 00:00:00 2001 From: TjarkMiener Date: Thu, 14 Nov 2024 13:52:55 +0100 Subject: [PATCH 13/19] add proper ToolConfigurationError if chunk size is too large --- src/ctapipe/tools/calculate_pixel_stats.py | 9 +++++++++ .../tools/tests/test_calculate_pixel_stats.py | 15 +++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/src/ctapipe/tools/calculate_pixel_stats.py b/src/ctapipe/tools/calculate_pixel_stats.py index 89bdcb40065..5603575a342 100644 --- a/src/ctapipe/tools/calculate_pixel_stats.py +++ b/src/ctapipe/tools/calculate_pixel_stats.py @@ -121,6 +121,15 @@ def start(self): instrument=False, pointing=False, )[tel_id] + # Check if the chunk size does not exceed the table length of the input data + if self.stats_calculator.stats_aggregators[ + self.stats_calculator.stats_aggregator_type.tel[tel_id] + ].chunk_size > len(dl1_table): + raise ToolConfigurationError( + f"Change --StatisticsAggregator.chunk_size to decrease the chunk size " + f"of the aggregation to at least '{len(dl1_table)}' (table length of the " + f"input data for telescope 'tel_id={tel_id}')." + ) # Check if the input column name is in the table if self.input_column_name not in dl1_table.colnames: raise ToolConfigurationError( diff --git a/src/ctapipe/tools/tests/test_calculate_pixel_stats.py b/src/ctapipe/tools/tests/test_calculate_pixel_stats.py index 38e61354b65..9a8c3416847 100644 --- a/src/ctapipe/tools/tests/test_calculate_pixel_stats.py +++ b/src/ctapipe/tools/tests/test_calculate_pixel_stats.py @@ -100,3 +100,18 @@ def test_tool_config_error(tmp_path, dl1_image_file): cwd=tmp_path, raises=True, ) + # Check if ToolConfigurationError is raised + # when the chunk size is larger than the number of events in the input file + with pytest.raises(ToolConfigurationError): + run_tool( + StatisticsCalculatorTool(), + argv=[ + f"--input_url={dl1_image_file}", + f"--output_path={monitoring_file}", + "--StatisticsCalculatorTool.allowed_tels=3", + "--StatisticsAggregator.chunk_size=2500", + "--overwrite", + ], + cwd=tmp_path, + raises=True, + ) From 915e0197812a4c45f754370eb09687b855342304 Mon Sep 17 00:00:00 2001 From: TjarkMiener Date: Thu, 14 Nov 2024 14:27:08 +0100 Subject: [PATCH 14/19] added metadata renamed output_column_name to output_table_name --- src/ctapipe/tools/calculate_pixel_stats.py | 11 +++++++---- src/ctapipe/tools/tests/test_calculate_pixel_stats.py | 4 ++-- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/ctapipe/tools/calculate_pixel_stats.py b/src/ctapipe/tools/calculate_pixel_stats.py index 5603575a342..aff8b32e417 100644 --- a/src/ctapipe/tools/calculate_pixel_stats.py +++ b/src/ctapipe/tools/calculate_pixel_stats.py @@ -54,10 +54,10 @@ class StatisticsCalculatorTool(Tool): help="Column name of the pixel-wise image data to calculate statistics", ).tag(config=True) - output_column_name = Unicode( + output_table_name = Unicode( default_value="statistics", allow_none=False, - help="Column name of the output statistics", + help="Table name of the output statistics", ).tag(config=True) output_path = Path( @@ -164,11 +164,14 @@ def start(self): "No faulty chunks found for telescope 'tel_id=%d'. Skipping second pass.", tel_id, ) + # Add metadata to the aggregated statistics + aggregated_stats.meta["input_url"] = self.input_data.input_url + aggregated_stats.meta["input_column_name"] = self.input_column_name # Write the aggregated statistics and their outlier mask to the output file write_table( aggregated_stats, self.output_path, - f"/dl1/monitoring/telescope/{self.output_column_name}/tel_{tel_id:03d}", + f"/dl1/monitoring/telescope/{self.output_table_name}/tel_{tel_id:03d}", overwrite=self.overwrite, ) @@ -176,7 +179,7 @@ def finish(self): self.log.info( "DL1 monitoring data was stored in '%s' under '%s'", self.output_path, - f"/dl1/monitoring/telescope/{self.output_column_name}", + f"/dl1/monitoring/telescope/{self.output_table_name}", ) self.log.info("Tool is shutting down") diff --git a/src/ctapipe/tools/tests/test_calculate_pixel_stats.py b/src/ctapipe/tools/tests/test_calculate_pixel_stats.py index 9a8c3416847..d64660c687a 100644 --- a/src/ctapipe/tools/tests/test_calculate_pixel_stats.py +++ b/src/ctapipe/tools/tests/test_calculate_pixel_stats.py @@ -22,7 +22,7 @@ def test_calculate_pixel_stats_tool(tmp_path, dl1_image_file): "StatisticsCalculatorTool": { "allowed_tels": [tel_id], "input_column_name": "image", - "output_column_name": "statistics", + "output_table_name": "statistics", }, "PixelStatisticsCalculator": { "stats_aggregator_type": [ @@ -68,7 +68,7 @@ def test_tool_config_error(tmp_path, dl1_image_file): "StatisticsCalculatorTool": { "allowed_tels": [3], "input_column_name": "image_charges", - "output_column_name": "statistics", + "output_table_name": "statistics", } } ) From 588adf249ab5c2f90700546730774c5829d91089 Mon Sep 17 00:00:00 2001 From: TjarkMiener Date: Thu, 14 Nov 2024 14:30:00 +0100 Subject: [PATCH 15/19] polish error message --- src/ctapipe/tools/calculate_pixel_stats.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ctapipe/tools/calculate_pixel_stats.py b/src/ctapipe/tools/calculate_pixel_stats.py index aff8b32e417..aa8b15629f3 100644 --- a/src/ctapipe/tools/calculate_pixel_stats.py +++ b/src/ctapipe/tools/calculate_pixel_stats.py @@ -127,7 +127,7 @@ def start(self): ].chunk_size > len(dl1_table): raise ToolConfigurationError( f"Change --StatisticsAggregator.chunk_size to decrease the chunk size " - f"of the aggregation to at least '{len(dl1_table)}' (table length of the " + f"of the aggregation to a maximum of '{len(dl1_table)}' (table length of the " f"input data for telescope 'tel_id={tel_id}')." ) # Check if the input column name is in the table From 7472bbb9060e96778201e9de6668d0907488f10a Mon Sep 17 00:00:00 2001 From: Tjark Miener Date: Wed, 18 Dec 2024 14:27:01 +0100 Subject: [PATCH 16/19] added match in pytest raise --- .../tools/tests/test_calculate_pixel_stats.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/ctapipe/tools/tests/test_calculate_pixel_stats.py b/src/ctapipe/tools/tests/test_calculate_pixel_stats.py index d64660c687a..bcd3af2a66d 100644 --- a/src/ctapipe/tools/tests/test_calculate_pixel_stats.py +++ b/src/ctapipe/tools/tests/test_calculate_pixel_stats.py @@ -76,12 +76,15 @@ def test_tool_config_error(tmp_path, dl1_image_file): monitoring_file = tmp_path / "monitoring.dl1.h5" # Check if ToolConfigurationError is raised # when the column name of the pixel-wise image data is not correct - with pytest.raises(ToolConfigurationError): + with pytest.raises( + ToolConfigurationError, match="Column 'image_charges' not found" + ): run_tool( StatisticsCalculatorTool(config=config), argv=[ f"--input_url={dl1_image_file}", f"--output_path={monitoring_file}", + "--StatisticsAggregator.chunk_size=1", "--overwrite", ], cwd=tmp_path, @@ -89,7 +92,9 @@ def test_tool_config_error(tmp_path, dl1_image_file): ) # Check if ToolConfigurationError is raised # when the input and output files are the same - with pytest.raises(ToolConfigurationError): + with pytest.raises( + ToolConfigurationError, match="Input and output files are same." + ): run_tool( StatisticsCalculatorTool(), argv=[ @@ -102,7 +107,9 @@ def test_tool_config_error(tmp_path, dl1_image_file): ) # Check if ToolConfigurationError is raised # when the chunk size is larger than the number of events in the input file - with pytest.raises(ToolConfigurationError): + with pytest.raises( + ToolConfigurationError, match="Change --StatisticsAggregator.chunk_size" + ): run_tool( StatisticsCalculatorTool(), argv=[ From 751bbd2c958c5a4d47c179d8823f100331f30318 Mon Sep 17 00:00:00 2001 From: Tjark Miener Date: Thu, 19 Dec 2024 10:52:53 +0100 Subject: [PATCH 17/19] add the event type to the meta data remove the input url since it is irrelevant --- src/ctapipe/tools/calculate_pixel_stats.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ctapipe/tools/calculate_pixel_stats.py b/src/ctapipe/tools/calculate_pixel_stats.py index aa8b15629f3..0b9de78d6e0 100644 --- a/src/ctapipe/tools/calculate_pixel_stats.py +++ b/src/ctapipe/tools/calculate_pixel_stats.py @@ -165,7 +165,7 @@ def start(self): tel_id, ) # Add metadata to the aggregated statistics - aggregated_stats.meta["input_url"] = self.input_data.input_url + aggregated_stats.meta["event_type"] = dl1_table["event_type"][0] aggregated_stats.meta["input_column_name"] = self.input_column_name # Write the aggregated statistics and their outlier mask to the output file write_table( From 3815a8f607b0988b57187f0e0ed945340771afb6 Mon Sep 17 00:00:00 2001 From: Tjark Miener Date: Thu, 9 Jan 2025 16:27:48 +0100 Subject: [PATCH 18/19] rename tool for calculate the pixel stats --- src/ctapipe/resources/calculate_pixel_stats.yaml | 4 ++-- src/ctapipe/tools/calculate_pixel_stats.py | 10 +++++----- .../tools/tests/test_calculate_pixel_stats.py | 16 ++++++++-------- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/ctapipe/resources/calculate_pixel_stats.yaml b/src/ctapipe/resources/calculate_pixel_stats.yaml index 48e262d3ab2..8573e4c0489 100644 --- a/src/ctapipe/resources/calculate_pixel_stats.yaml +++ b/src/ctapipe/resources/calculate_pixel_stats.yaml @@ -1,7 +1,7 @@ -StatisticsCalculatorTool: +PixelStatisticsCalculatorTool: allowed_tels: [1,2,3,4] input_column_name: image - output_column_name: statistics + output_table_name: statistics PixelStatisticsCalculator: stats_aggregator_type: diff --git a/src/ctapipe/tools/calculate_pixel_stats.py b/src/ctapipe/tools/calculate_pixel_stats.py index 0b9de78d6e0..d5e20192c95 100644 --- a/src/ctapipe/tools/calculate_pixel_stats.py +++ b/src/ctapipe/tools/calculate_pixel_stats.py @@ -23,12 +23,12 @@ from ctapipe.monitoring.calculator import PixelStatisticsCalculator -class StatisticsCalculatorTool(Tool): +class PixelStatisticsCalculatorTool(Tool): """ Perform statistics calculation for pixel-wise image data """ - name = "StatisticsCalculatorTool" + name = "ctapipe-calculate-pixel-statistics" description = "Perform statistics calculation for pixel-wise image data" examples = """ @@ -68,12 +68,12 @@ class StatisticsCalculatorTool(Tool): aliases = { ("i", "input_url"): "TableLoader.input_url", - ("o", "output_path"): "StatisticsCalculatorTool.output_path", + ("o", "output_path"): "PixelStatisticsCalculatorTool.output_path", } flags = { "overwrite": ( - {"StatisticsCalculatorTool": {"overwrite": True}}, + {"PixelStatisticsCalculatorTool": {"overwrite": True}}, "Overwrite existing files", ), } @@ -186,7 +186,7 @@ def finish(self): def main(): # Run the tool - tool = StatisticsCalculatorTool() + tool = PixelStatisticsCalculatorTool() tool.run() diff --git a/src/ctapipe/tools/tests/test_calculate_pixel_stats.py b/src/ctapipe/tools/tests/test_calculate_pixel_stats.py index bcd3af2a66d..1e0330fa40f 100644 --- a/src/ctapipe/tools/tests/test_calculate_pixel_stats.py +++ b/src/ctapipe/tools/tests/test_calculate_pixel_stats.py @@ -9,7 +9,7 @@ from ctapipe.core import run_tool from ctapipe.core.tool import ToolConfigurationError from ctapipe.io import read_table -from ctapipe.tools.calculate_pixel_stats import StatisticsCalculatorTool +from ctapipe.tools.calculate_pixel_stats import PixelStatisticsCalculatorTool def test_calculate_pixel_stats_tool(tmp_path, dl1_image_file): @@ -19,7 +19,7 @@ def test_calculate_pixel_stats_tool(tmp_path, dl1_image_file): tel_id = 3 config = Config( { - "StatisticsCalculatorTool": { + "PixelStatisticsCalculatorTool": { "allowed_tels": [tel_id], "input_column_name": "image", "output_table_name": "statistics", @@ -38,7 +38,7 @@ def test_calculate_pixel_stats_tool(tmp_path, dl1_image_file): monitoring_file = tmp_path / "monitoring.dl1.h5" # Run the tool with the configuration and the input file run_tool( - StatisticsCalculatorTool(config=config), + PixelStatisticsCalculatorTool(config=config), argv=[ f"--input_url={dl1_image_file}", f"--output_path={monitoring_file}", @@ -65,7 +65,7 @@ def test_tool_config_error(tmp_path, dl1_image_file): # Run the tool with the configuration and the input file config = Config( { - "StatisticsCalculatorTool": { + "PixelStatisticsCalculatorTool": { "allowed_tels": [3], "input_column_name": "image_charges", "output_table_name": "statistics", @@ -80,7 +80,7 @@ def test_tool_config_error(tmp_path, dl1_image_file): ToolConfigurationError, match="Column 'image_charges' not found" ): run_tool( - StatisticsCalculatorTool(config=config), + PixelStatisticsCalculatorTool(config=config), argv=[ f"--input_url={dl1_image_file}", f"--output_path={monitoring_file}", @@ -96,7 +96,7 @@ def test_tool_config_error(tmp_path, dl1_image_file): ToolConfigurationError, match="Input and output files are same." ): run_tool( - StatisticsCalculatorTool(), + PixelStatisticsCalculatorTool(), argv=[ f"--input_url={dl1_image_file}", f"--output_path={dl1_image_file}", @@ -111,11 +111,11 @@ def test_tool_config_error(tmp_path, dl1_image_file): ToolConfigurationError, match="Change --StatisticsAggregator.chunk_size" ): run_tool( - StatisticsCalculatorTool(), + PixelStatisticsCalculatorTool(), argv=[ f"--input_url={dl1_image_file}", f"--output_path={monitoring_file}", - "--StatisticsCalculatorTool.allowed_tels=3", + "--PixelStatisticsCalculatorTool.allowed_tels=3", "--StatisticsAggregator.chunk_size=2500", "--overwrite", ], From e24e5236bf6b9d3539f5b64c25414958dd574a2b Mon Sep 17 00:00:00 2001 From: "mykhailo.dalchenko" Date: Tue, 28 Jan 2025 16:11:37 +0100 Subject: [PATCH 19/19] Remove hardcoded TableLoader parameters, enforce only images loading --- src/ctapipe/tools/calculate_pixel_stats.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/src/ctapipe/tools/calculate_pixel_stats.py b/src/ctapipe/tools/calculate_pixel_stats.py index d5e20192c95..18aa07632eb 100644 --- a/src/ctapipe/tools/calculate_pixel_stats.py +++ b/src/ctapipe/tools/calculate_pixel_stats.py @@ -92,6 +92,12 @@ def setup(self): raise ToolConfigurationError( "Input and output files are same. Fix your configuration / cli arguments." ) + if "dl1_images" in self.input_data.config.TableLoader: + if not self.input_data.dl1_images: + raise ToolConfigurationError( + "The TableLoader must read dl1 images. Set 'dl1_images' to True." + ) + self.input_data.dl1_images = True # Load the subarray description from the input file subarray = SubarrayDescription.from_hdf(self.input_data.input_url) # Get the telescope ids from the input data or use the allowed_tels configuration @@ -111,15 +117,6 @@ def start(self): telescopes=[ tel_id, ], - dl1_images=True, - dl1_parameters=False, - dl1_muons=False, - dl2=False, - simulated=False, - true_images=False, - true_parameters=False, - instrument=False, - pointing=False, )[tel_id] # Check if the chunk size does not exceed the table length of the input data if self.stats_calculator.stats_aggregators[