-
Notifications
You must be signed in to change notification settings - Fork 272
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #2628 from cta-observatory/stats_calc_tool
Stats calc tool
- Loading branch information
Showing
7 changed files
with
351 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Add a generic stats-calculation tool utilizing the PixelStatisticsCalculator. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
PixelStatisticsCalculatorTool: | ||
allowed_tels: [1,2,3,4] | ||
input_column_name: image | ||
output_table_name: statistics | ||
|
||
PixelStatisticsCalculator: | ||
stats_aggregator_type: | ||
- ["type", "LST*", "SigmaClippingAggregator"] | ||
- ["type", "MST*", "PlainAggregator"] | ||
|
||
chunk_shift: 1000 | ||
faulty_pixels_fraction: 0.1 | ||
outlier_detector_list: | ||
- name: MedianOutlierDetector | ||
apply_to: median | ||
config: | ||
median_range_factors: [-15, 15] | ||
- name: RangeOutlierDetector | ||
apply_to: median | ||
config: | ||
validity_range: [-20, 120] | ||
- name: StdOutlierDetector | ||
apply_to: std | ||
config: | ||
std_range_factors: [-15, 15] | ||
|
||
SigmaClippingAggregator: | ||
chunk_size: 2500 | ||
max_sigma: 4 | ||
iterations: 5 | ||
PlainAggregator: | ||
chunk_size: 2500 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,191 @@ | ||
""" | ||
Perform statistics calculation from pixel-wise image data | ||
""" | ||
|
||
import pathlib | ||
|
||
import numpy as np | ||
from astropy.table import vstack | ||
|
||
from ctapipe.core import Tool | ||
from ctapipe.core.tool import ToolConfigurationError | ||
from ctapipe.core.traits import ( | ||
Bool, | ||
CInt, | ||
Path, | ||
Set, | ||
Unicode, | ||
classes_with_traits, | ||
) | ||
from ctapipe.instrument import SubarrayDescription | ||
from ctapipe.io import write_table | ||
from ctapipe.io.tableloader import TableLoader | ||
from ctapipe.monitoring.calculator import PixelStatisticsCalculator | ||
|
||
|
||
class PixelStatisticsCalculatorTool(Tool): | ||
""" | ||
Perform statistics calculation for pixel-wise image data | ||
""" | ||
|
||
name = "ctapipe-calculate-pixel-statistics" | ||
description = "Perform statistics calculation for pixel-wise image data" | ||
|
||
examples = """ | ||
To calculate statistics of pixel-wise image data files: | ||
> ctapipe-calculate-pixel-statistics --TableLoader.input_url input.dl1.h5 --output_path /path/monitoring.h5 --overwrite | ||
""" | ||
|
||
allowed_tels = Set( | ||
trait=CInt(), | ||
default_value=None, | ||
allow_none=True, | ||
help=( | ||
"List of allowed tel_ids, others will be ignored. " | ||
"If None, all telescopes in the input stream will be included." | ||
), | ||
).tag(config=True) | ||
|
||
input_column_name = Unicode( | ||
default_value="image", | ||
allow_none=False, | ||
help="Column name of the pixel-wise image data to calculate statistics", | ||
).tag(config=True) | ||
|
||
output_table_name = Unicode( | ||
default_value="statistics", | ||
allow_none=False, | ||
help="Table name of the output statistics", | ||
).tag(config=True) | ||
|
||
output_path = Path( | ||
help="Output filename", default_value=pathlib.Path("monitoring.h5") | ||
).tag(config=True) | ||
|
||
overwrite = Bool(help="Overwrite output file if it exists").tag(config=True) | ||
|
||
aliases = { | ||
("i", "input_url"): "TableLoader.input_url", | ||
("o", "output_path"): "PixelStatisticsCalculatorTool.output_path", | ||
} | ||
|
||
flags = { | ||
"overwrite": ( | ||
{"PixelStatisticsCalculatorTool": {"overwrite": True}}, | ||
"Overwrite existing files", | ||
), | ||
} | ||
|
||
classes = [ | ||
TableLoader, | ||
] + classes_with_traits(PixelStatisticsCalculator) | ||
|
||
def setup(self): | ||
# Read the input data with the 'TableLoader' | ||
self.input_data = TableLoader( | ||
parent=self, | ||
) | ||
# Check that the input and output files are not the same | ||
if self.input_data.input_url == self.output_path: | ||
raise ToolConfigurationError( | ||
"Input and output files are same. Fix your configuration / cli arguments." | ||
) | ||
if "dl1_images" in self.input_data.config.TableLoader: | ||
if not self.input_data.dl1_images: | ||
raise ToolConfigurationError( | ||
"The TableLoader must read dl1 images. Set 'dl1_images' to True." | ||
) | ||
self.input_data.dl1_images = True | ||
# Load the subarray description from the input file | ||
subarray = SubarrayDescription.from_hdf(self.input_data.input_url) | ||
# Get the telescope ids from the input data or use the allowed_tels configuration | ||
self.tel_ids = ( | ||
subarray.tel_ids if self.allowed_tels is None else self.allowed_tels | ||
) | ||
# Initialization of the statistics calculator | ||
self.stats_calculator = PixelStatisticsCalculator( | ||
parent=self, subarray=subarray | ||
) | ||
|
||
def start(self): | ||
# Iterate over the telescope ids and calculate the statistics | ||
for tel_id in self.tel_ids: | ||
# Read the whole dl1 images for one particular telescope | ||
dl1_table = self.input_data.read_telescope_events_by_id( | ||
telescopes=[ | ||
tel_id, | ||
], | ||
)[tel_id] | ||
# Check if the chunk size does not exceed the table length of the input data | ||
if self.stats_calculator.stats_aggregators[ | ||
self.stats_calculator.stats_aggregator_type.tel[tel_id] | ||
].chunk_size > len(dl1_table): | ||
raise ToolConfigurationError( | ||
f"Change --StatisticsAggregator.chunk_size to decrease the chunk size " | ||
f"of the aggregation to a maximum of '{len(dl1_table)}' (table length of the " | ||
f"input data for telescope 'tel_id={tel_id}')." | ||
) | ||
# Check if the input column name is in the table | ||
if self.input_column_name not in dl1_table.colnames: | ||
raise ToolConfigurationError( | ||
f"Column '{self.input_column_name}' not found " | ||
f"in the input data for telescope 'tel_id={tel_id}'." | ||
) | ||
# Perform the first pass of the statistics calculation | ||
aggregated_stats = self.stats_calculator.first_pass( | ||
table=dl1_table, | ||
tel_id=tel_id, | ||
col_name=self.input_column_name, | ||
) | ||
# Check if 'chunk_shift' is selected | ||
if self.stats_calculator.chunk_shift is not None: | ||
# Check if there are any faulty chunks to perform a second pass over the data | ||
if np.any(~aggregated_stats["is_valid"].data): | ||
# Perform the second pass of the statistics calculation | ||
aggregated_stats_secondpass = self.stats_calculator.second_pass( | ||
table=dl1_table, | ||
valid_chunks=aggregated_stats["is_valid"].data, | ||
tel_id=tel_id, | ||
col_name=self.input_column_name, | ||
) | ||
# Stack the statistic values from the first and second pass | ||
aggregated_stats = vstack( | ||
[aggregated_stats, aggregated_stats_secondpass] | ||
) | ||
# Sort the stacked aggregated statistic values by starting time | ||
aggregated_stats.sort(["time_start"]) | ||
else: | ||
self.log.info( | ||
"No faulty chunks found for telescope 'tel_id=%d'. Skipping second pass.", | ||
tel_id, | ||
) | ||
# Add metadata to the aggregated statistics | ||
aggregated_stats.meta["event_type"] = dl1_table["event_type"][0] | ||
aggregated_stats.meta["input_column_name"] = self.input_column_name | ||
# Write the aggregated statistics and their outlier mask to the output file | ||
write_table( | ||
aggregated_stats, | ||
self.output_path, | ||
f"/dl1/monitoring/telescope/{self.output_table_name}/tel_{tel_id:03d}", | ||
overwrite=self.overwrite, | ||
) | ||
|
||
def finish(self): | ||
self.log.info( | ||
"DL1 monitoring data was stored in '%s' under '%s'", | ||
self.output_path, | ||
f"/dl1/monitoring/telescope/{self.output_table_name}", | ||
) | ||
self.log.info("Tool is shutting down") | ||
|
||
|
||
def main(): | ||
# Run the tool | ||
tool = PixelStatisticsCalculatorTool() | ||
tool.run() | ||
|
||
|
||
if __name__ == "main": | ||
main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,124 @@ | ||
#!/usr/bin/env python3 | ||
""" | ||
Test ctapipe-calculate-pixel-statistics tool | ||
""" | ||
|
||
import pytest | ||
from traitlets.config.loader import Config | ||
|
||
from ctapipe.core import run_tool | ||
from ctapipe.core.tool import ToolConfigurationError | ||
from ctapipe.io import read_table | ||
from ctapipe.tools.calculate_pixel_stats import PixelStatisticsCalculatorTool | ||
|
||
|
||
def test_calculate_pixel_stats_tool(tmp_path, dl1_image_file): | ||
"""check statistics calculation from pixel-wise image data files""" | ||
|
||
# Create a configuration suitable for the test | ||
tel_id = 3 | ||
config = Config( | ||
{ | ||
"PixelStatisticsCalculatorTool": { | ||
"allowed_tels": [tel_id], | ||
"input_column_name": "image", | ||
"output_table_name": "statistics", | ||
}, | ||
"PixelStatisticsCalculator": { | ||
"stats_aggregator_type": [ | ||
("id", tel_id, "PlainAggregator"), | ||
], | ||
}, | ||
"PlainAggregator": { | ||
"chunk_size": 1, | ||
}, | ||
} | ||
) | ||
# Set the output file path | ||
monitoring_file = tmp_path / "monitoring.dl1.h5" | ||
# Run the tool with the configuration and the input file | ||
run_tool( | ||
PixelStatisticsCalculatorTool(config=config), | ||
argv=[ | ||
f"--input_url={dl1_image_file}", | ||
f"--output_path={monitoring_file}", | ||
"--overwrite", | ||
], | ||
cwd=tmp_path, | ||
raises=True, | ||
) | ||
# Check that the output file has been created | ||
assert monitoring_file.exists() | ||
# Check that the output file is not empty | ||
assert ( | ||
read_table( | ||
monitoring_file, | ||
path=f"/dl1/monitoring/telescope/statistics/tel_{tel_id:03d}", | ||
)["mean"] | ||
is not None | ||
) | ||
|
||
|
||
def test_tool_config_error(tmp_path, dl1_image_file): | ||
"""check tool configuration error""" | ||
|
||
# Run the tool with the configuration and the input file | ||
config = Config( | ||
{ | ||
"PixelStatisticsCalculatorTool": { | ||
"allowed_tels": [3], | ||
"input_column_name": "image_charges", | ||
"output_table_name": "statistics", | ||
} | ||
} | ||
) | ||
# Set the output file path | ||
monitoring_file = tmp_path / "monitoring.dl1.h5" | ||
# Check if ToolConfigurationError is raised | ||
# when the column name of the pixel-wise image data is not correct | ||
with pytest.raises( | ||
ToolConfigurationError, match="Column 'image_charges' not found" | ||
): | ||
run_tool( | ||
PixelStatisticsCalculatorTool(config=config), | ||
argv=[ | ||
f"--input_url={dl1_image_file}", | ||
f"--output_path={monitoring_file}", | ||
"--StatisticsAggregator.chunk_size=1", | ||
"--overwrite", | ||
], | ||
cwd=tmp_path, | ||
raises=True, | ||
) | ||
# Check if ToolConfigurationError is raised | ||
# when the input and output files are the same | ||
with pytest.raises( | ||
ToolConfigurationError, match="Input and output files are same." | ||
): | ||
run_tool( | ||
PixelStatisticsCalculatorTool(), | ||
argv=[ | ||
f"--input_url={dl1_image_file}", | ||
f"--output_path={dl1_image_file}", | ||
"--overwrite", | ||
], | ||
cwd=tmp_path, | ||
raises=True, | ||
) | ||
# Check if ToolConfigurationError is raised | ||
# when the chunk size is larger than the number of events in the input file | ||
with pytest.raises( | ||
ToolConfigurationError, match="Change --StatisticsAggregator.chunk_size" | ||
): | ||
run_tool( | ||
PixelStatisticsCalculatorTool(), | ||
argv=[ | ||
f"--input_url={dl1_image_file}", | ||
f"--output_path={monitoring_file}", | ||
"--PixelStatisticsCalculatorTool.allowed_tels=3", | ||
"--StatisticsAggregator.chunk_size=2500", | ||
"--overwrite", | ||
], | ||
cwd=tmp_path, | ||
raises=True, | ||
) |