diff --git a/code/nnv/examples/Submission/FORMALISE2025/.env b/code/nnv/examples/Submission/FORMALISE2025/.env new file mode 100644 index 000000000..70734b74a --- /dev/null +++ b/code/nnv/examples/Submission/FORMALISE2025/.env @@ -0,0 +1,2 @@ +NNV_PATH='' # replace with '/path/to/nnv' +NPY_MATLAB_PATH='' # replace with '/path/to/npy-matlab/npy-matlab' \ No newline at end of file diff --git a/code/nnv/examples/Submission/FORMALISE2025/.gitignore b/code/nnv/examples/Submission/FORMALISE2025/.gitignore new file mode 100644 index 000000000..7fd3f957f --- /dev/null +++ b/code/nnv/examples/Submission/FORMALISE2025/.gitignore @@ -0,0 +1,14 @@ +.DS_Store + +# don't commit the data +*data/ZoomIn* +*data/ZoomOut* +*data/GTSRB* +*data/STMNIST* + +# matlab stuff +*.asv +*tbxmanager* + +# python stuff +*__pycache__* diff --git a/code/nnv/examples/Submission/FORMALISE2025/LICENSE.txt b/code/nnv/examples/Submission/FORMALISE2025/LICENSE.txt new file mode 100644 index 000000000..5b765c90e --- /dev/null +++ b/code/nnv/examples/Submission/FORMALISE2025/LICENSE.txt @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. \ No newline at end of file diff --git a/code/nnv/examples/Submission/FORMALISE2025/README.md b/code/nnv/examples/Submission/FORMALISE2025/README.md new file mode 100644 index 000000000..ba46c7aec --- /dev/null +++ b/code/nnv/examples/Submission/FORMALISE2025/README.md @@ -0,0 +1,105 @@ +# (Artifact) Robustness Verification of Video Classification Neural Networks + +This artifact is used to reproduce the results in _Robustness Verification of Video Classification Neural Networks_. + +All results from _Robustness Verification of Video Classification Neural Networks_ were captured using an Apple M1 Max 10-core CPU@3.20GHz×10 with 64GB of RAM. The times reported to run the smoke test and reproduce results are approximated based on this computer. + +# Requirements + +The following resources are required to run this artifact: + +- MATLAB 2024a with NNV and npy-matlab installed and added to the path. +- conda environment with Python v3.11. Install requirements from requirements.txt. Make sure to install the source files. + +# Installation + +1. Clone NNV and npy-matlab and install them ensuring that both have been added to the MATLAB path. + +``` +# Clone NNV and npy-matlab +git clone --recursive https://github.com/verivital/nnv.git +git clone https://github.com/kwikteam/npy-matlab.git +``` + +Follow the instructions under heading `# Installation:` on [NNV's README](https://github.com/verivital/nnv/blob/master/README.md). Note, if using a computer with an Apple silicon CPU, then it may be necessary to remove a line from NNV's `install.m` script. The following line + +```matlab +tbxmanager install lcp hysdel cddmex clpmex glpkmex fourier sedumi; +``` + +can be removed/commented out.
+ +For `npy-matlab`, add and save the path to MATLAB. Again, instructions are available on [`npy-matlab`'s README](https://github.com/kwikteam/npy-matlab/blob/master/README.md). + +2. Download the following dataset files from here into the `FORMALISE2025/data` folder: + +- + +3. Create a conda environment with `Python v3.11` and install the requirements from `requirements.txt`. Additionally, install the source files to the environment. Both can be done by running the following commands from the root directory (`FORMALISE2025`): + + ```bash + pip install -r requirements.txt + + # before installing source files, make sure to navigate to this src directory, e.g. + cd /path/to/FORMALISE2025/src + pip install -e . + ``` + +4. Modify the `verify.py` file to add the path to your NNV and npy-matlab directories (the repositories that were cloned earlier). For the `npy-matlab` repository, you'll want to reference the subfolder in the directory also called `npy-matlab`, e.g. `/some/path/to/npy-matlab/npy-matlab`. + +5. + +# Smoke Test Instructions (~30 minutes) + +Instructions to quickly test the software needed to reproduce the artifacts of the paper. If no issues arise during the smoke test, you can safely proceed to reproducing all artifacts as described in the below section. + +1. Open a terminal and navigate to the `FORMALISE2025` directory. +2. Make sure the conda environment with the installed dependencies is activated. +3. Run the following command to enable the smoke test script to be executed and then run it: + +```bash +chmod +x run_smoketest.sh && ./run_smoketest.sh +``` + +4. The smoketest will verify a single sample from each of the different datasets and with the different verification algorithms. The results will be output to a `smoketest_outputs.txt` file. Assuming everything ran smoothly, you should see: + +``` +********************************************** + Smoke test complete. 
+********************************************** +``` + +# Reproducing the Full Results + +Assuming the average runtime for the experiments remains as shown in the paper, then it will take approximately 9-10 days to reproduce the results. As such, there will also be an intermediary where we verify 1/10 samples verified in the full experiments. Those instructions will be in the next section. For now, we discuss how to reproduce the full results. The instructions will be the exact same for running the smoke test, only we use a different script. + +1. Open a terminal and navigate to the `FORMALISE2025` directory. +2. Make sure the conda environment with the installed dependencies is activated. +3. Run the following command to begin reproducing the artifacts: + +```bash +chmod +x run_vvn.sh && ./run_vvn.sh +``` + +# Reproducing a Subset of the Results + +This set of instructions is for reproducing a subset of the results. In this case, we verify every tenth sample from what was done for the original verification. Generating the reachable output range plots remains the same. + +1. Open a terminal and navigate to the `FORMALISE2025` directory. +2. Make sure the conda environment with the installed dependencies is activated. +3. Run the following command to begin reproducing a subset of the artifacts: + +```bash +chmod +x run_subset_vvn.sh && ./run_subset_vvn.sh +``` + +# Results + +| Artifact | Filepath | Description | +| -------- | ------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| Table II | `results/table2.txt` | A table of results (PSRV and average runtime) for all experiments. | +| Fig.
7 | `results/reach_stmnist_plot.png` and `results/reach_bad_stmnist_plot.png` | Plots of the reachable output ranges on a sample of STMNIST with different epsilon values (one robust, the other non-robust). | +| Fig. 8 | `results/runtime_comp.png` | A visualization of how average runtime changes for the Zoom In, Zoom Out, and GTSRB datasets as the number of frames and magnitude of the perturbation changes. | + + + diff --git a/code/nnv/examples/Submission/FORMALISE2025/data/about.txt b/code/nnv/examples/Submission/FORMALISE2025/data/about.txt new file mode 100644 index 000000000..d1a5c37b3 --- /dev/null +++ b/code/nnv/examples/Submission/FORMALISE2025/data/about.txt @@ -0,0 +1 @@ +The datasets are too large to be pushed to the repository. Please navigate to the release to find the dataset files and put them in this directory accordingly. diff --git a/code/nnv/examples/Submission/FORMALISE2025/models/gtsrb_16f.onnx b/code/nnv/examples/Submission/FORMALISE2025/models/gtsrb_16f.onnx new file mode 100644 index 000000000..74dfe4669 Binary files /dev/null and b/code/nnv/examples/Submission/FORMALISE2025/models/gtsrb_16f.onnx differ diff --git a/code/nnv/examples/Submission/FORMALISE2025/models/gtsrb_4f.onnx b/code/nnv/examples/Submission/FORMALISE2025/models/gtsrb_4f.onnx new file mode 100644 index 000000000..d4e11f98e Binary files /dev/null and b/code/nnv/examples/Submission/FORMALISE2025/models/gtsrb_4f.onnx differ diff --git a/code/nnv/examples/Submission/FORMALISE2025/models/gtsrb_8f.onnx b/code/nnv/examples/Submission/FORMALISE2025/models/gtsrb_8f.onnx new file mode 100644 index 000000000..e7d94e3a0 Binary files /dev/null and b/code/nnv/examples/Submission/FORMALISE2025/models/gtsrb_8f.onnx differ diff --git a/code/nnv/examples/Submission/FORMALISE2025/models/stmnist_16f.onnx b/code/nnv/examples/Submission/FORMALISE2025/models/stmnist_16f.onnx new file mode 100644 index 000000000..5b74d7dca Binary files /dev/null and 
b/code/nnv/examples/Submission/FORMALISE2025/models/stmnist_16f.onnx differ diff --git a/code/nnv/examples/Submission/FORMALISE2025/models/stmnist_32f.onnx b/code/nnv/examples/Submission/FORMALISE2025/models/stmnist_32f.onnx new file mode 100644 index 000000000..74b3faad4 Binary files /dev/null and b/code/nnv/examples/Submission/FORMALISE2025/models/stmnist_32f.onnx differ diff --git a/code/nnv/examples/Submission/FORMALISE2025/models/stmnist_4f.onnx b/code/nnv/examples/Submission/FORMALISE2025/models/stmnist_4f.onnx new file mode 100644 index 000000000..60c4a0d52 Binary files /dev/null and b/code/nnv/examples/Submission/FORMALISE2025/models/stmnist_4f.onnx differ diff --git a/code/nnv/examples/Submission/FORMALISE2025/models/stmnist_64f.onnx b/code/nnv/examples/Submission/FORMALISE2025/models/stmnist_64f.onnx new file mode 100644 index 000000000..966491870 Binary files /dev/null and b/code/nnv/examples/Submission/FORMALISE2025/models/stmnist_64f.onnx differ diff --git a/code/nnv/examples/Submission/FORMALISE2025/models/stmnist_8f.onnx b/code/nnv/examples/Submission/FORMALISE2025/models/stmnist_8f.onnx new file mode 100644 index 000000000..c782c2dc4 Binary files /dev/null and b/code/nnv/examples/Submission/FORMALISE2025/models/stmnist_8f.onnx differ diff --git a/code/nnv/examples/Submission/FORMALISE2025/models/zoomin_16f.onnx b/code/nnv/examples/Submission/FORMALISE2025/models/zoomin_16f.onnx new file mode 100644 index 000000000..bc474f948 Binary files /dev/null and b/code/nnv/examples/Submission/FORMALISE2025/models/zoomin_16f.onnx differ diff --git a/code/nnv/examples/Submission/FORMALISE2025/models/zoomin_4f.onnx b/code/nnv/examples/Submission/FORMALISE2025/models/zoomin_4f.onnx new file mode 100644 index 000000000..90fb2be84 Binary files /dev/null and b/code/nnv/examples/Submission/FORMALISE2025/models/zoomin_4f.onnx differ diff --git a/code/nnv/examples/Submission/FORMALISE2025/models/zoomin_8f.onnx 
b/code/nnv/examples/Submission/FORMALISE2025/models/zoomin_8f.onnx new file mode 100644 index 000000000..4ed26cd6c Binary files /dev/null and b/code/nnv/examples/Submission/FORMALISE2025/models/zoomin_8f.onnx differ diff --git a/code/nnv/examples/Submission/FORMALISE2025/models/zoomout_16f.onnx b/code/nnv/examples/Submission/FORMALISE2025/models/zoomout_16f.onnx new file mode 100644 index 000000000..f86fb8710 Binary files /dev/null and b/code/nnv/examples/Submission/FORMALISE2025/models/zoomout_16f.onnx differ diff --git a/code/nnv/examples/Submission/FORMALISE2025/models/zoomout_4f.onnx b/code/nnv/examples/Submission/FORMALISE2025/models/zoomout_4f.onnx new file mode 100644 index 000000000..68f92d9f1 Binary files /dev/null and b/code/nnv/examples/Submission/FORMALISE2025/models/zoomout_4f.onnx differ diff --git a/code/nnv/examples/Submission/FORMALISE2025/models/zoomout_8f.onnx b/code/nnv/examples/Submission/FORMALISE2025/models/zoomout_8f.onnx new file mode 100644 index 000000000..de85bc924 Binary files /dev/null and b/code/nnv/examples/Submission/FORMALISE2025/models/zoomout_8f.onnx differ diff --git a/code/nnv/examples/Submission/FORMALISE2025/requirements.txt b/code/nnv/examples/Submission/FORMALISE2025/requirements.txt new file mode 100644 index 000000000..d54744642 --- /dev/null +++ b/code/nnv/examples/Submission/FORMALISE2025/requirements.txt @@ -0,0 +1,25 @@ +coloredlogs==15.0.1 +contourpy==1.2.1 +cycler==0.12.1 +eagerpy==0.30.0 +flatbuffers==24.3.25 +fonttools==4.53.0 +humanfriendly==10.0 +kiwisolver==1.4.5 +matlabengine==24.1.2 +matplotlib==3.9.0 +mpmath==1.3.0 +numpy==1.26.4 +onnxruntime==1.18.0 +packaging==24.1 +pillow==10.3.0 +protobuf==5.27.1 +pyparsing==3.1.2 +python-dateutil==2.9.0.post0 +python-dotenv==1.0.1 +requests==2.32.3 +scipy==1.14.1 +six==1.16.0 +sympy==1.12.1 +typing_extensions==4.12.2 +urllib3==2.2.3 diff --git a/code/nnv/examples/Submission/FORMALISE2025/run_smoketest.sh b/code/nnv/examples/Submission/FORMALISE2025/run_smoketest.sh new 
file mode 100755 index 000000000..8cfaddddf --- /dev/null +++ b/code/nnv/examples/Submission/FORMALISE2025/run_smoketest.sh @@ -0,0 +1,95 @@ +#!/bin/bash + +OUTPUT_FILE="smoketest_outputs.txt" + +# Clear the file if it exists +> $OUTPUT_FILE + +echo "Starting smoke test..." +echo "Starting smoke test..." >> $OUTPUT_FILE + +####################### +### ZOOM IN RESULTS ### +####################### +# Zoom In 4f, relax +python src/vvn/verify.py zoom_in relax 1 4 1800 751 >> $OUTPUT_FILE 2>&1 + +# Zoom In 8f, relax +python src/vvn/verify.py zoom_in relax 1 8 1800 751 >> $OUTPUT_FILE 2>&1 + +# Zoom In 16f, relax +python src/vvn/verify.py zoom_in relax 1 16 1800 751 >> $OUTPUT_FILE 2>&1 + +# Zoom In 4f, approx +python src/vvn/verify.py zoom_in approx 1 4 1800 751 >> $OUTPUT_FILE 2>&1 + +# Zoom In 8f, approx +python src/vvn/verify.py zoom_in approx 1 8 1800 751 >> $OUTPUT_FILE 2>&1 + + +######################## +### ZOOM OUT RESULTS ### +######################## +# Zoom Out 4f, relax +python src/vvn/verify.py zoom_out relax 1 4 1800 624 >> $OUTPUT_FILE 2>&1 + +# Zoom Out 8f, relax +python src/vvn/verify.py zoom_out relax 1 8 1800 624 >> $OUTPUT_FILE 2>&1 + +# Zoom Out 16f, relax +python src/vvn/verify.py zoom_out relax 1 16 1800 624 >> $OUTPUT_FILE 2>&1 + +# Zoom Out 4f, approx +python src/vvn/verify.py zoom_out approx 1 4 1800 624 >> $OUTPUT_FILE 2>&1 + +# Zoom Out 8f, approx +python src/vvn/verify.py zoom_out approx 1 8 1800 624 >> $OUTPUT_FILE 2>&1 + + +####################### +#### GTSRB RESULTS #### +####################### +# GTSRB 4f, relax +python src/vvn/verify.py gtsrb relax 1 4 1800 12597 >> $OUTPUT_FILE 2>&1 + +# GTSRB 8f, relax +python src/vvn/verify.py gtsrb relax 1 8 1800 12597 >> $OUTPUT_FILE 2>&1 + +# GTSRB 16f, relax +python src/vvn/verify.py gtsrb relax 1 16 1800 12597 >> $OUTPUT_FILE 2>&1 + +# GTSRB 4f, approx +python src/vvn/verify.py gtsrb approx 1 4 1800 12597 >> $OUTPUT_FILE 2>&1 + +# GTSRB 8f, approx +python src/vvn/verify.py gtsrb approx 1 8 1800 
12597 >> $OUTPUT_FILE 2>&1 + +# GTSRB 16f, approx +python src/vvn/verify.py gtsrb approx 1 16 1800 12597 >> $OUTPUT_FILE 2>&1 + + +####################### +### STMNIST RESULTS ### +####################### +# STMNIST 16f, relax +python src/vvn/verify.py stmnist relax 1 16 1800 311 >> $OUTPUT_FILE 2>&1 + +# STMNIST 32f, relax +python src/vvn/verify.py stmnist relax 1 32 1800 311 >> $OUTPUT_FILE 2>&1 + +# STMNIST 64f, relax +python src/vvn/verify.py stmnist relax 1 64 1800 311 >> $OUTPUT_FILE 2>&1 + +# STMNIST 16f, approx +python src/vvn/verify.py stmnist approx 1 16 1800 311 >> $OUTPUT_FILE 2>&1 + +# STMNIST 32f, approx +python src/vvn/verify.py stmnist approx 1 32 1800 311 >> $OUTPUT_FILE 2>&1 + +# STMNIST 64f, approx +python src/vvn/verify.py stmnist approx 1 64 1800 311 >> $OUTPUT_FILE 2>&1 + + +echo -e "\n\n**********************************************" +echo " Smoke test complete. " +echo -e "**********************************************\n\n" diff --git a/code/nnv/examples/Submission/FORMALISE2025/run_subset_vvn.sh b/code/nnv/examples/Submission/FORMALISE2025/run_subset_vvn.sh new file mode 100755 index 000000000..7e14c94df --- /dev/null +++ b/code/nnv/examples/Submission/FORMALISE2025/run_subset_vvn.sh @@ -0,0 +1,14 @@ +#!/bin/bash +# TABLE II (GET ALL VERIFICATION RESULTS) +python src/run.py subset # MNIST Video +python src/run_gtsrb.py subset # GTSRB +python src/run_stmnist.py subset # STMNIST +python src/analysis/make_table2.py + + +# FIGURE 8 (COMPARISON OF AVERAGE RUNTIME) +python src/analysis/make_plots.py + + +# FIGURE 7 (REACHABLE OUTPUT PLOTS) +python src/generate_output_plots.py \ No newline at end of file diff --git a/code/nnv/examples/Submission/FORMALISE2025/run_vvn.sh b/code/nnv/examples/Submission/FORMALISE2025/run_vvn.sh new file mode 100755 index 000000000..04cd97f80 --- /dev/null +++ b/code/nnv/examples/Submission/FORMALISE2025/run_vvn.sh @@ -0,0 +1,14 @@ +#!/bin/bash +# TABLE II (GET ALL VERIFICATION RESULTS) +python src/run.py # MNIST 
Video +python src/run_gtsrb.py # GTSRB +python src/run_stmnist.py # STMNIST +python src/analysis/make_table2.py + + +# FIGURE 8 (COMPARISON OF AVERAGE RUNTIME) +python src/analysis/make_plots.py + + +# FIGURE 7 (REACHABLE OUTPUT PLOTS) +python src/generate_output_plots.py \ No newline at end of file diff --git a/code/nnv/examples/Submission/FORMALISE2025/src/analysis/make_plots.py b/code/nnv/examples/Submission/FORMALISE2025/src/analysis/make_plots.py new file mode 100644 index 000000000..9b0fe130e --- /dev/null +++ b/code/nnv/examples/Submission/FORMALISE2025/src/analysis/make_plots.py @@ -0,0 +1,135 @@ +import csv +import numpy as np +import os +import matplotlib.pyplot as plt +import matplotlib.lines as mlines +import matplotlib.patches as mpatches + +# local modules +import vvn.prep as vp +from vvn.config import Config +import vvn.gtsrbprep as vgp +import vvn.stmnistprep as vsp + +x = ['1/255', '2/255', '3/255'] + +# get the results dir +root = os.path.dirname(os.getcwd()) # Submission folder +output_dir = os.path.join(root, 'FORMALISE2025' ,'results') + +def summarize(output_file_dir): + average_times = [] + for filename in os.listdir(output_file_dir): + if filename == '.DS_Store': + continue + + fp = os.path.join(output_file_dir, filename) + + # open the results csv file + with open(fp, 'r', newline='') as f: + reader = csv.reader(f, delimiter=',') + + # skip the header + next(reader) + + res = [] + t = [] + + # read the values and build the new arrays for analysis + for row in reader: + res.append(row[1]) + t.append(row[2] if not row[2] == 'timeout' else 1800.0) + + # have to convert strings to valid floats before casting to int + res = np.array(res).astype(float) + res = np.array(res).astype(int) + t = np.array(t).astype(float) + + # count the number of verified samples + total_verified = np.sum(res[res == 1]) + + # calculate average time to verify + average_time = np.mean(t) + + average_times.append(average_time) + + return average_times + +# ZOOM IN 
+config = Config( + class_size=10, + epsilon=[1/255, 2/255, 3/255], + ds_type='zoom_in', + sample_len=4, + ver_algorithm='relax', + timeout=1800, + output_dir=output_dir +) +zoomin_4 = summarize(vp.build_output_filepath(config=config, parent_only=True)) + +config.sample_len = 8 +zoomin_8 = summarize(vp.build_output_filepath(config=config, parent_only=True)) + +config.sample_len = 16 +zoomin_16 = summarize(vp.build_output_filepath(config=config, parent_only=True)) + + +# ZOOM OUT +config.ds_type = 'zoom_out' +config.sample_len = 4 +zoomout_4 = summarize(vp.build_output_filepath(config=config, parent_only=True)) + +config.sample_len = 8 +zoomout_8 = summarize(vp.build_output_filepath(config=config, parent_only=True)) + +config.sample_len = 16 +zoomout_16 = summarize(vp.build_output_filepath(config=config, parent_only=True)) + + +# GTSRB +config.ds_type = 'gtsrb' +config.sample_len = 4 +gtsrb_4 = summarize(vgp.build_output_filepath(config=config, parent_only=True)) + +config.sample_len = 8 +gtsrb_8 = summarize(vgp.build_output_filepath(config=config, parent_only=True)) + +config.sample_len = 16 +gtsrb_16 = summarize(vgp.build_output_filepath(config=config, parent_only=True)) + + +if __name__ == "__main__": + plt.figure(figsize=(7, 8)) + + plt.plot(x, zoomin_4, linestyle='solid', color='red') + plt.plot(x, zoomin_8, linestyle='solid', color='green') + plt.plot(x, zoomin_16, linestyle='solid', color='blue') + + plt.plot(x, zoomout_4, linestyle='dashed', color='red') + plt.plot(x, zoomout_8, linestyle='dashed', color='green') + plt.plot(x, zoomout_16, linestyle='dashed', color='blue') + + plt.plot(x, gtsrb_4, linestyle='dotted', color='red') + plt.plot(x, gtsrb_8, linestyle='dotted', color='green') + plt.plot(x, gtsrb_16, linestyle='dotted', color='blue') + + plt.yscale('log') + + # legend lines + line1 = mlines.Line2D([], [], color='black', linestyle='solid', label='Zoom In') + line2 = mlines.Line2D([], [], color='black', linestyle='dashed', label='Zoom Out') + line3 = 
mlines.Line2D([], [], color='black', linestyle='dotted', label='GTSRB') + + # legend colors + color_red = mpatches.Patch(color='red', label='4 frames') + color_green = mpatches.Patch(color='green', label='8 frames') + color_blue = mpatches.Patch(color='blue', label='16 frames') + + # add labels and title + plt.xlabel("Epsilon") + plt.ylabel("Runtime (s)") + plt.title("Comparison of average runtime") + plt.legend(handles=[line1, line2, line3, color_red, color_green, color_blue], loc='upper left') + + plot_fp = os.path.join(root, 'FORMALISE2025', 'figs', 'runtime_comp.png') + plt.savefig(plot_fp, dpi=300) diff --git a/code/nnv/examples/Submission/FORMALISE2025/src/analysis/make_table2.py b/code/nnv/examples/Submission/FORMALISE2025/src/analysis/make_table2.py new file mode 100644 index 000000000..922345d73 --- /dev/null +++ b/code/nnv/examples/Submission/FORMALISE2025/src/analysis/make_table2.py @@ -0,0 +1,271 @@ +import csv +import numpy as np +import os +import matplotlib.pyplot as plt +import matplotlib.lines as mlines +import matplotlib.patches as mpatches + +# local modules +import vvn.prep as vp +from vvn.config import Config +import vvn.gtsrbprep as vgp +import vvn.stmnistprep as vsp + +# get the results dir +root = os.path.dirname(os.getcwd()) # Submission folder +output_dir = os.path.join(root, 'FORMALISE2025' ,'results') + +def summarize(output_file_dir): + # print(f'{output_file_dir}') + num_samples_verified = [] + average_times = [] + for filename in os.listdir(output_file_dir): + if filename == '.DS_Store': + continue + + fp = os.path.join(output_file_dir, filename) + + # open the results csv file + with open(fp, 'r', newline='') as f: + reader = csv.reader(f, delimiter=',') + + # skip the header + next(reader) + + res = [] + t = [] + + # read the values and build the new arrays for analysis + for row in reader: + res.append(row[1]) + t.append(row[2] if not row[2] == 'timeout' else 1800.0) + + # have to convert strings to valid floats before casting to 
int + res = np.array(res).astype(float) + res = np.array(res).astype(int) + t = np.array(t).astype(float) + + # count the number of verified samples + total_verified = np.sum(res[res == 1]) + num_samples_verified.append(total_verified) + + # calculate average time to verify + average_time = np.mean(t) + + average_times.append(average_time) + + return num_samples_verified, average_times + +# OUTPUTS +tv = [] +rt = [] + +# ZOOM IN RELAX +config = Config( + class_size=10, + epsilon=[1/255, 2/255, 3/255], + ds_type='zoom_in', + sample_len=4, + ver_algorithm='relax', + timeout=1800, + output_dir=output_dir +) +zoomin_4_tv_relax, zoomin_4_relax = summarize(vp.build_output_filepath(config=config, parent_only=True)) +tv.append(zoomin_4_tv_relax) +rt.append(zoomin_4_relax) + +config.sample_len = 8 +zoomin_8_tv_relax, zoomin_8_relax = summarize(vp.build_output_filepath(config=config, parent_only=True)) +tv.append(zoomin_8_tv_relax) +rt.append(zoomin_8_relax) + +config.sample_len = 16 +zoomin_16_tv_relax, zoomin_16_relax = summarize(vp.build_output_filepath(config=config, parent_only=True)) +tv.append(zoomin_16_tv_relax) +rt.append(zoomin_16_relax) + +# ZOOM IN APPROX +config.sample_len = 4 +config.ver_algorithm = 'approx' +zoomin_4_tv_approx, zoomin_4_approx = summarize(vp.build_output_filepath(config=config, parent_only=True)) +tv.append(zoomin_4_tv_approx) +rt.append(zoomin_4_approx) + +config.sample_len = 8 +zoomin_8_tv_approx, zoomin_8_approx = summarize(vp.build_output_filepath(config=config, parent_only=True)) +tv.append(zoomin_8_tv_approx) +rt.append(zoomin_8_approx) + + +# ZOOM OUT RELAX +config.ds_type = 'zoom_out' +config.sample_len = 4 +config.ver_algorithm = 'relax' +zoomout_4_tv_relax, zoomout_4_relax = summarize(vp.build_output_filepath(config=config, parent_only=True)) +tv.append(zoomout_4_tv_relax) +rt.append(zoomout_4_relax) + +config.sample_len = 8 +zoomout_8_tv_relax, zoomout_8_relax = summarize(vp.build_output_filepath(config=config, parent_only=True)) 
+tv.append(zoomout_8_tv_relax) +rt.append(zoomout_8_relax) + +config.sample_len = 16 +zoomout_16_tv_relax, zoomout_16_relax = summarize(vp.build_output_filepath(config=config, parent_only=True)) +tv.append(zoomout_16_tv_relax) +rt.append(zoomout_16_relax) + +# ZOOM OUT APPROX +config.sample_len = 4 +config.ver_algorithm = 'approx' +zoomout_4_tv_approx, zoomout_4_approx = summarize(vp.build_output_filepath(config=config, parent_only=True)) +tv.append(zoomout_4_tv_approx) +rt.append(zoomout_4_approx) + +config.sample_len = 8 +zoomout_8_tv_approx, zoomout_8_approx = summarize(vp.build_output_filepath(config=config, parent_only=True)) +tv.append(zoomout_8_tv_approx) +rt.append(zoomout_8_approx) + + +# GTSRB RELAX +config.ds_type = 'gtsrb' +config.sample_len = 4 +config.ver_algorithm = 'relax' +gtsrb_4_tv_relax, gtsrb_4_relax = summarize(vgp.build_output_filepath(config=config, parent_only=True)) +tv.append(gtsrb_4_tv_relax) +rt.append(gtsrb_4_relax) + +config.sample_len = 8 +gtsrb_8_tv_relax, gtsrb_8_relax = summarize(vgp.build_output_filepath(config=config, parent_only=True)) +tv.append(gtsrb_8_tv_relax) +rt.append(gtsrb_8_relax) + +config.sample_len = 16 +gtsrb_16_tv_relax, gtsrb_16_relax = summarize(vgp.build_output_filepath(config=config, parent_only=True)) +tv.append(gtsrb_16_tv_relax) +rt.append(gtsrb_16_relax) + +# GTSRB APPROX +config.sample_len = 4 +config.ver_algorithm = 'approx' +gtsrb_4_tv_approx, gtsrb_4_approx = summarize(vgp.build_output_filepath(config=config, parent_only=True)) +tv.append(gtsrb_4_tv_approx) +rt.append(gtsrb_4_approx) + +config.sample_len = 8 +gtsrb_8_tv_approx, gtsrb_8_approx = summarize(vgp.build_output_filepath(config=config, parent_only=True)) +tv.append(gtsrb_8_tv_approx) +rt.append(gtsrb_8_approx) + +config.sample_len = 16 +gtsrb_16_tv_approx, gtsrb_16_approx = summarize(vgp.build_output_filepath(config=config, parent_only=True)) +tv.append(gtsrb_16_tv_approx) +rt.append(gtsrb_16_approx) + + +# STMNIST RELAX +config.ds_type = 
'stmnist' +config.sample_len = 16 +config.ver_algorithm = 'relax' +stmnist_16_tv_relax, stmnist_16_relax = summarize(vsp.build_output_filepath(config=config, parent_only=True)) +tv.append(stmnist_16_tv_relax) +rt.append(stmnist_16_relax) + +config.sample_len = 32 +stmnist_32_tv_relax, stmnist_32_relax = summarize(vsp.build_output_filepath(config=config, parent_only=True)) +tv.append(stmnist_32_tv_relax) +rt.append(stmnist_32_relax) + +config.sample_len = 64 +stmnist_64_tv_relax, stmnist_64_relax = summarize(vsp.build_output_filepath(config=config, parent_only=True)) +tv.append(stmnist_64_tv_relax) +rt.append(stmnist_64_relax) + +# STMNIST APPROX +config.sample_len = 16 +config.ver_algorithm = 'approx' +stmnist_16_tv_approx, stmnist_16_approx = summarize(vsp.build_output_filepath(config=config, parent_only=True)) +tv.append(stmnist_16_tv_approx) +rt.append(stmnist_16_approx) + +config.sample_len = 32 +stmnist_32_tv_approx, stmnist_32_approx = summarize(vsp.build_output_filepath(config=config, parent_only=True)) +tv.append(stmnist_32_tv_approx) +rt.append(stmnist_32_approx) + +config.sample_len = 64 +stmnist_64_tv_approx, stmnist_64_approx = summarize(vsp.build_output_filepath(config=config, parent_only=True)) +tv.append(stmnist_64_tv_approx) +rt.append(stmnist_64_approx) + + +if __name__ == "__main__": + datasets = ["Zoom In", "Zoom Out", "GTSRB", "STMNIST"] + veralgs = ["relax", "approx"] + + # Create all the labels + labels = [] + for ds in datasets: + for va in veralgs: + if (ds == "Zoom In" or ds == "Zoom Out") and va == "approx": + sample_lengths = ["4", "8"] + + elif ds == "STMNIST": + sample_lengths = ["16", "32", "64"] + else: + sample_lengths = ["4", "8", "16"] + + for sl in sample_lengths: + for eps in ["1/255", "2/255", "3/255"]: + labels.append(f'{ds} {va} {sl} {eps}') + + # needs to be flattened for pairing with label later + total_verified_flattened = [ + x for sample in tv + for x in sample + ] + + # needs to be flattened for pairing with label later
+ running_time_flattened = [ + x for sample in rt + for x in sample + ] + + MAX_LENGTH_LABEL = max([len(label) for label in labels]) + + # Pair together the labels and results + labels_and_results = zip(labels, total_verified_flattened, running_time_flattened) + + # Create the row for the table + def format_row(sample): + label, tv, t = sample + label = f"{label:<{MAX_LENGTH_LABEL}}" + + # compute percent verified + num_attempted = 215 if 'GTSRB' in label else 100 + percent_verified = round((tv / num_attempted) * 100, 2) + t = round(t, 2) + + pv_string = f"{percent_verified:<{6}}" + at_string = f"{t:<{9}}" + + return " | ".join([label, pv_string, at_string]) + + # Create the human-readable table + output_file = os.path.join(output_dir, "table2.txt") + with open(output_file, 'w') as f: + + # write the header + label_header = f"{'LABEL':^{MAX_LENGTH_LABEL}}" + psrv_header = f"{'PSRV':^{6}}" + time_header = f"{'Avg. Time':^{10}}" + header = " | ".join([label_header, psrv_header, time_header]) + line = '-'*len(header) + f.write(header + "\n") + f.write(line + "\n") + + for sample in labels_and_results: + f.write(format_row(sample) + "\n") + + diff --git a/code/nnv/examples/Submission/FORMALISE2025/src/generate_output_plots.py b/code/nnv/examples/Submission/FORMALISE2025/src/generate_output_plots.py new file mode 100644 index 000000000..5ef024532 --- /dev/null +++ b/code/nnv/examples/Submission/FORMALISE2025/src/generate_output_plots.py @@ -0,0 +1,23 @@ +from dotenv import load_dotenv +import matlab.engine +import os + +if __name__=="__main__": + # load environment variables + load_dotenv() + + # define global variables + NNV_PATH = os.environ['NNV_PATH'] + NPY_MATLAB_PATH = os.environ['NPY_MATLAB_PATH'] + + eng = matlab.engine.start_matlab() + print('started matlab engine!') + + # add nnv path and npy-matlab path + eng.addpath(os.getcwd()) + eng.addpath(eng.genpath(NNV_PATH)) + eng.addpath(eng.genpath(NPY_MATLAB_PATH)) + + # run the script + 
eng.generate_output_plots(nargout=0) + diff --git a/code/nnv/examples/Submission/FORMALISE2025/src/pyproject.toml b/code/nnv/examples/Submission/FORMALISE2025/src/pyproject.toml new file mode 100644 index 000000000..629fd987a --- /dev/null +++ b/code/nnv/examples/Submission/FORMALISE2025/src/pyproject.toml @@ -0,0 +1,10 @@ +# https://stackoverflow.com/questions/6323860/sibling-package-imports +[project] +name = "vvn" +version = "0.0.1" +description = "Verification of video classification neural networks" + +[build-system] +build-backend = "flit_core.buildapi" +requires = ["flit_core >=3.2,<4"] + diff --git a/code/nnv/examples/Submission/FORMALISE2025/src/run.py b/code/nnv/examples/Submission/FORMALISE2025/src/run.py new file mode 100644 index 000000000..b58ef367c --- /dev/null +++ b/code/nnv/examples/Submission/FORMALISE2025/src/run.py @@ -0,0 +1,102 @@ +import os +import sys + +import vvn.prep as vp +import vvn.verify as vvn +from vvn.config import Config + + +if __name__ == "__main__": + # get the results dir + root = os.path.dirname(os.getcwd()) # Submission folder + output_dir = os.path.join(root, 'FORMALISE2025' ,'results') + + # define the starting configuration + config = Config( + class_size=10, + epsilon=[1/255, 2/255, 3/255], + ds_type='zoom_in', + sample_len=4, + ver_algorithm='relax', + timeout=1800, + output_dir=output_dir + ) + + # get the samples you wish to verify + # zoom_in_samples, zoom_out_samples = vp.generate_indices(config) + zoom_in_samples = [751, 137, 34, 878, 328, 289, 262, 168, 871, 126, 796, 877, 645, 108, 693, 501, 44, 41, 117, 257, 272, 597, 707, 37, 661, 237, 845, 763, 830, 645, 498, 259, 532, 692, 331, 972, 8, 904, 966, 193, 826, 501, 407, 331, 187, 254, 908, 402, 126, 116, 452, 121, 429, 411, 709, 317, 969, 57, 863, 543, 636, 150, 450, 99, 652, 350, 999, 736, 724, 432, 680, 230, 833, 90, 61, 780, 267, 922, 346, 100, 272, 125, 452, 331, 537, 744, 435, 198, 442, 423, 248, 790, 320, 830, 806, 761, 92, 714, 744, 207] + zoom_out_samples 
= [624, 882, 278, 180, 540, 439, 306, 757, 821, 654, 248, 817, 368, 949, 963, 59, 260, 34, 357, 465, 304, 69, 238, 666, 867, 356, 239, 776, 585, 460, 760, 536, 158, 301, 154, 280, 908, 659, 632, 297, 910, 687, 499, 686, 463, 418, 248, 152, 596, 578, 96, 922, 50, 117, 169, 738, 176, 989, 809, 491, 702, 67, 445, 441, 547, 616, 285, 649, 12, 809, 872, 126, 812, 630, 916, 303, 952, 758, 390, 120, 332, 507, 174, 529, 4, 873, 868, 297, 586, 933, 196, 594, 112, 736, 337, 755, 719, 223, 169, 433] + + if len(sys.argv) < 2: + zoom_in_samples = zoom_in_samples[:, :, 10] + zoom_out_samples = zoom_out_samples[:, :, 10] + + # ===================================== + # ============ RELAX ================== + # ===================================== + + # run experiment #1 : dataset = zoom in, video length = 4 + vvn.run(config=config, indices=zoom_in_samples) + + # run experiment #2 : dataset = zoom out, video length = 4 + config.ds_type = 'zoom_out' + vvn.run(config=config, indices=zoom_out_samples) + + # run experiment #3 : dataset = zoom in , video length = 8 + config.ds_type = 'zoom_in' + config.sample_len = 8 + vvn.run(config=config, indices=zoom_in_samples) + + # run experiment #4 : dataset = zoom out, video length = 8 + config.ds_type = 'zoom_out' + vvn.run(config=config, indices=zoom_out_samples) + + # run experiment #5 : dataset = zoom in, video length = 16 + config.ds_type = 'zoom_in' + config.sample_len = 16 + vvn.run(config=config, indices=zoom_in_samples) + + # run experiment #6 : dataset = zoom out, video length = 16 + config.ds_type = 'zoom_out' + vvn.run(config=config, indices=zoom_out_samples) + + + # ===================================== + # ============ APPROX ================= + # ===================================== + + config = Config( + class_size=10, + epsilon=[1/255, 2/255, 3/255], + ds_type='zoom_in', + sample_len=4, + ver_algorithm='approx', + timeout=1800, + output_dir=output_dir + ) + + # run experiment #1 : dataset = zoom in, video length = 4 + 
vvn.run(config=config, indices=zoom_in_samples) + + # run experiment #2 : dataset = zoom out, video length = 4 + config.ds_type = 'zoom_out' + vvn.run(config=config, indices=zoom_out_samples) + + # run experiment #3 : dataset = zoom in , video length = 8 + config.ds_type = 'zoom_in' + config.sample_len = 8 + vvn.run(config=config, indices=zoom_in_samples) + + # run experiment #4 : dataset = zoom out, video length = 8 + config.ds_type = 'zoom_out' + vvn.run(config=config, indices=zoom_out_samples) + + # run experiment #5 : dataset = zoom in, video length = 16 + config.ds_type = 'zoom_in' + config.sample_len = 16 + vvn.run(config=config, indices=zoom_in_samples) + + # run experiment #6 : dataset = zoom out, video length = 16 + config.ds_type = 'zoom_out' + vvn.run(config=config, indices=zoom_out_samples) + diff --git a/code/nnv/examples/Submission/FORMALISE2025/src/run_gtsrb.py b/code/nnv/examples/Submission/FORMALISE2025/src/run_gtsrb.py new file mode 100644 index 000000000..99edd7458 --- /dev/null +++ b/code/nnv/examples/Submission/FORMALISE2025/src/run_gtsrb.py @@ -0,0 +1,71 @@ +import os +import sys + +import vvn.gtsrbprep as vgp +import vvn.verify as vvn +from vvn.config import Config + + +if __name__ == "__main__": + # get the results dir + root = os.path.dirname(os.getcwd()) + output_dir = os.path.join(root, 'FORMALISE2025' ,'results') + + # define the starting configuration + config = Config( + class_size=5, + epsilon=[1/255, 2/255, 3/255], + ds_type='gtsrb', + sample_len=4, + ver_algorithm='relax', + timeout=1800, + output_dir=output_dir + ) + + # get the samples you wish to verify + # samples = vgp.generate_indices(config) + samples = [12597, 2196, 507, 5423, 4840, 4410, 2743, 2020, 10754, 1707, 11622, 8312, 635, 596, 1851, 4320, 4595, 9948, 11849, 535, 11062, 3942, 10746, 8256, 4356, 8857, 11599, 5483, 127, 3154, 8326, 6696, 5474, 3075, 4256, 6625, 2013, 1832, 7475, 1908, 7068, 6765, 11879, 5214, 858, 9051, 10579, 2459, 7450, 1550, 10884, 5771, 12391, 
12180, 7128, 11366, 3809, 1367, 910, 4497, 5706, 1570, 4599, 1989, 7478, 5480, 8937, 12524, 7190, 3209, 7303, 6996, 4143, 5263, 1406, 11985, 12510, 3378, 10534, 4838, 3228, 9106, 7467, 5324, 12607, 10979, 4343, 6387, 1109, 4525, 641, 6208, 7898, 5278, 1302, 4171, 11173, 6190, 4204, 9824, 7788, 9041, 2814, 5221, 2744, 4873, 11065, 10632, 5180, 11510, 8452, 11492, 7863, 7133, 4337, 2721, 10042, 9713, 1796, 935, 2161, 3020, 12365, 3161, 8318, 11739, 1249, 7568, 7504, 11728, 9222, 10436, 4971, 10910, 228, 2253, 10594, 5260, 12629, 6695, 2199, 5778, 8578, 3125, 8944, 61, 5195, 9852, 3527, 10001, 2101, 12318, 5875, 12588, 10001, 11982, 3943, 3020, 7371, 3190, 10639, 10457, 10, 11787, 6384, 9622, 398, 2204, 7152, 6052, 4732, 1147, 4760, 11178, 1549, 1685, 9577, 1358, 10501, 2476, 2531, 9366, 10839, 3267, 5224, 10405, 11933, 8336, 4190, 10637, 3985, 6135, 7857, 7363, 8642, 10196, 8901, 2385, 4901, 4439, 1258, 6656, 427, 11580, 10921, 4549, 11583, 4353, 141, 1398, 12441, 1162, 4523, 1326, 629] + + if len(sys.argv) < 2: + samples = samples[:, :, 10] + + # ===================================== + # ============ RELAX ================== + # ===================================== + + # run experiment #1 : dataset = gtsrb, video length = 4 + vvn.run_gtsrb(config=config, indices=samples) + + # run experiment #2 : dataset = gtsrb, video length = 8 + config.sample_len = 8 + vvn.run_gtsrb(config=config, indices=samples) + + # run experiment #3 : dataset = gtsrb, video length = 16 + config.sample_len = 16 + vvn.run_gtsrb(config=config, indices=samples) + + + # ===================================== + # ============ APPROX ================= + # ===================================== + + config = Config( + class_size=5, + epsilon=[1/255, 2/255, 3/255], + ds_type='gtsrb', + sample_len=4, + ver_algorithm='approx', + timeout=1800, + output_dir=output_dir + ) + + # run experiment #1 : dataset = gtsrb, video length = 4 + vvn.run_gtsrb(config=config, indices=samples) + + # run experiment #2 : 
dataset = gtsrb, video length = 8 + config.sample_len = 8 + vvn.run_gtsrb(config=config, indices=samples) + + # run experiment #3 : dataset = gtsrb, video length = 16 + config.sample_len = 16 + vvn.run_gtsrb(config=config, indices=samples) diff --git a/code/nnv/examples/Submission/FORMALISE2025/src/run_stmnist.py b/code/nnv/examples/Submission/FORMALISE2025/src/run_stmnist.py new file mode 100644 index 000000000..914792553 --- /dev/null +++ b/code/nnv/examples/Submission/FORMALISE2025/src/run_stmnist.py @@ -0,0 +1,70 @@ +import os +import sys + +import vvn.stmnistprep as vsp +import vvn.verify as vvn +from vvn.config import Config + + +if __name__ == "__main__": + # get the results dir + root = os.path.dirname(os.getcwd()) + output_dir = os.path.join(root, 'FORMALISE2025' ,'results') + + # define the starting configuration + config = Config( + class_size=10, + epsilon=[1/255, 2/255, 3/255], + ds_type='stmnist', + sample_len=16, + ver_algorithm='relax', + timeout=1800, + output_dir=output_dir + ) + + # get the samples you wish to verify + # samples = vsp.generate_indices(config) + samples = [311, 64, 719, 646, 595, 385, 268, 220, 1090, 77, 75, 237, 568, 616, 1290, 67, 515, 1086, 579, 1162, 725, 14, 425, 1090, 880, 418, 558, 866, 268, 236, 992, 256, 915, 888, 694, 112, 1181, 1378, 344, 989, 197, 763, 932, 499, 171, 117, 603, 758, 200, 616, 267, 992, 725, 1172, 946, 430, 973, 911, 545, 700, 182, 450, 1373, 646, 431, 1187, 990, 709, 573, 841, 139, 605, 77, 821, 1045, 701, 164, 549, 820, 550, 1282, 1029, 1180, 396, 696, 385, 649, 1382, 693, 1106, 1039, 932, 573, 384, 1298, 1267, 234, 120, 298, 413] + + if len(sys.argv) < 2: + samples = samples[:, :, 10] + + # ===================================== + # ============ RELAX ================== + # ===================================== + + # run experiment #1 : dataset = stmnist, video length = 16 + vvn.run_stmnist(config=config, indices=samples) + + # run experiment #2 : dataset = stmnist, video length = 32 + 
config.sample_len = 32 + vvn.run_stmnist(config=config, indices=samples) + + # run experiment #3 : dataset = stmnist, video length = 64 + config.sample_len = 64 + vvn.run_stmnist(config=config, indices=samples) + + # ===================================== + # ============ APPROX ================= + # ===================================== + + config = Config( + class_size=10, + epsilon=[1/255, 2/255, 3/255], + ds_type='stmnist', + sample_len=16, + ver_algorithm='approx', + timeout=1800, + output_dir=output_dir + ) + + # run experiment #1 : dataset = stmnist, video length = 16 + vvn.run_stmnist(config=config, indices=samples) + + # run experiment #2 : dataset = stmnist, video length = 32 + config.sample_len = 32 + vvn.run_stmnist(config=config, indices=samples) + + # run experiment #3 : dataset = stmnist, video length = 64 + config.sample_len = 64 + vvn.run_stmnist(config=config, indices=samples) \ No newline at end of file diff --git a/code/nnv/examples/Submission/FORMALISE2025/src/smoketest.m b/code/nnv/examples/Submission/FORMALISE2025/src/smoketest.m new file mode 100644 index 000000000..ec4d86357 --- /dev/null +++ b/code/nnv/examples/Submission/FORMALISE2025/src/smoketest.m @@ -0,0 +1,62 @@ +% Run smoke test. 
% Smoke test: verify one sample per (dataset, frame-count, algorithm) combination.
% BUG FIX: "stminst" was a typo for "stmnist" -- with the typo the STMNIST
% branch below never matched, so STMNIST got the wrong frame counts and the
% wrong dataset string was passed to verifystmnist.
datasets = ["zoom_in", "zoom_out", "gtsrb", "stmnist"];
veralgs = ["relax", "approx"];
timeout = 1800;
epsilon_index = 1;

% Compute number of samples to verify in smoke test
% (zoom datasets skip the 16-frame case below, so some rows stay empty)
numSamples = length(datasets) * length(veralgs) * 3;

% BUG FIX: results must be a string array -- the original used zeros(), a
% numeric matrix, and assigning "Res" into it raises a conversion error.
results = strings(numSamples + 1, 3);

% Add headers to the results file
results(1, 1) = "Res";
results(1, 2) = "Time";
results(1, 3) = "Met";

% Row 1 holds the headers, so data rows start at 2.
num_samples_verified = 2;

for dataset_index = 1:length(datasets)
    dataset = datasets(dataset_index);

    % STMNIST models are trained on longer event streams.
    if dataset == "stmnist"
        frame_nums = [16, 32, 64];
    else
        frame_nums = [4, 8, 16];
    end

    for frame_num_index = 1:length(frame_nums)
        frame_num = frame_nums(frame_num_index);

        for veralg_index = 1:length(veralgs)
            veralg = veralgs(veralg_index);

            if dataset == "zoom_in" || dataset == "zoom_out"
                % 16-frame zoom runs are too slow for a smoke test.
                if frame_num == 16
                    continue
                end
                [res, t, met] = verifyvideo(dataset, frame_num, veralg, 1, 751);

            elseif dataset == "gtsrb"
                [res, t, met] = verifygtsrb(dataset, frame_num, veralg, 1, 1);

            else
                [res, t, met] = verifystmnist(dataset, frame_num, veralg, 1, 1);
            end

            % Add the results to the table (explicit string() so numeric
            % res/t and char met all land cleanly in the string matrix)
            results(num_samples_verified, 1) = string(res);
            results(num_samples_verified, 2) = string(t);
            results(num_samples_verified, 3) = string(met);

            % Increment to next row in results table
            num_samples_verified = num_samples_verified + 1;
        end
    end
end

% Save the results
filename = "smoketest_outputs.txt";
writematrix(results, filename);
diff --git a/code/nnv/examples/Submission/FORMALISE2025/src/vvn.m b/code/nnv/examples/Submission/FORMALISE2025/src/vvn.m new file mode 100644 index 000000000..e69de29bb diff --git a/code/nnv/examples/Submission/FORMALISE2025/src/vvn/__init__.py b/code/nnv/examples/Submission/FORMALISE2025/src/vvn/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/code/nnv/examples/Submission/FORMALISE2025/src/vvn/config.py b/code/nnv/examples/Submission/FORMALISE2025/src/vvn/config.py new file mode 100644 index
000000000..c4b5962b1 --- /dev/null +++ b/code/nnv/examples/Submission/FORMALISE2025/src/vvn/config.py @@ -0,0 +1,42 @@ +import copy +from dataclasses import dataclass, fields +from typing import List, Literal, Self + + +@dataclass +class Config: + # Experimental settings + class_size: int # number of samples to verify per class + epsilon: List # [1/255, 2/255, 3/255] + timeout: int # 1800 (s) + output_dir: str # /path/to/vvn/results + + # Verification settings + ds_type: Literal['zoom_in', 'zoom_out', 'gtsrb', 'stmnist'] + sample_len: Literal[4, 8, 16, 32, 64] # length of videos in number of frames + ver_algorithm: Literal['relax', 'approx'] # types of verification algorithms to use + + def update(self, **kwargs): + for k, v in kwargs.items(): + if hasattr(self, k): + setattr(self, k, v) + else: + raise AttributeError(f'{k} is not a valid field of {self.__class__.__name__}.') + + def tostr(self) -> Self: + # make a deepcopy of the config so that we don't + # accidentally override the original config values + config = copy.deepcopy(self) + + for f in fields(config): + value = getattr(config, f.name) + + # handle case where value is list + if isinstance(value, list): + setattr(config, f.name, list(map(str, value))) + + # should be safe to handle everything else + elif not isinstance(value, str): + setattr(config, f.name, str(value)) + + return config diff --git a/code/nnv/examples/Submission/FORMALISE2025/src/vvn/generate_output_plots.m b/code/nnv/examples/Submission/FORMALISE2025/src/vvn/generate_output_plots.m new file mode 100644 index 000000000..7aeadcb7e --- /dev/null +++ b/code/nnv/examples/Submission/FORMALISE2025/src/vvn/generate_output_plots.m @@ -0,0 +1,213 @@ +%% Load things +% Load data +data = readNPY("data/STMNIST/test/stmnistvideo_64f_test_data_seq.npy"); +labels = readNPY("data/STMNIST/test/stmnistvideo_64f_test_labels_seq.npy"); + +% Preprocessing +% from [B D C H W] to [B D H W C] +reshaped_data = permute(data, [1, 3, 4, 5, 2]); +datacopy = 
reshaped_data(:,:,:,:,:); + +% Experimental variables +numClasses = 10; + +% Size of attack +epsilon = [1/255; 2/255; 3/255]; +nE = length(epsilon); + +% Load the model +modelName = "stmnist_64f.onnx"; +netonnx = importONNXNetwork("models/" + modelName, "InputDataFormats", "BCSSS", "OutputDataFormats", "BC"); +net = matlab2nnv(netonnx); +net.OutputSize = numClasses; +disp("Finished loading model: " + modelName); + +%% Verification settings +reachOptions = struct; +reachOptions.reachMethod = "relax-star-area"; +reachOptions.relaxFactor = 0.5; + +%% Make predictions on test set to check that we are verifying correct +outputLabels = zeros(length(datacopy)); +index = 719; % 311 for a really long example + +s = datacopy(index,:,:,:,:); +s = squeeze(s); +l = labels(index) + 1; + +fprintf('Label: %d \n', l); + +%% ROBUST SAMPLE +eps = 1/255; + +fprintf('Starting verification with epsilon %d \n', eps); + +% Perform L_inf attack +[VS, lb_clip, ub_clip] = L_inf_attack(s, eps, 64); + +output = net.evaluate(s); +[~, P] = max(output); + +LB_output = net.evaluate(lb_clip); +[~, LBPred] = max(LB_output); + +UB_output = net.evaluate(ub_clip); +[~, UBPred] = max(UB_output); + +%% +t = tic; + +% NEED THIS HERE SO MET EXISTS +try + % run verification algorithm + fprintf("Verification algorithm starting.\n") + temp = net.verify_robustness(VS, reachOptions, l); + fprintf("Verification algorithm finished.\n") + +catch ME + met = ME.message; + temp = -1; + fprintf(met); +end + +p = profile('info'); + +res = temp; +time = toc(t); + +fprintf("\n"); +fprintf("Result : %d \n", res); +fprintf("Time: %f\n", time); + +fprintf("Computing reach sets...\n") +% Get the reachable sets +R = net.reachSet{end}; + +fprintf("Done computing reach sets! \n") + + +fprintf("Get the ranges for ub/lb \n") +% Get ranges for each output index +[lb_out, ub_out] = R.getRanges; +lb_out = squeeze(lb_out); +ub_out = squeeze(ub_out); + +fprintf("Now to plotting! 
\n");

% Get middle point for each output and range sizes
mid_range = (lb_out + ub_out)/2;
range_size = ub_out - mid_range;

% Label for x-axis
x = [0 1 2 3 4 5 6 7 8 9];

% Visualize set ranges and evaluation points
fig = figure;
errorbar(x, mid_range, range_size, '.', 'Color', 'r', 'LineWidth', 2);
hold on;
xlim([-0.5, 9.5]);
scatter(x, output, 30, 'x', 'MarkerEdgeColor', 'r');
title('Reachable Outputs');
xlabel('Label');
ylabel('Reachable Output Range on the Input Set');
% Save the figure
% BUG FIX: saveas(figure, ...) calls the figure *function*, creating and
% saving a brand-new blank figure; pass the handle of the plot instead.
saveas(fig, "figs/reach_stmnist_plot.png");

%% NON-ROBUST SAMPLE
% Repeat verification with a larger perturbation expected to break robustness.
% (NOTE(review): `eps` shadows the MATLAB builtin eps -- harmless here.)
eps = 3/255;

fprintf('Starting verification with epsilon %d \n', eps);

% Perform L_inf attack
[VS, lb_clip, ub_clip] = L_inf_attack(s, eps, 64);

output = net.evaluate(s);
[~, P] = max(output);

LB_output = net.evaluate(lb_clip);
[~, LBPred] = max(LB_output);

UB_output = net.evaluate(ub_clip);
[~, UBPred] = max(UB_output);

%%
t = tic;

% NEED THIS HERE SO MET EXISTS
try
    % run verification algorithm
    fprintf("Verification algorithm starting.\n")
    temp = net.verify_robustness(VS, reachOptions, l);
    fprintf("Verification algorithm finished.\n")

catch ME
    met = ME.message;
    temp = -1;
    % BUG FIX: never use an arbitrary message as a format string --
    % a '%' in the error text would corrupt the output or error out.
    fprintf('%s', met);
end

p = profile('info');

res = temp;
time = toc(t);

fprintf("\n");
fprintf("Result : %d \n", res);
fprintf("Time: %f\n", time);

fprintf("Computing reach sets...\n")
% Get the reachable sets
R = net.reachSet{end};

fprintf("Done computing reach sets! \n")


fprintf("Get the ranges for ub/lb \n")
% Get ranges for each output index
[lb_out, ub_out] = R.getRanges;
lb_out = squeeze(lb_out);
ub_out = squeeze(ub_out);

fprintf("Now to plotting! \n");

% Get middle point for each output and range sizes
mid_range = (lb_out + ub_out)/2;
range_size = ub_out - mid_range;

% Label for x-axis
x = [0 1 2 3 4 5 6 7 8 9];

% Visualize set ranges and evaluation points
fig = figure;
errorbar(x, mid_range, range_size, '.', 'Color', 'r', 'LineWidth', 2);
hold on;
xlim([-0.5, 9.5]);
scatter(x, output, 30, 'x', 'MarkerEdgeColor', 'r');
title('Reachable Outputs');
xlabel('Label');
ylabel('Reachable Output Range on the Input Set');
% Save the figure
% BUG FIX: same saveas(figure, ...) defect as above -- use the plot handle.
saveas(fig, "figs/reach_bad_stmnist_plot.png");


%% Helper Functions
% Build a VolumeStar input set for an L_inf attack of size epsilon on every
% frame of video x, clipping the perturbed bounds to the valid [0, 1] range.
% (assumes x squeezes to numFrames x 10 x 10 x 2 -- the STMNIST shape)
function [VS, lb_clip, ub_clip] = L_inf_attack(x, epsilon, numFrames)
    lb = squeeze(x);
    ub = squeeze(x);

    % Perturb the frames
    for fn=1:numFrames
        lb(fn, :, :, :) = x(fn, :, :, :) - epsilon;
        ub(fn, :, :, :) = x(fn, :, :, :) + epsilon;
    end

    % Clip the perturbed values to be between 0-1
    lb_min = zeros(numFrames, 10, 10, 2);
    ub_max = ones(numFrames, 10, 10, 2);
    lb_clip = max(lb, lb_min);
    ub_clip = min(ub, ub_max);

    % Create the volume star
    VS = VolumeStar(lb_clip, ub_clip);
end
diff --git a/code/nnv/examples/Submission/FORMALISE2025/src/vvn/gtsrbprep.py b/code/nnv/examples/Submission/FORMALISE2025/src/vvn/gtsrbprep.py new file mode 100644 index 000000000..f583e351f --- /dev/null +++ b/code/nnv/examples/Submission/FORMALISE2025/src/vvn/gtsrbprep.py @@ -0,0 +1,161 @@
# python standard library
import csv
import itertools
import onnxruntime
import os
import random
from functools import reduce
from collections import defaultdict
from typing import List

# third-party packages
import numpy as np

# local modules
from vvn.config import Config

# define global variables
PATH_TO_MODELS = os.path.join(os.getcwd(), 'models')
PATH_TO_DATA = os.path.join(os.getcwd(), 'data')

# set seed
# random.seed(42)

def prepare_filetree(config: Config):
    dst = 'gtsrb'
    for va in ['relax', 'approx']:
        for length in ['4', '8', '16']:
            for eps_filename in
[f'eps={e}_255' for e in range(1, 4)]: + fp = build_output_filepath(config, eps_filename) + + # create the parent directories if they don't already exist + os.makedirs(os.path.join(config.output_dir, dst, va, length), exist_ok=True) + + # make the results files once we know all directories have been made + for va in ['relax', 'approx']: + for length in ['4', '8', '16']: + for eps_filename in [f'eps={e}_255' for e in range(1, 4)]: + fp = build_output_filepath(config, eps_filename) + + # if the file doesn't exist yet, create it + if not os.path.isfile(fp): + with open(fp, 'w', newline='') as f: + # write CSV headers + writer = csv.writer(f) + writer.writerow(['Sample Number', 'Result', 'Time', 'Method']) + +def build_output_filepath(config: Config, filename=None, parent_only=False): + """ + For our purposes, we will be naming the file based on + the epsilon value used for the verification. + """ + # error handling + if not filename and not parent_only: + raise Exception(f'No filename given. 
Please provide a filename when parent_only={parent_only}.') + + # convert all config values to str + str_config = config.tostr() + + # get the values we need for building the output filepath + output_dir = str_config.output_dir + dst = str_config.ds_type + va = str_config.ver_algorithm + length = str_config.sample_len + + fp = os.path.join(output_dir, dst, va, length) + + return fp if parent_only else os.path.join(fp, filename) + '.csv' + +def get_correct_samples(modelpath, datapath) -> tuple[list[int], list[int]]: + outputs = [] + for sample_len in ['4', '8', '16']: + + # load the data + labels + data = np.load(os.path.join(datapath, 'GTSRB', 'test', f'gtsrbvideo_{sample_len}f_test_data_seq.npy')) + labels = np.load(os.path.join(datapath, 'GTSRB', 'test', f'gtsrbvideo_test_labels_seq.npy')) + + # select the model + model = os.path.join(modelpath, f'gtsrb_{sample_len}f.onnx') + + # load the model + start onnx runtime session + session = onnxruntime.InferenceSession(model) + + # specify input name for inference + input_name = session.get_inputs()[0].name + + # run inference + model_outputs = [] + + for i in range(data.shape[0]): + sample = data[i:i+1] + sample = sample.transpose(0, 2, 1, 3, 4) + sample = sample.astype(np.float32) # expects tensor(float) not tensor(double) + output = session.run(None, {input_name: sample}) + model_outputs.append(output[0]) + + # convert model_outputs from logits for each class to prediction + model_outputs = [np.argmax(model_outputs[i], axis=1) for i in range(data.shape[0])] + + # get the true labels and compare them to the outputs + labels = labels.astype(int).tolist() + + # filter for only correctly classified samples + correct_samples = [i for i in range(data.shape[0]) if model_outputs[i] == labels[i]] + + # add the model outputs to the corresponding list of correct samples + outputs.append(correct_samples) + + # return only samples that are correctly classified by all models + outputs = list(reduce(lambda out, lst: 
out.intersection(lst), map(set, outputs))) + + return outputs + +def generate_indices(config) -> tuple[list[int], list[int]]: + # unpack config settings + class_size = config.class_size + + # get the indices of all correctly classified samples + correct_outputs = get_correct_samples(PATH_TO_MODELS, PATH_TO_DATA) + + # partition the correctly classified samples by class + indices = defaultdict(list, {value: [i for i in correct_outputs] for value in range(0, 43)}) + + # check that there are atleast 10 correctly classified samples for each class + if not all(len(lst) >= class_size for lst in indices.values()): + raise Exception("Not enough correctly classified samples for 'zoom_in'.") + + # randomly sample 10 of the correctly classified samples per class + indices = [random.sample(indices[class_label], class_size) for class_label in indices.keys()] + + # flatten the list before returning + indices = list(itertools.chain(*indices)) + + # add 1 to all values of list because MATLAB uses 1-indexing + indices = [v + 1 for v in indices] + + if len(indices) < class_size * 10: + raise Exception("Not enough correctly classified samples.") + + # write the indices for the current experiment to its path (in this case it will be in random directory) + with open(os.path.join(os.getcwd(), 'results', 'random', 'gtsrb_indices.txt'), 'w') as f: + f.write(f'GTSRB Indices : {indices} \n') + + return indices + + + + + + + + + + + + + + + + + + diff --git a/code/nnv/examples/Submission/FORMALISE2025/src/vvn/prep.py b/code/nnv/examples/Submission/FORMALISE2025/src/vvn/prep.py new file mode 100644 index 000000000..167d9edfd --- /dev/null +++ b/code/nnv/examples/Submission/FORMALISE2025/src/vvn/prep.py @@ -0,0 +1,181 @@ +# python standard library +import csv +import itertools +import onnxruntime +import os +import random +from functools import reduce +from collections import defaultdict +from typing import List + +# third-party packages +import numpy as np + +# local modules +from vvn.config 
import Config + +# define global variables +PATH_TO_MODELS = os.path.join(os.getcwd(), 'models') +PATH_TO_DATA = os.path.join(os.getcwd(), 'data') + +# set seed +random.seed(42) + +def prepare_filetree(config: Config): + for dst in ['zoom_in', 'zoom_out']: + for va in ['relax', 'approx']: + for length in ['4', '8', '16']: + for eps_filename in [f'eps={e}_255' for e in range(1, 4)]: + fp = build_output_filepath(config, eps_filename) + + # create the parent directories if they don't already exist + os.makedirs(os.path.join(config.output_dir, dst, va, length), exist_ok=True) + + # make the results files once we know all directories have been made + for dst in ['zoom_in', 'zoom_out']: + for va in ['relax', 'approx']: + for length in ['4', '8', '16']: + for eps_filename in [f'eps={e}_255' for e in range(1, 4)]: + fp = build_output_filepath(config, eps_filename) + + # if the file doesn't exist yet, create it + if not os.path.isfile(fp): + with open(fp, 'w', newline='') as f: + # write CSV headers + writer = csv.writer(f) + writer.writerow(['Sample Number', 'Result', 'Time', 'Method']) + +def build_output_filepath(config: Config, filename=None, parent_only=False): + """ + For our purposes, we will be naming the file based on + the epsilon value used for the verification. + """ + # error handling + if not filename and not parent_only: + raise Exception(f'No filename given. 
Please provide a filename when parent_only={parent_only}.') + + # convert all config values to str + str_config = config.tostr() + + # get the values we need for building the output filepath + output_dir = str_config.output_dir + dst = str_config.ds_type + va = str_config.ver_algorithm + length = str_config.sample_len + + fp = os.path.join(output_dir, dst, va, length) + + return fp if parent_only else os.path.join(fp, filename) + '.csv' + +def get_correct_samples(modelpath, datapath) -> tuple[list[int], list[int]]: + zoom_in_outputs = [] + zoom_out_outputs = [] + for dst in ['zoom_in', 'zoom_out']: + alt_dst = 'ZoomIn' if dst == 'zoom_in' else 'ZoomOut' + for sample_len in ['4', '8', '16']: + + # load the data + labels + data = np.load(os.path.join(datapath, alt_dst, 'test', f'mnistvideo_{dst}_{sample_len}f_test_data_seq.npy')) + labels = np.load(os.path.join(datapath, alt_dst, 'test', f'mnistvideo_{dst}_test_labels_seq.npy')) + + # select the model + model_dst = dst.replace('_', '') + model = os.path.join(modelpath, f'{model_dst}_{sample_len}f.onnx') + + # load the model + start onnx runtime session + session = onnxruntime.InferenceSession(model) + + # specify input name for inference + input_name = session.get_inputs()[0].name + + # run inference + model_outputs = [] + + for i in range(data.shape[0]): + sample = data[i:i+1] + sample = sample.transpose(0, 2, 1, 3, 4) + output = session.run(None, {input_name: sample}) + model_outputs.append(output[0]) + + # convert model_outputs from logits for each class to prediction + model_outputs = [np.argmax(model_outputs[i], axis=1) for i in range(data.shape[0])] + + # get the true labels and compare them to the outputs + labels = labels.astype(int).tolist() + + # filter for only correctly classified samples + correct_samples = [i for i in range(data.shape[0]) if model_outputs[i] == labels[i]] + + # add the model outputs to the corresponding list of correct samples + if dst == 'zoom_in': + 
zoom_in_outputs.append(correct_samples) + # dst == 'zoom_out' + else: + zoom_out_outputs.append(correct_samples) + + # return only samples that are correctly classified by all models + zoom_in = list(reduce(lambda out, lst: out.intersection(lst), map(set, zoom_in_outputs))) + zoom_out = list(reduce(lambda out, lst: out.intersection(lst), map(set, zoom_out_outputs))) + + return zoom_in, zoom_out + +def generate_indices(config) -> tuple[list[int], list[int]]: + # unpack config settings + class_size = config.class_size + + # get the indices of all correctly classified samples + correct_zoom_in, correct_zoom_out = get_correct_samples(PATH_TO_MODELS, PATH_TO_DATA) + + # partition the correctly classified samples by class + zoom_in_indices = defaultdict(list, {value: [i for i in correct_zoom_in] for value in range(0, 10)}) + zoom_out_indices = defaultdict(list, {value: [i for i in correct_zoom_out] for value in range(0, 10)}) + + # check that there are atleast 10 correctly classified samples for each class + if not all(len(lst) >= class_size for lst in zoom_in_indices.values()): + raise Exception("Not enough correctly classified samples for 'zoom_in'.") + elif not all(len(lst) >= class_size for lst in zoom_out_indices.values()): + raise Exception("Not enough correctly classified samples for 'zoom_out'.") + + # randomly sample 10 of the correctly classified samples per class + zoom_in_indices = [random.sample(zoom_in_indices[class_label], class_size) for class_label in zoom_in_indices.keys()] + zoom_out_indices = [random.sample(zoom_out_indices[class_label], class_size) for class_label in zoom_out_indices.keys()] + + # flatten the list before returning + zoom_in_indices = list(itertools.chain(*zoom_in_indices)) + zoom_out_indices = list(itertools.chain(*zoom_out_indices)) + + # add 1 to all values of list because MATLAB uses 1-indexing + zoom_in_indices = [v + 1 for v in zoom_in_indices] + zoom_out_indices = [v + 1 for v in zoom_out_indices] + + if len(zoom_in_indices) < 
class_size * 10 or len(zoom_out_indices) < class_size * 10: + raise Exception("Not enough correctly classified samples.") + + print(f'Zoom In Indices : {zoom_in_indices} \n') + print(f'Zoom Out Indices : {zoom_out_indices} \n') + + # write the indices for the current experiment to its path (in this case it will be in random directory) + with open(os.path.join(os.getcwd(), 'results', 'random', 'indices.txt'), 'w') as f: + f.write(f'Zoom In Indices : {zoom_in_indices} \n') + f.write(f'Zoom Out Indices : {zoom_out_indices} \n') + + return zoom_in_indices, zoom_out_indices + + + + + + + + + + + + + + + + + + + diff --git a/code/nnv/examples/Submission/FORMALISE2025/src/vvn/stmnistprep.py b/code/nnv/examples/Submission/FORMALISE2025/src/vvn/stmnistprep.py new file mode 100644 index 000000000..911ae254e --- /dev/null +++ b/code/nnv/examples/Submission/FORMALISE2025/src/vvn/stmnistprep.py @@ -0,0 +1,165 @@ +# python standard library +import csv +import itertools +import onnxruntime +import os +import random +from functools import reduce +from collections import defaultdict +from typing import List + +# third-party packages +import numpy as np + +# local modules +from vvn.config import Config + +# define global variables +PATH_TO_MODELS = os.path.join(os.getcwd(), 'models') +PATH_TO_DATA = os.path.join(os.getcwd(), 'data') + +# set seed +# random.seed(42) + +def prepare_filetree(config: Config): + dst = 'stmnist' + for va in ['relax', 'approx']: + for length in ['16', '32', '64']: + for eps_filename in [f'eps={e}_255' for e in range(1, 4)]: + fp = build_output_filepath(config, eps_filename) + + # create the parent directories if they don't already exist + os.makedirs(os.path.join(config.output_dir, dst, va, length), exist_ok=True) + + # make the results files once we know all directories have been made + for va in ['relax', 'approx']: + for length in ['16', '32', '64']: + for eps_filename in [f'eps={e}_255' for e in range(1, 4)]: + fp = build_output_filepath(config, 
eps_filename) + + # if the file doesn't exist yet, create it + if not os.path.isfile(fp): + with open(fp, 'w', newline='') as f: + # write CSV headers + writer = csv.writer(f) + writer.writerow(['Sample Number', 'Result', 'Time', 'Method']) + +def build_output_filepath(config: Config, filename=None, parent_only=False): + """ + For our purposes, we will be naming the file based on + the epsilon value used for the verification. + """ + # error handling + if not filename and not parent_only: + raise Exception(f'No filename given. Please provide a filename when parent_only={parent_only}.') + + # convert all config values to str + str_config = config.tostr() + + # get the values we need for building the output filepath + output_dir = str_config.output_dir + dst = str_config.ds_type + va = str_config.ver_algorithm + length = str_config.sample_len + + fp = os.path.join(output_dir, dst, va, length) + + return fp if parent_only else os.path.join(fp, filename) + '.csv' + +def get_correct_samples(modelpath, datapath) -> tuple[list[int], list[int]]: + outputs = [] + for sample_len in ['16', '32', '64']: + + # load the data + labels + data = np.load(os.path.join(datapath, 'STMNIST', 'test', f'stmnistvideo_{sample_len}f_test_data_seq.npy')) + labels = np.load(os.path.join(datapath, 'STMNIST', 'test', f'stmnistvideo_{sample_len}f_test_labels_seq.npy')) + + # select the model + model = os.path.join(modelpath, f'stmnist_{sample_len}f.onnx') + + # load the model + start onnx runtime session + session = onnxruntime.InferenceSession(model) + + # specify input name for inference + input_name = session.get_inputs()[0].name + + # run inference + model_outputs = [] + + for i in range(data.shape[0]): + sample = data[i:i+1] + sample = sample.astype(np.float32) # expects tensor(float) not tensor(double) + output = session.run(None, {input_name: sample}) + model_outputs.append(output[0]) + + # convert model_outputs from logits for each class to prediction + model_outputs = 
def generate_indices(config) -> list[int]:
    """
    Choose `config.class_size` correctly-classified STMNIST test samples per
    class (10 classes) and return their 1-based indices for use from MATLAB.

    Also writes the chosen indices to results/random/stmnist_indices.txt.

    :param config: experiment configuration; only `class_size` is read here.
    :return: flat list of 1-based sample indices.
    :raises Exception: when there are not enough correctly classified samples.
    """
    # unpack config settings
    class_size = config.class_size

    # indices of samples classified correctly by all three (16f/32f/64f) models
    correct_outputs = get_correct_samples(PATH_TO_MODELS, PATH_TO_DATA)

    # NOTE(review): despite the intent to partition by class, this maps EVERY
    # class label to the SAME full list of correct indices — the true labels
    # are not available here (get_correct_samples does not return them), so
    # the per-class sampling below may be unbalanced across classes and may
    # pick the same sample for more than one class. TODO: confirm intent and
    # partition using the actual labels.
    indices = defaultdict(list, {value: [i for i in correct_outputs] for value in range(0, 10)})

    # check that there are at least `class_size` correctly classified samples per class
    if not all(len(lst) >= class_size for lst in indices.values()):
        raise Exception("Not enough correctly classified samples.")

    # randomly sample `class_size` of the correctly classified samples per class
    indices = [random.sample(indices[class_label], class_size) for class_label in indices.keys()]

    # flatten the list before returning
    indices = list(itertools.chain(*indices))

    # add 1 to all values of list because MATLAB uses 1-indexing
    indices = [v + 1 for v in indices]

    # defensive re-check of the final count
    if len(indices) < class_size * 10:
        raise Exception("Not enough correctly classified samples.")

    print(f'Indices : {indices} \n')

    # write the indices for the current experiment to its path (in this case it will be in random directory)
    with open(os.path.join(os.getcwd(), 'results', 'random', 'stmnist_indices.txt'), 'w') as f:
        f.write(f'STMNIST Indices : {indices} \n')

    return indices
function [] = check_verify(dsVar, smpLen, vType, rType, index, eps)
    % Verify robustness of one video sample and plot the reachable output
    % ranges against the concrete network outputs.
    %
    % dsVar (string) : the dataset type. either "zoom_in" or "zoom_out".
    % smpLen (int) : the length of a sample (video) in the dataset. either 4, 8, or 16.
    % vType (string) : the type of video verification. either "single_frame" or "all_frames".
    % rType (string) : the type of reachability method used. either "relax-star-area" or "approx-star".
    % index (int) : the index into the dataset for the sample we want to analyze.
    % eps (int) : epsilon value for the L_inf attack.
    %
    % Bug fixes vs. original: printf does not exist in MATLAB (use fprintf);
    % `net` and `target` were used but never defined — the model and target
    % label are now loaded the same way as in verifyvideo.m.

    % Check input arguments first.
    if dsVar ~= "zoom_in" && dsVar ~= "zoom_out"
        fprintf("dsVar argument was invalid. Must be 'zoom_in' or 'zoom_out'.")
        return
    end

    if smpLen ~= 4 && smpLen ~= 8 && smpLen ~= 16
        fprintf("smpLen argument was invalid. Must be 4, 8, or 16.")
        return
    end

    if vType ~= "single_frame" && vType ~= "all_frames"
        fprintf("vType argument was invalid. Must be 'single_frame' or 'all_frames'.")
        return
    end

    if rType ~= "relax-star-area" && rType ~= "approx-star"
        fprintf("rType argument was invalid. Must be 'relax-star-area' or 'approx-star'.")
        return
    end

    % Get alternative names used in file naming.
    if dsVar == "zoom_in"
        dsVarCaps = "ZoomIn";
        dsVarShort = "zoomin";
    else
        dsVarCaps = "ZoomOut";
        dsVarShort = "zoomout";
    end

    fprintf("Running robustness verification on %s dataset...", dsVarCaps);

    % Create the output directory for this run if needed.
    if ~exist(sprintf("../../results/check_verify/%s/%s/", vType, dsVarCaps), "dir")
        mkdir(sprintf("../../results/check_verify/%s/%s", vType, dsVarCaps));
    end

    % Load data
    data = readNPY(sprintf("../../data/%s/test/mnistvideo_%s_%df_test_data_seq.npy", dsVarCaps, dsVar, smpLen));
    labels = readNPY(sprintf("../../data/%s/test/mnistvideo_%s_test_labels_seq.npy", dsVarCaps, dsVar));

    % Load the network. (Bug fix: `net` was used below but never defined.)
    % Model naming follows verifyvideo.m, e.g. "zoomin_4f.onnx".
    modelName = sprintf("%s_%df.onnx", dsVarShort, smpLen);
    netonnx = importONNXNetwork("../../models/" + modelName, "InputDataFormats", "TBCSS", "OutputDataFormats", "BC");
    net = matlab2nnv(netonnx);
    net.OutputSize = 10;
    disp("Finished loading model: " + modelName);

    % Preprocessing
    reshaped_data = permute(data, [1, 3, 2, 4, 5]); % to match BCSSS
    data_squeezed = squeeze(reshaped_data);
    datacopy = data_squeezed(:,:,:,:);

    % Get the sample
    sample = squeeze(datacopy(index,:,:,:));

    % Verification target. (Bug fix: `target` was used below but never
    % defined. +1 because the labels are 0-indexed and MATLAB is 1-indexed.)
    target = labels(index) + 1;

    lb = squeeze(sample);
    ub = squeeze(sample);

    % Perform L_inf attack
    lb = lb - eps;
    ub = ub + eps;

    % Clip the perturbed values to the valid pixel range [0, 1]
    lb_min = zeros(size(sample));
    ub_max = ones(size(sample));
    lb_clip = max(lb, lb_min);
    ub_clip = min(ub, ub_max);

    VS = VolumeStar(lb_clip, ub_clip);

    % Concrete predictions on the clean sample and on the interval bounds
    % (kept for inspection in the plot below).
    Y_outputs = net.evaluate(sample);
    [~, yPred] = max(Y_outputs); %#ok<ASGLU>

    LB_outputs = net.evaluate(lb_clip);
    [~, LB_Pred] = max(LB_outputs); %#ok<ASGLU>
    UB_outputs = net.evaluate(ub_clip);
    [~, UB_Pred] = max(UB_outputs); %#ok<ASGLU>

    % Define reachability options
    reachOptions = struct;
    reachOptions.reachMethod = rType;

    if rType == "relax-star-area"
        reachOptions.relaxFactor = 0.5;
    end

    % Run verification. (Removed unused `t = tic` that had no matching toc;
    % the log now reports the method actually used instead of hardcoding
    % "relax-star-area".)
    res_approx = net.verify_robustness(VS, reachOptions, target);
    fprintf('Robustness result with %s : %d', rType, res_approx);

    % Plot reachable output ranges (error bars) vs. concrete outputs.
    R = net.reachSet{end};

    [lb_out, ub_out] = R.getRanges;
    lb_out = squeeze(lb_out);
    ub_out = squeeze(ub_out);

    mid_range = (lb_out + ub_out) / 2;
    range_size = ub_out - mid_range;

    x = [0 1 2 3 4 5 6 7 8 9];

    figure;
    errorbar(x, mid_range, range_size, '.');
    hold on;
    xlim([-0.5 9.5]);
    scatter(x, Y_outputs, 'x', 'MarkerEdgeColor', 'r');
    scatter(x, LB_outputs, 'x', 'MarkerEdgeColor', 'b');
    scatter(x, UB_outputs, 'x', 'MarkerEdgeColor', 'g');

    % If the relaxed analysis was inconclusive, retry with the tighter
    % (but slower) approx-star method.
    if res_approx ~= 1 && res_approx ~= 0
        reachOptions = struct;
        reachOptions.reachMethod = 'approx-star';
        res_approx = net.verify_robustness(VS, reachOptions, target);
        fprintf('Robustness result with approx-star : %d', res_approx);
    end
end
"InputDataFormats", "TBCSS", "OutputDataFormats", "BC"); +net = matlab2nnv(netonnx); +net.OutputSize = 10; +disp("Finished loading model: " + modelName); + +% Load data +data = readNPY('../../data/3D/ZoomOut/mnistvideo_32x32_test_data_seq.npy'); +labels = readNPY('../../data/3D/ZoomOut/mnistvideo_32x32_test_labels_seq.npy'); + +tc = 0; + +for i=1:length(data) + s = data(i,:,:,:,:); + s = permute(s, [1, 3, 2, 4, 5]); + s = squeeze(s); + l = labels(i); + + outputs = net.evaluate(s); + [~, P] = max(outputs); + + if l+1 == P + tc = tc + 1; + end +end + +fprintf("Total correct: %d \n", tc); diff --git a/code/nnv/examples/Submission/FORMALISE2025/src/vvn/testing/test_gtsrb.m b/code/nnv/examples/Submission/FORMALISE2025/src/vvn/testing/test_gtsrb.m new file mode 100644 index 000000000..179cfe43c --- /dev/null +++ b/code/nnv/examples/Submission/FORMALISE2025/src/vvn/testing/test_gtsrb.m @@ -0,0 +1,103 @@ +%% Load things +% Load data +data = readNPY("../../data/GTSRB/test/gtsrbvideo_16f_test_data_seq.npy"); +labels = readNPY("../../data/GTSRB/test/gtsrbvideo_test_labels_seq.npy"); + +% Preprocessing +% from [B D C H W] to [B D H W C] +reshaped_data = permute(data, [1, 2, 4, 5, 3]); +datacopy = reshaped_data(:,:,:,:,:); + +% Experimental variables +numClasses = 43; +% n = 10; % Number of images to evaluate per class +% N = n * numClasses; % Total number of samples to evaluate + +% Size of attack +epsilon = [1/255; 2/255; 3/255]; +nE = length(epsilon); + +% Load the model +modelName = "gtsrb_16f.onnx"; +netonnx = importONNXNetwork("../../models/" + modelName, "InputDataFormats", "BCSSS", "OutputDataFormats", "BC"); +net = matlab2nnv(netonnx); +net.OutputSize = numClasses; +disp("Finished loading model: " + modelName); + +%% Verification settings +reachOptions = struct; +reachOptions.reachMethod = "relax-star-area"; +reachOptions.relaxFactor = 0.5; + +%% Make predictions on test set to check that we are verifying correct +outputLabels = zeros(length(datacopy)); + +s = 
datacopy(1,:,:,:,:); +s = squeeze(s); +l = labels(1) + 1; + +output = net.evaluate(s); +[~, P] = max(output); + +%% +%%%%%%%%%%%%%%%% +% VERIFICATION % +%%%%%%%%%%%%%%%% + +eps = epsilon(1); +fprintf('Starting verification with epsilon %d \n', eps); + +% Get the sample +sample = squeeze(datacopy(1,:,:,:,:)); + +% Perform L_inf attack +VS = L_inf_attack(sample, eps, 16); + +%% +t = tic; + +% NEED THIS HERE SO MET EXISTS +try + % run verification algorithm + fprintf("Verification algorithm starting.") + temp = net.verify_robustness(VS, reachOptions, labels(1)+1); + fprintf("Verification algorithm finished.") + +catch ME + met = ME.message; + temp = -1; + fprintf(met); +end + +res = temp; +time = toc(t); + +fprintf("\n"); +fprintf("Result : %d \n", res); +fprintf("Time: %f", time); + +%% Helper Functions +function VS = L_inf_attack(x, epsilon, numFrames) + lb = squeeze(x); + ub = squeeze(x); + + % Perturb the frames + for fn=1:numFrames + lb(fn, :, :, :) = x(fn, :, :, :) - epsilon; + ub(fn, :, :, :) = x(fn, :, :, :) + epsilon; + end + + % Reshape for conversion to VolumeStar + lb = permute(lb, [2 3 1 4]); + ub = permute(ub, [2 3 1 4]); + + % Clip the perturbed values to be between 0-1 + lb_min = zeros(32, 32, numFrames, 3); + ub_max = ones(32, 32, numFrames, 3); + lb_clip = max(lb, lb_min); + ub_clip = min(ub, ub_max); + + % Create the volume star + VS = VolumeStar(lb_clip, ub_clip); +end + diff --git a/code/nnv/examples/Submission/FORMALISE2025/src/vvn/testing/test_stmnist_models.m b/code/nnv/examples/Submission/FORMALISE2025/src/vvn/testing/test_stmnist_models.m new file mode 100644 index 000000000..544c30bdf --- /dev/null +++ b/code/nnv/examples/Submission/FORMALISE2025/src/vvn/testing/test_stmnist_models.m @@ -0,0 +1,44 @@ +%% Load things +% Number of frames +nF = 4; + +% Load data +data = readNPY(sprintf("../../data/STMNIST/test/stmnistvideo_%df_test_data_seq.npy", nF)); +labels = readNPY(sprintf("../../data/STMNIST/test/stmnistvideo_%df_test_labels_seq.npy", 
nF)); + +% Preprocessing +% from [B D C H W] to [B D H W C] +reshaped_data = permute(data, [1, 3, 4, 5, 2]); +datacopy = reshaped_data(:,:,:,:,:); + +% Experimental variables +numClasses = 10; + +% Load the model +modelName = sprintf("stmnist_%df.onnx", nF); +netonnx = importONNXNetwork("../../models/" + modelName, "InputDataFormats", "BCSSS", "OutputDataFormats", "BC"); +net = matlab2nnv(netonnx); +net.OutputSize = numClasses; +disp("Finished loading model: " + modelName); + + +%% Make predictions on test set to check that we are verifying correct +outputLabels = zeros(length(datacopy), 1); +correct = 0; +total = length(datacopy); + +for i=1:total + s = datacopy(i,:,:,:,:); + s = squeeze(s); + l = labels(i) + 1; + + output = net.evaluate(s); + [~, P] = max(output); + + if P == l + correct = correct + 1; + end +end + +acc = correct / total; +fprintf("All done! \n") \ No newline at end of file diff --git a/code/nnv/examples/Submission/FORMALISE2025/src/vvn/verify.py b/code/nnv/examples/Submission/FORMALISE2025/src/vvn/verify.py new file mode 100644 index 000000000..2deb73a09 --- /dev/null +++ b/code/nnv/examples/Submission/FORMALISE2025/src/vvn/verify.py @@ -0,0 +1,326 @@ +# python stdlib +import csv +import io +import os +import sys +from typing import Tuple + +# third-party packages +from dotenv import load_dotenv +import matlab.engine +import numpy as np + +# local modules +import vvn.prep as vp +from vvn.config import Config +import vvn.gtsrbprep as vgp +import vvn.stmnistprep as vsp + +# load environment variables +load_dotenv() + +# define global variables +NNV_PATH = os.environ['NNV_PATH'] +NPY_MATLAB_PATH = os.environ['NPY_MATLAB_PATH'] + +def prepare_engine(nnv_path, npy_matlab_path): + if not nnv_path or not npy_matlab_path: + raise Exception('One of nnv_path or npy_matlab_path is not defined. 
def verify(ds_type, sample_len, ver_algorithm, eng, index, eps_index, timeout) -> Tuple[int, float | str, str]:
    """
    Verify one MNIST-video (zoom_in/zoom_out) sample by calling the MATLAB
    function `verifyvideo` through the shared MATLAB engine.

    :param ds_type: dataset variant, 'zoom_in' or 'zoom_out'.
    :param sample_len: video length in frames.
    :param ver_algorithm: verification algorithm, 'relax' or 'approx'.
    :param eng: running MATLAB engine returned by `prepare_engine`.
    :param index: 1-based sample index (MATLAB indexing).
    :param eps_index: 1-based index into the epsilon schedule (eps = eps_index/255).
    :param timeout: seconds to wait for MATLAB before declaring a timeout.
    :return: (result, time, method); (3, 'timeout', 'timeout') on timeout.

    NOTE(review): verify_gtsrb / verify_stmnist duplicate this function except
    for the MATLAB entry point — candidates for a shared helper.
    """
    # check that MATLAB engine was started correctly and is accessible
    if not eng:
        raise Exception('MATLAB Engine was not correctly started and shared. Please make sure to run `prepare_engine`.')

    # call to MATLAB script to run verification; background=True makes this
    # asynchronous so we can enforce the timeout, and MATLAB's stdout is
    # swallowed into a throwaway StringIO
    future = eng.verifyvideo(ds_type, sample_len, ver_algorithm, index, eps_index, nargout=3, background=True, stdout=io.StringIO())

    try:
        # block until MATLAB finishes or the timeout elapses
        [res, t, met] = future.result(timeout=float(timeout))

    except matlab.engine.TimeoutError:
        print('timeout')
        # sentinel values recorded in the results CSV for a timeout
        res = 3
        t = 'timeout'
        met = 'timeout'

        # stop the still-running MATLAB computation
        future.cancel()

    return res, t, met
def verify_gtsrb(sample_len, ver_algorithm, eng, index, eps_index, timeout) -> Tuple[int, float | str, str]:
    """
    Verify one GTSRB video sample by calling the MATLAB function
    `verifygtsrb` through the shared MATLAB engine.

    :param sample_len: video length in frames.
    :param ver_algorithm: verification algorithm, 'relax' or 'approx'.
    :param eng: running MATLAB engine returned by `prepare_engine`.
    :param index: 1-based sample index (MATLAB indexing).
    :param eps_index: 1-based index into the epsilon schedule (eps = eps_index/255).
    :param timeout: seconds to wait for MATLAB before declaring a timeout.
    :return: (result, time, method); (3, 'timeout', 'timeout') on timeout.

    NOTE(review): duplicates `verify` except for the MATLAB entry point.
    """
    # check that MATLAB engine was started correctly and is accessible
    if not eng:
        raise Exception('MATLAB Engine was not correctly started and shared. Please make sure to run `prepare_engine`.')

    # asynchronous call to verifygtsrb.m so the timeout can be enforced;
    # MATLAB stdout is discarded via a throwaway StringIO
    future = eng.verifygtsrb(sample_len, ver_algorithm, index, eps_index, nargout=3, background=True, stdout=io.StringIO())

    try:
        # block until MATLAB finishes or the timeout elapses
        [res, t, met] = future.result(timeout=float(timeout))

    except matlab.engine.TimeoutError:
        print('timeout')
        # sentinel values recorded in the results CSV for a timeout
        res = 3
        t = 'timeout'
        met = 'timeout'

        # stop the still-running MATLAB computation
        future.cancel()

    return res, t, met
def verify_stmnist(sample_len, ver_algorithm, eng, index, eps_index, timeout) -> Tuple[int, float | str, str]:
    """
    Verify one STMNIST video sample by calling the MATLAB function
    `verifystmnist` through the shared MATLAB engine.

    :param sample_len: video length in frames.
    :param ver_algorithm: verification algorithm, 'relax' or 'approx'.
    :param eng: running MATLAB engine returned by `prepare_engine`.
    :param index: 1-based sample index (MATLAB indexing).
    :param eps_index: 1-based index into the epsilon schedule (eps = eps_index/255).
    :param timeout: seconds to wait for MATLAB before declaring a timeout.
    :return: (result, time, method); (3, 'timeout', 'timeout') on timeout.

    NOTE(review): duplicates `verify` except for the MATLAB entry point.
    """
    # check that MATLAB engine was started correctly and is accessible
    if not eng:
        raise Exception('MATLAB Engine was not correctly started and shared. Please make sure to run `prepare_engine`.')

    # asynchronous call to verifystmnist.m so the timeout can be enforced;
    # MATLAB stdout is discarded via a throwaway StringIO
    future = eng.verifystmnist(sample_len, ver_algorithm, index, eps_index, nargout=3, background=True, stdout=io.StringIO())

    try:
        # block until MATLAB finishes or the timeout elapses
        [res, t, met] = future.result(timeout=float(timeout))

    except matlab.engine.TimeoutError:
        print('timeout')
        # sentinel values recorded in the results CSV for a timeout
        res = 3
        t = 'timeout'
        met = 'timeout'

        # stop the still-running MATLAB computation
        future.cancel()

    return res, t, met
def write_results(output_file, sample_num, res, t, met):
    """
    Append one verification record to a results CSV file.

    :param output_file: path of the CSV file to append to.
    :param sample_num: sequential sample number within the experiment.
    :param res: verification result code.
    :param t: running time in seconds, or 'timeout'.
    :param met: method used, or the error/timeout message.
    """
    row = [sample_num, res, t, met]
    with open(output_file, 'a', newline='') as fh:
        csv.writer(fh).writerow(row)
if __name__ == "__main__":
    # smoke-test entry point; positional CLI arguments:
    #   dataset, verification algorithm, epsilon index, sample length,
    #   timeout (seconds), sample index
    dst = sys.argv[1]
    veralg = sys.argv[2]
    epsilon_index = sys.argv[3]
    sample_len = sys.argv[4]
    timeout = sys.argv[5]
    index = sys.argv[6]

    # verify command line arguments
    # (Bug fix: the original did `raise error(...)` — `error` is undefined in
    # Python, so bad input produced a NameError instead of the intended
    # message. ValueError is the correct exception type here.)
    if dst not in ['zoom_in', 'zoom_out', 'gtsrb', 'stmnist']:
        raise ValueError("Invalid dataset argument. Must be one of 'zoom_in', 'zoom_out', 'gtsrb' or 'stmnist'.")

    if veralg not in ['relax', 'approx']:
        raise ValueError("Invalid verification algorithm. Must be one of 'relax' or 'approx'.")

    if epsilon_index not in ['1', '2', '3']:
        raise ValueError("Invalid epsilon. Must be one of 1, 2, or 3.")

    if dst == 'stmnist':
        if sample_len not in ['16', '32', '64']:
            raise ValueError("Invalid sample length for STMNIST. Must be one of 16, 32, 64.")
    else:
        if sample_len not in ['4', '8', '16']:
            raise ValueError("Invalid sample length for MNIST and GTSRB Video. Must be one of 4, 8, 16.")

    # convert CL arguments to correct data types
    epsilon_index = int(epsilon_index)
    sample_len = int(sample_len)
    index = int(index)
    timeout = int(timeout)

    # start matlab
    eng = prepare_engine(NNV_PATH, NPY_MATLAB_PATH)

    # dispatch to the wrapper for the requested dataset
    if dst == 'zoom_in' or dst == 'zoom_out':
        res, t, met = verify(dst, sample_len, veralg, eng, index, epsilon_index, timeout)
    elif dst == 'stmnist':
        res, t, met = verify_stmnist(sample_len, veralg, eng, index, epsilon_index, timeout)
    else:
        res, t, met = verify_gtsrb(sample_len, veralg, eng, index, epsilon_index, timeout)

    print(f'{dst}-{sample_len}f with {veralg} and e={epsilon_index}/255.')
    print(f'Res: {res}, Time: {t}, Met: {met}\n')
function [res, time, met] = verifygtsrb(smpLen, verAlg, index, epsIndex)
    % Verify robustness of the GTSRB video classifier on one test sample
    % under an L_inf perturbation.
    %
    % smpLen (int) : the length of a sample (video) in the dataset. either 4, 8, or 16.
    % verAlg (string) : the verification algorithm to use. either "relax" or "approx".
    % index (int) : the index into the dataset to get the targeted sample to verify.
    % epsIndex (int) : to help us select the epsilon value we would like to use for the attack.
    %
    % returns: res (verification result), time (seconds), met (method used,
    % or the error message if verification threw).

    % Default outputs so an early return does not leave them unassigned.
    % (Bug fixes: the original used printf, which does not exist in MATLAB,
    % and returned without assigning res/time/met, which errors in the caller.)
    res = -2; time = -1; met = "invalid input";

    if smpLen ~= 4 && smpLen ~= 8 && smpLen ~= 16
        fprintf("smpLen argument was invalid. Must be 4, 8, or 16.")
        return
    end

    if verAlg ~= "relax" && verAlg ~= "approx"
        fprintf("verAlg argument was invalid. Must be 'relax' or 'approx'.")
        return
    end

    fprintf("Running robustness verification on GTSRB %df dataset...", smpLen);

    % Load data
    data = readNPY(sprintf("data/GTSRB/test/gtsrbvideo_%df_test_data_seq.npy", smpLen));
    labels = readNPY("data/GTSRB/test/gtsrbvideo_test_labels_seq.npy");

    % Preprocessing
    reshaped_data = permute(data, [1, 2, 4, 5, 3]); % to match BCSSS
    datacopy = reshaped_data(:,:,:,:,:);

    % Experimental variables
    numClasses = 43;

    % Size of attack
    epsilon = [1/255; 2/255; 3/255];

    % Load the model
    modelName = sprintf("gtsrb_%df.onnx", smpLen);
    netonnx = importONNXNetwork("models/" + modelName, "InputDataFormats", "TBCSS", "OutputDataFormats", "BC");
    net = matlab2nnv(netonnx);
    net.OutputSize = numClasses;
    disp("Finished loading model: " + modelName);

    % Verification settings
    reachOptions = struct;
    if verAlg == "relax"
        reachOptions.reachMethod = "relax-star-area";
        reachOptions.relaxFactor = 0.5;
    elseif verAlg == "approx"
        reachOptions.reachMethod = "approx-star";
    end

    % Sanity check: evaluate the clean sample so the prediction can be
    % inspected before spending time on verification. (Removed the unused
    % `outputLabels = zeros(length(datacopy))`, which allocated an N-by-N
    % matrix — zeros(n) is square in MATLAB.)
    s = squeeze(datacopy(index,:,:,:,:));
    output = net.evaluate(s);
    [~, P] = max(output); %#ok<ASGLU>

    %%%%%%%%%%%%%%%%
    % VERIFICATION %
    %%%%%%%%%%%%%%%%

    eps = epsilon(epsIndex);
    fprintf('Starting verification with epsilon %d \n', eps);

    % Get the sample
    sample = squeeze(datacopy(index,:,:,:,:));

    % Perform L_inf attack
    VS = L_inf_attack(sample, eps, smpLen);
    t = tic;

    % met must exist even if verify_robustness throws
    met = verAlg;

    try
        % run verification algorithm; +1 because MATLAB is 1-indexed
        temp = net.verify_robustness(VS, reachOptions, labels(index)+1);

    catch ME
        met = ME.message;
        temp = -1;
    end

    res = temp;
    time = toc(t);
end

%% Helper Functions
function VS = L_inf_attack(x, epsilon, numFrames)
    % Build a VolumeStar covering the L_inf ball of radius epsilon around
    % video x, clipped to the valid pixel range [0, 1].
    lb = squeeze(x);
    ub = squeeze(x);

    % Perturb the frames
    for fn = 1:numFrames
        lb(fn, :, :, :) = x(fn, :, :, :) - epsilon;
        ub(fn, :, :, :) = x(fn, :, :, :) + epsilon;
    end

    % Clip the perturbed values to be between 0-1
    lb_min = zeros(numFrames, 32, 32, 3);
    ub_max = ones(numFrames, 32, 32, 3);
    lb_clip = max(lb, lb_min);
    ub_clip = min(ub, ub_max);

    % Create the volume star
    VS = VolumeStar(lb_clip, ub_clip);
end
Must be 'relax' or 'approx'.") + return + end + + fprintf("Running robustness verification on STMNIST %df dataset...", smpLen); + + % Load data + data = readNPY(sprintf("data/STMNIST/test/stmnistvideo_%df_test_data_seq.npy", smpLen)); + labels = readNPY(sprintf("data/STMNIST/test/stmnistvideo_%df_test_labels_seq.npy", smpLen)); + + % Preprocessing + reshaped_data = permute(data, [1, 3, 4, 5, 2]); % to match BCSSS + datacopy = reshaped_data(:,:,:,:,:); + + % Experimental variables + numClasses = 10; + + % Size of attack + epsilon = [1/255; 2/255; 3/255]; + nE = length(epsilon); + + % Load the model + modelName = sprintf("stmnist_%df.onnx", smpLen); + netonnx = importONNXNetwork("models/" + modelName, "InputDataFormats", "TBCSS", "OutputDataFormats", "BC"); + net = matlab2nnv(netonnx); + net.OutputSize = numClasses; + disp("Finished loading model: " + modelName); + + % Verification settings + reachOptions = struct; + if verAlg == "relax" + reachOptions.reachMethod = "relax-star-area"; + reachOptions.relaxFactor = 0.5; + elseif verAlg == "approx" + reachOptions.reachMethod = "approx-star"; + end + + % Make predictions on test set to check that we are verifying correct + % samples + outputLabels = zeros(length(datacopy)); + + s = datacopy(index,:,:,:,:); + s = squeeze(s); + + output = net.evaluate(s); + [~, P] = max(output); + + %%%%%%%%%%%%%%%% + % VERIFICATION % + %%%%%%%%%%%%%%%% + + eps = epsilon(epsIndex); + fprintf('Starting verification with epsilon %d \n', eps); + + % Get the sample + sample = squeeze(datacopy(index,:,:,:,:)); + + % Perform L_inf attack + VS = L_inf_attack(sample, eps, smpLen); + t = tic; + + % NEED THIS HERE SO MET EXISTS + met = verAlg; + + try + % run verification algorithm + temp = net.verify_robustness(VS, reachOptions, labels(index)+1); + + catch ME + met = ME.message; + temp = -1; + end + + res = temp; + time = toc(t); + +end + +%% Helper Functions +function VS = L_inf_attack(x, epsilon, numFrames) + lb = squeeze(x); + ub = squeeze(x); + 
function [res, time, met] = verifyvideo(dsVar, smpLen, verAlg, index, epsIndex)
    % Verify robustness of the MNIST-video classifier on one test sample
    % under an L_inf perturbation.
    %
    % dsVar (string) : the dataset type. either "zoom_in" or "zoom_out".
    % smpLen (int) : the length of a sample (video) in the dataset. either 4, 8, or 16.
    % verAlg (string) : the verification algorithm to use. either "relax" or "approx".
    % index (int) : the index into the dataset to get the targeted sample to verify.
    % epsIndex (int) : to help us select the epsilon value we would like to use for the attack.
    %
    % returns: res (verification result), time (seconds), met (method used,
    % or the error message if verification threw).

    % Default outputs so an early return does not leave them unassigned.
    % (Bug fixes: the original used printf, which does not exist in MATLAB,
    % and returned without assigning res/time/met, which errors in the caller.)
    res = -2; time = -1; met = "invalid input";

    % Check input arguments first.
    if dsVar ~= "zoom_in" && dsVar ~= "zoom_out"
        fprintf("dsVar argument was invalid. Must be 'zoom_in' or 'zoom_out'.")
        return
    end

    if smpLen ~= 4 && smpLen ~= 8 && smpLen ~= 16
        fprintf("smpLen argument was invalid. Must be 4, 8, or 16.")
        return
    end

    if verAlg ~= "relax" && verAlg ~= "approx"
        fprintf("verAlg argument was invalid. Must be 'relax' or 'approx'.")
        return
    end

    % Get alternative names used in file naming.
    if dsVar == "zoom_in"
        dsVarCaps = "ZoomIn";
        dsVarShort = "zoomin";
    else
        dsVarCaps = "ZoomOut";
        dsVarShort = "zoomout";
    end

    fprintf("Running robustness verification on %s dataset...", dsVarCaps);

    % Load data
    data = readNPY(sprintf("data/%s/test/mnistvideo_%s_%df_test_data_seq.npy", dsVarCaps, dsVar, smpLen));
    labels = readNPY(sprintf("data/%s/test/mnistvideo_%s_test_labels_seq.npy", dsVarCaps, dsVar));

    % Preprocessing
    reshaped_data = permute(data, [1, 3, 2, 4, 5]); % to match BCSSS
    data_squeezed = squeeze(reshaped_data);
    datacopy = data_squeezed(:,:,:,:);

    % Experimental variables
    numClasses = 10;

    % Size of attack
    epsilon = [1/255; 2/255; 3/255];

    % Load the model
    modelName = sprintf("%s_%df.onnx", dsVarShort, smpLen);
    netonnx = importONNXNetwork("models/" + modelName, "InputDataFormats", "TBCSS", "OutputDataFormats", "BC");
    net = matlab2nnv(netonnx);
    net.OutputSize = numClasses;
    disp("Finished loading model: " + modelName);

    % Verification settings
    reachOptions = struct;
    if verAlg == "relax"
        reachOptions.reachMethod = "relax-star-area";
        reachOptions.relaxFactor = 0.5;
    elseif verAlg == "approx"
        reachOptions.reachMethod = "approx-star";
    end

    % Sanity check: the clean sample's prediction vs. its true label.
    % (Removed the unused `outputLabels = zeros(length(datacopy))`, which
    % allocated an N-by-N matrix — zeros(n) is square in MATLAB.)
    s = squeeze(datacopy(index,:,:,:));
    l = labels(index) + 1; % +1 because MATLAB is 1-indexed
    output = net.evaluate(s);
    [~, P] = max(output); %#ok<ASGLU>

    %%%%%%%%%%%%%%%%
    % VERIFICATION %
    %%%%%%%%%%%%%%%%

    eps = epsilon(epsIndex);
    fprintf('Starting verification with epsilon %d \n', eps);

    % Get the sample
    sample = squeeze(datacopy(index,:,:,:));

    % Perform L_inf attack
    VS = L_inf_attack(sample, eps, smpLen);
    t = tic;

    % met must exist even if verify_robustness throws
    met = verAlg;

    try
        % run verification algorithm
        temp = net.verify_robustness(VS, reachOptions, labels(index)+1);

    catch ME
        met = ME.message;
        temp = -1;
    end

    res = temp;
    time = toc(t);
end

%% Helper Functions
function VS = L_inf_attack(x, epsilon, numFrames)
    % Build a VolumeStar covering the L_inf ball of radius epsilon around the
    % (grayscale) video x, clipped to the valid pixel range [0, 1].
    lb = squeeze(x);
    ub = squeeze(x);

    % Perturb the frames
    for fn = 1:numFrames
        lb(fn, :, :) = x(fn, :, :) - epsilon;
        ub(fn, :, :) = x(fn, :, :) + epsilon;
    end

    % Clip the perturbed values to be between 0-1
    lb_min = zeros(numFrames, 32, 32);
    ub_max = ones(numFrames, 32, 32);
    lb_clip = max(lb, lb_min);
    ub_clip = min(ub, ub_max);

    % Create the volume star
    VS = VolumeStar(lb_clip, ub_clip);
end