Commit

Add action benchmark v2 (#3268)
* Add action benchmark test template

* Fix vsp perf test

* Fix act perf test

* Add action to perf benchmark workflow

* Fix raw data path

* Fix benchmark options

* Fix vsp dataset format
goodsong81 authored Apr 5, 2024
1 parent 326fddf commit c4db3af
Showing 8 changed files with 292 additions and 56 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/perf_benchmark.yaml
@@ -114,6 +114,8 @@ jobs:
task: "semantic_segmentation"
- task-short: "vsp"
task: "visual_prompting"
- task-short: "act"
task: "action"
name: Perf-Benchmark-${{ matrix.task-short }}
runs-on: [self-hosted, linux, x64, dmount-v2]
timeout-minutes: 8640
22 changes: 18 additions & 4 deletions tests/perf/benchmark.py
@@ -156,7 +156,7 @@ def run(
"--engine.device",
self.accelerator,
]
-for key, value in dataset.extra_overrides.items():
+for key, value in dataset.extra_overrides.get("train", {}).items():
command.append(f"--{key}")
command.append(str(value))
command.extend(["--seed", str(seed)])
@@ -183,6 +183,9 @@
"--work_dir",
str(sub_work_dir),
]
for key, value in dataset.extra_overrides.get("test", {}).items():
command.append(f"--{key}")
command.append(str(value))
self._run_command(command)
self._rename_raw_data(
work_dir=sub_work_dir / ".latest" / "test",
@@ -198,6 +201,9 @@
"--work_dir",
str(sub_work_dir),
]
for key, value in dataset.extra_overrides.get("export", {}).items():
command.append(f"--{key}")
command.append(str(value))
self._run_command(command)

exported_model_path = sub_work_dir / ".latest" / "export" / "exported_model.xml"
@@ -214,6 +220,9 @@
"--work_dir",
str(sub_work_dir),
]
for key, value in dataset.extra_overrides.get("test", {}).items():
command.append(f"--{key}")
command.append(str(value))
self._run_command(command)

self._rename_raw_data(
@@ -235,6 +244,9 @@
"--work_dir",
str(sub_work_dir),
]
for key, value in dataset.extra_overrides.get("optimize", {}).items():
command.append(f"--{key}")
command.append(str(value))
self._run_command(command)

optimized_model_path = sub_work_dir / ".latest" / "optimize" / "optimized_model.xml"
@@ -252,6 +264,9 @@
"--work_dir",
str(sub_work_dir),
]
for key, value in dataset.extra_overrides.get("test", {}).items():
command.append(f"--{key}")
command.append(str(value))
self._run_command(command)

self._rename_raw_data(
@@ -267,9 +282,8 @@ def run(
return self.average_result(result, keys=["task", "model", "data_group", "data"])

def _run_command(self, command: list[str]) -> None:
-if self.dry_run:
-    print(" ".join(command))
-else:
+print(" ".join(command))
+if not self.dry_run:
subprocess.run(command, check=True) # noqa: S603

def _log_metrics(
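The pattern repeated through benchmark.py above is the core of this change: extra_overrides moves from a flat mapping to a per-stage mapping, so the train, test, export, and optimize stages each pick up only the flags scoped to them, and _run_command now always echoes the assembled command before (conditionally) executing it. A minimal sketch of the expansion, assuming a hypothetical apply_overrides helper and the otx CLI entry point:

# Sketch under stated assumptions: apply_overrides and the "otx" executable name
# are illustrative; the real code inlines this loop once per stage, as in the diff above.
def apply_overrides(command: list[str], extra_overrides: dict[str, dict[str, str]], stage: str) -> list[str]:
    # Only overrides registered under `stage` become CLI flags.
    for key, value in extra_overrides.get(stage, {}).items():
        command.append(f"--{key}")
        command.append(str(value))
    return command

overrides = {"train": {"max_epochs": "10"}, "test": {"metric": "otx.core.metrics.fmeasure.FMeasure"}}
print(apply_overrides(["otx", "train"], overrides, "train"))
# -> ['otx', 'train', '--max_epochs', '10']  (the "test" overrides are ignored at train time)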
2 changes: 1 addition & 1 deletion tests/perf/conftest.py
@@ -380,7 +380,7 @@ def fxt_benchmark_summary(
print(summary_results)
fxt_summary_csv.parent.mkdir(parents=True, exist_ok=True)
summary_results.to_csv(fxt_summary_csv)
raw_results.to_csv(fxt_summary_csv.parent / "benchmark-raw.csv")
raw_results.to_csv(fxt_summary_csv.parent / "perf-benchmark-raw.csv")
print(f" -> Saved to {fxt_summary_csv}.")

if fxt_mlflow_client:
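For reference, fxt_benchmark_summary writes the averaged summary and the raw per-run rows side by side; the rename keeps the raw file's perf- prefix consistent with the summary file. A rough sketch of the aggregation step, assuming pandas and the grouping keys passed to average_result in benchmark.py above (file names here are illustrative, not the fixture's real paths):

import pandas as pd

# Illustrative paths; the fixture derives the real ones from fxt_summary_csv.
raw_results = pd.read_csv("perf-benchmark-raw.csv")

# Plausibly, average_result reduces repeated runs to per-configuration means.
summary_results = raw_results.groupby(["task", "model", "data_group", "data"]).mean(numeric_only=True)
summary_results.to_csv("perf-benchmark-summary.csv")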
182 changes: 182 additions & 0 deletions tests/perf/test_action.py
@@ -0,0 +1,182 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

"""OTX action perfomance benchmark tests."""

from __future__ import annotations

from pathlib import Path

import pytest

from .benchmark import Benchmark
from .conftest import PerfTestBase


class TestPerfActionClassification(PerfTestBase):
"""Benchmark action classification."""

MODEL_TEST_CASES = [ # noqa: RUF012
Benchmark.Model(task="action/action_classification", name="movinet", category="speed"),
Benchmark.Model(task="action/action_classification", name="x3d", category="accuracy"),
]

DATASET_TEST_CASES = [ # noqa: RUF012
Benchmark.Dataset(
name="ucf-5percent-small",
path=Path("action/action_classification/ucf_kinetics_5percent_small"),
group="small",
num_repeat=5,
extra_overrides={
"train": {
"max_epochs": "10",
"deterministic": "True",
},
},
),
Benchmark.Dataset(
name="ucf-30percent-medium",
path=Path("action/action_classification/ucf_kinetics_30percent_medium"),
group="medium",
num_repeat=5,
extra_overrides={
"train": {
"max_epochs": "10",
"deterministic": "True",
},
},
),
Benchmark.Dataset(
name="ucf-large",
path=Path("action/action_classification/ucf_kinetics_large"),
group="large",
num_repeat=5,
extra_overrides={
"train": {
"max_epochs": "3",
"deterministic": "True",
},
},
),
]

BENCHMARK_CRITERIA = [ # noqa: RUF012
Benchmark.Criterion(name="train/epoch", summary="max", compare="<", margin=0.1),
Benchmark.Criterion(name="train/e2e_time", summary="max", compare="<", margin=0.1),
Benchmark.Criterion(name="test/accuracy", summary="max", compare=">", margin=0.1),
Benchmark.Criterion(name="export/accuracy", summary="max", compare=">", margin=0.1),
Benchmark.Criterion(name="optimize/accuracy", summary="max", compare=">", margin=0.1),
Benchmark.Criterion(name="train/iter_time", summary="mean", compare="<", margin=0.1),
Benchmark.Criterion(name="test/iter_time", summary="mean", compare="<", margin=0.1),
Benchmark.Criterion(name="export/iter_time", summary="mean", compare="<", margin=0.1),
Benchmark.Criterion(name="optimize/iter_time", summary="mean", compare="<", margin=0.1),
]

@pytest.mark.parametrize(
"fxt_model",
MODEL_TEST_CASES,
ids=lambda model: model.name,
indirect=True,
)
@pytest.mark.parametrize(
"fxt_dataset",
DATASET_TEST_CASES,
ids=lambda dataset: dataset.name,
indirect=True,
)
def test_perf(
self,
fxt_model: Benchmark.Model,
fxt_dataset: Benchmark.Dataset,
fxt_benchmark: Benchmark,
):
self._test_perf(
model=fxt_model,
dataset=fxt_dataset,
benchmark=fxt_benchmark,
criteria=self.BENCHMARK_CRITERIA,
)


class TestPerfActionDetection(PerfTestBase):
"""Benchmark action detection."""

MODEL_TEST_CASES = [ # noqa: RUF012
Benchmark.Model(task="action/action_detection", name="x3d_fastrcnn", category="accuracy"),
]

DATASET_TEST_CASES = [ # noqa: RUF012
Benchmark.Dataset(
name="ucf-5percent-small",
path=Path("action/action_detection/UCF101_ava_5percent"),
group="small",
num_repeat=5,
extra_overrides={
"train": {
"max_epochs": "3",
"deterministic": "True",
},
},
),
Benchmark.Dataset(
name="ucf-30percent-medium",
path=Path("action/action_detection/UCF101_ava_30percent"),
group="medium",
num_repeat=5,
extra_overrides={
"train": {
"max_epochs": "3",
"deterministic": "True",
},
},
),
Benchmark.Dataset(
name="ucf-large",
path=Path("action/action_detection/UCF101_ava"),
group="large",
num_repeat=5,
extra_overrides={
"train": {
"max_epochs": "1",
"deterministic": "True",
},
},
),
]

BENCHMARK_CRITERIA = [ # noqa: RUF012
Benchmark.Criterion(name="train/epoch", summary="max", compare="<", margin=0.1),
Benchmark.Criterion(name="train/e2e_time", summary="max", compare="<", margin=0.1),
Benchmark.Criterion(name="test/map_50", summary="max", compare=">", margin=0.1),
Benchmark.Criterion(name="export/map_50", summary="max", compare=">", margin=0.1),
Benchmark.Criterion(name="optimize/map_50", summary="max", compare=">", margin=0.1),
Benchmark.Criterion(name="train/iter_time", summary="mean", compare="<", margin=0.1),
Benchmark.Criterion(name="test/iter_time", summary="mean", compare="<", margin=0.1),
Benchmark.Criterion(name="export/iter_time", summary="mean", compare="<", margin=0.1),
Benchmark.Criterion(name="optimize/iter_time", summary="mean", compare="<", margin=0.1),
]

@pytest.mark.parametrize(
"fxt_model",
MODEL_TEST_CASES,
ids=lambda model: model.name,
indirect=True,
)
@pytest.mark.parametrize(
"fxt_dataset",
DATASET_TEST_CASES,
ids=lambda dataset: dataset.name,
indirect=True,
)
def test_perf(
self,
fxt_model: Benchmark.Model,
fxt_dataset: Benchmark.Dataset,
fxt_benchmark: Benchmark,
):
self._test_perf(
model=fxt_model,
dataset=fxt_dataset,
benchmark=fxt_benchmark,
criteria=self.BENCHMARK_CRITERIA,
)
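Every criterion above pairs a metric with a comparison direction and a 0.1 margin. The margin semantics are not visible in this diff; one plausible reading, sketched here as an assumption, is a tolerance band around a stored reference value:

# Assumed semantics, not taken from this diff: "<" metrics (epochs, times) may regress
# up to `margin` above the reference; ">" metrics (accuracy, mAP) may drop up to
# `margin` below it before the benchmark fails.
def within_criterion(value: float, reference: float, compare: str, margin: float) -> bool:
    if compare == "<":
        return value < reference * (1.0 + margin)
    if compare == ">":
        return value > reference * (1.0 - margin)
    return True

print(within_criterion(0.12, 0.10, "<", 0.1))  # False: 20% slower than the reference
print(within_criterion(0.85, 0.90, ">", 0.1))  # True: accuracy within 10% of the reference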
10 changes: 2 additions & 8 deletions tests/perf/test_classification.py
@@ -180,10 +180,7 @@ class TestPerfHierarchicalLabelClassification(PerfTestBase):
path=Path("hlabel_classification/hlabel_CUB_small") / f"{idx}",
group="small",
num_repeat=5,
-extra_overrides={
-    "model.num_multiclass_heads": "3",
-    "model.num_multilabel_classes": "0",
-},
+extra_overrides={},
)
for idx in (1, 2, 3)
] + [
@@ -192,10 +189,7 @@ class TestPerfHierarchicalLabelClassification(PerfTestBase):
path=Path("hlabel_classification/hlabel_CUB_medium"),
group="medium",
num_repeat=5,
-extra_overrides={
-    "model.num_multiclass_heads": "23",
-    "model.num_multilabel_classes": "0",
-},
+extra_overrides={},
),
# Add large dataset
]
39 changes: 27 additions & 12 deletions tests/perf/test_detection.py
@@ -33,10 +33,15 @@ class TestPerfObjectDetection(PerfTestBase):
group="small",
num_repeat=5,
extra_overrides={
"deterministic": "True",
"metric": "otx.core.metrics.fmeasure.FMeasure",
"callback_monitor": "val/f1-score",
"scheduler.monitor": "val/f1-score",
"train": {
"deterministic": "True",
"metric": "otx.core.metrics.fmeasure.FMeasure",
"callback_monitor": "val/f1-score",
"scheduler.monitor": "val/f1-score",
},
"test": {
"metric": "otx.core.metrics.fmeasure.FMeasure",
},
},
)
for idx in (1, 2, 3)
@@ -47,10 +52,15 @@ class TestPerfObjectDetection(PerfTestBase):
group="medium",
num_repeat=5,
extra_overrides={
"deterministic": "True",
"metric": "otx.core.metrics.fmeasure.FMeasure",
"callback_monitor": "val/f1-score",
"scheduler.monitor": "val/f1-score",
"train": {
"deterministic": "True",
"metric": "otx.core.metrics.fmeasure.FMeasure",
"callback_monitor": "val/f1-score",
"scheduler.monitor": "val/f1-score",
},
"test": {
"metric": "otx.core.metrics.fmeasure.FMeasure",
},
},
),
Benchmark.Dataset(
@@ -59,10 +69,15 @@ class TestPerfObjectDetection(PerfTestBase):
group="large",
num_repeat=5,
extra_overrides={
"deterministic": "True",
"metric": "otx.core.metrics.fmeasure.FMeasure",
"callback_monitor": "val/f1-score",
"scheduler.monitor": "val/f1-score",
"train": {
"deterministic": "True",
"metric": "otx.core.metrics.fmeasure.FMeasure",
"callback_monitor": "val/f1-score",
"scheduler.monitor": "val/f1-score",
},
"test": {
"metric": "otx.core.metrics.fmeasure.FMeasure",
},
},
),
]
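With the per-stage schema, train-only knobs such as deterministic and the scheduler monitor no longer leak into testing, while the F-measure override applies to both stages. A short sketch of the commands the runner would assemble for one of the datasets above (the data path and otx entry point are placeholders, not shown in this diff):

# Illustrative only: "<data_root>" stands in for the dataset path, which this diff does not show.
train_cmd = ["otx", "train", "--data_root", "<data_root>"]
for key, value in {
    "deterministic": "True",
    "metric": "otx.core.metrics.fmeasure.FMeasure",
    "callback_monitor": "val/f1-score",
    "scheduler.monitor": "val/f1-score",
}.items():
    train_cmd += [f"--{key}", str(value)]

# At test time only the "test"-scoped override is appended:
test_cmd = ["otx", "test", "--data_root", "<data_root>", "--metric", "otx.core.metrics.fmeasure.FMeasure"]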