Skip to content

Commit

Permalink
Experiments adjustments
Browse files Browse the repository at this point in the history
  • Loading branch information
robinholzi committed Sep 25, 2024
1 parent e16f05a commit a41ef3d
Show file tree
Hide file tree
Showing 3 changed files with 93 additions and 95 deletions.
2 changes: 2 additions & 0 deletions experiments/arxiv/compare_trigger_policies/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,8 @@ def construct_pipelines(experiment: Experiment) -> list[ModynPipelineConfig]:
# -------------------------------------------------------------------------------- #
# 2X: Drift triggers #
# -------------------------------------------------------------------------------- #
# 20: static thresholds are very hard to find, especially with such long timeline durations
# We, therefore, focus on dynamic thresholds.
# TODO
# Dynamic threshold drift
21: Experiment(
Expand Down
2 changes: 2 additions & 0 deletions experiments/huffpost/compare_trigger_policies/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,8 @@ def construct_pipelines(experiment: Experiment) -> list[ModynPipelineConfig]:
# -------------------------------------------------------------------------------- #
# 2X: Drift triggers #
# -------------------------------------------------------------------------------- #
# 20: static thresholds are very hard to find, especially with such long timeline durations
# We, therefore, focus on dynamic thresholds.
21: Experiment(
name="hp-datadrift-dynamic",
eval_handlers=(
Expand Down
184 changes: 89 additions & 95 deletions experiments/yearbook/compare_trigger_policies/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,9 @@
AvoidableMisclassificationCostTriggerConfig,
DataIncorporationLatencyCostTriggerConfig,
)
from modyn.config.schema.pipeline.trigger.performance.criterion import StaticNumberAvoidableMisclassificationCriterion
from modyn.config.schema.pipeline.trigger.performance.performance import (
PerformanceTriggerConfig,
PerformanceTriggerEvaluationConfig,
)
from modyn.config.schema.pipeline.trigger.simple.data_amount import DataAmountTriggerConfig
from modyn.utils.utils import SECONDS_PER_UNIT
from modynclient.config.schema.client_config import ModynClientConfig, Supervisor

Expand Down Expand Up @@ -192,20 +189,18 @@ def construct_pipelines(experiment: Experiment) -> list[ModynPipelineConfig]:
# gpu_device="cuda:1",
# ),
# # data amount baselines
11: Experiment(
name="yb-baseline-dataamount",
eval_handlers=(
construct_periodic_eval_handlers(intervals=BEST_PERIODIC_EVAL_INTERVAL, execution_time="manual") +
construct_between_trigger_eval_handler("manual")
),
data_amount_triggers={
f"{num_samples}": DataAmountTriggerConfig(num_samples=num_samples)
# for num_samples in ([250, 500, 1_000, 2_500, 5_000, 10_000, 15_000, 30_000])
# for num_samples in ([1_000, 2_500, 5_000, 10_000])
for num_samples in ([250, 500, 15_000, 30_000])
},
gpu_device="cuda:2",
),
# 11: Experiment(
# name="yb-baseline-dataamount",
# eval_handlers=(
# construct_periodic_eval_handlers(intervals=BEST_PERIODIC_EVAL_INTERVAL, execution_time="manual") +
# construct_between_trigger_eval_handler("manual")
# ),
# data_amount_triggers={
# f"{num_samples}": DataAmountTriggerConfig(num_samples=num_samples)
# for num_samples in ([250, 500, 1_000, 2_500, 5_000, 10_000, 15_000, 30_000])
# },
# gpu_device="cuda:2",
# ),
# -------------------------------------------------------------------------------- #
# 2X: Drift triggers #
# -------------------------------------------------------------------------------- #
Expand Down Expand Up @@ -306,84 +301,83 @@ def construct_pipelines(experiment: Experiment) -> list[ModynPipelineConfig]:
# # -------------------------------------------------------------------------------- #
# # 3X: Performance triggers #
# # -------------------------------------------------------------------------------- #
30: Experiment(
name="yb-performancetrigger",
eval_handlers=(
construct_periodic_eval_handlers(intervals=BEST_PERIODIC_EVAL_INTERVAL, execution_time="manual")
+ construct_between_trigger_eval_handler("manual")
),
performance_triggers={
f"{criterion_name}-int{detection_interval}y": PerformanceTriggerConfig(
evaluation_interval_data_points=detection_interval,
data_density_window_size=20, # performed well for drift, only used for #avoidable misclass
performance_triggers_window_size=20, # performed well for drift, only used for #avoidable misclass
warmup_intervals=3500 // detection_interval, # same as in drift case
warmup_policy=TimeTriggerConfig(every="3d", start_timestamp=_FIRST_TIMESTAMP),
evaluation=PerformanceTriggerEvaluationConfig(
device="cuda:2",
dataset=EvalDataConfig(
dataset_id="yearbook_train", # optional: extra holdout split
bytes_parser_function=yb_bytes_parser_function,
batch_size=512,
dataloader_workers=1,
metrics=[
AccuracyMetricConfig(evaluation_transformer_function=yb_evaluation_transformer_function),
],
),
),
mode="hindsight",
forecasting_method="ridge_regression",
decision_criteria={criterion_name: criterion},
)
# for detection_interval in [100, 250, 500]
for detection_interval in [250] # Solid choice
for criterion_name, criterion in (
# {
# f"static-{perf_threshold}": StaticPerformanceThresholdCriterion(
# metric="Accuracy", metric_threshold=perf_threshold
# )
# for perf_threshold in [0.7, 0.75, 0.8, 0.85, 0.875, 0.9, 0.925, 0.95]
# }
# | {
# f"dynamic-quant-{quantile}-{decision_window_size}": DynamicQuantilePerformanceThresholdCriterion(
# metric="Accuracy",
# quantile=quantile,
# window_size=decision_window_size,
# )
# for quantile in [0.05, 0.15, 0.3]
# for decision_window_size in [10, 20, 30]
# }
# |
# { # TODO: not completed
# f"dynamic-rollavg-{deviation}-{decision_window_size}": DynamicRollingAveragePerformanceThresholdCriterion(
# metric="Accuracy",
# deviation=deviation,
# absolute=False,
# window_size=decision_window_size,
# )
# for deviation in reversed([0.05, 0.1, 0.2, 0.3])
# for decision_window_size in [10, 20, 30]
# }
# |
{
f"num_misclass-{num_misclassifications}-exp-{expected_accuracy}-red-{allow_reduction}-": StaticNumberAvoidableMisclassificationCriterion(
expected_accuracy=expected_accuracy,
allow_reduction=allow_reduction,
avoidable_misclassification_threshold=num_misclassifications,
)
# for num_misclassifications, expected_accuracy, allow_reduction in [
# (1500, 0.95, False),
# ]
# cuda1: 100, 200, 500, 1000
# cuda2: 50
for num_misclassifications in reversed([50]) # TODO: 50, 100, 200, 500, 1000, 1500
for expected_accuracy in [0.85, 0.9, 0.95]
for allow_reduction in [True, False]
}
).items()
},
gpu_device="cuda:2",
),
# 30: Experiment(
# name="yb-performancetrigger",
# eval_handlers=(
# construct_periodic_eval_handlers(intervals=BEST_PERIODIC_EVAL_INTERVAL, execution_time="manual")
# + construct_between_trigger_eval_handler("manual")
# ),
# performance_triggers={
# f"{criterion_name}-int{detection_interval}y": PerformanceTriggerConfig(
# evaluation_interval_data_points=detection_interval,
# data_density_window_size=20, # performed well for drift, only used for #avoidable misclass
# performance_triggers_window_size=20, # performed well for drift, only used for #avoidable misclass
# warmup_intervals=3500 // detection_interval, # same as in drift case
# warmup_policy=TimeTriggerConfig(every="3d", start_timestamp=_FIRST_TIMESTAMP),
# evaluation=PerformanceTriggerEvaluationConfig(
# device="cuda:2",
# dataset=EvalDataConfig(
# dataset_id="yearbook_train", # optional: extra holdout split
# bytes_parser_function=yb_bytes_parser_function,
# batch_size=512,
# dataloader_workers=1,
# metrics=[
# AccuracyMetricConfig(evaluation_transformer_function=yb_evaluation_transformer_function),
# ],
# ),
# ),
# mode="hindsight",
# forecasting_method="ridge_regression",
# decision_criteria={criterion_name: criterion},
# )
# # for detection_interval in [100, 250, 500]
# for detection_interval in [250] # Solid choice
# for criterion_name, criterion in (
# {
# f"static-{perf_threshold}": StaticPerformanceThresholdCriterion(
# metric="Accuracy", metric_threshold=perf_threshold
# )
# for perf_threshold in [0.7, 0.75, 0.8, 0.85, 0.875, 0.9, 0.925, 0.95]
# }
# | {
# f"dynamic-quant-{quantile}-{decision_window_size}": DynamicQuantilePerformanceThresholdCriterion(
# metric="Accuracy",
# quantile=quantile,
# window_size=decision_window_size,
# )
# for quantile in [0.05, 0.15, 0.3]
# for decision_window_size in [10, 20, 30]
# }
# |
# { # only executed for 250 and 500 detection intervals
# f"dynamic-rollavg-{deviation}-{decision_window_size}": DynamicRollingAveragePerformanceThresholdCriterion(
# metric="Accuracy",
# deviation=deviation,
# absolute=False,
# window_size=decision_window_size,
# )
# for deviation in reversed([0.05, 0.1, 0.2, 0.3])
# for decision_window_size in [10, 20, 30]
# }
# |
# {
# # only executed for 250 detection interval
# f"num_misclass-{num_misclassifications}-exp-{expected_accuracy}-red-{allow_reduction}-": StaticNumberAvoidableMisclassificationCriterion(
# expected_accuracy=expected_accuracy,
# allow_reduction=allow_reduction,
# avoidable_misclassification_threshold=num_misclassifications,
# )
# # for num_misclassifications, expected_accuracy, allow_reduction in [
# # (1500, 0.95, False),
# # ]
# for num_misclassifications in reversed([50, 100, 200, 500, 1000, 1500])
# for expected_accuracy in [0.85, 0.9, 0.95]
# for allow_reduction in [True, False]
# }
# ).items()
# },
# gpu_device="cuda:2",
# ),
# -------------------------------------------------------------------------------- #
# 4X: Cost aware triggers #
# -------------------------------------------------------------------------------- #
Expand Down

0 comments on commit a41ef3d

Please sign in to comment.