Skip to content

Commit

Permalink
Experiments adjustments
Browse files Browse the repository at this point in the history
  • Loading branch information
robinholzi committed Sep 24, 2024
1 parent a468578 commit e16f05a
Show file tree
Hide file tree
Showing 3 changed files with 144 additions and 131 deletions.
49 changes: 28 additions & 21 deletions experiments/arxiv/compare_trigger_policies/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,20 @@
DynamicRollingAverageThresholdCriterion,
)
from modyn.config.schema.pipeline.trigger.drift.detection_window.time_ import TimeWindowingStrategy
from modyn.config.schema.pipeline.trigger.performance.criterion import StaticNumberAvoidableMisclassificationCriterion
from modyn.config.schema.pipeline.trigger.performance.performance import PerformanceTriggerConfig, PerformanceTriggerEvaluationConfig
from modyn.config.schema.pipeline.trigger.simple.data_amount import DataAmountTriggerConfig
from modyn.config.schema.pipeline.trigger.performance.criterion import (
DynamicQuantilePerformanceThresholdCriterion,
DynamicRollingAveragePerformanceThresholdCriterion,
StaticNumberAvoidableMisclassificationCriterion,
StaticPerformanceThresholdCriterion,
)
from modyn.config.schema.pipeline.trigger.performance.performance import (
PerformanceTriggerConfig,
PerformanceTriggerEvaluationConfig,
)
from modyn.config.schema.pipeline.trigger.simple.time import TimeTriggerConfig
from modyn.utils.utils import SECONDS_PER_UNIT
from modynclient.config.schema.client_config import ModynClientConfig, Supervisor


from .pipeline_config import (
arxiv_bytes_parser_function,
arxiv_evaluation_transformer_function,
Expand Down Expand Up @@ -220,15 +226,15 @@ def construct_pipelines(experiment: Experiment) -> list[ModynPipelineConfig]:
+ construct_between_trigger_eval_handler("manual")
),
performance_triggers={
f"{criterion_name}-int{detection_interval}y": PerformanceTriggerConfig(
f"{criterion_name}-int{detection_interval}": PerformanceTriggerConfig(
evaluation_interval_data_points=detection_interval,
data_density_window_size=20, # performed well for drift, only used for #avoidable misclass
performance_triggers_window_size=20, # performed well for drift, only used for #avoidable misclass
warmup_intervals=200_000 // detection_interval, # first 200k of 2mio samples are warmup
# triggering every 2 years during the warmup phase seems reasonable.
warmup_policy=TimeTriggerConfig(every="2y", start_timestamp=_FIRST_TIMESTAMP),
evaluation=PerformanceTriggerEvaluationConfig(
device="cuda:2",
device="cuda:3",
dataset=EvalDataConfig(
dataset_id="arxiv_kaggle_train", # optional: extra holdout split
bytes_parser_function=arxiv_bytes_parser_function,
Expand All @@ -245,11 +251,12 @@ def construct_pipelines(experiment: Experiment) -> list[ModynPipelineConfig]:
)
for detection_interval in [20_000]
for criterion_name, criterion in (
# peak accuracy 0.6-0.65
# {
# f"static-{perf_threshold}": StaticPerformanceThresholdCriterion(
# metric="Accuracy", metric_threshold=perf_threshold
# )
# for perf_threshold in [0.45, 0.5, 0.55, 0.6]
# for perf_threshold in [0.45, 0.5, 0.55] # 0.6 --> too many triggers
# }
# |
# {
Expand All @@ -258,8 +265,8 @@ def construct_pipelines(experiment: Experiment) -> list[ModynPipelineConfig]:
# quantile=quantile,
# window_size=decision_window_size,
# )
# for quantile in [0.05, 0.15, 0.3]
# for decision_window_size in [15, 30]
# for quantile in [0.05, 0.15]
# for decision_window_size in [20]
# }
# |
# {
Expand All @@ -270,22 +277,22 @@ def construct_pipelines(experiment: Experiment) -> list[ModynPipelineConfig]:
# window_size=decision_window_size,
# )
# for deviation in reversed([0.1, 0.2, 0.3])
# for decision_window_size in [15, 30]
# for decision_window_size in [20]
# }
# |
{
f"num_misclass-{num_misclassifications}-exp-{expected_accuracy}-red-{allow_reduction}-": StaticNumberAvoidableMisclassificationCriterion(
expected_accuracy=expected_accuracy,
allow_reduction=allow_reduction,
avoidable_misclassification_threshold=num_misclassifications,
)
for num_misclassifications in reversed([10000]) # 1000, 2000, 5000, 7500, 10000
for expected_accuracy in [0.5, 0.55, 0.6]
for allow_reduction in [False]
}
# {
# f"num_misclass-{num_misclassifications}-exp-{expected_accuracy}-red-{allow_reduction}-": StaticNumberAvoidableMisclassificationCriterion(
# expected_accuracy=expected_accuracy,
# allow_reduction=allow_reduction,
# avoidable_misclassification_threshold=num_misclassifications,
# )
# for num_misclassifications in reversed([10_000, 15_000, 30_000, 50_000, 100_000])
# for expected_accuracy in [0.6]
# for allow_reduction in [False]
# }
).items()
},
gpu_device="cuda:2",
gpu_device="cuda:3",
),
}

Expand Down
69 changes: 36 additions & 33 deletions experiments/huffpost/compare_trigger_policies/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from modyn.config.schema.pipeline.trigger.drift.config import DataDriftTriggerConfig
from modyn.config.schema.pipeline.trigger.drift.criterion import (
DynamicQuantileThresholdCriterion,
DynamicRollingAverageThresholdCriterion,
)
from modyn.config.schema.pipeline.trigger.drift.detection_window.time_ import TimeWindowingStrategy
from modyn.config.schema.pipeline.trigger.performance.criterion import (
Expand All @@ -33,6 +34,7 @@
PerformanceTriggerConfig,
PerformanceTriggerEvaluationConfig,
)
from modyn.config.schema.pipeline.trigger.simple.data_amount import DataAmountTriggerConfig
from modyn.config.schema.pipeline.trigger.simple.time import TimeTriggerConfig
from modynclient.config.schema.client_config import ModynClientConfig, Supervisor

Expand Down Expand Up @@ -141,29 +143,26 @@ def construct_pipelines(experiment: Experiment) -> list[ModynPipelineConfig]:
# gpu_device="cuda:2",
# ),
# # data amount baselines
# 11: Experiment(
# name="hp-baseline-dataamount",
# eval_handlers=(
# construct_periodic_eval_handlers(intervals=PERIODIC_EVAL_INTERVAL, execution_time="manual")
# + construct_between_trigger_eval_handler("manual")
# ),
# data_amount_triggers={
# f"{num_samples}": DataAmountTriggerConfig(num_samples=num_samples)
# for num_samples in ([5_000, 80_000])
# # 2: 10_000, 20_000, 40_000
# # 3: 5_000, 80_000
# },
# gpu_device="cuda:3",
# ),
11: Experiment(
name="hp-baseline-dataamount",
eval_handlers=(
construct_periodic_eval_handlers(intervals=PERIODIC_EVAL_INTERVAL, execution_time="manual")
+ construct_between_trigger_eval_handler("manual")
),
data_amount_triggers={
f"{num_samples}": DataAmountTriggerConfig(num_samples=num_samples)
for num_samples in ([15_000, 30_000]) # 5_000, 10_000, 15_000, 20_000, 30_000, 40_000, 80_000
},
gpu_device="cuda:2",
),
# -------------------------------------------------------------------------------- #
# 2X: Drift triggers #
# -------------------------------------------------------------------------------- #
# TODO: rerun huffpost with different eval set
21: Experiment(
name="hp-datadrift-dynamic",
eval_handlers=(
construct_periodic_eval_handlers(intervals=PERIODIC_EVAL_INTERVAL, execution_time="manual")
+ construct_between_trigger_eval_handler("manual")
# + construct_between_trigger_eval_handler("manual") # not executed to speed things up
),
drift_detection_triggers={
f"{criterion_name}_int{detection_interval}_win{window_size}": DataDriftTriggerConfig(
Expand All @@ -185,28 +184,26 @@ def construct_pipelines(experiment: Experiment) -> list[ModynPipelineConfig]:
# multiprocessing across gpus
for detection_interval in [1500]
for window_size in ["1y"] # dataset specific
for decision_window_size in [15, 30] # TODO: check
for decision_window_size in [20] # more values
for criterion_name, criterion in (
{
f"mmd-quant-{quantile}-{decision_window_size}": DynamicQuantileThresholdCriterion(
window_size=decision_window_size, quantile=quantile
)
for quantile in [0.02, 0.05, 0.10, 0.15] # TODO: 0.3
for quantile in [0.05, 0.10, 0.15] # TODO: 0.3
# cuda3
}
|
{
f"mmd-rollavg-{deviation}-{decision_window_size}": DynamicRollingAverageThresholdCriterion(
window_size=decision_window_size, deviation=deviation, absolute=False
)
for deviation in reversed([0.5, 1.0, 2.0, 5.0]) # TODO: 0.05, 0.2,
# cuda3
}
# |
# {
# f"mmd-rollavg-{deviation}-{decision_window_size}": DynamicRollingAverageThresholdCriterion(
# window_size=decision_window_size, deviation=deviation, absolute=False
# )
# for deviation in [0.5, 1.0, 2.0, 5.0] # TODO: 0.05, 0.2,
# # 0:
# # 1:
# # 2: 0.5
# # 3: 1.0, 2.0
# }
).items()
},
gpu_device="cuda:0",
gpu_device="cuda:3",
),
# -------------------------------------------------------------------------------- #
# 3X: Performance triggers #
Expand Down Expand Up @@ -277,9 +274,15 @@ def construct_pipelines(experiment: Experiment) -> list[ModynPipelineConfig]:
allow_reduction=allow_reduction,
avoidable_misclassification_threshold=num_misclassifications,
)
for num_misclassifications in reversed([10000]) # 1000, 2000, 5000, 7500, 10000
for expected_accuracy in [0.5, 0.55, 0.6]
for allow_reduction in [False] # TODO: test with [False]
# for num_misclassifications in reversed([250, 500, 1000, 4000, 8000]) # 250, 500, 1000, 4000
# for expected_accuracy in [0.5, 0.6]
# for allow_reduction in [False]
for num_misclassifications, expected_accuracy, allow_reduction in [
(500, 0.5, False), # TODO:
(500, 0.6, False),
(250, 0.5, False),
(250, 0.6, False),
]
}
).items()
},
Expand Down
Loading

0 comments on commit e16f05a

Please sign in to comment.