From 6943f7a1eb8413eb2a0516559f340aafd5b464c4 Mon Sep 17 00:00:00 2001
From: Robin Holzinger
Date: Sat, 28 Sep 2024 16:15:51 +0200
Subject: [PATCH] Fix linting

---
 .../arxiv/compare_trigger_policies/run.py     | 11 +--
 .../pipeline_config.py                        |  6 +-
 .../huffpost/compare_trigger_policies/run.py  | 12 +--
 .../yearbook/compare_trigger_policies/run.py  | 87 ++++++++++++-------
 .../pipeline/trigger/drift/alibi_detect.py    | 32 ++-----
 .../pipeline_executor/evaluation_executor.py  | 10 +--
 .../pipeline_executor/pipeline_executor.py    |  6 +-
 .../cost/incorporation_latency_tracker.py     |  5 --
 .../internal/triggers/drift/detector/alibi.py | 23 +----
 9 files changed, 84 insertions(+), 108 deletions(-)

diff --git a/experiments/arxiv/compare_trigger_policies/run.py b/experiments/arxiv/compare_trigger_policies/run.py
index 9b18c2ec4..4a7a162f1 100644
--- a/experiments/arxiv/compare_trigger_policies/run.py
+++ b/experiments/arxiv/compare_trigger_policies/run.py
@@ -176,7 +176,7 @@ def construct_pipelines(experiment: Experiment) -> list[ModynPipelineConfig]:
                     limit_cur=window_size,
                     allow_overlap=False,
                 ),
-                # frist 200k of 2mio samples are warmup
+                # first 200k of 2mio samples are warmup
                 warmup_intervals=200_000 // detection_interval,
                 # triggering every 3 years during the warmup phase seems reasonable.
                 warmup_policy=TimeTriggerConfig(every="2y", start_timestamp=_FIRST_TIMESTAMP),
@@ -248,8 +248,7 @@ def construct_pipelines(experiment: Experiment) -> list[ModynPipelineConfig]:
                 )
                 for perf_threshold in [0.45, 0.5, 0.55]  # 0.6 --> too many triggers
             }
-            |
-            {
+            | {
                 f"dynamic-quant-{quantile}-{decision_window_size}": DynamicQuantilePerformanceThresholdCriterion(
                     metric="Accuracy",
                     quantile=quantile,
@@ -258,8 +257,7 @@ def construct_pipelines(experiment: Experiment) -> list[ModynPipelineConfig]:
                 for quantile in [0.05, 0.15]
                 for decision_window_size in [20]
             }
-            |
-            {
+            | {
                 f"dynamic-rollavg-{deviation}-{decision_window_size}": DynamicRollingAveragePerformanceThresholdCriterion(
                     metric="Accuracy",
                     deviation=deviation,
@@ -269,8 +267,7 @@ def construct_pipelines(experiment: Experiment) -> list[ModynPipelineConfig]:
                 for deviation in reversed([0.1, 0.2, 0.3])
                 for decision_window_size in [20]
             }
-            |
-            {
+            | {
                 f"num_misclass-{num_misclassifications}-exp-{expected_accuracy}-red-{allow_reduction}-": StaticNumberAvoidableMisclassificationCriterion(
                     expected_accuracy=expected_accuracy,
                     allow_reduction=allow_reduction,
diff --git a/experiments/huffpost/compare_trigger_policies/pipeline_config.py b/experiments/huffpost/compare_trigger_policies/pipeline_config.py
index 420b26368..823f9e3ed 100644
--- a/experiments/huffpost/compare_trigger_policies/pipeline_config.py
+++ b/experiments/huffpost/compare_trigger_policies/pipeline_config.py
@@ -58,9 +58,7 @@ def gen_pipeline_config(
                     name="default",
                     algorithm="AdamW",
                     source="PyTorch",
-                    param_groups=[
-                        OptimizerParamGroup(module="model", config={"lr": 0.00002, "weight_decay": 0.01})
-                    ],
+                    param_groups=[OptimizerParamGroup(module="model", config={"lr": 0.00002, "weight_decay": 0.01})],
                 )
             ],
             optimization_criterion=OptimizationCriterion(name="CrossEntropyLoss"),
@@ -81,7 +79,7 @@ def gen_pipeline_config(
         evaluation=EvaluationConfig(
             handlers=eval_handlers,
             device=gpu_device,
-            after_training_evaluation_workers=2, # one worker needs 8-9 GB of memory
+            after_training_evaluation_workers=2,  # one worker needs 8-9 GB of memory
             after_pipeline_evaluation_workers=2,
             datasets=[
                 EvalDataConfig(
diff --git a/experiments/huffpost/compare_trigger_policies/run.py b/experiments/huffpost/compare_trigger_policies/run.py
index 07800fd44..5ba0e2a65 100644
--- a/experiments/huffpost/compare_trigger_policies/run.py
+++ b/experiments/huffpost/compare_trigger_policies/run.py
@@ -197,8 +197,7 @@ def construct_pipelines(experiment: Experiment) -> list[ModynPipelineConfig]:
                 )
                 for quantile in [0.05, 0.10, 0.15]
             }
-            |
-            {
+            | {
                 f"mmd-rollavg-{deviation}-{decision_window_size}": DynamicRollingAverageThresholdCriterion(
                     window_size=decision_window_size, deviation=deviation, absolute=False
                 )
@@ -249,8 +248,7 @@ def construct_pipelines(experiment: Experiment) -> list[ModynPipelineConfig]:
                 )
                 for perf_threshold in [0.45, 0.5, 0.55, 0.6]
             }
-            |
-            {
+            | {
                 f"dynamic-quant-{quantile}-{decision_window_size}": DynamicQuantilePerformanceThresholdCriterion(
                     metric="Accuracy",
                     quantile=quantile,
@@ -259,8 +257,7 @@ def construct_pipelines(experiment: Experiment) -> list[ModynPipelineConfig]:
                 for quantile in [0.05, 0.15, 0.3]
                 for decision_window_size in [15, 30]
             }
-            |
-            {
+            | {
                 f"dynamic-rollavg-{deviation}-{decision_window_size}": DynamicRollingAveragePerformanceThresholdCriterion(
                     metric="Accuracy",
                     deviation=deviation,
@@ -270,8 +267,7 @@ def construct_pipelines(experiment: Experiment) -> list[ModynPipelineConfig]:
                 for deviation in reversed([0.1, 0.2, 0.3])
                 for decision_window_size in [15, 30]
             }
-            |
-            {
+            | {
                 f"num_misclass-{num_misclassifications}-exp-{expected_accuracy}-red-{allow_reduction}-": StaticNumberAvoidableMisclassificationCriterion(
                     expected_accuracy=expected_accuracy,
                     allow_reduction=allow_reduction,
diff --git a/experiments/yearbook/compare_trigger_policies/run.py b/experiments/yearbook/compare_trigger_policies/run.py
index 839e170ac..988aafc0f 100644
--- a/experiments/yearbook/compare_trigger_policies/run.py
+++ b/experiments/yearbook/compare_trigger_policies/run.py
@@ -145,16 +145,17 @@ def construct_pipelines(experiment: Experiment) -> list[ModynPipelineConfig]:
     0: Experiment(
         name="yb-dev",
         eval_handlers=(
-            construct_periodic_eval_handlers(intervals=[
-                ("current", "20h"), # total: 1 year
-                ("delta+-1y", f"{1*24+3}h"), # total: 3 years
-            ], execution_time="after_pipeline")
+            construct_periodic_eval_handlers(
+                intervals=[
+                    ("current", "20h"),  # total: 1 year
+                    ("delta+-1y", f"{1*24+3}h"),  # total: 3 years
+                ],
+                execution_time="after_pipeline",
+            )
             # construct_slicing_eval_handler("after_pipeline")
             # + construct_between_trigger_eval_handler("after_pipeline")
         ),
-        time_triggers={
-            "20y": TimeTriggerConfig(every="20d", start_timestamp=_FIRST_TIMESTAMP)
-        },
+        time_triggers={"20y": TimeTriggerConfig(every="20d", start_timestamp=_FIRST_TIMESTAMP)},
         gpu_device="cuda:0",
     ),
     # -------------------------------------------------------------------------------- #
@@ -164,8 +165,8 @@ def construct_pipelines(experiment: Experiment) -> list[ModynPipelineConfig]:
     1: Experiment(
         name="yb-baseline-time",
         eval_handlers=(
-            construct_periodic_eval_handlers(intervals=_ALL_PERIODIC_EVAL_INTERVALS, execution_time="after_pipeline") +
-            construct_between_trigger_eval_handler("after_pipeline")
+            construct_periodic_eval_handlers(intervals=_ALL_PERIODIC_EVAL_INTERVALS, execution_time="after_pipeline")
+            + construct_between_trigger_eval_handler("after_pipeline")
         ),
         time_triggers={
             f"{schedule}y": TimeTriggerConfig(every=f"{schedule}d", start_timestamp=_FIRST_TIMESTAMP)
@@ -177,8 +178,8 @@ def construct_pipelines(experiment: Experiment) -> list[ModynPipelineConfig]:
     2: Experiment(
         name="yb-baseline-dataamount",
         eval_handlers=(
-            construct_periodic_eval_handlers(intervals=_ALL_PERIODIC_EVAL_INTERVALS, execution_time="after_pipeline") +
-            construct_between_trigger_eval_handler("after_pipeline")
+            construct_periodic_eval_handlers(intervals=_ALL_PERIODIC_EVAL_INTERVALS, execution_time="after_pipeline")
+            + construct_between_trigger_eval_handler("after_pipeline")
         ),
         data_amount_triggers={
             f"{num_samples}": DataAmountTriggerConfig(num_samples=num_samples)
@@ -194,8 +195,8 @@ def construct_pipelines(experiment: Experiment) -> list[ModynPipelineConfig]:
     10: Experiment(
         name="yb-baseline-time",
         eval_handlers=(
-            construct_periodic_eval_handlers(intervals=BEST_PERIODIC_EVAL_INTERVAL, execution_time="manual") +
-            construct_between_trigger_eval_handler("manual")
+            construct_periodic_eval_handlers(intervals=BEST_PERIODIC_EVAL_INTERVAL, execution_time="manual")
+            + construct_between_trigger_eval_handler("manual")
        ),
         time_triggers={
             f"{schedule}y": TimeTriggerConfig(every=f"{schedule}d", start_timestamp=_FIRST_TIMESTAMP)
@@ -207,8 +208,8 @@ def construct_pipelines(experiment: Experiment) -> list[ModynPipelineConfig]:
     11: Experiment(
         name="yb-baseline-dataamount",
         eval_handlers=(
-            construct_periodic_eval_handlers(intervals=BEST_PERIODIC_EVAL_INTERVAL, execution_time="manual") +
-            construct_between_trigger_eval_handler("manual")
+            construct_periodic_eval_handlers(intervals=BEST_PERIODIC_EVAL_INTERVAL, execution_time="manual")
+            + construct_between_trigger_eval_handler("manual")
         ),
         data_amount_triggers={
             f"{num_samples}": DataAmountTriggerConfig(num_samples=num_samples)
@@ -223,15 +224,17 @@ def construct_pipelines(experiment: Experiment) -> list[ModynPipelineConfig]:
     20: Experiment(
         name="yb-datadrift-static",
         eval_handlers=(
-            construct_periodic_eval_handlers(intervals=BEST_PERIODIC_EVAL_INTERVAL, execution_time="manual") +
-            construct_between_trigger_eval_handler("manual")
+            construct_periodic_eval_handlers(intervals=BEST_PERIODIC_EVAL_INTERVAL, execution_time="manual")
+            + construct_between_trigger_eval_handler("manual")
         ),
         drift_detection_triggers={
             f"{criterion_name}_int{detection_interval}_win{window_size}": DataDriftTriggerConfig(
                 evaluation_interval_data_points=detection_interval,
                 windowing_strategy=TimeWindowingStrategy(
                     # overlap has no affect acc. to offline exploration
-                    limit_ref=window_size, limit_cur=window_size, allow_overlap=False
+                    limit_ref=window_size,
+                    limit_cur=window_size,
+                    allow_overlap=False,
                 ),
                 # with 30k samples and 84 years, 10y are roughly 30000/84*10=3500 samples
                 # hence, if we want ~10 years of warmup, to 3500/detection_interval warmup intervals
@@ -240,11 +243,7 @@ def construct_pipelines(experiment: Experiment) -> list[ModynPipelineConfig]:
                 warmup_policy=TimeTriggerConfig(every="3d", start_timestamp=_FIRST_TIMESTAMP),
                 # 5k samples are enough for drift detection, in yearbook we won't accumulate that many anyway
                 sample_size=5_000,
-                metrics={
-                    "mmd": AlibiDetectMmdDriftMetric(
-                        decision_criterion=criterion, device="gpu"
-                    )
-                },
+                metrics={"mmd": AlibiDetectMmdDriftMetric(decision_criterion=criterion, device="gpu")},
             )
             for detection_interval in [100, 250, 500, 1_000]
             for window_size in ["1d", "4d", "10d"]
@@ -302,8 +301,7 @@ def construct_pipelines(experiment: Experiment) -> list[ModynPipelineConfig]:
                 )
                 for quantile in [0.05, 0.1, 0.15, 0.3]
             }
-            |
-            {
+            | {
                 f"mmd-rollavg-{deviation}-{decision_window_size}": DynamicRollingAverageThresholdCriterion(
                     window_size=decision_window_size, deviation=deviation, absolute=False
                 )
@@ -363,8 +361,7 @@ def construct_pipelines(experiment: Experiment) -> list[ModynPipelineConfig]:
                 for quantile in [0.05, 0.15, 0.3]
                 for decision_window_size in [10, 20, 30]
             }
-            |
-            { # only executed for 250 and 500 detection intervals
+            | {  # only executed for 250 and 500 detection intervals
                 f"dynamic-rollavg-{deviation}-{decision_window_size}": DynamicRollingAveragePerformanceThresholdCriterion(
                     metric="Accuracy",
                     deviation=deviation,
@@ -374,8 +371,7 @@ def construct_pipelines(experiment: Experiment) -> list[ModynPipelineConfig]:
                 for deviation in reversed([0.05, 0.1, 0.2, 0.3])
                 for decision_window_size in [10, 20, 30]
             }
-            |
-            {
+            | {
                 # only executed for 250 detection interval
                 f"num_misclass-{num_misclassifications}-exp-{expected_accuracy}-red-{allow_reduction}-": StaticNumberAvoidableMisclassificationCriterion(
                     expected_accuracy=expected_accuracy,
@@ -418,8 +414,31 @@ def construct_pipelines(experiment: Experiment) -> list[ModynPipelineConfig]:
                 # - in that time regret stacks up, therefore we need to require more regret than 2000 for a training
                 for exchange_rate in (
                     x * SECONDS_PER_UNIT["d"]
-                    # numer of training samples that are equivalent to a training second
-                    for x in reversed([50, 75, 100, 120, 140, 160, 180, 200, 220, 240, 260, 280, 300, 400, 500, 750, 1000, 1500, 2000, 4000])
+                    # number of training samples that are equivalent to a training second
+                    for x in reversed(
+                        [
+                            50,
+                            75,
+                            100,
+                            120,
+                            140,
+                            160,
+                            180,
+                            200,
+                            220,
+                            240,
+                            260,
+                            280,
+                            300,
+                            400,
+                            500,
+                            750,
+                            1000,
+                            1500,
+                            2000,
+                            4000,
+                        ]
+                    )
                 )
             },
             gpu_device="cuda:2",
@@ -462,8 +481,10 @@ def construct_pipelines(experiment: Experiment) -> list[ModynPipelineConfig]:
                 for allow_reduction in [False]
                 for exchange_rate in (
                     x * SECONDS_PER_UNIT["d"]
-                    # numer of training samples that are equivalent to a training second
-                    for x in reversed([ 0.001, 0.01, 0.1, 0.25, 0.5, 0.75, 10, 100, 1000, 10_000, 15_000, 20_000, 50_000, 100_000])
+                    # number of training samples that are equivalent to a training second
+                    for x in reversed(
+                        [0.001, 0.01, 0.1, 0.25, 0.5, 0.75, 10, 100, 1000, 10_000, 15_000, 20_000, 50_000, 100_000]
+                    )
                 )
             },
             gpu_device="cuda:2",
diff --git a/modyn/config/schema/pipeline/trigger/drift/alibi_detect.py b/modyn/config/schema/pipeline/trigger/drift/alibi_detect.py
index bd05cf35f..e18f2824d 100644
--- a/modyn/config/schema/pipeline/trigger/drift/alibi_detect.py
+++ b/modyn/config/schema/pipeline/trigger/drift/alibi_detect.py
@@ -14,13 +14,9 @@
 
 
 class _AlibiDetectBaseDriftMetric(BaseMetric):
-    p_val: float = Field(
-        0.05, description="The p-value threshold for the drift detection."
-    )
+    p_val: float = Field(0.05, description="The p-value threshold for the drift detection.")
     x_ref_preprocessed: bool = Field(False)
-    preprocessor: AlibiDetectNLPreprocessor | None = Field(
-        None, description="Preprocessor function."
-    )
+    preprocessor: AlibiDetectNLPreprocessor | None = Field(None, description="Preprocessor function.")
 
 
 class AlibiDetectDeviceMixin(ModynBaseModel):
@@ -73,12 +69,8 @@ def validate_threshold_permutations(self) -> "AlibiDetectMmdDriftMetric":
         return self
 
 
-class AlibiDetectClassifierDriftMetric(
-    _AlibiDetectBaseDriftMetric, AlibiDetectDeviceMixin
-):
-    id: Literal["AlibiDetectClassifierDriftMetric"] = Field(
-        "AlibiDetectClassifierDriftMetric"
-    )
+class AlibiDetectClassifierDriftMetric(_AlibiDetectBaseDriftMetric, AlibiDetectDeviceMixin):
+    id: Literal["AlibiDetectClassifierDriftMetric"] = Field("AlibiDetectClassifierDriftMetric")
     classifier_id: str = Field(
         description="The model to use for classifications; has to be registered in alibi_detector.py"
     )
@@ -92,15 +84,11 @@ class AlibiDetectKSDriftMetric(
     id: Literal["AlibiDetectKSDriftMetric"] = Field("AlibiDetectKSDriftMetric")
 
 
-class AlibiDetectCVMDriftMetric(
-    _AlibiDetectBaseDriftMetric, _AlibiDetectCorrectionMixin
-):
+class AlibiDetectCVMDriftMetric(_AlibiDetectBaseDriftMetric, _AlibiDetectCorrectionMixin):
     id: Literal["AlibiDetectCVMDriftMetric"] = Field("AlibiDetectCVMDriftMetric")
 
 
-class AlibiDetectLSDDDriftMetric(
-    _AlibiDetectBaseDriftMetric, _AlibiDetectCorrectionMixin, AlibiDetectDeviceMixin
-):
+class AlibiDetectLSDDDriftMetric(_AlibiDetectBaseDriftMetric, _AlibiDetectCorrectionMixin, AlibiDetectDeviceMixin):
     id: Literal["AlibiDetectLSDDDriftMetric"] = Field("AlibiDetectLSDDDriftMetric")
 
 
@@ -113,12 +101,8 @@ class AlibiDetectFETDriftMetric(
     n_features: int | None = Field(None)
 
 
-class AlibiDetectChiSquareDriftMetric(
-    _AlibiDetectBaseDriftMetric, _AlibiDetectCorrectionMixin
-):
-    id: Literal["AlibiDetectChiSquareDriftMetric"] = Field(
-        "AlibiDetectChiSquareDriftMetric"
-    )
+class AlibiDetectChiSquareDriftMetric(_AlibiDetectBaseDriftMetric, _AlibiDetectCorrectionMixin):
+    id: Literal["AlibiDetectChiSquareDriftMetric"] = Field("AlibiDetectChiSquareDriftMetric")
     n_features: int | None = Field(None)
 
 
diff --git a/modyn/supervisor/internal/pipeline_executor/evaluation_executor.py b/modyn/supervisor/internal/pipeline_executor/evaluation_executor.py
index 1feadffee..edbb36074 100644
--- a/modyn/supervisor/internal/pipeline_executor/evaluation_executor.py
+++ b/modyn/supervisor/internal/pipeline_executor/evaluation_executor.py
@@ -179,7 +179,7 @@ def run_pipeline_evaluations(
 
     def run_post_pipeline_evaluations(self, manual_run: bool = False, num_workers: int | None = None) -> SupervisorLogs:
         """Evaluate the trained models after the core pipeline and store the results.
-        
+
         Args:
             manual_run: If True, only the evaluations that are marked as manual will be executed.
             num_workers: The number of workers to use for the evaluations. If None, the number of workers will be
@@ -230,9 +230,7 @@ def run_post_pipeline_evaluations(self, manual_run: bool = False, num_workers: i
                 sample_time=-1,
                 trigger_idx=-1,
             ),
-            num_workers=(
-                num_workers if num_workers else self.pipeline.evaluation.after_pipeline_evaluation_workers
-            )
+            num_workers=(num_workers if num_workers else self.pipeline.evaluation.after_pipeline_evaluation_workers),
         )
 
         return logs
@@ -481,9 +479,9 @@ def eval_executor_multi_pipeline(pipelines_dir: Path, num_workers: int, pids: li
         print("Path not found")
         sys.exit(1)
 
-    num_workers = int(input("Enter number of workers (<= 0 will use the pipeline default): "))
+    num_workers: int = int(input("Enter number of workers (<= 0 will use the pipeline default): "))
     if num_workers <= 0:
-        num_workers = None
+        num_workers = 1
 
     if single_pipeline_mode.lower() == "y":
         p_id = int(input("Enter pipeline id: "))
diff --git a/modyn/supervisor/internal/pipeline_executor/pipeline_executor.py b/modyn/supervisor/internal/pipeline_executor/pipeline_executor.py
index 747b04d2b..10c6b527d 100644
--- a/modyn/supervisor/internal/pipeline_executor/pipeline_executor.py
+++ b/modyn/supervisor/internal/pipeline_executor/pipeline_executor.py
@@ -644,8 +644,10 @@ def _handle_single_trigger(
 
         # fetch latest training time from tracker data
         tracking_df_train = s.tracking[PipelineStage.TRAIN.name]
-        max_trigger_idx = tracking_df_train['trigger_idx'].max()
-        time_at_trainer = float(tracking_df_train[tracking_df_train["trigger_idx"] == max_trigger_idx]["train_time_at_trainer"][0])
+        max_trigger_idx = tracking_df_train["trigger_idx"].max()
+        time_at_trainer = float(
+            tracking_df_train[tracking_df_train["trigger_idx"] == max_trigger_idx]["train_time_at_trainer"][0]
+        )
         last_training_seconds = time_at_trainer / 1_000  # ms to s
         self.trigger.inform_new_model(model_id, num_samples_in_trigger, last_training_seconds)
diff --git a/modyn/supervisor/internal/triggers/cost/incorporation_latency_tracker.py b/modyn/supervisor/internal/triggers/cost/incorporation_latency_tracker.py
index 2d4712edb..613c42cc5 100644
--- a/modyn/supervisor/internal/triggers/cost/incorporation_latency_tracker.py
+++ b/modyn/supervisor/internal/triggers/cost/incorporation_latency_tracker.py
@@ -42,11 +42,6 @@ def add_latency(self, regret: float, batch_duration: float) -> float:
 
         return self._cumulative_latency_regret
 
-    def inform_trigger(self) -> None:
-        """Informs the tracker about a trigger which will reset the counter."""
-        self._current_regret = 0
-        self._cumulative_latency_regret = 0
-
     def add_latencies(
         self,
         regrets: list[tuple[int, float]],
diff --git a/modyn/supervisor/internal/triggers/drift/detector/alibi.py b/modyn/supervisor/internal/triggers/drift/detector/alibi.py
index 9468aaa5c..8bcfaedcb 100644
--- a/modyn/supervisor/internal/triggers/drift/detector/alibi.py
+++ b/modyn/supervisor/internal/triggers/drift/detector/alibi.py
@@ -32,24 +32,13 @@
 )
 from modyn.supervisor.internal.triggers.drift.detector.drift import DriftDetector
 
-_AlibiMetrics = (
-    MMDDrift
-    | ClassifierDrift
-    | ChiSquareDrift
-    | CVMDrift
-    | FETDrift
-    | KSDrift
-    | LSDDDrift
-    | MMDDrift
-)
+_AlibiMetrics = MMDDrift | ClassifierDrift | ChiSquareDrift | CVMDrift | FETDrift | KSDrift | LSDDDrift | MMDDrift
 
 
 class AlibiDriftDetector(DriftDetector):
     def __init__(self, metrics_config: dict[str, AlibiDetectDriftMetric]):
         alibi_metrics_config = {
-            metric_ref: config
-            for metric_ref, config in metrics_config.items()
-            if config.id.startswith("AlibiDetect")
+            metric_ref: config for metric_ref, config in metrics_config.items() if config.id.startswith("AlibiDetect")
         }
         super().__init__(alibi_metrics_config)
 
@@ -123,9 +112,7 @@ def _alibi_detect_metric_factory(config: AlibiDetectDriftMetric, embeddings_ref:
     kwargs = {}
 
     if config.preprocessor:
-        kwargs.update(
-            {"preprocess_fn": config.preprocessor.gen_preprocess_fn(config.device)}
-        )
+        kwargs.update({"preprocess_fn": config.preprocessor.gen_preprocess_fn(config.device)})
 
     if isinstance(config, AlibiDetectMmdDriftMetric):
         assert kernel is not None
@@ -202,6 +189,4 @@ def _alibi_detect_metric_factory(config: AlibiDetectDriftMetric, embeddings_ref:
             **kwargs,
         )
 
-    raise NotImplementedError(
-        f"Metric {config.id} is not supported in AlibiDetectDriftMetric."
-    )
+    raise NotImplementedError(f"Metric {config.id} is not supported in AlibiDetectDriftMetric.")