diff --git a/src/optimagic/__init__.py b/src/optimagic/__init__.py index 28e912234..04213b265 100644 --- a/src/optimagic/__init__.py +++ b/src/optimagic/__init__.py @@ -1,6 +1,6 @@ from __future__ import annotations -from optimagic import constraints, mark, utilities +from optimagic import constraints, mark, timing, utilities from optimagic.algorithms import algos from optimagic.benchmarking.benchmark_reports import ( convergence_report, @@ -102,4 +102,5 @@ "History", "__version__", "algos", + "timing", ] diff --git a/src/optimagic/benchmarking/run_benchmark.py b/src/optimagic/benchmarking/run_benchmark.py index 9e2ce4cd8..cd6d844c4 100644 --- a/src/optimagic/benchmarking/run_benchmark.py +++ b/src/optimagic/benchmarking/run_benchmark.py @@ -209,7 +209,7 @@ def _process_one_result(optimize_result, problem): criterion_history = history.fun criterion_history = np.clip(criterion_history, _solution_crit, np.inf) batches_history = history.batches - time_history = history.time + time_history = history.start_time return { "params_history": params_history_flat, diff --git a/src/optimagic/optimization/convergence_report.py b/src/optimagic/optimization/convergence_report.py index bb44a828b..02fa76c4f 100644 --- a/src/optimagic/optimization/convergence_report.py +++ b/src/optimagic/optimization/convergence_report.py @@ -1,16 +1,14 @@ import numpy as np +from numpy.typing import NDArray -from optimagic.optimization.history_tools import get_history_arrays +from optimagic.optimization.history import History -def get_convergence_report(history, direction): - history_arrs = get_history_arrays( - history=history, - direction=direction, - ) +def get_convergence_report(history: History) -> dict[str, dict[str, float]] | None: + is_accepted = history.is_accepted - critvals = history_arrs.fun[history_arrs.is_accepted] - params = history_arrs.params[history_arrs.is_accepted] + critvals = np.array(history.fun, dtype=np.float64)[is_accepted] + params = np.array(history.flat_params, dtype=np.float64)[is_accepted] if len(critvals) < 2: out = None @@ -35,7 +33,7 @@ def get_convergence_report(history, direction): return out -def _get_max_f_changes(critvals): +def _get_max_f_changes(critvals: NDArray[np.float64]) -> tuple[float, float]: best_val = critvals[-1] worst_val = critvals[0] @@ -47,7 +45,7 @@ def _get_max_f_changes(critvals): return max_change_rel, max_change_abs -def _get_max_x_changes(params): +def _get_max_x_changes(params: NDArray[np.float64]) -> tuple[float, float]: best_x = params[-1] diffs = params - best_x denom = np.clip(np.abs(best_x), 0.1, np.inf) diff --git a/src/optimagic/optimization/history.py b/src/optimagic/optimization/history.py index aa9cd0bca..14df3a67e 100644 --- a/src/optimagic/optimization/history.py +++ b/src/optimagic/optimization/history.py @@ -1,35 +1,70 @@ import warnings from dataclasses import dataclass -from typing import Any +from functools import partial +from typing import Any, Callable, Iterable, Literal import numpy as np +import pandas as pd +from numpy.typing import NDArray +from pybaum import leaf_names, tree_just_flatten -from optimagic.typing import EvalTask, PyTree +from optimagic.parameters.tree_registry import get_registry +from optimagic.timing import CostModel +from optimagic.typing import Direction, EvalTask, PyTree @dataclass(frozen=True) class HistoryEntry: params: PyTree fun: float | None - time: float + start_time: float + stop_time: float task: EvalTask class History: # TODO: add counters for the relevant evaluations - def __init__(self) -> None: - 
self._params: list[PyTree] = []
-        self._fun: list[float | None] = []
-        self._time: list[float] = []
-        self._batches: list[int] = []
-        self._task: list[EvalTask] = []
+    def __init__(
+        self,
+        direction: Direction,
+        params: list[PyTree] | None = None,
+        fun: list[float | None] | None = None,
+        start_time: list[float] | None = None,
+        stop_time: list[float] | None = None,
+        batches: list[int] | None = None,
+        task: list[EvalTask] | None = None,
+    ) -> None:
+        """Initialize a history.
+
+        The history must know the direction of the optimization problem in order to
+        correctly return monotone sequences. The history can be initialized empty,
+        e.g. to collect entries during an optimization, or with data, e.g. to
+        recover a history from a log.
+
+        """
+        _validate_args_are_all_none_or_lists_of_same_length(
+            params, fun, start_time, stop_time, batches, task
+        )
+
+        self.direction = direction
+        self._params = params if params is not None else []
+        self._fun = fun if fun is not None else []
+        self._start_time = start_time if start_time is not None else []
+        self._stop_time = stop_time if stop_time is not None else []
+        self._batches = batches if batches is not None else []
+        self._task = task if task is not None else []
+
+    # ==================================================================================
+    # Methods to add entries to the history
+    # ==================================================================================
 
     def add_entry(self, entry: HistoryEntry, batch_id: int | None = None) -> None:
         if batch_id is None:
             batch_id = self._get_next_batch_id()
         self._params.append(entry.params)
         self._fun.append(entry.fun)
-        self._time.append(entry.time)
+        self._start_time.append(entry.start_time)
+        self._stop_time.append(entry.stop_time)
         self._batches.append(batch_id)
         self._task.append(entry.task)
 
@@ -50,38 +85,210 @@ def add_batch(
         for entry, id in zip(batch, ids, strict=False):
             self.add_entry(entry, id)
 
+    def _get_next_batch_id(self) -> int:
+        if not self._batches:
+            batch = 0
+        else:
+            batch = self._batches[-1] + 1
+        return batch
+
+    # ==================================================================================
+    # Properties and methods to access the history
+    # ==================================================================================
+
+    # Function data, function value, and monotone function value
+    # ----------------------------------------------------------------------------------
+
+    def fun_data(self, cost_model: CostModel, monotone: bool) -> pd.DataFrame:
+        """Return the function value data.
+
+        Args:
+            cost_model: The cost model that is used to calculate the time measure.
+            monotone: Whether to return the monotone function values.
+
+        Returns:
+            pd.DataFrame: The function value data. The columns are 'fun', 'task' and
+                'time'. If monotone is True, the 'fun' column contains the monotone
+                function values.
+
+        """
+        if monotone:
+            fun: list[float | None] | NDArray[np.float64] = self.monotone_fun
+        else:
+            fun = self.fun
+        task = _task_as_categorical(self.task)
+        time = self._get_time(cost_model)
+        return pd.DataFrame({"fun": fun, "task": task, "time": time})
+
+    @property
+    def fun(self) -> list[float | None]:
+        return self._fun
+
+    @property
+    def monotone_fun(self) -> NDArray[np.float64]:
+        """The monotone function value of the history.
+
+        If the value is None, the output at that position is nan.
+
+        """
+        return _calculate_monotone_sequence(self.fun, direction=self.direction)
+
+    # Acceptance
+    # ----------------------------------------------------------------------------------
+
+    @property
+    def is_accepted(self) -> NDArray[np.bool_]:
+        """Boolean indicator whether a function value is accepted.
+
+        A function value is accepted if it is less than or equal to the monotone
+        function value at the same position in the case of minimization, or greater
+        than or equal to it in the case of maximization. If the value is None, the
+        output at that position is False.
+
+        """
+        fun_arr = np.array(self.fun, dtype=np.float64)
+        if self.direction == Direction.MINIMIZE:
+            return fun_arr <= self.monotone_fun
+        elif self.direction == Direction.MAXIMIZE:
+            return fun_arr >= self.monotone_fun
+
+    # Parameter data, params, flat params, and flat param names
+    # ----------------------------------------------------------------------------------
+
+    def params_data(self, cost_model: CostModel) -> pd.DataFrame:
+        """Return the parameter data.
+
+        Args:
+            cost_model: The cost model that is used to calculate the time measure.
+
+        Returns:
+            pd.DataFrame: The parameter data. The columns are: 'name' (the parameter
+                names), 'value' (the parameter values), 'task', and 'time'.
+
+        """
+        wide = pd.DataFrame(self.flat_params, columns=self.flat_param_names)
+        wide["task"] = _task_as_categorical(self.task)
+        wide["time"] = self._get_time(cost_model)
+        data = pd.melt(
+            wide, var_name="name", value_name="value", id_vars=["task", "time"]
+        )
+        return data.reindex(columns=["name", "value", "task", "time"])
+
     @property
     def params(self) -> list[PyTree]:
         return self._params
 
     @property
-    def fun(self) -> list[float | None]:
-        return self._fun
+    def flat_params(self) -> list[list[float]]:
+        return _get_flat_params(self._params)
 
     @property
-    def time(self) -> list[float]:
-        arr = np.array(self._time)
-        return (arr - arr[0]).tolist()
+    def flat_param_names(self) -> list[str]:
+        return _get_flat_param_names(param=self._params[0])
+
+    # Time
+    # ----------------------------------------------------------------------------------
+
+    def _get_time(
+        self, cost_model: CostModel | Literal["wall_time"]
+    ) -> NDArray[np.float64]:
+        """Return the cumulative time measure.
+
+        Args:
+            cost_model: The cost model that is used to calculate the time measure. If
+                "wall_time", the wall time is returned.
+
+        Returns:
+            np.ndarray: The time measure.
+
+        """
+        if not isinstance(cost_model, CostModel) and cost_model != "wall_time":
+            raise ValueError("cost_model must be a CostModel or 'wall_time'.")
+
+        if cost_model == "wall_time":
+            return np.array(self.stop_time, dtype=np.float64) - self.start_time[0]
+
+        fun_time = self._get_time_per_task(
+            task=EvalTask.FUN, cost_factor=cost_model.fun
+        )
+        jac_time = self._get_time_per_task(
+            task=EvalTask.JAC, cost_factor=cost_model.jac
+        )
+        fun_and_jac_time = self._get_time_per_task(
+            task=EvalTask.FUN_AND_JAC, cost_factor=cost_model.fun_and_jac
+        )
+
+        time = fun_time + jac_time + fun_and_jac_time
+        batch_aware_time = _apply_to_batch(
+            data=time,
+            batch_ids=self.batches,
+            func=cost_model.aggregate_batch_time,
+        )
+        return np.cumsum(batch_aware_time)
+
+    def _get_time_per_task(
+        self, task: EvalTask, cost_factor: float | None
+    ) -> NDArray[np.float64]:
+        """Return the time measure per task.
+
+        Args:
+            task: The task for which the time is calculated.
+            cost_factor: The cost factor used to calculate the time. If None, the time
+                is the difference between the start and stop time, otherwise the time
+                is given by the cost factor.
+
+        Returns:
+            np.ndarray: The time per task. For entries where the task is not the
+                requested task, the time is 0.
+
+        """
+        dummy_task = np.array([1 if t == task else 0 for t in self.task])
+        factor: float | NDArray[np.float64]
+        if cost_factor is None:
+            factor = np.array(self.stop_time, dtype=np.float64) - np.array(
+                self.start_time, dtype=np.float64
+            )
+        else:
+            factor = cost_factor
+
+        return factor * dummy_task
+
+    @property
+    def start_time(self) -> list[float]:
+        return self._start_time
+
+    @property
+    def stop_time(self) -> list[float]:
+        return self._stop_time
+
+    # Batches
+    # ----------------------------------------------------------------------------------
 
     @property
     def batches(self) -> list[int]:
         return self._batches
 
+    # Tasks
+    # ----------------------------------------------------------------------------------
+
     @property
     def task(self) -> list[EvalTask]:
         return self._task
 
-    def _get_next_batch_id(self) -> int:
-        if not self._batches:
-            batch = 0
-        else:
-            batch = self._batches[-1] + 1
-        return batch
-
     # ==================================================================================
     # Add deprecated dict access
     # ==================================================================================
 
+    @property
+    def time(self) -> list[float]:
+        msg = (
+            "The attribute `time` of History is deprecated and will be removed in a "
+            "future version. Use the `start_time` property instead."
+        )
+        warnings.warn(msg, FutureWarning)
+        arr = np.array(self._start_time)
+        return (arr - arr[0]).tolist()
+
     @property
     def criterion(self) -> list[float | None]:
         msg = "The attribute `criterion` of History is deprecated. Use `fun` instead."
@@ -90,7 +297,10 @@
 
     @property
     def runtime(self) -> list[float]:
-        msg = "The attribute `runtime` of History is deprecated. Use `time` instead."
+        msg = (
+            "The attribute `runtime` of History is deprecated and will be removed in "
+            "a future version. Use the `start_time` property instead."
+        )
         warnings.warn(msg, FutureWarning)
         return self.time
 
@@ -98,3 +308,143 @@ def __getitem__(self, key: str) -> Any:
         msg = "dict-like access to History is deprecated. Use attribute access instead."
warnings.warn(msg, FutureWarning)
         return getattr(self, key)
+
+
+# ======================================================================================
+# Methods
+# ======================================================================================
+
+
+def _get_flat_params(params: list[PyTree]) -> list[list[float]]:
+    if len(params) > 0 and _is_1d_array(params[0]):
+        # fast path
+        flatten = lambda x: x.tolist()
+    else:
+        registry = get_registry(extended=True)
+        flatten = partial(tree_just_flatten, registry=registry)
+
+    return [flatten(p) for p in params]
+
+
+def _get_flat_param_names(param: PyTree) -> list[str]:
+    if _is_1d_array(param):
+        # fast path
+        return np.arange(param.size).astype(str).tolist()
+
+    registry = get_registry(extended=True)
+    return leaf_names(param, registry=registry)
+
+
+def _is_1d_array(param: PyTree) -> bool:
+    return isinstance(param, np.ndarray) and param.ndim == 1
+
+
+def _calculate_monotone_sequence(
+    sequence: list[float | None], direction: Direction
+) -> NDArray[np.float64]:
+    sequence_arr = np.array(sequence, dtype=np.float64)  # converts None to nan
+    nan_mask = np.isnan(sequence_arr)
+
+    if direction == Direction.MINIMIZE:
+        sequence_arr[nan_mask] = np.inf
+        out = np.minimum.accumulate(sequence_arr)
+    elif direction == Direction.MAXIMIZE:
+        sequence_arr[nan_mask] = -np.inf
+        out = np.maximum.accumulate(sequence_arr)
+
+    out[nan_mask] = np.nan
+    return out
+
+
+# ======================================================================================
+# Misc
+# ======================================================================================
+
+
+def _validate_args_are_all_none_or_lists_of_same_length(
+    *args: list[Any] | None,
+) -> None:
+    all_none = all(arg is None for arg in args)
+    all_list = all(isinstance(arg, list) for arg in args)
+
+    if not all_none:
+        if all_list:
+            unique_list_lengths = set(map(len, args))  # type: ignore[arg-type]
+
+            if len(unique_list_lengths) != 1:
+                raise ValueError("All list arguments must have the same length.")
+
+        else:
+            raise ValueError("All arguments must be lists of the same length or None.")
+
+
+def _task_as_categorical(task: list[EvalTask]) -> pd.Categorical:
+    return pd.Categorical(
+        [t.value for t in task], categories=[t.value for t in EvalTask]
+    )
+
+
+def _apply_to_batch(
+    data: NDArray[np.float64],
+    batch_ids: list[int],
+    func: Callable[[Iterable[float]], float],
+) -> NDArray[np.float64]:
+    """Apply a reduction operator on batches of data.
+
+    This function assumes that batch_ids is non-empty and sorted.
+
+    Args:
+        data: 1d array with data.
+        batch_ids: A list with batch ids whose length is equal to the size of data.
+            Values need to be sorted and can be repeated.
+        func: A reduction function that takes an iterable of floats as input (e.g., a
+            numpy.ndarray or list) and returns a scalar.
+
+    Returns:
+        The transformed data. Has the same length as data. For each batch, the result of
+        the reduction operation is stored at the first index of that batch, and all
+        other values of that batch are set to zero.
+
+    """
+    batch_starts = _get_batch_start(batch_ids)
+    batch_stops = [*batch_starts[1:], len(data)]
+
+    batch_results = []
+    for start, stop in zip(batch_starts, batch_stops, strict=True):
+        batch = batch_ids[start]  # batch id, used in error messages
+        batch_data = data[start:stop]
+
+        try:
+            reduced = func(batch_data)
+        except Exception as e:
+            msg = (
+                f"Calling function {func.__name__} on batch {batch} of the History "
+                f"raised an Exception. Please verify that {func.__name__} is "
+                "well-defined and takes a list of floats as input and returns a scalar."
+            )
+            raise ValueError(msg) from e
+
+        if not np.isscalar(reduced):
+            msg = (
+                f"Function {func.__name__} did not return a scalar for batch {batch}. "
+                f"Please verify that {func.__name__} returns a scalar when called on a "
+                "list of floats."
+            )
+            raise ValueError(msg)
+
+        batch_results.append(reduced)
+
+    out = np.zeros_like(data)
+    out[batch_starts] = batch_results
+    return out
+
+
+def _get_batch_start(batch_ids: list[int]) -> list[int]:
+    """Get start indices of batches.
+
+    This function assumes that batch_ids is non-empty and sorted.
+
+    """
+    ids_arr = np.array(batch_ids, dtype=np.int64)
+    indices = np.where(ids_arr[:-1] != ids_arr[1:])[0] + 1
+    return np.insert(indices, 0, 0).tolist()
diff --git a/src/optimagic/optimization/history_tools.py b/src/optimagic/optimization/history_tools.py
deleted file mode 100644
index 770f1930e..000000000
--- a/src/optimagic/optimization/history_tools.py
+++ /dev/null
@@ -1,88 +0,0 @@
-from dataclasses import dataclass
-from functools import partial
-
-import numpy as np
-from numpy.typing import NDArray
-from pybaum import tree_just_flatten
-
-from optimagic.optimization.history import History
-from optimagic.parameters.tree_registry import get_registry
-from optimagic.typing import Direction
-
-
-@dataclass(frozen=True)
-class HistoryArrays:
-    fun: NDArray[np.float64]
-    params: NDArray[np.float64]
-    time: NDArray[np.float64]
-    monotone_fun: NDArray[np.float64]
-    is_accepted: NDArray[np.bool_]
-
-    @property
-    def criterion(self) -> NDArray[np.float64]:
-        return self.fun
-
-    @property
-    def monotone_criterion(self) -> NDArray[np.float64]:
-        return self.monotone_fun
-
-    def __getitem__(self, key: str) -> NDArray[np.float64] | NDArray[np.bool_]:
-        return getattr(self, key)
-
-
-def get_history_arrays(history: History, direction: Direction) -> HistoryArrays:
-    # ==================================================================================
-    # Handle deprecations for now
-    # ==================================================================================
-    assert direction in [Direction.MINIMIZE, Direction.MAXIMIZE]
-
-    if isinstance(history, dict):
-        parhist = history["params"]
-        funhist = history["criterion"]
-        timehist = history["runtime"]
-
-    else:
-        parhist = history.params
-        funhist = history.fun
-        timehist = history.time
-
-    # ==================================================================================
-    # Filter out evaluations that do not have a `fun` value
-    # ==================================================================================
-
-    parhist = [p for p, f in zip(parhist, funhist, strict=False) if f is not None]
-    timehist = [t for t, f in zip(timehist, funhist, strict=False) if f is not None]
-    funhist = [f for f in funhist if f is not None]
-
-    # ==================================================================================
-
-    is_flat = (
-        len(parhist) > 0 and isinstance(parhist[0], np.ndarray) and parhist[0].ndim == 1
-    )
-    if is_flat:
-        to_internal = lambda x: x.tolist()
-    else:
-        registry = get_registry(extended=True)
-        to_internal = partial(tree_just_flatten, registry=registry)
-
-    critvals = np.array(funhist)
-
-    params = np.array([to_internal(p) for p in parhist])
-
-    runtimes = np.array(timehist)
-
-    if direction == Direction.MINIMIZE:
-        monotone = np.minimum.accumulate(critvals)
-        is_accepted = critvals <= monotone
-    elif direction ==
Direction.MAXIMIZE: - monotone = np.maximum.accumulate(critvals) - is_accepted = critvals >= monotone - - out = HistoryArrays( - fun=critvals, - params=params, - time=runtimes, - monotone_fun=monotone, - is_accepted=is_accepted, - ) - return out diff --git a/src/optimagic/optimization/internal_optimization_problem.py b/src/optimagic/optimization/internal_optimization_problem.py index f0951df74..c15e32fe0 100644 --- a/src/optimagic/optimization/internal_optimization_problem.py +++ b/src/optimagic/optimization/internal_optimization_problem.py @@ -68,7 +68,7 @@ def __init__( self._error_handling = error_handling self._error_penalty_func = error_penalty_func self._batch_evaluator = batch_evaluator - self._history = History() + self._history = History(direction) self._linear_constraints = linear_constraints self._nonlinear_constraints = nonlinear_constraints self._logger = logger @@ -177,7 +177,7 @@ def exploration_fun( def with_new_history(self) -> Self: new = copy(self) - new._history = History() + new._history = History(self.direction) return new def with_error_handling(self, error_handling: ErrorHandling) -> Self: @@ -306,7 +306,7 @@ def _pure_evaluate_fun( issued. """ - now = time.perf_counter() + start_time = time.perf_counter() params = self._converter.params_from_internal(x) traceback: None | str = None try: @@ -333,17 +333,19 @@ def _pure_evaluate_fun( algo_fun_value, hist_fun_value = _process_fun_value( value=fun_value, solver_type=self._solver_type, direction=self._direction ) + stop_time = time.perf_counter() hist_entry = HistoryEntry( params=params, fun=hist_fun_value, - time=now, + start_time=start_time, + stop_time=stop_time, task=EvalTask.FUN, ) log_entry = IterationState( params=params, - timestamp=now, + timestamp=start_time, scalar_fun=hist_fun_value, valid=not bool(traceback), raw_fun=fun_value, @@ -359,7 +361,7 @@ def _pure_evaluate_jac( if self._jac is None: raise ValueError("The jac function is not defined.") - now = time.perf_counter() + start_time = time.perf_counter() traceback: None | str = None params = self._converter.params_from_internal(x) @@ -389,16 +391,19 @@ def _pure_evaluate_jac( value=jac_value, direction=self._direction, converter=self._converter, x=x ) + stop_time = time.perf_counter() + hist_entry = HistoryEntry( params=params, fun=None, - time=now, + start_time=start_time, + stop_time=stop_time, task=EvalTask.JAC, ) log_entry = IterationState( params=params, - timestamp=now, + timestamp=start_time, scalar_fun=None, valid=not bool(traceback), raw_fun=None, @@ -415,7 +420,7 @@ def _pure_evaluate_numerical_fun_and_jac( HistoryEntry, IterationState, ]: - now = time.perf_counter() + start_time = time.perf_counter() traceback: None | str = None def func(x: NDArray[np.float64]) -> SpecificFunctionValue: @@ -466,16 +471,19 @@ def func(x: NDArray[np.float64]) -> SpecificFunctionValue: if self._direction == Direction.MAXIMIZE: jac_value = -jac_value + stop_time = time.perf_counter() + hist_entry = HistoryEntry( params=self._converter.params_from_internal(x), fun=hist_fun_value, - time=now, + start_time=start_time, + stop_time=stop_time, task=EvalTask.FUN_AND_JAC, ) log_entry = IterationState( params=self._converter.params_from_internal(x), - timestamp=now, + timestamp=start_time, scalar_fun=hist_fun_value, valid=not bool(traceback), raw_fun=fun_value, @@ -488,7 +496,7 @@ def func(x: NDArray[np.float64]) -> SpecificFunctionValue: def _pure_exploration_fun( self, x: NDArray[np.float64] ) -> tuple[float, HistoryEntry, IterationState]: - now = time.perf_counter() + 
start_time = time.perf_counter() params = self._converter.params_from_internal(x) traceback: None | str = None @@ -521,16 +529,19 @@ def _pure_exploration_fun( if self._direction == Direction.MAXIMIZE: hist_fun_value = np.inf + stop_time = time.perf_counter() + hist_entry = HistoryEntry( params=params, fun=hist_fun_value, - time=now, + start_time=start_time, + stop_time=stop_time, task=EvalTask.EXPLORATION, ) log_entry = IterationState( params=params, - timestamp=now, + timestamp=start_time, scalar_fun=hist_fun_value, valid=not bool(traceback), raw_fun=fun_value, @@ -550,7 +561,7 @@ def _pure_evaluate_fun_and_jac( if self._fun_and_jac is None: raise ValueError("The fun_and_jac function is not defined.") - now = time.perf_counter() + start_time = time.perf_counter() traceback: None | str = None params = self._converter.params_from_internal(x) @@ -590,16 +601,19 @@ def _pure_evaluate_fun_and_jac( if self._direction == Direction.MAXIMIZE: out_jac = -out_jac + stop_time = time.perf_counter() + hist_entry = HistoryEntry( params=params, fun=hist_fun_value, - time=now, + start_time=start_time, + stop_time=stop_time, task=EvalTask.FUN_AND_JAC, ) log_entry = IterationState( params=params, - timestamp=now, + timestamp=start_time, scalar_fun=hist_fun_value, valid=not bool(traceback), raw_fun=fun_value, diff --git a/src/optimagic/optimization/process_results.py b/src/optimagic/optimization/process_results.py index 0817649f5..64d764174 100644 --- a/src/optimagic/optimization/process_results.py +++ b/src/optimagic/optimization/process_results.py @@ -5,9 +5,10 @@ from optimagic.optimization.algorithm import InternalOptimizeResult from optimagic.optimization.convergence_report import get_convergence_report +from optimagic.optimization.history import History from optimagic.optimization.optimize_result import MultistartInfo, OptimizeResult from optimagic.parameters.conversion import Converter -from optimagic.typing import AggregationLevel, Direction, PyTree +from optimagic.typing import AggregationLevel, Direction, EvalTask, PyTree from optimagic.utilities import isscalar @@ -41,9 +42,7 @@ def process_single_result( fun = -fun if raw_res.history is not None: - conv_report = get_convergence_report( - history=raw_res.history, direction=extra_fields.direction - ) + conv_report = get_convergence_report(raw_res.history) else: conv_report = None @@ -109,15 +108,16 @@ def process_multistart_result( # create a convergence report for the multistart optimization; This is not # the same as the convergence report for the individual local optimizations. 
# ==============================================================================
-    crit_hist = [opt.fun for opt in info.local_optima]
-    params_hist = [opt.params for opt in info.local_optima]
-    time_hist = [np.nan for opt in info.local_optima]
-    hist = {"criterion": crit_hist, "params": params_hist, "runtime": time_hist}
-
-    conv_report = get_convergence_report(
-        history=hist,
+    report_history = History(
         direction=extra_fields.direction,
+        fun=[opt.fun for opt in info.local_optima],
+        params=[opt.params for opt in info.local_optima],
+        start_time=len(info.local_optima) * [np.nan],
+        stop_time=len(info.local_optima) * [np.nan],
+        batches=list(range(len(info.local_optima))),
+        task=len(info.local_optima) * [EvalTask.FUN],
     )
+    conv_report = get_convergence_report(report_history)
 
     res.convergence_report = conv_report
diff --git a/src/optimagic/timing.py b/src/optimagic/timing.py
new file mode 100644
index 000000000..a9fbe7d88
--- /dev/null
+++ b/src/optimagic/timing.py
@@ -0,0 +1,49 @@
+from dataclasses import dataclass
+from typing import Callable, Iterable
+
+
+@dataclass(frozen=True)
+class CostModel:
+    fun: float | None
+    jac: float | None
+    fun_and_jac: float | None
+    label: str
+    aggregate_batch_time: Callable[[Iterable[float]], float]
+
+    def __post_init__(self) -> None:
+        if not callable(self.aggregate_batch_time):
+            raise ValueError(
+                "aggregate_batch_time must be a callable, got "
+                f"{self.aggregate_batch_time}"
+            )
+
+
+evaluation_time = CostModel(
+    fun=None,
+    jac=None,
+    fun_and_jac=None,
+    label="Function time (seconds)",
+    aggregate_batch_time=sum,
+)
+
+fun_evaluations = CostModel(
+    fun=1,
+    jac=0,
+    fun_and_jac=1,
+    label="Number of function evaluations",
+    aggregate_batch_time=sum,
+)
+
+fun_batches = CostModel(
+    fun=1, jac=0, fun_and_jac=1, label="Number of batches", aggregate_batch_time=max
+)
+
+wall_time = "wall_time"
+
+
+TIMING_REGISTRY = {
+    "evaluation_time": evaluation_time,
+    "fun_evaluations": fun_evaluations,
+    "fun_batches": fun_batches,
+    "wall_time": wall_time,
+}
diff --git a/src/optimagic/visualization/history_plots.py b/src/optimagic/visualization/history_plots.py
index 4c4797b53..22d98bf71 100644
--- a/src/optimagic/visualization/history_plots.py
+++ b/src/optimagic/visualization/history_plots.py
@@ -10,7 +10,7 @@
 from optimagic.config import PLOTLY_PALETTE, PLOTLY_TEMPLATE
 from optimagic.logging.logger import LogReader, SQLiteLogOptions
 from optimagic.optimization.algorithm import Algorithm
-from optimagic.optimization.history_tools import get_history_arrays
+from optimagic.optimization.history import History
 from optimagic.optimization.optimize_result import OptimizeResult
 from optimagic.parameters.tree_registry import get_registry
 from optimagic.typing import Direction
@@ -59,7 +59,7 @@ def criterion_plot(
         palette = [palette]
     palette = itertools.cycle(palette)
 
-    key = "monotone_criterion" if monotone else "criterion"
+    fun_or_monotone_fun = "monotone_fun" if monotone else "fun"
 
     # ==================================================================================
     # Extract plotting data from results objects / data base
@@ -103,9 +103,7 @@
     }
 
     for i, local_history in enumerate(data[0]["local_histories"]):
-        history = get_history_arrays(
-            local_history, Direction(data[0]["direction"])
-        )[key]
+        history = getattr(local_history, fun_or_monotone_fun)
 
         if max_evaluations is not None and len(history) > max_evaluations:
             history = history[:max_evaluations]
@@ -128,7 +126,8 @@
             _history = _data["stacked_local_histories"]
         else:
             _history = _data["history"]
-        history = get_history_arrays(_history, _data["direction"])[key]
+
+        history = getattr(_history, fun_or_monotone_fun)
 
         if max_evaluations is not None and len(history) > max_evaluations:
             history = history[:max_evaluations]
@@ -253,7 +252,7 @@ def params_plot(
         raise TypeError("result must be an OptimizeResult or a path to a log file.")
 
     if data["stacked_local_histories"] is not None:
-        history = data["stacked_local_histories"]["params"]
+        history = data["stacked_local_histories"].params
     else:
         history = data["history"].params
 
@@ -338,14 +337,20 @@ def _extract_plotting_data_from_results_object(
         local_histories = None
 
     if stack_multistart and local_histories is not None:
-        stacked = _get_stacked_local_histories(local_histories)
+        stacked = _get_stacked_local_histories(local_histories, res.direction)
         if show_exploration:
-            stacked["params"] = (
-                res.multistart_info.exploration_sample[::-1] + stacked["params"]
-            )
-            stacked["criterion"] = (
-                res.multistart_info.exploration_results.tolist()[::-1]
-                + stacked["criterion"]
+            fun = res.multistart_info.exploration_results.tolist()[::-1] + stacked.fun
+            params = res.multistart_info.exploration_sample[::-1] + stacked.params
+
+            stacked = History(
+                direction=stacked.direction,
+                fun=fun,
+                params=params,
+                # TODO: This needs to be fixed
+                start_time=len(fun) * [None],
+                stop_time=len(fun) * [None],
+                batches=len(fun) * [None],
+                task=len(fun) * [None],
             )
     else:
         stacked = None
@@ -387,16 +392,39 @@ def _extract_plotting_data_from_database(res, stack_multistart, show_exploration
 
     direction = _problem_table["direction"].tolist()[-1]
 
-    history, local_histories, exploration = reader.read_multistart_history(direction)
+    _history, local_histories, exploration = reader.read_multistart_history(direction)
 
     if stack_multistart and local_histories is not None:
-        stacked = _get_stacked_local_histories(local_histories, history)
+        stacked = _get_stacked_local_histories(local_histories, direction, _history)
         if show_exploration:
-            stacked["params"] = exploration["params"][::-1] + stacked["params"]
-            stacked["criterion"] = exploration["criterion"][::-1] + stacked["criterion"]
+            fun = exploration["criterion"][::-1] + stacked.fun
+            params = exploration["params"][::-1] + stacked.params
+
+            stacked = History(
+                direction=stacked.direction,
+                fun=fun,
+                params=params,
+                # TODO: This needs to be fixed
+                start_time=len(fun) * [None],
+                stop_time=len(fun) * [None],
+                batches=len(fun) * [None],
+                task=len(fun) * [None],
+            )
     else:
         stacked = None
 
+    history = History(
+        direction=direction,
+        fun=_history["fun"],
+        params=_history["params"],
+        start_time=_history["time"],
+        # TODO (@janosg): Retrieve that information from `hist` once it is available.
+        # https://github.com/optimagic-dev/optimagic/pull/553
+        stop_time=len(_history["fun"]) * [None],
+        task=len(_history["fun"]) * [None],
+        batches=list(range(len(_history["fun"]))),
+    )
+
     data = {
         "history": history,
         "direction": direction,
@@ -408,7 +436,7 @@
     return data
 
 
-def _get_stacked_local_histories(local_histories, history=None):
+def _get_stacked_local_histories(local_histories, direction, history=None):
     """Stack local histories.
 
     Local histories is a list of dictionaries, each of the same structure. We transform
@@ -427,4 +455,16 @@
     stacked["criterion"].extend(history.fun)
     stacked["params"].extend(history.params)
     stacked["runtime"].extend(history.time)
-    return stacked
+
+    return History(
+        direction=direction,
+        fun=stacked["criterion"],
+        params=stacked["params"],
+        start_time=stacked["runtime"],
+        # TODO (@janosg): Retrieve that information from `hist` once it is available
+        # for the IterationHistory.
+        # https://github.com/optimagic-dev/optimagic/pull/553
+        stop_time=len(stacked["criterion"]) * [None],
+        task=len(stacked["criterion"]) * [None],
+        batches=list(range(len(stacked["criterion"]))),
+    )
diff --git a/tests/optimagic/optimization/test_algorithm.py b/tests/optimagic/optimization/test_algorithm.py
index 7d78c02da..96a531ea9 100644
--- a/tests/optimagic/optimization/test_algorithm.py
+++ b/tests/optimagic/optimization/test_algorithm.py
@@ -117,7 +117,8 @@ def _solve_internal_problem(self, problem, x0):
                 hist_entry = HistoryEntry(
                     params=x0,
                     fun=0.0,
-                    time=0.0,
+                    start_time=0.0,
+                    stop_time=0.0,
                     task=EvalTask.FUN,
                 )
                 problem.history.add_entry(hist_entry)
diff --git a/tests/optimagic/optimization/test_convergence_report.py b/tests/optimagic/optimization/test_convergence_report.py
index 058698a58..ea527f2bc 100644
--- a/tests/optimagic/optimization/test_convergence_report.py
+++ b/tests/optimagic/optimization/test_convergence_report.py
@@ -3,46 +3,55 @@
 from numpy.testing import assert_array_almost_equal as aaae
 
 from optimagic.optimization.convergence_report import get_convergence_report
-from optimagic.typing import Direction
+from optimagic.optimization.history import History
+from optimagic.typing import Direction, EvalTask
 
 
 def test_get_convergence_report_minimize():
-    hist = {
-        "criterion": [5, 4.1, 4.4, 4.0],
-        "params": [{"a": 0}, {"a": 2.1}, {"a": 2.5}, {"a": 2.0}],
-        "runtime": [0, 1, 2, 3],
-    }
-
-    calculated = pd.DataFrame.from_dict(
-        get_convergence_report(hist, Direction.MINIMIZE)
+    hist = History(
+        direction=Direction.MINIMIZE,
+        params=[{"a": 0}, {"a": 2.1}, {"a": 2.5}, {"a": 2.0}],
+        fun=[5, 4.1, 4.4, 4.0],
+        start_time=[0, 1, 2, 3],
+        stop_time=[1, 2, 3, 4],
+        task=4 * [EvalTask.FUN],
+        batches=[0, 1, 2, 3],
     )
 
+    calculated = pd.DataFrame.from_dict(get_convergence_report(hist))
+
     expected = np.array([[0.025, 0.25], [0.05, 1.0], [0.1, 1], [0.1, 2.0]])
 
     aaae(calculated.to_numpy(), expected)
 
 
 def test_get_convergence_report_maximize():
-    hist = {
-        "criterion": [-5, -4.1, -4.4, -4.0],
-        "params": [{"a": 0}, {"a": 2.1}, {"a": 2.5}, {"a": 2.0}],
-        "runtime": [0, 1, 2, 3],
-    }
-
-    calculated = pd.DataFrame.from_dict(
-        get_convergence_report(hist, Direction.MAXIMIZE)
+    hist = History(
+        direction=Direction.MAXIMIZE,
+        params=[{"a": 0}, {"a": 2.1}, {"a": 2.5}, {"a": 2.0}],
+        fun=[-5, -4.1, -4.4, -4.0],
+        start_time=[0, 1, 2, 3],
+        stop_time=[1, 2, 3, 4],
+        task=4 * [EvalTask.FUN],
+        batches=[0, 1, 2, 3],
     )
 
+    calculated = pd.DataFrame.from_dict(get_convergence_report(hist))
+
     expected = np.array([[0.025, 0.25], [0.05, 1.0], [0.1, 1], [0.1, 2.0]])
 
     aaae(calculated.to_numpy(), expected)
 
 
 def test_history_is_too_short():
     # first value is best, so history of accepted parameters has only one entry
-    hist = {
-        "criterion": [5, -4.1, -4.4, -4.0],
-        "params": [{"a": 0}, {"a": 2.1}, {"a": 2.5}, {"a": 2.0}],
-        "runtime": [0, 1, 2, 3],
-    }
+    hist = History(
+        direction=Direction.MAXIMIZE,
+        params=[{"a": 0}, {"a": 2.1}, {"a": 2.5}, {"a": 2.0}],
+        fun=[5, 4.1, 4.4, 4.0],
+        start_time=[0, 1, 2, 3],
+        stop_time=[1, 2, 3, 4],
+        task=4 * [EvalTask.FUN],
+        batches=[0, 1, 2, 3],
+    )
 
-    calculated = get_convergence_report(hist, Direction.MAXIMIZE)
+    calculated = get_convergence_report(hist)
 
     assert calculated is None
diff --git a/tests/optimagic/optimization/test_history.py b/tests/optimagic/optimization/test_history.py
index bd6dae3dd..72cdbb4a2 100644
--- a/tests/optimagic/optimization/test_history.py
+++ b/tests/optimagic/optimization/test_history.py
@@ -1,37 +1,552 @@
+import numpy as np
+import pandas as pd
import pytest from numpy.testing import assert_array_almost_equal as aaae +from numpy.testing import assert_array_equal +from pandas.testing import assert_frame_equal +from pybaum import tree_map -from optimagic.optimization.history import History, HistoryEntry -from optimagic.typing import EvalTask +import optimagic as om +from optimagic.optimization.history import ( + History, + HistoryEntry, + _apply_to_batch, + _calculate_monotone_sequence, + _get_batch_start, + _get_flat_param_names, + _get_flat_params, + _is_1d_array, + _task_as_categorical, + _validate_args_are_all_none_or_lists_of_same_length, +) +from optimagic.typing import Direction, EvalTask + +# ====================================================================================== +# Test methods to add data to History (add_entry, add_batch, init) +# ====================================================================================== @pytest.fixture def history_entries(): return [ - HistoryEntry(params=[1, 2, 3], fun=1, time=0.1, task=EvalTask.FUN), - HistoryEntry(params=[4, 5, 6], fun=2, time=0.2, task=EvalTask.FUN), - HistoryEntry(params=[7, 8, 9], fun=3, time=0.3, task=EvalTask.FUN), + HistoryEntry( + params={"a": 1, "b": [2, 3]}, + fun=1, + start_time=0.1, + stop_time=0.2, + task=EvalTask.FUN, + ), + HistoryEntry( + params={"a": 4, "b": [5, 6]}, + fun=3, + start_time=0.2, + stop_time=0.3, + task=EvalTask.FUN, + ), + HistoryEntry( + params={"a": 7, "b": [8, 9]}, + fun=2, + start_time=0.3, + stop_time=0.4, + task=EvalTask.FUN, + ), ] def test_history_add_entry(history_entries): - history = History() + history = History(Direction.MINIMIZE) for entry in history_entries: history.add_entry(entry) - assert history.params == [[1, 2, 3], [4, 5, 6], [7, 8, 9]] - assert history.fun == [1, 2, 3] + assert history.direction == Direction.MINIMIZE + + assert history.params == [ + {"a": 1, "b": [2, 3]}, + {"a": 4, "b": [5, 6]}, + {"a": 7, "b": [8, 9]}, + ] + assert history.fun == [1, 3, 2] assert history.task == [EvalTask.FUN, EvalTask.FUN, EvalTask.FUN] assert history.batches == [0, 1, 2] - aaae(history.time, [0.0, 0.1, 0.2]) + aaae(history.start_time, [0.1, 0.2, 0.3]) + aaae(history.stop_time, [0.2, 0.3, 0.4]) + + assert_array_equal(history.monotone_fun, np.array([1, 1, 1], dtype=np.float64)) + assert_array_equal( + history.flat_params, np.arange(1, 10, dtype=np.float64).reshape(3, 3) + ) def test_history_add_batch(history_entries): - history = History() + history = History(Direction.MAXIMIZE) history.add_batch(history_entries) - assert history.params == [[1, 2, 3], [4, 5, 6], [7, 8, 9]] - assert history.fun == [1, 2, 3] + assert history.direction == Direction.MAXIMIZE + + assert history.params == [ + {"a": 1, "b": [2, 3]}, + {"a": 4, "b": [5, 6]}, + {"a": 7, "b": [8, 9]}, + ] + assert history.fun == [1, 3, 2] assert history.task == [EvalTask.FUN, EvalTask.FUN, EvalTask.FUN] assert history.batches == [0, 0, 0] - aaae(history.time, [0.0, 0.1, 0.2]) + aaae(history.start_time, [0.1, 0.2, 0.3]) + aaae(history.stop_time, [0.2, 0.3, 0.4]) + + assert_array_equal(history.monotone_fun, np.array([1, 3, 3], dtype=np.float64)) + assert_array_equal( + history.flat_params, np.arange(1, 10, dtype=np.float64).reshape(3, 3) + ) + + +def test_history_from_data(): + data = { + "params": [{"a": 1, "b": [2, 3]}, {"a": 4, "b": [5, 6]}, {"a": 7, "b": [8, 9]}], + "fun": [1, 3, 2], + "task": [EvalTask.FUN, EvalTask.FUN, EvalTask.FUN], + "batches": [0, 0, 0], + "start_time": [0.0, 0.15, 0.3], + "stop_time": [0.1, 0.25, 0.4], + } + + history = History( + 
direction=Direction.MAXIMIZE, + **data, + ) + + assert history.direction == Direction.MAXIMIZE + + assert history.params == data["params"] + assert history.fun == data["fun"] + assert history.task == data["task"] + assert history.batches == data["batches"] + aaae(history.start_time, data["start_time"]) + aaae(history.stop_time, data["stop_time"]) + + assert_array_equal(history.monotone_fun, np.array([1, 3, 3], dtype=np.float64)) + assert_array_equal( + history.flat_params, np.arange(1, 10, dtype=np.float64).reshape(3, 3) + ) + + +# ====================================================================================== +# Test functionality of History +# ====================================================================================== + + +@pytest.fixture +def params(): + params_tree = {"a": None, "b": {"c": None, "d": (None, None)}} + return [ + tree_map(lambda _: k, params_tree, is_leaf=lambda leaf: leaf is None) # noqa: B023 + for k in range(6) + ] + + +@pytest.fixture +def history_data(params): + return { + "fun": [10, None, 9, None, 2, 5], + "task": [ + EvalTask.FUN, + EvalTask.JAC, + EvalTask.FUN, + EvalTask.JAC, + EvalTask.FUN, + EvalTask.FUN_AND_JAC, + ], + "start_time": [0, 2, 5, 7, 10, 12], + "stop_time": [1, 4, 6, 9, 11, 14], + "params": params, + "batches": [0, 1, 2, 3, 4, 5], + } + + +@pytest.fixture +def history(history_data): + return History(direction=Direction.MINIMIZE, **history_data) + + +@pytest.fixture +def history_parallel(history_data): + data = history_data.copy() + data["batches"] = [0, 0, 1, 1, 2, 2] + return History(direction=Direction.MINIMIZE, **data) + + +# Function data, function value, and monotone function value +# -------------------------------------------------------------------------------------- + + +def test_history_fun_data_with_fun_evaluations_cost_model(history): + got = history.fun_data( + cost_model=om.timing.fun_evaluations, + monotone=False, + ) + exp = pd.DataFrame( + { + "fun": [10, np.nan, 9, np.nan, 2, 5], + "task": [ + "fun", + "jac", + "fun", + "jac", + "fun", + "fun_and_jac", + ], + "time": [1, 1, 2, 2, 3, 4], + } + ) + assert_frame_equal(got, exp, check_dtype=False, check_categorical=False) + + +def test_history_fun_data_with_fun_evaluations_cost_model_and_monotone(history): + got = history.fun_data( + cost_model=om.timing.fun_evaluations, + monotone=True, + ) + exp = pd.DataFrame( + { + "fun": [10, np.nan, 9, np.nan, 2, 2], + "task": [ + "fun", + "jac", + "fun", + "jac", + "fun", + "fun_and_jac", + ], + "time": [1, 1, 2, 2, 3, 4], + } + ) + assert_frame_equal(got, exp, check_dtype=False, check_categorical=False) + + +def test_history_fun_data_with_fun_batches_cost_model(history_parallel): + got = history_parallel.fun_data( + cost_model=om.timing.fun_batches, + monotone=False, + ) + exp = pd.DataFrame( + { + "fun": [10, np.nan, 9, np.nan, 2, 5], + "task": [ + "fun", + "jac", + "fun", + "jac", + "fun", + "fun_and_jac", + ], + "time": [1, 1, 2, 2, 3, 3], + } + ) + assert_frame_equal(got, exp, check_dtype=False, check_categorical=False) + + +def test_history_fun_data_with_evaluation_time_cost_model(history): + got = history.fun_data( + cost_model=om.timing.evaluation_time, + monotone=False, + ) + exp = pd.DataFrame( + { + "fun": [10, np.nan, 9, np.nan, 2, 5], + "task": [ + "fun", + "jac", + "fun", + "jac", + "fun", + "fun_and_jac", + ], + "time": [1, 3, 4, 6, 7, 9], + } + ) + assert_frame_equal(got, exp, check_dtype=False, check_categorical=False) + + +def test_fun_property(history): + assert_array_equal(history.fun, [10, None, 9, None, 
2, 5]) + + +def test_monotone_fun_property(history): + assert_array_equal(history.monotone_fun, np.array([10, np.nan, 9, np.nan, 2, 2])) + + +# Acceptance +# -------------------------------------------------------------------------------------- + + +def test_is_accepted_property(history): + got = history.is_accepted + exp = np.array([True, False, True, False, True, False]) + assert_array_equal(got, exp) + + +# Parameter data, params, flat params, and flat params names +# -------------------------------------------------------------------------------------- + + +def test_params_data_fun_evaluations_cost_model(history): + got = history.params_data(cost_model=om.timing.fun_evaluations) + exp = pd.DataFrame( + { + "name": np.repeat( + [ + "a", + "b_c", + "b_d_0", + "b_d_1", + ], + 6, + ), + "value": np.tile(list(range(6)), 4), + "task": np.tile( + [ + "fun", + "jac", + "fun", + "jac", + "fun", + "fun_and_jac", + ], + 4, + ), + "time": np.tile([1, 1, 2, 2, 3, 4], 4), + } + ) + assert_frame_equal(got, exp, check_categorical=False, check_dtype=False) + + +def test_params_property(history, params): + assert history.params == params + + +def test_flat_params_property(history): + got = history.flat_params + assert_array_equal(got, [[k for _ in range(4)] for k in range(6)]) + + +def test_flat_param_names(history): + assert history.flat_param_names == ["a", "b_c", "b_d_0", "b_d_1"] + + +# Time +# -------------------------------------------------------------------------------------- + + +def test_get_time_per_task_fun(history): + got = history._get_time_per_task(EvalTask.FUN, cost_factor=1) + exp = np.array([1, 0, 1, 0, 1, 0]) + assert_array_equal(got, exp) + + +def test_get_time_per_task_jac_cost_factor_none(history): + got = history._get_time_per_task(EvalTask.JAC, cost_factor=None) + exp = np.array([0, 2, 0, 2, 0, 0]) + assert_array_equal(got, exp) + + +def test_get_time_per_task_fun_and_jac(history): + got = history._get_time_per_task(EvalTask.FUN_AND_JAC, cost_factor=-0.5) + exp = np.array([0, 0, 0, 0, 0, -0.5]) + assert_array_equal(got, exp) + + +def test_get_time_custom_cost_model(history): + cost_model = om.timing.CostModel( + fun=0.5, jac=1, fun_and_jac=2, label="test", aggregate_batch_time=sum + ) + got = history._get_time(cost_model) + exp = np.array( + [ + 0.5, + 0.5 + 1, + 1 + 1, + 1 + 2, + 1.5 + 2, + 1.5 + 2 + 2, + ] + ) + assert_array_equal(got, exp) + + +def test_get_time_fun_evaluations(history): + got = history._get_time(cost_model=om.timing.fun_evaluations) + exp = np.array([1, 1, 2, 2, 3, 4]) + assert_array_equal(got, exp) + + +def test_get_time_fun_batches(history): + got = history._get_time(cost_model=om.timing.fun_batches) + exp = np.array([1, 1, 2, 2, 3, 4]) + assert_array_equal(got, exp) + + +def test_get_time_fun_batches_parallel(history_parallel): + got = history_parallel._get_time(cost_model=om.timing.fun_batches) + exp = np.array([1, 1, 2, 2, 3, 3]) + assert_array_equal(got, exp) + + +def test_get_time_evaluation_time(history): + got = history._get_time(cost_model=om.timing.evaluation_time) + exp = np.array([1, 3, 4, 6, 7, 9]) + assert_array_equal(got, exp) + + +def test_get_time_wall_time(history): + got = history._get_time(cost_model="wall_time") + exp = np.array([1, 4, 6, 9, 11, 14]) + assert_array_equal(got, exp) + + +def test_get_time_invalid_cost_model(history): + with pytest.raises( + ValueError, match="cost_model must be a CostModel or 'wall_time'." 
+ ): + history._get_time(cost_model="invalid") + + +def test_start_time_property(history): + assert history.start_time == [0, 2, 5, 7, 10, 12] + + +def test_stop_time_property(history): + assert history.stop_time == [1, 4, 6, 9, 11, 14] + + +# Batches +# -------------------------------------------------------------------------------------- + + +def test_batches_property(history): + assert history.batches == [0, 1, 2, 3, 4, 5] + + +# Tasks +# -------------------------------------------------------------------------------------- + + +def test_task_property(history): + assert history.task == [ + EvalTask.FUN, + EvalTask.JAC, + EvalTask.FUN, + EvalTask.JAC, + EvalTask.FUN, + EvalTask.FUN_AND_JAC, + ] + + +# ====================================================================================== +# Unit tests +# ====================================================================================== + + +def test_is_1d_array(): + assert _is_1d_array(np.arange(2)) is True + assert _is_1d_array(np.eye(2)) is False + assert _is_1d_array([0, 1]) is False + + +def test_get_flat_params_pytree(): + params = [ + {"a": 1, "b": [0, 1], "c": np.arange(2)}, + {"a": 2, "b": [1, 2], "c": np.arange(2)}, + ] + got = _get_flat_params(params) + exp = [ + [1, 0, 1, 0, 1], + [2, 1, 2, 0, 1], + ] + assert_array_equal(got, exp) + + +def test_get_flat_params_fast_path(): + params = [np.arange(2)] + got = _get_flat_params(params) + exp = [[0, 1]] + assert_array_equal(got, exp) + + +def test_get_flat_param_names_pytree(): + got = _get_flat_param_names(param={"a": 0, "b": [0, 1], "c": np.arange(2)}) + exp = ["a", "b_0", "b_1", "c_0", "c_1"] + assert got == exp + + +def test_get_flat_param_names_fast_path(): + got = _get_flat_param_names(param=np.arange(2)) + exp = ["0", "1"] + assert got == exp + + +def test_calculate_monotone_sequence_maximize(): + sequence = [0, 1, 0, 0, 2, 10, 0] + exp = [0, 1, 1, 1, 2, 10, 10] + got = _calculate_monotone_sequence(sequence, direction=Direction.MAXIMIZE) + assert_array_equal(exp, got) + + +def test_calculate_monotone_sequence_minimize(): + sequence = [10, 11, 8, 12, 0, 5] + exp = [10, 10, 8, 8, 0, 0] + got = _calculate_monotone_sequence(sequence, direction=Direction.MINIMIZE) + assert_array_equal(exp, got) + + +def test_validate_args_are_all_none_or_lists_of_same_length(): + _validate_args_are_all_none_or_lists_of_same_length(None, None) + _validate_args_are_all_none_or_lists_of_same_length([1], [1]) + + with pytest.raises(ValueError, match="All list arguments must have the same"): + _validate_args_are_all_none_or_lists_of_same_length([1], [1, 2]) + + with pytest.raises(ValueError, match="All arguments must be lists of the same"): + _validate_args_are_all_none_or_lists_of_same_length(None, [1]) + + +def test_task_as_categorical(): + task = [EvalTask.FUN, EvalTask.JAC, EvalTask.FUN_AND_JAC] + got = _task_as_categorical(task) + assert got.tolist() == ["fun", "jac", "fun_and_jac"] + assert isinstance(got.dtype, pd.CategoricalDtype) + + +def test_get_batch_start(): + batches = [0, 0, 1, 1, 1, 2, 2, 3] + got = _get_batch_start(batches) + assert got == [0, 2, 5, 7] + + +def test_apply_to_batch_sum(): + data = np.array([0, 1, 2, 3, 4]) + batch_ids = [0, 0, 1, 1, 2] + exp = np.array([1, 0, 5, 0, 4]) + got = _apply_to_batch(data, batch_ids, sum) + assert_array_equal(exp, got) + + +def test_apply_to_batch_max(): + data = np.array([0, 1, 2, 3, 4]) + batch_ids = [0, 0, 1, 1, 2] + exp = np.array([1, 0, 3, 0, 4]) + got = _apply_to_batch(data, batch_ids, max) + assert_array_equal(exp, got) + + +def 
test_apply_to_batch_broken_func():
+    data = np.array([0, 1, 2, 3, 4])
+    batch_ids = [0, 0, 1, 1, 2]
+    with pytest.raises(ValueError, match="Calling function <lambda> on batch 0"):
+        _apply_to_batch(data, batch_ids, func=lambda _: 1 / 0)
+
+
+def test_apply_to_batch_func_with_non_scalar_return():
+    data = np.array([0, 1, 2, 3, 4])
+    batch_ids = [0, 0, 1, 1, 2]
+    with pytest.raises(ValueError, match="did not return a scalar"):
+        _apply_to_batch(data, batch_ids, func=lambda _list: _list)
diff --git a/tests/optimagic/optimization/test_history_tools.py b/tests/optimagic/optimization/test_history_tools.py
deleted file mode 100644
index 4b4f4d100..000000000
--- a/tests/optimagic/optimization/test_history_tools.py
+++ /dev/null
@@ -1,27 +0,0 @@
-import numpy as np
-import pytest
-from numpy.testing import assert_array_almost_equal as aaae
-
-from optimagic.optimization.history_tools import get_history_arrays
-from optimagic.typing import Direction
-
-
-@pytest.fixture()
-def history():
-    hist = {
-        "criterion": [5, 4, 5.5, 4.2],
-        "params": [{"a": 0}, {"a": 1}, {"a": 2}, {"a": 3}],
-        "runtime": [0, 1, 2, 3],
-    }
-    return hist
-
-
-def test_get_history_arrays_minimize(history):
-    calculated = get_history_arrays(history, Direction.MINIMIZE)
-
-    aaae(calculated.is_accepted, np.array([True, True, False, False]))
-
-
-def test_get_history_arrays_maximize(history):
-    calculated = get_history_arrays(history, Direction.MAXIMIZE)
-    aaae(calculated.is_accepted, np.array([True, False, True, False]))
diff --git a/tests/optimagic/test_timing.py b/tests/optimagic/test_timing.py
new file mode 100644
index 000000000..fd2edfc3c
--- /dev/null
+++ b/tests/optimagic/test_timing.py
@@ -0,0 +1,14 @@
+import pytest
+
+from optimagic import timing
+
+
+def test_invalid_aggregate_batch_time():
+    with pytest.raises(ValueError, match="aggregate_batch_time must be a callable"):
+        timing.CostModel(
+            fun=None,
+            jac=None,
+            fun_and_jac=None,
+            label="label",
+            aggregate_batch_time="Not callable",
+        )