diff --git a/src/optimagic/__init__.py b/src/optimagic/__init__.py index 28e912234..04213b265 100644 --- a/src/optimagic/__init__.py +++ b/src/optimagic/__init__.py @@ -1,6 +1,6 @@ from __future__ import annotations -from optimagic import constraints, mark, utilities +from optimagic import constraints, mark, timing, utilities from optimagic.algorithms import algos from optimagic.benchmarking.benchmark_reports import ( convergence_report, @@ -102,4 +102,5 @@ "History", "__version__", "algos", + "timing", ] diff --git a/src/optimagic/benchmarking/run_benchmark.py b/src/optimagic/benchmarking/run_benchmark.py index 9e2ce4cd8..cd6d844c4 100644 --- a/src/optimagic/benchmarking/run_benchmark.py +++ b/src/optimagic/benchmarking/run_benchmark.py @@ -209,7 +209,7 @@ def _process_one_result(optimize_result, problem): criterion_history = history.fun criterion_history = np.clip(criterion_history, _solution_crit, np.inf) batches_history = history.batches - time_history = history.time + time_history = history.start_time return { "params_history": params_history_flat, diff --git a/src/optimagic/optimization/convergence_report.py b/src/optimagic/optimization/convergence_report.py index bb44a828b..02fa76c4f 100644 --- a/src/optimagic/optimization/convergence_report.py +++ b/src/optimagic/optimization/convergence_report.py @@ -1,16 +1,14 @@ import numpy as np +from numpy.typing import NDArray -from optimagic.optimization.history_tools import get_history_arrays +from optimagic.optimization.history import History -def get_convergence_report(history, direction): - history_arrs = get_history_arrays( - history=history, - direction=direction, - ) +def get_convergence_report(history: History) -> dict[str, dict[str, float]] | None: + is_accepted = history.is_accepted - critvals = history_arrs.fun[history_arrs.is_accepted] - params = history_arrs.params[history_arrs.is_accepted] + critvals = np.array(history.fun, dtype=np.float64)[is_accepted] + params = np.array(history.flat_params, dtype=np.float64)[is_accepted] if len(critvals) < 2: out = None @@ -35,7 +33,7 @@ def get_convergence_report(history, direction): return out -def _get_max_f_changes(critvals): +def _get_max_f_changes(critvals: NDArray[np.float64]) -> tuple[float, float]: best_val = critvals[-1] worst_val = critvals[0] @@ -47,7 +45,7 @@ def _get_max_f_changes(critvals): return max_change_rel, max_change_abs -def _get_max_x_changes(params): +def _get_max_x_changes(params: NDArray[np.float64]) -> tuple[float, float]: best_x = params[-1] diffs = params - best_x denom = np.clip(np.abs(best_x), 0.1, np.inf) diff --git a/src/optimagic/optimization/history.py b/src/optimagic/optimization/history.py index aa9cd0bca..14df3a67e 100644 --- a/src/optimagic/optimization/history.py +++ b/src/optimagic/optimization/history.py @@ -1,35 +1,70 @@ import warnings from dataclasses import dataclass -from typing import Any +from functools import partial +from typing import Any, Callable, Iterable, Literal import numpy as np +import pandas as pd +from numpy.typing import NDArray +from pybaum import leaf_names, tree_just_flatten -from optimagic.typing import EvalTask, PyTree +from optimagic.parameters.tree_registry import get_registry +from optimagic.timing import CostModel +from optimagic.typing import Direction, EvalTask, PyTree @dataclass(frozen=True) class HistoryEntry: params: PyTree fun: float | None - time: float + start_time: float + stop_time: float task: EvalTask class History: # TODO: add counters for the relevant evaluations - def __init__(self) -> None: - 
self._params: list[PyTree] = []
-        self._fun: list[float | None] = []
-        self._time: list[float] = []
-        self._batches: list[int] = []
-        self._task: list[EvalTask] = []
+    def __init__(
+        self,
+        direction: Direction,
+        params: list[PyTree] | None = None,
+        fun: list[float | None] | None = None,
+        start_time: list[float] | None = None,
+        stop_time: list[float] | None = None,
+        batches: list[int] | None = None,
+        task: list[EvalTask] | None = None,
+    ) -> None:
+        """Initialize a history.
+
+        The history must know the direction of the optimization problem in order to
+        correctly return monotone sequences. The history can be initialized empty,
+        e.g. to collect entries during an optimization, or with data, e.g. to
+        recover a history from a log.
+
+        """
+        _validate_args_are_all_none_or_lists_of_same_length(
+            params, fun, start_time, stop_time, batches, task
+        )
+
+        self.direction = direction
+        self._params = params if params is not None else []
+        self._fun = fun if fun is not None else []
+        self._start_time = start_time if start_time is not None else []
+        self._stop_time = stop_time if stop_time is not None else []
+        self._batches = batches if batches is not None else []
+        self._task = task if task is not None else []
+
+    # ==================================================================================
+    # Methods to add entries to the history
+    # ==================================================================================
 
     def add_entry(self, entry: HistoryEntry, batch_id: int | None = None) -> None:
         if batch_id is None:
             batch_id = self._get_next_batch_id()
         self._params.append(entry.params)
         self._fun.append(entry.fun)
-        self._time.append(entry.time)
+        self._start_time.append(entry.start_time)
+        self._stop_time.append(entry.stop_time)
         self._batches.append(batch_id)
         self._task.append(entry.task)
 
@@ -50,38 +85,210 @@ def add_batch(
         for entry, id in zip(batch, ids, strict=False):
             self.add_entry(entry, id)
 
+    def _get_next_batch_id(self) -> int:
+        if not self._batches:
+            batch = 0
+        else:
+            batch = self._batches[-1] + 1
+        return batch
+
+    # ==================================================================================
+    # Properties and methods to access the history
+    # ==================================================================================
+
+    # Function data, function value, and monotone function value
+    # ----------------------------------------------------------------------------------
+
+    def fun_data(self, cost_model: CostModel, monotone: bool) -> pd.DataFrame:
+        """Return the function value data.
+
+        Args:
+            cost_model: The cost model that is used to calculate the time measure.
+            monotone: Whether to return the monotone function values.
+
+        Returns:
+            pd.DataFrame: The function value data. The columns are 'fun', 'task' and
+                'time'. If monotone is True, the 'fun' column contains the monotone
+                function values.
+
+        """
+        if monotone:
+            fun: list[float | None] | NDArray[np.float64] = self.monotone_fun
+        else:
+            fun = self.fun
+        task = _task_as_categorical(self.task)
+        time = self._get_time(cost_model)
+        return pd.DataFrame({"fun": fun, "task": task, "time": time})
+
+    @property
+    def fun(self) -> list[float | None]:
+        return self._fun
+
+    @property
+    def monotone_fun(self) -> NDArray[np.float64]:
+        """The monotone function value of the history.
+
+        If the value is None, the output at that position is nan.
+
+        """
+        return _calculate_monotone_sequence(self.fun, direction=self.direction)
+
+    # Acceptance
+    # ----------------------------------------------------------------------------------
+
+    @property
+    def is_accepted(self) -> NDArray[np.bool_]:
+        """Boolean indicator whether a function value is accepted.
+
+        A function value is accepted if it is less than or equal to the monotone
+        function value at the same position in the case of minimization, or greater
+        than or equal to it in the case of maximization. If the value is None, the
+        output at that position is False.
+
+        """
+        fun_arr = np.array(self.fun, dtype=np.float64)
+        if self.direction == Direction.MINIMIZE:
+            return fun_arr <= self.monotone_fun
+        elif self.direction == Direction.MAXIMIZE:
+            return fun_arr >= self.monotone_fun
+
+    # Parameter data, params, flat params, and flat param names
+    # ----------------------------------------------------------------------------------
+
+    def params_data(self, cost_model: CostModel) -> pd.DataFrame:
+        """Return the parameter data.
+
+        Args:
+            cost_model: The cost model that is used to calculate the time measure.
+
+        Returns:
+            pd.DataFrame: The parameter data. The columns are: 'name' (the parameter
+                names), 'value' (the parameter values), 'task', and 'time'.
+
+        """
+        wide = pd.DataFrame(self.flat_params, columns=self.flat_param_names)
+        wide["task"] = _task_as_categorical(self.task)
+        wide["time"] = self._get_time(cost_model)
+        data = pd.melt(
+            wide, var_name="name", value_name="value", id_vars=["task", "time"]
+        )
+        return data.reindex(columns=["name", "value", "task", "time"])
+
     @property
     def params(self) -> list[PyTree]:
         return self._params
 
     @property
-    def fun(self) -> list[float | None]:
-        return self._fun
+    def flat_params(self) -> list[list[float]]:
+        return _get_flat_params(self._params)
 
     @property
-    def time(self) -> list[float]:
-        arr = np.array(self._time)
-        return (arr - arr[0]).tolist()
+    def flat_param_names(self) -> list[str]:
+        return _get_flat_param_names(param=self._params[0])
+
+    # Time
+    # ----------------------------------------------------------------------------------
+
+    def _get_time(
+        self, cost_model: CostModel | Literal["wall_time"]
+    ) -> NDArray[np.float64]:
+        """Return the cumulative time measure.
+
+        Args:
+            cost_model: The cost model that is used to calculate the time measure. If
+                "wall_time", the wall time is returned.
+
+        Returns:
+            np.ndarray: The time measure.
+
+        """
+        if not isinstance(cost_model, CostModel) and cost_model != "wall_time":
+            raise ValueError("cost_model must be a CostModel or 'wall_time'.")
+
+        if cost_model == "wall_time":
+            return np.array(self.stop_time, dtype=np.float64) - self.start_time[0]
+
+        fun_time = self._get_time_per_task(
+            task=EvalTask.FUN, cost_factor=cost_model.fun
+        )
+        jac_time = self._get_time_per_task(
+            task=EvalTask.JAC, cost_factor=cost_model.jac
+        )
+        fun_and_jac_time = self._get_time_per_task(
+            task=EvalTask.FUN_AND_JAC, cost_factor=cost_model.fun_and_jac
+        )
+
+        time = fun_time + jac_time + fun_and_jac_time
+        batch_aware_time = _apply_to_batch(
+            data=time,
+            batch_ids=self.batches,
+            func=cost_model.aggregate_batch_time,
+        )
+        return np.cumsum(batch_aware_time)
+
+    def _get_time_per_task(
+        self, task: EvalTask, cost_factor: float | None
+    ) -> NDArray[np.float64]:
+        """Return the time measure per task.
+
+        Args:
+            task: The task for which the time is calculated.
+            cost_factor: The cost factor used to calculate the time. If None, the time
+                is the difference between the start and stop time, otherwise the time
+                is given by the cost factor.
+
+        Returns:
+            np.ndarray: The time per task. For entries where the task is not the
+                requested task, the time is 0.
+
+        """
+        dummy_task = np.array([1 if t == task else 0 for t in self.task])
+        factor: float | NDArray[np.float64]
+        if cost_factor is None:
+            factor = np.array(self.stop_time, dtype=np.float64) - np.array(
+                self.start_time, dtype=np.float64
+            )
+        else:
+            factor = cost_factor
+
+        return factor * dummy_task
+
+    @property
+    def start_time(self) -> list[float]:
+        return self._start_time
+
+    @property
+    def stop_time(self) -> list[float]:
+        return self._stop_time
+
+    # Batches
+    # ----------------------------------------------------------------------------------
 
     @property
     def batches(self) -> list[int]:
         return self._batches
 
+    # Tasks
+    # ----------------------------------------------------------------------------------
+
     @property
     def task(self) -> list[EvalTask]:
         return self._task
 
-    def _get_next_batch_id(self) -> int:
-        if not self._batches:
-            batch = 0
-        else:
-            batch = self._batches[-1] + 1
-        return batch
-
     # ==================================================================================
     # Add deprecated dict access
     # ==================================================================================
 
+    @property
+    def time(self) -> list[float]:
+        msg = (
+            "The attribute `time` of History is deprecated and will be removed in a "
+            "future version. Use the `start_time` property instead."
+        )
+        warnings.warn(msg, FutureWarning)
+        arr = np.array(self._start_time)
+        return (arr - arr[0]).tolist()
+
     @property
     def criterion(self) -> list[float | None]:
         msg = "The attribute `criterion` of History is deprecated. Use `fun` instead."
@@ -90,7 +297,10 @@
 
     @property
     def runtime(self) -> list[float]:
-        msg = "The attribute `runtime` of History is deprecated. Use `time` instead."
+        msg = (
+            "The attribute `runtime` of History is deprecated and will be removed in "
+            "a future version. Use the `start_time` property instead."
+        )
         warnings.warn(msg, FutureWarning)
         return self.time
 
@@ -98,3 +308,143 @@ def __getitem__(self, key: str) -> Any:
         msg = "dict-like access to History is deprecated. Use attribute access instead."
warnings.warn(msg, FutureWarning)
         return getattr(self, key)
+
+
+# ======================================================================================
+# Methods
+# ======================================================================================
+
+
+def _get_flat_params(params: list[PyTree]) -> list[list[float]]:
+    if len(params) > 0 and _is_1d_array(params[0]):
+        # fast path
+        flatten = lambda x: x.tolist()
+    else:
+        registry = get_registry(extended=True)
+        flatten = partial(tree_just_flatten, registry=registry)
+
+    return [flatten(p) for p in params]
+
+
+def _get_flat_param_names(param: PyTree) -> list[str]:
+    if _is_1d_array(param):
+        # fast path
+        return np.arange(param.size).astype(str).tolist()
+
+    registry = get_registry(extended=True)
+    return leaf_names(param, registry=registry)
+
+
+def _is_1d_array(param: PyTree) -> bool:
+    return isinstance(param, np.ndarray) and param.ndim == 1
+
+
+def _calculate_monotone_sequence(
+    sequence: list[float | None], direction: Direction
+) -> NDArray[np.float64]:
+    sequence_arr = np.array(sequence, dtype=np.float64)  # converts None to nan
+    nan_mask = np.isnan(sequence_arr)
+
+    if direction == Direction.MINIMIZE:
+        sequence_arr[nan_mask] = np.inf
+        out = np.minimum.accumulate(sequence_arr)
+    elif direction == Direction.MAXIMIZE:
+        sequence_arr[nan_mask] = -np.inf
+        out = np.maximum.accumulate(sequence_arr)
+
+    out[nan_mask] = np.nan
+    return out
+
+
+# ======================================================================================
+# Misc
+# ======================================================================================
+
+
+def _validate_args_are_all_none_or_lists_of_same_length(
+    *args: list[Any] | None,
+) -> None:
+    all_none = all(arg is None for arg in args)
+    all_list = all(isinstance(arg, list) for arg in args)
+
+    if not all_none:
+        if all_list:
+            unique_list_lengths = set(map(len, args))  # type: ignore[arg-type]
+
+            if len(unique_list_lengths) != 1:
+                raise ValueError("All list arguments must have the same length.")
+
+        else:
+            raise ValueError("All arguments must be lists of the same length or None.")
+
+
+def _task_as_categorical(task: list[EvalTask]) -> pd.Categorical:
+    return pd.Categorical(
+        [t.value for t in task], categories=[t.value for t in EvalTask]
+    )
+
+
+def _apply_to_batch(
+    data: NDArray[np.float64],
+    batch_ids: list[int],
+    func: Callable[[Iterable[float]], float],
+) -> NDArray[np.float64]:
+    """Apply a reduction operator on batches of data.
+
+    This function assumes that batch_ids is non-empty and sorted.
+
+    Args:
+        data: 1d array with data.
+        batch_ids: A list with batch ids whose length is equal to the size of data.
+            Values need to be sorted and can be repeated.
+        func: A reduction function that takes an iterable of floats as input (e.g., a
+            numpy.ndarray or list) and returns a scalar.
+
+    Returns:
+        The transformed data. Has the same length as data. For each batch, the result of
+        the reduction operation is stored at the first index of that batch, and all
+        other values of that batch are set to zero.
+
+    """
+    batch_starts = _get_batch_start(batch_ids)
+    batch_stops = [*batch_starts[1:], len(data)]
+
+    batch_results = []
+    for start, stop in zip(batch_starts, batch_stops, strict=True):
+        batch = batch_ids[start]  # batch id, used in error messages
+        batch_data = data[start:stop]
+
+        try:
+            reduced = func(batch_data)
+        except Exception as e:
+            msg = (
+                f"Calling function {func.__name__} on batch {batch} of the History "
+                f"raised an Exception. Please verify that {func.__name__} is "
+                "well-defined and takes a list of floats as input and returns a scalar."
+            )
+            raise ValueError(msg) from e
+
+        if not np.isscalar(reduced):
+            msg = (
+                f"Function {func.__name__} did not return a scalar for batch {batch}. "
+                f"Please verify that {func.__name__} returns a scalar when called on a "
+                "list of floats."
+            )
+            raise ValueError(msg)
+
+        batch_results.append(reduced)
+
+    out = np.zeros_like(data)
+    out[batch_starts] = batch_results
+    return out
+
+
+def _get_batch_start(batch_ids: list[int]) -> list[int]:
+    """Get start indices of batches.
+
+    This function assumes that batch_ids is non-empty and sorted.
+
+    """
+    ids_arr = np.array(batch_ids, dtype=np.int64)
+    indices = np.where(ids_arr[:-1] != ids_arr[1:])[0] + 1
+    return np.insert(indices, 0, 0).tolist()
diff --git a/src/optimagic/optimization/history_tools.py b/src/optimagic/optimization/history_tools.py
deleted file mode 100644
index 770f1930e..000000000
--- a/src/optimagic/optimization/history_tools.py
+++ /dev/null
@@ -1,88 +0,0 @@
-from dataclasses import dataclass
-from functools import partial
-
-import numpy as np
-from numpy.typing import NDArray
-from pybaum import tree_just_flatten
-
-from optimagic.optimization.history import History
-from optimagic.parameters.tree_registry import get_registry
-from optimagic.typing import Direction
-
-
-@dataclass(frozen=True)
-class HistoryArrays:
-    fun: NDArray[np.float64]
-    params: NDArray[np.float64]
-    time: NDArray[np.float64]
-    monotone_fun: NDArray[np.float64]
-    is_accepted: NDArray[np.bool_]
-
-    @property
-    def criterion(self) -> NDArray[np.float64]:
-        return self.fun
-
-    @property
-    def monotone_criterion(self) -> NDArray[np.float64]:
-        return self.monotone_fun
-
-    def __getitem__(self, key: str) -> NDArray[np.float64] | NDArray[np.bool_]:
-        return getattr(self, key)
-
-
-def get_history_arrays(history: History, direction: Direction) -> HistoryArrays:
-    # ==================================================================================
-    # Handle deprecations for now
-    # ==================================================================================
-    assert direction in [Direction.MINIMIZE, Direction.MAXIMIZE]
-
-    if isinstance(history, dict):
-        parhist = history["params"]
-        funhist = history["criterion"]
-        timehist = history["runtime"]
-
-    else:
-        parhist = history.params
-        funhist = history.fun
-        timehist = history.time
-
-    # ==================================================================================
-    # Filter out evaluations that do not have a `fun` value
-    # ==================================================================================
-
-    parhist = [p for p, f in zip(parhist, funhist, strict=False) if f is not None]
-    timehist = [t for t, f in zip(timehist, funhist, strict=False) if f is not None]
-    funhist = [f for f in funhist if f is not None]
-
-    # ==================================================================================
-
-    is_flat = (
-        len(parhist) > 0 and isinstance(parhist[0], np.ndarray) and parhist[0].ndim == 1
-    )
-    if is_flat:
-        to_internal = lambda x: x.tolist()
-    else:
-        registry = get_registry(extended=True)
-        to_internal = partial(tree_just_flatten, registry=registry)
-
-    critvals = np.array(funhist)
-
-    params = np.array([to_internal(p) for p in parhist])
-
-    runtimes = np.array(timehist)
-
-    if direction == Direction.MINIMIZE:
-        monotone = np.minimum.accumulate(critvals)
-        is_accepted = critvals <= monotone
-    elif direction ==
Direction.MAXIMIZE: - monotone = np.maximum.accumulate(critvals) - is_accepted = critvals >= monotone - - out = HistoryArrays( - fun=critvals, - params=params, - time=runtimes, - monotone_fun=monotone, - is_accepted=is_accepted, - ) - return out diff --git a/src/optimagic/optimization/internal_optimization_problem.py b/src/optimagic/optimization/internal_optimization_problem.py index f0951df74..c15e32fe0 100644 --- a/src/optimagic/optimization/internal_optimization_problem.py +++ b/src/optimagic/optimization/internal_optimization_problem.py @@ -68,7 +68,7 @@ def __init__( self._error_handling = error_handling self._error_penalty_func = error_penalty_func self._batch_evaluator = batch_evaluator - self._history = History() + self._history = History(direction) self._linear_constraints = linear_constraints self._nonlinear_constraints = nonlinear_constraints self._logger = logger @@ -177,7 +177,7 @@ def exploration_fun( def with_new_history(self) -> Self: new = copy(self) - new._history = History() + new._history = History(self.direction) return new def with_error_handling(self, error_handling: ErrorHandling) -> Self: @@ -306,7 +306,7 @@ def _pure_evaluate_fun( issued. """ - now = time.perf_counter() + start_time = time.perf_counter() params = self._converter.params_from_internal(x) traceback: None | str = None try: @@ -333,17 +333,19 @@ def _pure_evaluate_fun( algo_fun_value, hist_fun_value = _process_fun_value( value=fun_value, solver_type=self._solver_type, direction=self._direction ) + stop_time = time.perf_counter() hist_entry = HistoryEntry( params=params, fun=hist_fun_value, - time=now, + start_time=start_time, + stop_time=stop_time, task=EvalTask.FUN, ) log_entry = IterationState( params=params, - timestamp=now, + timestamp=start_time, scalar_fun=hist_fun_value, valid=not bool(traceback), raw_fun=fun_value, @@ -359,7 +361,7 @@ def _pure_evaluate_jac( if self._jac is None: raise ValueError("The jac function is not defined.") - now = time.perf_counter() + start_time = time.perf_counter() traceback: None | str = None params = self._converter.params_from_internal(x) @@ -389,16 +391,19 @@ def _pure_evaluate_jac( value=jac_value, direction=self._direction, converter=self._converter, x=x ) + stop_time = time.perf_counter() + hist_entry = HistoryEntry( params=params, fun=None, - time=now, + start_time=start_time, + stop_time=stop_time, task=EvalTask.JAC, ) log_entry = IterationState( params=params, - timestamp=now, + timestamp=start_time, scalar_fun=None, valid=not bool(traceback), raw_fun=None, @@ -415,7 +420,7 @@ def _pure_evaluate_numerical_fun_and_jac( HistoryEntry, IterationState, ]: - now = time.perf_counter() + start_time = time.perf_counter() traceback: None | str = None def func(x: NDArray[np.float64]) -> SpecificFunctionValue: @@ -466,16 +471,19 @@ def func(x: NDArray[np.float64]) -> SpecificFunctionValue: if self._direction == Direction.MAXIMIZE: jac_value = -jac_value + stop_time = time.perf_counter() + hist_entry = HistoryEntry( params=self._converter.params_from_internal(x), fun=hist_fun_value, - time=now, + start_time=start_time, + stop_time=stop_time, task=EvalTask.FUN_AND_JAC, ) log_entry = IterationState( params=self._converter.params_from_internal(x), - timestamp=now, + timestamp=start_time, scalar_fun=hist_fun_value, valid=not bool(traceback), raw_fun=fun_value, @@ -488,7 +496,7 @@ def func(x: NDArray[np.float64]) -> SpecificFunctionValue: def _pure_exploration_fun( self, x: NDArray[np.float64] ) -> tuple[float, HistoryEntry, IterationState]: - now = time.perf_counter() + 
start_time = time.perf_counter() params = self._converter.params_from_internal(x) traceback: None | str = None @@ -521,16 +529,19 @@ def _pure_exploration_fun( if self._direction == Direction.MAXIMIZE: hist_fun_value = np.inf + stop_time = time.perf_counter() + hist_entry = HistoryEntry( params=params, fun=hist_fun_value, - time=now, + start_time=start_time, + stop_time=stop_time, task=EvalTask.EXPLORATION, ) log_entry = IterationState( params=params, - timestamp=now, + timestamp=start_time, scalar_fun=hist_fun_value, valid=not bool(traceback), raw_fun=fun_value, @@ -550,7 +561,7 @@ def _pure_evaluate_fun_and_jac( if self._fun_and_jac is None: raise ValueError("The fun_and_jac function is not defined.") - now = time.perf_counter() + start_time = time.perf_counter() traceback: None | str = None params = self._converter.params_from_internal(x) @@ -590,16 +601,19 @@ def _pure_evaluate_fun_and_jac( if self._direction == Direction.MAXIMIZE: out_jac = -out_jac + stop_time = time.perf_counter() + hist_entry = HistoryEntry( params=params, fun=hist_fun_value, - time=now, + start_time=start_time, + stop_time=stop_time, task=EvalTask.FUN_AND_JAC, ) log_entry = IterationState( params=params, - timestamp=now, + timestamp=start_time, scalar_fun=hist_fun_value, valid=not bool(traceback), raw_fun=fun_value, diff --git a/src/optimagic/optimization/process_results.py b/src/optimagic/optimization/process_results.py index 0817649f5..64d764174 100644 --- a/src/optimagic/optimization/process_results.py +++ b/src/optimagic/optimization/process_results.py @@ -5,9 +5,10 @@ from optimagic.optimization.algorithm import InternalOptimizeResult from optimagic.optimization.convergence_report import get_convergence_report +from optimagic.optimization.history import History from optimagic.optimization.optimize_result import MultistartInfo, OptimizeResult from optimagic.parameters.conversion import Converter -from optimagic.typing import AggregationLevel, Direction, PyTree +from optimagic.typing import AggregationLevel, Direction, EvalTask, PyTree from optimagic.utilities import isscalar @@ -41,9 +42,7 @@ def process_single_result( fun = -fun if raw_res.history is not None: - conv_report = get_convergence_report( - history=raw_res.history, direction=extra_fields.direction - ) + conv_report = get_convergence_report(raw_res.history) else: conv_report = None @@ -109,15 +108,16 @@ def process_multistart_result( # create a convergence report for the multistart optimization; This is not # the same as the convergence report for the individual local optimizations. 
# ==============================================================================
-    crit_hist = [opt.fun for opt in info.local_optima]
-    params_hist = [opt.params for opt in info.local_optima]
-    time_hist = [np.nan for opt in info.local_optima]
-    hist = {"criterion": crit_hist, "params": params_hist, "runtime": time_hist}
-
-    conv_report = get_convergence_report(
-        history=hist,
+    report_history = History(
         direction=extra_fields.direction,
+        fun=[opt.fun for opt in info.local_optima],
+        params=[opt.params for opt in info.local_optima],
+        start_time=len(info.local_optima) * [np.nan],
+        stop_time=len(info.local_optima) * [np.nan],
+        batches=list(range(len(info.local_optima))),
+        task=len(info.local_optima) * [EvalTask.FUN],
     )
+    conv_report = get_convergence_report(report_history)
 
     res.convergence_report = conv_report
diff --git a/src/optimagic/timing.py b/src/optimagic/timing.py
new file mode 100644
index 000000000..a9fbe7d88
--- /dev/null
+++ b/src/optimagic/timing.py
@@ -0,0 +1,49 @@
+from dataclasses import dataclass
+from typing import Callable, Iterable
+
+
+@dataclass(frozen=True)
+class CostModel:
+    fun: float | None
+    jac: float | None
+    fun_and_jac: float | None
+    label: str
+    aggregate_batch_time: Callable[[Iterable[float]], float]
+
+    def __post_init__(self) -> None:
+        if not callable(self.aggregate_batch_time):
+            raise ValueError(
+                "aggregate_batch_time must be a callable, got "
+                f"{self.aggregate_batch_time}"
+            )
+
+
+evaluation_time = CostModel(
+    fun=None,
+    jac=None,
+    fun_and_jac=None,
+    label="Function time (seconds)",
+    aggregate_batch_time=sum,
+)
+
+fun_evaluations = CostModel(
+    fun=1,
+    jac=0,
+    fun_and_jac=1,
+    label="Number of function evaluations",
+    aggregate_batch_time=sum,
+)
+
+fun_batches = CostModel(
+    fun=1, jac=0, fun_and_jac=1, label="Number of batches", aggregate_batch_time=max
+)
+
+wall_time = "wall_time"
+
+
+TIMING_REGISTRY = {
+    "evaluation_time": evaluation_time,
+    "fun_evaluations": fun_evaluations,
+    "fun_batches": fun_batches,
+    "wall_time": wall_time,
+}
diff --git a/src/optimagic/visualization/history_plots.py b/src/optimagic/visualization/history_plots.py
index 4c4797b53..22d98bf71 100644
--- a/src/optimagic/visualization/history_plots.py
+++ b/src/optimagic/visualization/history_plots.py
@@ -10,7 +10,7 @@
 from optimagic.config import PLOTLY_PALETTE, PLOTLY_TEMPLATE
 from optimagic.logging.logger import LogReader, SQLiteLogOptions
 from optimagic.optimization.algorithm import Algorithm
-from optimagic.optimization.history_tools import get_history_arrays
+from optimagic.optimization.history import History
 from optimagic.optimization.optimize_result import OptimizeResult
 from optimagic.parameters.tree_registry import get_registry
 from optimagic.typing import Direction
@@ -59,7 +59,7 @@ def criterion_plot(
         palette = [palette]
     palette = itertools.cycle(palette)
 
-    key = "monotone_criterion" if monotone else "criterion"
+    fun_or_monotone_fun = "monotone_fun" if monotone else "fun"
 
     # ==================================================================================
     # Extract plotting data from results objects / data base
@@ -103,9 +103,7 @@
     }
 
     for i, local_history in enumerate(data[0]["local_histories"]):
-        history = get_history_arrays(
-            local_history, Direction(data[0]["direction"])
-        )[key]
+        history = getattr(local_history, fun_or_monotone_fun)
 
         if max_evaluations is not None and len(history) > max_evaluations:
             history = history[:max_evaluations]
@@ -128,7 +126,8 @@
             _history = _data["stacked_local_histories"]
         else:
             _history = _data["history"]
-        history = get_history_arrays(_history, _data["direction"])[key]
+
+        history = getattr(_history, fun_or_monotone_fun)
 
         if max_evaluations is not None and len(history) > max_evaluations:
             history = history[:max_evaluations]
@@ -253,7 +252,7 @@ def params_plot(
         raise TypeError("result must be an OptimizeResult or a path to a log file.")
 
     if data["stacked_local_histories"] is not None:
-        history = data["stacked_local_histories"]["params"]
+        history = data["stacked_local_histories"].params
     else:
         history = data["history"].params
 
@@ -338,14 +337,20 @@ def _extract_plotting_data_from_results_object(
         local_histories = None
 
     if stack_multistart and local_histories is not None:
-        stacked = _get_stacked_local_histories(local_histories)
+        stacked = _get_stacked_local_histories(local_histories, res.direction)
         if show_exploration:
-            stacked["params"] = (
-                res.multistart_info.exploration_sample[::-1] + stacked["params"]
-            )
-            stacked["criterion"] = (
-                res.multistart_info.exploration_results.tolist()[::-1]
-                + stacked["criterion"]
+            fun = res.multistart_info.exploration_results.tolist()[::-1] + stacked.fun
+            params = res.multistart_info.exploration_sample[::-1] + stacked.params
+
+            stacked = History(
+                direction=stacked.direction,
+                fun=fun,
+                params=params,
+                # TODO: This needs to be fixed
+                start_time=len(fun) * [None],
+                stop_time=len(fun) * [None],
+                batches=len(fun) * [None],
+                task=len(fun) * [None],
             )
     else:
         stacked = None
@@ -387,16 +392,39 @@ def _extract_plotting_data_from_database(res, stack_multistart, show_exploration
 
     direction = _problem_table["direction"].tolist()[-1]
 
-    history, local_histories, exploration = reader.read_multistart_history(direction)
+    _history, local_histories, exploration = reader.read_multistart_history(direction)
 
     if stack_multistart and local_histories is not None:
-        stacked = _get_stacked_local_histories(local_histories, history)
+        stacked = _get_stacked_local_histories(local_histories, direction, _history)
         if show_exploration:
-            stacked["params"] = exploration["params"][::-1] + stacked["params"]
-            stacked["criterion"] = exploration["criterion"][::-1] + stacked["criterion"]
+            fun = exploration["criterion"][::-1] + stacked.fun
+            params = exploration["params"][::-1] + stacked.params
+
+            stacked = History(
+                direction=stacked.direction,
+                fun=fun,
+                params=params,
+                # TODO: This needs to be fixed
+                start_time=len(fun) * [None],
+                stop_time=len(fun) * [None],
+                batches=len(fun) * [None],
+                task=len(fun) * [None],
+            )
     else:
         stacked = None
 
+    history = History(
+        direction=direction,
+        fun=_history["fun"],
+        params=_history["params"],
+        start_time=_history["time"],
+        # TODO (@janosg): Retrieve that information from `hist` once it is available.
+        # https://github.com/optimagic-dev/optimagic/pull/553
+        stop_time=len(_history["fun"]) * [None],
+        task=len(_history["fun"]) * [None],
+        batches=list(range(len(_history["fun"]))),
+    )
+
     data = {
         "history": history,
         "direction": direction,
@@ -408,7 +436,7 @@
     return data
 
 
-def _get_stacked_local_histories(local_histories, history=None):
+def _get_stacked_local_histories(local_histories, direction, history=None):
     """Stack local histories.
 
     Local histories is a list of dictionaries, each of the same structure. We transform
@@ -427,4 +455,16 @@
     stacked["criterion"].extend(history.fun)
     stacked["params"].extend(history.params)
     stacked["runtime"].extend(history.time)
-    return stacked
+
+    return History(
+        direction=direction,
+        fun=stacked["criterion"],
+        params=stacked["params"],
+        start_time=stacked["runtime"],
+        # TODO (@janosg): Retrieve that information from `hist` once it is available
+        # for the IterationHistory.
+        # https://github.com/optimagic-dev/optimagic/pull/553
+        stop_time=len(stacked["criterion"]) * [None],
+        task=len(stacked["criterion"]) * [None],
+        batches=list(range(len(stacked["criterion"]))),
+    )
diff --git a/tests/optimagic/optimization/test_algorithm.py b/tests/optimagic/optimization/test_algorithm.py
index 7d78c02da..96a531ea9 100644
--- a/tests/optimagic/optimization/test_algorithm.py
+++ b/tests/optimagic/optimization/test_algorithm.py
@@ -117,7 +117,8 @@ def _solve_internal_problem(self, problem, x0):
                 hist_entry = HistoryEntry(
                     params=x0,
                     fun=0.0,
-                    time=0.0,
+                    start_time=0.0,
+                    stop_time=0.0,
                     task=EvalTask.FUN,
                 )
                 problem.history.add_entry(hist_entry)
diff --git a/tests/optimagic/optimization/test_convergence_report.py b/tests/optimagic/optimization/test_convergence_report.py
index 058698a58..ea527f2bc 100644
--- a/tests/optimagic/optimization/test_convergence_report.py
+++ b/tests/optimagic/optimization/test_convergence_report.py
@@ -3,46 +3,55 @@
 from numpy.testing import assert_array_almost_equal as aaae
 
 from optimagic.optimization.convergence_report import get_convergence_report
-from optimagic.typing import Direction
+from optimagic.optimization.history import History
+from optimagic.typing import Direction, EvalTask
 
 
 def test_get_convergence_report_minimize():
-    hist = {
-        "criterion": [5, 4.1, 4.4, 4.0],
-        "params": [{"a": 0}, {"a": 2.1}, {"a": 2.5}, {"a": 2.0}],
-        "runtime": [0, 1, 2, 3],
-    }
-
-    calculated = pd.DataFrame.from_dict(
-        get_convergence_report(hist, Direction.MINIMIZE)
+    hist = History(
+        direction=Direction.MINIMIZE,
+        params=[{"a": 0}, {"a": 2.1}, {"a": 2.5}, {"a": 2.0}],
+        fun=[5, 4.1, 4.4, 4.0],
+        start_time=[0, 1, 2, 3],
+        stop_time=[1, 2, 3, 4],
+        task=4 * [EvalTask.FUN],
+        batches=[0, 1, 2, 3],
     )
 
+    calculated = pd.DataFrame.from_dict(get_convergence_report(hist))
+
     expected = np.array([[0.025, 0.25], [0.05, 1.0], [0.1, 1], [0.1, 2.0]])
 
     aaae(calculated.to_numpy(), expected)
 
 
 def test_get_convergence_report_maximize():
-    hist = {
-        "criterion": [-5, -4.1, -4.4, -4.0],
-        "params": [{"a": 0}, {"a": 2.1}, {"a": 2.5}, {"a": 2.0}],
-        "runtime": [0, 1, 2, 3],
-    }
-
-    calculated = pd.DataFrame.from_dict(
-        get_convergence_report(hist, Direction.MAXIMIZE)
+    hist = History(
+        direction=Direction.MAXIMIZE,
+        params=[{"a": 0}, {"a": 2.1}, {"a": 2.5}, {"a": 2.0}],
+        fun=[-5, -4.1, -4.4, -4.0],
+        start_time=[0, 1, 2, 3],
+        stop_time=[1, 2, 3, 4],
+        task=4 * [EvalTask.FUN],
+        batches=[0, 1, 2, 3],
     )
 
+    calculated = pd.DataFrame.from_dict(get_convergence_report(hist))
+
     expected = np.array([[0.025, 0.25], [0.05, 1.0], [0.1, 1], [0.1, 2.0]])
 
     aaae(calculated.to_numpy(), expected)
 
 
 def test_history_is_too_short():
     # first value is best, so history of accepted parameters has only one entry
-    hist = {
-        "criterion": [5, -4.1, -4.4, -4.0],
-        "params": [{"a": 0}, {"a": 2.1}, {"a": 2.5}, {"a": 2.0}],
-        "runtime": [0, 1, 2, 3],
-    }
+    hist = History(
+        direction=Direction.MAXIMIZE,
+        params=[{"a": 0}, {"a": 2.1}, {"a": 2.5}, {"a": 2.0}],
+        fun=[5, 4.1, 4.4, 4.0],
+        start_time=[0, 1, 2, 3],
+        stop_time=[1, 2, 3, 4],
+        task=4 * [EvalTask.FUN],
+        batches=[0, 1, 2, 3],
+    )
 
-    calculated = get_convergence_report(hist, Direction.MAXIMIZE)
+    calculated = get_convergence_report(hist)
 
     assert calculated is None
diff --git a/tests/optimagic/optimization/test_history.py b/tests/optimagic/optimization/test_history.py
index bd6dae3dd..72cdbb4a2 100644
--- a/tests/optimagic/optimization/test_history.py
+++ b/tests/optimagic/optimization/test_history.py
@@ -1,37 +1,552 @@
+import numpy as np
+import pandas as pd
import pytest from numpy.testing import assert_array_almost_equal as aaae +from numpy.testing import assert_array_equal +from pandas.testing import assert_frame_equal +from pybaum import tree_map -from optimagic.optimization.history import History, HistoryEntry -from optimagic.typing import EvalTask +import optimagic as om +from optimagic.optimization.history import ( + History, + HistoryEntry, + _apply_to_batch, + _calculate_monotone_sequence, + _get_batch_start, + _get_flat_param_names, + _get_flat_params, + _is_1d_array, + _task_as_categorical, + _validate_args_are_all_none_or_lists_of_same_length, +) +from optimagic.typing import Direction, EvalTask + +# ====================================================================================== +# Test methods to add data to History (add_entry, add_batch, init) +# ====================================================================================== @pytest.fixture def history_entries(): return [ - HistoryEntry(params=[1, 2, 3], fun=1, time=0.1, task=EvalTask.FUN), - HistoryEntry(params=[4, 5, 6], fun=2, time=0.2, task=EvalTask.FUN), - HistoryEntry(params=[7, 8, 9], fun=3, time=0.3, task=EvalTask.FUN), + HistoryEntry( + params={"a": 1, "b": [2, 3]}, + fun=1, + start_time=0.1, + stop_time=0.2, + task=EvalTask.FUN, + ), + HistoryEntry( + params={"a": 4, "b": [5, 6]}, + fun=3, + start_time=0.2, + stop_time=0.3, + task=EvalTask.FUN, + ), + HistoryEntry( + params={"a": 7, "b": [8, 9]}, + fun=2, + start_time=0.3, + stop_time=0.4, + task=EvalTask.FUN, + ), ] def test_history_add_entry(history_entries): - history = History() + history = History(Direction.MINIMIZE) for entry in history_entries: history.add_entry(entry) - assert history.params == [[1, 2, 3], [4, 5, 6], [7, 8, 9]] - assert history.fun == [1, 2, 3] + assert history.direction == Direction.MINIMIZE + + assert history.params == [ + {"a": 1, "b": [2, 3]}, + {"a": 4, "b": [5, 6]}, + {"a": 7, "b": [8, 9]}, + ] + assert history.fun == [1, 3, 2] assert history.task == [EvalTask.FUN, EvalTask.FUN, EvalTask.FUN] assert history.batches == [0, 1, 2] - aaae(history.time, [0.0, 0.1, 0.2]) + aaae(history.start_time, [0.1, 0.2, 0.3]) + aaae(history.stop_time, [0.2, 0.3, 0.4]) + + assert_array_equal(history.monotone_fun, np.array([1, 1, 1], dtype=np.float64)) + assert_array_equal( + history.flat_params, np.arange(1, 10, dtype=np.float64).reshape(3, 3) + ) def test_history_add_batch(history_entries): - history = History() + history = History(Direction.MAXIMIZE) history.add_batch(history_entries) - assert history.params == [[1, 2, 3], [4, 5, 6], [7, 8, 9]] - assert history.fun == [1, 2, 3] + assert history.direction == Direction.MAXIMIZE + + assert history.params == [ + {"a": 1, "b": [2, 3]}, + {"a": 4, "b": [5, 6]}, + {"a": 7, "b": [8, 9]}, + ] + assert history.fun == [1, 3, 2] assert history.task == [EvalTask.FUN, EvalTask.FUN, EvalTask.FUN] assert history.batches == [0, 0, 0] - aaae(history.time, [0.0, 0.1, 0.2]) + aaae(history.start_time, [0.1, 0.2, 0.3]) + aaae(history.stop_time, [0.2, 0.3, 0.4]) + + assert_array_equal(history.monotone_fun, np.array([1, 3, 3], dtype=np.float64)) + assert_array_equal( + history.flat_params, np.arange(1, 10, dtype=np.float64).reshape(3, 3) + ) + + +def test_history_from_data(): + data = { + "params": [{"a": 1, "b": [2, 3]}, {"a": 4, "b": [5, 6]}, {"a": 7, "b": [8, 9]}], + "fun": [1, 3, 2], + "task": [EvalTask.FUN, EvalTask.FUN, EvalTask.FUN], + "batches": [0, 0, 0], + "start_time": [0.0, 0.15, 0.3], + "stop_time": [0.1, 0.25, 0.4], + } + + history = History( + 
direction=Direction.MAXIMIZE, + **data, + ) + + assert history.direction == Direction.MAXIMIZE + + assert history.params == data["params"] + assert history.fun == data["fun"] + assert history.task == data["task"] + assert history.batches == data["batches"] + aaae(history.start_time, data["start_time"]) + aaae(history.stop_time, data["stop_time"]) + + assert_array_equal(history.monotone_fun, np.array([1, 3, 3], dtype=np.float64)) + assert_array_equal( + history.flat_params, np.arange(1, 10, dtype=np.float64).reshape(3, 3) + ) + + +# ====================================================================================== +# Test functionality of History +# ====================================================================================== + + +@pytest.fixture +def params(): + params_tree = {"a": None, "b": {"c": None, "d": (None, None)}} + return [ + tree_map(lambda _: k, params_tree, is_leaf=lambda leaf: leaf is None) # noqa: B023 + for k in range(6) + ] + + +@pytest.fixture +def history_data(params): + return { + "fun": [10, None, 9, None, 2, 5], + "task": [ + EvalTask.FUN, + EvalTask.JAC, + EvalTask.FUN, + EvalTask.JAC, + EvalTask.FUN, + EvalTask.FUN_AND_JAC, + ], + "start_time": [0, 2, 5, 7, 10, 12], + "stop_time": [1, 4, 6, 9, 11, 14], + "params": params, + "batches": [0, 1, 2, 3, 4, 5], + } + + +@pytest.fixture +def history(history_data): + return History(direction=Direction.MINIMIZE, **history_data) + + +@pytest.fixture +def history_parallel(history_data): + data = history_data.copy() + data["batches"] = [0, 0, 1, 1, 2, 2] + return History(direction=Direction.MINIMIZE, **data) + + +# Function data, function value, and monotone function value +# -------------------------------------------------------------------------------------- + + +def test_history_fun_data_with_fun_evaluations_cost_model(history): + got = history.fun_data( + cost_model=om.timing.fun_evaluations, + monotone=False, + ) + exp = pd.DataFrame( + { + "fun": [10, np.nan, 9, np.nan, 2, 5], + "task": [ + "fun", + "jac", + "fun", + "jac", + "fun", + "fun_and_jac", + ], + "time": [1, 1, 2, 2, 3, 4], + } + ) + assert_frame_equal(got, exp, check_dtype=False, check_categorical=False) + + +def test_history_fun_data_with_fun_evaluations_cost_model_and_monotone(history): + got = history.fun_data( + cost_model=om.timing.fun_evaluations, + monotone=True, + ) + exp = pd.DataFrame( + { + "fun": [10, np.nan, 9, np.nan, 2, 2], + "task": [ + "fun", + "jac", + "fun", + "jac", + "fun", + "fun_and_jac", + ], + "time": [1, 1, 2, 2, 3, 4], + } + ) + assert_frame_equal(got, exp, check_dtype=False, check_categorical=False) + + +def test_history_fun_data_with_fun_batches_cost_model(history_parallel): + got = history_parallel.fun_data( + cost_model=om.timing.fun_batches, + monotone=False, + ) + exp = pd.DataFrame( + { + "fun": [10, np.nan, 9, np.nan, 2, 5], + "task": [ + "fun", + "jac", + "fun", + "jac", + "fun", + "fun_and_jac", + ], + "time": [1, 1, 2, 2, 3, 3], + } + ) + assert_frame_equal(got, exp, check_dtype=False, check_categorical=False) + + +def test_history_fun_data_with_evaluation_time_cost_model(history): + got = history.fun_data( + cost_model=om.timing.evaluation_time, + monotone=False, + ) + exp = pd.DataFrame( + { + "fun": [10, np.nan, 9, np.nan, 2, 5], + "task": [ + "fun", + "jac", + "fun", + "jac", + "fun", + "fun_and_jac", + ], + "time": [1, 3, 4, 6, 7, 9], + } + ) + assert_frame_equal(got, exp, check_dtype=False, check_categorical=False) + + +def test_fun_property(history): + assert_array_equal(history.fun, [10, None, 9, None, 
2, 5]) + + +def test_monotone_fun_property(history): + assert_array_equal(history.monotone_fun, np.array([10, np.nan, 9, np.nan, 2, 2])) + + +# Acceptance +# -------------------------------------------------------------------------------------- + + +def test_is_accepted_property(history): + got = history.is_accepted + exp = np.array([True, False, True, False, True, False]) + assert_array_equal(got, exp) + + +# Parameter data, params, flat params, and flat params names +# -------------------------------------------------------------------------------------- + + +def test_params_data_fun_evaluations_cost_model(history): + got = history.params_data(cost_model=om.timing.fun_evaluations) + exp = pd.DataFrame( + { + "name": np.repeat( + [ + "a", + "b_c", + "b_d_0", + "b_d_1", + ], + 6, + ), + "value": np.tile(list(range(6)), 4), + "task": np.tile( + [ + "fun", + "jac", + "fun", + "jac", + "fun", + "fun_and_jac", + ], + 4, + ), + "time": np.tile([1, 1, 2, 2, 3, 4], 4), + } + ) + assert_frame_equal(got, exp, check_categorical=False, check_dtype=False) + + +def test_params_property(history, params): + assert history.params == params + + +def test_flat_params_property(history): + got = history.flat_params + assert_array_equal(got, [[k for _ in range(4)] for k in range(6)]) + + +def test_flat_param_names(history): + assert history.flat_param_names == ["a", "b_c", "b_d_0", "b_d_1"] + + +# Time +# -------------------------------------------------------------------------------------- + + +def test_get_time_per_task_fun(history): + got = history._get_time_per_task(EvalTask.FUN, cost_factor=1) + exp = np.array([1, 0, 1, 0, 1, 0]) + assert_array_equal(got, exp) + + +def test_get_time_per_task_jac_cost_factor_none(history): + got = history._get_time_per_task(EvalTask.JAC, cost_factor=None) + exp = np.array([0, 2, 0, 2, 0, 0]) + assert_array_equal(got, exp) + + +def test_get_time_per_task_fun_and_jac(history): + got = history._get_time_per_task(EvalTask.FUN_AND_JAC, cost_factor=-0.5) + exp = np.array([0, 0, 0, 0, 0, -0.5]) + assert_array_equal(got, exp) + + +def test_get_time_custom_cost_model(history): + cost_model = om.timing.CostModel( + fun=0.5, jac=1, fun_and_jac=2, label="test", aggregate_batch_time=sum + ) + got = history._get_time(cost_model) + exp = np.array( + [ + 0.5, + 0.5 + 1, + 1 + 1, + 1 + 2, + 1.5 + 2, + 1.5 + 2 + 2, + ] + ) + assert_array_equal(got, exp) + + +def test_get_time_fun_evaluations(history): + got = history._get_time(cost_model=om.timing.fun_evaluations) + exp = np.array([1, 1, 2, 2, 3, 4]) + assert_array_equal(got, exp) + + +def test_get_time_fun_batches(history): + got = history._get_time(cost_model=om.timing.fun_batches) + exp = np.array([1, 1, 2, 2, 3, 4]) + assert_array_equal(got, exp) + + +def test_get_time_fun_batches_parallel(history_parallel): + got = history_parallel._get_time(cost_model=om.timing.fun_batches) + exp = np.array([1, 1, 2, 2, 3, 3]) + assert_array_equal(got, exp) + + +def test_get_time_evaluation_time(history): + got = history._get_time(cost_model=om.timing.evaluation_time) + exp = np.array([1, 3, 4, 6, 7, 9]) + assert_array_equal(got, exp) + + +def test_get_time_wall_time(history): + got = history._get_time(cost_model="wall_time") + exp = np.array([1, 4, 6, 9, 11, 14]) + assert_array_equal(got, exp) + + +def test_get_time_invalid_cost_model(history): + with pytest.raises( + ValueError, match="cost_model must be a CostModel or 'wall_time'." 
+ ): + history._get_time(cost_model="invalid") + + +def test_start_time_property(history): + assert history.start_time == [0, 2, 5, 7, 10, 12] + + +def test_stop_time_property(history): + assert history.stop_time == [1, 4, 6, 9, 11, 14] + + +# Batches +# -------------------------------------------------------------------------------------- + + +def test_batches_property(history): + assert history.batches == [0, 1, 2, 3, 4, 5] + + +# Tasks +# -------------------------------------------------------------------------------------- + + +def test_task_property(history): + assert history.task == [ + EvalTask.FUN, + EvalTask.JAC, + EvalTask.FUN, + EvalTask.JAC, + EvalTask.FUN, + EvalTask.FUN_AND_JAC, + ] + + +# ====================================================================================== +# Unit tests +# ====================================================================================== + + +def test_is_1d_array(): + assert _is_1d_array(np.arange(2)) is True + assert _is_1d_array(np.eye(2)) is False + assert _is_1d_array([0, 1]) is False + + +def test_get_flat_params_pytree(): + params = [ + {"a": 1, "b": [0, 1], "c": np.arange(2)}, + {"a": 2, "b": [1, 2], "c": np.arange(2)}, + ] + got = _get_flat_params(params) + exp = [ + [1, 0, 1, 0, 1], + [2, 1, 2, 0, 1], + ] + assert_array_equal(got, exp) + + +def test_get_flat_params_fast_path(): + params = [np.arange(2)] + got = _get_flat_params(params) + exp = [[0, 1]] + assert_array_equal(got, exp) + + +def test_get_flat_param_names_pytree(): + got = _get_flat_param_names(param={"a": 0, "b": [0, 1], "c": np.arange(2)}) + exp = ["a", "b_0", "b_1", "c_0", "c_1"] + assert got == exp + + +def test_get_flat_param_names_fast_path(): + got = _get_flat_param_names(param=np.arange(2)) + exp = ["0", "1"] + assert got == exp + + +def test_calculate_monotone_sequence_maximize(): + sequence = [0, 1, 0, 0, 2, 10, 0] + exp = [0, 1, 1, 1, 2, 10, 10] + got = _calculate_monotone_sequence(sequence, direction=Direction.MAXIMIZE) + assert_array_equal(exp, got) + + +def test_calculate_monotone_sequence_minimize(): + sequence = [10, 11, 8, 12, 0, 5] + exp = [10, 10, 8, 8, 0, 0] + got = _calculate_monotone_sequence(sequence, direction=Direction.MINIMIZE) + assert_array_equal(exp, got) + + +def test_validate_args_are_all_none_or_lists_of_same_length(): + _validate_args_are_all_none_or_lists_of_same_length(None, None) + _validate_args_are_all_none_or_lists_of_same_length([1], [1]) + + with pytest.raises(ValueError, match="All list arguments must have the same"): + _validate_args_are_all_none_or_lists_of_same_length([1], [1, 2]) + + with pytest.raises(ValueError, match="All arguments must be lists of the same"): + _validate_args_are_all_none_or_lists_of_same_length(None, [1]) + + +def test_task_as_categorical(): + task = [EvalTask.FUN, EvalTask.JAC, EvalTask.FUN_AND_JAC] + got = _task_as_categorical(task) + assert got.tolist() == ["fun", "jac", "fun_and_jac"] + assert isinstance(got.dtype, pd.CategoricalDtype) + + +def test_get_batch_start(): + batches = [0, 0, 1, 1, 1, 2, 2, 3] + got = _get_batch_start(batches) + assert got == [0, 2, 5, 7] + + +def test_apply_to_batch_sum(): + data = np.array([0, 1, 2, 3, 4]) + batch_ids = [0, 0, 1, 1, 2] + exp = np.array([1, 0, 5, 0, 4]) + got = _apply_to_batch(data, batch_ids, sum) + assert_array_equal(exp, got) + + +def test_apply_to_batch_max(): + data = np.array([0, 1, 2, 3, 4]) + batch_ids = [0, 0, 1, 1, 2] + exp = np.array([1, 0, 3, 0, 4]) + got = _apply_to_batch(data, batch_ids, max) + assert_array_equal(exp, got) + + +def 
test_apply_to_batch_broken_func():
+    data = np.array([0, 1, 2, 3, 4])
+    batch_ids = [0, 0, 1, 1, 2]
+    with pytest.raises(ValueError, match="Calling function <lambda> on batch 0"):
+        _apply_to_batch(data, batch_ids, func=lambda _: 1 / 0)
+
+
+def test_apply_to_batch_func_with_non_scalar_return():
+    data = np.array([0, 1, 2, 3, 4])
+    batch_ids = [0, 0, 1, 1, 2]
+    with pytest.raises(ValueError, match="did not return a scalar"):
+        _apply_to_batch(data, batch_ids, func=lambda _list: _list)
diff --git a/tests/optimagic/optimization/test_history_tools.py b/tests/optimagic/optimization/test_history_tools.py
deleted file mode 100644
index 4b4f4d100..000000000
--- a/tests/optimagic/optimization/test_history_tools.py
+++ /dev/null
@@ -1,27 +0,0 @@
-import numpy as np
-import pytest
-from numpy.testing import assert_array_almost_equal as aaae
-
-from optimagic.optimization.history_tools import get_history_arrays
-from optimagic.typing import Direction
-
-
-@pytest.fixture()
-def history():
-    hist = {
-        "criterion": [5, 4, 5.5, 4.2],
-        "params": [{"a": 0}, {"a": 1}, {"a": 2}, {"a": 3}],
-        "runtime": [0, 1, 2, 3],
-    }
-    return hist
-
-
-def test_get_history_arrays_minimize(history):
-    calculated = get_history_arrays(history, Direction.MINIMIZE)
-
-    aaae(calculated.is_accepted, np.array([True, True, False, False]))
-
-
-def test_get_history_arrays_maximize(history):
-    calculated = get_history_arrays(history, Direction.MAXIMIZE)
-    aaae(calculated.is_accepted, np.array([True, False, True, False]))
diff --git a/tests/optimagic/test_timing.py b/tests/optimagic/test_timing.py
new file mode 100644
index 000000000..fd2edfc3c
--- /dev/null
+++ b/tests/optimagic/test_timing.py
@@ -0,0 +1,14 @@
+import pytest
+
+from optimagic import timing
+
+
+def test_invalid_aggregate_batch_time():
+    with pytest.raises(ValueError, match="aggregate_batch_time must be a callable"):
+        timing.CostModel(
+            fun=None,
+            jac=None,
+            fun_and_jac=None,
+            label="label",
+            aggregate_batch_time="Not callable",
+        )