diff --git a/tests/benchmark-models/test_benchmark_collection.sh b/tests/benchmark-models/test_benchmark_collection.sh
index 581b8db028..d3d1cad712 100755
--- a/tests/benchmark-models/test_benchmark_collection.sh
+++ b/tests/benchmark-models/test_benchmark_collection.sh
@@ -93,9 +93,22 @@ for model in $models; do
     yaml="${model_dir}"/"${model}"/problem.yaml
   fi
 
+  # problems that must be imported with --flatten (all others are imported without it)
+  to_flatten=(
+    "Bruno_JExpBot2016" "Chen_MSB2009" "Crauste_CellSystems2017"
+    "Fiedler_BMCSystBiol2016" "Fujita_SciSignal2010" "SalazarCavazos_MBoC2020"
+  )
+  flatten=""
+  for item in "${to_flatten[@]}"; do
+    if [[ "$item" == "$model" ]]; then
+      flatten="--flatten"
+      break
+    fi
+  done
+
   amici_model_dir=test_bmc/"${model}"
   mkdir -p "$amici_model_dir"
-  cmd_import="amici_import_petab ${yaml} -o ${amici_model_dir} -n ${model} --flatten"
+  cmd_import="amici_import_petab ${yaml} -o ${amici_model_dir} -n ${model} ${flatten}"
   cmd_run="$script_path/test_petab_model.py -y ${yaml} -d ${amici_model_dir} -m ${model} -c"
 
   printf '=%.0s' {1..40}
diff --git a/tests/benchmark-models/test_petab_benchmark.py b/tests/benchmark-models/test_petab_benchmark.py
index 4892100877..69df16f181 100644
--- a/tests/benchmark-models/test_petab_benchmark.py
+++ b/tests/benchmark-models/test_petab_benchmark.py
@@ -12,8 +12,9 @@
 from amici.petab.petab_import import import_petab_problem
 import benchmark_models_petab
 from collections import defaultdict
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from amici import SensitivityMethod
+from petab.v1.lint import measurement_table_has_timepoint_specific_mappings
 from fiddy import MethodId, get_derivative
 from fiddy.derivative_check import NumpyIsCloseDerivativeCheck
 from fiddy.extensions.amici import simulate_petab_to_cached_functions
@@ -58,14 +59,18 @@ class GradientCheckSettings:
     atol_consistency: float = 1e-5
     rtol_consistency: float = 1e-1
     # Step sizes for finite difference gradient checks.
-    step_sizes = [
-        1e-1,
-        5e-2,
-        1e-2,
-        1e-3,
-        1e-4,
-        1e-5,
-    ]
+    step_sizes: list[float] = field(
+        default_factory=lambda: [  # fresh list for every instance
+            2e-1,
+            1e-1,
+            5e-2,
+            1e-2,
+            5e-3,
+            1e-3,
+            1e-4,
+            1e-5,
+        ]
+    )
     rng_seed: int = 0
     ss_sensitivity_mode: amici.SteadyStateSensitivityMode = (
         amici.SteadyStateSensitivityMode.integrateIfNewtonFails
@@ -97,7 +102,6 @@ class GradientCheckSettings:
     noise_level=0.01,
     atol_consistency=1e-3,
 )
-settings["Okuonghae_ChaosSolitonsFractals2020"].step_sizes.extend([0.2, 0.005])
 settings["Oliveira_NatCommun2021"] = GradientCheckSettings(
     # Avoid "root after reinitialization"
     atol_sim=1e-12,
@@ -176,7 +180,10 @@ def test_benchmark_gradient(model, scale, sensitivity_method, request):
         pytest.skip()
 
     petab_problem = benchmark_models_petab.get_problem(model)
-    petab.flatten_timepoint_specific_output_overrides(petab_problem)
+    if measurement_table_has_timepoint_specific_mappings(
+        petab_problem.measurement_df,
+    ):
+        petab.flatten_timepoint_specific_output_overrides(petab_problem)
 
     # Only compute gradient for estimated parameters.
     parameter_ids = petab_problem.x_free_ids
diff --git a/tests/benchmark-models/test_petab_model.py b/tests/benchmark-models/test_petab_model.py
index c4ec2f5dd2..125a046a5e 100755
--- a/tests/benchmark-models/test_petab_model.py
+++ b/tests/benchmark-models/test_petab_model.py
@@ -25,6 +25,7 @@
     simulate_petab,
 )
 from petab.v1.visualize import plot_problem
+from petab.v1.lint import measurement_table_has_timepoint_specific_mappings
 
 logger = get_logger(f"amici.{__name__}", logging.WARNING)
 
@@ -115,7 +116,11 @@ def main():
 
     # load PEtab files
     problem = petab.Problem.from_yaml(args.yaml_file_name)
-    petab.flatten_timepoint_specific_output_overrides(problem)
+
+    if measurement_table_has_timepoint_specific_mappings(
+        problem.measurement_df
+    ):
+        petab.flatten_timepoint_specific_output_overrides(problem)
 
     # load model
     if args.model_directory: