From 191ee2f765e0560e15e7e9e00b6ccae11681eeff Mon Sep 17 00:00:00 2001 From: Rory Conlin Date: Tue, 25 Jun 2024 19:49:51 -0400 Subject: [PATCH 1/6] Rescale optimizer termination criteria --- desc/optimize/aug_lagrangian.py | 24 ++++++++++++++++-------- desc/optimize/aug_lagrangian_ls.py | 23 ++++++++++++++++------- desc/optimize/fmin_scalar.py | 19 +++++++++++++------ desc/optimize/least_squares.py | 19 +++++++++++++------ 4 files changed, 58 insertions(+), 27 deletions(-) diff --git a/desc/optimize/aug_lagrangian.py b/desc/optimize/aug_lagrangian.py index a2601032ad..1b1f29ff41 100644 --- a/desc/optimize/aug_lagrangian.py +++ b/desc/optimize/aug_lagrangian.py @@ -189,7 +189,8 @@ def fmin_auglag( # noqa: C901 - FIXME: simplify this problem dimension. Set it to ``"auto"`` in order to use an automatic heuristic for choosing the initial scale. The heuristic is described in [2]_, p.143. By default uses ``"auto"``. - + - ``"scaled_termination"`` : Whether to evaluate termination criteria for + ``xtol`` and ``gtol`` in scaled / normalized units (default) or base units. Returns ------- @@ -338,6 +339,7 @@ def laghess(z, y, mu, *args): maxiter = setdefault(maxiter, z.size * 100) max_nfev = options.pop("max_nfev", 5 * maxiter + 1) max_dx = options.pop("max_dx", jnp.inf) + scaled_termination = options.pop("scaled_termination", True) hess_scale = isinstance(x_scale, str) and x_scale in ["hess", "auto"] if hess_scale: @@ -353,7 +355,7 @@ def laghess(z, y, mu, *args): g_h = g * d H_h = d * H * d[:, None] - g_norm = jnp.linalg.norm(g * v, ord=jnp.inf) + g_norm = jnp.linalg.norm((g_h if scaled_termination else g * v), ord=jnp.inf) # conngould : norm of the cauchy point, as recommended in ch17 of Conn & Gould # scipy : norm of the scaled x, as used in scipy @@ -399,7 +401,7 @@ def laghess(z, y, mu, *args): ) subproblem = methods[tr_method] - z_norm = jnp.linalg.norm(z, ord=2) + z_norm = jnp.linalg.norm(((z * scale_inv) if scaled_termination else z), ord=2) success = None message = None step_norm = jnp.inf @@ -493,7 +495,7 @@ def laghess(z, y, mu, *args): success, message = check_termination( actual_reduction, f, - step_norm, + (step_h_norm if scaled_termination else step_norm), z_norm, g_norm, Lreduction_ratio, @@ -536,7 +538,9 @@ def laghess(z, y, mu, *args): scale, scale_inv = compute_hess_scale(H) v, dv = cl_scaling_vector(z, g, lb, ub) v = jnp.where(dv != 0, v * scale_inv, v) - g_norm = jnp.linalg.norm(g * v, ord=jnp.inf) + g_norm = jnp.linalg.norm( + (g_h if scaled_termination else g * v), ord=jnp.inf + ) # updating augmented lagrangian params if g_norm < gtolk: # TODO: maybe also add ftolk, xtolk? 
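The hunks above thread a single boolean through the solver so that the same termination checks can run in either coordinate system. A minimal sketch of the two conventions, using stand-in variables named after those in the diff (``d``, ``v``, and ``scale_inv`` are the trust-region / Coleman-Li scaling vectors); this is an illustration of the logic, not DESC's actual code, and note that patch 2 below revises the scaled gradient norm from ``g_h`` to ``g * v * scale``:

    import jax.numpy as jnp

    def termination_norms(g, z, d, v, scale_inv, scaled_termination=True):
        """Norms fed to the xtol/gtol checks, in scaled or base units.

        g : gradient of the (augmented Lagrangian) objective at z
        z : current parameter vector
        d : base -> scaled coordinate map, d = v**0.5 * scale, so g_h = g * d
        v : Coleman-Li bound-scaling vector
        scale_inv : elementwise inverse of the x_scale vector
        """
        if scaled_termination:
            g_norm = jnp.linalg.norm(g * d, ord=jnp.inf)    # scaled gradient (g_h)
            z_norm = jnp.linalg.norm(z * scale_inv, ord=2)  # scaled parameters
        else:
            g_norm = jnp.linalg.norm(g * v, ord=jnp.inf)    # base units (old behavior)
            z_norm = jnp.linalg.norm(z, ord=2)
        return g_norm, z_norm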
@@ -565,9 +569,13 @@ def laghess(z, y, mu, *args): v, dv = cl_scaling_vector(z, g, lb, ub) v = jnp.where(dv != 0, v * scale_inv, v) - g_norm = jnp.linalg.norm(g * v, ord=jnp.inf) + g_norm = jnp.linalg.norm( + (g_h if scaled_termination else g * v), ord=jnp.inf + ) - z_norm = jnp.linalg.norm(z, ord=2) + z_norm = jnp.linalg.norm( + ((z * scale_inv) if scaled_termination else z), ord=2 + ) d = v**0.5 * scale diag_h = g * dv * scale g_h = g * d @@ -580,7 +588,7 @@ def laghess(z, y, mu, *args): success, message = False, STATUS_MESSAGES["callback"] else: - step_norm = actual_reduction = 0 + step_norm = step_h_norm = actual_reduction = 0 iteration += 1 if verbose > 1: diff --git a/desc/optimize/aug_lagrangian_ls.py b/desc/optimize/aug_lagrangian_ls.py index 7b50ebdac9..39784d1c00 100644 --- a/desc/optimize/aug_lagrangian_ls.py +++ b/desc/optimize/aug_lagrangian_ls.py @@ -170,6 +170,8 @@ def lsq_auglag( # noqa: C901 - FIXME: simplify this (generally 2-3), while ``"svd"`` uses one singular value decomposition. ``"cho"`` is generally faster for large systems, especially on GPU, but may be less accurate for badly scaled systems. Default ``"svd"`` + - ``"scaled_termination"`` : Whether to evaluate termination criteria for + ``xtol`` and ``gtol`` in scaled / normalized units (default) or base units. Returns ------- @@ -273,6 +275,7 @@ def lagjac(z, y, mu, *args): maxiter = setdefault(maxiter, z.size * 100) max_nfev = options.pop("max_nfev", 5 * maxiter + 1) max_dx = options.pop("max_dx", jnp.inf) + scaled_termination = options.pop("scaled_termination", True) jac_scale = isinstance(x_scale, str) and x_scale in ["jac", "auto"] if jac_scale: @@ -288,7 +291,7 @@ def lagjac(z, y, mu, *args): g_h = g * d J_h = J * d - g_norm = jnp.linalg.norm(g * v, ord=jnp.inf) + g_norm = jnp.linalg.norm((g_h if scaled_termination else g * v), ord=jnp.inf) # conngould : norm of the cauchy point, as recommended in ch17 of Conn & Gould # scipy : norm of the scaled x, as used in scipy @@ -324,7 +327,7 @@ def lagjac(z, y, mu, *args): callback = setdefault(callback, lambda *args: False) - z_norm = jnp.linalg.norm(z, ord=2) + z_norm = jnp.linalg.norm(((z * scale_inv) if scaled_termination else z), ord=2) success = None message = None step_norm = jnp.inf @@ -432,7 +435,7 @@ def lagjac(z, y, mu, *args): success, message = check_termination( actual_reduction, cost, - step_norm, + (step_h_norm if scaled_termination else step_norm), z_norm, g_norm, Lreduction_ratio, @@ -470,7 +473,9 @@ def lagjac(z, y, mu, *args): scale, scale_inv = compute_jac_scale(J, scale_inv) v, dv = cl_scaling_vector(z, g, lb, ub) v = jnp.where(dv != 0, v * scale_inv, v) - g_norm = jnp.linalg.norm(g * v, ord=jnp.inf) + g_norm = jnp.linalg.norm( + (g_h if scaled_termination else g * v), ord=jnp.inf + ) # updating augmented lagrangian params if g_norm < gtolk: # TODO: maybe also add ftolk, xtolk? 
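These hunks pass ``step_h_norm`` together with the scaled ``z_norm`` because the ``xtol`` criterion compares the step length against the parameter norm, so the two must be measured in the same units. A hedged sketch of the scipy-style relative-step test that trust-region solvers of this kind typically use (an illustration of the principle, not the signature of DESC's ``check_termination``):

    def xtol_gtol_check(step_norm, x_norm, g_norm, xtol, gtol):
        # step_norm and x_norm must be in the SAME units (both scaled or
        # both base) for the relative step test to be meaningful.
        xtol_satisfied = step_norm <= xtol * (xtol + x_norm)
        gtol_satisfied = g_norm <= gtol
        return xtol_satisfied, gtol_satisfied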
@@ -494,9 +499,13 @@ def lagjac(z, y, mu, *args): v, dv = cl_scaling_vector(z, g, lb, ub) v = jnp.where(dv != 0, v * scale_inv, v) - g_norm = jnp.linalg.norm(g * v, ord=jnp.inf) + g_norm = jnp.linalg.norm( + (g_h if scaled_termination else g * v), ord=jnp.inf + ) - z_norm = jnp.linalg.norm(z, ord=2) + z_norm = jnp.linalg.norm( + ((z * scale_inv) if scaled_termination else z), ord=2 + ) d = v**0.5 * scale diag_h = g * dv * scale g_h = g * d @@ -509,7 +518,7 @@ def lagjac(z, y, mu, *args): success, message = False, STATUS_MESSAGES["callback"] else: - step_norm = actual_reduction = 0 + step_norm = step_h_norm = actual_reduction = 0 iteration += 1 if verbose > 1: diff --git a/desc/optimize/fmin_scalar.py b/desc/optimize/fmin_scalar.py index c3b6b8208b..472d3486fc 100644 --- a/desc/optimize/fmin_scalar.py +++ b/desc/optimize/fmin_scalar.py @@ -159,6 +159,8 @@ def fmintr( # noqa: C901 - FIXME: simplify this problem dimension. Set it to ``"auto"`` in order to use an automatic heuristic for choosing the initial scale. The heuristic is described in [2]_, p.143. By default uses ``"auto"``. + - ``"scaled_termination"`` : Whether to evaluate termination criteria for + ``xtol`` and ``gtol`` in scaled / normalized units (default) or base units. Returns ------- @@ -222,6 +224,7 @@ def fmintr( # noqa: C901 - FIXME: simplify this maxiter = N * 100 max_nfev = options.pop("max_nfev", 5 * maxiter + 1) max_dx = options.pop("max_dx", jnp.inf) + scaled_termination = options.pop("scaled_termination", True) hess_scale = isinstance(x_scale, str) and x_scale in ["hess", "auto"] if hess_scale: @@ -237,7 +240,7 @@ def fmintr( # noqa: C901 - FIXME: simplify this g_h = g * d H_h = d * H * d[:, None] - g_norm = jnp.linalg.norm(g * v, ord=jnp.inf) + g_norm = jnp.linalg.norm((g_h if scaled_termination else g * v), ord=jnp.inf) # conngould : norm of the cauchy point, as recommended in ch17 of Conn & Gould # scipy : norm of the scaled x, as used in scipy @@ -283,7 +286,7 @@ def fmintr( # noqa: C901 - FIXME: simplify this ) subproblem = methods[tr_method] - x_norm = jnp.linalg.norm(x, ord=2) + x_norm = jnp.linalg.norm(((x * scale_inv) if scaled_termination else x), ord=2) success = None message = None step_norm = jnp.inf @@ -366,7 +369,7 @@ def fmintr( # noqa: C901 - FIXME: simplify this success, message = check_termination( actual_reduction, f, - step_norm, + (step_h_norm if scaled_termination else step_norm), x_norm, g_norm, reduction_ratio, @@ -410,8 +413,12 @@ def fmintr( # noqa: C901 - FIXME: simplify this g_h = g * d H_h = d * H * d[:, None] - x_norm = jnp.linalg.norm(x, ord=2) - g_norm = jnp.linalg.norm(g * v, ord=jnp.inf) + x_norm = jnp.linalg.norm( + ((x * scale_inv) if scaled_termination else x), ord=2 + ) + g_norm = jnp.linalg.norm( + (g_h if scaled_termination else g * v), ord=jnp.inf + ) if g_norm < gtol: success, message = True, STATUS_MESSAGES["gtol"] @@ -421,7 +428,7 @@ def fmintr( # noqa: C901 - FIXME: simplify this allx.append(x) else: - step_norm = actual_reduction = 0 + step_norm = step_h_norm = actual_reduction = 0 iteration += 1 if verbose > 1: diff --git a/desc/optimize/least_squares.py b/desc/optimize/least_squares.py index f50cb388b9..b496b80c0a 100644 --- a/desc/optimize/least_squares.py +++ b/desc/optimize/least_squares.py @@ -136,6 +136,8 @@ def lsqtr( # noqa: C901 - FIXME: simplify this (generally 2-3), while ``"svd"`` uses one singular value decomposition. ``"cho"`` is generally faster for large systems, especially on GPU, but may be less accurate for badly scaled systems. 
Default ``"svd"`` + - ``"scaled_termination"`` : Whether to evaluate termination criteria for + ``xtol`` and ``gtol`` in scaled / normalized units (default) or base units. Returns ------- @@ -180,6 +182,7 @@ def lsqtr( # noqa: C901 - FIXME: simplify this maxiter = setdefault(maxiter, n * 100) max_nfev = options.pop("max_nfev", 5 * maxiter + 1) max_dx = options.pop("max_dx", jnp.inf) + scaled_termination = options.pop("scaled_termination", True) jac_scale = isinstance(x_scale, str) and x_scale in ["jac", "auto"] if jac_scale: @@ -195,7 +198,7 @@ def lsqtr( # noqa: C901 - FIXME: simplify this g_h = g * d J_h = J * d - g_norm = jnp.linalg.norm(g * v, ord=jnp.inf) + g_norm = jnp.linalg.norm((g_h if scaled_termination else g * v), ord=jnp.inf) # conngould : norm of the cauchy point, as recommended in ch17 of Conn & Gould # scipy : norm of the scaled x, as used in scipy @@ -236,7 +239,7 @@ def lsqtr( # noqa: C901 - FIXME: simplify this callback = setdefault(callback, lambda *args: False) - x_norm = jnp.linalg.norm(x, ord=2) + x_norm = jnp.linalg.norm(((x * scale_inv) if scaled_termination else x), ord=2) success = None message = None step_norm = jnp.inf @@ -332,7 +335,7 @@ def lsqtr( # noqa: C901 - FIXME: simplify this success, message = check_termination( actual_reduction, cost, - step_norm, + (step_h_norm if scaled_termination else step_norm), x_norm, g_norm, reduction_ratio, @@ -370,8 +373,12 @@ def lsqtr( # noqa: C901 - FIXME: simplify this g_h = g * d J_h = J * d - x_norm = jnp.linalg.norm(x, ord=2) - g_norm = jnp.linalg.norm(g * v, ord=jnp.inf) + x_norm = jnp.linalg.norm( + ((x * scale_inv) if scaled_termination else x), ord=2 + ) + g_norm = jnp.linalg.norm( + (g_h if scaled_termination else g * v), ord=jnp.inf + ) if g_norm < gtol: success, message = True, STATUS_MESSAGES["gtol"] @@ -380,7 +387,7 @@ def lsqtr( # noqa: C901 - FIXME: simplify this success, message = False, STATUS_MESSAGES["callback"] else: - step_norm = actual_reduction = 0 + step_norm = step_h_norm = actual_reduction = 0 iteration += 1 if verbose > 1: From 97743d66157033bbd8de83dcc0a47847c2eae0b2 Mon Sep 17 00:00:00 2001 From: Rory Conlin Date: Tue, 9 Jul 2024 15:40:16 -0400 Subject: [PATCH 2/6] Fix scaling of g_norm --- desc/optimize/aug_lagrangian.py | 8 +++++--- desc/optimize/aug_lagrangian_ls.py | 8 +++++--- desc/optimize/fmin_scalar.py | 6 ++++-- desc/optimize/least_squares.py | 6 ++++-- 4 files changed, 18 insertions(+), 10 deletions(-) diff --git a/desc/optimize/aug_lagrangian.py b/desc/optimize/aug_lagrangian.py index 1b1f29ff41..075bdffcdc 100644 --- a/desc/optimize/aug_lagrangian.py +++ b/desc/optimize/aug_lagrangian.py @@ -355,7 +355,9 @@ def laghess(z, y, mu, *args): g_h = g * d H_h = d * H * d[:, None] - g_norm = jnp.linalg.norm((g_h if scaled_termination else g * v), ord=jnp.inf) + g_norm = jnp.linalg.norm( + (g * v * scale if scaled_termination else g * v), ord=jnp.inf + ) # conngould : norm of the cauchy point, as recommended in ch17 of Conn & Gould # scipy : norm of the scaled x, as used in scipy @@ -539,7 +541,7 @@ def laghess(z, y, mu, *args): v, dv = cl_scaling_vector(z, g, lb, ub) v = jnp.where(dv != 0, v * scale_inv, v) g_norm = jnp.linalg.norm( - (g_h if scaled_termination else g * v), ord=jnp.inf + (g * v * scale if scaled_termination else g * v), ord=jnp.inf ) # updating augmented lagrangian params @@ -570,7 +572,7 @@ def laghess(z, y, mu, *args): v, dv = cl_scaling_vector(z, g, lb, ub) v = jnp.where(dv != 0, v * scale_inv, v) g_norm = jnp.linalg.norm( - (g_h if scaled_termination else g * v), 
ord=jnp.inf + (g * v * scale if scaled_termination else g * v), ord=jnp.inf ) z_norm = jnp.linalg.norm( diff --git a/desc/optimize/aug_lagrangian_ls.py b/desc/optimize/aug_lagrangian_ls.py index 39784d1c00..04991f61fb 100644 --- a/desc/optimize/aug_lagrangian_ls.py +++ b/desc/optimize/aug_lagrangian_ls.py @@ -291,7 +291,9 @@ def lagjac(z, y, mu, *args): g_h = g * d J_h = J * d - g_norm = jnp.linalg.norm((g_h if scaled_termination else g * v), ord=jnp.inf) + g_norm = jnp.linalg.norm( + (g * v * scale if scaled_termination else g * v), ord=jnp.inf + ) # conngould : norm of the cauchy point, as recommended in ch17 of Conn & Gould # scipy : norm of the scaled x, as used in scipy @@ -474,7 +476,7 @@ def lagjac(z, y, mu, *args): v, dv = cl_scaling_vector(z, g, lb, ub) v = jnp.where(dv != 0, v * scale_inv, v) g_norm = jnp.linalg.norm( - (g_h if scaled_termination else g * v), ord=jnp.inf + (g * v * scale if scaled_termination else g * v), ord=jnp.inf ) # updating augmented lagrangian params @@ -500,7 +502,7 @@ def lagjac(z, y, mu, *args): v, dv = cl_scaling_vector(z, g, lb, ub) v = jnp.where(dv != 0, v * scale_inv, v) g_norm = jnp.linalg.norm( - (g_h if scaled_termination else g * v), ord=jnp.inf + (g * v * scale if scaled_termination else g * v), ord=jnp.inf ) z_norm = jnp.linalg.norm( diff --git a/desc/optimize/fmin_scalar.py b/desc/optimize/fmin_scalar.py index 472d3486fc..4f73df1d51 100644 --- a/desc/optimize/fmin_scalar.py +++ b/desc/optimize/fmin_scalar.py @@ -240,7 +240,9 @@ def fmintr( # noqa: C901 - FIXME: simplify this g_h = g * d H_h = d * H * d[:, None] - g_norm = jnp.linalg.norm((g_h if scaled_termination else g * v), ord=jnp.inf) + g_norm = jnp.linalg.norm( + (g * v * scale if scaled_termination else g * v), ord=jnp.inf + ) # conngould : norm of the cauchy point, as recommended in ch17 of Conn & Gould # scipy : norm of the scaled x, as used in scipy @@ -417,7 +419,7 @@ def fmintr( # noqa: C901 - FIXME: simplify this ((x * scale_inv) if scaled_termination else x), ord=2 ) g_norm = jnp.linalg.norm( - (g_h if scaled_termination else g * v), ord=jnp.inf + (g * v * scale if scaled_termination else g * v), ord=jnp.inf ) if g_norm < gtol: diff --git a/desc/optimize/least_squares.py b/desc/optimize/least_squares.py index b496b80c0a..5f4e64170a 100644 --- a/desc/optimize/least_squares.py +++ b/desc/optimize/least_squares.py @@ -198,7 +198,9 @@ def lsqtr( # noqa: C901 - FIXME: simplify this g_h = g * d J_h = J * d - g_norm = jnp.linalg.norm((g_h if scaled_termination else g * v), ord=jnp.inf) + g_norm = jnp.linalg.norm( + (g * v * scale if scaled_termination else g * v), ord=jnp.inf + ) # conngould : norm of the cauchy point, as recommended in ch17 of Conn & Gould # scipy : norm of the scaled x, as used in scipy @@ -377,7 +379,7 @@ def lsqtr( # noqa: C901 - FIXME: simplify this ((x * scale_inv) if scaled_termination else x), ord=2 ) g_norm = jnp.linalg.norm( - (g_h if scaled_termination else g * v), ord=jnp.inf + (g * v * scale if scaled_termination else g * v), ord=jnp.inf ) if g_norm < gtol: From edfea8435f0721853ac88fa35744d99f893c55fa Mon Sep 17 00:00:00 2001 From: Rory Conlin Date: Tue, 9 Jul 2024 16:21:21 -0400 Subject: [PATCH 3/6] Update default augmented lagrangian hyperparams for scaled termination --- desc/optimize/aug_lagrangian.py | 24 ++++++++++++------------ desc/optimize/aug_lagrangian_ls.py | 24 ++++++++++++------------ 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/desc/optimize/aug_lagrangian.py b/desc/optimize/aug_lagrangian.py index 075bdffcdc..bff90b7cd9 
100644 --- a/desc/optimize/aug_lagrangian.py +++ b/desc/optimize/aug_lagrangian.py @@ -313,18 +313,6 @@ def laghess(z, y, mu, *args): y = jnp.nan_to_num(y, nan=0.0, posinf=0.0, neginf=0.0) y, mu, c = jnp.broadcast_arrays(y, mu, c) - # notation following Conn & Gould, algorithm 14.4.2, but with our mu = their mu^-1 - omega = options.pop("omega", 1.0) - eta = options.pop("eta", 1.0) - alpha_omega = options.pop("alpha_omega", 1.0) - beta_omega = options.pop("beta_omega", 1.0) - alpha_eta = options.pop("alpha_eta", 0.1) - beta_eta = options.pop("beta_eta", 0.9) - tau = options.pop("tau", 10) - - gtolk = max(omega / jnp.mean(mu) ** alpha_omega, gtol) - ctolk = max(eta / jnp.mean(mu) ** alpha_eta, ctol) - L = lagfun(f, c, y, mu) g = laggrad(z, y, mu, *args) ngev += 1 @@ -416,6 +404,18 @@ def laghess(z, y, mu, *args): if g_norm < gtol and constr_violation < ctol: success, message = True, STATUS_MESSAGES["gtol"] + # notation following Conn & Gould, algorithm 14.4.2, but with our mu = their mu^-1 + omega = options.pop("omega", g_norm if scaled_termination else 1.0) + eta = options.pop("eta", constr_violation if scaled_termination else 1.0) + alpha_omega = options.pop("alpha_omega", 1.0) + beta_omega = options.pop("beta_omega", 1.0) + alpha_eta = options.pop("alpha_eta", 1.0 if scaled_termination else 0.1) + beta_eta = options.pop("beta_eta", 1.0 if scaled_termination else 0.9) + tau = options.pop("tau", 10) + + gtolk = max(omega / jnp.mean(mu) ** alpha_omega, gtol) + ctolk = max(eta / jnp.mean(mu) ** alpha_eta, ctol) + if verbose > 1: print_header_nonlinear(True, "Penalty param", "max(|mltplr|)") print_iteration_nonlinear( diff --git a/desc/optimize/aug_lagrangian_ls.py b/desc/optimize/aug_lagrangian_ls.py index 04991f61fb..58c30cf7eb 100644 --- a/desc/optimize/aug_lagrangian_ls.py +++ b/desc/optimize/aug_lagrangian_ls.py @@ -253,18 +253,6 @@ def lagjac(z, y, mu, *args): y = jnp.nan_to_num(y, nan=0.0, posinf=0.0, neginf=0.0) y, mu, c = jnp.broadcast_arrays(y, mu, c) - # notation following Conn & Gould, algorithm 14.4.2, but with our mu = their mu^-1 - omega = options.pop("omega", 1.0) - eta = options.pop("eta", 1.0) - alpha_omega = options.pop("alpha_omega", 1.0) - beta_omega = options.pop("beta_omega", 1.0) - alpha_eta = options.pop("alpha_eta", 0.1) - beta_eta = options.pop("beta_eta", 0.9) - tau = options.pop("tau", 10) - - gtolk = max(omega / jnp.mean(mu) ** alpha_omega, gtol) - ctolk = max(eta / jnp.mean(mu) ** alpha_eta, ctol) - L = lagfun(f, c, y, mu) J = lagjac(z, y, mu, *args) Lcost = 1 / 2 * jnp.dot(L, L) @@ -342,6 +330,18 @@ def lagjac(z, y, mu, *args): if g_norm < gtol and constr_violation < ctol: success, message = True, STATUS_MESSAGES["gtol"] + # notation following Conn & Gould, algorithm 14.4.2, but with our mu = their mu^-1 + omega = options.pop("omega", g_norm if scaled_termination else 1.0) + eta = options.pop("eta", constr_violation if scaled_termination else 1.0) + alpha_omega = options.pop("alpha_omega", 1.0) + beta_omega = options.pop("beta_omega", 1.0) + alpha_eta = options.pop("alpha_eta", 1.0 if scaled_termination else 0.1) + beta_eta = options.pop("beta_eta", 1.0 if scaled_termination else 0.9) + tau = options.pop("tau", 10) + + gtolk = max(omega / jnp.mean(mu) ** alpha_omega, gtol) + ctolk = max(eta / jnp.mean(mu) ** alpha_eta, ctol) + if verbose > 1: print_header_nonlinear(True, "Penalty param", "max(|mltplr|)") print_iteration_nonlinear( From 900f06bc5840ca264cbabfcc824399c8372397f3 Mon Sep 17 00:00:00 2001 From: Rory Conlin Date: Tue, 12 Nov 2024 22:43:53 -0500 
Subject: [PATCH 4/6] Few more tweaks --- desc/optimize/aug_lagrangian.py | 8 ++++---- desc/optimize/aug_lagrangian_ls.py | 8 ++++---- tests/test_examples.py | 2 +- tests/test_optimizer.py | 18 +++++++++--------- 4 files changed, 18 insertions(+), 18 deletions(-) diff --git a/desc/optimize/aug_lagrangian.py b/desc/optimize/aug_lagrangian.py index bff90b7cd9..bc3e860853 100644 --- a/desc/optimize/aug_lagrangian.py +++ b/desc/optimize/aug_lagrangian.py @@ -405,12 +405,12 @@ def laghess(z, y, mu, *args): success, message = True, STATUS_MESSAGES["gtol"] # notation following Conn & Gould, algorithm 14.4.2, but with our mu = their mu^-1 - omega = options.pop("omega", g_norm if scaled_termination else 1.0) - eta = options.pop("eta", constr_violation if scaled_termination else 1.0) + omega = options.pop("omega", min(g_norm, 1e-2) if scaled_termination else 1.0) + eta = options.pop("eta", min(constr_violation, 1e-2) if scaled_termination else 1.0) alpha_omega = options.pop("alpha_omega", 1.0) beta_omega = options.pop("beta_omega", 1.0) - alpha_eta = options.pop("alpha_eta", 1.0 if scaled_termination else 0.1) - beta_eta = options.pop("beta_eta", 1.0 if scaled_termination else 0.9) + alpha_eta = options.pop("alpha_eta", 0.1) + beta_eta = options.pop("beta_eta", 0.9) tau = options.pop("tau", 10) gtolk = max(omega / jnp.mean(mu) ** alpha_omega, gtol) diff --git a/desc/optimize/aug_lagrangian_ls.py b/desc/optimize/aug_lagrangian_ls.py index 2eeb04dbc0..7cb938172d 100644 --- a/desc/optimize/aug_lagrangian_ls.py +++ b/desc/optimize/aug_lagrangian_ls.py @@ -339,12 +339,12 @@ def lagjac(z, y, mu, *args): success, message = True, STATUS_MESSAGES["gtol"] # notation following Conn & Gould, algorithm 14.4.2, but with our mu = their mu^-1 - omega = options.pop("omega", g_norm if scaled_termination else 1.0) - eta = options.pop("eta", constr_violation if scaled_termination else 1.0) + omega = options.pop("omega", min(g_norm, 1e-2) if scaled_termination else 1.0) + eta = options.pop("eta", min(constr_violation, 1e-2) if scaled_termination else 1.0) alpha_omega = options.pop("alpha_omega", 1.0) beta_omega = options.pop("beta_omega", 1.0) - alpha_eta = options.pop("alpha_eta", 1.0 if scaled_termination else 0.1) - beta_eta = options.pop("beta_eta", 1.0 if scaled_termination else 0.9) + alpha_eta = options.pop("alpha_eta", 0.1) + beta_eta = options.pop("beta_eta", 0.9) tau = options.pop("tau", 10) gtolk = max(omega / jnp.mean(mu) ** alpha_omega, gtol) diff --git a/tests/test_examples.py b/tests/test_examples.py index 5b4e328981..5310ac1608 100644 --- a/tests/test_examples.py +++ b/tests/test_examples.py @@ -877,7 +877,7 @@ def mirrorRatio(params): ) optimizer = Optimizer("lsq-auglag") (eq, field), _ = optimizer.optimize( - (eq, field), objective, constraints, maxiter=100, verbose=3 + (eq, field), objective, constraints, maxiter=150, verbose=3 ) eq, _ = eq.solve(objective="force", verbose=3) diff --git a/tests/test_optimizer.py b/tests/test_optimizer.py index 4f0cce0e0b..edf3bf1a4b 100644 --- a/tests/test_optimizer.py +++ b/tests/test_optimizer.py @@ -837,9 +837,9 @@ def con(x): args=(), x_scale="auto", ftol=0, - xtol=1e-6, - gtol=1e-6, - ctol=1e-6, + xtol=1e-8, + gtol=1e-8, + ctol=1e-8, verbose=3, maxiter=None, options={"initial_multipliers": "least_squares"}, @@ -854,9 +854,9 @@ def con(x): args=(), x_scale="auto", ftol=0, - xtol=1e-6, - gtol=1e-6, - ctol=1e-6, + xtol=1e-8, + gtol=1e-8, + ctol=1e-8, verbose=3, maxiter=None, options={"initial_multipliers": "least_squares", "tr_method": "cho"}, @@ -881,9 +881,9 @@ def 
con(x): args=(), x_scale="auto", ftol=0, - xtol=1e-6, - gtol=1e-6, - ctol=1e-6, + xtol=1e-8, + gtol=1e-8, + ctol=1e-8, verbose=3, maxiter=None, options={"initial_multipliers": "least_squares"}, From a8f0a549a7d39b9e41a74ba984879adf92b89476 Mon Sep 17 00:00:00 2001 From: Rory Conlin Date: Wed, 20 Nov 2024 23:46:38 -0500 Subject: [PATCH 5/6] Remove todo comment --- desc/optimize/least_squares.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/desc/optimize/least_squares.py b/desc/optimize/least_squares.py index 204b7b5478..56e7d6e0ba 100644 --- a/desc/optimize/least_squares.py +++ b/desc/optimize/least_squares.py @@ -349,7 +349,7 @@ def lsqtr( # noqa: C901 ) alltr.append(trust_radius) alpha *= tr_old / trust_radius - # TODO (#1395): does this need to move to the outer loop? + success, message = check_termination( actual_reduction, cost, From fe5bd894eac7676db0580cf509ad981bc21cdfca Mon Sep 17 00:00:00 2001 From: Rory Conlin Date: Wed, 20 Nov 2024 23:49:06 -0500 Subject: [PATCH 6/6] Update changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6743861fbc..c49ae368a9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ Changelog New Features - Add ``from_input_file`` method to ``Equilibrium`` class to generate an ``Equilibrium`` object with boundary, profiles, resolution and flux specified in a given DESC or VMEC input file - +- Adds an option ``scaled_termination`` (defaults to True) to all of the desc optimizers to measure the norms for ``xtol`` and ``gtol`` in the scaled norm provided by ``x_scale`` (which defaults to using an adaptive scaling based on the Jacobian or Hessian). This should make things more robust when optimizing parameters with widely different magnitudes. The old behavior can be recovered by passing ``options={"scaled_termination": False}``. Bug Fixes
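As a usage sketch of the changelog entry above: the option is passed through the ``options`` dict of any DESC optimizer, and the old base-unit termination can be recovered by disabling it. The problem setup below (a small fixed-boundary force-balance solve) is a hypothetical minimal example assembled from the public DESC API, not code taken from this patch:

    from desc.equilibrium import Equilibrium
    from desc.objectives import (
        ForceBalance,
        ObjectiveFunction,
        get_fixed_boundary_constraints,
    )
    from desc.optimize import Optimizer

    eq = Equilibrium(L=4, M=4, N=0)  # small axisymmetric equilibrium
    objective = ObjectiveFunction(ForceBalance(eq=eq))
    constraints = get_fixed_boundary_constraints(eq=eq)
    optimizer = Optimizer("lsq-auglag")
    (eq,), _ = optimizer.optimize(
        (eq,),
        objective,
        constraints,
        xtol=1e-8,
        gtol=1e-8,
        ctol=1e-8,
        maxiter=100,
        options={"scaled_termination": False},  # recover the old base-unit behavior
    )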