address Mihai's comments
frapac committed Jul 22, 2024
1 parent a16b29e commit a1b34e7
Showing 4 changed files with 46 additions and 64 deletions.
22 changes: 9 additions & 13 deletions tex/sections/conditioning.tex
@@ -35,25 +35,21 @@ \section{Conditioning of the condensed KKT system}
matrix $K_\gamma$, which incorporates both equality and inequality
constraints. The results extend directly to $K_\tau$ (by letting the number of equality constraints be zero).

To simplify the notation, we suppose that the primal variable
$x$ is unconstrained \ma{Not sure why we needed bound constraints on $x$ in the first place}, leaving only a bounded slack variable $s$ in \eqref{eq:slack_problem}.
This is equivalent to include the bounds on the variable $x$ in the inequality constraints,
as $\bar{h}(x) \leq 0$ with $\bar{h}(x) := (h(x), -x)$.

\subsection{Centrality conditions}
We start the discussion by recalling important results about the iterates of the interior-point algorithm.
Let $p = (x, s, y, z)$ be the current primal-dual iterate (the bound multiplier $v$ is considered apart),
and $p^\star$ be a solution of the KKT conditions~\eqref{eq:kktconditions}. We suppose
that the solution $p^\star$ is regular and satisfies the following assumptions.
We denote $\cactive = \cactive(x^\star)$ the active-set at the optimal solution $x^\star$,
\ma{As I noted in the beginning; it does not make sense to denote the active set as you did unless you have no inequality constraints on $x$. Maybe you want to define the active set here since here is where you use it; after you make the assumption you have no ineq constraints on $x$}
For $p := (x, s, y, z)$, we denote by
$(p, v)$ the current primal-dual iterate,
and $(p^\star, v^\star)$ a solution of the KKT conditions~\eqref{eq:kktconditions}.
We denote $\cactive = \cactive(x^\star)$ the active-set at the optimal solution $x^\star$,
and $\cinactive = \cinactive(x^\star)$ the inactive set.
In this section, we are interested in the \emph{local} convergence behavior of the
primal-dual iterate, and we suppose $(p, v)$ is close enough to the solution
$(p^\star, v^\star)$.

% Sungho: not sure if this should be introduced here.
\begin{assumption}
\label{hyp:ipm}
Let $p^\star = (x^\star, s^\star, y^\star, z^\star)$ be a primal-dual solution
\ma{Most people would include the $v^\star$ in the primal dual solution}
Let $(p^\star, v^\star)$ be a primal-dual solution
satisfying the KKT conditions~\eqref{eq:kktconditions}. Let the following hold:
\begin{itemize}
\item Continuity: The Hessian $\nabla^2_{x x} L(\cdot)$ is Lipschitz continuous
@@ -68,7 +64,7 @@ \subsection{Centrality conditions}
We denote $\delta(p, v) = \| (p, v) - (p^\star, v^\star) \|$ the Euclidean distance to the
primal-dual stationary point $(p^\star, v^\star)$.
From \cite[Theorem 2.2]{wright2001effects}, if Assumption~\ref{hyp:ipm}
holds at $p^\star$ and $v > 0$ \ma{Pretty sure that is true only locally},
holds at $p^\star$ and $v > 0$,
\begin{equation}
\delta(p, v) = \Theta\left( \left\Vert \begin{bmatrix}
\nabla_p L(p, v) \\ \min(v, s)
\end{bmatrix} \right\Vert \right) \; .
\end{equation}
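This bound has a practical reading: the norm of the stacked residual is a computable proxy for the distance to the primal-dual solution. A minimal numpy sketch with made-up values (not from the paper's experiments):

```python
import numpy as np

# Toy illustration: the norm of [grad_p L(p, v); min(v, s)] tracks
# the distance delta(p, v) to the solution, per the Theta(.) bound above.
def distance_proxy(grad_L_p, s, v):
    return np.linalg.norm(np.concatenate([grad_L_p, np.minimum(v, s)]))

grad_L_p = np.array([1e-8, -2e-8, 5e-9])   # Lagrangian gradient w.r.t. p
s = np.array([1e-7, 0.8])                  # slacks: first component active
v = np.array([0.3, 1e-7])                  # bound multipliers
print(distance_proxy(grad_L_p, s, v))      # small value => iterate near (p*, v*)
```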
82 changes: 34 additions & 48 deletions tex/sections/ipm.tex
@@ -17,18 +17,12 @@ \subsection{Problem formulation and KKT conditions}
\label{eq:problem}
\min_{x \in \mathbb{R}^n} \; f(x)
\quad \text{subject to}\quad
\left\{
\begin{aligned}
& g(x) = 0 \; , ~ h(x) \leq 0 \; , \\
& x \geq 0 \; ,
\end{aligned}
\right.
g(x) = 0 \; , ~ h(x) \leq 0 \; ,
\end{equation}
with $f:\mathbb{R}^n \to \mathbb{R}$ a real-valued function
encoding the objective, $g: \mathbb{R}^n \to \mathbb{R}^{m_e}$
encoding the equality constraints, and $h: \mathbb{R}^{n} \to
\mathbb{R}^{m_i}$ encoding the inequality constraints.
In addition, the variable $x$ is subject to simple bounds $x \geq 0$.
In what follows, we suppose that the functions $f, g, h$ are smooth
and twice differentiable.

@@ -41,42 +35,40 @@ \subsection{Problem formulation and KKT conditions}
\left\{
\begin{aligned}
& g(x) = 0 \; , ~ h(x) + s = 0 \; , \\
& x \geq 0 \; , ~ s \geq 0 \; .
& s \geq 0 \; .
\end{aligned}
\right.
\end{equation}
In~\eqref{eq:slack_problem}, the inequality constraints
are encoded inside the variable bounds.
are encoded in the bounds on the slack variable $s$.
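As a quick illustration, a minimal Python sketch of the slack reformulation (the function `h` and the point `x` are invented for the example):

```python
import numpy as np

# Hypothetical inequality constraints h(x) <= 0, for illustration only.
def h(x):
    return np.array([x[0] - 0.5, -x[1]])

x = np.array([0.2, 0.7])
s = -h(x)              # choose s so that h(x) + s = 0 holds exactly
print(h(x) + s)        # [0. 0.]: the equality constraint is satisfied
print(np.all(s >= 0))  # True: s >= 0 encodes h(x) <= 0
```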

We denote by $y \in \mathbb{R}^{m_e}$ and $z \in \mathbb{R}^{m_i}$ the multipliers associated
with the equality constraints and the inequality constraints, respectively.
Similarly, we denote
by $(u, v) \in \mathbb{R}^{n + m_i}$ the multipliers associated
respectively to the bounds $x \geq 0$ and $s \geq 0$.
Using the dual variable $(y, z, u, v)$, we define the Lagrangian of \eqref{eq:slack_problem} as
by $v \in \mathbb{R}^{m_i}$ the multipliers associated
to the bounds $s \geq 0$.
Using the dual variable $(y, z, v)$, we define the Lagrangian of \eqref{eq:slack_problem} as
\begin{equation}
\label{eq:lagrangian}
L(x, s, y, z, u, v) = f(x) + y^\top g(x) + z^\top \big(h(x) +s\big)
- u^\top x - v^\top s \; .
L(x, s, y, z, v) = f(x) + y^\top g(x) + z^\top \big(h(x) +s\big)
- v^\top s \; .
\end{equation}
The KKT conditions of \eqref{eq:slack_problem} are:
\begin{subequations}
\label{eq:kktconditions}
\begin{align}
& \nabla f(x) + \nabla g(x)^\top y + \nabla h(x)^\top z - u = 0 \; , \\
& \nabla f(x) + \nabla g(x)^\top y + \nabla h(x)^\top z = 0 \; , \\
& z - v = 0 \; , \\
& g(x) = 0 \; , \\
& h(x) + s = 0 \; , \\
\label{eq:kktconditions:compx}
& 0 \leq x \perp u \geq 0 \; , \\
\label{eq:kktconditions:comps}
& 0 \leq s \perp v \geq 0 \; .
\end{align}
\end{subequations}
The notation $x \perp u$ is a shorthand for the complementarity
condition $x_i u_i = 0$ (for all $i=1,\cdots, n$).
The notation $s \perp v$ is a shorthand for the complementarity
condition $s_i v_i = 0$ (for all $i=1,\cdots, m_i$).

The set of active constraints at a point $x$ is denoted by \ma{This is odd. Don't you need to worry about the activity of $x$ itself?}
The set of active constraints at a point $x$ is denoted by
\begin{equation}
\mathcal{B}(x) := \{ i \in\{ 1, \cdots, m_i\} \; | \; h_i(x) = 0 \} \; .
\end{equation}
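In floating-point arithmetic the active set is detected up to a tolerance; a short sketch (the tolerance value is an illustrative assumption, not taken from the paper):

```python
import numpy as np

def active_set(h_val, tol=1e-8):
    # Indices i with h_i(x) = 0, up to a small numerical tolerance.
    return np.flatnonzero(np.abs(h_val) <= tol)

h_val = np.array([0.0, -0.3, -1e-12])  # constraints 1 and 3 are active
print(active_set(h_val))               # -> [0 2]
```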
@@ -89,36 +81,35 @@ \subsection{Solving the KKT conditions with the interior-point method}
\label{sec:ipm:kkt}
The interior-point method aims at finding a stationary point
satisfying the KKT conditions~\eqref{eq:kktconditions}.
The complementarity constraints \eqref{eq:kktconditions:compx}-\eqref{eq:kktconditions:comps}
The complementarity constraints \eqref{eq:kktconditions:comps}
render the KKT conditions non-smooth, complicating the solution of
the whole system~\eqref{eq:kktconditions}.
IPM uses a homotopy continuation method to solve a simplified
version of \eqref{eq:kktconditions}, parameterized by a barrier
parameter $\mu > 0$~\cite[Chapter 19]{nocedal_numerical_2006}.
For positive $(x, s, u, v) > 0$, we solve the system
For positive $(x, s, v) > 0$, we solve the system
\begin{equation}
\label{eq:kkt_ipm}
F_\mu(x, s, y, z, u, v) =
F_\mu(x, s, y, z, v) =
\begin{bmatrix}
\nabla f(x) + \nabla g(x)^\top y + \nabla h(x)^\top z - u \\
\nabla f(x) + \nabla g(x)^\top y + \nabla h(x)^\top z \\
z - v \\
g(x) \\
h(x) + s \\
X u - \mu e \\
S v - \mu e
\end{bmatrix} = 0
\; .
\end{equation}
We introduce in \eqref{eq:kkt_ipm} the diagonal matrix $S = \diag(s_1, \cdots, s_{m_i})$, along with the vector of ones $e$.
As we drive the barrier parameter $\mu$ to $0$, the solution of the
system $F_\mu(x, s, y, z, u, v) = 0$ tends to the solution of the
system $F_\mu(x, s, y, z, v) = 0$ tends to the solution of the
KKT conditions~\eqref{eq:kktconditions}.
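For concreteness, a sketch of how the residual $F_\mu$ could be assembled with numpy; the callables `f_grad`, `g`, `g_jac`, `h`, `h_jac` are hypothetical model callbacks, not an API from the paper:

```python
import numpy as np

def F_mu(x, s, y, z, v, mu, f_grad, g, g_jac, h, h_jac):
    """Smoothed KKT residual of the slack formulation (sketch only)."""
    return np.concatenate([
        f_grad(x) + g_jac(x).T @ y + h_jac(x).T @ z,  # stationarity in x
        z - v,                                        # stationarity in s
        g(x),                                         # equality constraints
        h(x) + s,                                     # slacked inequalities
        s * v - mu * np.ones_like(s),                 # perturbed complementarity
    ])
```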

We note that at a fixed parameter $\mu$, the function $F_\mu(\cdot)$
is smooth. Hence, the system \eqref{eq:kkt_ipm} can be solved iteratively
using a regular Newton method. For a primal-dual iterate
$w_k := (x_k, s_k, y_k, z_k, u_k, v_k)$, the next iterate is computed as
$w_k := (x_k, s_k, y_k, z_k, v_k)$, the next iterate is computed as
$w_{k+1} = w_k + \alpha_k d_k$, where $d_k$ is a descent
direction computed by solving the linear system
\begin{equation}
@@ -127,7 +118,7 @@ \subsection{Solving the KKT conditions with the interior-point method}
\end{equation}
The step $\alpha_k$ is computed using a line-search algorithm, in a way
that ensures that the bounded variables remain positive
at the next primal-dual iterate: $(x_{k+1}, s_{k+1}, u_{k+1}, v_{k+1}) > 0$.
at the next primal-dual iterate: $(x_{k+1}, s_{k+1}, v_{k+1}) > 0$.
Once the iterates are sufficiently close to the central path,
the IPM decreases the barrier parameter $\mu$ to find a solution closer to
the original KKT conditions~\eqref{eq:kktconditions}.
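The step-length safeguard is typically the fraction-to-boundary rule; this is the standard rule of the IPM literature (e.g. Nocedal & Wright, Ch. 19), assumed here rather than spelled out in the text. A sketch:

```python
import numpy as np

def max_step(w, d_w, tau=0.995):
    # Largest alpha in (0, 1] with w + alpha * d_w >= (1 - tau) * w,
    # componentwise; only decreasing components constrain alpha.
    mask = d_w < 0
    if not np.any(mask):
        return 1.0
    return min(1.0, float(np.min(-tau * w[mask] / d_w[mask])))

s, d_s = np.array([0.5, 1e-3]), np.array([-1.0, -2e-3])
v, d_v = np.array([0.2, 3.0]), np.array([0.1, -1.5])
alpha = min(max_step(s, d_s), max_step(v, d_v))
print(alpha)  # strictly positive step keeping (s, v) > 0
```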
@@ -143,19 +134,17 @@ \subsection{Solving the KKT conditions with the interior-point method}
\setlength\arraycolsep{5pt}
\tag{$K_3$}
\begin{bmatrix}
W_k & 0 & G_k^\top & H_k^\top & -I & \phantom{-}0 \\
0 & 0 & 0\phantom{^\top} & I\phantom{^\top} & \phantom{-}0 & -I \\
G_k & 0 & 0\phantom{^\top} & 0\phantom{^\top} & \phantom{-}0 & \phantom{-}0 \\
H_k & I & 0\phantom{^\top} & 0\phantom{^\top} & \phantom{-}0 & \phantom{-}0 \\
U_k & 0 & 0\phantom{^\top} & 0\phantom{^\top} & \phantom{-}X_k & \phantom{-}0 \\
0 & V_k & 0\phantom{^\top} & 0\phantom{^\top} & \phantom{-}0 & \phantom{-}S_k
W_k & 0 & G_k^\top & H_k^\top & \phantom{-}0 \\
0 & 0 & 0\phantom{^\top} & I\phantom{^\top} & -I \\
G_k & 0 & 0\phantom{^\top} & 0\phantom{^\top} & \phantom{-}0 \\
H_k & I & 0\phantom{^\top} & 0\phantom{^\top} & \phantom{-}0 \\
0 & V_k & 0\phantom{^\top} & 0\phantom{^\top} & \phantom{-}S_k
\end{bmatrix}
\begin{bmatrix}
d_x \\
d_s \\
d_y \\
d_z \\
d_u \\
d_v
\end{bmatrix}
% = -F_\mu(w_k) \; .
@@ -165,7 +154,6 @@ \subsection{Solving the KKT conditions with the interior-point method}
= - \begin{bmatrix}
\nabla f(x_k) + \nabla g(x_k)^\top y_k + \nabla h(x_k)^\top z_k \\
z_k - v_k \\
g(x_k) \\
h(x_k) + s_k \\
X_k u_k - \mu e \\
S_k v_k - \mu e
\end{bmatrix} \; ,
\end{equation}
@@ -178,14 +166,14 @@ \subsection{Solving the KKT conditions with the interior-point method}

\paragraph{Augmented KKT system.}
It is common to eliminate from \eqref{eq:kkt:unreduced} the blocks associated
to the bound multipliers $(u, v)$ and solve instead the regularized
with the bound multipliers $v$ and solve instead the regularized
$4 \times 4$ symmetric system, called the \emph{augmented KKT system}:
\begin{equation}
\label{eq:kkt:augmented}
\tag{$K_2$}
\setlength\arraycolsep{3pt}
\begin{bmatrix}
W + D_x + \delta_w I & 0 & \phantom{-}G^\top & \phantom{-}H^\top \\
W + \delta_w I & 0 & \phantom{-}G^\top & \phantom{-}H^\top \\
0 & D_s + \delta_w I & \phantom{-}0\phantom{^\top} & \phantom{-}I\phantom{^\top} \\
G & 0 & -\delta_c I & \phantom{-}0\phantom{^\top} \\
H & I & \phantom{-}0\phantom{^\top} & -\delta_c I
@@ -204,24 +192,22 @@ \subsection{Solving the KKT conditions with the interior-point method}
% h(x_k) + s_k
\end{bmatrix} \; ,
\end{equation}
with the diagonal matrices $D_x := X^{-1} U$ and $D_s := S^{-1} V$.
with the diagonal matrix $D_s := S^{-1} V$.
The vectors forming the right-hand-sides are given respectively by
$r_1 := \nabla f(x) + \nabla g(x)^\top y + \nabla h(x)^\top z - \mu X^{-1} e$,
$r_1 := \nabla f(x) + \nabla g(x)^\top y + \nabla h(x)^\top z$,
$r_2 := z - \mu S^{-1} e$,
$r_3 := g(x)$,
$r_4 := h(x) + s$.
Once \eqref{eq:kkt:augmented} is solved, we recover the update of the bound multipliers with
$d_u = - X^{-1}(U d_x - \mu e) - u$ and
$d_v = - S^{-1}(V d_s - \mu e) - v$.
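A one-line numpy illustration of this recovery, with toy numbers (not from the paper):

```python
import numpy as np

# S = diag(s), V = diag(v), e the vector of ones; values are made up.
mu = 1e-2
s, v = np.array([0.4, 1e-3]), np.array([0.05, 2.0])
d_s = np.array([-0.1, 5e-4])
d_v = -(v * d_s - mu) / s - v  # elementwise form of -S^{-1}(V d_s - mu e) - v
print(d_v)
```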

Note that we have added regularization terms $\delta_w \geq 0$
and $\delta_c \geq 0$ in \eqref{eq:kkt:augmented} to ensure that the
matrix is invertible.
Without the regularization terms in \eqref{eq:kkt:augmented}, the augmented KKT system is non-singular
if and only if the Jacobian $J = \begin{bmatrix} G \; &\; 0 \\ H \;&\; I \end{bmatrix}$
is full row-rank and the matrix $\begin{bmatrix} W + D_x & 0 \\ 0 & D_s \end{bmatrix}$
projected onto the null-space of the Jacobian $J$ is positive-definite~\cite{benzi2005numerical}.
\ma{I do not think that is an if and only if conditions. The Jacobian needs to be full rank. but the other matrix need not be positive definite in the projection for the system to be nonsingular}
is full row-rank and the matrix $\begin{bmatrix} W & 0 \\ 0 & D_s \end{bmatrix}$
projected onto the null-space of the Jacobian $J$ is non-singular~\cite{benzi2005numerical}.
The condition is satisfied if the inertia (defined as the respective numbers
of positive, negative and zero eigenvalues) of the matrix~\eqref{eq:kkt:augmented} is $(n + m_i, m_i + m_e, 0)$.
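A sketch of what such an inertia-controlling loop could look like; `assemble_K2` is a hypothetical callback returning the dense augmented matrix, and a production solver would read the inertia off its $LBL^T$ factorization rather than from eigenvalues:

```python
import numpy as np

def inertia(K, tol=1e-10):
    # Dense fallback: count eigenvalue signs of the symmetric matrix K.
    lam = np.linalg.eigvalsh(K)
    return (int((lam > tol).sum()), int((lam < -tol).sum()),
            int((np.abs(lam) <= tol).sum()))

def correct_inertia(assemble_K2, target, delta_w=0.0, delta_max=1e20):
    # Increase delta_w until the matrix has the target inertia
    # (n + m_i, m_i + m_e, 0); the update schedule is a simplification.
    while delta_w <= delta_max:
        if inertia(assemble_K2(delta_w)) == target:
            return delta_w
        delta_w = 1e-8 if delta_w == 0.0 else 10.0 * delta_w
    raise RuntimeError("inertia correction failed")
```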
We use the inertia-controlling method introduced in \cite{wachter2006implementation}
@@ -231,7 +217,7 @@ \subsection{Solving the KKT conditions with the interior-point method}
As a consequence, the system \eqref{eq:kkt:augmented} is usually factorized using
an inertia-revealing \lblt factorization~\cite{duff1983multifrontal}.
Krylov methods are often not competitive when solving~\eqref{eq:kkt:augmented},
as the block diagonal terms $D_x$ and $D_s$ are getting increasingly
as the block diagonal term $D_s$ becomes increasingly
ill-conditioned near the solution. Their use in IPM has been limited to
linear and convex quadratic programming \cite{gondzio-2012} (when paired
with a suitable preconditioner). We also refer to \cite{cao2016augmented}
@@ -266,7 +252,7 @@ \subsection{Solving the KKT conditions with the interior-point method}
\end{bmatrix}
\; ,
\end{equation}
where we have introduced the \emph{condensed matrix} $K := W + D_x + \delta_w I + H^\top D_H H$
where we have introduced the \emph{condensed matrix} $K := W + \delta_w I + H^\top D_H H$
and the two diagonal matrices
\begin{equation}
C := \big(I + \delta_c(D_s + \delta_w I)\big)^{-1} \; , \quad
D_H := (D_s + \delta_w I) \, C \; .
\end{equation}
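To make the condensation concrete, here is a dense numpy sketch for the case without equality constraints ($m_e = 0$), where the condensed system reduces to $K d_x = -r_1 - H^\top(D_H r_4 - C r_2)$; all data are toy values, and a real implementation would keep the matrices sparse:

```python
import numpy as np

rng = np.random.default_rng(0)
n, m_i = 3, 2
W = np.diag([2.0, 1.0, 3.0])          # toy Hessian of the Lagrangian
H = rng.standard_normal((m_i, n))     # toy inequality Jacobian
s, v = np.array([0.5, 1e-2]), np.array([0.1, 2.0])
delta_w, delta_c = 1e-8, 1e-8

D_s = v / s                                    # diagonal of S^{-1} V
C   = 1.0 / (1.0 + delta_c * (D_s + delta_w))  # diagonal of C
D_H = (D_s + delta_w) * C                      # diagonal of D_H
K = W + delta_w * np.eye(n) + H.T @ (D_H[:, None] * H)

r1, r2, r4 = rng.standard_normal(n), rng.standard_normal(m_i), rng.standard_normal(m_i)
d_x = np.linalg.solve(K, -r1 - H.T @ (D_H * r4 - C * r2))
d_z = D_H * (H @ d_x + r4) - C * r2            # recover the eliminated d_z
d_s = -(r2 + d_z) / (D_s + delta_w)            # then d_s from the second row
print(d_x, d_z, d_s)
```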
@@ -284,7 +270,7 @@ \subsection{Solving the KKT conditions with the interior-point method}

\paragraph{Iterative refinement.}
Compared to \eqref{eq:kkt:unreduced},
the diagonal matrices $D_x$ and $D_s$ introduce
the diagonal matrix $D_s$ introduces
additional ill-conditioning in \eqref{eq:kkt:augmented}, amplified
in the condensed form~\eqref{eq:kkt:condensed}:
the elements in the diagonal tend to infinity if a variable converges to its bound,
@@ -307,7 +293,7 @@ \subsection{Discussion}
as its conditioning is worse
than that of \eqref{eq:kkt:augmented} (implying less accurate solutions).
Additionally, condensation may result in increased fill-in within the condensed system \eqref{eq:kkt:condensed}~\cite[Section 19.3, p.571]{nocedal_numerical_2006}.
In the worst cases \eqref{eq:kkt:condensed} itself may become fully dense if an inequality row is completely dense (fortunately, a case rarer than in the normal equations used in linear programming \ma{Do you really mean normal equations as in regression or do you mean equations commonly encountered in linear programming?}).
In the worst cases \eqref{eq:kkt:condensed} itself may become fully dense if an inequality row is completely dense (fortunately, a case rarer than in the normal equations commonly encountered in linear programming).
Consequently, condensation methods are not commonly utilized in practical optimization settings.
To the best of our knowledge, Artelys Knitro~\cite{waltz2006interior} is the only solver that supports computing the descent direction with \eqref{eq:kkt:condensed}.

2 changes: 1 addition & 1 deletion tex/sections/kkt_systems.tex
@@ -94,7 +94,7 @@ \subsection{Equality relaxation strategy: LiftedKKT}
K_\tau \,d_x = - r_1 - H_\tau^\top(D_H r_4 - C r_2) \; ,
\end{equation}
with $H_\tau = \big(G^\top ~ H^\top \big)^\top$ and
$K_\tau := W + D_x + \delta_w I + H_\tau^\top D_H H_\tau$.
$K_\tau := W + \delta_w I + H_\tau^\top D_H H_\tau$.
Using the relation~\eqref{eq:ipm:inertia}, the matrix $K_\tau$
is guaranteed to be positive definite if the primal regularization parameter $\delta_w$ is sufficiently large.
As such, the parameter $\delta_w$ is chosen dynamically using the inertia information of the system in \eqref{eq:kkt:condensed}.
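Since $K_\tau$ is positive definite after regularization, the condensed system can be solved with a Cholesky factorization, with no indefinite pivoting needed. A minimal dense sketch, assuming the matrix and right-hand-side vectors are already assembled as in the condensed-KKT section:

```python
import numpy as np

def solve_condensed(K_tau, H_tau, D_H, C, r1, r2, r4):
    # K_tau is symmetric positive definite after regularization, so the
    # Cholesky factorization succeeds (and fails loudly otherwise).
    rhs = -r1 - H_tau.T @ (D_H * r4 - C * r2)
    L = np.linalg.cholesky(K_tau)
    return np.linalg.solve(L.T, np.linalg.solve(L, rhs))
```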
4 changes: 2 additions & 2 deletions tex/sections/numerics.tex
@@ -270,7 +270,7 @@ \subsubsection{Breakdown of the time spent in one IPM iteration}
}
\includegraphics[width=.7\textwidth]{figures/breakdown.pdf}
\caption{Breakdown of the time spent in one IPM iteration
for different linear solvers, when solving {\tt 78484epigrids} (A30 GPU)
for different linear solvers, when solving {\tt 78484epigrids}
\label{fig:timebreakdown}}
\end{figure}

@@ -359,7 +359,7 @@ \subsection{Benchmark on OPF instances}
\hline
\end{tabular}
}
\caption{OPF benchmark, solved with a tolerance {\tt tol=1e-6}. (A100 GPU) \label{tab:opf:benchmark}}
\caption{OPF benchmark, solved with a tolerance {\tt tol=1e-6}. \label{tab:opf:benchmark}}
\end{table}

\begin{figure}[!ht]
