Skip to content

Commit

Permalink
fix ex and sol 2
Browse files Browse the repository at this point in the history
  • Loading branch information
ludwigbothmann committed Dec 3, 2024
1 parent 4d53e0a commit 8a78906
Show file tree
Hide file tree
Showing 4 changed files with 16 additions and 14 deletions.
2 changes: 1 addition & 1 deletion exercises/exercise-02/ex2.tex
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@
\item Compute $\frac{\partial \Vert \mathbf{x} - \mathbf{c} \Vert_2}{\partial
\mathbf{x}}$
\item Compute $\frac{\partial \mathbf{u}^\top \mathbf{v}}{\partial \mathbf{x}}$
\item Show that $\frac{\partial \mathbf{u}^\top \mathbf{Y}}{\partial\mathbf{x}} = \begin{pmatrix}\mathbf{u}^\top\frac{\partial \mathbf{y}_1}{\partial\mathbf{x}} + \mathbf{y}_1^\top\frac{\partial \mathbf{u}}{\partial\mathbf{x}} \\ \vdots \\ \mathbf{u}^\top\frac{\partial \mathbf{y}_d}{\partial\mathbf{x}} + \mathbf{y}_d^\top\frac{\partial \mathbf{u}}{\partial\mathbf{x}}\end{pmatrix}$ where $\mathbf{y}_i:\R^d\rightarrow\R^d, i= 1,\dots,d$ are the column vectors of $\mathbf{Y}.$
\item Show that $\frac{\partial \mathbf{Y}^\top \mathbf{u}}{\partial\mathbf{x}} = \begin{pmatrix}\mathbf{u}^\top\frac{\partial \mathbf{y}_1}{\partial\mathbf{x}} + \mathbf{y}_1^\top\frac{\partial \mathbf{u}}{\partial\mathbf{x}} \\ \vdots \\ \mathbf{u}^\top\frac{\partial \mathbf{y}_d}{\partial\mathbf{x}} + \mathbf{y}_d^\top\frac{\partial \mathbf{u}}{\partial\mathbf{x}}\end{pmatrix}$ where $\mathbf{y}_i:\R^d\rightarrow\R^d, i= 1,\dots,d$ are the column vectors of $\mathbf{Y}.$
\item Compute $\frac{\partial^2 \mathbf{u}^\top \mathbf{v}}{\partial \mathbf{x}\partial\mathbf{x}^\top}$ \\
\textit{Hint:} use c), d)

Expand Down
16 changes: 9 additions & 7 deletions exercises/exercise-02/sol2.tex
Original file line number Diff line number Diff line change
Expand Up @@ -225,15 +225,17 @@
\item $\frac{\partial \mathbf{u}^\top \mathbf{v}}{\partial \mathbf{x}} = \frac{\partial \mathbf{u}^\top \mathbf{I} \mathbf{v}}{\partial \mathbf{x}} = \mathbf{u}^\top \mathbf{I} \frac{\partial \mathbf{v}}{\partial \mathbf{x}} + \mathbf{v}^\top \mathbf{I}^\top \frac{\partial \mathbf{u}}{\partial \mathbf{x}} =
\mathbf{u}^\top \frac{\partial \mathbf{v}}{\partial \mathbf{x}} + \mathbf{v}^\top \frac{\partial \mathbf{u}}{\partial \mathbf{x}}$

\item $\frac{\partial \mathbf{u}^\top \mathbf{Y}}{\partial\mathbf{x}} = \frac{\partial\begin{pmatrix}\mathbf{u}^\top \mathbf{y}_1 \\ \vdots \\ \mathbf{u}^\top \mathbf{y}_d\end{pmatrix}}{\partial \mathbf{x}} \overset{(c)}{=} \begin{pmatrix} \mathbf{u}^\top \frac{\partial \mathbf{y}_1}{\partial \mathbf{x}} + \mathbf{y}_1^\top \frac{\partial \mathbf{u}}{\partial \mathbf{x}} \\ \vdots \\
\mathbf{u}^\top \frac{\partial \mathbf{y}_d}{\partial \mathbf{x}} + \mathbf{y}_d^\top \frac{\partial \mathbf{u}}{\partial \mathbf{x}}
\item $\frac{\partial \mathbf{Y}^\top \mathbf{u}}{\partial\mathbf{x}} = \frac{\partial\begin{pmatrix}\mathbf{y}_1^\top \mathbf{u} \\ \vdots \\ \mathbf{y}_d^\top \mathbf{u}\end{pmatrix}}{\partial \mathbf{x}} \overset{(c)}{=} \begin{pmatrix} \mathbf{y}_1^\top \frac{\partial \mathbf{u}}{\partial \mathbf{x}} + \mathbf{u}^\top \frac{\partial \mathbf{y}_1}{\partial \mathbf{x}} \\ \vdots \\
\mathbf{y}_d^\top \frac{\partial \mathbf{u}}{\partial \mathbf{x}} + \mathbf{u}^\top \frac{\partial \mathbf{y}_d}{\partial \mathbf{x}}
\end{pmatrix}$

\item Note for $\mathbf{y}:\R^d\rightarrow\R^d, \mathbf{x}\mapsto\mathbf{y}(\mathbf{x})$ the $i$-th column of $\frac{\partial\mathbf{y}}{\partial\mathbf{x}}$ is $\frac{\partial\mathbf{y}}{\partial x_i}$.
With this it follows that \\
\begin{align*}
\frac{\partial^2 \mathbf{u}^\top \mathbf{v}}{\partial \mathbf{x}\partial\mathbf{x}^\top} &= \frac{\partial}{\partial\mathbf{x}} \left( \frac{\partial \mathbf{u}^\top \mathbf{v}}{\partial\mathbf{x}} \right) \\
&\overset{(c)}{=} \frac{\partial ( \mathbf{u}^\top \frac{\partial \mathbf{v}}{\partial \mathbf{x}} + \mathbf{v}^\top \frac{\partial \mathbf{u}}{\partial \mathbf{x}} )}{\partial\mathbf{x}} \\
\frac{\partial^2 \mathbf{u}^\top \mathbf{v}}{\partial \mathbf{x}\partial\mathbf{x}^\top} &= \frac{\partial}{\partial\mathbf{x}} \left( \frac{\partial \mathbf{u}^\top \mathbf{v}}{\partial\mathbf{x}^\top} \right) \\
&= \frac{\partial}{\partial\mathbf{x}} \left[ \left( \frac{\partial \mathbf{u}^\top \mathbf{v}}{\partial\mathbf{x}} \right)^\top \right] \\
&\overset{(c)}{=} \frac{\partial ( \mathbf{u}^\top \frac{\partial \mathbf{v}}{\partial \mathbf{x}} + \mathbf{v}^\top \frac{\partial \mathbf{u}}{\partial \mathbf{x}} )^\top}{\partial\mathbf{x}} \\
&= \frac{\partial \left( \left(\frac{\partial \mathbf{v}}{\partial \mathbf{x}}\right)^\top \mathbf{u} + \left(\frac{\partial \mathbf{u}}{\partial \mathbf{x}}\right)^\top \mathbf{v}\right)}{\partial\mathbf{x}} \\
&\overset{(d)}{=} \begin{pmatrix}
\mathbf{u}^\top \frac{\partial^2 \mathbf{v}}{\partial x_1\partial \mathbf{x}} + \frac{\partial\mathbf{v}}{\partial x_1}^\top \frac{\partial \mathbf{u}}{\partial \mathbf{x}} \\
\vdots \\
Expand All @@ -250,13 +252,13 @@
\vdots \\
\mathbf{u}^\top \frac{\partial^2 \mathbf{v}}{\partial x_d\partial \mathbf{x}}
\end{pmatrix}
+ \frac{\partial \mathbf{v}}{\partial \mathbf{x}}^\top \frac{\partial \mathbf{u}}{\partial \mathbf{x}}
+ \frac{\partial \mathbf{u}}{\partial \mathbf{x}}^\top \frac{\partial \mathbf{v}}{\partial \mathbf{x}}
+ \frac{\partial \mathbf{u}}{\partial \mathbf{x}} \left(\frac{\partial \mathbf{v}}{\partial \mathbf{x}}\right)^\top
+ \frac{\partial \mathbf{v}}{\partial \mathbf{x}} \left(\frac{\partial \mathbf{u}}{\partial \mathbf{x}}\right)^\top
+ \begin{pmatrix}
\mathbf{v}^\top \frac{\partial^2 \mathbf{u}}{\partial x_1\partial \mathbf{x}} \\
\vdots \\
\mathbf{v}^\top \frac{\partial^2 \mathbf{u}}{\partial x_d\partial \mathbf{x}}
\end{pmatrix}.
\end{pmatrix}
\end{align*}
\end{enumerate}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@

\begin{vbframe}{Runtime comparison (indep.)}
\vspace{-0.2cm}
{\small Clearly, NR makes more progress than GD per iteration. OTOH Newton steps are vastly more expensive than GD updates\\
{\small Clearly, NR makes more progress than GD per iteration. OTOH Newton steps are much more expensive than GD updates\\
$\Rightarrow$ How do NR and GD compare wrt runtime instead of iterations (50 steps)?}
\begin{figure}
\includegraphics[width=1.0\textwidth]{slides/05-multivariate-second-order/figure_man/simu-newton/NR_GD_runtime_comparison.pdf} \\
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,12 +63,12 @@
Use Case & General non-linear\newline optimization & Likelihood-based models,\newline especially GLMs \\ \hline
\end{tabular}
\end{table}
If randomness doesn't exist in data, Newton-Raphson and Fisher scoring are the same.
In many cases Newton-Raphson and Fisher scoring are equivalent (see below).
\end{vbframe}

\begin{vbframe}{Logistic regression}
The goal of logistic regression is to predict a binary event.
Given $n$ observations $\left(\xi, \yi\right) \in \R^p \times \{0, 1\}$,
Given $n$ observations $\left(\xi, \yi\right) \in \R^{p+1} \times \{0, 1\}$,
$\yi |\, \xi \sim Bernoulli(\pi^{(i)})$.\\
\lz
We want to minimize the following risk
Expand All @@ -82,7 +82,7 @@
the sigmoid function $s(f) = \frac{1}{1 + \exp(-f)}$ and the score $\fxit = \thx.$\\
\lz

NB: Note that $\frac{\partial}{\partial f} s(f) = s(f)(1-s(f))$ and $\frac{\partial \fxit}{\partial \thetav} = \left(\xi\right)^\top.$\\
\textbf{NB}: Note that $\frac{\partial}{\partial f} s(f) = s(f)(1-s(f))$ and $\frac{\partial \fxit}{\partial \thetav} = \left(\xi\right)^\top.$\\
\vspace{0.3cm}
For more details we refer to the \href{https://slds-lmu.github.io/i2ml/chapters/11_advriskmin/}{\color{blue}{i2ml}} lecture.

Expand All @@ -104,8 +104,8 @@
}

where $\mathbf{X} = \left(
\xi[1], \dots,
\xi[n]\right)^\top \in \R^{n\times p}, \mathbf{y} = \left(
{\xi[1]}^{\top}, \dots,
{\xi[n]}^{\top}\right)^\top \in \R^{n\times (p+1)}, \mathbf{y} = \left(
\yi[1], \dots,
\yi[n]
\right)^\top,$ \\ $\pi(\mathbf{X}\vert\;\thetav) = \left(
Expand Down

0 comments on commit 8a78906

Please sign in to comment.