Skip to content

Commit

Permalink
fix ex and sol 2
Browse files Browse the repository at this point in the history
  • Loading branch information
ludwigbothmann committed Dec 3, 2024
1 parent 4d53e0a commit 8a78906
Show file tree
Hide file tree
Showing 4 changed files with 16 additions and 14 deletions.
2 changes: 1 addition & 1 deletion exercises/exercise-02/ex2.tex
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@
\item Compute $\frac{\partial \Vert \mathbf{x} - \mathbf{c} \Vert_2}{\partial
\mathbf{x}}$
\item Compute $\frac{\partial \mathbf{u}^\top \mathbf{v}}{\partial \mathbf{x}}$
\item Show that $\frac{\partial \mathbf{u}^\top \mathbf{Y}}{\partial\mathbf{x}} = \begin{pmatrix}\mathbf{u}^\top\frac{\partial \mathbf{y}_1}{\partial\mathbf{x}} + \mathbf{y}_1^\top\frac{\partial \mathbf{u}}{\partial\mathbf{x}} \\ \vdots \\ \mathbf{u}^\top\frac{\partial \mathbf{y}_d}{\partial\mathbf{x}} + \mathbf{y}_d^\top\frac{\partial \mathbf{u}}{\partial\mathbf{x}}\end{pmatrix}$ where $\mathbf{y}_i:\R^d\rightarrow\R^d, i= 1,\dots,d$ are the column vectors of $\mathbf{Y}.$
\item Show that $\frac{\partial \mathbf{Y}^\top \mathbf{u}}{\partial\mathbf{x}} = \begin{pmatrix}\mathbf{u}^\top\frac{\partial \mathbf{y}_1}{\partial\mathbf{x}} + \mathbf{y}_1^\top\frac{\partial \mathbf{u}}{\partial\mathbf{x}} \\ \vdots \\ \mathbf{u}^\top\frac{\partial \mathbf{y}_d}{\partial\mathbf{x}} + \mathbf{y}_d^\top\frac{\partial \mathbf{u}}{\partial\mathbf{x}}\end{pmatrix}$ where $\mathbf{y}_i:\R^d\rightarrow\R^d, i= 1,\dots,d$ are the column vectors of $\mathbf{Y}.$
\item Compute $\frac{\partial^2 \mathbf{u}^\top \mathbf{v}}{\partial \mathbf{x}\partial\mathbf{x}^\top}$ \\
\textit{Hint:} use c), d)

Expand Down
16 changes: 9 additions & 7 deletions exercises/exercise-02/sol2.tex
Original file line number Diff line number Diff line change
Expand Up @@ -225,15 +225,17 @@
\item $\frac{\partial \mathbf{u}^\top \mathbf{v}}{\partial \mathbf{x}} = \frac{\partial \mathbf{u}^\top \mathbf{I} \mathbf{v}}{\partial \mathbf{x}} = \mathbf{u}^\top \mathbf{I} \frac{\partial \mathbf{v}}{\partial \mathbf{x}} + \mathbf{v}^\top \mathbf{I}^\top \frac{\partial \mathbf{u}}{\partial \mathbf{x}} =
\mathbf{u}^\top \frac{\partial \mathbf{v}}{\partial \mathbf{x}} + \mathbf{v}^\top \frac{\partial \mathbf{u}}{\partial \mathbf{x}}$

\item $\frac{\partial \mathbf{u}^\top \mathbf{Y}}{\partial\mathbf{x}} = \frac{\partial\begin{pmatrix}\mathbf{u}^\top \mathbf{y}_1 \\ \vdots \\ \mathbf{u}^\top \mathbf{y}_d\end{pmatrix}}{\partial \mathbf{x}} \overset{(c)}{=} \begin{pmatrix} \mathbf{u}^\top \frac{\partial \mathbf{y}_1}{\partial \mathbf{x}} + \mathbf{y}_1^\top \frac{\partial \mathbf{u}}{\partial \mathbf{x}} \\ \vdots \\
\mathbf{u}^\top \frac{\partial \mathbf{y}_d}{\partial \mathbf{x}} + \mathbf{y}_d^\top \frac{\partial \mathbf{u}}{\partial \mathbf{x}}
\item $\frac{\partial \mathbf{Y}^\top \mathbf{u}}{\partial\mathbf{x}} = \frac{\partial\begin{pmatrix}\mathbf{y}_1^\top \mathbf{u} \\ \vdots \\ \mathbf{y}_d^\top \mathbf{u}\end{pmatrix}}{\partial \mathbf{x}} \overset{(c)}{=} \begin{pmatrix} \mathbf{y}_1^\top \frac{\partial \mathbf{u}}{\partial \mathbf{x}} + \mathbf{u}^\top \frac{\partial \mathbf{y}_1}{\partial \mathbf{x}} \\ \vdots \\
\mathbf{y}_d^\top \frac{\partial \mathbf{u}}{\partial \mathbf{x}} + \mathbf{u}^\top \frac{\partial \mathbf{y}_d}{\partial \mathbf{x}}
\end{pmatrix}$

\item Note for $\mathbf{y}:\R^d\rightarrow\R^d, \mathbf{x}\mapsto\mathbf{y}(\mathbf{x})$ the $i$-th column of $\frac{\partial\mathbf{y}}{\partial\mathbf{x}}$ is $\frac{\partial\mathbf{y}}{\partial x_i}$.
With this it follows that \\
\begin{align*}
\frac{\partial^2 \mathbf{u}^\top \mathbf{v}}{\partial \mathbf{x}\partial\mathbf{x}^\top} &= \frac{\partial}{\partial\mathbf{x}} \left( \frac{\partial \mathbf{u}^\top \mathbf{v}}{\partial\mathbf{x}} \right) \\
&\overset{(c)}{=} \frac{\partial ( \mathbf{u}^\top \frac{\partial \mathbf{v}}{\partial \mathbf{x}} + \mathbf{v}^\top \frac{\partial \mathbf{u}}{\partial \mathbf{x}} )}{\partial\mathbf{x}} \\
\frac{\partial^2 \mathbf{u}^\top \mathbf{v}}{\partial \mathbf{x}\partial\mathbf{x}^\top} &= \frac{\partial}{\partial\mathbf{x}} \left( \frac{\partial \mathbf{u}^\top \mathbf{v}}{\partial\mathbf{x}^\top} \right) \\
&= \frac{\partial}{\partial\mathbf{x}} \left[ \left( \frac{\partial \mathbf{u}^\top \mathbf{v}}{\partial\mathbf{x}} \right)^\top \right] \\
&\overset{(c)}{=} \frac{\partial ( \mathbf{u}^\top \frac{\partial \mathbf{v}}{\partial \mathbf{x}} + \mathbf{v}^\top \frac{\partial \mathbf{u}}{\partial \mathbf{x}} )^\top}{\partial\mathbf{x}} \\
&= \frac{\partial \left( \left(\frac{\partial \mathbf{v}}{\partial \mathbf{x}}\right)^\top \mathbf{u} + \left(\frac{\partial \mathbf{u}}{\partial \mathbf{x}}\right)^\top \mathbf{v}\right)}{\partial\mathbf{x}} \\
&\overset{(d)}{=} \begin{pmatrix}
\mathbf{u}^\top \frac{\partial^2 \mathbf{v}}{\partial x_1\partial \mathbf{x}} + \frac{\partial\mathbf{v}}{\partial x_1}^\top \frac{\partial \mathbf{u}}{\partial \mathbf{x}} \\
\vdots \\
Expand All @@ -250,13 +252,13 @@
\vdots \\
\mathbf{u}^\top \frac{\partial^2 \mathbf{v}}{\partial x_d\partial \mathbf{x}}
\end{pmatrix}
+ \frac{\partial \mathbf{v}}{\partial \mathbf{x}}^\top \frac{\partial \mathbf{u}}{\partial \mathbf{x}}
+ \frac{\partial \mathbf{u}}{\partial \mathbf{x}}^\top \frac{\partial \mathbf{v}}{\partial \mathbf{x}}
+ \frac{\partial \mathbf{u}}{\partial \mathbf{x}} \left(\frac{\partial \mathbf{v}}{\partial \mathbf{x}}\right)^\top
+ \frac{\partial \mathbf{v}}{\partial \mathbf{x}} \left(\frac{\partial \mathbf{u}}{\partial \mathbf{x}}\right)^\top
+ \begin{pmatrix}
\mathbf{v}^\top \frac{\partial^2 \mathbf{u}}{\partial x_1\partial \mathbf{x}} \\
\vdots \\
\mathbf{v}^\top \frac{\partial^2 \mathbf{u}}{\partial x_d\partial \mathbf{x}}
\end{pmatrix}.
\end{pmatrix}
\end{align*}
\end{enumerate}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@

\begin{vbframe}{Runtime comparison (indep.)}
\vspace{-0.2cm}
{\small Clearly, NR makes more progress than GD per iteration. OTOH Newton steps are vastly more expensive than GD updates\\
{\small Clearly, NR makes more progress than GD per iteration. OTOH Newton steps are much more expensive than GD updates\\
$\Rightarrow$ How do NR and GD compare wrt runtime instead of iterations (50 steps)?}
\begin{figure}
\includegraphics[width=1.0\textwidth]{slides/05-multivariate-second-order/figure_man/simu-newton/NR_GD_runtime_comparison.pdf} \\
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,12 +63,12 @@
Use Case & General non-linear\newline optimization & Likelihood-based models,\newline especially GLMs \\ \hline
\end{tabular}
\end{table}
If randomness doesn't exist in data, Newton-Raphson and Fisher scoring are the same.
In many cases Newton-Raphson and Fisher scoring are equivalent (see below).
\end{vbframe}

\begin{vbframe}{Logistic regression}
The goal of logistic regression is to predict a binary event.
Given $n$ observations $\left(\xi, \yi\right) \in \R^p \times \{0, 1\}$,
Given $n$ observations $\left(\xi, \yi\right) \in \R^{p+1} \times \{0, 1\}$,
$\yi |\, \xi \sim Bernoulli(\pi^{(i)})$.\\
\lz
We want to minimize the following risk
Expand All @@ -82,7 +82,7 @@
the sigmoid function $s(f) = \frac{1}{1 + \exp(-f)}$ and the score $\fxit = \thx.$\\
\lz

NB: Note that $\frac{\partial}{\partial f} s(f) = s(f)(1-s(f))$ and $\frac{\partial \fxit}{\partial \thetav} = \left(\xi\right)^\top.$\\
\textbf{NB}: Note that $\frac{\partial}{\partial f} s(f) = s(f)(1-s(f))$ and $\frac{\partial \fxit}{\partial \thetav} = \left(\xi\right)^\top.$\\
\vspace{0.3cm}
For more details we refer to the \href{https://slds-lmu.github.io/i2ml/chapters/11_advriskmin/}{\color{blue}{i2ml}} lecture.

Expand All @@ -104,8 +104,8 @@
}

where $\mathbf{X} = \left(
\xi[1], \dots,
\xi[n]\right)^\top \in \R^{n\times p}, \mathbf{y} = \left(
{\xi[1]}^{\top}, \dots,
{\xi[n]}^{\top}\right)^\top \in \R^{n\times (p+1)}, \mathbf{y} = \left(
\yi[1], \dots,
\yi[n]
\right)^\top,$ \\ $\pi(\mathbf{X}\vert\;\thetav) = \left(
Expand Down

0 comments on commit 8a78906

Please sign in to comment.