From 8a78906af0469e92fbc5b42444147a50fc421211 Mon Sep 17 00:00:00 2001 From: ludwigbothmann <46222472+ludwigbothmann@users.noreply.github.com> Date: Tue, 3 Dec 2024 12:00:26 +0100 Subject: [PATCH] fix ex and sol 2 --- exercises/exercise-02/ex2.tex | 2 +- exercises/exercise-02/sol2.tex | 16 +++++++++------- ...slides-multivar-second-order-5-comparison.tex | 2 +- .../slides-multivar-second-order-6-fisher.tex | 10 +++++----- 4 files changed, 16 insertions(+), 14 deletions(-) diff --git a/exercises/exercise-02/ex2.tex b/exercises/exercise-02/ex2.tex index b0cac4f3..fb9938ee 100644 --- a/exercises/exercise-02/ex2.tex +++ b/exercises/exercise-02/ex2.tex @@ -217,7 +217,7 @@ \item Compute $\frac{\partial \Vert \mathbf{x} - \mathbf{c} \Vert_2}{\partial \mathbf{x}}$ \item Compute $\frac{\partial \mathbf{u}^\top \mathbf{v}}{\partial \mathbf{x}}$ - \item Show that $\frac{\partial \mathbf{u}^\top \mathbf{Y}}{\partial\mathbf{x}} = \begin{pmatrix}\mathbf{u}^\top\frac{\partial \mathbf{y}_1}{\partial\mathbf{x}} + \mathbf{y}_1^\top\frac{\partial \mathbf{u}}{\partial\mathbf{x}} \\ \vdots \\ \mathbf{u}^\top\frac{\partial \mathbf{y}_d}{\partial\mathbf{x}} + \mathbf{y}_d^\top\frac{\partial \mathbf{u}}{\partial\mathbf{x}}\end{pmatrix}$ where $\mathbf{y}_i:\R^d\rightarrow\R^d, i= 1,\dots,d$ are the column vectors of $\mathbf{Y}.$ + \item Show that $\frac{\partial \mathbf{Y}^\top \mathbf{u}}{\partial\mathbf{x}} = \begin{pmatrix}\mathbf{u}^\top\frac{\partial \mathbf{y}_1}{\partial\mathbf{x}} + \mathbf{y}_1^\top\frac{\partial \mathbf{u}}{\partial\mathbf{x}} \\ \vdots \\ \mathbf{u}^\top\frac{\partial \mathbf{y}_d}{\partial\mathbf{x}} + \mathbf{y}_d^\top\frac{\partial \mathbf{u}}{\partial\mathbf{x}}\end{pmatrix}$ where $\mathbf{y}_i:\R^d\rightarrow\R^d, i= 1,\dots,d$ are the column vectors of $\mathbf{Y}.$ \item Compute $\frac{\partial^2 \mathbf{u}^\top \mathbf{v}}{\partial \mathbf{x}\partial\mathbf{x}^\top}$ \\ \textit{Hint:} use c), d) diff --git a/exercises/exercise-02/sol2.tex b/exercises/exercise-02/sol2.tex index 98997858..03fd32ef 100644 --- a/exercises/exercise-02/sol2.tex +++ b/exercises/exercise-02/sol2.tex @@ -225,15 +225,17 @@ \item $\frac{\partial \mathbf{u}^\top \mathbf{v}}{\partial \mathbf{x}} = \frac{\partial \mathbf{u}^\top \mathbf{I} \mathbf{v}}{\partial \mathbf{x}} = \mathbf{u}^\top \mathbf{I} \frac{\partial \mathbf{v}}{\partial \mathbf{x}} + \mathbf{v}^\top \mathbf{I}^\top \frac{\partial \mathbf{u}}{\partial \mathbf{x}} = \mathbf{u}^\top \frac{\partial \mathbf{v}}{\partial \mathbf{x}} + \mathbf{v}^\top \frac{\partial \mathbf{u}}{\partial \mathbf{x}}$ -\item $\frac{\partial \mathbf{u}^\top \mathbf{Y}}{\partial\mathbf{x}} = \frac{\partial\begin{pmatrix}\mathbf{u}^\top \mathbf{y}_1 \\ \vdots \\ \mathbf{u}^\top \mathbf{y}_d\end{pmatrix}}{\partial \mathbf{x}} \overset{(c)}{=} \begin{pmatrix} \mathbf{u}^\top \frac{\partial \mathbf{y}_1}{\partial \mathbf{x}} + \mathbf{y}_1^\top \frac{\partial \mathbf{u}}{\partial \mathbf{x}} \\ \vdots \\ -\mathbf{u}^\top \frac{\partial \mathbf{y}_d}{\partial \mathbf{x}} + \mathbf{y}_d^\top \frac{\partial \mathbf{u}}{\partial \mathbf{x}} +\item $\frac{\partial \mathbf{Y}^\top \mathbf{u}}{\partial\mathbf{x}} = \frac{\partial\begin{pmatrix}\mathbf{y}_1^\top \mathbf{u} \\ \vdots \\ \mathbf{y}_d^\top \mathbf{u}\end{pmatrix}}{\partial \mathbf{x}} \overset{(c)}{=} \begin{pmatrix} \mathbf{y}_1^\top \frac{\partial \mathbf{u}}{\partial \mathbf{x}} + \mathbf{u}^\top \frac{\partial \mathbf{y}_1}{\partial \mathbf{x}} \\ \vdots \\ +\mathbf{y}_d^\top \frac{\partial \mathbf{u}}{\partial 
\mathbf{x}} + \mathbf{u}^\top \frac{\partial \mathbf{y}_d}{\partial \mathbf{x}}
\end{pmatrix}$
\item Note for $\mathbf{y}:\R^d\rightarrow\R^d, \mathbf{x}\mapsto\mathbf{y}(\mathbf{x})$ the $i-$th column of $\frac{\partial\mathbf{y}}{\partial\mathbf{x}}$ is $\frac{\partial\mathbf{y}}{\partial x_i}$. With this it follows that \\
\begin{align*}
- \frac{\partial^2 \mathbf{u}^\top \mathbf{v}}{\partial \mathbf{x}\partial\mathbf{x}^\top} &= \frac{\partial}{\partial\mathbf{x}} \left( \frac{\partial \mathbf{u}^\top \mathbf{v}}{\partial\mathbf{x}} \right) \\
- &\overset{(c)}{=} \frac{\partial ( \mathbf{u}^\top \frac{\partial \mathbf{v}}{\partial \mathbf{x}} + \mathbf{v}^\top \frac{\partial \mathbf{u}}{\partial \mathbf{x}} )}{\partial\mathbf{x}} \\
+ \frac{\partial^2 \mathbf{u}^\top \mathbf{v}}{\partial \mathbf{x}\partial\mathbf{x}^\top} &= \frac{\partial}{\partial\mathbf{x}} \left( \frac{\partial \mathbf{u}^\top \mathbf{v}}{\partial\mathbf{x}^\top} \right) \\
+ &= \frac{\partial}{\partial\mathbf{x}} \left[ \left( \frac{\partial \mathbf{u}^\top \mathbf{v}}{\partial\mathbf{x}} \right)^\top \right] \\
+ &\overset{(c)}{=} \frac{\partial ( \mathbf{u}^\top \frac{\partial \mathbf{v}}{\partial \mathbf{x}} + \mathbf{v}^\top \frac{\partial \mathbf{u}}{\partial \mathbf{x}} )^\top}{\partial\mathbf{x}} \\
+ &= \frac{\partial \left( \left(\frac{\partial \mathbf{v}}{\partial \mathbf{x}}\right)^\top \mathbf{u} + \left(\frac{\partial \mathbf{u}}{\partial \mathbf{x}}\right)^\top \mathbf{v}\right)}{\partial\mathbf{x}} \\
&\overset{(d)}{=} \begin{pmatrix}
\mathbf{u}^\top \frac{\partial^2 \mathbf{v}}{\partial x_1\partial \mathbf{x}} + \frac{\partial\mathbf{v}}{\partial x_1}^\top \frac{\partial \mathbf{u}}{\partial \mathbf{x}} \\
\vdots \\
@@ -250,13 +252,13 @@
\vdots \\
\mathbf{u}^\top \frac{\partial^2 \mathbf{v}}{\partial x_d\partial \mathbf{x}}
\end{pmatrix}
- + \frac{\partial \mathbf{v}}{\partial \mathbf{x}}^\top \frac{\partial \mathbf{u}}{\partial \mathbf{x}}
- + \frac{\partial \mathbf{u}}{\partial \mathbf{x}}^\top \frac{\partial \mathbf{v}}{\partial \mathbf{x}}
+ + \left(\frac{\partial \mathbf{v}}{\partial \mathbf{x}}\right)^\top \frac{\partial \mathbf{u}}{\partial \mathbf{x}}
+ + \left(\frac{\partial \mathbf{u}}{\partial \mathbf{x}}\right)^\top \frac{\partial \mathbf{v}}{\partial \mathbf{x}}
+ \begin{pmatrix}
\mathbf{v}^\top \frac{\partial^2 \mathbf{u}}{\partial x_1\partial \mathbf{x}} \\
\vdots \\
\mathbf{v}^\top \frac{\partial^2 \mathbf{u}}{\partial x_d\partial \mathbf{x}}
- \end{pmatrix}.
+ \end{pmatrix}
\end{align*}
\end{enumerate}
}
diff --git a/slides/05-multivariate-second-order/slides-multivar-second-order-5-comparison.tex b/slides/05-multivariate-second-order/slides-multivar-second-order-5-comparison.tex
index fce79704..59cdefe8 100644
--- a/slides/05-multivariate-second-order/slides-multivar-second-order-5-comparison.tex
+++ b/slides/05-multivariate-second-order/slides-multivar-second-order-5-comparison.tex
@@ -133,7 +133,7 @@
\begin{vbframe}{Runtime comparison (indep.)}
\vspace{-0.2cm}
-{\small Clearly, NR makes more progress than GD per iteration. OTOH Newton steps are vastly more expensive than GD updates\\
+{\small Clearly, NR makes more progress than GD per iteration. OTOH Newton steps are much more expensive than GD updates\\
$\Rightarrow$ How do NR and GD compare wrt runtime instead of iterations (50 steps)?}
\begin{figure}
\includegraphics[width=1.0\textwidth]{slides/05-multivariate-second-order/figure_man/simu-newton/NR_GD_runtime_comparison.pdf} \\
\end{figure}
diff --git a/slides/05-multivariate-second-order/slides-multivar-second-order-6-fisher.tex b/slides/05-multivariate-second-order/slides-multivar-second-order-6-fisher.tex
index 222af2b9..cb6fcf49 100644
--- a/slides/05-multivariate-second-order/slides-multivar-second-order-6-fisher.tex
+++ b/slides/05-multivariate-second-order/slides-multivar-second-order-6-fisher.tex
@@ -63,12 +63,12 @@
Use Case & General non-linear\newline optimization & Likelihood-based models,\newline especially GLMs \\ \hline
\end{tabular}
\end{table}
-If randomness doesn't exist in data, Newton-Raphson and Fisher scoring are the same.
+In many cases Newton-Raphson and Fisher scoring are equivalent (see below).
\end{vbframe}
\begin{vbframe}{Logistic regression}
The goal of logistic regression is to predict a binary event.
-Given $n$ observations $\left(\xi, \yi\right) \in \R^p \times \{0, 1\}$,
+Given $n$ observations $\left(\xi, \yi\right) \in \R^{p+1} \times \{0, 1\}$,
$\yi |\, \xi \sim Bernoulli(\pi^{(i)})$.\\
\lz
We want to minimize the following risk
@@ -82,7 +82,7 @@
the sigmoid function $s(f) = \frac{1}{1 + \exp(-f)}$ and
the score $\fxit = \thx.$\\
\lz
-NB: Note that $\frac{\partial}{\partial f} s(f) = s(f)(1-s(f))$ and $\frac{\partial \fxit}{\partial \thetav} = \left(\xi\right)^\top.$\\
+\textbf{NB}: $\frac{\partial}{\partial f} s(f) = s(f)(1-s(f))$ and $\frac{\partial \fxit}{\partial \thetav} = \left(\xi\right)^\top.$\\
\vspace{0.3cm}
For more details we refer to the \href{https://slds-lmu.github.io/i2ml/chapters/11_advriskmin/}{\color{blue}{i2ml}} lecture.
@@ -104,8 +104,8 @@
}
where
$\mathbf{X} = \left(
- \xi[1], \dots,
- \xi[n]\right)^\top \in \R^{n\times p}, \mathbf{y} = \left(
+ {\xi[1]}^{\top}, \dots,
+ {\xi[n]}^{\top}\right)^\top \in \R^{n\times (p+1)}, \mathbf{y} = \left(
\yi[1], \dots, \yi[n]
\right)^\top,$ \\
$\pi(\mathbf{X}\vert\;\thetav) = \left(
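
Two short supplementary notes on the math touched by this patch. Both are sketches written in (approximately) the lecture's notation; they are not taken verbatim from the source files.

First, a sanity check for the final identity in sol2.tex, assuming the numerator-layout convention stated there (the $i$-th column of $\frac{\partial\mathbf{y}}{\partial\mathbf{x}}$ is $\frac{\partial\mathbf{y}}{\partial x_i}$). The Hessian of $\mathbf{u}^\top\mathbf{v} = \sum_{k=1}^d u_k v_k$ can be computed entrywise:

\begin{align*}
\left(\frac{\partial^2 \mathbf{u}^\top \mathbf{v}}{\partial \mathbf{x}\partial\mathbf{x}^\top}\right)_{ij}
= \sum_{k=1}^d \left( u_k \frac{\partial^2 v_k}{\partial x_i \partial x_j}
+ \frac{\partial u_k}{\partial x_i}\frac{\partial v_k}{\partial x_j}
+ \frac{\partial v_k}{\partial x_i}\frac{\partial u_k}{\partial x_j}
+ v_k \frac{\partial^2 u_k}{\partial x_i \partial x_j} \right).
\end{align*}

The two mixed sums are exactly the $(i,j)$ entries of $\left(\frac{\partial \mathbf{u}}{\partial \mathbf{x}}\right)^\top \frac{\partial \mathbf{v}}{\partial \mathbf{x}}$ and $\left(\frac{\partial \mathbf{v}}{\partial \mathbf{x}}\right)^\top \frac{\partial \mathbf{u}}{\partial \mathbf{x}}$, and for $d = 1$ the identity collapses to the product rule $(uv)'' = u''v + 2u'v' + uv''$.

Second, a sketch of why "Newton-Raphson and Fisher scoring are equivalent" for the logistic regression example on the Fisher slide. Writing $\mathcal{R}(\thetav)$ for the summed, unregularized Bernoulli negative log-likelihood risk with $\pi^{(i)} = s\big(\fxit\big)$ — the symbols $\mathcal{R}$ and $\mathbf{D}$ below are ad-hoc notation for this note, not macros from the slides — one gets

\begin{align*}
\nabla_{\thetav}\, \mathcal{R}(\thetav) &= \mathbf{X}^\top \left( \pi(\mathbf{X}\vert\;\thetav) - \mathbf{y} \right), \\
\nabla^2_{\thetav}\, \mathcal{R}(\thetav) &= \mathbf{X}^\top \mathbf{D}\, \mathbf{X},
\qquad \mathbf{D} = \operatorname{diag}\!\left(\pi^{(1)}\big(1-\pi^{(1)}\big), \dots, \pi^{(n)}\big(1-\pi^{(n)}\big)\right).
\end{align*}

The Hessian does not involve $\mathbf{y}$, so the observed information equals the expected (Fisher) information, and the Newton-Raphson and Fisher-scoring updates coincide for this model.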