% (removed: GitHub page-scrape artifacts — site UI text and line-number gutter preceding \documentclass)
\documentclass[12pt]{article}
\usepackage[left=2.5cm,top=2.5cm,right=2.5cm,bottom=2.5cm]{geometry}
\usepackage{cancel}
\usepackage{bm}
\usepackage{moreverb}
\usepackage{graphicx}
\usepackage{amssymb}
\usepackage{amsmath}
\usepackage{amsthm}
\usepackage{color}
\usepackage{ctable}
\definecolor{MyDarkBlue}{rgb}{0.15,0.15,0.45}
\usepackage[linktocpage=true]{hyperref}
\hypersetup{
colorlinks=true,
citecolor=MyDarkBlue,
linkcolor=MyDarkBlue,
urlcolor=MyDarkBlue,
}
\def\argmin{\mathop{\rm argmin}}
\def\argmax{\mathop{\rm argmax}}
\def\tr{\mathop{\rm tr}\nolimits}
\def\Re{\mathop{\rm Re}\nolimits}
\def\diag{\mathop{\rm diag}\nolimits}
\def\var{\mathop{\rm var}\nolimits}
\def\cov{\mathop{\rm cov}\nolimits}
\def\RGP{\mathop{\rm RGP}\nolimits}
\def\Geom{\mathop{\rm Geom}\nolimits}
\def\B{\mathop{\rm B}\nolimits}
\def\Hyp{\mathop{\rm Hyp}\nolimits}
\def\CV{\mathop{\rm CV}\nolimits}
\def\EV{\mathop{\rm EV}\nolimits}
\theoremstyle{plain}% default
\newtheorem{thm}{Theorem}[section]
\newtheorem{lem}[thm]{Lemma}
\newtheorem{prop}[thm]{Proposition}
\newtheorem*{cor}{Corollary}
\theoremstyle{definition}
\newtheorem{defn}{Definition}[section]
\newtheorem{conj}{Conjecture}[section]
\newtheorem{exmp}{Example}[section]
\theoremstyle{remark}
\newtheorem*{remk}{Remark}
\newtheorem*{note}{Note}
\newtheorem{case}{Case}
\newcommand\ddfrac[2]{\frac{\displaystyle #1}{\displaystyle #2}}
\begin{document}
\title{Notes on stochastic processes}
\author{James Stokes}
\maketitle
\tableofcontents
\section{Probability theory}
\subsection{Probability space}
The sample space $\Omega$ is the set of all outcomes of an experiment. An event $A$ is a subset of $\Omega$. An event $A$ is said to occur if the outcome of the experiment lies in $A$. A probability distribution is a function on the collection of events such that $P(\Omega) =1$, $P(A) \geq 0$ for all $A$ and for any countable union of disjoint sets
\begin{align}
P\Bigl(\coprod_i A_i\Bigr)
& = \sum_i P(A_i).
\end{align}
It follows from the above axioms that $P(A) \leq 1$ since $1=P(\Omega) = P(A \cup A^c) = P(A) + P(A^c) $ and thus since $P(A^c) \geq 0$. By expressing $A \cup B$ as a disjoint union of sets,
\begin{align}
A \cup B
& = (A \cap B^c) \sqcup (A^c \cap B) \sqcup (A \cap B)
\end{align}
we obtain
\begin{align}
P(A \cup B)
& = P(A \cap B^c) + P(A^c \cap B) + P(A \cap B) \\
& = \left[P(A) + P(B) - 2P(A \cap B)\right] + P(A \cap B) \\
& = P(A) + P(B) - P(A \cap B).
\end{align}
\subsection{Conditional probability}
Denote by $P(A|B)$ the probability of $A$ occurring given that $B$ has occurred. This is the probability that the outcome of the experiment lies in $A$ given that the outcome is known to lie in $B$. It is given by the formula
\begin{equation}
P(A|B) = \frac{P(A \cap B)}{P(B)}.
\end{equation}
The above formula has the corollary
\begin{equation}
P(A \cap B) = P(B) P(A| B) = P(A)P(B|A).
\end{equation}
This can be thought of as a generalization of the formula $P(A \cap B) = P(A)P(B)$ for independent events. Iterating the definition of conditional probability we obtain
\begin{align}
P(A \cap B \cap C)
& = P(A \cap B) P(C | A\cap B) \\
& = P(A)P(B|A)P(C | A\cap B).
\end{align}
\begin{exmp}
A coin is tossed twice. Find the probability of the event of two heads given (a) the first toss is a head (b) at least one toss is a head. We have
\begin{align}
P(H_1 \cap H_2 |H_1)
& = \frac{P(H_1 \cap H_2 \cap H_1)}{P(H_1)} = \frac{P(H_1 \cap H_2)}{P(H_1)} \\
& = \frac{p^2}{p^2 + p(1-p)} = p \\
P(H_1 \cap H_2 |H_1 \cup H_2)
& = \frac{P(H_1 \cap H_2 \cap (H_1\cup H_2))}{P(H_1\cup H_2)} = \frac{P(H_1 \cap H_2)}{P(H_1\cup H_2)} \\
& = \frac{p^2}{p^2 + 2p(1-p)} = \frac{p}{2-p}.
\end{align}
Since $0 \leq p \leq 1$ we see that (a) is at least as likely as (b).
\end{exmp}
\subsubsection{Bayes' theorem}
Suppose we partition the sample space into a set of disjoint events called priors $\sqcup_i A_i = \Omega$. Then the partition theorem states that
\begin{equation}
P(B) = \sum_i P(B|A_i)P(A_i).
\end{equation}
Using this fact allows us to express the conditional probability $P(A_i |B)$ in terms of the conditional probabilities $P(B|A_j)$ as follows
\begin{align}
P(A_i |B)
= \frac{P(A_i \cap B)}{P(B)}
= \frac{P(A_i) P(B|A_i)}{\sum_j P(A_j) P(B|A_j) }.
\end{align}
Applying the partition theorem to the expectation of a discrete random variable $X$ gives
\begin{align}
\mathbb{E}[X]
& = \sum_i\sum_x x P(X=x|A_i)P(A_i) \\
& = \sum_i \mathbb{E}[X\,|\,A_i]P(A_i).
\end{align}
Let $f_{X,Y}(x,y)$ be the joint probability density for two random variables $X$ and $Y$. The conditional probability density is defined as
\begin{equation}
f_{X|Y}(x | y) = \frac{f_{X,Y}(x,y)}{f_Y(y)}.
\end{equation}
\begin{exmp} (Monty Hall problem). Suppose without loss of generality that the contestant chooses door 1 (if the contestant chooses any other door then we just relabel the doors so that the contestant chose door 1). We clearly have
\begin{equation}
P(\textrm{behind 1}) = P(\textrm{behind 2}) = P(\textrm{behind 3}) = \frac{1}{3}.
\end{equation}
Now consider the probabilities conditioned on the fact that the host chooses a door. Note that the host can only choose door 2 or 3 since the contestant has selected door 1. We have
\begin{align}
P(\textrm{behind 1}|\textrm{chooses 2})
& = \frac{P(\textrm{chooses 2}|\textrm{behind 1})P(\textrm{behind 1})}{P(\textrm{chooses 2})}
= \frac{\frac{1}{2}\frac{1}{3}}{\frac{1}{2}} = 1/3 \\
P(\textrm{behind 1}|\textrm{chooses 3})
& = \frac{P(\textrm{chooses 3}|\textrm{behind 1})P(\textrm{behind 1})}{P(\textrm{chooses 3})} = \frac{\frac{1}{2}\frac{1}{3}}{\frac{1}{2}} = 1/3 .
\end{align}
Now
\begin{align}
P(\textrm{behind 2}|\textrm{chooses 3})
& = \frac{P(\textrm{chooses 3}|\textrm{behind 2})P(\textrm{behind 2})}{P(\textrm{chooses 3})} = \frac{1\frac{1}{3}}{\frac{1}{2}} = 2/3 \\
P(\textrm{behind 3}|\textrm{chooses 2})
& = \frac{P(\textrm{chooses 2}|\textrm{behind 3})P(\textrm{behind 3})}{P(\textrm{chooses 2})}= \frac{1\frac{1}{3}}{\frac{1}{2}} = 2/3.
\end{align}
The only remaining possibilities have vanishing probability because the host will never choose the door behind which the prize lies; that is,
\begin{equation}
P(\textrm{behind 2}|\textrm{chooses 2})=P(\textrm{behind 3}|\textrm{chooses 3}) = 0
\end{equation}
\end{exmp}
\subsection{Discrete probability distributions}
\subsubsection{Bernoulli}
Consider a biased coin that lands heads with probability $p$. This experiment can be modeled by a Bernoulli distribution which describes the probability of success in a single experiment with success probability $p$. A Bernoulli random variable $X \sim \B(1,p)$ has probability distribution
\begin{equation}
P(X=1)=p, \quad \quad P(X=0) = 1-p.
\end{equation}
The mean and variance are easily calculated to be
\begin{equation}
\mathbb{E}[X] = p\cdot 1 + (1-p)\cdot 0=p, \quad \quad \var(X) = \mathbb{E}[X^2]-\mathbb{E}[X]^2 = p-p^2=p(1-p).
\end{equation}
\subsubsection{Binomial and Geometric}
Now consider a sequence of $k$ biased coin tosses where heads lands with probability $p$. This can be modeled as a sequence of $k$ independent and identically distributed Bernoulli random variables $(X_1,\ldots,X_k)$ with common success probability $P(X_i=1) = p$. The binomial distribution $\B(k,p)$ characterizes the number of heads that land after $k$ tosses. It also describes the number of successes in $k$ draws with replacement, where the success probability on each draw is $p$. A binomial random variable $X\sim \B(k,p)$ has probability distribution
\begin{equation}
P(X=j) = {k\choose{j}} p^j (1-p)^{k-j}.
\end{equation}
This gives the probability of $j$ heads appearing in $k$ flips of an unfair coin. The ${k\choose{j}}$ factor counts the number of ways that the $j$ heads can be distributed in the sequence of $k$ trials.
Note that the binomial random variable can be represented as a sum of $k$ iid Bernoulli random variables
\begin{equation}
X= \sum_{j=1}^k X_j
\end{equation}
and thus
\begin{align}
\mathbb{E}[X]
& = kp, \\
\var(X)
& = \sum_{i=1}^k\sum_{j=1}^k \cov(X_i,X_j) = \sum_{i=1}^k \var(X_i) = kp(1-p).
\end{align}
The geometric distribution $\Geom(p)$ describes the probability of success on the $k$th trial after $k-1$ failures with success probability $p$. Given that a biased coin toss yields tails, the geometric distribution characterizes the number of tosses until the next head. A geometric random variable $X \sim \Geom(p)$ has probability distribution
\begin{equation}
P(X=k) = (1-p)^{k-1}p.
\end{equation}
An example is the probability that an unfair coin yields heads on the $k$th flip if the probability of heads is $p$. The expectation value
\begin{equation}
\mathbb{E}[X] = \sum_{k=1}^\infty P(X=k)k.
\end{equation}
can be computed by differentiating the following identity with respect to $p$,
\begin{align}
1
& = \sum_{k=1}^\infty (1-p)^{k-1}p \\
0
& = \sum_{k=1}^\infty \left[(1-p)^{k-1}-(k-1)p(1-p)^{k-2}\right] \\
& = 1/p - \mathbb{E}[X].
\end{align}
It can also be computed by representing it in terms of conditional expectations
\begin{align}
\mathbb{E}[X]
& = P(X=1)\mathbb{E}[X\,|\,X=1]+P(X>1)\mathbb{E}[X\,|\,X>1] \\
& = P(X=1)\mathbb{E}[X\,|\,X=1]+P(X>1)\left(\mathbb{E}[X-1\,|\,X-1>0]+1\right) \\
& = p + (1-p)\left(\mathbb{E}[X] + 1\right)
\end{align}
where we have used the memorylessness property $P(X-n=k\,|\,X>n)=P(X=k)$. Solving gives
\begin{equation}
\mathbb{E}[X] = 1/p.
\end{equation}
The variance can be computed using the trick
\begin{equation}
\var(X) = \mathbb{E}[X^2]-\mathbb{E}[X]^2 = \mathbb{E}[X(X-1)]+\mathbb{E}[X]-\mathbb{E}[X]^2
\end{equation}
We have
\begin{align}
\mathbb{E}[X(X-1)]
& = \sum_{k=1}^\infty k(k-1)(1-p)^{k-1}p \\
& = p(1-p)[2\cdot 1+3\cdot 2(1-p)+4\cdot 3(1-p)^2 + \cdots]\\
& = \frac{2p(1-p)}{[1-(1-p)]^3}
\end{align}
and thus
\begin{equation}
\var(X) = \frac{1-p}{p^2}.
\end{equation}
\begin{exmp}
Let $X$ represent the number of tosses until two successive heads appear in a row. We partition on first and second tosses and use the memorylessness property $\mathbb{E}[X\,|\,T] = \mathbb{E}[X] +1$, $\mathbb{E}[X\,|\,HT] = \mathbb{E}[X] +2$ to give
\begin{align}
\mathbb{E}[X]
& = \mathbb{E}[X\,|\,T]P(T) + \mathbb{E}[X\,|\,H]P(H) \\
& = \mathbb{E}[X\,|\,T]P(T) + \mathbb{E}[X\,|\,HT]P(HT) + \mathbb{E}[X\,|\,HH]P(HH) \\
& = (1+\mathbb{E}[X])(1-p)+(2+\mathbb{E}[X])p(1-p)+ 2p^2.
\end{align}
This implies
\begin{equation}
\mathbb{E}[X] = 1/p + 1/p^2.
\end{equation}
Alternatively let $X_n$ denote the number of flips required to obtain $n$ successive heads. We have
\begin{align}
\mathbb{E}[X_2]
& = \sum_{k=1}^\infty \mathbb{E}[X_2\,|\,X_1=k]P(X_1=k)
\end{align}
Now
\begin{align}
\mathbb{E}[X_2\,|\,X_1=k]
& = p(k+1) + (1-p)(k+1 + \mathbb{E}[X_2]) \\
& = 1+k+ (1-p)\mathbb{E}[X_2]
\end{align}
and thus
\begin{align}
\mathbb{E}[X_2]
& = \sum_{k=1}^\infty P(X_1=k)\left[1+k+ (1-p)\mathbb{E}[X_2]\right] \\
& = 1 + \mathbb{E}[X_1]+ (1-p)\mathbb{E}[X_2].
\end{align}
Setting $\mathbb{E}[X_1]=1/p$ and solving we obtain the result.
\end{exmp}
\subsubsection{Uniform}
The discrete uniform distribution on $[a,b]$ (where $a,b\in\mathbb{Z}$) is given by
\begin{equation}
P(X=k) = \begin{cases}
\frac{1}{b-a+1},& k = a,\ldots,b \\
0, & \mathrm{o.w.}
\end{cases}
\end{equation}
We have
\begin{equation}
\mathbb{E}[X] = \frac{1}{b-a+1}\sum_{k=a}^b k = \frac{a+b}{2}
\end{equation}
and
\begin{equation}
\var(X) = \frac{(a-b)(a-b-2)}{12}.
\end{equation}
\subsubsection{Multinomial}
Consider a sequence of $k$ independent and identically distributed random variables $(X_1,\ldots,X_k)$ with common probability distribution,
\begin{equation}
P(X_i = j) = p_j, \quad \quad j \in \{1,\ldots,n\}, \quad \quad \sum_{j=1}^n p_j = 1.
\end{equation}
The multinomial distribution is thus a generalization of the binomial distribution where each random variable can take on $n$ values. We have,
\begin{equation}
P(X_1=j_1,\ldots X_n = j_n) = \frac{k!}{j_1!\cdots j_n!}p_1^{j_1}\cdots p_n^{j_n}, \quad \quad \sum_{i=1}^n j_i = k.
\end{equation}
The factor
\begin{equation}
\frac{k!}{j_1!\cdots j_n!} = {k\choose{j_1}}{k-j_1\choose{j_2}}{k-j_1-j_2\choose{j_3}}\cdots{k-j_1-\cdots-j_{n-1}\choose{j_n}}
\end{equation}
is the number of ways to partition a $k$-element set into $n$ disjoint subsets, with $j_i$ elements in the $i$th subset.
\subsubsection{Hypergeometric}
The hypergeometric distribution describes the probability of picking $j$ red balls, after drawing $k$ times without replacement from an urn containing $N$ balls, of which $m$ are red,
\begin{equation}
P(X=j) = \frac{{m\choose{j}}{N-m\choose{k-j}}}{{N\choose{k}}},
\end{equation}
where ${m\choose{j}}$ is the number of ways of choosing $j$ balls from the $m$ red ones and ${N-m\choose{k-j}}$ is the number of ways of choosing $k-j$ balls from $N-m$ non-red ones. In the limit $N \to \infty$ holding $p\equiv m/N$ fixed the hypergeometric distribution approaches the binomial distribution; that is,
\begin{equation}
\lim_{N\to\infty}\frac{{m\choose{j}}{N-m\choose{k-j}}}{{N\choose{k}}} = {k\choose{j}} p^j (1-p)^{k-j}.
\end{equation}
\subsection{Continuous probability distributions}
\subsubsection{Normal}
A normal random variable $X \sim \mathcal{N}(\mu,\sigma^2)$ with mean $\mu$ and variance $\sigma^2$ has pdf
\begin{equation}
f_X(x) = \frac{1}{\sigma\sqrt{2\pi}}e^{-(x-\mu)^2/(2\sigma^2)}
\end{equation}
\subsubsection{Poisson}
The Poisson distribution can be thought of as the limit of the binomial distribution $\B(k,p)$ where $k\to\infty$ holding the expectation $\mathbb{E}[X] = kp = \lambda$ fixed,
\begin{align}
P(X=j)
& = \lim_{k\to\infty}{k\choose{j}}\left(\frac{\lambda}{k}\right)^j\left(1-\frac{\lambda}{k}\right)^{k-j} \\
& = \lim_{k\to\infty}\frac{k(k-1)\cdots(k-j+1)}{k^j}\frac{\lambda^j}{j!}\left(1-\frac{\lambda}{k}\right)^{k} \left(1-\frac{\lambda}{k}\right)^{-j} \\
& = \lim_{k\to\infty}\frac{(k^j + \cdots)}{k^j}\frac{\lambda^j}{j!}\left(1-\frac{\lambda}{k}\right)^{k} \left(1-\frac{\lambda}{k}\right)^{-j} \\
& = \frac{\lambda^je^{-\lambda}}{j!}.
\end{align}
The variance is also easily found as a limit of the binomial variance
\begin{equation}
\var(X) = \lim_{k\to\infty} kp(1-p) = \lim_{k\to\infty} \lambda\left(1-\frac{\lambda}{k}\right) = \lambda.
\end{equation}
If we relabel $\mathbb{E}[X] = \lambda \tau$ then $\lambda$ has the interpretation of the arrival rate, and the number of arrivals in an interval of time $\tau$ is given by
\begin{equation}
P(k,\tau) = \frac{(\lambda \tau)^ke^{-\lambda \tau}}{k!}.
\end{equation}
Notice for comparison if we take the limit of $\B(k,p)$ as $k\to\infty$ holding $p$ fixed we obtain a normal distribution by the central limit theorem.
\subsection{Sums of random variables}
If $X$ and $Y$ are independent random variables and $W = X+Y$ then
\begin{align}
P(W=w)
& = P(X+Y = w) \\
& = \sum_x P(X=x)P(Y=w-x).
\end{align}
For continuous distributions this formula becomes
\begin{equation}
f_W(w) = \int_{-\infty}^{\infty} dx\, f_X(x)f_Y(w-x).
\end{equation}
The moment generating function for a random variable $X$ is given by
\begin{equation}
M_X(t) = \mathbb{E}[e^{tX}].
\end{equation}
If $X$ and $Y$ are independent random variables and $W = X+Y$ then
\begin{equation}
M_W(t) = M_X(t)M_Y(t).
\end{equation}
The probability generating function is given by
\begin{equation}
g_X(s) = M_X(\log s) = \mathbb{E}[s^X].
\end{equation}
\begin{exmp}
Let $X$ and $Y$ be random variables that represent the rolls of two independent six-sided dice with faces $\{1,2,3,4,5,6\}$. We can obtain the distribution of $X+Y$ by computing its probability generating function
\begin{align}
g_{X+Y}(s)
& = g_X(s)g_Y(s) \\
& = g_X(s)^2 \\
& = \frac{s^2}{36}+\frac{s^3}{18}+\frac{s^4}{12}+\frac{s^5}{9}+\frac{5 s^6}{36}+\frac{s^7}{6}+\frac{5 s^8}{36}+\frac{s^9}{9}+\frac{s^{10}}{12}+\frac{s^{11}}{18}+\frac{s^{12}}{36}
\end{align}
Factoring this expression we obtain
\begin{align}
g_{X+Y}(s)
& = \frac{1}{36} s^2 (1+s)^2 \left(1-s+s^2\right)^2 \left(1+s+s^2\right)^2 \\
& = \frac{1}{4}(s^2 + 2s^3 + s^4)\frac{1}{9}(1+2 s^2+3 s^4+2 s^6+s^8) \\
& = g_{X_4}(s)g_{X_9}(s) \\
& = g_{X_4+X_9}(s).
\end{align}
This shows that the distribution of $X+Y$ is the same as the distribution of $X_4+X_9$ where $X_4$ and $X_9$ represent the rolls of two independent dice with faces $\{2,3,3,4\}$ and $\{0,2,2,4,4,4,6,6,8\}$, respectively.
\end{exmp}
\subsubsection{Generating arbitrarily distributed random variables}
Starting with a random variable $Y$ uniformly distributed on $[0,1]$ we can generate a random variable $X$ with arbitrary probability distribution function $f(x)$ as follows. Let
\begin{equation}
F(x) = \int_{-\infty}^x du \, f(u)
\end{equation}
be the cumulative distribution function for $X$. If $F(x)$ is continuous then the random variable $U \equiv F(X)$ is uniformly distributed on $[0,1]$ because
\begin{equation}
P(U \leq u) = P(F(X) \leq u) = P(X \leq F^{-1}(u)) = F\left(F^{-1}(u)\right) = u.
\end{equation}
The random variable $U$ is called the probability integral transform of $X$.
We therefore define $X$ by the equation $F(X) = U$ or
\begin{equation}
X = F^{-1}(U).
\end{equation}
In the case of the exponential distribution $f(x) = \lambda e^{-\lambda x}$ we have $F(x) = 1-e^{-\lambda x}$ and thus $X = -\frac{1}{\lambda}\log(1-Y)$.
\subsection{Correlation}
Suppose we want to simulate $n$ random variables with a given covariance matrix $\Sigma\in \mathbb{R}^{n \times n}$. This means we want a column vector of random variables $\vec{X} = (X_1,\ldots,X_n)^{\rm T}$ such that,
\begin{equation}
\Sigma = \mathbb{E} \big[(\vec{X}-\mathbb{E}[\vec{X}])(\vec{X}-\mathbb{E}[\vec{X}])^{\rm T} \big].
\end{equation}
Since the covariance matrix is positive semi-definite it can be diagonalized as $\Sigma = ADA^{\rm T}$ where $D$ is a diagonal matrix of eigenvalues and $A$ is an orthogonal matrix whose columns are the eigenvectors. If we take the columns of $A$ to be orthonormal then the eigenvalues in $D$ are the variances in the basis defined by the orthonormal column vectors; that is,
\begin{equation}
D = \diag(\sigma_1^2,\ldots,\sigma_n^2).
\end{equation}
We now construct an $n$-component column vector of $n$ independent random variables $\vec{Z} = (Z_1,\ldots,Z_n)^{\rm T}$ with $\var(Z_i) =\sigma_i^2$ and $\mathbb{E}\big[\vec{Z}\big] = \vec{0}$. Then $D$ is obviously the covariance matrix for $\vec{Z}$,
\begin{equation}
D = \mathbb{E} \big[ \vec{Z} \vec{Z}^{\rm T} \big].
\end{equation}
Left-multiplying this expression by $A$ and right-multiplying by $A^{\rm T}$ we obtain the identity,
\begin{equation}
\Sigma = \mathbb{E} \big[ A\vec{Z} (A\vec{Z})^{\rm T} \big].
\end{equation}
Thus we have found a vector of random variables $A\vec{Z}$ with vanishing mean and covariance matrix given by $\Sigma$. It follows that the vector of random variables satisfying the original requirement is given by,
\begin{equation}
\vec{X} = A\vec{Z} + \mathbb{E}\big[\vec{X}\big].
\end{equation}
In the special case where $\vec{Z}$ is a vector of independent Gaussian RVs, the resulting RVs $\vec{X}$ will also be Gaussian.
\subsubsection{Copulas}
Suppose that $(X_1,\ldots X_m)$ are continuous random variables with marginal distributions $F_1(x_1)$ up to $F_m(x_m)$ and joint distribution function $F(x_1,\ldots,x_m)$. Define the random variables $(U_1,\ldots,U_m)$ to be the associated probability integral transforms $U_1 = F_1(X_1), \ldots,U_m=F_m(X_m)$. The copula of $(X_1,\ldots X_m)$ is defined as the joint distribution for $(U_1,\ldots,U_m)$,
\begin{equation}
C(u_1,\ldots,u_m) = P(U_1 \leq u_1,\ldots,U_m \leq u_m).
\end{equation}
Notice that $0 \leq u_i \leq 1$ so $C$ is a probability distribution on $[0,1]^m$ and moreover the marginal distributions $P(U_i \leq u_i)$ are all uniform on $[0,1]$.
Using the definition of the probability integral transform this equation can also be written as,
\begin{align}
C(u_1,\ldots,u_m)
& = P(F_1(X_1) \leq u_1,\ldots,F_m(X_m) \leq u_m), \\
& = P\big(X_1 \leq F_1^{-1}(u_1),\ldots,X_m\leq F_m^{-1}(u_m)\big), \\
& = F\big(F_1^{-1}(u_1),\ldots,F_m^{-1}(u_m)\big).
\end{align}
It immediately follows that,
\begin{equation}
F(x_1,\ldots,x_m) = C(F_1(x_1),\ldots,F_m(x_m)).
\end{equation}
Taking draws $(X_1,\ldots,X_m)$ from $F$ is thus equivalent to taking draws $(U_1,\ldots,U_m)$ from $C$ and solving the $m$ sets of equations,
\begin{equation}
X_i = F_i^{-1}(U_i), \quad \quad 1 \leq i \leq m.
\end{equation}
\subsection{Law of large numbers}
\subsubsection{Weak law of large numbers}
%Markov/Chebyshev inequality:
%\begin{equation}
% P(|X-\mu| \geq c) \leq \frac{\sigma^2}{c^2}, \quad \quad P(|X-\mu| \geq k\sigma) \leq \frac{1}{k^2}
%\end{equation}
%\begin{proof}
%\begin{align}
% \mathbb{E}[(X-\mu)^2]
% & = \int_{-\infty}^{\infty} dx \, (x-\mu)^2 f_X(x) \\
% & \geq \left(\int_{-\infty}^{\mu-c} + \int_{\mu+c}^{\infty}\right)dx \, (x-\mu)^2 f_X(x) \\
% & \geq c^2P(X \leq \mu - c)+c^2P(X \geq \mu + c) \\
% & = c^2P(|X-\mu|\geq c).
%\end{align}
%\end{proof}
Let $X_1,\ldots, X_n$ be independent, identically distributed random variables with mean $\mu$ and variance $\sigma^2$. Define the sample mean to be the random variable
\begin{equation}
M_n = \frac{S_n}{n}, \quad \quad S_n = X_1 + \cdots + X_n
\end{equation}
where $\mathbb{E}[M_n] = \mu$ and $\var[M_n] = \sigma^2/n$. Applying the Chebyshev inequality to $M_n$ we obtain
\begin{equation}
P(|M_n - \mu| \geq \epsilon) \leq \frac{\sigma^2}{n\epsilon^2}.
\end{equation}
Taking $n \to \infty$ we find that for all $\epsilon$,
\begin{equation}
\lim_{n\to\infty} P(|M_n - \mu| \geq \epsilon) = 0.
\end{equation}
This implies that $M_n$ converges in probability to the true mean $\mu$.
\subsubsection{Central limit theorem}
Now consider the standardized sum which measures the number of standard deviations $S_n$ lies from its mean,
\begin{equation}
Z_n \equiv \frac{S_n - \mathbb{E}[S_n]}{\sigma_{S_n}} = \frac{S_n - n\mathbb{E}[X]}{\sqrt{n}\sigma} = \frac{S_n/n - \mu}{\sigma/\sqrt{n}}
\end{equation}
where $\mathbb{E}[Z_n]=0$ and $\var(Z_n) = 1$. In the limit as $n\to\infty$
\begin{equation}
P(Z_n \leq c) \to P(Z\leq c)
\end{equation}
where
\begin{equation}
f_Z(x) = \frac{1}{\sqrt{2\pi}}e^{-x^2/2}.
\end{equation}
\subsection{Stochastic processes}
\subsubsection{Markov process}
A discrete-time Markov chain is a stochastic process $\{ X_n \in S \; | \; 0 \leq n < \infty \}$ with the property that for all $n \geq 1$
\begin{equation}
P(X_n = j \; | \; X_{n-1} = i_{n-1},\ldots, X_0 = i_0) = P(X_n = j \; | \; X_{n-1} = i_{n-1}).
\end{equation}
A Markov chain can alternatively be characterized by the following equivalent condition that for all $n\geq 1$ and $m \geq 0$,
\begin{equation}
P(X_{n+m} = j \; | \; X_{n-1} = i_{n-1},\ldots, X_0 = i_0) = P(X_{n+m} = j \; | \; X_{n-1} = i_{n-1}).
\end{equation}
Let us show this for $m=1$ by defining the events $A = \{ X_{n+1} = j \}$, $B = \{ X_{n-1} = i_{n-1},\ldots, X_0 = i_0\}$ and $C_k = \{ X_n = k \}$. Then,
\begin{align}
P(A \; | \; B)
& = \frac{P(A \cap B)}{P(B)}, \\
& = \sum_{k \in S}\frac{P(A \cap B \cap C_k)}{P(B)}, \notag \\
& = \sum_{k \in S}\frac{P(A \cap B \cap C_k)}{P(B \cap C_k)}\frac{P(C_k \cap B)}{P(B)}, \notag \\
& = \sum_{k \in S} P(A \; | \; B, C_k) P(C_k \; | \; B).
\end{align}
If we now apply the Markov property we obtain,
\begin{equation}
P(A \; | \; B) = \sum_{k \in S} P(X_{n+1} = j \; | \; X_n = k, X_{n-1} = i_{n-1}) P(X_n = k \; | \; X_{n-1} = i_{n-1}),
\end{equation}
where we have left the event $\{ X_{n-1} = i_{n-1} \}$ in the conditioning for convenience. Using the definition of conditional probability we find that $P(X_n = k, X_{n-1} = i_{n-1})$ cancels and we are left with
\begin{align}
P(A \; | \; B)
& = \sum_{k \in S} \frac{P(X_{n+1} = j, X_n = k, X_{n-1} = i_{n-1})}{P(X_{n-1} = i_{n-1})}, \\
& = \frac{P(X_{n+1} = j, X_{n-1} = i_{n-1})}{P(X_{n-1} = i_{n-1})}, \\
& = P(X_{n+1} = j \; | \; X_{n-1} = i_{n-1}).
\end{align}
The $n$-step transition probabilities are defined by
\begin{equation}
p_{ij}(n) = P(X_n = j \; | \; X_0 = i),
\end{equation}
and the associated matrix is denoted $\mathbf{P}(n)$.
For time-homogeneous Markov chains the $n$-step transition probabilities satisfy,
\begin{equation}
p_{ij}(m+n) = \sum_{k \in S} p_{ik}(m)p_{kj}(n) \iff \mathbf{P}(m+n) = \mathbf{P}(m) \mathbf{P}(n)
\end{equation}
It follows that $ \mathbf{P}(n) = \mathbf{P}(1)^n$.
\subsubsection{Branching process}
Consider a branching process with offspring probabilities $p_0$, $p_1$ and $p_2$. Denote the probability of extinction by $p$. We condition the probability of extinction on each of the possible outcomes of the first branching event; namely,
\begin{align}
P(\mathrm{extinction})
& = P(\mathrm{extinction}|0)P(0) + P(\mathrm{extinction}|1)P(1) + P(\mathrm{extinction}|2)P(2)
\end{align}
Using independence we can express the extinction probability in terms of itself as
\begin{align}
p
& = p_0\cdot 1 + p_1p + p_2 p^2.
\end{align}
Solving the quadratic equation and using the fact that $p_0+p_1+p_2=1$ we find the roots $p=1$ and $p = p_0/p_2$; the extinction probability is the smallest non-negative root, $p = \min(1, p_0/p_2)$.
\subsubsection{Random walk}
Let $\{ X_n \; | \; 1 \leq n < \infty \}$ be a sequence of iid random variables with probability distribution $P(X_n = \pm 1) = 1/2$. Let $S_0$ be a constant and $S_n = S_0+ \sum_{k=1}^n X_k$ for $n \geq 1$. Denote by $\phi_k$ the probability that $S_n$ reaches $A$ before $-B$ given that $S_0 = k$. Then we obtain the recursion relation
\begin{equation}
\phi_k = \frac{1}{2}\phi_{k-1} + \frac{1}{2}\phi_{k+1}.
\end{equation}
The boundary conditions for this finite difference equation are $\phi_A = 1$ and $\phi_{-B} = 0$. The solution for $\phi_0$ is
\begin{equation}
\phi_0 = \frac{B}{A+B}.
\end{equation}
Notice that the recursion relation can be rearranged to give $\phi_{k-1}+\phi_{k+1} -2\phi_k = 0$ which is just the discretization of the differential equation $\phi'' = 0$.
\subsubsection{Martingales}
Let $\{ X_n \; | \; 1 \leq n \leq \infty \}$ be a sequence of random variables. A discrete-time Martingale is a sequence of random variables $M_n = f(X_1,\ldots, X_n)$ ($0 \leq n \leq \infty$) with the property that
\begin{equation}
\mathbb{E}[M_n|X_1,\ldots,X_{n-1}] = M_{n-1}
\end{equation}
or equivalently
\begin{equation}
\mathbb{E}[M_n-M_{n-1}|X_1,\ldots,X_{n-1}] = 0.
\end{equation}
A Martingale in continuous time is defined as a stochastic process satisfying $\mathbb{E}_t[X(T)] = X(t)$ for $T > t$. Note that this implies that the expected increments are zero
\begin{equation}
\mathbb{E}_t[X(T) - X(t)] = 0.
\end{equation}
Martingales have the property that their expectation value is constant; that is if $0 \leq s \leq t$ then $\mathbb{E}[X(t)] = \mathbb{E}[X(s)]$. This follows from the tower law of conditional expectations,
\begin{equation}
\mathbb{E}[X(t)] = \mathbb{E} \big[ \mathbb{E}[X(t) \; | \; \mathcal{F}_s] \big] = \mathbb{E}[X(s)].
\end{equation}
An important example of Martingale in continuous time is a Brownian motion. A Brownian motion is a continuous-time stochastic process which has continuous sample paths and satisfies
\begin{itemize}
\item
$w(0) = 0$
\item
The increments have distribution $w(T) - w(t) \sim \mathcal{N}(0,T-t)$ for $T>t$
\end{itemize}
Notice that the second criterion implies that the distribution of increments $w(T) - w(t)$ (for $T>t$) depends only on the length $T - t$ of the time interval and is hence independent of the previous history. A Brownian motion is a Martingale because
\begin{align}
\mathbb{E}_t[w(T)]
& = \mathbb{E}_t[w(T)-w(t)+w(t)] \\
& = \mathbb{E}_t[w(T)-w(t)] +\mathbb{E}_t[w(t)] \\
& = \mathbb{E}[w(T)-w(t)] + w(t) \\
& = w(t).
\end{align}
Moreover, since $\mathbb{E}[dw(t)] = 0$, we have $\mathbb{E}[dw(t)^2] = \var[dw(t)] = dt$.
The following Mathematica code generates and plots sample path for a Brownian motion
\begin{verbatim}
T = 10;
n = 1000;
dt = T/n;
dw = RandomVariate[NormalDistribution[0, Sqrt[dt]], n];
BrownianPath = Accumulate[Prepend[dw, 0]];
ListLinePlot[BrownianPath, Frame -> True]
\end{verbatim}
\begin{equation}
\includegraphics[width=70mm]{brownian}
\end{equation}
Another important example of a Martingale is the geometric Brownian motion which is a stochastic process given by $e^{-\sigma^2 t/2 + \sigma w(t)}$.
\subsubsection{Ito calculus}
A stochastic process $X$ is an Ito process if
\begin{equation}
dX(t) = \mu(t) dt + \sigma(t) dw(t)
\end{equation}
where $\mu(t)$ and $\sigma(t)$ are stochastic processes. An Ito process is called a diffusion if the drift and volatility depend only on the current state and time, that is $\mu(t) = \mu(X(t),t)$ and $\sigma(t) = \sigma(X(t),t)$. In the special case where $\mu$ and $\sigma$ are constants this equation can be integrated to
\begin{equation}
X(t) = X(0) + \mu t + \sigma w(t).
\end{equation}
Thus
\begin{equation}
X(T) - X(t) \sim \mathcal{N} \left(\mu (T-t), \sigma^2 (T-t)\right).
\end{equation}
Ito's lemma states that if $X(t)$ is an Ito process with drift $\mu(t)$ and volatility $\sigma(t)$ and $Y(t) = f(X(t),t)$ then
\begin{align}
dY(t)
& = f_X(X(t),t)dX(t) + f_t(X(t),t)dt + \frac{1}{2}f_{XX}(X(t),t)\sigma(t)^2 dt \\
& = \left[f_X(X(t),t)\mu(t) + f_t(X(t),t) + \frac{1}{2}f_{XX}(X(t),t)\sigma(t)^2 \right] dt + f_X(X(t),t) \sigma(t) dw(t) \notag.
\end{align}
The intuition for the second derivative term follows from considering the squared differential
\begin{equation}
dX(t)^2 = \mu(t)^2dt^2+2\mu(t)\sigma(t)dw(t)dt+\sigma(t)^2dw(t)^2.
\end{equation}
We then argue that the first and second terms are negligible compared to the third because of the identity $\mathbb{E}[dw(t)^2]=dt$.
Ito's lemma can be used to solve stochastic differential equations by changing variables. Let us rescale the drift and volatility,
\begin{equation}
dX(t) = X(t) \mu(t) dt + X(t) \sigma(t) dw(t)
\end{equation}
where $\mu(t)$ and $\sigma(t)$ now denote the relative drift and volatility, respectively. Consider the change of variables $Y(t) = f(X(t))= \log X(t)$. Then $\mu(t) \to X(t) \mu(t)$, $\sigma(t) \to X(t)\sigma(t)$, $f_X = 1/X$ and $f_{XX} = - 1/X^2$ so by Ito's lemma
\begin{equation}
dY(t) = \left[\mu(t) - \frac{1}{2}\sigma(t)^2\right]dt + \sigma(t) dw(t)\label{e:LogIto}
\end{equation}
so
\begin{equation}
\log X(t) = \log X(0) + \int_0^t \left[\mu(s) - \frac{1}{2}\sigma(s)^2\right]ds + \int_0^t \sigma(s) dw(s)
\end{equation}
Now assume that $\mu(t)$ and $\sigma(t)$ are time-dependent but not stochastic. Computing the expectation value and variance we find
\begin{align}
\mathbb{E}[\log X(t)]
& = \log X(0) + \int_0^t \left[\mu(s) - \frac{1}{2}\sigma(s)^2\right]ds \\
\var[\log X(t)]
& = \var\left[\int_0^t \sigma(s) dw(s)\right] \\
& = \int_0^t \sigma(s)^2\var[ dw(s)] \\
& = \int_0^t \sigma(s)^2 ds
\end{align}
and thus
\begin{equation}
\log X(t) \sim \mathcal{N} \left(\log X(0) + \int_0^t \left[\mu(s) - \frac{1}{2}\sigma(s)^2\right]ds,\int_0^t \sigma(s)^2 ds\right).
\end{equation}
Taking $\mu$ and $\sigma$ to be constants (geometric Brownian motion) we obtain
\begin{equation}
X(t) = X(0)e^{\left(\mu - \frac{1}{2}\sigma^2\right)t + \sigma w(t)}
\end{equation}
Hence
\begin{equation}
\log X(t) \sim \mathcal{N}\left(\log X(0) + \left(\mu - \frac{1}{2}\sigma^2\right)t,\sigma^2t\right)
\end{equation}
The following Mathematica code generates and plots a sample path for a geometric Brownian motion
\begin{verbatim}
(* Geometric Brownian motion: simulate Y = log X, then exponentiate.
   Y0 is the initial value of the log-process, so X(0) = Exp[Y0]. *)
T=10;n=1000;dt=T/n;mu=0.3;sigma=0.4;Y0=2;
dw=RandomVariate[NormalDistribution[0,Sqrt[dt]],n];
(* d(log X) = (mu - sigma^2/2) dt + sigma dw, by Ito's lemma *)
dY=(mu-sigma^2/2)dt+sigma dw;
YPath=Accumulate[Prepend[dY,Y0]];
XPath=Exp[YPath];
ListLinePlot[XPath,Frame->True]
\end{verbatim}
\begin{equation}
\includegraphics[width=70mm]{geometricbrownian}
\end{equation}
Suppose that $X$ is an Ito process under some probability measure $P$,
\begin{equation}
dX(t) = X(t)\mu(t) dt + X(t)\sigma(t) dw(t)
\end{equation}
where $w$ is a Brownian motion under $P$. Given another probability measure $Q$, Girsanov's theorem states that $X$ is an Ito process under $Q$ with the same volatility,
\begin{equation}
dX(t) = X(t) \hat{\mu}(t)dt + X(t)\sigma(t)d\hat{w}(t)
\end{equation}
where $\hat{w}$ is a Brownian motion under $Q$.
\begin{exmp}(Ornstein-Uhlenbeck process)
Consider the SDE
\begin{equation}
dr(t)
= \kappa(\theta(t)-r(t))dt+\sigma dw(t).
\end{equation}
We can rewrite this as
\begin{align}
dr(t) + \kappa r(t) dt
& = \kappa\theta(t)dt + \sigma dw(t) \\
e^{-\kappa t}d(e^{\kappa t}r(t))
& = \kappa\theta(t)dt + \sigma dw(t) \\
d(e^{\kappa t}r(t))
& = \kappa e^{\kappa t}\theta(t)dt + \sigma e^{\kappa t} dw(t)
\end{align}
Integrating both sides gives
\begin{equation}
r(t) = e^{-\kappa t}r(0) + \kappa \int_0^t e^{-\kappa(t-s)}\theta(s)ds + \sigma \int_{0}^t e^{-\kappa(t-s)} dw(s)
\end{equation}
It follows that
\begin{equation}
r(t) \sim \mathcal{N}\left(e^{-\kappa t}r(0) + \kappa \int_0^t e^{-\kappa(t-s)}\theta(s)ds,\frac{\sigma^2}{2\kappa}(1-e^{-2\kappa t})\right)
\end{equation}
If $\theta(s) = \theta$ then
\begin{equation}
r(t) = e^{-\kappa t}r(0) + \theta(1-e^{-\kappa t}) + \sigma \int_{0}^t e^{-\kappa(t-s)} dw(s)
\end{equation}
and the process is mean reverting to $\theta$.
\end{exmp}
\begin{exmp}
Consider the stochastic process
\begin{equation}
X(t) = \int_0^t ds \, w(s).
\end{equation}
This process is an Ito process with Brownian drift term $dX(t) = w(t)dt$. $X(t)$ is a normal random variable. Let us calculate its mean and variance. If we integrate by parts we find
\begin{equation}
X(t) = t \, w(t)-\int_0^t s \, dw(s) = \int_0^t(t-s)dw(s)
\end{equation}
where we have used $w(0)=0$. Thus $\mathbb{E}[X(t)]=0$ and $\var[X(t)] = \int_0^t(t-s)^2ds = t^3/3 $. We can also compute the variance of $X(t)$ using the fact that it has vanishing expectation value,
\begin{align}
\var[X(t)]
& = \mathbb{E}\left[X(t)^2\right] \\
& = \int_0^t ds_1\int_0^t ds_2 \, \mathbb{E}[w(s_1)w(s_2)] \\
& = \int_0^t ds_1\int_0^t ds_2 \min(s_1,s_2) \\
& = t^3 /3.
\end{align}
Here we have used the fact that the Brownian motion $w$ has independent increments so
\begin{equation}
\mathbb{E}[w(s_1)w(s_2)]
=
\begin{cases}
\mathbb{E}\left[w(s_1)^2\right]=s_1, & s_2 > s_1 \\
\mathbb{E}\left[w(s_2)^2\right]=s_2, & s_1 > s_2.
\end{cases}
\end{equation}
\end{exmp}
\subsubsection{Semimartingales}
A Poisson process $N(t)$ with intensity $\lambda$ is a continuous-time stochastic process such that
\begin{itemize}
\item $N(0) = 0$
\item $dN(t)$ is a Bernoulli random variable for all $t$
\item $\mathbb{E}_t[N(T) - N(t)] = \lambda(T-t)$ for all $T \geq t$.
\end{itemize}
A compound Poisson (jump) process with intensity $\lambda$ and jump size distribution $\mathcal{D}$ is given by
\begin{equation}
J(t) = \sum_{i=1}^{N(t)} J_i
\end{equation}
where $J_i$ are independent, identically distributed random variables under $\mathcal{D}$ which are independent of $N$.
A semimartingale is the sum of an Ito process and a jump process
\begin{equation}
dX(t) = \mu(t) dt + \sigma(t) dw(t) + dJ(t).
\end{equation}
The following Mathematica code generates a sample path for the process $dX(t) = dw(t) + dJ(t)$ where $dJ(t)$ is a jump process with intensity $\lambda = 1$ and jump size distribution $\mathcal{D} = \mathcal{N}(0,4)$.
\begin{verbatim}
(* Brownian motion plus compound Poisson jumps *)
T = 10; n = 2000; dt = T/n; lambda = 1; sigma = 2;
dw = RandomVariate[NormalDistribution[0, Sqrt[dt]], n];
(* In each step a jump occurs with probability lambda*dt;
   jump sizes are drawn from NormalDistribution[0, sigma] *)
dJ = Table[
If[RandomVariate[BernoulliDistribution[lambda dt]] == 0, 0,
RandomVariate[NormalDistribution[0, sigma]]], {i, 1, n}];
dX = dw + dJ;
(* Path starts at X(0) = 0; cumulative sum of the increments *)
BrownianJumpPath = Accumulate[Prepend[dX, 0]];
ListLinePlot[BrownianJumpPath, Frame -> True]
\end{verbatim}
\begin{equation}
\includegraphics[width=70mm]{brownianjump}
\end{equation}
Ito's lemma can be generalized to semimartingales. If $X(t)$ is a semimartingale and $Y(t) = f(X(t),t)$ then
\begin{align}
dY(t)
& = \left[f_X(X(t-),t)\mu(t) + f_t(X(t-),t) + \frac{1}{2}f_{XX}(X(t-),t)\sigma(t)^2 \right] dt + \notag \\
& \quad + f_X(X(t-),t) \sigma(t) dw(t) + f(X(t),t)-f(X(t-),t)
\end{align}
\subsection{Feynman-Kac formula}
Consider the following parabolic PDE ($x \in \mathbb{R}$ and $t < T$)
\begin{equation}
\frac{\partial u}{\partial t} + \frac{1}{2}\sigma(x,t)^2\frac{\partial^2 u}{\partial x^2} + \mu(x,t)\frac{\partial u}{\partial x} - V(x,t)u(x,t) = 0, \quad \quad u(x,T) = f(x).
\end{equation}
Now consider the diffusion process
\begin{equation}
dX(t) = \mu(X(t),t)dt + \sigma(X(t),t)dw(t)
\end{equation}
where $w$ is a Brownian motion under $Q$. The Feynman-Kac theorem relates the solution of the PDE to a conditional expectation value
\begin{equation}
u(x,t) = \mathbb{E}^Q \left(\left. e^{-\int_t^T ds \, V(X(s),s)}f(X(T)) \right| X(t) = x \right).
\end{equation}
The Feynman-Kac formula has a simple multi-dimensional generalization to PDEs dependent on $p$-dimensional spatial variables,
\begin{equation}
\frac{\partial u}{\partial t} + \sum_{i=1}^p \mu_i(x,t) \frac{\partial u}{\partial x_i} + \frac{1}{2}\sum_{i=1}^p\sum_{j=1}^p s_{ij}(x,t) \frac{\partial^2 u}{\partial x_i \partial x_j} - V(x,t)u(x,t) = 0,
\end{equation}
where $s_{ii}(x,t) \geq 0$ and $s_{ij}(x,t) = s_{ji}(x,t).$
The solution to this equation for $t < T$ subject to the terminal condition $u(x,T) = f(x)$ is given by the expectation
\begin{equation}
u(x,t) = \mathbb{E}^Q \left(\left. e^{-\int_t^T ds \, V(X(s),s)}f(X(T)) \right| X(t) = x \right)
\end{equation}
where $X_i$ are Ito processes given by
\begin{equation}
dX_i(t) = \mu_i(X(t),t) dt + \vec{\sigma}_i(X(t),t) \cdot d\vec{w}(t)
\end{equation}
where $\vec{w}$ is a $d$-dimensional Brownian motion and $s_{ij} = \vec{\sigma}_i \cdot \vec{\sigma}_j$.
\begin{itemize}
\item
In the special case where $\sigma(x,t) = \sigma$ and $\mu(x,t) = 0$ the PDE reduces to the backward heat equation
\item
The expectation value can be interpreted as an integration with respect to the Wiener measure
\item
The probabilistic interpretation of absence of mixed derivative terms is that the stochastic increments are independent
\end{itemize}
\section{Optimal control}
\subsection{Liquidation problem}
Consider the problem of selling $X$ shares of an asset during a time interval $[0,T]$. We assume that $n_j$ shares are sold during the interval $(t_{j-1},t_j]$ where $t_j = j \tau$, $\tau = T/N$ and $j \in \{ 1, \ldots, N \}$ such that $X = \sum_{j=1}^N n_j$. The price received $\hat{S}_j$ from the sale of $n_j$ shares during the interval $(t_{j-1},t_j]$ is assumed to be displaced from the prevailing market price $S_{j-1}$ by a temporary price impact which we model as a function dependent only on $n_j$,
\begin{equation}
\hat{S}_j = S_{j-1} - h(n_j).
\end{equation}
The price process is modeled as a discrete-time random process which suffers a permanent impact $- g(n_j)$ dependent on the number of trades executed during that interval,
\begin{equation}
S_{j} = S_{j-1} + \sigma \tau^{1/2} z_{j} - g(n_{j}),
\end{equation}
where $(z_1,\ldots,z_N)$ is a vector of independent and identically distributed $\mathcal{N}(0,1)$ random variables.
The recursive formula for the price process can be used to express it in terms of today's spot price $S_0$ as follows,
\begin{equation}
S_k = S_0 + \sum_{j=1}^k \big[ \sigma \tau^{1/2} z_j - g(n_j) \big].
\end{equation}
Hence the total revenue generated from the trading strategy is given by,
\begin{align}
R
& = \sum_{k=1}^N \hat{S}_k n_k, \\
& = S_0 \sum_{k=1}^N n_k + \sigma \tau^{1/2} \sum_{k=1}^N \sum_{j=1}^{k-1} z_j n_k - \sum_{k=1}^N\sum_{j=1}^{k-1} g(n_j)n_k - \sum_{k=1}^N h(n_k)n_k, \\
& = S_0 X+ \sigma \tau^{1/2} \sum_{k=1}^N z_k x_k - \sum_{k=1}^N g(n_k)x_k - \sum_{k=1}^N h(n_k)n_k,
\end{align}
where we have defined the inventory process $x_k$ which satisfies $x_0 = X$ and $x_N = 0$,
\begin{equation}
x_k = X-\sum_{j=1}^k n_j.
\end{equation}
The expected value and variance of the total cost of trading $C = S_0 X - R$ is given by
\begin{align}
\mathbb{E}[C]
& = \sum_{k=1}^N \big[g(n_k) x_k + h(n_k) n_k \big], \\
& = \sum_{k=1}^N \big[g(x_{k-1}-x_k) x_k + h(x_{k-1}-x_k) (x_{k-1}-x_k) \big], \\
\var (C)
& = \sigma^2 \tau \sum_{k=1}^N x_k^2.
\end{align}
Assuming linear price impact functions $g(x) = c x$ and $h(x) = (\eta/\tau) x$ we obtain,
\begin{equation}
C(x)
= \frac{cX^2}{2} + \sum_{k=1}^N \tau \, \frac{1}{2}m\left(\frac{x_{k-1}-x_k}{\tau} \right)^2 - \sum_{k=1}^N \sigma \tau^{1/2} z_k x_k ,
\end{equation}
where $m = 2\eta - c\tau$. Let us define a mean-variance objective function as follows,
\begin{align}
S[x_1,\ldots,x_{N-1}]
& = \mathbb{E}[C] + \lambda \var(C) - \frac{cX^2}{2}, \\
& = \sum_{k=1}^N \tau \left[ \frac{1}{2}m\left(\frac{x_{k-1}-x_k}{\tau} \right)^2 + \frac{1}{2}\omega^2 x_k^2\right],
\end{align}
where we have defined $\omega^2 = 2 \lambda \sigma^2$. Optimizing the objective function by setting $\partial S / \partial x_i = 0$ we obtain,
\begin{equation}
\frac{x_{i+1}-2x_i + x_{i-1}}{\tau^2} = \omega^2 x_i.
\end{equation}
In the limit $\tau \to 0$ the objective function takes the form of an action functional for a particle of mass $m$ in an inverted harmonic potential $V(x) = -\frac{1}{2}\omega^2 x^2$,
\begin{equation}
S[x] = \int_0^T dt \left[\frac{1}{2}m \dot{x}^2 +\frac{1}{2}\omega^2 x^2 \right].
\end{equation}
The solution of the corresponding Euler-Lagrange equation $m\ddot{x} = \omega^2 x$ subject to the boundary conditions $x(0) = X$ and $x(T) = 0$ is thus,
\begin{equation}
x(t) = X\frac{\sinh\big[\frac{\omega}{\sqrt{m}}(T-t)\big]}{\sinh\big[\frac{\omega}{\sqrt{m}}T\big]}.
\end{equation}
Setting $t = k \tau$ we obtain the approximate solution of the finite-difference equation found by Almgren and Chriss \cite{almgren}.
\subsubsection{Dynamic strategies}
In this section we review \cite{sepin} which considers the liquidation problem with temporary and permanent price impact functions of the form,
\begin{align}
\hat{S}_j
& = S_{j-1} - \eta_j n_j, \\
S_j
& = S_{j-1} + \tau^{1/2} \sigma_j z_j - c n_j
\end{align}
where $\{ \eta_j \}_{j = 0}^N$ and $\{ \sigma_j \}_{j = 0}^N$ are Markov chains with transition probabilities,
\begin{equation}
p_{k-1}^{(v,w)} = \mathbb{Q}[(\sigma_k,\eta_k) = w \; | \; (\sigma_{k-1},\eta_{k-1})=v], \quad k \in \{1,\ldots,N \}.
\end{equation}
The total cost of trading is now given by
\begin{equation}
C(x) = \frac{cX^2}{2} + \sum_{k=1}^N \left[ \left(\eta_k - \frac{c}{2}\right)(x_{k-1}-x_k)^2 - \sigma_k \tau^{1/2} z_k x_k \right].
\end{equation}
In contrast to Almgren and Chriss, the risk-aversion parameter is taken to be zero so the optimization problem is,
\begin{equation}
x^\ast = \argmin_{x \in \mathcal{A}} \mathbb{E}\big[ Q(x) \; | \; (\sigma_0,\eta_0) = v \big],
\end{equation}
where for convenience we have defined $Q(x) = C(x) - cX^2/2$ and $\mathcal{A}$ denotes the set of admissible strategies $(x_0,\ldots,x_N)$ with the boundary conditions $x_0 = X$ and $x_N = 0$.
Let us define
\begin{equation}
J_n^{(v)}(z) = \min_{x \in \mathcal{A}_n(z)} \mathbb{E}\big[ Q_n(x) \; | \; (\sigma_n,\eta_n) = v\big] ,
\end{equation}
where $\mathcal{A}_n(z)$ denotes the set of admissible substrategies $(x_n,\ldots,x_N)$ satisfying the boundary conditions $x_n = z$ and $x_N = 0$, and we have defined
\begin{equation}
Q_n(x) = \sum_{i=n+1}^N (x_{i-1}-x_i)^2\tilde{\eta}_i, \quad \quad \tilde{\eta}_i = \eta_i - \frac{c}{2}.
\end{equation}
The inductive hypothesis is,
\begin{equation}
J_{n}^{(v)}(x_n) = x_{n}^2 a_{n}^{(v)}, \quad n \in \{ 0 ,\ldots , N-1 \}.
\end{equation}
Let us check this in the case $n = N-1$. Using the fact that $x_N = 0$ we obtain $Q_{N-1}(x) = (x_{N-1}^2)\tilde{\eta}_N$ and thus
\begin{align}
J^{(v)}_{N-1}(x_{N-1})
& = \min_{\mathcal{A}_{N-1}(x_{N-1})} (x_{N-1})^2 \mathbb{E}[\tilde{\eta}_N \; | \; (\sigma_{N-1},\eta_{N-1})=v], \\
& = (x_{N-1})^2 \mathbb{E}[\tilde{\eta}_N \; | \; (\sigma_{N-1},\eta_{N-1})=v].
\end{align}
We therefore obtain,
\begin{equation}
a^{(v)}_{N-1} = \mathbb{E}[\tilde{\eta}_N \; | \; (\sigma_{N-1},\eta_{N-1})=v].
\end{equation}
Now,
\begin{equation}
Q_{n-1}(x) = Q_n + (x_{n-1} - x_n)^2\tilde{\eta}_{n}
\end{equation}
so,
\begin{align}
J^{(v)}_{n-1}(x_{n-1})
& = \min_{\mathcal{A}_{n-1}(x_{n-1})} \mathbb{E} [Q_{n-1} \; | \; (\sigma_{n-1},\eta_{n-1}) = v], \notag \\
& = \min_{\mathcal{A}_{n-1}(x_{n-1})} \big\{\mathbb{E} [Q_{n} \; | \; (\sigma_{n-1},\eta_{n-1}) = v]+ (x_{n-1} -x_{n})^2 \mathbb{E} [\tilde{\eta}_n \; | \; (\sigma_{n-1},\eta_{n-1}) = v]\big\}, \notag \\
& = \min_{x_n} \Big\{\min_{\mathcal{A}_{n}(x)}\mathbb{E} [Q_{n} \; | \; (\sigma_{n-1},\eta_{n-1}) = v]+ (x_{n-1} -x_{n})^2 \mathbb{E} [\tilde{\eta}_n \; | \; (\sigma_{n-1},\eta_{n-1}) = v]\Big\}. \end{align}
Using the tower rule for conditional expectations we obtain,
\begin{align}
\min_{\mathcal{A}_{n}(x)}\mathbb{E} [Q_{n} \; | \; (\sigma_{n-1},\eta_{n-1}) = v]
& = \min_{\mathcal{A}_{n}(x_n)} \mathbb{E} \big[ \mathbb{E} [Q_{n} \; | \; (\sigma_n,\eta_n)] \; | \; (\sigma_{n-1},\eta_{n-1}) = v \big ], \notag \\
& = \min_{\mathcal{A}_{n}(x_n)} \sum_{w} \mathbb{E} [Q_{n} \; | \; (\sigma_{n},\eta_{n}) = w]\mathbb{Q}[(\sigma_n,\eta_n) = w \; | \; (\sigma_{n-1},\eta_{n-1})=v], \notag \\
& = \sum_{w} \Big[\min_{\mathcal{A}_{n}(x_n)}\mathbb{E} [Q_{n} \; | \; (\sigma_{n},\eta_{n}) = w] \Big] \mathbb{Q}[(\sigma_n,\eta_n) = w \; | \; (\sigma_{n-1},\eta_{n-1})=v], \notag \\
& = x_n^2 \sum_{w} p_{n-1}^{(v,w)} a_n^{(w)},
\end{align}
where we have used the inductive hypothesis $\min_{\mathcal{A}_{n}(x_n)} \mathbb{E} [Q_{n} \; | \; (\sigma_{n},\eta_{n}) = w] = x_n^2 a_n^{(w)}$. Substituting back we obtain,
\begin{align}
J^{(v)}_{n-1}(x_{n-1})
& = \min_{x_n} \bigg\{ x_n^2\sum_{w}p_{n-1}^{(v,w)} a_n^{(w)}+ (x_{n-1} -x_{n})^2 \mathbb{E} [\tilde{\eta}_n \; | \; (\sigma_{n-1},\eta_{n-1}) = v]\bigg\}.
\end{align}
So $x^\ast$ satisfies the recursion,
\begin{equation}
x_n^\ast = x_{n-1}^\ast \frac{\mathbb{E} [\tilde{\eta}_n \; | \; (\sigma_{n-1},\eta_{n-1}) = v]}{\mathbb{E} [\tilde{\eta}_n \; | \; (\sigma_{n-1},\eta_{n-1}) = v]+\sum_{w}p_{n-1}^{(v,w)} a_n^{(w)}}.
\end{equation}
Substituting back into the value function we see that the inductive hypothesis is satisfied by the optimal solution.
\subsection{Infinite horizon}
In this section we consider optimal trading of a single asset with an infinite time horizon \cite{sepin2}. Let $S_n$ denote the price of an asset at time $t_n$ and let $r$ denote the risk-free rate. Suppose the excess returns $r_n = S_n - (1+r)S_{n-1}$ from time $t_{n-1}$ to time $t_n$ are described in terms of a $K$-dimensional vector of stochastic factors $\vec{f}_n$ by the following linear hypothesis,
\begin{align}
r_n
& = \vec{\theta} \cdot \vec{f}_{n} + u_n,
\end{align}
where $u_n$ is a random variable with mean zero and variance $\sigma^2$. The factors are assumed to evolve according to a mean-reverting process,
\begin{align}
\vec{f}_n
& = \vec{f}_{n-1} + \kappa(\vec{m}_n - \vec{f}_{n-1}) + \vec{\xi}_n,
\end{align}
where $\vec{\xi}_n$ is a sequence of iid mean zero random vectors with covariance matrix $\Omega$, $\vec{m}_n$ is a vector-valued stochastic process representing the mean-reversion values and $\kappa \in \mathbb{R}^{K\times K}$ is a matrix which describes the speed of mean reversion of the stochastic factor vector $\vec{f}_n$. Let $x_n$ denote the amount of holdings in the asset at time $t_n$. The cost of trading $x_{n}-x_{n-1}$ shares over the interval $(t_{n-1},t_n]$ is given by,
\begin{equation}
C_n = (x_{n}-x_{n-1}) \hat{S}_n,
\end{equation}
where $\hat{S}_n$ is the transacted price which differs from the spot price $S_{n-1}$ by a term proportional to the number of trades executed $x_n-x_{n-1}$, multiplied by a stochastic liquidity $\eta_n$,
\begin{equation}
\hat{S}_n = S_{n-1} + \eta_n (x_n - x_{n-1}).
\end{equation}
The stochastic process $(\vec{m}_n,\eta_n)$ is taken to be a time-homogeneous Markov chain with transition probabilities,
\begin{equation}
p^{(v,w)} = \mathbb{Q}[(\vec{m}_k,\eta_k) = w \; | \; (\vec{m}_{k-1},\eta_{k-1})=v].
\end{equation}
The optimal trading strategy is such that it maximizes the expected returns minus transaction costs. Following Markowitz, it is instructive to penalize the risk of the position by introducing a risk-aversion coefficient $\gamma$. The required strategy as a function of the initial state $v$ is thus,
\begin{equation}
x^\ast = \argmax_{x \in \mathcal{A}} \mathbb{E}\big[ Q(x) \; | \; (\vec{m}_0,\eta_0) = v\big],
\end{equation}
where
\begin{equation}
Q(x) = \sum_{k\geq 1} r_k x_k - \eta_k(x_k-x_{k-1})^2 - \frac{1}{2} x_k^2 \gamma \sigma^2.
\end{equation}
and $\mathcal{A}$ denotes the set of strategies with $x_0 = X$ and $f_0 = f$. Let us define,
\begin{equation}
J_{n}^{(v)}(z,f) = \max_{x \in \mathcal{A}_n(z,f)} \mathbb{E}\big[ Q_n(x) \; | \; (\vec{m}_n,\eta_n) = v\big], \quad \quad n \in \{0,1,\ldots,\infty \},
\end{equation}
where
\begin{equation}
Q_n(x) = \sum_{k\geq n+1} r_k x_k - \eta_k(x_k-x_{k-1})^2 - \frac{1}{2} x_k^2 \gamma \sigma^2.
\end{equation}
We are interested in the case $n = 0$ but since the problem is time-homogeneous we choose a time-homogeneous ansatz. Following \cite{garleanu}, we choose the following quadratic ansatz,
\begin{equation}
J^{(v)}(z,\vec{f}) = - \frac{1}{2} a_v z^2 + b_v z + \vec{f} \cdot A_v \vec{f} + \vec{c}_v \cdot \vec{f} + \vec{d}_v \cdot \vec{f} z + e_v.
\end{equation}
Proceeding inductively we find,
\begin{align}
J_{n-1}^{(v)}(x_{n-1},\vec{f}_{n-1})
& = \max_{x \in \mathcal{A}_{n-1}(x_{n-1},f_{n-1})} \mathbb{E}\big[ Q_{n-1}(x) \; | \; (\vec{m}_{n-1},\eta_{n-1}) = v\big], \\
& = \max_{x \in \mathcal{A}_{n-1}(x_{n-1},f_{n-1})} \Big\{ \mathbb{E}\big[ Q_{n}(x) \; | \; (\vec{m}_{n-1},\eta_{n-1}) = v\big] + \notag \\
& \quad + \mathbb{E}\big[ r_n \; | \; (\vec{m}_{n-1},\eta_{n-1}) = v\big] x_n - \frac{1}{2} x_n^2 \gamma\sigma^2 - (x_n - x_{n-1})^2 \mathbb{E}\big[ \eta_n \; | \; (\vec{m}_{n-1},\eta_{n-1}) = v\big] \Big\} \notag.
\end{align}
Using the tower rule for conditional expectations we obtain,
\begin{align}
\mathbb{E}\big[ Q_{n}(x) \; | \; (\vec{m}_{n-1},\eta_{n-1}) = v\big]
& = \mathbb{E} \big[ \mathbb{E} [Q_{n} \; | \; (\vec{m}_n,\eta_n) ] \; | \; (\vec{m}_{n-1},\eta_{n-1}) = v \big ], \\
& = \sum_w p^{(v,w)} \mathbb{E} [Q_{n} \; | \; (\vec{m}_n,\eta_n) = w].
\end{align}
Now recall that in our optimization problem $\vec{f}_{n-1}$ is known. On the other hand, $\vec{f}_n$ is uncertain because it depends on the random variables $\vec{\xi}_n$. Therefore we condition on the value of $\vec{\xi}_n$ as follows,
\begin{equation}
\mathbb{E}\big[ Q_{n}(x) \; | \; (\vec{m}_{n-1},\eta_{n-1}) = v\big]
= \sum_w p^{(v,w)} \int d\vec{\xi} \, \rho(\vec{\xi})\mathbb{E} \big[ Q_{n} \; | \; (\vec{m}_n,\eta_n) = w,\;\vec{\xi}_n = \vec{\xi} \big].
\end{equation}
Since $\vec{\xi}_n$ is known in the above expectation, and using the fact that
\begin{align}
\mathbb{E}[u_n \; | \; (\vec{m}_{n-1},\eta_{n-1}) = v]
& = 0, \\
\mathbb{E}[\vec{\xi}_n \; | \; (\vec{m}_{n-1},\eta_{n-1}) = v]
& = 0,
\end{align}
we obtain,
\begin{align}
J_{n-1}^{(v)}(x_{n-1},f_{n-1})
& = \max_{x_n} \Big\{ \sum_w p^{(v,w)} \int d\vec{\xi} \, \rho(\vec{\xi}) \left.J^{(w)}(x_n,\vec{f}_n)\right|_{\vec{f}_n = \vec{f}_{n-1}+\kappa (\vec{m}_{(w)}-\vec{f}_{n-1}) + \vec{\xi}} + \\
& \quad + \vec\theta \cdot \Big[(1-\kappa)\vec{f}_{n-1} + \sum_w p^{(v,w)}\kappa \vec{m}_{(w)} \Big]x_n - \frac{1}{2} x_n^2 \gamma\sigma^2 - (x_n - x_{n-1})^2 \sum_w p^{(v,w)}\eta_{(w)} \Big\}.\notag
\end{align}
Integrating over the random vector $\vec{\xi}$ we obtain,
\begin{align}
& \int d\vec{\xi} \, \rho(\vec{\xi}) \left.J^{(w)}(x_n,\vec{f}_n)\right|_{\vec{f}_n = \vec{f}_{n-1}+\kappa (\vec{m}_{(w)}-\vec{f}_{n-1}) + \vec{\xi}}
= -\frac{1}{2}a_w x_n^2 + b_w x_n + \vec{d}_w \cdot \big[ \vec{f}_{n-1}+\kappa (\vec{m}_{(w)}-\vec{f}_{n-1}) \big] x_n \notag \\
& + \big[ \vec{f}_{n-1}+\kappa (\vec{m}_{(w)}-\vec{f}_{n-1}) \big] \cdot A_w \big[ \vec{f}_{n-1}+\kappa (\vec{m}_{(w)}-\vec{f}_{n-1}) \big] + \tr(A_w \Omega) + \vec{c}_w \cdot \big[ \vec{f}_{n-1}+\kappa (\vec{m}_{(w)}-\vec{f}_{n-1}) \big] + e_w. \notag
\end{align}
Substituting back and collecting terms in $x_n$ gives,
\begin{align}
J_{n-1}^{(v)}(x_{n-1},f_{n-1})
& = \max_{x_n} \sum_w p^{(v,w)}\Big\{ -\frac{1}{2}(\gamma\sigma^2 + a_w + 2\eta_{(w)})x_n^2 - x_{n-1}^2 \eta_{(w)} + \notag \\
& \quad + x_n\Big(\vec\theta \cdot \Big[(1-\kappa)\vec{f}_{n-1} +\kappa \vec{m}_{(w)} \Big]+ 2x_{n-1}\eta_{(w)} + b_w + \vec{d}_w \cdot \big[(1-\kappa)\vec{f}_{n-1}+\kappa \vec{m}_{(w)}\big]\Big) + \notag \\
& \quad + \big[ (1-\kappa)\vec{f}_{n-1}+\kappa \vec{m}_{(w)} \big] \cdot A_w \big[ (1-\kappa)\vec{f}_{n-1}+\kappa \vec{m}_{(w)} \big] + \tr(A_w \Omega) + \notag \\