<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta name="author" content="Jonathan Landy" />
<meta property="og:type" content="article" />
<meta name="twitter:card" content="summary">
<meta name="keywords" content=", Methods, Theory, " />
<meta property="og:title" content="Linear Regression "/>
<meta property="og:url" content="./linear-regression" />
<meta property="og:description" content="We review classical linear regression using vector-matrix notation. In particular, we derive a) the least-squares solution, b) the fit’s coefficient covariance matrix — showing that the coefficient estimates are most precise along directions that have been sampled over a large range of values (the high variance directions, a la PCA …" />
<meta property="og:site_name" content="EFAVDB" />
<meta property="og:article:author" content="Jonathan Landy" />
<meta property="og:article:published_time" content="2016-05-29T11:27:00-07:00" />
<meta name="twitter:title" content="Linear Regression ">
<meta name="twitter:description" content="We review classical linear regression using vector-matrix notation. In particular, we derive a) the least-squares solution, b) the fit’s coefficient covariance matrix — showing that the coefficient estimates are most precise along directions that have been sampled over a large range of values (the high variance directions, a la PCA …">
<title>Linear Regression · EFAVDB
</title>
<link href="//netdna.bootstrapcdn.com/twitter-bootstrap/2.3.2/css/bootstrap-combined.min.css" rel="stylesheet">
<link href="https://fonts.googleapis.com/css?family=Fira+Sans:300,400,700" rel="stylesheet" type='text/css' />
<link href="https://fonts.googleapis.com/css?family=Cardo:400,700" rel="stylesheet" type='text/css' />
<link rel="stylesheet" type="text/css" href="./theme/css/elegant.prod.css" media="screen">
<link rel="stylesheet" type="text/css" href="./theme/css/custom.css" media="screen">
<link rel="apple-touch-icon" sizes="180x180" href="./images/apple-touch-icon.png">
<link rel="icon" type="image/png" sizes="32x32" href="./images/favicon-32x32.png">
<link rel="icon" type="image/png" sizes="16x16" href="./images/favicon-16x16.png">
<link rel="manifest" href="./images/site.webmanifest">
<link rel="mask-icon" href="./images/safari-pinned-tab.svg" color="#5bbad5">
<meta name="msapplication-TileColor" content="#da532c">
<meta name="theme-color" content="#ffffff">
</head>
<body>
<div id="content">
<div class="navbar navbar-static-top">
<div class="navbar-inner">
<div class="container-fluid">
<a class="btn btn-navbar" data-toggle="collapse" data-target=".nav-collapse">
<span class="icon-bar"></span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
</a>
<a class="brand" href="./"><span class=site-name>EFAVDB</span></a>
<div class="nav-collapse collapse">
<ul class="nav pull-right top-menu">
<li >
<a href=".">Home</a>
</li>
<li ><a href="./pages/authors.html">Authors</a></li>
<li ><a href="./categories.html">Categories</a></li>
<li ><a href="./tags.html">Tags</a></li>
<li ><a href="./archives.html">Archives</a></li>
<li><form class="navbar-search" action="./search.html" onsubmit="return validateForm(this.elements['q'].value);"> <input type="text" class="search-query" placeholder="Search" name="q" id="tipue_search_input"></form></li>
</ul>
</div>
</div>
</div>
</div>
<div class="container-fluid">
<div class="row-fluid">
<div class="span1"></div>
<div class="span10">
<article itemscope>
<div class="row-fluid">
<header class="page-header span8 offset2">
<h1>
<a href="./linear-regression">
Linear Regression
</a>
</h1>
</header>
<div class="span2"></div>
</div>
<div class="row-fluid">
<div class="span8 offset2 article-content">
<p>We review classical linear regression using vector-matrix notation. In particular, we derive a) the least-squares solution, b) the fit’s coefficient covariance matrix — showing that the coefficient estimates are most precise along directions that have been sampled over a large range of values (the high variance directions, a la <span class="caps">PCA</span>), and c) an unbiased estimate for the underlying sample variance (assuming normally-distributed errors in this last case). We then review how these last two results can be used to provide confidence intervals / hypothesis tests for the coefficient estimates. Finally, we show that similar results follow from a Bayesian approach.</p>
<p>Last edited July 23, 2016.</p>
<h3>Introduction</h3>
<p>Here, we consider the problem of fitting a linear curve to <span class="math">\(N\)</span> data points of the form <span class="math">\((\vec{x}_i, y_i),\)</span> where the <span class="math">\(\{\vec{x}_i\}\)</span> are column vectors of predictors that sit in an <span class="math">\(L\)</span>-dimensional space and the <span class="math">\(\{y_i\}\)</span> are the response values we wish to predict given the <span class="math">\(\{\vec{x}_i\}\)</span>. The linear approximation will be defined by a set of coefficients <span class="math">\(\{\beta_j\}\)</span>, so that
</p>
<div class="math">\begin{align}
\hat{y}_i \equiv \sum_j x_{i,j} \beta_j = \vec{x}_i^T \cdot \vec{\beta} . \tag{1} \label{1}
\end{align}</div>
<p>
We seek the <span class="math">\(\vec{\beta}\)</span> that minimizes the sum of squared <span class="math">\(y\)</span> errors,
</p>
<div class="math">\begin{align} \tag{2} \label{2}
J = \sum_i \left ( y_i - \hat{y}_i \right)^2 = \sum_i \left (y_i - \vec{x}_i^T \cdot \vec{\beta} \right)^2.
\end{align}</div>
<p>
It turns out that this is a problem where one can easily derive an analytic expression for the optimal solution. It’s also possible to derive an expression for the variance in the optimal solution — that is, how much we might expect the optimal parameter estimates to change were we to start with some other <span class="math">\(N\)</span> data points instead. These estimates can then be used to generate confidence intervals for the coefficient estimates. Here, we review these results, give a simple interpretation to the theoretical variance, and finally show that the same results follow from a Bayesian approach.</p>
<h3>Optimal solution</h3>
<p>We seek the coefficient vector that minimizes (\ref{2}). We can find it by differentiating this cost function with respect to <span class="math">\(\vec{\beta}\)</span> and setting the result to zero. This gives,
</p>
<div class="math">\begin{align} \tag{3}
\partial_{\beta_j} J = -2 \sum_i \left (y_i - \sum_k x_{i,k} \beta_k \right) x_{i,j} = 0.
\end{align}</div>
<p>
We next define the matrix <span class="math">\(X\)</span> so that <span class="math">\(X_{i,j} = x_{i,j}\)</span>, the <span class="math">\(j\)</span>-th component of <span class="math">\(\vec{x}_i\)</span>. Plugging this into the above, we obtain
</p>
<div class="math">\begin{align}
\partial_{\beta_j} J &= -2 \sum_i X_{j,i}^T \left (y_i - \sum_k X_{i,k} \beta_k \right) = 0 \\
&\Rightarrow X^T \cdot \left ( \vec{y} - X \cdot \vec{\beta}\right ) = 0.\tag{4}
\end{align}</div>
<p>
Rearranging gives
</p>
<div class="math">\begin{align}
X^T X \cdot \vec{\beta} = X^T \cdot \vec{y} \to
\vec{\beta} = (X^T X)^{-1} \cdot X^T \cdot \vec{y} \tag{5} \label{optimal}
\end{align}</div>
<p>
This is the squared-error-minimizing solution.</p>
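<p>As a quick numerical sketch of (\ref{optimal}), with made-up data and variable names of our own choosing, the closed-form solution can be checked against numpy’s built-in least-squares routine:</p>
<pre><code>import numpy as np

np.random.seed(0)
N, L = 100, 3                            # number of samples, number of predictors
X = np.random.randn(N, L)                # design matrix; row i is x_i^T
beta_true = np.array([1.5, -2.0, 0.5])
y = X @ beta_true + 0.1 * np.random.randn(N)

# Closed-form least-squares solution, eq. (5): beta = (X^T X)^{-1} X^T y
beta_hat = np.linalg.solve(X.T @ X, X.T @ y)

# Cross-check against numpy's lstsq
beta_lstsq, *_ = np.linalg.lstsq(X, y, rcond=None)
print(np.allclose(beta_hat, beta_lstsq))   # True
</code></pre>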
<h3>Parameter covariance matrix</h3>
<p>Now, when one carries out a linear fit to some data, the best-fit line often does not pass exactly through all of the data points. Here, we consider the case where the reason for this discrepancy is not that the posited linear form is incorrect, but that the <span class="math">\(y\)</span>-values also depend on some hidden variables that were not measured. Assuming our data points represent random samples over these hidden variables, we can model their effect as adding a random noise term to the form (\ref{1}), so that
</p>
<div class="math">\begin{align}\tag{6} \label{noise}
y_i = \vec{x}_i^T \cdot \vec{\beta}_{true} + \epsilon_i,
\end{align}</div>
<p>
with <span class="math">\(\langle \epsilon_i \rangle =0\)</span>, <span class="math">\(\langle \epsilon_i^2 \rangle = \sigma^2\)</span>, and <span class="math">\(\vec{\beta}_{true}\)</span> the exact (but unknown) coefficient vector.</p>
<p>Plugging (\ref{noise}) into (\ref{optimal}), we see that <span class="math">\(\langle \vec{\beta} \rangle = \vec{\beta}_{true}\)</span>. However, the variance of the <span class="math">\(\epsilon_i\)</span> injects some uncertainty into our fit: Each realization of the noise will generate slightly different <span class="math">\(y\)</span> values, causing the <span class="math">\(\vec{\beta}\)</span> fit coefficients to vary. To estimate the magnitude of this effect, we can calculate the covariance matrix of <span class="math">\(\vec{\beta}\)</span>. At fixed (constant) <span class="math">\(X\)</span>, plugging in (\ref{optimal}) for <span class="math">\(\vec{\beta}\)</span> gives
</p>
<div class="math">\begin{align}
cov(\vec{\beta}, \vec{\beta}) &= cov \left( (X^T X)^{-1} \cdot X^T \cdot \vec{y} , \vec{y}^T \cdot X \cdot (X^T X)^{-1, T} \right) \\
&= (X^T X)^{-1} \cdot X^T \cdot cov(\vec{y}^T, \vec{y} ) \cdot X \cdot (X^T X)^{-1, T}
\\
&= \sigma^2 \left( X^T X \right)^{-1} \cdot X^T X \cdot \left( X^T X \right)^{-1, T} \\
&= \sigma^2 \left( X^T X \right)^{-1}. \tag{7} \label{cov}
\end{align}</div>
<p>
In the third line here, note that we have assumed that the <span class="math">\(\epsilon_i\)</span> are independent, so that <span class="math">\(cov(\vec{y},\vec{y}) = \sigma^2 I.\)</span> We’ve also used the fact that <span class="math">\(X^T X\)</span> is symmetric.</p>
<p>To get a feel for the significance of (\ref{cov}), it is helpful to consider the case where the average <span class="math">\(x\)</span> values are zero. In this case,
</p>
<div class="math">\begin{align}
\left( X^T X \right)_{i,j} &\equiv \sum_k \delta X_{k,i} \delta X_{k,j} \equiv N \times \langle x_i, x_j\rangle. \tag{8} \label{corr_mat}
\end{align}</div>
<p>
<a href="./wp-content/uploads/2016/05/scatter.jpg"><img alt="margin around decision boundary" src="./wp-content/uploads/2016/05/scatter.jpg"></a> That is, <span class="math">\(X^T X\)</span> is proportional to the correlation matrix of our <span class="math">\(x\)</span> values. This correlation matrix is real and symmetric, and thus has an orthonormal set of eigenvectors. The eigenvalue corresponding to the <span class="math">\(k\)</span>-th eigenvector gives the variance of our data set’s <span class="math">\(k\)</span>-th component values in this basis — details can be found in our <a href="http://efavdb.github.io/principal-component-analysis">article on <span class="caps">PCA</span></a>. This implies a simple interpretation of (\ref{cov}): The variance in the <span class="math">\(\vec{\beta}\)</span> coefficients will be lowest for predictors parallel to the highest variance <span class="caps">PCA</span> components (eg <span class="math">\(x_1\)</span> in the figure shown) and highest for predictors parallel to the lowest variance <span class="caps">PCA</span> components (<span class="math">\(x_2\)</span> in the figure). This observation can often be exploited during an experiment’s design: If a particular coefficient is desired to high accuracy, one should make sure to sample the corresponding predictor over a wide range.</p>
<p>[Note: Cathy gives an interesting, alternative interpretation for the parameter estimate variances in a follow-up post, <a href="http://efavdb.github.io/interpret-linear-regression">here</a>.]</p>
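<p>To make (\ref{cov}) concrete, here is a minimal simulation sketch (the noise scale and predictor ranges are arbitrary choices of ours): the theoretical coefficient covariance is compared against the empirical scatter of fits over many noise realizations, and the widely-sampled predictor ends up with the more precise coefficient estimate.</p>
<pre><code>import numpy as np

np.random.seed(1)
N, sigma = 200, 0.5
# Two zero-mean predictors: x1 sampled over a wide range, x2 over a narrow one
x1 = np.random.uniform(-5.0, 5.0, N)
x2 = np.random.uniform(-0.5, 0.5, N)
X = np.column_stack([x1, x2])
beta_true = np.array([1.0, 1.0])

# Theoretical coefficient covariance, eq. (7): sigma^2 (X^T X)^{-1}
cov_theory = sigma**2 * np.linalg.inv(X.T @ X)

# Empirical covariance of the fitted coefficients over many noise realizations
fits = []
for _ in range(5000):
    y = X @ beta_true + sigma * np.random.randn(N)
    fits.append(np.linalg.solve(X.T @ X, X.T @ y))
cov_empirical = np.cov(np.array(fits), rowvar=False)

print(np.round(cov_theory, 6))
print(np.round(cov_empirical, 6))   # close to cov_theory; var(beta_2) is much larger than var(beta_1)
</code></pre>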
<h3>Unbiased estimator for <span class="math">\(\sigma^2\)</span></h3>
<p>The result (\ref{cov}) gives an expression for the variance of the parameter coefficients in terms of the underlying sample variance <span class="math">\(\sigma^2\)</span>. In practice, <span class="math">\(\sigma^2\)</span> is often not provided and must be estimated from the observations at hand. Assuming that the <span class="math">\(\{\epsilon_i\}\)</span> in (\ref{noise}) are independent <span class="math">\(\mathcal{N}(0, \sigma^2)\)</span> random variables, we now show that the following provides an unbiased estimate for this variance:
</p>
<div class="math">$$
S^2 \equiv \frac{1}{N-L} \sum_i \left ( y_i - \vec{x}_i^T \cdot \vec{\beta} \right) ^2. \tag{9} \label{S}
$$</div>
<p>
Note that this is a normalized sum of squared residuals from our fit, with <span class="math">\((N-L)\)</span> as the normalization constant — the number of samples minus the number of fit parameters. To prove that <span class="math">\(\langle S^2 \rangle = \sigma^2\)</span>, we plug in (\ref{optimal}) for <span class="math">\(\vec{\beta}\)</span> and combine it with (\ref{noise}) for <span class="math">\(\vec{y}\)</span>. This gives
</p>
<div class="math">\begin{align} \nonumber
S^2 &= \frac{1}{N-L} \sum_i \left ( y_i - \vec{x}_i^T \cdot (X^T X)^{-1} \cdot X^T \cdot \{ X \cdot \vec{\beta}_{true} + \vec{\epsilon} \} \right) ^2 \\ \nonumber
&= \frac{1}{N-L} \sum_i \left ( \{y_i - \vec{x}_i^T \cdot\vec{\beta}_{true} \} - \vec{x}_i^T \cdot (X^T X)^{-1} \cdot X^T \cdot \vec{\epsilon} \right) ^2 \\
&= \frac{1}{N-L} \sum_i \left ( \epsilon_i - \vec{x}_i^T \cdot (X^T X)^{-1} \cdot X^T \cdot \vec{\epsilon} \right) ^2 \tag{10}. \label{S2}
\end{align}</div>
<p>
The second term in the last line is the <span class="math">\(i\)</span>-th component of the vector
</p>
<div class="math">$$
X \cdot (X^T X)^{-1} \cdot X^T \cdot \vec{\epsilon} \equiv \mathbb{P} \cdot \vec{\epsilon}. \tag{11} \label{projection}
$$</div>
<p>
Here, <span class="math">\(\mathbb{P}\)</span> is a projection operator — this follows from the fact that <span class="math">\(\mathbb{P}^2 = \mathbb{P}\)</span>. When it appears in (\ref{projection}), <span class="math">\(\mathbb{P}\)</span> maps <span class="math">\(\vec{\epsilon}\)</span> into the <span class="math">\(L\)</span>-dimensional coordinate space spanned by the <span class="math">\(\{\vec{x_i}\}\)</span>, scales the result using (\ref{corr_mat}), then maps it back into its original <span class="math">\(N\)</span>-dimensional space. The net effect is to project <span class="math">\(\vec{\epsilon}\)</span> into an <span class="math">\(L\)</span>-dimensional subspace of the full <span class="math">\(N\)</span>-dimensional space (more on the <span class="math">\(L\)</span>-dimensional subspace just below). Plugging (\ref{projection}) into (\ref{S2}), we obtain
</p>
<div class="math">$$
S^2 = \frac{1}{N-L} \sum_i \left ( \epsilon_i - (\mathbb{P} \cdot \vec{\epsilon})_i \right)^2 \equiv \frac{1}{N-L} \left \vert \vec{\epsilon} - \mathbb{P} \cdot \vec{\epsilon} \right \vert^2. \label{S3} \tag{12}
$$</div>
<p>
This final form gives the result: <span class="math">\(\vec{\epsilon}\)</span> is an <span class="math">\(N\)</span>-dimensional vector of independent, <span class="math">\(\mathcal{N}(0, \sigma^2)\)</span> variables, and (\ref{S3}) shows that <span class="math">\(S^2\)</span> is equal to <span class="math">\(1/(N-L)\)</span> times the squared length of an <span class="math">\((N-L)\)</span>-dimensional projection of it (the part along <span class="math">\(\mathbb{I} - \mathbb{P}\)</span>). The squared length of this projection will on average be <span class="math">\((N-L) \sigma^2\)</span>, so that <span class="math">\(\langle S^2 \rangle = \sigma^2\)</span>.</p>
<p>We need to make two final points before moving on. First, because <span class="math">\((N-L) S^2 / \sigma^2\)</span> is, in a basis aligned with the projection, a sum of the squares of <span class="math">\((N-L)\)</span> independent standard Normal random variables, it follows that
</p>
<div class="math">$$
\frac{(N-L) S^2}{\sigma^2} \sim \chi_{N-L}^2. \tag{13} \label{chi2}
$$</div>
<p>
Second, <span class="math">\(S^2\)</span> is independent of <span class="math">\(\vec{\beta}\)</span>: We can see this by rearranging (\ref{optimal}) as
</p>
<div class="math">$$
\vec{\beta} = \vec{\beta}_{true} + (X^T X)^{-1} \cdot X^T \cdot \vec{\epsilon}. \tag{14} \label{beta3}
$$</div>
<p>
We can left-multiply this by <span class="math">\(X\)</span> without losing information (since <span class="math">\(X\)</span> has full column rank whenever <span class="math">\(X^T X\)</span> is invertible) to obtain
</p>
<div class="math">$$
X \cdot \vec{\beta} = X \cdot \vec{\beta}_{true} + \mathbb{P} \cdot \vec{\epsilon}, \tag{15} \label{beta2}
$$</div>
<p>
where we have used (\ref{projection}). Comparing (\ref{beta2}) and (\ref{S3}), we see that the components of <span class="math">\(\vec{\epsilon}\)</span> that inform <span class="math">\(\vec{\beta}\)</span> lie in the subspace fixed by <span class="math">\(\mathbb{P}\)</span>. This is the orthogonal complement of the subspace informing <span class="math">\(S^2\)</span>, and since the Gaussian <span class="math">\(\vec{\epsilon}\)</span> has independent components, its projections onto complementary orthogonal subspaces are themselves independent, implying that <span class="math">\(S^2\)</span> is independent of <span class="math">\(\vec{\beta}\)</span>.</p>
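<p>Both the unbiasedness of (\ref{S}) and the chi-squared scaling (\ref{chi2}) can be checked with a short simulation, again only a sketch with arbitrary illustrative parameters:</p>
<pre><code>import numpy as np

np.random.seed(2)
N, L, sigma = 30, 4, 2.0
X = np.random.randn(N, L)
beta_true = np.random.randn(L)

s2_samples = []
for _ in range(20000):
    eps = sigma * np.random.randn(N)
    y = X @ beta_true + eps
    beta_hat = np.linalg.solve(X.T @ X, X.T @ y)
    resid = y - X @ beta_hat
    s2_samples.append(resid @ resid / (N - L))   # eq. (9)

s2 = np.array(s2_samples)
print(s2.mean())                        # about sigma^2 = 4.0, confirming unbiasedness
print(np.var((N - L) * s2 / sigma**2))  # about 2 (N - L) = 52, the variance of a chi-squared_{N-L} variable
</code></pre>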
<h3>Confidence intervals and hypothesis tests</h3>
<p>The results above immediately provide us with a method for generating confidence intervals for the individual coefficient estimates (continuing with our Normal error assumption): From (\ref{beta3}), it follows that the coefficients are themselves Normal random variables, with variance given by (\ref{cov}). Further, <span class="math">\(S^2\)</span> provides an unbiased estimate for <span class="math">\(\sigma^2\)</span>, proportional to a <span class="math">\(\chi^2_{N-L}\)</span> random variable. Combining these results gives
</p>
<div class="math">$$
\frac{\beta_{i,true} - \beta_{i}}{\sqrt{\left(X^T X\right)^{-1}_{ii} S^2}} \sim t_{(N-L)}. \tag{16}
$$</div>
<p>
That is, the pivot at left follows a Student’s <span class="math">\(t\)</span>-distribution with <span class="math">\((N-L)\)</span> degrees of freedom (i.e., it is the ratio of a standard Normal variable to the square root of an independent chi-squared variable divided by its degrees of freedom). A rearrangement of the above gives the following <span class="math">\((1-\alpha)\)</span> confidence interval for the true value:
</p>
<div class="math">$$
\beta_i - t_{(N-L), \alpha /2} \sqrt{\left(X^T X \right)^{-1}_{ii} S^2}\leq \beta_{i, true} \leq \beta_i + t_{(N-L), \alpha /2} \sqrt{\left(X^T X \right)^{-1}_{ii} S^2} \tag{17} \label{interval},
$$</div>
<p>
where <span class="math">\(\beta_i\)</span> is obtained from the solution (\ref{optimal}). The interval above can be inverted to generate level <span class="math">\(\alpha\)</span> hypothesis tests. In particular, we note that a test of the null — that a particular coefficient is actually zero — would not be rejected if (\ref{interval}) contains the origin. This approach is often used to test whether some data is consistent with the assertion that a predictor is linearly related to the response.</p>
<p>[Again, see Cathy’s follow-up post <a href="http://efavdb.github.io/interpret-linear-regression">here</a> for an alternate take on these results.]</p>
<h3>Bayesian analysis</h3>
<p>The final thing we wish to do here is consider the problem from a Bayesian perspective, using a flat prior on the <span class="math">\(\vec{\beta}\)</span>. In this case, assuming a Gaussian form for the <span class="math">\(\epsilon_i\)</span> in (\ref{noise}) gives
</p>
<div class="math">\begin{align}\tag{18} \label{18}
p(\vec{\beta} \vert \{y_i\}) \propto p(\{y_i\} \vert \vec{\beta}) p(\vec{\beta}) = \mathcal{N} \exp \left [ -\frac{1}{2 \sigma^2}\sum_i \left (y_i - \vec{\beta} \cdot \vec{x}_i \right)^2\right].
\end{align}</div>
<p>
Notice that this posterior form for <span class="math">\(\vec{\beta}\)</span> is also Gaussian, and is centered about the solution (\ref{optimal}). Formally, measuring <span class="math">\(\vec{\beta}\)</span> from its posterior mean and dropping terms independent of <span class="math">\(\vec{\beta}\)</span>, we can write the exponent here in the form
</p>
<div class="math">\begin{align}
-\frac{1}{2 \sigma^2}\sum_i \left (y_i - \vec{\beta} \cdot \vec{x}_i \right)^2 \equiv -\frac{1}{2} \vec{\beta}^T \cdot \frac{1}{\Sigma^2} \cdot \vec{\beta}, \tag{19}
\end{align}</div>
<p>
where <span class="math">\(\Sigma^2\)</span> is the covariance matrix for the components of <span class="math">\(\vec{\beta}\)</span>, as implied by the posterior form (\ref{18}). We can get the components of its inverse by differentiating (\ref{18}) twice. This gives,
</p>
<div class="math">\begin{align}
\left ( \frac{1}{\Sigma^2}\right)_{jk} &= \frac{1}{2 \sigma^2} \partial_{\beta_j} \partial_{\beta_k} \sum_i \left (y_i - \vec{\beta} \cdot \vec{x}_i \right)^2 \\
&= -\frac{1}{\sigma^2}\partial_{\beta_j} \sum_i \left (y_i - \vec{\beta} \cdot \vec{x}_i \right) x_{i,k} \\
&= \frac{1}{\sigma^2} \sum_i x_{i,j} x_{i,k} = \frac{1}{\sigma^2} (X^T X)_{jk}. \tag{20}
\end{align}</div>
<p>
In other words, <span class="math">\(\Sigma^2 = \sigma^2 (X^T X)^{-1}\)</span>, in agreement with the classical expression (\ref{cov}).</p>
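<p>As a closing sketch (parameters again illustrative), the flat-prior Gaussian posterior can be written down directly: its mean is the least-squares solution (\ref{optimal}) and its covariance is <span class="math">\(\sigma^2 (X^T X)^{-1}\)</span>, in line with (\ref{cov}). Sampling from it confirms the correspondence:</p>
<pre><code>import numpy as np

np.random.seed(4)
N, L, sigma = 100, 3, 0.7
X = np.random.randn(N, L)
beta_true = np.array([0.5, -1.0, 2.0])
y = X @ beta_true + sigma * np.random.randn(N)

# Flat-prior Gaussian posterior: mean is the least-squares solution (5),
# covariance is sigma^2 (X^T X)^{-1} -- the classical result (7)
post_mean = np.linalg.solve(X.T @ X, X.T @ y)
post_cov = sigma**2 * np.linalg.inv(X.T @ X)

# Draw posterior samples and confirm their empirical covariance agrees
draws = np.random.multivariate_normal(post_mean, post_cov, size=50000)
print(np.allclose(np.cov(draws, rowvar=False), post_cov, atol=1e-3))   # True
</code></pre>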
<h3>Summary</h3>
<p>In summary, we’ve gone through one quick derivation of the linear fit solution that minimizes the sum of squared <span class="math">\(y\)</span> errors for a given set of data. We’ve also considered the variance of this solution, showing that the resulting form is closely related to the principal components of the predictor variables sampled. The covariance solution (\ref{cov}) tells us that all parameters have standard deviations that decrease like <span class="math">\(1/\sqrt{N}\)</span>, with <span class="math">\(N\)</span> the number of samples. However, the predictors that are sampled over wider ranges always have coefficient estimates that are more precise. This is due to the fact that sampling over many different values allows one to get a better read on how the underlying function being fit varies with a predictor. Following this, assuming normal errors, we showed that <span class="math">\(S^2\)</span> provides an unbiased, chi-squared-distributed estimator for the sample variance — one that is independent of the parameter estimates. This allowed us to then write down a confidence interval for the <span class="math">\(i\)</span>-th coefficient. The final thing we have shown is that a Bayesian treatment with Gaussian noise and a flat prior gives similar results: The posterior that results is centered about the classical solution and has a covariance matrix equal to that obtained by the classical approach.</p>
<script type="text/javascript">if (!document.getElementById('mathjaxscript_pelican_#%@#$@#')) {
var align = "center",
indent = "0em",
linebreak = "false";
if (false) {
align = (screen.width < 768) ? "left" : align;
indent = (screen.width < 768) ? "0em" : indent;
linebreak = (screen.width < 768) ? 'true' : linebreak;
}
var mathjaxscript = document.createElement('script');
mathjaxscript.id = 'mathjaxscript_pelican_#%@#$@#';
mathjaxscript.type = 'text/javascript';
mathjaxscript.src = 'https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.3/latest.js?config=TeX-AMS-MML_HTMLorMML';
var configscript = document.createElement('script');
configscript.type = 'text/x-mathjax-config';
configscript[(window.opera ? "innerHTML" : "text")] =
"MathJax.Hub.Config({" +
" config: ['MMLorHTML.js']," +
" TeX: { extensions: ['AMSmath.js','AMSsymbols.js','noErrors.js','noUndefined.js'], equationNumbers: { autoNumber: 'none' } }," +
" jax: ['input/TeX','input/MathML','output/HTML-CSS']," +
" extensions: ['tex2jax.js','mml2jax.js','MathMenu.js','MathZoom.js']," +
" displayAlign: '"+ align +"'," +
" displayIndent: '"+ indent +"'," +
" showMathMenu: true," +
" messageStyle: 'normal'," +
" tex2jax: { " +
" inlineMath: [ ['\\\\(','\\\\)'] ], " +
" displayMath: [ ['$$','$$'] ]," +
" processEscapes: true," +
" preview: 'TeX'," +
" }, " +
" 'HTML-CSS': { " +
" availableFonts: ['STIX', 'TeX']," +
" preferredFont: 'STIX'," +
" styles: { '.MathJax_Display, .MathJax .mo, .MathJax .mi, .MathJax .mn': {color: 'inherit ! important'} }," +
" linebreaks: { automatic: "+ linebreak +", width: '90% container' }," +
" }, " +
"}); " +
"if ('default' !== 'default') {" +
"MathJax.Hub.Register.StartupHook('HTML-CSS Jax Ready',function () {" +
"var VARIANT = MathJax.OutputJax['HTML-CSS'].FONTDATA.VARIANT;" +
"VARIANT['normal'].fonts.unshift('MathJax_default');" +
"VARIANT['bold'].fonts.unshift('MathJax_default-bold');" +
"VARIANT['italic'].fonts.unshift('MathJax_default-italic');" +
"VARIANT['-tex-mathit'].fonts.unshift('MathJax_default-italic');" +
"});" +
"MathJax.Hub.Register.StartupHook('SVG Jax Ready',function () {" +
"var VARIANT = MathJax.OutputJax.SVG.FONTDATA.VARIANT;" +
"VARIANT['normal'].fonts.unshift('MathJax_default');" +
"VARIANT['bold'].fonts.unshift('MathJax_default-bold');" +
"VARIANT['italic'].fonts.unshift('MathJax_default-italic');" +
"VARIANT['-tex-mathit'].fonts.unshift('MathJax_default-italic');" +
"});" +
"}";
(document.body || document.getElementsByTagName('head')[0]).appendChild(configscript);
(document.body || document.getElementsByTagName('head')[0]).appendChild(mathjaxscript);
}
</script>
<p id="post-share-links">
Like this post? Share on:
<a href="https://twitter.com/intent/tweet?text=Linear%C2%A0Regression&url=/linear-regression" target="_blank" rel="nofollow noopener noreferrer" title="Share on Twitter">Twitter</a>
|
<a href="https://www.facebook.com/sharer/sharer.php?u=/linear-regression" target="_blank" rel="nofollow noopener noreferrer" title="Share on Facebook">Facebook</a>
|
<a href="mailto:?subject=Linear%C2%A0Regression&body=/linear-regression" target="_blank" rel="nofollow noopener noreferrer" title="Share via Email">Email</a>
</p>
<hr />
<div class="author_blurb">
<a href="" target="_blank" rel="nofollow noopener noreferrer">
<img src="/wp-content/uploads/2014/12/JonathanLinkedIn.jpg" alt="Jonathan Landy Avatar" title="Jonathan Landy">
<span class="author_name">Jonathan Landy</span>
</a>
Jonathan grew up in the midwest and then went to school at Caltech and UCLA. Following this, he did two postdocs, one at UCSB and one at UC Berkeley. His academic research focused primarily on applications of statistical mechanics, but his professional passion has always been in the mastering, development, and practical application of slick math methods/tools. He currently works as a data-scientist at Stitch Fix.
</div>
<hr/>
<aside>
<nav>
<ul class="articles-timeline">
<li class="previous-article">« <a href="./average-queue-wait-times-with-random-arrivals" title="Previous: Average queue wait times with random arrivals">Average queue wait times with random arrivals</a></li>
<li class="next-article"><a href="./interpret-linear-regression" title="Next: Interpreting the results of linear regression">Interpreting the results of linear regression</a> »</li>
</ul>
</nav>
</aside>
</div>
<section id="article-sidebar" class="span2">
<h4>Published</h4>
<time itemprop="dateCreated" datetime="2016-05-29T11:27:00-07:00">May 29, 2016</time>
<h4>Category</h4>
<a class="category-link" href="./categories.html#methods-theory-ref">Methods, Theory</a>
<h4>Contact</h4>
<div id="sidebar-social-link">
<a href="https://twitter.com/efavdb" title="" target="_blank" rel="nofollow noopener noreferrer">
<svg xmlns="http://www.w3.org/2000/svg" aria-label="Twitter" role="img" viewBox="0 0 512 512"><rect width="512" height="512" rx="15%" fill="#1da1f3"/><path fill="#fff" d="M437 152a72 72 0 0 1-40 12 72 72 0 0 0 32-40 72 72 0 0 1-45 17 72 72 0 0 0-122 65 200 200 0 0 1-145-74 72 72 0 0 0 22 94 72 72 0 0 1-32-7 72 72 0 0 0 56 69 72 72 0 0 1-32 1 72 72 0 0 0 67 50 200 200 0 0 1-105 29 200 200 0 0 0 309-179 200 200 0 0 0 35-37"/></svg>
</a>
<a href="https://github.com/efavdb" title="" target="_blank" rel="nofollow noopener noreferrer">
<svg xmlns="http://www.w3.org/2000/svg" aria-label="GitHub" role="img" viewBox="0 0 512 512"><rect width="512" height="512" rx="15%" fill="#1B1817"/><path fill="#fff" d="M335 499c14 0 12 17 12 17H165s-2-17 12-17c13 0 16-6 16-12l-1-50c-71 16-86-28-86-28-12-30-28-37-28-37-24-16 1-16 1-16 26 2 40 26 40 26 22 39 59 28 74 22 2-17 9-28 16-35-57-6-116-28-116-126 0-28 10-51 26-69-3-6-11-32 3-67 0 0 21-7 70 26 42-12 86-12 128 0 49-33 70-26 70-26 14 35 6 61 3 67 16 18 26 41 26 69 0 98-60 120-117 126 10 8 18 24 18 48l-1 70c0 6 3 12 16 12z"/></svg>
</a>
<a href="https://www.youtube.com/channel/UClfvjoSiu0VvWOh5OpnuusA" title="" target="_blank" rel="nofollow noopener noreferrer">
<svg xmlns="http://www.w3.org/2000/svg" aria-label="YouTube" role="img" viewBox="0 0 512 512" fill="#ed1d24"><rect width="512" height="512" rx="15%"/><path d="m427 169c-4-15-17-27-32-31-34-9-239-10-278 0-15 4-28 16-32 31-9 38-10 135 0 174 4 15 17 27 32 31 36 10 241 10 278 0 15-4 28-16 32-31 9-36 9-137 0-174" fill="#fff"/><path d="m220 203v106l93-53"/></svg>
</a>
</div>
</section>
</div>
</article>
<button onclick="topFunction()" id="myBtn" title="Go to top">▲</button>
</div>
<div class="span1"></div>
</div>
</div>
</div>
<footer>
<div>
<span class="site-name"><a href= .>EFAVDB</span> - Everybody's Favorite Data Blog</a>
</div>
<!-- <div id="fpowered"> -->
<!-- Powered by: <a href="http://getpelican.com/" title="Pelican Home Page" target="_blank" rel="nofollow noopener noreferrer">Pelican</a> -->
<!-- Theme: <a href="https://elegant.oncrashreboot.com/" title="Theme Elegant Home Page" target="_blank" rel="nofollow noopener noreferrer">Elegant</a> -->
<!-- -->
<!-- </div> -->
</footer> <script src="//code.jquery.com/jquery.min.js"></script>
<script src="//netdna.bootstrapcdn.com/twitter-bootstrap/2.3.2/js/bootstrap.min.js"></script>
<script>
function validateForm(query)
{
return (query.length > 0);
}
</script>
<script>
//** Scroll to top button **
//Get the button:
mybutton = document.getElementById("myBtn");
// When the user scrolls down 30px from the top of the document, show the button
window.onscroll = function() {scrollFunction()};
function scrollFunction() {
if (document.body.scrollTop > 30 || document.documentElement.scrollTop > 30) {
mybutton.style.display = "block";
} else {
mybutton.style.display = "none";
}
}
// When the user clicks on the button, scroll to the top of the document
function topFunction() {
document.body.scrollTop = 0; // For Safari
document.documentElement.scrollTop = 0; // For Chrome, Firefox, IE and Opera
}
</script>
<script>
(function () {
if (window.location.hash.match(/^#comment-\d+$/)) {
$('#comment_thread').collapse('show');
}
})();
window.onhashchange=function(){
if (window.location.hash.match(/^#comment-\d+$/))
window.location.reload(true);
}
$('#comment_thread').on('shown', function () {
var link = document.getElementById('comment-accordion-toggle');
var old_innerHTML = link.innerHTML;
$(link).fadeOut(200, function() {
$(this).text('Click here to hide comments').fadeIn(200);
});
$('#comment_thread').on('hidden', function () {
$(link).fadeOut(200, function() {
$(this).text(old_innerHTML).fadeIn(200);
});
})
})
</script>
</body>
<!-- Theme: Elegant built for Pelican
License : MIT -->
</html>