From 1246cc91713475a4d22f9eb0273e44d75cd48342 Mon Sep 17 00:00:00 2001 From: Quarto GHA Workflow Runner Date: Wed, 20 Sep 2023 00:10:01 +0000 Subject: [PATCH] Built site for gh-pages --- .nojekyll | 2 +- schedule/index.html | 2 +- schedule/slides/00-cv-for-many-models.html | 935 +++++++++++++++++++++ search.json | 597 ++++++------- sitemap.xml | 70 +- 5 files changed, 1290 insertions(+), 316 deletions(-) create mode 100644 schedule/slides/00-cv-for-many-models.html diff --git a/.nojekyll b/.nojekyll index 2427172..f27f726 100644 --- a/.nojekyll +++ b/.nojekyll @@ -1 +1 @@ -4f3d24fa \ No newline at end of file +163684ba \ No newline at end of file diff --git a/schedule/index.html b/schedule/index.html index 7b0b69c..7eb718a 100644 --- a/schedule/index.html +++ b/schedule/index.html @@ -330,7 +330,7 @@

2 Regul 5 Oct 23 -CV for comparison, NP 1 +CV for comparison, NP 1 diff --git a/schedule/slides/00-cv-for-many-models.html b/schedule/slides/00-cv-for-many-models.html new file mode 100644 index 0000000..0a47be9 --- /dev/null +++ b/schedule/slides/00-cv-for-many-models.html @@ -0,0 +1,935 @@ + + + + + + + + + + + + + + UBC Stat406 2023W – cv-for-many-models + + + + + + + + + + + + + + + + + + +
+
+ + +
+

00 CV for many models

+

Stat 406

+

Daniel J. McDonald

+

Last modified – 19 September 2023

+

\[ +\DeclareMathOperator*{\argmin}{argmin} +\DeclareMathOperator*{\argmax}{argmax} +\DeclareMathOperator*{\minimize}{minimize} +\DeclareMathOperator*{\maximize}{maximize} +\DeclareMathOperator*{\find}{find} +\DeclareMathOperator{\st}{subject\,\,to} +\newcommand{\E}{E} +\newcommand{\Expect}[1]{\E\left[ #1 \right]} +\newcommand{\Var}[1]{\mathrm{Var}\left[ #1 \right]} +\newcommand{\Cov}[2]{\mathrm{Cov}\left[#1,\ #2\right]} +\newcommand{\given}{\ \vert\ } +\newcommand{\X}{\mathbf{X}} +\newcommand{\x}{\mathbf{x}} +\newcommand{\y}{\mathbf{y}} +\newcommand{\P}{\mathcal{P}} +\newcommand{\R}{\mathbb{R}} +\newcommand{\norm}[1]{\left\lVert #1 \right\rVert} +\newcommand{\snorm}[1]{\lVert #1 \rVert} +\newcommand{\tr}[1]{\mbox{tr}(#1)} +\newcommand{\brt}{\widehat{\beta}^R_{s}} +\newcommand{\brl}{\widehat{\beta}^R_{\lambda}} +\newcommand{\bls}{\widehat{\beta}_{ols}} +\newcommand{\blt}{\widehat{\beta}^L_{s}} +\newcommand{\bll}{\widehat{\beta}^L_{\lambda}} +\]

+
+
+

Some data and 4 models

+
+
data("mobility", package = "Stat406")
+
+

Model 1: Lasso on all predictors, use CV min

+

Model 2: Ridge on all predictors, use CV min

+

Model 3: OLS on all predictors (no tuning parameters)

+

Model 4: (1) Lasso on all predictors, then (2) OLS on those chosen at CV min

+
+

How do I decide between these 4 models?

+
+
+
+

CV functions

+
+
kfold_cv <- function(data, estimator, predictor, error_fun, kfolds = 5) {
+  # K-fold cross-validation estimate of prediction error.
+  #   data:      data frame holding the response and the predictors
+  #   estimator: function(train) returning a fitted model
+  #   predictor: function(model, test) returning predictions for the rows of `test`
+  #   error_fun: function(test) returning a single number; it can read the
+  #              predictions from the `.preds` column added below
+  #   kfolds:    number of folds
+  # Returns the unweighted mean of the per-fold errors.
+
+  # randomly assign every row a fold label in 1..kfolds (fold sizes differ by at most 1)
+  fold_labels <- sample(rep(seq_len(kfolds), length.out = nrow(data)))
+  errors <- double(kfolds)
+  for (fold in seq_len(kfolds)) {
+    test_rows <- fold_labels == fold
+    train <- data[!test_rows, ]
+    test <- data[test_rows, ]
+    # fit on the other folds only, then predict the held-out fold
+    current_model <- estimator(train)
+    test$.preds <- predictor(current_model, test)
+    errors[fold] <- error_fun(test)
+  }
+  mean(errors)
+}
+
+# Leave-one-out CV error for OLS from a single fit: for a linear model,
+# residual / (1 - leverage) is the prediction error for a row when that row is
+# left out, so no refitting is needed.
+loo_cv <- function(dat) {
+  # regress Mobility on every other column of `dat`
+  mdl <- lm(Mobility ~ ., data = dat)
+  mean( abs(residuals(mdl)) / abs(1 - hatvalues(mdl)) ) # MAE version
+}
+
+
+
+

Experiment setup

+
+
# prepare our data
+# note that mob has only continuous predictors, otherwise could be trouble
+# keep complete rows only and drop the ID, State, and Name columns
+# (select() is presumably dplyr's; the library() call is not shown here)
+mob <- mobility[complete.cases(mobility), ] |> select(-ID, -State, -Name)
+# avoid doing this same operation a bunch
+# xmat(): every column except the response, Mobility, as a matrix for glmnet
+xmat <- function(dat) dat |> select(!Mobility) |> as.matrix()
+
+# set up our model functions
+# each takes a data frame and forwards `...` to the glmnet fitting call
+library(glmnet)
+# mod1: lasso (glmnet's default alpha = 1), CV scored by mean absolute error
+mod1 <- function(dat, ...) cv.glmnet(xmat(dat), dat$Mobility, type.measure = "mae", ...)
+# mod2: ridge (alpha = 0), CV scored by mean absolute error
+mod2 <- function(dat, ...) cv.glmnet(xmat(dat), dat$Mobility, alpha = 0, type.measure = "mae", ...)
+# mod3: a single unpenalized fit (lambda = 0), so there is nothing to tune
+mod3 <- function(dat, ...) glmnet(xmat(dat), dat$Mobility, lambda = 0, ...) # just does lm()
+# NOTE(review): Model 4 is described as lasso followed by OLS on the selected
+# variables, but in glmnet's relaxed fits gamma = 1 is the ordinary lasso and
+# gamma = 0 is the unpenalized refit, so this may just reproduce mod1 --
+# confirm against the glmnet `relax` documentation before relying on it.
+mod4 <- function(dat, ...) cv.glmnet(xmat(dat), dat$Mobility, relax = TRUE, gamma = 1, type.measure = "mae", ...)
+
+# predictions at the CV-minimizing lambda; drop() flattens the result
+# this will still "work" on mod3, because there's only 1 s
+predictor <- function(mod, dat) drop(predict(mod, newx = xmat(dat), s = "lambda.min"))
+
+# chose mean absolute error just 'cause
+# expects `testdata` to carry the predictions in a `.preds` column
+error_fun <- function(testdata) mean(abs(testdata$Mobility - testdata$.preds))
+
+
+
+

Run the experiment

+
+
all_model_funs <- lst(mod1, mod2, mod3, mod4)
+# fit each model once on the full data set
+all_fits <- map(all_model_funs, .f = exec, dat = mob)
+
+# Each call to kfold_cv() draws its own random fold labels, so every method is
+# scored on different splits; we use 10 folds here.
+# It would be better to use the _SAME_ splits for all methods.
+ten_fold_cv <- map_dbl(all_model_funs, ~ kfold_cv(mob, .x, predictor, error_fun, 10)) 
+
+# error estimates taken from the full-data fits: the smallest CV error stored
+# in each cv.glmnet object, and the leave-one-out shortcut for OLS
+in_sample_cv <- c(
+  mod1 = min(all_fits[[1]]$cvm),
+  mod2 = min(all_fits[[2]]$cvm),
+  mod3 = loo_cv(mob),
+  mod4 = min(all_fits[[4]]$cvm)
+)
+
+# one row per kind of estimate, one column per model
+tib <- bind_rows(in_sample_cv, ten_fold_cv)
+tib$method = c("in_sample", "out_of_sample")
+tib
+
+
# A tibble: 2 × 5
+    mod1   mod2   mod3   mod4 method       
+   <dbl>  <dbl>  <dbl>  <dbl> <chr>        
+1 0.0159 0.0161 0.0164 0.0156 in_sample    
+2 0.0158 0.0161 0.0165 0.0161 out_of_sample
+
+
+ + +
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/search.json b/search.json index f51d640..4037664 100644 --- a/search.json +++ b/search.json @@ -679,271 +679,236 @@ "text": "Prediction accuracy (last year)\n\nsummary(lm(actual ~ predicted - 1, data = acc))\n\n\nCall:\nlm(formula = actual ~ predicted - 1, data = acc)\n\nResiduals:\n Min 1Q Median 3Q Max \n-63.931 -2.931 1.916 6.052 21.217 \n\nCoefficients:\n Estimate Std. Error t value Pr(>|t|) \npredicted 0.96590 0.01025 94.23 <2e-16 ***\n---\nSignif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1\n\nResidual standard error: 10.2 on 137 degrees of freedom\n (8 observations deleted due to missingness)\nMultiple R-squared: 0.9848, Adjusted R-squared: 0.9847 \nF-statistic: 8880 on 1 and 137 DF, p-value: < 2.2e-16\n\n\n\n\nUBC Stat 406 - 2023" }, { - "objectID": "schedule/index.html", - "href": "schedule/index.html", - "title": " Schedule", - "section": "", - "text": "Required readings and lecture videos are listed below for each module. Readings from [ISLR] are always required while those from [ESL] are optional and supplemental." - }, - { - "objectID": "schedule/index.html#introduction-and-review", - "href": "schedule/index.html#introduction-and-review", - "title": " Schedule", - "section": "0 Introduction and Review", - "text": "0 Introduction and Review\nRequired reading below is meant to reengage brain cells which have no doubt forgotten all the material that was covered in STAT 306 or CPSC 340. We don’t presume that you remember all these details, but that, upon rereading, they at least sound familiar. 
If this all strikes you as completely foreign, this class may not be for you.\n\nRequired reading\n\n[ISLR] 2.1, 2.2, and Chapter 3 (this material is review)\n\nOptional reading\n\n[ESL] 2.4 and 2.6\n\nHandouts\n\nProgramming in R .Rmd, .pdf\n\n\nUsing in RMarkdown .Rmd, .pdf\n\n\n\n\n\nDate\nSlides\nDeadlines\n\n\n\n\n05 Sep 23\n(no class, Imagine UBC)\n\n\n\n07 Sep 23\nIntro to class, Git\n(Quiz 0 due tomorrow)\n\n\n12 Sep 23\nUnderstanding R / Rmd\nLab 00, (Labs begin)\n\n\n14 Sep 23\nLM review, LM Example" + "objectID": "schedule/slides/00-cv-for-many-models.html#meta-lecture", + "href": "schedule/slides/00-cv-for-many-models.html#meta-lecture", + "title": "UBC Stat406 2023W", + "section": "00 CV for many models", + "text": "00 CV for many models\nStat 406\nDaniel J. McDonald\nLast modified – 19 September 2023\n\\[\n\\DeclareMathOperator*{\\argmin}{argmin}\n\\DeclareMathOperator*{\\argmax}{argmax}\n\\DeclareMathOperator*{\\minimize}{minimize}\n\\DeclareMathOperator*{\\maximize}{maximize}\n\\DeclareMathOperator*{\\find}{find}\n\\DeclareMathOperator{\\st}{subject\\,\\,to}\n\\newcommand{\\E}{E}\n\\newcommand{\\Expect}[1]{\\E\\left[ #1 \\right]}\n\\newcommand{\\Var}[1]{\\mathrm{Var}\\left[ #1 \\right]}\n\\newcommand{\\Cov}[2]{\\mathrm{Cov}\\left[#1,\\ #2\\right]}\n\\newcommand{\\given}{\\ \\vert\\ }\n\\newcommand{\\X}{\\mathbf{X}}\n\\newcommand{\\x}{\\mathbf{x}}\n\\newcommand{\\y}{\\mathbf{y}}\n\\newcommand{\\P}{\\mathcal{P}}\n\\newcommand{\\R}{\\mathbb{R}}\n\\newcommand{\\norm}[1]{\\left\\lVert #1 \\right\\rVert}\n\\newcommand{\\snorm}[1]{\\lVert #1 \\rVert}\n\\newcommand{\\tr}[1]{\\mbox{tr}(#1)}\n\\newcommand{\\brt}{\\widehat{\\beta}^R_{s}}\n\\newcommand{\\brl}{\\widehat{\\beta}^R_{\\lambda}}\n\\newcommand{\\bls}{\\widehat{\\beta}_{ols}}\n\\newcommand{\\blt}{\\widehat{\\beta}^L_{s}}\n\\newcommand{\\bll}{\\widehat{\\beta}^L_{\\lambda}}\n\\]" }, { - "objectID": "schedule/index.html#model-accuracy", - "href": "schedule/index.html#model-accuracy", - "title": " 
Schedule", - "section": "1 Model Accuracy", - "text": "1 Model Accuracy\n\nTopics\n\nModel selection; cross validation; information criteria; stepwise regression\n\nRequired reading\n\n[ISLR] Ch 2.2 (not 2.2.3), 5.1 (not 5.1.5), 6.1, 6.4\n\nOptional reading\n\n[ESL] 7.1-7.5, 7.10\n\n\n\n\n\nDate\nSlides\nDeadlines\n\n\n\n\n19 Sep 23\nRegression function, Bias and Variance\n\n\n\n21 Sep 23\nRisk estimation, Info Criteria\n\n\n\n26 Sep 23\nGreedy selection\n\n\n\n28 Sep 23\n\nHW 1 due" + "objectID": "schedule/slides/00-cv-for-many-models.html#some-data-and-4-models", + "href": "schedule/slides/00-cv-for-many-models.html#some-data-and-4-models", + "title": "UBC Stat406 2023W", + "section": "Some data and 4 models", + "text": "Some data and 4 models\n\ndata(\"mobility\", package = \"Stat406\")\n\nModel 1: Lasso on all predictors, use CV min\nModel 2: Ridge on all predictors, use CV min\nModel 3: OLS on all predictors (no tuning parameters)\nModel 4: (1) Lasso on all predictors, then (2) OLS on those chosen at CV min\n\nHow do I decide between these 4 models?" 
}, { - "objectID": "schedule/index.html#regularization-smoothing-and-trees", - "href": "schedule/index.html#regularization-smoothing-and-trees", - "title": " Schedule", - "section": "2 Regularization, smoothing, and trees", - "text": "2 Regularization, smoothing, and trees\n\nTopics\n\nRidge regression, lasso, and related; linear smoothers (splines, kernels); kNN\n\nRequired reading\n\n[ISLR] Ch 6.2, 7.1-7.7.1, 8.1, 8.1.1, 8.1.3, 8.1.4\n\nOptional reading\n\n[ESL] 3.4, 3.8, 5.4, 6.3\n\n\n\n\n\nDate\nSlides\nDeadlines\n\n\n\n\n3 Oct 23\nRidge, Lasso\n\n\n\n5 Oct 23\nCV for comparison, NP 1\n\n\n\n10 Oct 23\nNP 2, Why smoothing?\n\n\n\n12 Oct 23\nNo class (Makeup Monday)\n\n\n\n17 Oct 23\nOther\nHW 2 due" + "objectID": "schedule/slides/00-cv-for-many-models.html#cv-functions", + "href": "schedule/slides/00-cv-for-many-models.html#cv-functions", + "title": "UBC Stat406 2023W", + "section": "CV functions", + "text": "CV functions\n\nkfold_cv <- function(data, estimator, predictor, error_fun, kfolds = 5) {\n fold_labels <- sample(rep(seq_len(kfolds), length.out = nrow(data)))\n errors <- double(kfolds)\n for (fold in seq_len(kfolds)) {\n test_rows <- fold_labels == fold\n train <- data[!test_rows, ]\n test <- data[test_rows, ]\n current_model <- estimator(train)\n test$.preds <- predictor(current_model, test)\n errors[fold] <- error_fun(test)\n }\n mean(errors)\n}\n\nloo_cv <- function(dat) {\n mdl <- lm(Mobility ~ ., data = dat)\n mean( abs(residuals(mdl)) / abs(1 - hatvalues(mdl)) ) # MAE version\n}" }, { - "objectID": "schedule/index.html#classification", - "href": "schedule/index.html#classification", - "title": " Schedule", - "section": "3 Classification", - "text": "3 Classification\n\nTopics\n\nlogistic regression; LDA/QDA; naive bayes; trees\n\nRequired reading\n\n[ISLR] Ch 2.2.3, 5.1.5, 4-4.5, 8.1.2\n\nOptional reading\n\n[ESL] 4-4.4, 9.2, 13.3\n\n\n\n\n\nDate\nSlides\nDeadlines\n\n\n\n\n19 Oct 23\nClassification, LDA and QDA\n\n\n\n24 Oct 23\nLogistic 
regression\n\n\n\n26 Oct 23\nGradient descent, Other losses\n\n\n\n31 Oct 23\nNonlinear" + "objectID": "schedule/slides/00-cv-for-many-models.html#experiment-setup", + "href": "schedule/slides/00-cv-for-many-models.html#experiment-setup", + "title": "UBC Stat406 2023W", + "section": "Experiment setup", + "text": "Experiment setup\n\n# prepare our data\n# note that mob has only continuous predictors, otherwise could be trouble\nmob <- mobility[complete.cases(mobility), ] |> select(-ID, -State, -Name)\n# avoid doing this same operation a bunch\nxmat <- function(dat) dat |> select(!Mobility) |> as.matrix()\n\n# set up our model functions\nlibrary(glmnet)\nmod1 <- function(dat, ...) cv.glmnet(xmat(dat), dat$Mobility, type.measure = \"mae\", ...)\nmod2 <- function(dat, ...) cv.glmnet(xmat(dat), dat$Mobility, alpha = 0, type.measure = \"mae\", ...)\nmod3 <- function(dat, ...) glmnet(xmat(dat), dat$Mobility, lambda = 0, ...) # just does lm()\nmod4 <- function(dat, ...) cv.glmnet(xmat(dat), dat$Mobility, relax = TRUE, gamma = 1, type.measure = \"mae\", ...)\n\n# this will still \"work\" on mod3, because there's only 1 s\npredictor <- function(mod, dat) drop(predict(mod, newx = xmat(dat), s = \"lambda.min\"))\n\n# chose mean absolute error just 'cause\nerror_fun <- function(testdata) mean(abs(testdata$Mobility - testdata$.preds))" }, { - "objectID": "schedule/index.html#modern-techniques", - "href": "schedule/index.html#modern-techniques", - "title": " Schedule", - "section": "4 Modern techniques", - "text": "4 Modern techniques\n\nTopics\n\nbagging; boosting; random forests; neural networks\n\nRequired reading\n\n[ISLR] 5.2, 8.2, 10.1, 10.2, 10.6, 10.7\n\nOptional reading\n\n[ESL] 10.1-10.10 (skip 10.7), 11.1, 11.3, 11.4, 11.7\n\n\n\n\n\nDate\nSlides\nDeadlines\n\n\n\n\n2 Nov 23\nThe bootstrap\nHW 3 due\n\n\n7 Nov 23\nBagging and random forests, Boosting\n\n\n\n9 Nov 23\nIntro to neural nets\n\n\n\n14 Nov 23\nNo class. 
(Midterm break)\n\n\n\n16 Nov 23\nEstimating neural nets\n\n\n\n21 Nov 23\nNeural nets wrapup\nHW 4 due" + "objectID": "schedule/slides/00-cv-for-many-models.html#run-the-experiment", + "href": "schedule/slides/00-cv-for-many-models.html#run-the-experiment", + "title": "UBC Stat406 2023W", + "section": "Run the experiment", + "text": "Run the experiment\n\nall_model_funs <- lst(mod1, mod2, mod3, mod4)\nall_fits <- map(all_model_funs, .f = exec, dat = mob)\n\n# unfortunately, does different splits for each method, so we use 10, \n# it would be better to use the _SAME_ splits\nten_fold_cv <- map_dbl(all_model_funs, ~ kfold_cv(mob, .x, predictor, error_fun, 10)) \n\nin_sample_cv <- c(\n mod1 = min(all_fits[[1]]$cvm),\n mod2 = min(all_fits[[2]]$cvm),\n mod3 = loo_cv(mob),\n mod4 = min(all_fits[[4]]$cvm)\n)\n\ntib <- bind_rows(in_sample_cv, ten_fold_cv)\ntib$method = c(\"in_sample\", \"out_of_sample\")\ntib\n\n# A tibble: 2 × 5\n mod1 mod2 mod3 mod4 method \n <dbl> <dbl> <dbl> <dbl> <chr> \n1 0.0159 0.0161 0.0164 0.0156 in_sample \n2 0.0158 0.0161 0.0165 0.0161 out_of_sample\n\n\n\n\nUBC Stat 406 - 2023" }, { - "objectID": "schedule/index.html#unsupervised-learning", - "href": "schedule/index.html#unsupervised-learning", - "title": " Schedule", - "section": "5 Unsupervised learning", - "text": "5 Unsupervised learning\n\nTopics\n\ndimension reduction and clustering\n\nRequired reading\n\n[ISLR] 12\n\nOptional reading\n\n[ESL] 8.5, 13.2, 14.3, 14.5.1, 14.8, 14.9\n\n\n\n\n\nDate\nSlides\nDeadlines\n\n\n\n\n23 Nov 23\nIntro to PCA, Issues with PCA\n\n\n\n28 Nov 23\nPCA v KPCA\n\n\n\n30 Nov 23\nK means clustering\n\n\n\n5 Dec 23\nHierarchical clustering\n\n\n\n7 Dec 23\n\nHW 5 due" + "objectID": "course-setup.html", + "href": "course-setup.html", + "title": "Guide for setting up the course infrastructure", + "section": "", + "text": "Version 2023\nThis guide (hopefully) gives enough instructions for recreating new iterations of Stat 406." 
}, { - "objectID": "schedule/index.html#f-final-exam", - "href": "schedule/index.html#f-final-exam", - "title": " Schedule", - "section": "F Final exam", - "text": "F Final exam\nDate and time TBD.\n\n\n\n\n\n\nImportant\n\n\n\nDo not make any plans to leave Vancouver before the final exam date is announced.\n\n\n\nIn person attendance is required (per Faculty of Science guidelines)\nYou must bring your computer as the exam will be given through Canvas\nPlease arrange to borrow one from the library if you do not have your own. Let me know ASAP if this may pose a problem.\nYou may bring 2 sheets of front/back 8.5x11 paper with any notes you want to use. No other materials will be allowed.\nThere will be no required coding, but I may show code or output and ask questions about it.\nIt will be entirely multiple choice / True-False / matching, etc. Delivered on Canvas." + "objectID": "course-setup.html#github-org", + "href": "course-setup.html#github-org", + "title": "Guide for setting up the course infrastructure", + "section": "Github Org", + "text": "Github Org\n\nCreate a GitHub.com organization\n\nThis is free for faculty with instructor credentials.\nAllows more comprehensive GitHub actions, PR templates and CODEOWNER behaviour than the UBC Enterprise version\nDownside is getting students added (though we include R scripts for this)\n\nOnce done, go to https://github.com/watching. Click the Red Down arrow “Unwatch all”. Then select this Org. 
The TAs should do the same.\n\n\nPermissions and structure\nSettings > Member Privileges\nWe list only the important ones.\n\nBase Permissions: No Permission\nRepository creation: None\nRepo forking: None\nPages creation: None\nTeam creation rules: No\n\nBe sure to click save in each area after making changes.\nSettings > Actions > General\nAll repositories: Allow all actions and reusable workflows.\nWorkflow permissions: Read and write permissions.\n\n\nTeams\n\n2 teams, one for the TAs and one for the students\nYou must then manually add the teams to any repos they should access\n\nI generally give the TAs “Write” permission, and the students “Read” permission with some exceptions. See the Repos section below." }, { - "objectID": "syllabus.html", - "href": "syllabus.html", - "title": " Syllabus", - "section": "", - "text": "Term 2023 Winter 1: 05 Sep - 07 Dec 2023" + "objectID": "course-setup.html#repos", + "href": "course-setup.html#repos", + "title": "Guide for setting up the course infrastructure", + "section": "Repos", + "text": "Repos\nThere are typically about 10 repositories. Homeworks and Labs each have 3 with very similar behaviours.\nBe careful copying directories. All of them have hidden files and folders, e.g. .git. Of particular importance are the .github directories which contain PR templates and GitHub Actions. Also relevant are the .Rprofile files which try to override Student Language settings and avoid unprintible markdown characters.\n\nHomeworks\n\nhomework-solutions\nThis is where most of the work happens. My practice is to create the homework solutions first. I edit these (before school starts) until I’m happy. I then duplicate the file and remove the answers. The result is hwxx-instructions.Rmd. The .gitignore file should ignore all of the solutions and commmit only the instructions. 
Then, about 1 week after the deadline, I adjust the .gitignore and push the solution files.\n\nStudents have Read permission.\nTAs have Write permission.\nThe preamble.tex file is common to HWs and Labs. It creates a lavender box where the solution will go. This makes life easy for the TAs.\n\n\n\nhomework-solutions-private\nExactly the same as homework-solutions except that all solutions are available from the beginning for TA access. To create this, after I’m satisfied with homework-solutions I copy all files (not the directory) into a new directory, git init then upload to the org. The students never have permission here.\n\n\nhomework-template\nThis is a “template repo” used for creating student specific homework-studentgh repos (using the setup scripts).\nVery Important: copy the hwxx-instructions files over to a new directory. Do NOT copy the directory or you’ll end up with the solutions visible to the students.\nThen rename hwxx-instructions.Rmd to hwxx.Rmd. Now the students have a .pdf with instructions, and a template .Rmd to work on.\nOther important tasks: * The .gitignore is more elaborate in an attempt to avoid students pushing junk into these repos. * The .github directory contains 3 files: CODEOWNERS begins as an empty doc which will be populated with the assigned grader later; pull_request_template.md is used for all HW submission PRs; workflows contains a GH-action to comment on the PR with the date+time when the PR is opened. * Under Settings > General, select “Template repository”. 
This makes it easier to duplicate to the student repos.\n\n\n\nLabs\nThe three Labs repos operate exactly as the analogous homework repos.\n\nlabs-solutions\nDo any edits here before class begins.\n\n\nlabs-solutions-private\nSame as with the homeworks\n\n\nlabs-template\nSame as with the homeworks\n\n\n\nclicker-solutions\nThis contains the complete set of clicker questions.\nAnswers are hidden in comments on the presentation.\nI release them incrementally after each module (copying over from my clicker deck).\n\n\nopen-pr-log\nThis contains a some GitHub actions to automatically keep track of open PRs for the TAs.\nIt’s still in testing phase, but should work properly. It will create two markdown docs, 1 for labs and 1 for homework. Each shows the assigned TA, the date the PR was opened, and a link to the PR. If everything is configured properly, it should run automatically at 3am every night.\n\nOnly the TAs should have access.\nUnder Settings > Secrets and Variables > Actions you must add a “Repository Secret”. This should be a GitHub Personal Access Token created in your account (Settings > Developer settings > Tokens (classic)). It needs Repo, Workflow, and Admin:Org permissions. I set it to expire at the end of the course. I use it only for this purpose (rather than my other tokens for typical logins).\n\n\n\n.github / .github-private\nThese contains a README that gives some basic information about the available repos and the course. It’s visible Publically, and appears on the Org homepage for all to see. The .github-private has the same function, but applies only to Org members.\n\n\nbakeoff-bakeoff\nThis is for the bonus for HW4. Both TAs and Students have access. I put the TA team as CODEOWNERS and protect the main branch (Settings > Branches > Branch Protection Rules). Here, we “Require approvals” and “Require Review from Code Owners”." 
}, { - "objectID": "syllabus.html#course-info", - "href": "syllabus.html#course-info", - "title": " Syllabus", - "section": "Course info", - "text": "Course info\nInstructor:\nDaniel McDonald\nOffice: Earth Sciences Building 3106\nWebsite: https://dajmcdon.github.io/\nEmail: daniel@stat.ubc.ca\nSlack: @prof-daniel\nOffice hours:\nMonday (TA), 2-3pm ESB 1045\nTuesday (DJM), 4-5pm ESB 4182\nThursday (TA), 3-4pm ESB 3174\nFriday (TA/DJM), 10-11am Zoom (link on Canvas)\nCourse webpage:\nWWW: https://ubc-stat.github.io/stat-406/\nGithub: https://github.com/stat-406-2023\nSee also Canvas\nLectures:\nTue/Thu 0800h - 0930h\n(In person) Earth Sciences Building (ESB) 1012\nTextbooks:\n[ISLR]\n[ESL]\nPrerequisite:\nSTAT 306 or CPSC 340" + "objectID": "course-setup.html#r-package", + "href": "course-setup.html#r-package", + "title": "Guide for setting up the course infrastructure", + "section": "R package", + "text": "R package\nThis is hosted at https://github.com/ubc-stat/stat-406-rpackage/. The main purposes are:\n\nDocumentation of datasets used in class, homework, and labs (if not in other R packages)\nProvide a few useful functions.\nInstall all the packages the students need at once, and try to compile LaTeX.\n\nPackage requirements are done manually, unfortunately. Typically, I’ll open the various projects in RStudio and run sort(unique(renv::dependencies()$Package)). It’s not infallible, but works well.\nAll necessary packages should go in “Suggests:” in the DESCRIPTION. This avoids build errors. Note that install via remotes::install_github() then requires dependencies = TRUE." }, { - "objectID": "syllabus.html#course-objectives", - "href": "syllabus.html#course-objectives", - "title": " Syllabus", - "section": "Course objectives", - "text": "Course objectives\nThis is a course in statistical learning methods. 
Based on the theory of linear models covered in Stat 306, this course will focus on applying many techniques of data analysis to interesting datasets.\nThe course combines analysis with methodology and computational aspects. It treats both the “art” of understanding unfamiliar data and the “science” of analyzing that data in terms of statistical properties. The focus will be on practical aspects of methodology and intuition to help students develop tools for selecting appropriate methods and approaches to problems in their own lives.\nThis is not a “how to program” course, nor a “tour of machine learning methods”. Rather, this course is about how to understand some ML methods. STAT 306 tends to give background in many of the tools of understanding as well as working with already-written R packages. On the other hand, CPSC 340 introduces many methods with a focus on “from-scratch” implementation (in Julia or Python). This course will try to bridge the gap between these approaches. Depending on which course you took, you may be more or less skilled in some aspects than in others. That’s OK and expected.\n\nLearning outcomes\n\nAssess the prediction properties of the supervised learning methods covered in class;\nCorrectly use regularization to improve predictions from linear models, and also to identify important explanatory variables;\nExplain the practical difference between predictions obtained with parametric and non-parametric methods, and decide in specific applications which approach should be used;\nSelect and construct appropriate ensembles to obtain improved predictions in different contexts;\nUse and interpret principal components and other dimension reduction techniques;\nEmploy reasonable coding practices and understand basic R syntax and function.\nWrite reports and use proper version control; engage with standard software." 
+ "objectID": "course-setup.html#worksheets", + "href": "course-setup.html#worksheets", + "title": "Guide for setting up the course infrastructure", + "section": "Worksheets", + "text": "Worksheets\nThese are derived from Matías’s Rmd notes from 2018. They haven’t been updated much.\nThey are hosted at https://github.com/ubc-stat/stat-406-worksheets/.\nI tried requiring them one year. The model was to distribute the R code for the chapters with some random lines removed. Then the students could submit the completed code for small amounts of credit. It didn’t seem to move the needle much and was hard to grade (autograding would be nice here).\nNote that there is a GHaction that automatically renders the book from source and pushes to the gh-pages branch. So local build isn’t necessary and derivative files should not be checked in to version control." }, { - "objectID": "syllabus.html#textbooks", - "href": "syllabus.html#textbooks", - "title": " Syllabus", - "section": "Textbooks", - "text": "Textbooks\n\nRequired:\nAn Introduction to Statistical Learning, James, Witten, Hastie, Tibshirani, 2013, Springer, New York. (denoted [ISLR])\nAvailable free online: https://www.statlearning.com\n\n\nOptional (but excellent):\nThe Elements of Statistical Learning, Hastie, Tibshirani, Friedman, 2009, Second Edition, Springer, New York. (denoted [ESL])\nAlso available free online: https://web.stanford.edu/~hastie/ElemStatLearn/\nThis second book is a more advanced treatment of a superset of the topics we will cover. If you want to learn more and understand the material more deeply, this is the book for you. All readings from [ESL] are optional." 
+ "objectID": "course-setup.html#course-website-lectures", + "href": "course-setup.html#course-website-lectures", + "title": "Guide for setting up the course infrastructure", + "section": "Course website / lectures", + "text": "Course website / lectures" }, { - "objectID": "syllabus.html#course-assessment-opportunities", - "href": "syllabus.html#course-assessment-opportunities", - "title": " Syllabus", - "section": "Course assessment opportunities", - "text": "Course assessment opportunities\n\nEffort-based component\nLabs: [0, 20]\nHomework assignments: [0, 50]\nClickers: [0, 10]\nTotal: min(65, Labs + Homework + Clickers)\n\n\nLabs\nThese are intended to keep you on track. They are to be submitted via pull requests in your personal labs-<username> repo (see the computing tab for descriptions on how to do this).\nLabs typically have a few questions for you to answer or code to implement. These are to be done during lab periods. But you can do them on your own as well. These are worth 2 points each up to a maximum of 20 points. They are due at 2300 on the day of your assigned lab section.\nIf you attend lab, you may share a submission with another student (with acknowledgement on the PR). If you do not attend lab, you must work on your own (subject to the collaboration instructions for Assignments below).\n\nRules.\nYou must submit via PR by the deadline. Your PR must include at least 3 commits. After lab 2, failure to include at least 3 commits will result in a maximum score of 1.\n\n\n\n\n\n\nTip\n\n\n\nIf you attend your lab section, you may work in pairs, submitting a single document to one of your Repos. Be sure to put both names on the document, and mention the collaboration on your PR. 
You still have until 11pm to submit.\n\n\n\n\nMarking.\nThe overriding theme here is “if you put in the effort, you’ll get all the points.” Grading scheme:\n\n2 if basically all correct\n\n1 if complete but with some major errors, or mostly complete and mostly correct\n\n0 otherwise\n\nYou may submit as many labs as you wish up to 20 total points.\nThere are no appeals on grades.\nIt’s important here to recognize just how important active participation in these activities is. You learn by doing, and this is your opportunity to learn in a low-stakes environment. One thing you’ll learn, for example, is that all animals urinate in 21 seconds.1\n\n\n\nAssignments\nThere will be 5 assignments. These are submitted via pull request similar to the labs but to the homework-<username> repo. Each assignment is worth up to 10 points. They are due by 2300 on the deadline. You must make at least 5 commits. Failure to have at least 5 commits will result in a 25% deduction on HW1 and a 50% deduction thereafter. No exceptions.\nAssignments are typically lightly marked. The median last year was 8/10. But they are not easy. Nor are they short. They often involve a combination of coding, writing, description, and production of statistical graphics.\nAfter receiving a mark and feedback, if you score less than 7, you may make corrections to bring your total to 7. This means, if you fix everything that you did wrong, you get 7. Not 10. The revision must be submitted within 1 week of getting your mark. Only 1 revision per assignment. The TA decision is final. Note that the TAs will only regrade parts you missed, but if you somehow make it worse, they can deduct more points.\nThe revision allowance applies only if you got 3 or more points of “content” deductions. 
If you missed 3 points for content and 2 more for “penalties” (like insufficient commits, code that runs off the side of the page, etc), then you are ineligible.\n\nPolicy on collaboration on assignments\nDiscussing assignments with your classmates is allowed and encouraged, but it is important that every student get practice working on these problems. This means that all the work you turn in must be your own. The general policy on homework collaboration is:\n\nYou must first make a serious effort to solve the problem.\nIf you are stuck after doing so, you may ask for help from another student. You may discuss strategies to solve the problem, but you may not look at their code, nor may they spell out the solution to you step-by-step.\nOnce you have gotten help, you must write your own solution individually. You must disclose, in your GitHub pull request, the names of anyone from whom you got help.\nThis also applies in reverse: if someone approaches you for help, you must not provide it unless they have already attempted to solve the problem, and you may not share your code or spell out the solution step-by-step.\n\n\n\n\n\n\n\nWarning\n\n\n\nAdherence to the above policy means that identical answers, or nearly identical answers, cannot occur. Thus, such occurrences are violations of the Course’s Academic honesty policy.\n\n\nThese rules also apply to getting help from other people such as friends not in the course (try the problem first, discuss strategies, not step-by-step solutions, acknowledge those from whom you received help).\nYou may not use homework help websites, ChatGPT, Stack Overflow, and so on under any circumstances. The purpose here is to learn. Good faith efforts toward learning are rewarded.\nYou can always, of course, ask me for help on Slack. 
And public Slack questions are allowed and encouraged.\nYou may also use external sources (books, websites, papers, …) to\n\nLook up programming language documentation, find useful packages, find explanations for error messages, or remind yourself about the syntax for some feature. I do this all the time in the real world. Wikipedia is your friend.\nRead about general approaches to solving specific problems (e.g. a guide to dynamic programming or a tutorial on unit testing in your programming language), or\nClarify material from the course notes or assignments.\n\nBut external sources must be used to support your solution, not to obtain your solution. You may not use them to\n\nFind solutions to the specific problems assigned as homework (in words or in code)—you must independently solve the problem assigned, not translate a solution presented online or elsewhere.\nFind course materials or solutions from this or similar courses from previous years, or\nCopy text or code to use in your submissions without attribution.\n\nIf you use code from online or other sources, you must include code comments identifying the source. It must be clear what code you wrote and what code is from other sources. This rule also applies to text, images, and any other material you submit.\nPlease talk to me if you have any questions about this policy. Any form of plagiarism or cheating will result in sanctions to be determined by me, including grade penalties (such as negative points for the assignment or reductions in letter grade) or course failure. I am obliged to report violations to the appropriate University authorities. See also the text below.\n\n\n\nClickers\nThese are short multiple choice and True / False questions. They happen in class. For each question, correct answers are worth 4, incorrect answers are worth 2. You get 0 points for not answering.\nSuppose there are N total clicker questions, and you have x points. 
Your final score for this component is\nmax(0, min(5 * x / N - 5, 10)).\nNote that if your average is less than 1, you get 0 points in this component.\n\n\n\n\n\n\nImportant\n\n\n\nIn addition, your final grade in this course will be reduced by 1 full letter grade.\n\n\nThis means that if you did everything else and get a perfect score on the final exam, you will get a 79. Two people did this last year. They were sad.\n\n\n\n\n\n\nWarning\n\n\n\nDON’T DO THIS!!\n\n\nThis may sound harsh, but think about what is required for such a penalty. You’d have to skip more than 50% of class meetings and get every question wrong when you are in class. This is an in-person course. It is not possible to get an A without attending class on a regular basis.\nTo compensate, I will do my best to post recordings of lectures. Past experience has shown 2 things:\n\nYou learn better by attending class than by skipping and “watching”.\nSometimes the technology messes up. So there’s no guarantee that these will be available.\n\nThe purpose is to let you occasionally miss class for any reason with minimal consequences. See also below. If for some reason you need to miss longer streches of time, please contact me or discuss your situation with your Academic Advisor as soon as possible. Don’t wait until December.\n\n\n\nYour score on HW, Labs, and Clickers\nThe total you can accumulate across these 3 components is 65 points. But you can get there however you want. The total available is 80 points. The rest is up to you. But with choice, comes responsibility.\nRules:\n\nNothing dropped.\nNo extensions.\nIf you miss a lab or a HW deadline, then you miss it.\nMake up for missed work somewhere else.\nIf you isolate due to Covid, fine. You miss a few clickers and maybe a lab (though you can do it remotely).\nIf you have a job interview and can’t complete an assignment on time, then skip it.\n\nWe’re not going to police this stuff. You don’t need to let me know. 
There is no reason that every single person enrolled in this course shouldn’t get > 65 in this class.\nIllustrative scenarios:\n\nDoing 80% on 5 homeworks, coming to class and getting 50% correct, get 2 points on 8 labs gets you 65 points.\nDoing 90% on 5 homeworks, getting 50% correct on all the clickers, averaging 1/2 on all the labs gets you 65 points.\nGoing to all the labs and getting 100%, 100% on 4 homeworks, plus being wrong on every clicker gets you 65 points\n\nChoose your own adventure. Note that the biggest barrier to getting to 65 is skipping the assignments.\n\n\n\n\nFinal exam\n35 points\n\n\nAll multiple choice, T/F, matching.\nThe clickers are the best preparation.\nQuestions may ask you to understand or find mistakes in code.\nNo writing code.\n\nThe Final is very hard. By definition, it cannot be effort-based.\nIt is intended to separate those who really understand the material from those who don’t. Last year, the median was 50%. But if you put in the work (do all the effort points) and get 50%, you get an 83 (an A-). If you put in the work (do all the effort points) and skip the final, you get a 65. You do not have to pass the final to pass the course. You don’t even have to take the final.\nThe point of this scheme is for those who work hard to do well. But only those who really understand the material will get 90+." + "objectID": "course-setup.html#ghclass-package", + "href": "course-setup.html#ghclass-package", + "title": "Guide for setting up the course infrastructure", + "section": "{ghclass} package", + "text": "{ghclass} package" }, { - "objectID": "syllabus.html#health-issues-and-considerations", - "href": "syllabus.html#health-issues-and-considerations", - "title": " Syllabus", - "section": "Health issues and considerations", - "text": "Health issues and considerations\n\nCovid Safety in the Classroom\n\n\n\n\n\n\nImportant\n\n\n\nIf you think you’re sick, stay home no matter what.\n\n\nMasks. Masks are recommended. 
For our in-person meetings in this class, it is important that all of us feel as comfortable as possible engaging in class activities while sharing an indoor space. Masks are a primary tool to make it harder for Covid-19 to find a new host. Please feel free to wear one or not given your own personal circumstances. Note that there are some people who cannot wear a mask. These individuals are equally welcome in our class.\nVaccination. If you have not yet had a chance to get vaccinated against Covid-19, vaccines are available to you, free. See http://www.vch.ca/covid-19/covid-19-vaccine for help finding an appointment. Boosters will be available later this term. The higher the rate of vaccination in our community overall, the lower the chance of spreading this virus. You are an important part of the UBC community. Please arrange to get vaccinated if you have not already done so. The same goes for Flu.\n\n\nYour personal health\n\n\n\n\n\n\nWarning\n\n\n\nIf you are sick, it’s important that you stay home – no matter what you think you may be sick with (e.g., cold, flu, other).\n\n\n\nDo not come to class if you have Covid symptoms, have recently tested positive for Covid, or are required to quarantine. You can check this website to find out if you should self-isolate or self-monitor: http://www.bccdc.ca/health-info/diseases-conditions/covid-19/self-isolation#Who.\nYour precautions will help reduce risk and keep everyone safer. In this class, the marking scheme is intended to provide flexibility so that you can prioritize your health and still be able to succeed. All work can be completed outside of class with reasonable time allowances.\nIf you do miss class because of illness:\n\nMake a connection early in the term to another student or a group of students in the class. You can help each other by sharing notes. If you don’t yet know anyone in the class, post on the discussion forum to connect with other students.\nConsult the class resources on here and on Canvas. 
We will post all the slides, readings, and recordings for each class day.\nUse Slack for help.\nCome to virtual office hours.\nSee the marking scheme for reassurance about what flexibility you have. No part of your final grade will be directly impacted by missing class.\n\nIf you are sick on final exam day, do not attend the exam. You must follow up with your home faculty’s advising office to apply for deferred standing. Students who are granted deferred standing write the final exam at a later date. If you’re a Science student, you must apply for deferred standing (an academic concession) through Science Advising no later than 48 hours after the missed final exam/assignment. Learn more and find the application online. For additional information about academic concessions, see the UBC policy here.\n\n\n\n\n\n\n\nNote\n\n\n\nPlease talk with me if you have any concerns or ask me if you are worried about falling behind." + "objectID": "course-setup.html#canvas", + "href": "course-setup.html#canvas", + "title": "Guide for setting up the course infrastructure", + "section": "Canvas", + "text": "Canvas\nI use the shell provided by FoS.\nNothing else goes here, but you have to update all the links.\nTwo Canvas Quizzes: * Quiz 0 collects GitHub accounts, ensures that students read the syllabus. Due in Week 1. * Final Exam is the final * I usually record lectures (automatically) using the classroom tech that automatically uploads. * Update the various links on the Homepage." }, { - "objectID": "syllabus.html#university-policies", - "href": "syllabus.html#university-policies", - "title": " Syllabus", - "section": "University policies", - "text": "University policies\nUBC provides resources to support student learning and to maintain healthy lifestyles but recognizes that sometimes crises arise and so there are additional resources to access including those for survivors of sexual violence. 
UBC values respect for the person and ideas of all members of the academic community. Harassment and discrimination are not tolerated nor is suppression of academic freedom. UBC provides appropriate accommodation for students with disabilities and for religious, spiritual and cultural observances. UBC values academic honesty and students are expected to acknowledge the ideas generated by others and to uphold the highest academic standards in all of their actions. Details of the policies and how to access support are available here.\n\nAcademic honesty and standards\nUBC Vancouver Statement\nAcademic honesty is essential to the continued functioning of the University of British Columbia as an institution of higher learning and research. All UBC students are expected to behave as honest and responsible members of an academic community. Breach of those expectations or failure to follow the appropriate policies, principles, rules, and guidelines of the University with respect to academic honesty may result in disciplinary action.\nFor the full statement, please see the 2022/23 Vancouver Academic Calendar\nCourse specific\nSeveral commercial services have approached students regarding selling class notes/study guides to their classmates. Please be advised that selling a faculty member’s notes/study guides individually or on behalf of one of these services using UBC email or Canvas, violates both UBC information technology and UBC intellectual property policy. Selling the faculty member’s notes/study guides to fellow students in this course is not permitted. Violations of this policy will be considered violations of UBC Academic Honesty and Standards and will be reported to the Dean of Science as a violation of course rules. Sanctions for academic misconduct may include a failing grade on the assignment for which the notes/study guides are being sold, a reduction in your final course grade, a failing grade in the course, among other possibilities. 
Similarly, contracting with any service that results in an individual other than the enrolled student providing assistance on quizzes or exams or posing as an enrolled student is considered a violation of UBC’s academic honesty standards.\nSome of the problems that are assigned are similar or identical to those assigned in previous years by me or other instructors for this or other courses. Using proofs or code from anywhere other than the textbooks, this year’s course notes, or the course website is not only considered cheating (as described above), it is easily detectable cheating. Such behavior is strictly forbidden.\nIn previous years, I have caught students cheating on the exams or assignments. I did not enforce any penalty because the action did not help. Cheating, in my experience, occurs because students don’t understand the material, so the result is usually a failing grade even before I impose any penalty and report the incident to the Dean’s office. I carefully structure exams and assignments to make it so that I can catch these issues. I will catch you, and it does not help. Do your own work, and use the TAs and me as resources. If you are struggling, we are here to help.\n\n\n\n\n\n\nCaution\n\n\n\nIf I suspect cheating, your case will be forwarded to the Dean’s office. No questions asked.\n\n\nGenerative AI\nTools to help you code more quickly are rapidly becoming more prevalent. I use them regularly myself. The point of this course is not to “complete assignments” but to learn coding (and other things). With that goal in mind, I recommend you avoid the use of Generative AI. It is unlikely to contribute directly to your understanding of the material. Furthermore, I have experimented with certain tools on the assignments for this course and have found the results underwhelming.\nThe material in this course is best learned through trial and error. Avoiding this mechanism (with generative AI or by copying your friend) is a short-term solution at best. 
I have tried to structure this course to discourage these types of short cuts, and minimize the pressure you may feel to take them.\n\n\nAcademic Concessions\nThese are handled according to UBC policy. Please see\n\nUBC student services\nUBC Vancouver Academic Calendar\nFaculty of Science Concessions\n\n\n\nMissed final exam\nStudents who miss the final exam must report to their Faculty advising office within 72 hours of the missed exam, and must supply supporting documentation. Only your Faculty Advising office can grant deferred standing in a course. You must also notify your instructor prior to (if possible) or immediately after the exam. Your instructor will let you know when you are expected to write your deferred exam. Deferred exams will ONLY be provided to students who have applied for and received deferred standing from their Faculty.\n\n\nTake care of yourself\nCourse work at this level can be intense, and I encourage you to take care of yourself. Do your best to maintain a healthy lifestyle this semester by eating well, exercising, avoiding drugs and alcohol, getting enough sleep and taking some time to relax. This will help you achieve your goals and cope with stress. I struggle with these issues too, and I try hard to set aside time for things that make me happy (cooking, playing/listening to music, exercise, going for walks).\nAll of us benefit from support during times of struggle. If you are having any problems or concerns, do not hesitate to speak with me. There are also many resources available on campus that can provide help and support. Asking for support sooner rather than later is almost always a good idea.\nIf you or anyone you know experiences any academic stress, difficult life events, or feelings like anxiety or depression, I strongly encourage you to seek support. UBC Counseling Services is here to help: call 604 822 3811 or visit their website. 
Consider also reaching out to a friend, faculty member, or family member you trust to help get you the support you need.\n\nA dated PDF is available at this link." + "objectID": "course-setup.html#slack", + "href": "course-setup.html#slack", + "title": "Guide for setting up the course infrastructure", + "section": "Slack", + "text": "Slack\n\nSet up a free Org. Invite link gets posted to Canvas.\nI add @students.ubc.ca, @ubc.ca, @stat.ubc.ca to the whitelist.\nI also post the invite on Canvas.\nCreate channels before people join. That way you can automatically add everyone to channels all at once. I do one for each module, 1 for code/github, 1 for mechanics. + 1 for the TAs (private)\nClick through all the settings. It’s useful to adjust these a bit." }, { - "objectID": "syllabus.html#footnotes", - "href": "syllabus.html#footnotes", - "title": " Syllabus", - "section": "Footnotes", - "text": "Footnotes\n\n\nA careful reading of this paper with the provocative title “Law of Urination: all mammals empty their bladders over the same duration” reveals that the authors actually mean something far less precise. In fact, their claim is more accurately stated as “mammals over 3kg in body weight urinate in 21 seconds with a standard deviation of 13 seconds”. But the accurate characterization is far less publicity-worthy.↩︎" + "objectID": "course-setup.html#clickers", + "href": "course-setup.html#clickers", + "title": "Guide for setting up the course infrastructure", + "section": "Clickers", + "text": "Clickers\nSee https://lthub.ubc.ca/guides/iclicker-cloud-instructor-guide/\nI only use “Polling” no “Quizzing” and no “Attendance”\n\nIn clicker Settings > Polling > Sharing. Turn off the Sending (to avoid students doing it at home)\nNo participation points.\n2 points for correct, 2 for answering.\nIntegrations > Set this up with Canvas. Sync the roster. You’ll likely have to repeat this near the Add/Drop Deadline.\nI only sync the total, since I’ll recalibrate later." 
}, { - "objectID": "computing/ubuntu.html", - "href": "computing/ubuntu.html", - "title": " Ubuntu", + "objectID": "computing/windows.html", + "href": "computing/windows.html", + "title": " Windows", "section": "", - "text": "If you have already installed Git, LaTeX, or any of the R packages, you should be OK. However, if you have difficulty with Homework or Labs, we may ask you to uninstall and try again.\nIn order to be able to support you effectively and minimize setup issues and software conflicts, we suggest you install the required software as specified below." + "text": "If you have already installed Git, LaTeX, or any of the R packages, you should be OK. However, if you have difficulty with Homework or Labs, we may ask you to uninstall and try again.\nIn order to be able to support you effectively and minimize setup issues and software conflicts, we suggest you install the required software as specified below.\nIn all the sections below, if you are presented with the choice to download either a 64-bit (also called x64) or a 32-bit (also called x86) version of the application always choose the 64-bit version." }, { - "objectID": "computing/ubuntu.html#installation-notes", - "href": "computing/ubuntu.html#installation-notes", - "title": " Ubuntu", + "objectID": "computing/windows.html#installation-notes", + "href": "computing/windows.html#installation-notes", + "title": " Windows", "section": "", - "text": "If you have already installed Git, LaTeX, or any of the R packages, you should be OK. However, if you have difficulty with Homework or Labs, we may ask you to uninstall and try again.\nIn order to be able to support you effectively and minimize setup issues and software conflicts, we suggest you install the required software as specified below." + "text": "If you have already installed Git, LaTeX, or any of the R packages, you should be OK. 
However, if you have difficulty with Homework or Labs, we may ask you to uninstall and try again.\nIn order to be able to support you effectively and minimize setup issues and software conflicts, we suggest you install the required software as specified below.\nIn all the sections below, if you are presented with the choice to download either a 64-bit (also called x64) or a 32-bit (also called x86) version of the application always choose the 64-bit version." }, { - "objectID": "computing/ubuntu.html#ubuntu-software-settings", - "href": "computing/ubuntu.html#ubuntu-software-settings", - "title": " Ubuntu", - "section": "Ubuntu software settings", - "text": "Ubuntu software settings\nTo ensure that you are installing the right version of the software in this guide, open “Software & Updates” and make sure that the boxes in the screenshot are checked (this is the default configuration)." + "objectID": "computing/windows.html#terminal", + "href": "computing/windows.html#terminal", + "title": " Windows", + "section": "Terminal", + "text": "Terminal\nBy “Terminal” below we mean the command line program called “Terminal”. Note that this is also available Inside RStudio. Either works." }, { - "objectID": "computing/ubuntu.html#github", - "href": "computing/ubuntu.html#github", - "title": " Ubuntu", + "objectID": "computing/windows.html#github", + "href": "computing/windows.html#github", + "title": " Windows", "section": "GitHub", "text": "GitHub\nIn Stat 406 we will use the publicly available GitHub.com. If you do not already have an account, please sign up for one at GitHub.com\nSign up for a free account at GitHub.com if you don’t have one already." }, { - "objectID": "computing/ubuntu.html#git", - "href": "computing/ubuntu.html#git", - "title": " Ubuntu", - "section": "Git", - "text": "Git\nWe will be using the command line version of Git as well as Git through RStudio. 
Some of the Git commands we will use are only available since Git 2.23, so if your Git is older than this version, so if your Git is older than this version, we ask you to update it using the following commands:\nsudo apt update\nsudo apt install git\nYou can check your git version with the following command:\ngit --version\n\n\n\n\n\n\nNote\n\n\n\nIf you run into trouble, please see the Install Git Linux section from Happy Git and GitHub for the useR for additional help or strategies for Git installation.\n\n\n\nConfiguring Git user info\nNext, we need to configure Git by telling it your name and email. To do this, type the following into the terminal (replacing Jane Doe and janedoe@example.com, with your name and email that you used to sign up for GitHub, respectively):\ngit config --global user.name \"Jane Doe\"\ngit config --global user.email janedoe@example.com\n\n\n\n\n\n\nNote\n\n\n\nTo ensure that you haven’t made a typo in any of the above, you can view your global Git configurations by either opening the configuration file in a text editor (e.g. via the command nano ~/.gitconfig) or by typing git config --list --global).\n\n\nIf you have never used Git before, we recommend also setting the default editor:\ngit config --global core.editor nano\nIf you prefer VScode (and know how to set it up) or something else, feel free." + "objectID": "computing/windows.html#git-bash-and-windows-terminal", + "href": "computing/windows.html#git-bash-and-windows-terminal", + "title": " Windows", + "section": "Git, Bash, and Windows Terminal", + "text": "Git, Bash, and Windows Terminal\nAlthough these three are separate programs, we are including them in the same section here since they are packaged together in the same installer on Windows. 
Briefly, we will be using the Bash shell to interact with our computers via a command line interface, Git to keep a version history of our files and upload to/download from GitHub, and Windows Terminal to run both Bash and Git.\nGo to https://git-scm.com/download/win and download the windows version of git. After the download has finished, run the installer and accept the default configuration for all pages except for the following:\n\nOn the Select Components page, add a Git Bash profile to Windows Terminal.\n\n\nTo install windows terminal visit this link and click Get to open it in Windows Store. Inside the Store, click Get again and then click Install. After installation, click Launch to start Windows Terminal. In the top of the window, you will see the tab bar with one open tab, a plus sign, and a down arrow. Click the down arrow and select Settings (or type the shortcut Ctrl + ,). In the Startup section, click the dropdown menu under Default profile and select Git Bash.\n\nYou can now launch the Windows terminal from the start menu or pin it to the taskbar like any other program (you can read the rest of the article linked above for additional tips if you wish). To make sure everything worked, close down Windows Terminal, and open it again. Git Bash should open by default, the text should be green and purple, and the tab should read MINGW64:/c/Users/$USERNAME (you should also see /c/Users/$USERNAME if you type pwd into the terminal). This screenshot shows what it should look like:\n\n\n\n\n\n\n\nNote\n\n\n\nWhenever we refer to “the terminal” in these installation instructions, we want you to use the Windows Terminal that you just installed with the Git Bash profile. Do not use Windows PowerShell, CMD, or anything else unless explicitly instructed to do so.\n\n\nTo open a new tab you can click the plus sign or use Ctrl + Shift + t (you can close a tab with Ctrl + Shift + w). 
To copy text from the terminal, you can highlight it with the mouse and then click Ctrl + Shift + c. To paste text you use Ctrl + Shift + v, try it by pasting the following into the terminal to check which version of Bash you just installed:\nbash --version\nThe output should look similar to this:\nGNU bash, version 4.4.23(1)-release (x86_64-pc-sys)\nCopyright (C) 2019 Free Software Foundation, Inc.\nLicense GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>\nThis is free software; you are free to change and redistribute it.\nThere is NO WARRANTY, to the extent permitted by law.\n\n\n\n\n\n\nNote\n\n\n\nIf there is a newline (the enter character) in the clipboard when you are pasting into the terminal, you will be asked if you are sure you want to paste since this newline will act as if you pressed enter and run the command. As a guideline you can press Paste anyway unless you are sure you don’t want this to happen.\n\n\nLet’s also check which version of git was installed:\ngit --version\ngit version 2.32.0.windows.2\n\n\n\n\n\n\nNote\n\n\n\nSome of the Git commands we will use are only available since Git 2.23, so make sure your Git is at least this version.\n\n\n\nConfiguring Git user info\nNext, we need to configure Git by telling it your name and email. To do this, type the following into the terminal (replacing Jane Doe and janedoe@example.com, with your name and email that you used to sign up for GitHub, respectively):\ngit config --global user.name \"Jane Doe\"\ngit config --global user.email janedoe@example.com\n\n\n\n\n\n\nNote\n\n\n\nTo ensure that you haven’t made a typo in any of the above, you can view your global Git configurations by either opening the configuration file in a text editor (e.g. 
via the command nano ~/.gitconfig) or by typing git config --list --global).\n\n\nIf you have never used Git before, we recommend also setting the default editor:\ngit config --global core.editor nano\nIf you prefer VScode (and know how to set it up) or something else, feel free." }, { - "objectID": "computing/ubuntu.html#latex", - "href": "computing/ubuntu.html#latex", - "title": " Ubuntu", + "objectID": "computing/windows.html#latex", + "href": "computing/windows.html#latex", + "title": " Windows", "section": "LaTeX", - "text": "LaTeX\nIt is possible you already have this installed.\nFirst try the following check in RStudio\nStat406::test_latex_installation()\nIf you see Green checkmarks, then you’re good.\nEven if it fails, follow the instructions, and try it again.\nIf it still fails, proceed with the instructions\n\nWe will install the lightest possible version of LaTeX and its necessary packages as possible so that we can render Jupyter notebooks and R Markdown documents to html and PDF. If you have previously installed LaTeX, please uninstall it before proceeding with these instructions.\nFirst, run the following command to make sure that /usr/local/bin is writable:\nsudo chown -R $(whoami):admin /usr/local/bin\n\n\n\n\n\n\nNote\n\n\n\nYou might be asked to enter your password during installation.\n\n\nNow open RStudio and run the following commands to install the tinytex package and setup tinytex:\ntinytex::install_tinytex()\nYou can check that the installation is working by opening a terminal and asking for the version of latex:\nlatex --version\nYou should see something like this if you were successful:\npdfTeX 3.141592653-2.6-1.40.23 (TeX Live 2022/dev)\nkpathsea version 6.3.4/dev\nCopyright 2021 Han The Thanh (pdfTeX) et al.\nThere is NO warranty. 
Redistribution of this software is\ncovered by the terms of both the pdfTeX copyright and\nthe Lesser GNU General Public License.\nFor more information about these matters, see the file\nnamed COPYING and the pdfTeX source.\nPrimary author of pdfTeX: Han The Thanh (pdfTeX) et al.\nCompiled with libpng 1.6.37; using libpng 1.6.37\nCompiled with zlib 1.2.11; using zlib 1.2.11\nCompiled with xpdf version 4.03" + "text": "LaTeX\nIt is possible you already have this installed.\nFirst try the following check in RStudio\nStat406::test_latex_installation()\nIf you see Green checkmarks, then you’re good.\nEven if it fails, follow the instructions, and try it again.\nNote that you might see two error messages regarding lua during the installation, you can safely ignore these, the installation will complete successfully after clicking “OK”.\nIf it still fails, proceed with the instructions\n\nIn RStudio, run the following commands to install the tinytex package and setup tinytex:\ninstall.packages('tinytex')\ntinytex::install_tinytex()\nIn order for Git Bash to be able to find the location of TinyTex, you will need to sign out of Windows and back in again. After doing that, you can check that the installation worked by opening a terminal and asking for the version of latex:\nlatex --version\nYou should see something like this if you were successful:\npdfTeX 3.141592653-2.6-1.40.23 (TeX Live 2021/W32TeX)\nkpathsea version 6.3.3\nCopyright 2021 Han The Thanh (pdfTeX) et al.\nThere is NO warranty. 
Redistribution of this software is\ncovered by the terms of both the pdfTeX copyright and\nthe Lesser GNU General Public License.\nFor more information about these matters, see the file\nnamed COPYING and the pdfTeX source.\nPrimary author of pdfTeX: Han The Thanh (pdfTeX) et al.\nCompiled with libpng 1.6.37; using libpng 1.6.37\nCompiled with zlib 1.2.11; using zlib 1.2.11\nCompiled with xpdf version 4.03" }, { - "objectID": "computing/ubuntu.html#github-pat", - "href": "computing/ubuntu.html#github-pat", - "title": " Ubuntu", + "objectID": "computing/windows.html#github-pat", + "href": "computing/windows.html#github-pat", + "title": " Windows", "section": "Github PAT", "text": "Github PAT\nYou’re probably familiar with 2-factor authentication for your UBC account or other accounts which is a very secure way to protect sensitive information (in case your password gets exposed). Github uses a Personal Access Token (PAT) for the Command Line Interface (CLI) and RStudio. This is different from the password you use to log in with a web browser. You will have to create one. There are some nice R functions that will help you along, and I find that easiest.\nComplete instructions are in Chapter 9 of Happy Git With R. Here’s the quick version (you need the usethis and gitcreds libraries, which you can install with install.packages(c(\"usethis\", \"gitcreds\"))):\n\nIn the RStudio Console, call usethis::create_github_token() This should open a webbrowser. In the Note field, write what you like, perhaps “Stat 406 token”. Then update the Expiration to any date after December 15. (“No expiration” is fine, though not very secure). Make sure that everything in repo is checked. Leave all other checks as is. Scroll to the bottom and click the green “Generate Token” button.\nThis should now give you a long string to Copy. It often looks like ghp_0asfjhlasdfhlkasjdfhlksajdhf9234u. Copy that. 
(You would use this instead of the browser password in RStudio when it asks for a password).\nTo store the PAT permanently in R (so you’ll never have to do this again, hopefully) call gitcreds::gitcreds_set() and paste the thing you copied there." }, { - "objectID": "computing/ubuntu.html#post-installation-notes", - "href": "computing/ubuntu.html#post-installation-notes", - "title": " Ubuntu", + "objectID": "computing/windows.html#post-installation-notes", + "href": "computing/windows.html#post-installation-notes", + "title": " Windows", "section": "Post-installation notes", "text": "Post-installation notes\nYou have completed the installation instructions, well done 🙌!" }, { - "objectID": "computing/ubuntu.html#attributions", - "href": "computing/ubuntu.html#attributions", - "title": " Ubuntu", + "objectID": "computing/windows.html#attributions", + "href": "computing/windows.html#attributions", + "title": " Windows", "section": "Attributions", "text": "Attributions\nThe DSCI 310 Teaching Team, notably, Anmol Jawandha, Tomas Beuzen, Rodolfo Lourenzutti, Joel Ostblom, Arman Seyed-Ahmadi, Florencia D’Andrea, and Tiffany Timbers." }, { - "objectID": "computing/mac_arm.html", - "href": "computing/mac_arm.html", - "title": " MacOS ARM", + "objectID": "computing/mac_x86.html", + "href": "computing/mac_x86.html", + "title": " MacOS x86", "section": "", "text": "If you have already installed Git, LaTeX, or any of the R packages, you should be OK. However, if you have difficulty with Homework or Labs, we may ask you to uninstall and try again.\nIn order to be able to support you effectively and minimize setup issues and software conflicts, we suggest you install the required software as specified below.\nIn all the sections below, if you are presented with the choice to download either a 64-bit (also called x64) or a 32-bit (also called x86) version of the application always choose the 64-bit version." 
}, { - "objectID": "computing/mac_arm.html#installation-notes", - "href": "computing/mac_arm.html#installation-notes", - "title": " MacOS ARM", + "objectID": "computing/mac_x86.html#installation-notes", + "href": "computing/mac_x86.html#installation-notes", + "title": " MacOS x86", "section": "", "text": "If you have already installed Git, LaTeX, or any of the R packages, you should be OK. However, if you have difficulty with Homework or Labs, we may ask you to uninstall and try again.\nIn order to be able to support you effectively and minimize setup issues and software conflicts, we suggest you install the required software as specified below.\nIn all the sections below, if you are presented with the choice to download either a 64-bit (also called x64) or a 32-bit (also called x86) version of the application always choose the 64-bit version." }, { - "objectID": "computing/mac_arm.html#terminal", - "href": "computing/mac_arm.html#terminal", - "title": " MacOS ARM", + "objectID": "computing/mac_x86.html#terminal", + "href": "computing/mac_x86.html#terminal", + "title": " MacOS x86", "section": "Terminal", "text": "Terminal\nBy “Terminal” below we mean the command line program called “Terminal”. Note that this is also available Inside RStudio. Either works. To easily pull up the Terminal (outside RStudio), Type Cmd + Space then begin typing “Terminal” and press Return." }, { - "objectID": "computing/mac_arm.html#github", - "href": "computing/mac_arm.html#github", - "title": " MacOS ARM", + "objectID": "computing/mac_x86.html#github", + "href": "computing/mac_x86.html#github", + "title": " MacOS x86", "section": "GitHub", "text": "GitHub\nIn Stat 406 we will use the publicly available GitHub.com. If you do not already have an account, please sign up for one at GitHub.com\nSign up for a free account at GitHub.com if you don’t have one already." 
}, { - "objectID": "computing/mac_arm.html#git", - "href": "computing/mac_arm.html#git", - "title": " MacOS ARM", + "objectID": "computing/mac_x86.html#git", + "href": "computing/mac_x86.html#git", + "title": " MacOS x86", "section": "Git", "text": "Git\nWe will be using the command line version of Git as well as Git through RStudio. Some of the Git commands we will use are only available since Git 2.23, so if your Git is older than this version, we ask you to update it using the Xcode command line tools (not all of Xcode), which includes Git.\nOpen Terminal and type the following command to install Xcode command line tools:\nxcode-select --install\nAfter installation, in terminal type the following to ask for the version:\ngit --version\nyou should see something like this (does not have to be the exact same version) if you were successful:\ngit version 2.32.1 (Apple Git-133)\n\n\n\n\n\n\nNote\n\n\n\nIf you run into trouble, please see the Install Git Mac OS section from Happy Git and GitHub for the useR for additional help or strategies for Git installation.\n\n\n\nConfiguring Git user info\nNext, we need to configure Git by telling it your name and email. To do this, type the following into the terminal (replacing Jane Doe and janedoe@example.com, with your name and email that you used to sign up for GitHub, respectively):\ngit config --global user.name \"Jane Doe\"\ngit config --global user.email janedoe@example.com\n\n\n\n\n\n\nNote\n\n\n\nTo ensure that you haven’t made a typo in any of the above, you can view your global Git configurations by either opening the configuration file in a text editor (e.g. via the command nano ~/.gitconfig) or by typing git config --list --global).\n\n\nIf you have never used Git before, we recommend also setting the default editor:\ngit config --global core.editor nano\nIf you prefer VScode (and know how to set it up) or something else, feel free." 
}, { - "objectID": "computing/mac_arm.html#latex", - "href": "computing/mac_arm.html#latex", - "title": " MacOS ARM", + "objectID": "computing/mac_x86.html#latex", + "href": "computing/mac_x86.html#latex", + "title": " MacOS x86", "section": "LaTeX", "text": "LaTeX\nIt is possible you already have this installed.\nFirst try the following check in RStudio\nStat406::test_latex_installation()\nIf you see Green checkmarks, then you’re good.\nEven if it fails, follow the instructions, and try it again.\nIf it stall fails, proceed with the instructions\n\nWe will install the lightest possible version of LaTeX and its necessary packages as possible so that we can render Jupyter notebooks and R Markdown documents to html and PDF. If you have previously installed LaTeX, please uninstall it before proceeding with these instructions.\nFirst, run the following command to make sure that /usr/local/bin is writable:\nsudo chown -R $(whoami):admin /usr/local/bin\n\n\n\n\n\n\nNote\n\n\n\nYou might be asked to enter your password during installation.\n\n\nNow open RStudio and run the following commands to install the tinytex package and setup tinytex:\ntinytex::install_tinytex()\nYou can check that the installation is working by opening a terminal and asking for the version of latex:\nlatex --version\nYou should see something like this if you were successful:\npdfTeX 3.141592653-2.6-1.40.23 (TeX Live 2022/dev)\nkpathsea version 6.3.4/dev\nCopyright 2021 Han The Thanh (pdfTeX) et al.\nThere is NO warranty. 
Redistribution of this software is\ncovered by the terms of both the pdfTeX copyright and\nthe Lesser GNU General Public License.\nFor more information about these matters, see the file\nnamed COPYING and the pdfTeX source.\nPrimary author of pdfTeX: Han The Thanh (pdfTeX) et al.\nCompiled with libpng 1.6.37; using libpng 1.6.37\nCompiled with zlib 1.2.11; using zlib 1.2.11\nCompiled with xpdf version 4.03" }, { - "objectID": "computing/mac_arm.html#github-pat", - "href": "computing/mac_arm.html#github-pat", - "title": " MacOS ARM", + "objectID": "computing/mac_x86.html#github-pat", + "href": "computing/mac_x86.html#github-pat", + "title": " MacOS x86", "section": "Github PAT", "text": "Github PAT\nYou’re probably familiar with 2-factor authentication for your UBC account or other accounts which is a very secure way to protect sensitive information (in case your password gets exposed). Github uses a Personal Access Token (PAT) for the Command Line Interface (CLI) and RStudio. This is different from the password you use to log in with a web browser. You will have to create one. There are some nice R functions that will help you along, and I find that easiest.\nComplete instructions are in Chapter 9 of Happy Git With R. Here’s the quick version (you need the usethis and gitcreds libraries, which you can install with install.packages(c(\"usethis\", \"gitcreds\"))):\n\nIn the RStudio Console, call usethis::create_github_token() This should open a webbrowser. In the Note field, write what you like, perhaps “Stat 406 token”. Then update the Expiration to any date after December 15. (“No expiration” is fine, though not very secure). Make sure that everything in repo is checked. Leave all other checks as is. Scroll to the bottom and click the green “Generate Token” button.\nThis should now give you a long string to Copy. It often looks like ghp_0asfjhlasdfhlkasjdfhlksajdhf9234u. Copy that. 
(You would use this instead of the browser password in RStudio when it asks for a password).\nTo store the PAT permanently in R (so you’ll never have to do this again, hopefully) call gitcreds::gitcreds_set() and paste the thing you copied there." }, { - "objectID": "computing/mac_arm.html#post-installation-notes", - "href": "computing/mac_arm.html#post-installation-notes", - "title": " MacOS ARM", + "objectID": "computing/mac_x86.html#post-installation-notes", + "href": "computing/mac_x86.html#post-installation-notes", + "title": " MacOS x86", "section": "Post-installation notes", "text": "Post-installation notes\nYou have completed the installation instructions, well done 🙌!" }, { - "objectID": "computing/mac_arm.html#attributions", - "href": "computing/mac_arm.html#attributions", - "title": " MacOS ARM", + "objectID": "computing/mac_x86.html#attributions", + "href": "computing/mac_x86.html#attributions", + "title": " MacOS x86", "section": "Attributions", "text": "Attributions\nThe DSCI 310 Teaching Team, notably, Anmol Jawandha, Tomas Beuzen, Rodolfo Lourenzutti, Joel Ostblom, Arman Seyed-Ahmadi, Florencia D’Andrea, and Tiffany Timbers." }, - { - "objectID": "faq.html", - "href": "faq.html", - "title": " Frequently asked questions", - "section": "", - "text": "Complete readings before the material is covered in class, and then review again afterwards.\nParticipate actively in class. If you don’t understand something, I can guarantee no one else does either. I have a Ph.D., and I’ve been doing this for more than 10 years. It’s hard for me to remember what it’s like to be you and what you don’t know. Say something! I want you to learn this stuff, and I love to explain more carefully.\nCome to office hours. Again, I like explaining things.\nTry the Labs again without the help of your classmates.\nRead the examples at the end of the [ISLR] chapters. 
Try the exercises.\nDo not procrastinate — don’t let a module go by with unanswered questions as it will just make the following module’s material even more difficult to follow.\nDo the Worksheets." - }, - { - "objectID": "faq.html#how-do-i-succeed-in-this-class", - "href": "faq.html#how-do-i-succeed-in-this-class", - "title": " Frequently asked questions", - "section": "", - "text": "Complete readings before the material is covered in class, and then review again afterwards.\nParticipate actively in class. If you don’t understand something, I can guarantee no one else does either. I have a Ph.D., and I’ve been doing this for more than 10 years. It’s hard for me to remember what it’s like to be you and what you don’t know. Say something! I want you to learn this stuff, and I love to explain more carefully.\nCome to office hours. Again, I like explaining things.\nTry the Labs again without the help of your classmates.\nRead the examples at the end of the [ISLR] chapters. Try the exercises.\nDo not procrastinate — don’t let a module go by with unanswered questions as it will just make the following module’s material even more difficult to follow.\nDo the Worksheets." - }, - { - "objectID": "faq.html#git-and-github", - "href": "faq.html#git-and-github", - "title": " Frequently asked questions", - "section": "Git and Github", - "text": "Git and Github\n\nHomework/Labs workflow\nRstudio version (uses the Git tab. Usually near Environment/History in the upper right)\n\nMake sure you are on main. Pull in remote changes. Click .\nCreate a new branch by clicking the think that looks kinda like .\nWork on your documents and save frequently.\nStage your changes by clicking the check boxes.\nCommit your changes by clicking Commit.\nRepeat 3-5 as necessary.\nPush to Github \nWhen done, go to Github and open a PR.\nUse the dropdown menu to go back to main and avoid future headaches.\n\nCommand line version\n\n(Optional, but useful. Pull in any remote changes.) 
git pull\nCreate a new branch git branch -b <name-of-branch>\nWork on your documents and save frequently.\nStage your changes git add <name-of-document1> repeat for each changed document. git add . stages all changed documents.\nCommit your changes git commit -m \"some message that is meaningful\"\nRepeat 3-5 as necessary.\nPush to Github git push. It may suggest a longer form of this command, obey.\nWhen done, go to Github and open a PR.\nSwitch back to main to avoid future headaches. git checkout main.\n\n\n\nAsking for a HW regrade.\n\n\n\n\n\n\nTo be eligible\n\n\n\n\nYou must have received >3 points of deductions to be eligible.\nAnd they must have been for “content”, not penalties.\nIf you fix the errors, you can raise your grade to 7/10.\nYou must make revisions and re-request review within 1 week of your initial review.\n\n\n\n\nGo to the your local branch for this HW. If you don’t remember the right name, you can check the PRs in your repo on GitHub by clicking “Pull Requests” tab. It might be closed.\nMake any changes you need to make to the files, commit and push. Make sure to rerender the .pdf if needed.\nGo to GitHub.com and find the original PR for this assignment. There should now be additional commits since the previous Review.\nAdd a comment to the TA describing the changes you’ve made. Be concise and clear.\nUnder “Reviewers” on the upper right of the screen, you should see a 🔁 button. Once you click that, the TA will be notified to review your changes.\n\n\n\nFixing common problems\n\nmaster/main\n“master” has some pretty painful connotations. So as part of an effort to remove racist names from code, the default branch is now “main” on new versions of GitHub. But old versions (like the UBC version) still have “master”. 
Below, I’ll use “main”, but if you see “master” on what you’re doing, that’s the one to use.\n\n\nStart from main\nBranches should be created from the main branch, not the one you used for the last assignment.\ngit checkout main\nThis switches to main. Then pull and start the new assignment following the workflow above. (In Rstudio, use the dropdown menu.)\n\n\nYou forgot to work on a new branch\nUgh, you did some labs before realizing you forgot to create a new branch. Don’t stress. There are some things below to try. But if you’re confused ASK. We’ve had practice with this, and soon you will too!\n(1) If you started from main and haven’t made any commits (but you SAVED!!):\ngit branch -b <new-branch-name>\nThis keeps everything you have and puts you on a new branch. No problem. Commit and proceed as usual.\n(2) If you are on main and made some commits:\ngit branch <new-branch-name>\ngit log\nThe first line makes a new branch with all the stuff you’ve done. Then we look at the log. Locate the most recent commit before you started working. It’s a long string like ac2a8365ce0fa220c11e658c98212020fa2ba7d1. Then,\ngit reset ac2a8 --hard\nThis rolls main back to that commit. You don’t need the whole string, just the first few characters. Finally\ngit checkout <new-branch-name>\nand continue working.\n(3) If you started work on <some-old-branch> for work you already submitted:\nThis one is harder, and I would suggest getting in touch with the TAs. Here’s the procedure.\ngit commit -am \"uhoh, I need to be on a different branch\"\ngit branch <new-branch-name>\nCommit your work with a dumb message, then create a new branch. It’s got all your stuff.\ngit log\nLocate the most recent commit before you started working. It’s a long string like ac2a8365ce0fa220c11e658c98212020fa2ba7d1. 
Then,\ngit rebase --onto main ac2a8 <new-branch-name>\ngit checkout <new-branch-name>\nThis makes the new branch look like main but without the differences from main that are on ac2a8 and WITH all the work you did after ac2a8. It’s pretty cool. And should work. Finally, we switch to our new branch.\n\n\n\nHow can I get better at R?\nI get this question a lot. The answer is almost never “go read the book How to learn R fast” or “watch the video on FreeRadvice.com”. To learn programming, the only thing to do is to program. Do your tutorialls. Redo your tutorials. Run through the code in the textbook. Ask yourself why we used one function instead of another. Ask questions. Play little coding games. If you find yourself wondering how some bit of code works, run through it step by step. Print out the results and see what it’s doing. If you take on these kinds of tasks regularly, you will improve rapidly.\nCoding is an active activity just like learning Spanish. You have to practice constantly. For the same reasons that it is difficult/impossible to learn Spanish just from reading a textbook, it is difficult/impossible to learn R just from reading/watching.\nWhen I took German in 7th grade, I remember my teacher saying “to learn a language, you have to constantly tell lies”. What he meant was, you don’t just say “yesterday I went to the gym”. You say “yesterday I went to the market”, “yesterday I went to the movies”, “today she’s going to the gym”, etc. The point is to internalize conjugation, vocabulary, and the inner workings of the language. The same is true when coding. Do things different ways. Try automating regular tasks.\nRecommended resources\n\nData Science: A first introduction This is the course textbook for UBC’s DSCI 100\nR4DS written by Hadley Wickham and Garrett Grolemund\nDSCI 310 Coursenotes by Tiffany A. 
Timbers, Joel Ostblom, Florencia D’Andrea, and Rodolfo Lourenzutti\nHappy Git with R by Jenny Bryan\nModern Dive: Statistical Inference via Data Science\nStat545\nGoogle\n\n\n\nMy code doesn’t run. What do I do?\nThis is a constant issue with code, and it happens to everyone. The following is a general workflow for debugging stuck code.\n\nIf the code is running, but not doing what you want, see below.\nRead the Error message. It will give you some important hints. Sometimes these are hard to parse, but that’s ok.\n\n\nset.seed(12345)\ny <- rnorm(10)\nx <- matrix(rnorm(20), 2)\nlinmod <- lm(y ~ x)\n## Error in model.frame.default(formula = y ~ x, drop.unused.levels = TRUE): variable lengths differ (found for 'x')\n\nThis one is a little difficult. The first stuff before the colon is telling me where the error happened, but I didn’t use a function called model.frame.default. Nonetheless, after the colon it says variable lengths differ. Well y is length 10 and x has 10 rows right? Oh wait, how many rows does x have?\n\nRead the documentation for the function in the error message. For the above, I should try ?matrix.\nGoogle!! If the first few steps didn’t help, copy the error message into Google. This almost always helps. Best to remove any overly specific information first.\nAsk your classmates Slack. In order to ask most effectively, you should probably provide them some idea of how the error happened. See the section on MWEs for how to do this.\nSee me or the TA. Note that it is highly likely that I will ask if you did the above steps first. And I will want to see your minimal working example (MWE).\n\n\n\n\n\n\n\nWarning\n\n\n\nIf you meet with me, be prepared to show me your code! Or message me your MWE. Or both. But not neither.\n\n\nIf the error cannot be reproduced in my presence, it is very unlikely that I can fix it.\n\n\nMinimal working examples\nAn MWE is a small bit of code which will work on anyone’s machine and reproduce the error that you are getting. 
This is a key component of getting help debugging. When you do your homework, there’s lots of stuff going on that will differ from most other students. To allow them (or me, or the TA) to help you, you need to be able to get their machine to reproduce your error (and only your error) without much hassle.\nI find that, in the process of preparing an MWE, I can often answer my own question. So it is a useful exercise even if you aren’t ready to call in the experts yet. The process of stripping your problem down to its bare essence often reveals where the root issue lies. My above code is an MWE: I set a seed, so we both can use exactly the same data, and it’s only a few lines long without calling any custom code that you don’t have.\nFor a good discussion of how to do this, see the R Lecture or stackexchange.\n\n\nHow to write good code\nThis is covered in much greater detail in the lectures, so see there. Here is my basic advice.\n\nWrite script files (which you save) and source them. Don’t do everything in the console. R (and python and Matlab and SAS) is much better as a scripting language than as a calculator.\nDon’t write anything more than once. This has three corollaries:\n\nIf you are tempted to copy/paste, don’t.\nDon’t use magic numbers. Define all constants at the top of the script.\nWrite functions.\n\nThe third is very important. Functions are easy to test. You give different inputs and check whether the output is as expected. This helps catch mistakes.\nThere are two kinds of errors: syntax and function.\n\nThe first R can find (missing close parenthesis, wrong arguments, etc.)\n\nThe second you can only catch by thorough testing\n\nDon’t use magic numbers.\nUse meaningful names. 
Don’t do this:\n\ndata(\"ChickWeight\")\nout <- lm(weight ~ Time + Chick + Diet, data = ChickWeight)\n\nComment things that aren’t clear from the (meaningful) names.\nComment long formulas that don’t immediately make sense:\n\ngarbage <- with(\n ChickWeight, \n by(weight, Chick, function(x) (x^2 + 23) / length(x))\n) ## WTF???" - }, - { - "objectID": "index.html", - "href": "index.html", - "title": "Stat 406", - "section": "", - "text": "Jump to Schedule Syllabus\n\n\nAt the end of the course, you will be able to:\n\nAssess the prediction properties of the supervised learning methods covered in class;\nCorrectly use regularization to improve predictions from linear models, and also to identify important explanatory variables;\nExplain the practical difference between predictions obtained with parametric and non-parametric methods, and decide in specific applications which approach should be used;\nSelect and construct appropriate ensembles to obtain improved predictions in different contexts;\nUse and interpret principal components and other dimension reduction techniques;\nEmploy reasonable coding practices and understand basic R syntax and function.\nWrite reports and use proper version control; engage with standard software." 
- }, { "objectID": "computing/index.html", "href": "computing/index.html", @@ -966,200 +931,270 @@ "text": "Software installation instructions\nPlease click the appropriate link below to view the installation instructions for your operating system:\n\nmacOS x86 or macOS arm\nUbuntu\nWindows" }, { - "objectID": "computing/mac_x86.html", - "href": "computing/mac_x86.html", - "title": " MacOS x86", + "objectID": "index.html", + "href": "index.html", + "title": "Stat 406", + "section": "", + "text": "Jump to Schedule Syllabus\n\n\nAt the end of the course, you will be able to:\n\nAssess the prediction properties of the supervised learning methods covered in class;\nCorrectly use regularization to improve predictions from linear models, and also to identify important explanatory variables;\nExplain the practical difference between predictions obtained with parametric and non-parametric methods, and decide in specific applications which approach should be used;\nSelect and construct appropriate ensembles to obtain improved predictions in different contexts;\nUse and interpret principal components and other dimension reduction techniques;\nEmploy reasonable coding practices and understand basic R syntax and function.\nWrite reports and use proper version control; engage with standard software." + }, + { + "objectID": "faq.html", + "href": "faq.html", + "title": " Frequently asked questions", + "section": "", + "text": "Complete readings before the material is covered in class, and then review again afterwards.\nParticipate actively in class. If you don’t understand something, I can guarantee no one else does either. I have a Ph.D., and I’ve been doing this for more than 10 years. It’s hard for me to remember what it’s like to be you and what you don’t know. Say something! I want you to learn this stuff, and I love to explain more carefully.\nCome to office hours. 
Again, I like explaining things.\nTry the Labs again without the help of your classmates.\nRead the examples at the end of the [ISLR] chapters. Try the exercises.\nDo not procrastinate — don’t let a module go by with unanswered questions as it will just make the following module’s material even more difficult to follow.\nDo the Worksheets." + }, + { + "objectID": "faq.html#how-do-i-succeed-in-this-class", + "href": "faq.html#how-do-i-succeed-in-this-class", + "title": " Frequently asked questions", + "section": "", + "text": "Complete readings before the material is covered in class, and then review again afterwards.\nParticipate actively in class. If you don’t understand something, I can guarantee no one else does either. I have a Ph.D., and I’ve been doing this for more than 10 years. It’s hard for me to remember what it’s like to be you and what you don’t know. Say something! I want you to learn this stuff, and I love to explain more carefully.\nCome to office hours. Again, I like explaining things.\nTry the Labs again without the help of your classmates.\nRead the examples at the end of the [ISLR] chapters. Try the exercises.\nDo not procrastinate — don’t let a module go by with unanswered questions as it will just make the following module’s material even more difficult to follow.\nDo the Worksheets." + }, + { + "objectID": "faq.html#git-and-github", + "href": "faq.html#git-and-github", + "title": " Frequently asked questions", + "section": "Git and Github", + "text": "Git and Github\n\nHomework/Labs workflow\nRstudio version (uses the Git tab. Usually near Environment/History in the upper right)\n\nMake sure you are on main. Pull in remote changes. 
Click .\nCreate a new branch by clicking the think that looks kinda like .\nWork on your documents and save frequently.\nStage your changes by clicking the check boxes.\nCommit your changes by clicking Commit.\nRepeat 3-5 as necessary.\nPush to Github \nWhen done, go to Github and open a PR.\nUse the dropdown menu to go back to main and avoid future headaches.\n\nCommand line version\n\n(Optional, but useful. Pull in any remote changes.) git pull\nCreate a new branch git branch -b <name-of-branch>\nWork on your documents and save frequently.\nStage your changes git add <name-of-document1> repeat for each changed document. git add . stages all changed documents.\nCommit your changes git commit -m \"some message that is meaningful\"\nRepeat 3-5 as necessary.\nPush to Github git push. It may suggest a longer form of this command, obey.\nWhen done, go to Github and open a PR.\nSwitch back to main to avoid future headaches. git checkout main.\n\n\n\nAsking for a HW regrade.\n\n\n\n\n\n\nTo be eligible\n\n\n\n\nYou must have received >3 points of deductions to be eligible.\nAnd they must have been for “content”, not penalties.\nIf you fix the errors, you can raise your grade to 7/10.\nYou must make revisions and re-request review within 1 week of your initial review.\n\n\n\n\nGo to the your local branch for this HW. If you don’t remember the right name, you can check the PRs in your repo on GitHub by clicking “Pull Requests” tab. It might be closed.\nMake any changes you need to make to the files, commit and push. Make sure to rerender the .pdf if needed.\nGo to GitHub.com and find the original PR for this assignment. There should now be additional commits since the previous Review.\nAdd a comment to the TA describing the changes you’ve made. Be concise and clear.\nUnder “Reviewers” on the upper right of the screen, you should see a 🔁 button. 
Once you click that, the TA will be notified to review your changes.\n\n\n\nFixing common problems\n\nmaster/main\n“master” has some pretty painful connotations. So as part of an effort to remove racist names from code, the default branch is now “main” on new versions of GitHub. But old versions (like the UBC version) still have “master”. Below, I’ll use “main”, but if you see “master” on what you’re doing, that’s the one to use.\n\n\nStart from main\nBranches should be created from the main branch, not the one you used for the last assignment.\ngit checkout main\nThis switches to main. Then pull and start the new assignment following the workflow above. (In Rstudio, use the dropdown menu.)\n\n\nYou forgot to work on a new branch\nUgh, you did some labs before realizing you forgot to create a new branch. Don’t stress. There are some things below to try. But if you’re confused ASK. We’ve had practice with this, and soon you will too!\n(1) If you started from main and haven’t made any commits (but you SAVED!!):\ngit branch -b <new-branch-name>\nThis keeps everything you have and puts you on a new branch. No problem. Commit and proceed as usual.\n(2) If you are on main and made some commits:\ngit branch <new-branch-name>\ngit log\nThe first line makes a new branch with all the stuff you’ve done. Then we look at the log. Locate the most recent commit before you started working. It’s a long string like ac2a8365ce0fa220c11e658c98212020fa2ba7d1. Then,\ngit reset ac2a8 --hard\nThis rolls main back to that commit. You don’t need the whole string, just the first few characters. Finally\ngit checkout <new-branch-name>\nand continue working.\n(3) If you started work on <some-old-branch> for work you already submitted:\nThis one is harder, and I would suggest getting in touch with the TAs. Here’s the procedure.\ngit commit -am \"uhoh, I need to be on a different branch\"\ngit branch <new-branch-name>\nCommit your work with a dumb message, then create a new branch. 
It’s got all your stuff.\ngit log\nLocate the most recent commit before you started working. It’s a long string like ac2a8365ce0fa220c11e658c98212020fa2ba7d1. Then,\ngit rebase --onto main ac2a8 <new-branch-name>\ngit checkout <new-branch-name>\nThis makes the new branch look like main but without the differences from main that are on ac2a8 and WITH all the work you did after ac2a8. It’s pretty cool. And should work. Finally, we switch to our new branch.\n\n\n\nHow can I get better at R?\nI get this question a lot. The answer is almost never “go read the book How to learn R fast” or “watch the video on FreeRadvice.com”. To learn programming, the only thing to do is to program. Do your tutorialls. Redo your tutorials. Run through the code in the textbook. Ask yourself why we used one function instead of another. Ask questions. Play little coding games. If you find yourself wondering how some bit of code works, run through it step by step. Print out the results and see what it’s doing. If you take on these kinds of tasks regularly, you will improve rapidly.\nCoding is an active activity just like learning Spanish. You have to practice constantly. For the same reasons that it is difficult/impossible to learn Spanish just from reading a textbook, it is difficult/impossible to learn R just from reading/watching.\nWhen I took German in 7th grade, I remember my teacher saying “to learn a language, you have to constantly tell lies”. What he meant was, you don’t just say “yesterday I went to the gym”. You say “yesterday I went to the market”, “yesterday I went to the movies”, “today she’s going to the gym”, etc. The point is to internalize conjugation, vocabulary, and the inner workings of the language. The same is true when coding. Do things different ways. 
Try automating regular tasks.\nRecommended resources\n\nData Science: A first introduction This is the course textbook for UBC’s DSCI 100\nR4DS written by Hadley Wickham and Garrett Grolemund\nDSCI 310 Coursenotes by Tiffany A. Timbers, Joel Ostblom, Florencia D’Andrea, and Rodolfo Lourenzutti\nHappy Git with R by Jenny Bryan\nModern Dive: Statistical Inference via Data Science\nStat545\nGoogle\n\n\n\nMy code doesn’t run. What do I do?\nThis is a constant issue with code, and it happens to everyone. The following is a general workflow for debugging stuck code.\n\nIf the code is running, but not doing what you want, see below.\nRead the Error message. It will give you some important hints. Sometimes these are hard to parse, but that’s ok.\n\n\nset.seed(12345)\ny <- rnorm(10)\nx <- matrix(rnorm(20), 2)\nlinmod <- lm(y ~ x)\n## Error in model.frame.default(formula = y ~ x, drop.unused.levels = TRUE): variable lengths differ (found for 'x')\n\nThis one is a little difficult. The first stuff before the colon is telling me where the error happened, but I didn’t use a function called model.frame.default. Nonetheless, after the colon it says variable lengths differ. Well y is length 10 and x has 10 rows right? Oh wait, how many rows does x have?\n\nRead the documentation for the function in the error message. For the above, I should try ?matrix.\nGoogle!! If the first few steps didn’t help, copy the error message into Google. This almost always helps. Best to remove any overly specific information first.\nAsk your classmates on Slack. In order to ask most effectively, you should probably provide them some idea of how the error happened. See the section on MWEs for how to do this.\nSee me or the TA. Note that it is highly likely that I will ask if you did the above steps first. And I will want to see your minimal working example (MWE).\n\n\n\n\n\n\n\nWarning\n\n\n\nIf you meet with me, be prepared to show me your code! Or message me your MWE. Or both. 
But not neither.\n\n\nIf the error cannot be reproduced in my presence, it is very unlikely that I can fix it.\n\n\nMinimal working examples\nAn MWE is a small bit of code which will work on anyone’s machine and reproduce the error that you are getting. This is a key component of getting help debugging. When you do your homework, there’s lots of stuff going on that will differ from most other students. To allow them (or me, or the TA) to help you, you need to be able to get their machine to reproduce your error (and only your error) without much hassle.\nI find that, in the process of preparing an MWE, I can often answer my own question. So it is a useful exercise even if you aren’t ready to call in the experts yet. The process of stripping your problem down to its bare essence often reveals where the root issue lies. My above code is an MWE: I set a seed, so we both can use exactly the same data, and it’s only a few lines long without calling any custom code that you don’t have.\nFor a good discussion of how to do this, see the R Lecture or stackexchange.\n\n\nHow to write good code\nThis is covered in much greater detail in the lectures, so see there. Here is my basic advice.\n\nWrite script files (which you save) and source them. Don’t do everything in the console. R (and python and Matlab and SAS) is much better as a scripting language than as a calculator.\nDon’t write anything more than once. This has three corollaries:\n\nIf you are tempted to copy/paste, don’t.\nDon’t use magic numbers. Define all constants at the top of the script.\nWrite functions.\n\nThe third is very important. Functions are easy to test. You give different inputs and check whether the output is as expected. This helps catch mistakes.\nThere are two kinds of errors: syntax and function.\n\nThe first R can find (missing close parenthesis, wrong arguments, etc.)\n\nThe second you can only catch by thorough testing\n\nDon’t use magic numbers.\nUse meaningful names. 
Don’t do this:\n\ndata(\"ChickWeight\")\nout <- lm(weight ~ Time + Chick + Diet, data = ChickWeight)\n\nComment things that aren’t clear from the (meaningful) names.\nComment long formulas that don’t immediately make sense:\n\ngarbage <- with(\n ChickWeight, \n by(weight, Chick, function(x) (x^2 + 23) / length(x))\n) ## WTF???" + }, + { + "objectID": "computing/mac_arm.html", + "href": "computing/mac_arm.html", + "title": " MacOS ARM", "section": "", "text": "If you have already installed Git, LaTeX, or any of the R packages, you should be OK. However, if you have difficulty with Homework or Labs, we may ask you to uninstall and try again.\nIn order to be able to support you effectively and minimize setup issues and software conflicts, we suggest you install the required software as specified below.\nIn all the sections below, if you are presented with the choice to download either a 64-bit (also called x64) or a 32-bit (also called x86) version of the application always choose the 64-bit version." }, { - "objectID": "computing/mac_x86.html#installation-notes", - "href": "computing/mac_x86.html#installation-notes", - "title": " MacOS x86", + "objectID": "computing/mac_arm.html#installation-notes", + "href": "computing/mac_arm.html#installation-notes", + "title": " MacOS ARM", "section": "", "text": "If you have already installed Git, LaTeX, or any of the R packages, you should be OK. However, if you have difficulty with Homework or Labs, we may ask you to uninstall and try again.\nIn order to be able to support you effectively and minimize setup issues and software conflicts, we suggest you install the required software as specified below.\nIn all the sections below, if you are presented with the choice to download either a 64-bit (also called x64) or a 32-bit (also called x86) version of the application always choose the 64-bit version." 
}, { - "objectID": "computing/mac_x86.html#terminal", - "href": "computing/mac_x86.html#terminal", - "title": " MacOS x86", + "objectID": "computing/mac_arm.html#terminal", + "href": "computing/mac_arm.html#terminal", + "title": " MacOS ARM", "section": "Terminal", "text": "Terminal\nBy “Terminal” below we mean the command line program called “Terminal”. Note that this is also available Inside RStudio. Either works. To easily pull up the Terminal (outside RStudio), Type Cmd + Space then begin typing “Terminal” and press Return." }, { - "objectID": "computing/mac_x86.html#github", - "href": "computing/mac_x86.html#github", - "title": " MacOS x86", + "objectID": "computing/mac_arm.html#github", + "href": "computing/mac_arm.html#github", + "title": " MacOS ARM", "section": "GitHub", "text": "GitHub\nIn Stat 406 we will use the publicly available GitHub.com. If you do not already have an account, please sign up for one at GitHub.com\nSign up for a free account at GitHub.com if you don’t have one already." }, { - "objectID": "computing/mac_x86.html#git", - "href": "computing/mac_x86.html#git", - "title": " MacOS x86", + "objectID": "computing/mac_arm.html#git", + "href": "computing/mac_arm.html#git", + "title": " MacOS ARM", "section": "Git", "text": "Git\nWe will be using the command line version of Git as well as Git through RStudio. 
Some of the Git commands we will use are only available since Git 2.23, so if your Git is older than this version, we ask you to update it using the Xcode command line tools (not all of Xcode), which includes Git.\nOpen Terminal and type the following command to install Xcode command line tools:\nxcode-select --install\nAfter installation, in terminal type the following to ask for the version:\ngit --version\nyou should see something like this (does not have to be the exact same version) if you were successful:\ngit version 2.32.1 (Apple Git-133)\n\n\n\n\n\n\nNote\n\n\n\nIf you run into trouble, please see the Install Git Mac OS section from Happy Git and GitHub for the useR for additional help or strategies for Git installation.\n\n\n\nConfiguring Git user info\nNext, we need to configure Git by telling it your name and email. To do this, type the following into the terminal (replacing Jane Doe and janedoe@example.com, with your name and email that you used to sign up for GitHub, respectively):\ngit config --global user.name \"Jane Doe\"\ngit config --global user.email janedoe@example.com\n\n\n\n\n\n\nNote\n\n\n\nTo ensure that you haven’t made a typo in any of the above, you can view your global Git configurations by either opening the configuration file in a text editor (e.g. via the command nano ~/.gitconfig) or by typing git config --list --global).\n\n\nIf you have never used Git before, we recommend also setting the default editor:\ngit config --global core.editor nano\nIf you prefer VScode (and know how to set it up) or something else, feel free." 
}, { - "objectID": "computing/mac_x86.html#latex", - "href": "computing/mac_x86.html#latex", - "title": " MacOS x86", + "objectID": "computing/mac_arm.html#latex", + "href": "computing/mac_arm.html#latex", + "title": " MacOS ARM", "section": "LaTeX", "text": "LaTeX\nIt is possible you already have this installed.\nFirst try the following check in RStudio\nStat406::test_latex_installation()\nIf you see Green checkmarks, then you’re good.\nEven if it fails, follow the instructions, and try it again.\nIf it still fails, proceed with the instructions\n\nWe will install the lightest possible version of LaTeX and its necessary packages as possible so that we can render Jupyter notebooks and R Markdown documents to html and PDF. If you have previously installed LaTeX, please uninstall it before proceeding with these instructions.\nFirst, run the following command to make sure that /usr/local/bin is writable:\nsudo chown -R $(whoami):admin /usr/local/bin\n\n\n\n\n\n\nNote\n\n\n\nYou might be asked to enter your password during installation.\n\n\nNow open RStudio and run the following commands to install the tinytex package and setup tinytex:\ntinytex::install_tinytex()\nYou can check that the installation is working by opening a terminal and asking for the version of latex:\nlatex --version\nYou should see something like this if you were successful:\npdfTeX 3.141592653-2.6-1.40.23 (TeX Live 2022/dev)\nkpathsea version 6.3.4/dev\nCopyright 2021 Han The Thanh (pdfTeX) et al.\nThere is NO warranty. 
Redistribution of this software is\ncovered by the terms of both the pdfTeX copyright and\nthe Lesser GNU General Public License.\nFor more information about these matters, see the file\nnamed COPYING and the pdfTeX source.\nPrimary author of pdfTeX: Han The Thanh (pdfTeX) et al.\nCompiled with libpng 1.6.37; using libpng 1.6.37\nCompiled with zlib 1.2.11; using zlib 1.2.11\nCompiled with xpdf version 4.03" }, { - "objectID": "computing/mac_x86.html#github-pat", - "href": "computing/mac_x86.html#github-pat", - "title": " MacOS x86", + "objectID": "computing/mac_arm.html#github-pat", + "href": "computing/mac_arm.html#github-pat", + "title": " MacOS ARM", "section": "Github PAT", "text": "Github PAT\nYou’re probably familiar with 2-factor authentication for your UBC account or other accounts which is a very secure way to protect sensitive information (in case your password gets exposed). Github uses a Personal Access Token (PAT) for the Command Line Interface (CLI) and RStudio. This is different from the password you use to log in with a web browser. You will have to create one. There are some nice R functions that will help you along, and I find that easiest.\nComplete instructions are in Chapter 9 of Happy Git With R. Here’s the quick version (you need the usethis and gitcreds libraries, which you can install with install.packages(c(\"usethis\", \"gitcreds\"))):\n\nIn the RStudio Console, call usethis::create_github_token() This should open a webbrowser. In the Note field, write what you like, perhaps “Stat 406 token”. Then update the Expiration to any date after December 15. (“No expiration” is fine, though not very secure). Make sure that everything in repo is checked. Leave all other checks as is. Scroll to the bottom and click the green “Generate Token” button.\nThis should now give you a long string to Copy. It often looks like ghp_0asfjhlasdfhlkasjdfhlksajdhf9234u. Copy that. 
(You would use this instead of the browser password in RStudio when it asks for a password).\nTo store the PAT permanently in R (so you’ll never have to do this again, hopefully) call gitcreds::gitcreds_set() and paste the thing you copied there." }, { - "objectID": "computing/mac_x86.html#post-installation-notes", - "href": "computing/mac_x86.html#post-installation-notes", - "title": " MacOS x86", + "objectID": "computing/mac_arm.html#post-installation-notes", + "href": "computing/mac_arm.html#post-installation-notes", + "title": " MacOS ARM", "section": "Post-installation notes", "text": "Post-installation notes\nYou have completed the installation instructions, well done 🙌!" }, { - "objectID": "computing/mac_x86.html#attributions", - "href": "computing/mac_x86.html#attributions", - "title": " MacOS x86", + "objectID": "computing/mac_arm.html#attributions", + "href": "computing/mac_arm.html#attributions", + "title": " MacOS ARM", "section": "Attributions", "text": "Attributions\nThe DSCI 310 Teaching Team, notably, Anmol Jawandha, Tomas Beuzen, Rodolfo Lourenzutti, Joel Ostblom, Arman Seyed-Ahmadi, Florencia D’Andrea, and Tiffany Timbers." }, { - "objectID": "computing/windows.html", - "href": "computing/windows.html", - "title": " Windows", + "objectID": "computing/ubuntu.html", + "href": "computing/ubuntu.html", + "title": " Ubuntu", "section": "", - "text": "If you have already installed Git, LaTeX, or any of the R packages, you should be OK. However, if you have difficulty with Homework or Labs, we may ask you to uninstall and try again.\nIn order to be able to support you effectively and minimize setup issues and software conflicts, we suggest you install the required software as specified below.\nIn all the sections below, if you are presented with the choice to download either a 64-bit (also called x64) or a 32-bit (also called x86) version of the application always choose the 64-bit version." 
+ "text": "If you have already installed Git, LaTeX, or any of the R packages, you should be OK. However, if you have difficulty with Homework or Labs, we may ask you to uninstall and try again.\nIn order to be able to support you effectively and minimize setup issues and software conflicts, we suggest you install the required software as specified below." }, { - "objectID": "computing/windows.html#installation-notes", - "href": "computing/windows.html#installation-notes", - "title": " Windows", + "objectID": "computing/ubuntu.html#installation-notes", + "href": "computing/ubuntu.html#installation-notes", + "title": " Ubuntu", "section": "", - "text": "If you have already installed Git, LaTeX, or any of the R packages, you should be OK. However, if you have difficulty with Homework or Labs, we may ask you to uninstall and try again.\nIn order to be able to support you effectively and minimize setup issues and software conflicts, we suggest you install the required software as specified below.\nIn all the sections below, if you are presented with the choice to download either a 64-bit (also called x64) or a 32-bit (also called x86) version of the application always choose the 64-bit version." + "text": "If you have already installed Git, LaTeX, or any of the R packages, you should be OK. However, if you have difficulty with Homework or Labs, we may ask you to uninstall and try again.\nIn order to be able to support you effectively and minimize setup issues and software conflicts, we suggest you install the required software as specified below." }, { - "objectID": "computing/windows.html#terminal", - "href": "computing/windows.html#terminal", - "title": " Windows", - "section": "Terminal", - "text": "Terminal\nBy “Terminal” below we mean the command line program called “Terminal”. Note that this is also available Inside RStudio. Either works." 
+ "objectID": "computing/ubuntu.html#ubuntu-software-settings", + "href": "computing/ubuntu.html#ubuntu-software-settings", + "title": " Ubuntu", + "section": "Ubuntu software settings", + "text": "Ubuntu software settings\nTo ensure that you are installing the right version of the software in this guide, open “Software & Updates” and make sure that the boxes in the screenshot are checked (this is the default configuration)." }, { - "objectID": "computing/windows.html#github", - "href": "computing/windows.html#github", - "title": " Windows", + "objectID": "computing/ubuntu.html#github", + "href": "computing/ubuntu.html#github", + "title": " Ubuntu", "section": "GitHub", "text": "GitHub\nIn Stat 406 we will use the publicly available GitHub.com. If you do not already have an account, please sign up for one at GitHub.com\nSign up for a free account at GitHub.com if you don’t have one already." }, { - "objectID": "computing/windows.html#git-bash-and-windows-terminal", - "href": "computing/windows.html#git-bash-and-windows-terminal", - "title": " Windows", - "section": "Git, Bash, and Windows Terminal", - "text": "Git, Bash, and Windows Terminal\nAlthough these three are separate programs, we are including them in the same section here since they are packaged together in the same installer on Windows. Briefly, we will be using the Bash shell to interact with our computers via a command line interface, Git to keep a version history of our files and upload to/download from to GitHub, and Windows Terminal to run the both Bash and Git.\nGo to https://git-scm.com/download/win and download the windows version of git. After the download has finished, run the installer and accept the default configuration for all pages except for the following:\n\nOn the Select Components page, add a Git Bash profile to Windows Terminal.\n\n\nTo install windows terminal visit this link and click Get to open it in Windows Store. Inside the Store, click Get again and then click Install. 
After installation, click Launch to start Windows Terminal. In the top of the window, you will see the tab bar with one open tab, a plus sign, and a down arrow. Click the down arrow and select Settings (or type the shortcut Ctrl + ,). In the Startup section, click the dropdown menu under Default profile and select Git Bash.\n\nYou can now launch the Windows terminal from the start menu or pin it to the taskbar like any other program (you can read the rest of the article linked above for additional tips if you wish). To make sure everything worked, close down Windows Terminal, and open it again. Git Bash should open by default, the text should be green and purple, and the tab should read MINGW64:/c/Users/$USERNAME (you should also see /c/Users/$USERNAME if you type pwd into the terminal). This screenshot shows what it should look like:\n\n\n\n\n\n\n\nNote\n\n\n\nWhenever we refer to “the terminal” in these installation instructions, we want you to use the Windows Terminal that you just installed with the Git Bash profile. Do not use Windows PowerShell, CMD, or anything else unless explicitly instructed to do so.\n\n\nTo open a new tab you can click the plus sign or use Ctrl + Shift + t (you can close a tab with Ctrl + Shift + w). To copy text from the terminal, you can highlight it with the mouse and then click Ctrl + Shift + c. 
To paste text you use Ctrl + Shift + v, try it by pasting the following into the terminal to check which version of Bash you just installed:\nbash --version\nThe output should look similar to this:\nGNU bash, version 4.4.23(1)-release (x86_64-pc-sys)\nCopyright (C) 2019 Free Software Foundation, Inc.\nLicense GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>\nThis is free software; you are free to change and redistribute it.\nThere is NO WARRANTY, to the extent permitted by law.\n\n\n\n\n\n\nNote\n\n\n\nIf there is a newline (the enter character) in the clipboard when you are pasting into the terminal, you will be asked if you are sure you want to paste since this newline will act as if you pressed enter and run the command. As a guideline you can press Paste anyway unless you are sure you don’t want this to happen.\n\n\nLet’s also check which version of git was installed:\ngit --version\ngit version 2.32.0.windows.2\n\n\n\n\n\n\nNote\n\n\n\nSome of the Git commands we will use are only available since Git 2.23, so make sure your if your Git is at least this version.\n\n\n\nConfiguring Git user info\nNext, we need to configure Git by telling it your name and email. To do this, type the following into the terminal (replacing Jane Doe and janedoe@example.com, with your name and email that you used to sign up for GitHub, respectively):\ngit config --global user.name \"Jane Doe\"\ngit config --global user.email janedoe@example.com\n\n\n\n\n\n\nNote\n\n\n\nTo ensure that you haven’t made a typo in any of the above, you can view your global Git configurations by either opening the configuration file in a text editor (e.g. via the command nano ~/.gitconfig) or by typing git config --list --global).\n\n\nIf you have never used Git before, we recommend also setting the default editor:\ngit config --global core.editor nano\nIf you prefer VScode (and know how to set it up) or something else, feel free." 
+ "objectID": "computing/ubuntu.html#git", + "href": "computing/ubuntu.html#git", + "title": " Ubuntu", + "section": "Git", + "text": "Git\nWe will be using the command line version of Git as well as Git through RStudio. Some of the Git commands we will use are only available since Git 2.23, so if your Git is older than this version, we ask you to update it using the following commands:\nsudo apt update\nsudo apt install git\nYou can check your git version with the following command:\ngit --version\n\n\n\n\n\n\nNote\n\n\n\nIf you run into trouble, please see the Install Git Linux section from Happy Git and GitHub for the useR for additional help or strategies for Git installation.\n\n\n\nConfiguring Git user info\nNext, we need to configure Git by telling it your name and email. To do this, type the following into the terminal (replacing Jane Doe and janedoe@example.com, with your name and email that you used to sign up for GitHub, respectively):\ngit config --global user.name \"Jane Doe\"\ngit config --global user.email janedoe@example.com\n\n\n\n\n\n\nNote\n\n\n\nTo ensure that you haven’t made a typo in any of the above, you can view your global Git configurations by either opening the configuration file in a text editor (e.g. via the command nano ~/.gitconfig) or by typing git config --list --global).\n\n\nIf you have never used Git before, we recommend also setting the default editor:\ngit config --global core.editor nano\nIf you prefer VScode (and know how to set it up) or something else, feel free."
}, { - "objectID": "computing/windows.html#latex", - "href": "computing/windows.html#latex", - "title": " Windows", + "objectID": "computing/ubuntu.html#latex", + "href": "computing/ubuntu.html#latex", + "title": " Ubuntu", "section": "LaTeX", - "text": "LaTeX\nIt is possible you already have this installed.\nFirst try the following check in RStudio\nStat406::test_latex_installation()\nIf you see Green checkmarks, then you’re good.\nEven if it fails, follow the instructions, and try it again.\nNote that you might see two error messages regarding lua during the installation, you can safely ignore these, the installation will complete successfully after clicking “OK”.\nIf it still fails, proceed with the instructions\n\nIn RStudio, run the following commands to install the tinytex package and setup tinytex:\ninstall.packages('tinytex')\ntinytex::install_tinytex()\nIn order for Git Bash to be able to find the location of TinyTex, you will need to sign out of Windows and back in again. After doing that, you can check that the installation worked by opening a terminal and asking for the version of latex:\nlatex --version\nYou should see something like this if you were successful:\npdfTeX 3.141592653-2.6-1.40.23 (TeX Live 2021/W32TeX)\nkpathsea version 6.3.3\nCopyright 2021 Han The Thanh (pdfTeX) et al.\nThere is NO warranty. 
Redistribution of this software is\ncovered by the terms of both the pdfTeX copyright and\nthe Lesser GNU General Public License.\nFor more information about these matters, see the file\nnamed COPYING and the pdfTeX source.\nPrimary author of pdfTeX: Han The Thanh (pdfTeX) et al.\nCompiled with libpng 1.6.37; using libpng 1.6.37\nCompiled with zlib 1.2.11; using zlib 1.2.11\nCompiled with xpdf version 4.03" + "text": "LaTeX\nIt is possible you already have this installed.\nFirst try the following check in RStudio\nStat406::test_latex_installation()\nIf you see Green checkmarks, then you’re good.\nEven if it fails, follow the instructions, and try it again.\nIf it still fails, proceed with the instructions\n\nWe will install the lightest possible version of LaTeX and its necessary packages as possible so that we can render Jupyter notebooks and R Markdown documents to html and PDF. If you have previously installed LaTeX, please uninstall it before proceeding with these instructions.\nFirst, run the following command to make sure that /usr/local/bin is writable:\nsudo chown -R $(whoami):admin /usr/local/bin\n\n\n\n\n\n\nNote\n\n\n\nYou might be asked to enter your password during installation.\n\n\nNow open RStudio and run the following commands to install the tinytex package and setup tinytex:\ntinytex::install_tinytex()\nYou can check that the installation is working by opening a terminal and asking for the version of latex:\nlatex --version\nYou should see something like this if you were successful:\npdfTeX 3.141592653-2.6-1.40.23 (TeX Live 2022/dev)\nkpathsea version 6.3.4/dev\nCopyright 2021 Han The Thanh (pdfTeX) et al.\nThere is NO warranty. 
Redistribution of this software is\ncovered by the terms of both the pdfTeX copyright and\nthe Lesser GNU General Public License.\nFor more information about these matters, see the file\nnamed COPYING and the pdfTeX source.\nPrimary author of pdfTeX: Han The Thanh (pdfTeX) et al.\nCompiled with libpng 1.6.37; using libpng 1.6.37\nCompiled with zlib 1.2.11; using zlib 1.2.11\nCompiled with xpdf version 4.03" }, { - "objectID": "computing/windows.html#github-pat", - "href": "computing/windows.html#github-pat", - "title": " Windows", + "objectID": "computing/ubuntu.html#github-pat", + "href": "computing/ubuntu.html#github-pat", + "title": " Ubuntu", "section": "Github PAT", "text": "Github PAT\nYou’re probably familiar with 2-factor authentication for your UBC account or other accounts which is a very secure way to protect sensitive information (in case your password gets exposed). Github uses a Personal Access Token (PAT) for the Command Line Interface (CLI) and RStudio. This is different from the password you use to log in with a web browser. You will have to create one. There are some nice R functions that will help you along, and I find that easiest.\nComplete instructions are in Chapter 9 of Happy Git With R. Here’s the quick version (you need the usethis and gitcreds libraries, which you can install with install.packages(c(\"usethis\", \"gitcreds\"))):\n\nIn the RStudio Console, call usethis::create_github_token() This should open a webbrowser. In the Note field, write what you like, perhaps “Stat 406 token”. Then update the Expiration to any date after December 15. (“No expiration” is fine, though not very secure). Make sure that everything in repo is checked. Leave all other checks as is. Scroll to the bottom and click the green “Generate Token” button.\nThis should now give you a long string to Copy. It often looks like ghp_0asfjhlasdfhlkasjdfhlksajdhf9234u. Copy that. 
(You would use this instead of the browser password in RStudio when it asks for a password).\nTo store the PAT permanently in R (so you’ll never have to do this again, hopefully) call gitcreds::gitcreds_set() and paste the thing you copied there." }, { - "objectID": "computing/windows.html#post-installation-notes", - "href": "computing/windows.html#post-installation-notes", - "title": " Windows", + "objectID": "computing/ubuntu.html#post-installation-notes", + "href": "computing/ubuntu.html#post-installation-notes", + "title": " Ubuntu", "section": "Post-installation notes", "text": "Post-installation notes\nYou have completed the installation instructions, well done 🙌!" }, { - "objectID": "computing/windows.html#attributions", - "href": "computing/windows.html#attributions", - "title": " Windows", + "objectID": "computing/ubuntu.html#attributions", + "href": "computing/ubuntu.html#attributions", + "title": " Ubuntu", "section": "Attributions", "text": "Attributions\nThe DSCI 310 Teaching Team, notably, Anmol Jawandha, Tomas Beuzen, Rodolfo Lourenzutti, Joel Ostblom, Arman Seyed-Ahmadi, Florencia D’Andrea, and Tiffany Timbers." }, { - "objectID": "course-setup.html", - "href": "course-setup.html", - "title": "Guide for setting up the course infrastructure", + "objectID": "syllabus.html", + "href": "syllabus.html", + "title": " Syllabus", "section": "", - "text": "Version 2023\nThis guide (hopefully) gives enough instructions for recreating new iterations of Stat 406." 
+ "text": "Term 2023 Winter 1: 05 Sep - 07 Dec 2023" }, { - "objectID": "course-setup.html#github-org", - "href": "course-setup.html#github-org", - "title": "Guide for setting up the course infrastructure", - "section": "Github Org", - "text": "Github Org\n\nCreate a GitHub.com organization\n\nThis is free for faculty with instructor credentials.\nAllows more comprehensive GitHub actions, PR templates and CODEOWNER behaviour than the UBC Enterprise version\nDownside is getting students added (though we include R scripts for this)\n\nOnce done, go to https://github.com/watching. Click the Red Down arrow “Unwatch all”. Then select this Org. The TAs should do the same.\n\n\nPermissions and structure\nSettings > Member Privileges\nWe list only the important ones.\n\nBase Permissions: No Permission\nRepository creation: None\nRepo forking: None\nPages creation: None\nTeam creation rules: No\n\nBe sure to click save in each area after making changes.\nSettings > Actions > General\nAll repositories: Allow all actions and reusable workflows.\nWorkflow permissions: Read and write permissions.\n\n\nTeams\n\n2 teams, one for the TAs and one for the students\nYou must then manually add the teams to any repos they should access\n\nI generally give the TAs “Write” permission, and the students “Read” permission with some exceptions. See the Repos section below." 
+ "objectID": "syllabus.html#course-info", + "href": "syllabus.html#course-info", + "title": " Syllabus", + "section": "Course info", + "text": "Course info\nInstructor:\nDaniel McDonald\nOffice: Earth Sciences Building 3106\nWebsite: https://dajmcdon.github.io/\nEmail: daniel@stat.ubc.ca\nSlack: @prof-daniel\nOffice hours:\nMonday (TA), 2-3pm ESB 1045\nTuesday (DJM), 4-5pm ESB 4182\nThursday (TA), 3-4pm ESB 3174\nFriday (TA/DJM), 10-11am Zoom (link on Canvas)\nCourse webpage:\nWWW: https://ubc-stat.github.io/stat-406/\nGithub: https://github.com/stat-406-2023\nSee also Canvas\nLectures:\nTue/Thu 0800h - 0930h\n(In person) Earth Sciences Building (ESB) 1012\nTextbooks:\n[ISLR]\n[ESL]\nPrerequisite:\nSTAT 306 or CPSC 340" }, { - "objectID": "course-setup.html#repos", - "href": "course-setup.html#repos", - "title": "Guide for setting up the course infrastructure", - "section": "Repos", - "text": "Repos\nThere are typically about 10 repositories. Homeworks and Labs each have 3 with very similar behaviours.\nBe careful copying directories. All of them have hidden files and folders, e.g. .git. Of particular importance are the .github directories which contain PR templates and GitHub Actions. Also relevant are the .Rprofile files which try to override Student Language settings and avoid unprintible markdown characters.\n\nHomeworks\n\nhomework-solutions\nThis is where most of the work happens. My practice is to create the homework solutions first. I edit these (before school starts) until I’m happy. I then duplicate the file and remove the answers. The result is hwxx-instructions.Rmd. The .gitignore file should ignore all of the solutions and commmit only the instructions. Then, about 1 week after the deadline, I adjust the .gitignore and push the solution files.\n\nStudents have Read permission.\nTAs have Write permission.\nThe preamble.tex file is common to HWs and Labs. It creates a lavender box where the solution will go. 
This makes life easy for the TAs.\n\n\n\nhomework-solutions-private\nExactly the same as homework-solutions except that all solutions are available from the beginning for TA access. To create this, after I’m satisfied with homework-solutions I copy all files (not the directory) into a new directory, git init then upload to the org. The students never have permission here.\n\n\nhomework-template\nThis is a “template repo” used for creating student specific homework-studentgh repos (using the setup scripts).\nVery Important: copy the hwxx-instructions files over to a new directory. Do NOT copy the directory or you’ll end up with the solutions visible to the students.\nThen rename hwxx-instructions.Rmd to hwxx.Rmd. Now the students have a .pdf with instructions, and a template .Rmd to work on.\nOther important tasks: * The .gitignore is more elaborate in an attempt to avoid students pushing junk into these repos. * The .github directory contains 3 files: CODEOWNERS begins as an empty doc which will be populated with the assigned grader later; pull_request_template.md is used for all HW submission PRs; workflows contains a GH-action to comment on the PR with the date+time when the PR is opened. * Under Settings > General, select “Template repository”. This makes it easier to duplicate to the student repos.\n\n\n\nLabs\nThe three Labs repos operate exactly as the analogous homework repos.\n\nlabs-solutions\nDo any edits here before class begins.\n\n\nlabs-solutions-private\nSame as with the homeworks\n\n\nlabs-template\nSame as with the homeworks\n\n\n\nclicker-solutions\nThis contains the complete set of clicker questions.\nAnswers are hidden in comments on the presentation.\nI release them incrementally after each module (copying over from my clicker deck).\n\n\nopen-pr-log\nThis contains a some GitHub actions to automatically keep track of open PRs for the TAs.\nIt’s still in testing phase, but should work properly. 
It will create two markdown docs, 1 for labs and 1 for homework. Each shows the assigned TA, the date the PR was opened, and a link to the PR. If everything is configured properly, it should run automatically at 3am every night.\n\nOnly the TAs should have access.\nUnder Settings > Secrets and Variables > Actions you must add a “Repository Secret”. This should be a GitHub Personal Access Token created in your account (Settings > Developer settings > Tokens (classic)). It needs Repo, Workflow, and Admin:Org permissions. I set it to expire at the end of the course. I use it only for this purpose (rather than my other tokens for typical logins).\n\n\n\n.github / .github-private\nThese contains a README that gives some basic information about the available repos and the course. It’s visible Publically, and appears on the Org homepage for all to see. The .github-private has the same function, but applies only to Org members.\n\n\nbakeoff-bakeoff\nThis is for the bonus for HW4. Both TAs and Students have access. I put the TA team as CODEOWNERS and protect the main branch (Settings > Branches > Branch Protection Rules). Here, we “Require approvals” and “Require Review from Code Owners”." + "objectID": "syllabus.html#course-objectives", + "href": "syllabus.html#course-objectives", + "title": " Syllabus", + "section": "Course objectives", + "text": "Course objectives\nThis is a course in statistical learning methods. Based on the theory of linear models covered in Stat 306, this course will focus on applying many techniques of data analysis to interesting datasets.\nThe course combines analysis with methodology and computational aspects. It treats both the “art” of understanding unfamiliar data and the “science” of analyzing that data in terms of statistical properties. 
The focus will be on practical aspects of methodology and intuition to help students develop tools for selecting appropriate methods and approaches to problems in their own lives.\nThis is not a “how to program” course, nor a “tour of machine learning methods”. Rather, this course is about how to understand some ML methods. STAT 306 tends to give background in many of the tools of understanding as well as working with already-written R packages. On the other hand, CPSC 340 introduces many methods with a focus on “from-scratch” implementation (in Julia or Python). This course will try to bridge the gap between these approaches. Depending on which course you took, you may be more or less skilled in some aspects than in others. That’s OK and expected.\n\nLearning outcomes\n\nAssess the prediction properties of the supervised learning methods covered in class;\nCorrectly use regularization to improve predictions from linear models, and also to identify important explanatory variables;\nExplain the practical difference between predictions obtained with parametric and non-parametric methods, and decide in specific applications which approach should be used;\nSelect and construct appropriate ensembles to obtain improved predictions in different contexts;\nUse and interpret principal components and other dimension reduction techniques;\nEmploy reasonable coding practices and understand basic R syntax and function.\nWrite reports and use proper version control; engage with standard software." }, { - "objectID": "course-setup.html#r-package", - "href": "course-setup.html#r-package", - "title": "Guide for setting up the course infrastructure", - "section": "R package", - "text": "R package\nThis is hosted at https://github.com/ubc-stat/stat-406-rpackage/. 
The main purposes are:\n\nDocumentation of datasets used in class, homework, and labs (if not in other R packages)\nProvide a few useful functions.\nInstall all the packages the students need at once, and try to compile LaTeX.\n\nPackage requirements are done manually, unfortunately. Typically, I’ll open the various projects in RStudio and run sort(unique(renv::dependencies()$Package)). It’s not infallible, but works well.\nAll necessary packages should go in “Suggests:” in the DESCRIPTION. This avoids build errors. Note that install via remotes::install_github() then requires dependencies = TRUE." + "objectID": "syllabus.html#textbooks", + "href": "syllabus.html#textbooks", + "title": " Syllabus", + "section": "Textbooks", + "text": "Textbooks\n\nRequired:\nAn Introduction to Statistical Learning, James, Witten, Hastie, Tibshirani, 2013, Springer, New York. (denoted [ISLR])\nAvailable free online: https://www.statlearning.com\n\n\nOptional (but excellent):\nThe Elements of Statistical Learning, Hastie, Tibshirani, Friedman, 2009, Second Edition, Springer, New York. (denoted [ESL])\nAlso available free online: https://web.stanford.edu/~hastie/ElemStatLearn/\nThis second book is a more advanced treatment of a superset of the topics we will cover. If you want to learn more and understand the material more deeply, this is the book for you. All readings from [ESL] are optional." }, { - "objectID": "course-setup.html#worksheets", - "href": "course-setup.html#worksheets", - "title": "Guide for setting up the course infrastructure", - "section": "Worksheets", - "text": "Worksheets\nThese are derived from Matías’s Rmd notes from 2018. They haven’t been updated much.\nThey are hosted at https://github.com/ubc-stat/stat-406-worksheets/.\nI tried requiring them one year. The model was to distribute the R code for the chapters with some random lines removed. Then the students could submit the completed code for small amounts of credit. 
It didn’t seem to move the needle much and was hard to grade (autograding would be nice here).\nNote that there is a GHaction that automatically renders the book from source and pushes to the gh-pages branch. So local build isn’t necessary and derivative files should not be checked in to version control." + "objectID": "syllabus.html#course-assessment-opportunities", + "href": "syllabus.html#course-assessment-opportunities", + "title": " Syllabus", + "section": "Course assessment opportunities", + "text": "Course assessment opportunities\n\nEffort-based component\nLabs: [0, 20]\nHomework assignments: [0, 50]\nClickers: [0, 10]\nTotal: min(65, Labs + Homework + Clickers)\n\n\nLabs\nThese are intended to keep you on track. They are to be submitted via pull requests in your personal labs-<username> repo (see the computing tab for descriptions on how to do this).\nLabs typically have a few questions for you to answer or code to implement. These are to be done during lab periods. But you can do them on your own as well. These are worth 2 points each up to a maximum of 20 points. They are due at 2300 on the day of your assigned lab section.\nIf you attend lab, you may share a submission with another student (with acknowledgement on the PR). If you do not attend lab, you must work on your own (subject to the collaboration instructions for Assignments below).\n\nRules.\nYou must submit via PR by the deadline. Your PR must include at least 3 commits. After lab 2, failure to include at least 3 commits will result in a maximum score of 1.\n\n\n\n\n\n\nTip\n\n\n\nIf you attend your lab section, you may work in pairs, submitting a single document to one of your Repos. Be sure to put both names on the document, and mention the collaboration on your PR. 
You still have until 11pm to submit.\n\n\n\n\nMarking.\nThe overriding theme here is “if you put in the effort, you’ll get all the points.” Grading scheme:\n\n2 if basically all correct\n\n1 if complete but with some major errors, or mostly complete and mostly correct\n\n0 otherwise\n\nYou may submit as many labs as you wish up to 20 total points.\nThere are no appeals on grades.\nIt’s important here to recognize just how important active participation in these activities is. You learn by doing, and this is your opportunity to learn in a low-stakes environment. One thing you’ll learn, for example, is that all animals urinate in 21 seconds.1\n\n\n\nAssignments\nThere will be 5 assignments. These are submitted via pull request similar to the labs but to the homework-<username> repo. Each assignment is worth up to 10 points. They are due by 2300 on the deadline. You must make at least 5 commits. Failure to have at least 5 commits will result in a 25% deduction on HW1 and a 50% deduction thereafter. No exceptions.\nAssignments are typically lightly marked. The median last year was 8/10. But they are not easy. Nor are they short. They often involve a combination of coding, writing, description, and production of statistical graphics.\nAfter receiving a mark and feedback, if you score less than 7, you may make corrections to bring your total to 7. This means, if you fix everything that you did wrong, you get 7. Not 10. The revision must be submitted within 1 week of getting your mark. Only 1 revision per assignment. The TA decision is final. Note that the TAs will only regrade parts you missed, but if you somehow make it worse, they can deduct more points.\nThe revision allowance applies only if you got 3 or more points of “content” deductions. 
If you missed 3 points for content and 2 more for “penalties” (like insufficient commits, code that runs off the side of the page, etc), then you are ineligible.\n\nPolicy on collaboration on assignments\nDiscussing assignments with your classmates is allowed and encouraged, but it is important that every student get practice working on these problems. This means that all the work you turn in must be your own. The general policy on homework collaboration is:\n\nYou must first make a serious effort to solve the problem.\nIf you are stuck after doing so, you may ask for help from another student. You may discuss strategies to solve the problem, but you may not look at their code, nor may they spell out the solution to you step-by-step.\nOnce you have gotten help, you must write your own solution individually. You must disclose, in your GitHub pull request, the names of anyone from whom you got help.\nThis also applies in reverse: if someone approaches you for help, you must not provide it unless they have already attempted to solve the problem, and you may not share your code or spell out the solution step-by-step.\n\n\n\n\n\n\n\nWarning\n\n\n\nAdherence to the above policy means that identical answers, or nearly identical answers, cannot occur. Thus, such occurrences are violations of the Course’s Academic honesty policy.\n\n\nThese rules also apply to getting help from other people such as friends not in the course (try the problem first, discuss strategies, not step-by-step solutions, acknowledge those from whom you received help).\nYou may not use homework help websites, ChatGPT, Stack Overflow, and so on under any circumstances. The purpose here is to learn. Good faith efforts toward learning are rewarded.\nYou can always, of course, ask me for help on Slack. 
And public Slack questions are allowed and encouraged.\nYou may also use external sources (books, websites, papers, …) to\n\nLook up programming language documentation, find useful packages, find explanations for error messages, or remind yourself about the syntax for some feature. I do this all the time in the real world. Wikipedia is your friend.\nRead about general approaches to solving specific problems (e.g. a guide to dynamic programming or a tutorial on unit testing in your programming language), or\nClarify material from the course notes or assignments.\n\nBut external sources must be used to support your solution, not to obtain your solution. You may not use them to\n\nFind solutions to the specific problems assigned as homework (in words or in code)—you must independently solve the problem assigned, not translate a solution presented online or elsewhere.\nFind course materials or solutions from this or similar courses from previous years, or\nCopy text or code to use in your submissions without attribution.\n\nIf you use code from online or other sources, you must include code comments identifying the source. It must be clear what code you wrote and what code is from other sources. This rule also applies to text, images, and any other material you submit.\nPlease talk to me if you have any questions about this policy. Any form of plagiarism or cheating will result in sanctions to be determined by me, including grade penalties (such as negative points for the assignment or reductions in letter grade) or course failure. I am obliged to report violations to the appropriate University authorities. See also the text below.\n\n\n\nClickers\nThese are short multiple choice and True / False questions. They happen in class. For each question, correct answers are worth 4, incorrect answers are worth 2. You get 0 points for not answering.\nSuppose there are N total clicker questions, and you have x points. 
Your final score for this component is\nmax(0, min(5 * x / N - 5, 10)).\nNote that if your average is less than 1, you get 0 points in this component.\n\n\n\n\n\n\nImportant\n\n\n\nIn addition, your final grade in this course will be reduced by 1 full letter grade.\n\n\nThis means that if you did everything else and get a perfect score on the final exam, you will get a 79. Two people did this last year. They were sad.\n\n\n\n\n\n\nWarning\n\n\n\nDON’T DO THIS!!\n\n\nThis may sound harsh, but think about what is required for such a penalty. You’d have to skip more than 50% of class meetings and get every question wrong when you are in class. This is an in-person course. It is not possible to get an A without attending class on a regular basis.\nTo compensate, I will do my best to post recordings of lectures. Past experience has shown 2 things:\n\nYou learn better by attending class than by skipping and “watching”.\nSometimes the technology messes up. So there’s no guarantee that these will be available.\n\nThe purpose is to let you occasionally miss class for any reason with minimal consequences. See also below. If for some reason you need to miss longer stretches of time, please contact me or discuss your situation with your Academic Advisor as soon as possible. Don’t wait until December.\n\n\n\nYour score on HW, Labs, and Clickers\nThe total you can accumulate across these 3 components is 65 points. But you can get there however you want. The total available is 80 points. The rest is up to you. But with choice, comes responsibility.\nRules:\n\nNothing dropped.\nNo extensions.\nIf you miss a lab or a HW deadline, then you miss it.\nMake up for missed work somewhere else.\nIf you isolate due to Covid, fine. You miss a few clickers and maybe a lab (though you can do it remotely).\nIf you have a job interview and can’t complete an assignment on time, then skip it.\n\nWe’re not going to police this stuff. You don’t need to let me know. 
There is no reason that every single person enrolled in this course shouldn’t get > 65 in this class.\nIllustrative scenarios:\n\nDoing 80% on 5 homeworks, coming to class and getting 50% correct, get 2 points on 8 labs gets you 65 points.\nDoing 90% on 5 homeworks, getting 50% correct on all the clickers, averaging 1/2 on all the labs gets you 65 points.\nGoing to all the labs and getting 100%, 100% on 4 homeworks, plus being wrong on every clicker gets you 65 points\n\nChoose your own adventure. Note that the biggest barrier to getting to 65 is skipping the assignments.\n\n\n\n\nFinal exam\n35 points\n\n\nAll multiple choice, T/F, matching.\nThe clickers are the best preparation.\nQuestions may ask you to understand or find mistakes in code.\nNo writing code.\n\nThe Final is very hard. By definition, it cannot be effort-based.\nIt is intended to separate those who really understand the material from those who don’t. Last year, the median was 50%. But if you put in the work (do all the effort points) and get 50%, you get an 83 (an A-). If you put in the work (do all the effort points) and skip the final, you get a 65. You do not have to pass the final to pass the course. You don’t even have to take the final.\nThe point of this scheme is for those who work hard to do well. But only those who really understand the material will get 90+." }, { - "objectID": "course-setup.html#course-website-lectures", - "href": "course-setup.html#course-website-lectures", - "title": "Guide for setting up the course infrastructure", - "section": "Course website / lectures", - "text": "Course website / lectures" + "objectID": "syllabus.html#health-issues-and-considerations", + "href": "syllabus.html#health-issues-and-considerations", + "title": " Syllabus", + "section": "Health issues and considerations", + "text": "Health issues and considerations\n\nCovid Safety in the Classroom\n\n\n\n\n\n\nImportant\n\n\n\nIf you think you’re sick, stay home no matter what.\n\n\nMasks. 
Masks are recommended. For our in-person meetings in this class, it is important that all of us feel as comfortable as possible engaging in class activities while sharing an indoor space. Masks are a primary tool to make it harder for Covid-19 to find a new host. Please feel free to wear one or not given your own personal circumstances. Note that there are some people who cannot wear a mask. These individuals are equally welcome in our class.\nVaccination. If you have not yet had a chance to get vaccinated against Covid-19, vaccines are available to you, free. See http://www.vch.ca/covid-19/covid-19-vaccine for help finding an appointment. Boosters will be available later this term. The higher the rate of vaccination in our community overall, the lower the chance of spreading this virus. You are an important part of the UBC community. Please arrange to get vaccinated if you have not already done so. The same goes for Flu.\n\n\nYour personal health\n\n\n\n\n\n\nWarning\n\n\n\nIf you are sick, it’s important that you stay home – no matter what you think you may be sick with (e.g., cold, flu, other).\n\n\n\nDo not come to class if you have Covid symptoms, have recently tested positive for Covid, or are required to quarantine. You can check this website to find out if you should self-isolate or self-monitor: http://www.bccdc.ca/health-info/diseases-conditions/covid-19/self-isolation#Who.\nYour precautions will help reduce risk and keep everyone safer. In this class, the marking scheme is intended to provide flexibility so that you can prioritize your health and still be able to succeed. All work can be completed outside of class with reasonable time allowances.\nIf you do miss class because of illness:\n\nMake a connection early in the term to another student or a group of students in the class. You can help each other by sharing notes. 
If you don’t yet know anyone in the class, post on the discussion forum to connect with other students.\nConsult the class resources on here and on Canvas. We will post all the slides, readings, and recordings for each class day.\nUse Slack for help.\nCome to virtual office hours.\nSee the marking scheme for reassurance about what flexibility you have. No part of your final grade will be directly impacted by missing class.\n\nIf you are sick on final exam day, do not attend the exam. You must follow up with your home faculty’s advising office to apply for deferred standing. Students who are granted deferred standing write the final exam at a later date. If you’re a Science student, you must apply for deferred standing (an academic concession) through Science Advising no later than 48 hours after the missed final exam/assignment. Learn more and find the application online. For additional information about academic concessions, see the UBC policy here.\n\n\n\n\n\n\n\nNote\n\n\n\nPlease talk with me if you have any concerns or ask me if you are worried about falling behind." }, { - "objectID": "course-setup.html#ghclass-package", - "href": "course-setup.html#ghclass-package", - "title": "Guide for setting up the course infrastructure", - "section": "{ghclass} package", - "text": "{ghclass} package" + "objectID": "syllabus.html#university-policies", + "href": "syllabus.html#university-policies", + "title": " Syllabus", + "section": "University policies", + "text": "University policies\nUBC provides resources to support student learning and to maintain healthy lifestyles but recognizes that sometimes crises arise and so there are additional resources to access including those for survivors of sexual violence. UBC values respect for the person and ideas of all members of the academic community. Harassment and discrimination are not tolerated nor is suppression of academic freedom. 
UBC provides appropriate accommodation for students with disabilities and for religious, spiritual and cultural observances. UBC values academic honesty and students are expected to acknowledge the ideas generated by others and to uphold the highest academic standards in all of their actions. Details of the policies and how to access support are available here.\n\nAcademic honesty and standards\nUBC Vancouver Statement\nAcademic honesty is essential to the continued functioning of the University of British Columbia as an institution of higher learning and research. All UBC students are expected to behave as honest and responsible members of an academic community. Breach of those expectations or failure to follow the appropriate policies, principles, rules, and guidelines of the University with respect to academic honesty may result in disciplinary action.\nFor the full statement, please see the 2022/23 Vancouver Academic Calendar\nCourse specific\nSeveral commercial services have approached students regarding selling class notes/study guides to their classmates. Please be advised that selling a faculty member’s notes/study guides individually or on behalf of one of these services using UBC email or Canvas, violates both UBC information technology and UBC intellectual property policy. Selling the faculty member’s notes/study guides to fellow students in this course is not permitted. Violations of this policy will be considered violations of UBC Academic Honesty and Standards and will be reported to the Dean of Science as a violation of course rules. Sanctions for academic misconduct may include a failing grade on the assignment for which the notes/study guides are being sold, a reduction in your final course grade, a failing grade in the course, among other possibilities. 
Similarly, contracting with any service that results in an individual other than the enrolled student providing assistance on quizzes or exams or posing as an enrolled student is considered a violation of UBC’s academic honesty standards.\nSome of the problems that are assigned are similar or identical to those assigned in previous years by me or other instructors for this or other courses. Using proofs or code from anywhere other than the textbooks, this year’s course notes, or the course website is not only considered cheating (as described above), it is easily detectable cheating. Such behavior is strictly forbidden.\nIn previous years, I have caught students cheating on the exams or assignments. I did not enforce any penalty because the action did not help. Cheating, in my experience, occurs because students don’t understand the material, so the result is usually a failing grade even before I impose any penalty and report the incident to the Dean’s office. I carefully structure exams and assignments to make it so that I can catch these issues. I will catch you, and it does not help. Do your own work, and use the TAs and me as resources. If you are struggling, we are here to help.\n\n\n\n\n\n\nCaution\n\n\n\nIf I suspect cheating, your case will be forwarded to the Dean’s office. No questions asked.\n\n\nGenerative AI\nTools to help you code more quickly are rapidly becoming more prevalent. I use them regularly myself. The point of this course is not to “complete assignments” but to learn coding (and other things). With that goal in mind, I recommend you avoid the use of Generative AI. It is unlikely to contribute directly to your understanding of the material. Furthermore, I have experimented with certain tools on the assignments for this course and have found the results underwhelming.\nThe material in this course is best learned through trial and error. Avoiding this mechanism (with generative AI or by copying your friend) is a short-term solution at best. 
I have tried to structure this course to discourage these types of short cuts, and minimize the pressure you may feel to take them.\n\n\nAcademic Concessions\nThese are handled according to UBC policy. Please see\n\nUBC student services\nUBC Vancouver Academic Calendar\nFaculty of Science Concessions\n\n\n\nMissed final exam\nStudents who miss the final exam must report to their Faculty advising office within 72 hours of the missed exam, and must supply supporting documentation. Only your Faculty Advising office can grant deferred standing in a course. You must also notify your instructor prior to (if possible) or immediately after the exam. Your instructor will let you know when you are expected to write your deferred exam. Deferred exams will ONLY be provided to students who have applied for and received deferred standing from their Faculty.\n\n\nTake care of yourself\nCourse work at this level can be intense, and I encourage you to take care of yourself. Do your best to maintain a healthy lifestyle this semester by eating well, exercising, avoiding drugs and alcohol, getting enough sleep and taking some time to relax. This will help you achieve your goals and cope with stress. I struggle with these issues too, and I try hard to set aside time for things that make me happy (cooking, playing/listening to music, exercise, going for walks).\nAll of us benefit from support during times of struggle. If you are having any problems or concerns, do not hesitate to speak with me. There are also many resources available on campus that can provide help and support. Asking for support sooner rather than later is almost always a good idea.\nIf you or anyone you know experiences any academic stress, difficult life events, or feelings like anxiety or depression, I strongly encourage you to seek support. UBC Counseling Services is here to help: call 604 822 3811 or visit their website. 
Consider also reaching out to a friend, faculty member, or family member you trust to help get you the support you need.\n\nA dated PDF is available at this link." }, { - "objectID": "course-setup.html#canvas", - "href": "course-setup.html#canvas", - "title": "Guide for setting up the course infrastructure", - "section": "Canvas", - "text": "Canvas\nI use a the shell provided by FoS.\nNothing else goes here, but you have to update all the links.\nTwo Canvas Quizzes: * Quiz 0 collects GitHub accounts, ensures that students read the syllabus. Due in Week 1. * Final Exam is the final * I usually record lectures (automatically) using the classroom tech that automatically uploads. * Update the various links on the Homepage." + "objectID": "syllabus.html#footnotes", + "href": "syllabus.html#footnotes", + "title": " Syllabus", + "section": "Footnotes", + "text": "Footnotes\n\n\nA careful reading of this paper with the provocative title “Law of Urination: all mammals empty their bladders over the same duration” reveals that the authors actually mean something far less precise. In fact, their claim is more accurately stated as “mammals over 3kg in body weight urinate in 21 seconds with a standard deviation of 13 seconds”. But the accurate characterization is far less publicity-worthy.↩︎" }, { - "objectID": "course-setup.html#slack", - "href": "course-setup.html#slack", - "title": "Guide for setting up the course infrastructure", - "section": "Slack", - "text": "Slack\n\nSet up a free Org. Invite link gets posted to Canvas.\nI add @students.ubc.ca, @ubc.ca, @stat.ubc.ca to the whitelist.\nI also post the invite on Canvas.\nCreate channels before people join. That way you can automatically add everyone to channels all at once. I do one for each module, 1 for code/github, 1 for mechanics. + 1 for the TAs (private)\nClick through all the settings. It’s useful to adjust these a bit." 
+ "objectID": "schedule/index.html", + "href": "schedule/index.html", + "title": " Schedule", + "section": "", + "text": "Required readings and lecture videos are listed below for each module. Readings from [ISLR] are always required while those from [ESL] are optional and supplemental." }, { - "objectID": "course-setup.html#clickers", - "href": "course-setup.html#clickers", - "title": "Guide for setting up the course infrastructure", - "section": "Clickers", - "text": "Clickers\nSee https://lthub.ubc.ca/guides/iclicker-cloud-instructor-guide/\nI only use “Polling” no “Quizzing” and no “Attendance”\n\nIn clicker Settings > Polling > Sharing. Turn off the Sending (to avoid students doing it at home)\nNo participation points.\n2 points for correct, 2 for answering.\nIntegrations > Set this up with Canvas. Sync the roster. You’ll likely have to repeat this near the Add/Drop Deadline.\nI only sync the total, since I’ll recalibrate later." + "objectID": "schedule/index.html#introduction-and-review", + "href": "schedule/index.html#introduction-and-review", + "title": " Schedule", + "section": "0 Introduction and Review", + "text": "0 Introduction and Review\nRequired reading below is meant to reengage brain cells which have no doubt forgotten all the material that was covered in STAT 306 or CPSC 340. We don’t presume that you remember all these details, but that, upon rereading, they at least sound familiar. 
If this all strikes you as completely foreign, this class may not be for you.\n\nRequired reading\n\n[ISLR] 2.1, 2.2, and Chapter 3 (this material is review)\n\nOptional reading\n\n[ESL] 2.4 and 2.6\n\nHandouts\n\nProgramming in R .Rmd, .pdf\n\n\nUsing in RMarkdown .Rmd, .pdf\n\n\n\n\n\nDate\nSlides\nDeadlines\n\n\n\n\n05 Sep 23\n(no class, Imagine UBC)\n\n\n\n07 Sep 23\nIntro to class, Git\n(Quiz 0 due tomorrow)\n\n\n12 Sep 23\nUnderstanding R / Rmd\nLab 00, (Labs begin)\n\n\n14 Sep 23\nLM review, LM Example" + }, + { + "objectID": "schedule/index.html#model-accuracy", + "href": "schedule/index.html#model-accuracy", + "title": " Schedule", + "section": "1 Model Accuracy", + "text": "1 Model Accuracy\n\nTopics\n\nModel selection; cross validation; information criteria; stepwise regression\n\nRequired reading\n\n[ISLR] Ch 2.2 (not 2.2.3), 5.1 (not 5.1.5), 6.1, 6.4\n\nOptional reading\n\n[ESL] 7.1-7.5, 7.10\n\n\n\n\n\nDate\nSlides\nDeadlines\n\n\n\n\n19 Sep 23\nRegression function, Bias and Variance\n\n\n\n21 Sep 23\nRisk estimation, Info Criteria\n\n\n\n26 Sep 23\nGreedy selection\n\n\n\n28 Sep 23\n\nHW 1 due" + }, + { + "objectID": "schedule/index.html#regularization-smoothing-and-trees", + "href": "schedule/index.html#regularization-smoothing-and-trees", + "title": " Schedule", + "section": "2 Regularization, smoothing, and trees", + "text": "2 Regularization, smoothing, and trees\n\nTopics\n\nRidge regression, lasso, and related; linear smoothers (splines, kernels); kNN\n\nRequired reading\n\n[ISLR] Ch 6.2, 7.1-7.7.1, 8.1, 8.1.1, 8.1.3, 8.1.4\n\nOptional reading\n\n[ESL] 3.4, 3.8, 5.4, 6.3\n\n\n\n\n\nDate\nSlides\nDeadlines\n\n\n\n\n3 Oct 23\nRidge, Lasso\n\n\n\n5 Oct 23\nCV for comparison, NP 1\n\n\n\n10 Oct 23\nNP 2, Why smoothing?\n\n\n\n12 Oct 23\nNo class (Makeup Monday)\n\n\n\n17 Oct 23\nOther\nHW 2 due" + }, + { + "objectID": "schedule/index.html#classification", + "href": "schedule/index.html#classification", + "title": " Schedule", + "section": "3 
Classification", + "text": "3 Classification\n\nTopics\n\nlogistic regression; LDA/QDA; naive bayes; trees\n\nRequired reading\n\n[ISLR] Ch 2.2.3, 5.1.5, 4-4.5, 8.1.2\n\nOptional reading\n\n[ESL] 4-4.4, 9.2, 13.3\n\n\n\n\n\nDate\nSlides\nDeadlines\n\n\n\n\n19 Oct 23\nClassification, LDA and QDA\n\n\n\n24 Oct 23\nLogistic regression\n\n\n\n26 Oct 23\nGradient descent, Other losses\n\n\n\n31 Oct 23\nNonlinear" + }, + { + "objectID": "schedule/index.html#modern-techniques", + "href": "schedule/index.html#modern-techniques", + "title": " Schedule", + "section": "4 Modern techniques", + "text": "4 Modern techniques\n\nTopics\n\nbagging; boosting; random forests; neural networks\n\nRequired reading\n\n[ISLR] 5.2, 8.2, 10.1, 10.2, 10.6, 10.7\n\nOptional reading\n\n[ESL] 10.1-10.10 (skip 10.7), 11.1, 11.3, 11.4, 11.7\n\n\n\n\n\nDate\nSlides\nDeadlines\n\n\n\n\n2 Nov 23\nThe bootstrap\nHW 3 due\n\n\n7 Nov 23\nBagging and random forests, Boosting\n\n\n\n9 Nov 23\nIntro to neural nets\n\n\n\n14 Nov 23\nNo class. 
(Midterm break)\n\n\n\n16 Nov 23\nEstimating neural nets\n\n\n\n21 Nov 23\nNeural nets wrapup\nHW 4 due" + }, + { + "objectID": "schedule/index.html#unsupervised-learning", + "href": "schedule/index.html#unsupervised-learning", + "title": " Schedule", + "section": "5 Unsupervised learning", + "text": "5 Unsupervised learning\n\nTopics\n\ndimension reduction and clustering\n\nRequired reading\n\n[ISLR] 12\n\nOptional reading\n\n[ESL] 8.5, 13.2, 14.3, 14.5.1, 14.8, 14.9\n\n\n\n\n\nDate\nSlides\nDeadlines\n\n\n\n\n23 Nov 23\nIntro to PCA, Issues with PCA\n\n\n\n28 Nov 23\nPCA v KPCA\n\n\n\n30 Nov 23\nK means clustering\n\n\n\n5 Dec 23\nHierarchical clustering\n\n\n\n7 Dec 23\n\nHW 5 due" + }, + { + "objectID": "schedule/index.html#f-final-exam", + "href": "schedule/index.html#f-final-exam", + "title": " Schedule", + "section": "F Final exam", + "text": "F Final exam\nDate and time TBD.\n\n\n\n\n\n\nImportant\n\n\n\nDo not make any plans to leave Vancouver before the final exam date is announced.\n\n\n\nIn person attendance is required (per Faculty of Science guidelines)\nYou must bring your computer as the exam will be given through Canvas\nPlease arrange to borrow one from the library if you do not have your own. Let me know ASAP if this may pose a problem.\nYou may bring 2 sheets of front/back 8.5x11 paper with any notes you want to use. No other materials will be allowed.\nThere will be no required coding, but I may show code or output and ask questions about it.\nIt will be entirely multiple choice / True-False / matching, etc. Delivered on Canvas." 
}, { "objectID": "schedule/slides/00-intro-to-class.html#meta-lecture", diff --git a/sitemap.xml b/sitemap.xml index 52d38ff..01da8a8 100644 --- a/sitemap.xml +++ b/sitemap.xml @@ -2,98 +2,102 @@ https://github.com/UBC-STAT/stat-406/schedule/handouts/lab00-git.html - 2023-09-19T00:38:39.945Z + 2023-09-20T00:10:00.052Z https://github.com/UBC-STAT/stat-406/schedule/slides/08-ridge-regression.html - 2023-09-19T00:38:37.577Z + 2023-09-20T00:09:57.988Z https://github.com/UBC-STAT/stat-406/schedule/slides/06-information-criteria.html - 2023-09-19T00:38:35.856Z + 2023-09-20T00:09:56.512Z https://github.com/UBC-STAT/stat-406/schedule/slides/04-bias-variance.html - 2023-09-19T00:38:34.260Z + 2023-09-20T00:09:55.124Z https://github.com/UBC-STAT/stat-406/schedule/slides/02-lm-example.html - 2023-09-19T00:38:32.640Z + 2023-09-20T00:09:53.664Z https://github.com/UBC-STAT/stat-406/schedule/slides/00-version-control.html - 2023-09-19T00:38:31.260Z + 2023-09-20T00:09:52.500Z https://github.com/UBC-STAT/stat-406/schedule/slides/00-quiz-0-wrap.html - 2023-09-19T00:38:28.532Z + 2023-09-20T00:09:50.052Z - https://github.com/UBC-STAT/stat-406/schedule/index.html - 2023-09-19T00:38:27.028Z + https://github.com/UBC-STAT/stat-406/schedule/slides/00-cv-for-many-models.html + 2023-09-20T00:09:48.776Z - https://github.com/UBC-STAT/stat-406/syllabus.html - 2023-09-19T00:38:25.492Z + https://github.com/UBC-STAT/stat-406/course-setup.html + 2023-09-20T00:09:46.400Z - https://github.com/UBC-STAT/stat-406/computing/ubuntu.html - 2023-09-19T00:38:23.204Z + https://github.com/UBC-STAT/stat-406/computing/windows.html + 2023-09-20T00:09:44.656Z - https://github.com/UBC-STAT/stat-406/computing/mac_arm.html - 2023-09-19T00:38:21.540Z + https://github.com/UBC-STAT/stat-406/computing/mac_x86.html + 2023-09-20T00:09:43.136Z - https://github.com/UBC-STAT/stat-406/faq.html - 2023-09-19T00:38:20.263Z + https://github.com/UBC-STAT/stat-406/computing/index.html + 2023-09-20T00:09:41.692Z 
https://github.com/UBC-STAT/stat-406/index.html - 2023-09-19T00:38:18.763Z + 2023-09-20T00:09:39.960Z - https://github.com/UBC-STAT/stat-406/computing/index.html - 2023-09-19T00:38:20.704Z + https://github.com/UBC-STAT/stat-406/faq.html + 2023-09-20T00:09:41.300Z - https://github.com/UBC-STAT/stat-406/computing/mac_x86.html - 2023-09-19T00:38:22.372Z + https://github.com/UBC-STAT/stat-406/computing/mac_arm.html + 2023-09-20T00:09:42.412Z - https://github.com/UBC-STAT/stat-406/computing/windows.html - 2023-09-19T00:38:24.176Z + https://github.com/UBC-STAT/stat-406/computing/ubuntu.html + 2023-09-20T00:09:43.864Z - https://github.com/UBC-STAT/stat-406/course-setup.html - 2023-09-19T00:38:26.164Z + https://github.com/UBC-STAT/stat-406/syllabus.html + 2023-09-20T00:09:45.792Z + + + https://github.com/UBC-STAT/stat-406/schedule/index.html + 2023-09-20T00:09:47.184Z https://github.com/UBC-STAT/stat-406/schedule/slides/00-intro-to-class.html - 2023-09-19T00:38:27.844Z + 2023-09-20T00:09:49.460Z https://github.com/UBC-STAT/stat-406/schedule/slides/00-r-review.html - 2023-09-19T00:38:30.124Z + 2023-09-20T00:09:51.436Z https://github.com/UBC-STAT/stat-406/schedule/slides/01-lm-review.html - 2023-09-19T00:38:31.984Z + 2023-09-20T00:09:53.080Z https://github.com/UBC-STAT/stat-406/schedule/slides/03-regression-function.html - 2023-09-19T00:38:33.524Z + 2023-09-20T00:09:54.460Z https://github.com/UBC-STAT/stat-406/schedule/slides/05-estimating-test-mse.html - 2023-09-19T00:38:35.128Z + 2023-09-20T00:09:55.868Z https://github.com/UBC-STAT/stat-406/schedule/slides/07-greedy-selection.html - 2023-09-19T00:38:36.701Z + 2023-09-20T00:09:57.220Z https://github.com/UBC-STAT/stat-406/schedule/slides/09-l1-penalties.html - 2023-09-19T00:38:38.445Z + 2023-09-20T00:09:58.720Z