diff --git a/materials/tutorial_04/data/facebook_data.csv b/materials/tutorial_04/data/facebook_data.csv
new file mode 100644
index 0000000..e0cea71
--- /dev/null
+++ b/materials/tutorial_04/data/facebook_data.csv
@@ -0,0 +1,492 @@
\ No newline at end of file
diff --git a/materials/tutorial_04/tests_tutorial_04.R b/materials/tutorial_04/tests_tutorial_04.R
new file mode 100644
index 0000000..bb524b2
--- /dev/null
+++ b/materials/tutorial_04/tests_tutorial_04.R
@@ -0,0 +1,416 @@
+# +
+#abstraction templates
+# Check a true/false answer against the digest of the expected answer.
+#
+# answerX.X:    the answer object, passed by name so the name can be reported.
+# expectedHash: value that digest(tolower(answer)) must equal.
+#
+# Checks, in order: the object exists, the value is exactly "true" or "false"
+# (any letter case), and the digest of its lower-cased form equals expectedHash.
+check_TF <- function(answerX.X, expectedHash) {
+  # Recover the caller's variable name for use in the failure message.
+  var_name <- deparse(substitute(answerX.X))
+  test_that(paste('Did not assign answer to an object called ', var_name), {
+    expect_true(exists(var_name))
+  })
+  test_that('Solution should be "true" or "false"', {
+    # Anchored so that strings merely containing "true"/"false" are rejected.
+    expect_match(answerX.X, "^(true|false)$", ignore.case = TRUE)
+  })
+  answer_hash <- digest(tolower(answerX.X))
+  # Template for optional hints keyed on the hash of a known wrong answer:
+  #if (answer_hash == "HASH_HERE") {
+  #  print("HINT_HERE")
+  #}
+  test_that("Solution is incorrect", {
+    expect_equal(answer_hash, expectedHash)
+  })
+  print("Success!")
+}
+# Check a multiple-choice answer against the digest of the expected answer.
+#
+# answerX.X:    the answer object, passed by name so the name can be reported.
+# choiceList:   character vector of the answers that are acceptable in form.
+# expectedHash: value that digest(tolower(answer)) must equal.
+#
+# Checks, in order: the object exists, the answer is one of choiceList
+# (compared case-insensitively), and its lower-cased digest equals expectedHash.
+check_MC <- function(answerX.X, choiceList, expectedHash) {
+  # Recover the caller's variable name for use in the failure message.
+  var_name <- deparse(substitute(answerX.X))
+  test_that(paste('Did not assign answer to an object called ', var_name), {
+    expect_true(exists(var_name))
+  })
+  test_that(paste('Solution should be a single character ', toString(choiceList)), {
+    expect_true(tolower(answerX.X) %in% tolower(choiceList))
+  })
+  answer_hash <- digest(tolower(answerX.X))
+  # Template for optional hints keyed on the hash of a known wrong answer:
+  #if (answer_hash == "HASH_HERE") {
+  #  print("HINT_HERE")
+  #} else if (answer_hash == "HASH_HERE") {
+  #  print("HINT_HERE")
+  #} else if (answer_hash == "HASH_HERE") {
+  #  print("HINT_HERE")
+  #}
+  test_that("Solution is incorrect", {
+    expect_equal(answer_hash, expectedHash)
+  })
+  print("Success!")
+}
+# Check a data frame answer: class, column names, row count and per-column sums.
+#
+# answerX.X:         the data frame, passed by name so the name can be reported.
+# expected_colnames: column names the data frame must have (order is ignored).
+# hashNRows:         value that digest(as.integer(nrow(answer))) must equal.
+# cols_to_check:     names of the columns whose contents are verified.
+# precision_list:    scale factor applied to each column sum before truncation.
+# expectedHashes:    expected digest for each scaled, truncated column sum.
+#
+# cols_to_check, precision_list and expectedHashes are walked in parallel: the
+# i-th column is summed, multiplied by the i-th precision, truncated with
+# as.integer() and its digest compared with the i-th expected hash.
+check_DF <- function(answerX.X, expected_colnames, hashNRows, cols_to_check, precision_list, expectedHashes) {
+  # Recover the caller's variable name for use in the failure message.
+  var_name <- deparse(substitute(answerX.X))
+  test_that(paste('Did not assign answer to an object called ', var_name), {
+    expect_true(exists(var_name))
+  })
+  test_that("Solution should be a data frame", {
+    expect_true(is.data.frame(answerX.X))
+  })
+  given_colnames <- colnames(answerX.X)
+  test_that("Data frame does not have the correct columns", {
+    expect_true(setequal(expected_colnames, given_colnames))
+  })
+  test_that("Data frame does not contain the correct number of rows", {
+    expect_equal(digest(as.integer(nrow(answerX.X))), hashNRows)
+  })
+  # Iterate over the three vectors directly rather than apply()-ing over a
+  # data.frame, which would coerce every precision to a formatted string first.
+  Map(
+    function(col_name, precision, expected_hash) {
+      test_that(paste(col_name, " does not contain the correct data"), {
+        expect_equal(
+          digest(as.integer(sum(answerX.X[col_name]) * as.double(precision))),
+          expected_hash
+        )
+      })
+    },
+    cols_to_check, precision_list, expectedHashes
+  )
+  print("Success!")
+}
+# Check a numeric answer against the digest of its scaled, truncated value.
+#
+# answerX.X:    the answer object, passed by name so the name can be reported.
+# precision:    scale factor applied before truncation with as.integer().
+# expectedHash: value that digest(as.integer(answer * precision)) must equal.
+#
+# Checks, in order: the object exists, it converts to a non-NA number, and the
+# digest of the scaled integer equals expectedHash.
+check_numeric <- function(answerX.X, precision, expectedHash) {
+  # Recover the caller's variable name for use in the failure messages.
+  var_name <- deparse(substitute(answerX.X))
+  test_that(paste('Did not assign answer to an object called ', var_name), {
+    expect_true(exists(var_name))
+  })
+  answer_as_numeric <- as.numeric(answerX.X)
+  test_that(paste(var_name, " should be a number"), {
+    expect_false(is.na(answer_as_numeric))
+  })
+  test_that(paste(var_name, " value is incorrect"), {
+    expect_equal(digest(as.integer(answer_as_numeric * precision)), expectedHash)
+  })
+  print("Success!")
+}
+# Check a numeric expression (e.g. one element of a vector) against a digest.
+#
+# Same checks as check_numeric() minus the exists() test, so the argument may
+# be an arbitrary expression rather than the name of an object.
+#
+# answerX.X:    the value to check; its deparsed expression is used in messages.
+# precision:    scale factor applied before truncation with as.integer().
+# expectedHash: value that digest(as.integer(answer * precision)) must equal.
+check_numeric_element <- function(answerX.X, precision, expectedHash) {
+  # Text of the expression the caller passed, for use in the failure messages.
+  var_name <- deparse(substitute(answerX.X))
+  answer_as_numeric <- as.numeric(answerX.X)
+  test_that(paste(var_name, " should be a number"), {
+    expect_false(is.na(answer_as_numeric))
+  })
+  test_that(paste(var_name, " value is incorrect"), {
+    expect_equal(digest(as.integer(answer_as_numeric * precision)), expectedHash)
+  })
+  print("Success!")
+}
+# Check a ggplot answer: class, x mapping, layers, bin width, data and labels.
+#
+# answerX.X:       the plot object, passed by name so the name can be reported.
+# x_axis_var:      name of the variable expected on the x-axis.
+# geom_type:       class name expected on the first layer's geom (e.g. "GeomPoint").
+# hasVline:        if TRUE, the second layer must be a GeomVline.
+# bin_width_hash:  expected digest of the first layer's integer-cast binwidth.
+# nrow_hash:       expected digest of nrow() of the plot's data.
+# x_axis_var_hash: expected digest of the rounded sum of the x-axis column.
+# hasTitle:        if TRUE, the plot labels must include a "title" entry.
+check_plot <- function(answerX.X, x_axis_var, geom_type, hasVline, bin_width_hash, nrow_hash, x_axis_var_hash, hasTitle) {
+  # Recover the caller's variable name for use in the failure message.
+  var_name <- deparse(substitute(answerX.X))
+  test_that(paste('Did not assign answer to an object called ', var_name), {
+    expect_true(exists(var_name))
+  })
+  test_that("Solution should be a ggplot object", {
+    expect_true(is.ggplot(answerX.X))
+  })
+  # Layer-level aesthetics come first, so they win over plot-level ones in `$x`.
+  properties <- c(answerX.X$layers[[1]]$mapping, answerX.X$mapping)
+  test_that(paste("Plot should have ", x_axis_var," on the x-axis"), {
+    expect_true(x_axis_var == rlang::get_expr(properties$x))
+  })
+  test_that("Plot does not have the correct layers", {
+    expect_true(geom_type %in% class(answerX.X$layers[[1]]$geom))
+    if (hasVline) {
+      # Guard the index so a single-layer plot fails the expectation instead of
+      # raising a subscript-out-of-bounds error.
+      expect_true(
+        length(answerX.X$layers) >= 2 &&
+          "GeomVline" %in% class(answerX.X$layers[[2]]$geom)
+      )
+    }
+  })
+  test_that("Plot does not have the correct bin width", {
+    expect_equal(
+      digest(as.integer(mget("stat_params", answerX.X$layers[[1]])[["stat_params"]][["binwidth"]])),
+      bin_width_hash)
+  })
+  test_that("Plot does not use the correct data", {
+    expect_equal(digest(nrow(answerX.X$data)), nrow_hash)
+    expect_equal(digest(round(sum(answerX.X$data[x_axis_var]))), x_axis_var_hash)
+    # If X_AXIS_VAR is not known:
+    # expect_equal(digest(round(sum(pull(answerX.X$data, rlang::get_expr(properties$x))))), "HASH_HERE")
+  })
+  test_that("x-axis label should be descriptive and human readable", {
+    # A label equal to the raw variable name is treated as not having been set.
+    expect_false(answerX.X$labels$x == toString(rlang::get_expr(properties$x)))
+  })
+  if (hasTitle) {
+    test_that("Plot should have a title", {
+      expect_true("title" %in% names(answerX.X$labels))
+    })
+  }
+  print("Success!")
+}
+# Build every non-empty selection of `vec`, keeping the elements in their
+# original order and collapsing each selection into one string
+# (e.g. c("A", "B", "C") gives "A", "B", "C", "AB", "AC", "BC", "ABC").
+#
+# The previous nested loops indexed the slice `temp` with positions of `vec`
+# (`k in i:j`), so some ordered selections (e.g. "AD" out of A-D, "CE" out of
+# A-E) were never produced. Every string the old version returned is still
+# returned; only the order of the result differs.
+#
+# vec: vector of option labels.
+# Returns a character vector of unique collapsed selections; character(0) for
+# empty input.
+getPermutations <- function(vec) {
+  n <- length(vec)
+  if (n == 0) {
+    return(character(0))
+  }
+  # For each selection size k, enumerate index combinations in increasing
+  # order so the labels keep their original relative order.
+  selections <- lapply(seq_len(n), function(k) {
+    utils::combn(n, k, FUN = function(idx) paste(vec[idx], collapse = ''),
+                 simplify = FALSE)
+  })
+  unique(unlist(selections))
+}
+# +
+# Question 1.0
+# Grades `answer1.0`. Question 1.0 offers only options A and B, so only those
+# two letters are accepted as well-formed answers.
+test_1.0 <- function() {
+  check_MC(answer1.0, LETTERS[1:2], '127a2ec00989b9f7faf671ed470be7f8')
+}
+# +
+# Question 1.1
+# Grades `answer1.1` as a multiple-choice answer among options A-D.
+test_1.1 <- function() {
+  check_MC(answer1.1, LETTERS[1:4], '127a2ec00989b9f7faf671ed470be7f8')
+}
+# +
+# Question 1.2
+# Grades `answer1.2` as a multiple-choice answer among options A-B.
+test_1.2 <- function() {
+  check_MC(answer1.2, LETTERS[1:2], '127a2ec00989b9f7faf671ed470be7f8')
+}
+# +
+# Question 1.3
+# Grades `answer1.3`. Question 1.3 offers options A-E, so only those five
+# letters are accepted as well-formed answers.
+test_1.3 <- function() {
+  check_MC(answer1.3, LETTERS[1:5], 'ddf100612805359cd81fdc5ce3b9fbba')
+}
+# +
+# Question 1.4
+# Question 1.4: one check per coefficient of `caschools_MLR_add`. Each
+# coefficient is scaled by 1e4 and truncated before its digest is compared.
+test_1.4.0 <- function() {
+  check_numeric_element(caschools_MLR_add$coefficients[1],1e4,"7d5fdc4b1617f1213563cf12b0be1c68")
+}
+test_1.4.1 <- function() {
+  check_numeric_element(caschools_MLR_add$coefficients[2],1e4,"1cf3a32c120bd3f6faddc6ae470fd2b8")
+}
+test_1.4.2 <- function() {
+  check_numeric_element(caschools_MLR_add$coefficients[3],1e4,"8873eb87d376c1c05700be1435ff22dc")
+}
+# +
+# Question 1.5
+# Question 1.5: checks the plot `caschools_MLR_add_plot` (points with `income`
+# on the x-axis).
+# NOTE(review): the `hasVline` and `hasTitle` arguments were missing from this
+# call, leaving it unterminated. FALSE/TRUE below are reconstructed from the
+# Question 1.5 plot template (a point layer followed by a line layer, with a
+# title in labs()) - confirm against the intended grading.
+test_1.5 <- function() {
+  check_plot(caschools_MLR_add_plot,
+             x_axis_var = "income",
+             geom_type = "GeomPoint",
+             hasVline = FALSE,
+             bin_width_hash = "3e2e4a08c44d0224de5b7e668c75ace3",
+             nrow_hash = "d6af036ffbdd7ccfc34dc7862b0e50d3",
+             x_axis_var_hash = "13742f581e47f00fd8beeade76db838c",
+             hasTitle = TRUE)
+}
+# +
+# Question 1.6
+# Question 1.6: checks the data frame `caschools_MLR_add_results` - its column
+# names, its row count, and the sum of each numeric column scaled by 100.
+test_1.6 <- function() {
+  check_DF(caschools_MLR_add_results,
+           c("term","estimate", "std.error", "statistic", "p.value", "conf.low", "conf.high"),
+           "11946e7a3ed5e1776e81c0f0ecd383d0",
+           c("estimate", "std.error", "statistic", "p.value", "conf.low", "conf.high"),
+           c(100, 100, 100, 100, 100, 100),
+           c("3f2caceee58bc5e5092ab875c8066a9b",
+             "d3c5ba4fef4e46e63e649dc573977aad",
+             "6e4a7f58d3c649fe8d8313435565c283",
+             "569ddf8dcb5af0cbc7177be6a8b9700d",
+             "fd34f0720893972017cb2261e9353fb2",
+             "ee86da303eb6b2854286c84802788768"))
+}
+# +
+# Question 1.7
+# manual
+# +
+# Question 1.8
+# Question 1.8 (select all that apply): grades `answer1.8`, accepting the
+# strings that getPermutations() builds from options A-D as well-formed.
+test_1.8 <- function() {
+  check_MC(answer1.8, getPermutations(LETTERS[1:4]), "6e7a8c1c098e8817e3df3fd1b21149d1")
+}
+# +
+# Question 2.0
+# Grades `answer2.0` as a multiple-choice answer among options A-F.
+test_2.0 <- function() {
+  check_MC(answer2.0, LETTERS[1:6], 'd110f00cfb1b248e835137025804a23b')
+}
+# +
+# Question 2.1
+# Question 2.1: one check per coefficient of `caschools_MLR_int`. Each
+# coefficient is scaled by 1e4 and truncated before its digest is compared.
+test_2.1.0 <- function() {
+  check_numeric_element(caschools_MLR_int$coefficients[1], 1e4, "080bbbf3c10c028f53b1901022933269")
+}
+test_2.1.1 <- function() {
+  check_numeric_element(caschools_MLR_int$coefficients[2], 1e4, "6750c14599866d62ee03d881a88d4737")
+}
+test_2.1.2 <- function() {
+  check_numeric_element(caschools_MLR_int$coefficients[3], 1e4, "9c3c926f355b55c26682a59a66d621e3")
+}
+test_2.1.3 <- function() {
+  check_numeric_element(caschools_MLR_int$coefficients[4], 1e4, "491174b285abb2888e9e602eecfb6037")
+}
+# +
+# Question 2.2
+# Question 2.2: checks the plot `caschools_MLR_int_plot` (points with `income`
+# on the x-axis).
+# NOTE(review): the `hasVline` and `hasTitle` arguments were missing from this
+# call, leaving it unterminated. FALSE/TRUE below assume the plot has the same
+# structure as the one checked in test_1.5 (no vertical line, with a title) -
+# confirm against the Question 2.2 template.
+test_2.2 <- function() {
+  check_plot(caschools_MLR_int_plot,
+             x_axis_var = "income",
+             geom_type = "GeomPoint",
+             hasVline = FALSE,
+             bin_width_hash = "3e2e4a08c44d0224de5b7e668c75ace3",
+             nrow_hash = "d6af036ffbdd7ccfc34dc7862b0e50d3",
+             x_axis_var_hash = "13742f581e47f00fd8beeade76db838c",
+             hasTitle = TRUE)
+}
+# +
+# Question 2.3
+# Question 2.3: checks the data frame `caschools_MLR_int_results` - its column
+# names, its row count, and the sum of each numeric column scaled by 100.
+test_2.3 <- function() {
+  check_DF(caschools_MLR_int_results,
+           c("term","estimate", "std.error", "statistic", "p.value", "conf.low", "conf.high"),
+           "234a2a5581872457b9fe1187d1616b13",
+           c("estimate", "std.error", "statistic", "p.value", "conf.low", "conf.high"),
+           c(100, 100, 100, 100, 100, 100),
+           c("8adf1c6033759946124174b1ffecc9d1",
+             "2109c9c0db840505d06b32da37f3261e",
+             "6f11355413a4aa7c96bf7412868a7e7f",
+             "242f3fe311473815db79235ff6708bde",
+             "ec9d7c4ea6c09c65a368c4865e500886",
+             "8fcc24b94746de0d011202126d17c4e0"))
+}
+# +
+# Question 2.4
+# Question 2.4 (select all that apply): grades `answer2.4`, accepting the
+# strings that getPermutations() builds from options A-E as well-formed.
+test_2.4 <- function() {
+  check_MC(answer2.4, getPermutations(LETTERS[1:5]), '8310e591706d1e38cdbfd4e26f17a274')
+}
+# +
+# Question 2.5
+# manual
+# +
+# Question 2.6
+# Question 2.6: checks three result tables - the two SLR fits
+# (`caschools_SLR_kk06_results`, `caschools_SLR_kk08_results`) and the
+# interaction model (`caschools_MLR_int_results`). Each check covers column
+# names, row count, and the sum of each numeric column scaled by 100.
+test_2.6.0 <- function() {
+  check_DF(caschools_SLR_kk06_results,
+           c("term","estimate", "std.error", "statistic", "p.value"),
+           "c01f179e4b57ab8bd9de309e6d576c48",
+           c("estimate", "std.error", "statistic", "p.value"),
+           c(100, 100, 100, 100),
+           c("82798d4574cf47d5cb838e4aca470ed8",
+             "9a1e47f252c2f2aa62cab1323c75885b",
+             "878fe51e9e4a668776d114d06fd00cc9",
+             "1473d70e5646a26de3c52aa1abd85b1f"))
+}
+test_2.6.1 <- function() {
+  check_DF(caschools_SLR_kk08_results,
+           c("term","estimate", "std.error", "statistic", "p.value"),
+           "c01f179e4b57ab8bd9de309e6d576c48",
+           c("estimate", "std.error", "statistic", "p.value"),
+           c(100, 100, 100, 100),
+           c("8adf1c6033759946124174b1ffecc9d1",
+             "525d73b4c5528794642ceb4dd9f987c2",
+             "eda26e24431618557efc7037b20d0568",
+             "1473d70e5646a26de3c52aa1abd85b1f"))
+}
+test_2.6.2 <- function() {
+  check_DF(caschools_MLR_int_results,
+           c("term","estimate", "std.error", "statistic", "p.value", "conf.low", "conf.high"),
+           "234a2a5581872457b9fe1187d1616b13",
+           c("estimate", "std.error", "statistic", "p.value", "conf.low", "conf.high"),
+           c(100, 100, 100, 100, 100, 100),
+           c("8adf1c6033759946124174b1ffecc9d1",
+             "2109c9c0db840505d06b32da37f3261e",
+             "6f11355413a4aa7c96bf7412868a7e7f",
+             "242f3fe311473815db79235ff6708bde",
+             "ec9d7c4ea6c09c65a368c4865e500886",
+             "8fcc24b94746de0d011202126d17c4e0"))
+}
+# +
+# Question 2.7
+# manual
diff --git a/materials/tutorial_04/tutorial_04.ipynb b/materials/tutorial_04/tutorial_04.ipynb
new file mode 100644
index 0000000..f842418
--- /dev/null
+++ b/materials/tutorial_04/tutorial_04.ipynb
@@ -0,0 +1,1613 @@
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "deletable": false,
+ "editable": false,
+ "nbgrader": {
+ "cell_type": "markdown",
+ "checksum": "f97f65e4d6b4c1d4d999d0c2ae225fc5",
+ "grade": false,
+ "grade_id": "cell-f1e1d845873036f4",
+ "locked": true,
+ "schema_version": 3,
+ "solution": false,
+ "task": false
+ }
+ },
+ "source": [
+ "# Tutorial 4: MLR with different types of input variables"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "deletable": false,
+ "editable": false,
+ "nbgrader": {
+ "cell_type": "markdown",
+ "checksum": "1c57f899f006691c0cb0f40be06bb51e",
+ "grade": false,
+ "grade_id": "cell-82d9926086d47a80",
+ "locked": true,
+ "schema_version": 3,
+ "solution": false,
+ "task": false
+ }
+ },
+ "source": [
+ "#### Lecture and Tutorial Learning Goals:\n",
+ "After completing this week's lecture and tutorial work, you will be able to:\n",
+ "\n",
+ "1. Give an example of a real problem that could be answered by a multiple linear regression.\n",
+ "2. Interpret the coefficients and $p$-values of different types of input variables, including categorical input variables.\n",
+ "3. Define interactions in the context of linear regression.\n",
+ "4. Write a computer script to perform linear regression when input variables are continuous or discrete, and when there are interactions between some of these variables."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "deletable": false,
+ "editable": false,
+ "nbgrader": {
+ "cell_type": "code",
+ "checksum": "7fc505b9da3ba24a646a8a35b69f517d",
+ "grade": false,
+ "grade_id": "cell-a2a153352bc44a68",
+ "locked": true,
+ "schema_version": 3,
+ "solution": false,
+ "task": false
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# Run this cell before continuing.\n",
+ "library(tidyverse)\n",
+ "library(repr)\n",
+ "library(infer)\n",
+ "library(cowplot)\n",
+ "library(broom)\n",
+ "library(AER)\n",
+ "source(\"tests_tutorial_04.R\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "deletable": false,
+ "editable": false,
+ "nbgrader": {
+ "cell_type": "markdown",
+ "checksum": "ef63ebcfe8716aad38a2bf893e613b02",
+ "grade": false,
+ "grade_id": "cell-9be71f65643c5906",
+ "locked": true,
+ "schema_version": 3,
+ "solution": false,
+ "task": false
+ }
+ },
+ "source": [
+ "## The data\n",
+ "\n",
+ "In this tutorial, we will continue using the `CASchools` real-world dataset from 420 K-6 and K-8 districts in California. The California School data set comes with an R package called `AER`, an acronym for Applied Econometrics with R (by Christian Kleiber & Achim Zeileis, 2017). \n",
+ "\n",
+ "The dataset contains data on test performance, school characteristics and student demographic backgrounds for school districts in California. Among many variables available we will use the following:\n",
+ "\n",
+ "- `grades`: factor indicating grade span of district.\n",
+ "\n",
+ "- `income`: District average income (in USD 1,000).\n",
+ "\n",
+ "- `english`: Percent of English learners.\n",
+ "\n",
+ "- `read`: Average reading score.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "deletable": false,
+ "editable": false,
+ "nbgrader": {
+ "cell_type": "code",
+ "checksum": "e46acff06f2da23622a95dcad738046c",
+ "grade": false,
+ "grade_id": "cell-751a3a4db77146a7",
+ "locked": true,
+ "schema_version": 3,
+ "solution": false,
+ "task": false
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "#run this cell\n",
+ "\n",
+ "data(CASchools)\n",
+ "\n",
+ "caschools <- CASchools %>%\n",
+ " select(grades, income, english, read) %>%\n",
+ " mutate_if(is.numeric, round, 2)\n",
+ "\n",
+ "head(caschools)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "deletable": false,
+ "editable": false,
+ "nbgrader": {
+ "cell_type": "markdown",
+ "checksum": "491d42046529205e254c52113200e55a",
+ "grade": false,
+ "grade_id": "cell-3dfb99accf2154a8",
+ "locked": true,
+ "schema_version": 3,
+ "solution": false,
+ "task": false
+ }
+ },
+ "source": [
+ "#### Important: Note that if the categorical variable is not a factor, `lm` won't create a dummy variable!! \n",
+ "\n",
+ "> Make sure that categorical variables in your model are factors. If they are not, then **set them as factors**!!"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "deletable": false,
+ "editable": false,
+ "nbgrader": {
+ "cell_type": "markdown",
+ "checksum": "f058fbba73a9a4936f333e50270d5d74",
+ "grade": false,
+ "grade_id": "cell-a7382e79e72c9703",
+ "locked": true,
+ "schema_version": 3,
+ "solution": false,
+ "task": false
+ }
+ },
+ "source": [
+ "## 1. MLR: additive\n",
+ "\n",
+ "As discussed in the lecture, R will create dummy variables to include categorical variables in the model. In this example, `grades` is a categorical variable with 2 levels. "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "deletable": false,
+ "editable": false,
+ "nbgrader": {
+ "cell_type": "markdown",
+ "checksum": "9367815b59f44875ddc3b3cedffd41a2",
+ "grade": false,
+ "grade_id": "cell-18576173209dcbfe",
+ "locked": true,
+ "schema_version": 3,
+ "solution": false,
+ "task": false
+ }
+ },
+ "source": [
+ "**Question 1.0**\n",
+ "<br>{points: 1}\n",
+ "\n",
+ "The input variable `grades` is a discrete, nominal variable with 2 levels: KK-06 and KK-08. Since this variable is a factor in the dataset, `lm` selects one level as a baseline to create a dummy variable. Which level of `grades` is selected, by default, as a baseline?\n",
+ "\n",
+ "**A.** `KK-06`\n",
+ "\n",
+ "**B.** `KK-08`\n",
+ "\n",
+ "*Assign your answer to an object called `answer1.0`. Your answer should be one of `\"A\"` or `\"B\"` surrounded by quotes.*"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "deletable": false,
+ "nbgrader": {
+ "cell_type": "code",
+ "checksum": "cd4bd2ff4058b85570033044f5e731a9",
+ "grade": false,
+ "grade_id": "cell-f073f90a4398375f",
+ "locked": false,
+ "schema_version": 3,
+ "solution": true,
+ "task": false
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# answer1.0 <- ...\n",
+ "\n",
+ "# your code here\n",
+ "fail() # No Answer - remove if you provide an answer"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "deletable": false,
+ "editable": false,
+ "nbgrader": {
+ "cell_type": "code",
+ "checksum": "d6f3723f0f8ef72f8cbfa6906e42b426",
+ "grade": true,
+ "grade_id": "cell-b6ce784a5466648d",
+ "locked": true,
+ "points": 1,
+ "schema_version": 3,
+ "solution": false,
+ "task": false
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "test_1.0()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "deletable": false,
+ "editable": false,
+ "nbgrader": {
+ "cell_type": "markdown",
+ "checksum": "dc00d710ccdb843088fd0cd32e7120bc",
+ "grade": false,
+ "grade_id": "cell-3e16a0eb1d1251a6",
+ "locked": true,
+ "schema_version": 3,
+ "solution": false,
+ "task": false
+ }
+ },
+ "source": [
+ "**Question 1.1**\n",
+ "<br>{points: 1}\n",
+ "\n",
+ "How many dummy variables does `lm` create to fit a linear regression with the categorical variable `grades`?\n",
+ "\n",
+ "**A.** 1\n",
+ "\n",
+ "**B.** 2\n",
+ "\n",
+ "**C.** 3\n",
+ "\n",
+ "**D.** 4\n",
+ "\n",
+ "*Assign your answer to an object called `answer1.1`. Your answer should be one of `\"A\"`, `\"B\"`, `\"C\"`, or `\"D\"` surrounded by quotes.*"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "deletable": false,
+ "nbgrader": {
+ "cell_type": "code",
+ "checksum": "c3a74807a9fd10b0a98bf7040df5d7ca",
+ "grade": false,
+ "grade_id": "cell-49fd43204dbbbe64",
+ "locked": false,
+ "schema_version": 3,
+ "solution": true,
+ "task": false
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# answer1.1 <- \n",
+ "\n",
+ "# your code here\n",
+ "fail() # No Answer - remove if you provide an answer"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "deletable": false,
+ "editable": false,
+ "nbgrader": {
+ "cell_type": "code",
+ "checksum": "aab29ab12d9f21578b4e0bb9fb243b26",
+ "grade": true,
+ "grade_id": "cell-8a6966bb42e5995c",
+ "locked": true,
+ "points": 1,
+ "schema_version": 3,
+ "solution": false,
+ "task": false
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "test_1.1()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "deletable": false,
+ "editable": false,
+ "nbgrader": {
+ "cell_type": "markdown",
+ "checksum": "accc4ebe5b88ec809ed324f02192458c",
+ "grade": false,
+ "grade_id": "cell-75a9578496bd6d9c",
+ "locked": true,
+ "schema_version": 3,
+ "solution": false,
+ "task": false
+ }
+ },
+ "source": [
+ "**Question 1.2**\n",
+ "<br>{points: 1}\n",
+ "\n",
+ "In the previous tutorial, you used a simple linear regression (SLR) to study the relation between `read` and `income`, on average for all types of schools. Suppose you want to examine if there is any difference in this relation depending on the grade span of the school (i.e., KK-06 vs KK-08). \n",
+ "\n",
+ "If for all types of school (i.e., for all levels of `grades`) you expect the same change in reading score per unit change in the average income, which MLR will you fit in `R` using the `lm` function?\n",
+ "\n",
+ "**A.** `lm(read ~ income + grades, data = caschools)`\n",
+ "\n",
+ "**B.** `lm(read ~ income * grades, data = caschools)`\n",
+ "\n",
+ "*Assign your answer to an object called `answer1.2`. Your answer should be one of `\"A\"` or `\"B\"` surrounded by quotes.*"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "deletable": false,
+ "nbgrader": {
+ "cell_type": "code",
+ "checksum": "e97fed2fc79c181b9447e6caafaa4b18",
+ "grade": false,
+ "grade_id": "cell-f6b2d0e8025cf80e",
+ "locked": false,
+ "schema_version": 3,
+ "solution": true,
+ "task": false
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# answer1.2 <- \n",
+ "\n",
+ "# your code here\n",
+ "fail() # No Answer - remove if you provide an answer"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "deletable": false,
+ "editable": false,
+ "nbgrader": {
+ "cell_type": "code",
+ "checksum": "d810fcc6f81a8afc5987ec10316adfb0",
+ "grade": true,
+ "grade_id": "cell-645245eaf0a415b6",
+ "locked": true,
+ "points": 1,
+ "schema_version": 3,
+ "solution": false,
+ "task": false
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "test_1.2()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "deletable": false,
+ "editable": false,
+ "nbgrader": {
+ "cell_type": "markdown",
+ "checksum": "12af6ed36232289caa83e3518b468b98",
+ "grade": false,
+ "grade_id": "cell-37a0395078a18c50",
+ "locked": true,
+ "schema_version": 3,
+ "solution": false,
+ "task": false
+ }
+ },
+ "source": [
+ "**Question 1.3**\n",
+ "<br>{points: 1}\n",
+ "\n",
+ "Which of the following descriptions will best describe a visualization of the MLR considered in **Question 1.2**? \n",
+ "\n",
+ "**A.** one line through a cloud of data points\n",
+ "\n",
+ "**B.** two lines with equal slopes but different intercepts\n",
+ "\n",
+ "**C.** two lines with different slopes and different intercepts\n",
+ "\n",
+ "**D.** a smooth concave curve through a cloud of data points\n",
+ "\n",
+ "**E.** two boxplots for different levels of `grades`\n",
+ "\n",
+ "*Assign your answer to an object called `answer1.3`. Your answer should be one of `\"A\"`, `\"B\"`, `\"C\"`, `\"D\"`, or `\"E\"` surrounded by quotes.*"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "deletable": false,
+ "nbgrader": {
+ "cell_type": "code",
+ "checksum": "a44f499620bcd84405a2c6bd2a8a9d73",
+ "grade": false,
+ "grade_id": "cell-fd274734aed6b6c7",
+ "locked": false,
+ "schema_version": 3,
+ "solution": true,
+ "task": false
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# answer1.3 <- \n",
+ "\n",
+ "# your code here\n",
+ "fail() # No Answer - remove if you provide an answer"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "deletable": false,
+ "editable": false,
+ "nbgrader": {
+ "cell_type": "code",
+ "checksum": "35393769c0469b8c86febb09350aee54",
+ "grade": true,
+ "grade_id": "cell-905c24ed72606cd0",
+ "locked": true,
+ "points": 1,
+ "schema_version": 3,
+ "solution": false,
+ "task": false
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "test_1.3()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "deletable": false,
+ "editable": false,
+ "nbgrader": {
+ "cell_type": "markdown",
+ "checksum": "b0960d461c3ee4f2a8374e801249aa6e",
+ "grade": false,
+ "grade_id": "cell-e905a3e604e8750e",
+ "locked": true,
+ "schema_version": 3,
+ "solution": false,
+ "task": false
+ }
+ },
+ "source": [
+ "**Question 1.4**\n",
+ "<br>{points: 1}\n",
+ "\n",
+ "Using `caschools`, estimate the MLR proposed in **Question 1.2** and call it `caschools_MLR_add`.\n",
+ "\n",
+ "*Fill out those parts indicated with `...`, uncomment the corresponding code in the cell below, and run it.*"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "deletable": false,
+ "nbgrader": {
+ "cell_type": "code",
+ "checksum": "50c8eb2ff60b12ebb21f9c86ca8d4190",
+ "grade": false,
+ "grade_id": "cell-537a5de8c45b73fd",
+ "locked": false,
+ "schema_version": 3,
+ "solution": true,
+ "task": false
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# caschools_MLR_add <- ...(...,\n",
+ "# ...\n",
+ "# )\n",
+ "# caschools_MLR_add\n",
+ "\n",
+ "# your code here\n",
+ "fail() # No Answer - remove if you provide an answer"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "deletable": false,
+ "editable": false,
+ "nbgrader": {
+ "cell_type": "code",
+ "checksum": "1024a6c0ac471806ff34ea856ddcbc0a",
+ "grade": true,
+ "grade_id": "cell-c955967bb20ade73",
+ "locked": true,
+ "points": 1,
+ "schema_version": 3,
+ "solution": false,
+ "task": false
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "test_1.4.0()\n",
+ "test_1.4.1()\n",
+ "test_1.4.2()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "deletable": false,
+ "editable": false,
+ "nbgrader": {
+ "cell_type": "markdown",
+ "checksum": "1b032d1a200038ebfb0c9418863f77a5",
+ "grade": false,
+ "grade_id": "cell-8a3de76c80df5397",
+ "locked": true,
+ "schema_version": 3,
+ "solution": false,
+ "task": false
+ }
+ },
+ "source": [
+ "**Question 1.5**\n",
+ "<br>{points: 1}\n",
+ "\n",
+ "Create a plot of the data in `caschools` (using `geom_point()`) along with the estimated regression lines coming from the additive regression model `caschools_MLR_add`. Use different colours for the points and regression lines of each type of school (levels of `grades`). Include a legend indicating what colour corresponds to each level with proper axis labels. The `ggplot()` object's name will be `caschools_MLR_add_plot`.\n",
+ "\n",
+ "*Fill out those parts indicated with `...`, uncomment the corresponding code in the cell below, and run it.*"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "deletable": false,
+ "nbgrader": {
+ "cell_type": "code",
+ "checksum": "3a291e10325632cc6dbe54e29fbd0a5b",
+ "grade": false,
+ "grade_id": "cell-9dcae296b8566967",
+ "locked": false,
+ "schema_version": 3,
+ "solution": true,
+ "task": false
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "options(repr.plot.width = 15, repr.plot.height = 7) # Adjust these numbers so the plot looks good in your desktop.\n",
+ "\n",
+ "# caschools$pred_MLR_Add <- predict(caschools_MLR_add) # Using predict() to create estimated regression lines.\n",
+ "\n",
+ "# caschools_MLR_add_plot <- ggplot(..., aes(\n",
+ "# ...,\n",
+ "# ...,\n",
+ "# color = ...\n",
+ "# )) +\n",
+ "# ...() +\n",
+ "# geom_line(aes(y = pred_MLR_Add), size = 1) +\n",
+ "# labs(\n",
+ "# title = ...,\n",
+ "# x = ...,\n",
+ "# y = ...\n",
+ "# ) +\n",
+ "# theme(\n",
+ "# text = element_text(size = 16.5),\n",
+ "# plot.title = element_text(face = \"bold\"),\n",
+ "# axis.title = element_text(face = \"bold\"),\n",
+ "# legend.title = element_text(face = \"bold\"),\n",
+ "# ) +\n",
+ "# labs(color = \"grades\")\n",
+ "# caschools_MLR_add_plot\n",
+ "\n",
+ "# your code here\n",
+ "fail() # No Answer - remove if you provide an answer"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "deletable": false,
+ "editable": false,
+ "nbgrader": {
+ "cell_type": "code",
+ "checksum": "125d765392989a6f948357a6cc048d01",
+ "grade": true,
+ "grade_id": "cell-60b4784943979e31",
+ "locked": true,
+ "points": 1,
+ "schema_version": 3,
+ "solution": false,
+ "task": false
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "test_1.5()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "deletable": false,
+ "editable": false,
+ "nbgrader": {
+ "cell_type": "markdown",
+ "checksum": "806f32d510d22cb6fee3cde12b870e5f",
+ "grade": false,
+ "grade_id": "cell-5a6b3d5e566adacc",
+ "locked": true,
+ "schema_version": 3,
+ "solution": false,
+ "task": false
+ }
+ },
+ "source": [
+ "**Question 1.6**\n",
+ "<br>{points: 1}\n",
+ "\n",
+ "Find the estimated coefficients of `caschools_MLR_add` using `tidy()`. Report the estimated coefficients, their standard errors and corresponding $p$-values. Include the corresponding asymptotic **90% confidence intervals**. Store the results in the variable `caschools_MLR_add_results`.\n",
+ "\n",
+ "*Fill out those parts indicated with `...`, uncomment the corresponding code in the cell below, and run it.*"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "deletable": false,
+ "nbgrader": {
+ "cell_type": "code",
+ "checksum": "d35aa6dc891e12999af8ebac83a3f01d",
+ "grade": false,
+ "grade_id": "cell-253df594cc213305",
+ "locked": false,
+ "schema_version": 3,
+ "solution": true,
+ "task": false
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# caschools_MLR_add_results <- ...(..., ..., ....) %>% mutate_if(is.numeric, round, 2)\n",
+ "# caschools_MLR_add_results\n",
+ "\n",
+ "# your code here\n",
+ "fail() # No Answer - remove if you provide an answer"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "deletable": false,
+ "editable": false,
+ "nbgrader": {
+ "cell_type": "code",
+ "checksum": "182cc3fee5a4319bbefb8dee3139a641",
+ "grade": true,
+ "grade_id": "cell-2e20842d0ef2e18c",
+ "locked": true,
+ "points": 1,
+ "schema_version": 3,
+ "solution": false,
+ "task": false
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "test_1.6()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "deletable": false,
+ "editable": false,
+ "nbgrader": {
+ "cell_type": "markdown",
+ "checksum": "99fa41618949973ba0ef9a3898cfcdac",
+ "grade": false,
+ "grade_id": "cell-045d79b6d3e4e227",
+ "locked": true,
+ "schema_version": 3,
+ "solution": false,
+ "task": false
+ }
+ },
+ "source": [
+ "**Question 1.7**\n",
+ "<br>{points: 1}\n",
+ "\n",
+ "Using the results in `caschools_MLR_add_results` from **Question 1.6**, how would you interpret the estimated coefficient of the continuous variable `income` ?"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "deletable": false,
+ "nbgrader": {
+ "cell_type": "markdown",
+ "checksum": "885cf006afa42b04df3d244c08372d30",
+ "grade": true,
+ "grade_id": "cell-c4f0c10f52097476",
+ "locked": false,
+ "points": 1,
+ "schema_version": 3,
+ "solution": true,
+ "task": false
+ },
+ "tags": []
+ },
+ "source": [
+ "> *Your answer goes here.*\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "deletable": false,
+ "editable": false,
+ "nbgrader": {
+ "cell_type": "markdown",
+ "checksum": "29524df0c431f38b0c86393221e632ae",
+ "grade": false,
+ "grade_id": "cell-2c547503213abacd",
+ "locked": true,
+ "schema_version": 3,
+ "solution": false,
+ "task": false
+ }
+ },
+ "source": [
+ "**Question 1.8**\n",
+ "<br>{points: 1}\n",
+ "\n",
+ "Using a **significance level $\\alpha = 0.10$**, which of the following claims are correct?\n",
+ "\n",
+ "\n",
+ "**A.** for any average income, the expected reading score is the same for schools with KK-06 and schools with KK-08 grades. \n",
+ "\n",
+ "**B.** there is enough evidence to reject the hypothesis that, for any average income, the expected reading score is the same for schools with KK-06 and schools with KK-08 grades.\n",
+ "\n",
+ "**C.** for any type of school, the change in `read` per unit change in `income` is statistically significant.\n",
+ "\n",
+ "**D.** there is enough evidence to believe that the association between `income` and `read` varies depending on the grade span of the school.\n",
+ "\n",
+ "\n",
+ "*Assign your answers to the object `answer1.8`. Your answers have to be included in a single string indicating the correct options **in alphabetical order** and surrounded by quotes (e.g., `\"ABCD\"` indicates you are selecting the four options).*"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "deletable": false,
+ "nbgrader": {
+ "cell_type": "code",
+ "checksum": "e77708faa194004e77f8a89d1d0fa38c",
+ "grade": false,
+ "grade_id": "cell-7ded1cd632f3869e",
+ "locked": false,
+ "schema_version": 3,
+ "solution": true,
+ "task": false
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# answer1.8 <- \n",
+ "\n",
+ "# your code here\n",
+ "fail() # No Answer - remove if you provide an answer"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "deletable": false,
+ "editable": false,
+ "nbgrader": {
+ "cell_type": "code",
+ "checksum": "041fdc33a657af0a6e2e3f9dbd1d9fb7",
+ "grade": true,
+ "grade_id": "cell-51e7b8a61bab24c9",
+ "locked": true,
+ "points": 1,
+ "schema_version": 3,
+ "solution": false,
+ "task": false
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "test_1.8()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "deletable": false,
+ "editable": false,
+ "nbgrader": {
+ "cell_type": "markdown",
+ "checksum": "66e67b9204bf0f10fb51ab869301f118",
+ "grade": false,
+ "grade_id": "cell-e9994b034ba662aa",
+ "locked": true,
+ "schema_version": 3,
+ "solution": false,
+ "task": false
+ },
+ "tags": []
+ },
+ "source": [
+ "**Question 1.9**\n",
+ "<br>{points: 1}\n",
+ "\n",
+ "In one or two sentences, explain what \"statistically significant\" means in the following sentence and how it is different from \"practically significant\".\n",
+ "\n",
+ "> \"for any type of school, the change in `read` per unit change in `income` is statistically significant\"\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "deletable": false,
+ "nbgrader": {
+ "cell_type": "markdown",
+ "checksum": "a810e419b66aa68785aac7a6f64b97e0",
+ "grade": true,
+ "grade_id": "cell-66c583a8d25d2564",
+ "locked": false,
+ "points": 1,
+ "schema_version": 3,
+ "solution": true,
+ "task": false
+ },
+ "tags": []
+ },
+ "source": [
+ "> *Your answer goes here.*\n",
+ "\n",
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "deletable": false,
+ "editable": false,
+ "nbgrader": {
+ "cell_type": "markdown",
+ "checksum": "2a9dc9062fbfdf7e4d6b107eacc71562",
+ "grade": false,
+ "grade_id": "cell-873ce5c231e18f2c",
+ "locked": true,
+ "schema_version": 3,
+ "solution": false,
+ "task": false
+ },
+ "tags": []
+ },
+ "source": [
+ "## 2. MLR: with interactions\n",
+ "\n",
+ "In this section we will explore if the relation between `read` and `income` is the same for all types of schools. We can do this using **interactions** between the input variables!\n",
+ "\n",
+ "> **Note** that interactions can be used, in general, when the relation between an input and the response depends on another input variable (not necessarily categorical!)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "deletable": false,
+ "editable": false,
+ "nbgrader": {
+ "cell_type": "markdown",
+ "checksum": "4fa6f32f5ed0f62df4758f88422b5fcd",
+ "grade": false,
+ "grade_id": "cell-004da4c289bb7b52",
+ "locked": true,
+ "schema_version": 3,
+ "solution": false,
+ "task": false
+ }
+ },
+ "source": [
+ "**Question 2.0**\n",
+ "<br>{points: 1}\n",
+ "\n",
+ "We can use `lm` to fit the MLR with interactions between the continuous variable `income` and the categorical variable `grades` (with 2 levels) defined above.\n",
+ "\n",
+ "How many regression coefficients will be estimated by `lm`?\n",
+ "\n",
+ "**A.** 1\n",
+ "\n",
+ "**B.** 2\n",
+ "\n",
+ "**C.** 3\n",
+ "\n",
+ "**D.** 4\n",
+ "\n",
+ "\n",
+ "*Assign your answer to an object called `answer2.0`. Your answer should be one of `\"A\"`, `\"B\"`, `\"C\"`, or `\"D\"` surrounded by quotes.*"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "deletable": false,
+ "nbgrader": {
+ "cell_type": "code",
+ "checksum": "c879ea7b9e4c78544e35bad40039dbff",
+ "grade": false,
+ "grade_id": "cell-598588b1701bd38d",
+ "locked": false,
+ "schema_version": 3,
+ "solution": true,
+ "task": false
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# answer2.0 <- \n",
+ "\n",
+ "# your code here\n",
+ "fail() # No Answer - remove if you provide an answer"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "deletable": false,
+ "editable": false,
+ "nbgrader": {
+ "cell_type": "code",
+ "checksum": "7392fce520cd1b366107f36865b48507",
+ "grade": true,
+ "grade_id": "cell-632b241ba713f5aa",
+ "locked": true,
+ "points": 1,
+ "schema_version": 3,
+ "solution": false,
+ "task": false
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "test_2.0()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "deletable": false,
+ "editable": false,
+ "nbgrader": {
+ "cell_type": "markdown",
+ "checksum": "38550478a20bfdac1e3007e1eeaa8707",
+ "grade": false,
+ "grade_id": "cell-d67f4a30fbd6b249",
+ "locked": true,
+ "schema_version": 3,
+ "solution": false,
+ "task": false
+ }
+ },
+ "source": [
+ "**Question 2.1**\n",
+ "<br>{points: 1}\n",
+ "\n",
+ "Using `caschools`, estimate the MLR with interaction described above and call it `caschools_MLR_int`.\n",
+ "\n",
+ "> **Hint:** Interaction terms can be easily specified in `lm()` using the notation `*`.\n",
+ "\n",
+ "*Fill out those parts indicated with `...`, uncomment the corresponding code in the cell below, and run it.*"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "deletable": false,
+ "nbgrader": {
+ "cell_type": "code",
+ "checksum": "3f7cd83f01098924b263228b29661d5e",
+ "grade": false,
+ "grade_id": "cell-d99a7bc9a1759fac",
+ "locked": false,
+ "schema_version": 3,
+ "solution": true,
+ "task": false
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# caschools_MLR_int <- ...(...,\n",
+ "# ...\n",
+ "# )\n",
+ "# caschools_MLR_int\n",
+ "\n",
+ "# your code here\n",
+ "fail() # No Answer - remove if you provide an answer"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "deletable": false,
+ "editable": false,
+ "nbgrader": {
+ "cell_type": "code",
+ "checksum": "2fdb37752dd73b167cb7c10e09747df9",
+ "grade": true,
+ "grade_id": "cell-a2d3def81c3ee729",
+ "locked": true,
+ "points": 1,
+ "schema_version": 3,
+ "solution": false,
+ "task": false
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "test_2.1.0()\n",
+ "test_2.1.1()\n",
+ "test_2.1.2()\n",
+ "test_2.1.3()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "deletable": false,
+ "editable": false,
+ "nbgrader": {
+ "cell_type": "markdown",
+ "checksum": "cf9923865860d6e214047d8d307a97e2",
+ "grade": false,
+ "grade_id": "cell-1fc6d7f5ba4c6352",
+ "locked": true,
+ "schema_version": 3,
+ "solution": false,
+ "task": false
+ }
+ },
+ "source": [
+ "**Question 2.2**\n",
+ "<br>{points: 1}\n",
+ "\n",
+ "Create a plot of the data in `caschools` (using `geom_point()`) along with the estimated regression lines coming from the interaction regression model `caschools_MLR_int` (note that your plot should have two regression lines, one for each `grades`). Use different colours for the points and regression lines of each type of school (levels of `grades`). Include a legend indicating what colour corresponds to each level with proper axis labels. The `ggplot()` object's name will be `caschools_MLR_int_plot`.\n",
+ "\n",
+ "*Fill out those parts indicated with `...`, uncomment the corresponding code in the cell below, and run it.*"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "deletable": false,
+ "nbgrader": {
+ "cell_type": "code",
+ "checksum": "f174d5a9cf49023348e6d142454b81c4",
+ "grade": false,
+ "grade_id": "cell-0264d31dfcee946e",
+ "locked": false,
+ "schema_version": 3,
+ "solution": true,
+ "task": false
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# caschools$pred_MLR_int <- predict(caschools_MLR_int) # Using predict() to create estimated regression lines.\n",
+ "\n",
+ "# caschools_MLR_int_plot <- ggplot(..., aes(\n",
+ "# ...,\n",
+ "# ...,\n",
+ "# color = ...\n",
+ "# )) +\n",
+ "# ...() +\n",
+ "# geom_line(aes(y = pred_MLR_int), size = 1) +\n",
+ "# labs(\n",
+ "# title = ...,\n",
+ "# x = ...,\n",
+ "# y = ...\n",
+ "# ) +\n",
+ "# theme(\n",
+ "# text = element_text(size = 16.5),\n",
+ "# plot.title = element_text(face = \"bold\"),\n",
+ "# axis.title = element_text(face = \"bold\"),\n",
+ "# legend.title = element_text(face = \"bold\"),\n",
+ "# ) +\n",
+ "# labs(color = \"grades\")\n",
+ "# caschools_MLR_int_plot\n",
+ "\n",
+ "# your code here\n",
+ "fail() # No Answer - remove if you provide an answer"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "deletable": false,
+ "editable": false,
+ "nbgrader": {
+ "cell_type": "code",
+ "checksum": "a0895f6f2f13d7ce6a6805aa3ca750a7",
+ "grade": true,
+ "grade_id": "cell-1de4883ebac641a7",
+ "locked": true,
+ "points": 1,
+ "schema_version": 3,
+ "solution": false,
+ "task": false
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "test_2.2()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "deletable": false,
+ "editable": false,
+ "nbgrader": {
+ "cell_type": "markdown",
+ "checksum": "766e5717f264eef3dd6af79a62b04deb",
+ "grade": false,
+ "grade_id": "cell-e850e2f516d793c9",
+ "locked": true,
+ "schema_version": 3,
+ "solution": false,
+ "task": false
+ }
+ },
+ "source": [
+ "**Question 2.3**\n",
+ "<br>{points: 1}\n",
+ "\n",
+ "Find the estimated coefficients of `caschools_MLR_int` using `tidy()`. Report the estimated coefficients, their standard errors and corresponding $p$-values. Include the corresponding asymptotic 90% confidence intervals. Store the results in the variable `caschools_MLR_int_results`.\n",
+ "\n",
+ "*Fill out those parts indicated with `...`, uncomment the corresponding code in the cell below, and run it.*"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "deletable": false,
+ "nbgrader": {
+ "cell_type": "code",
+ "checksum": "41ef5655a7cbfe3e5ae00bdddcae0a2a",
+ "grade": false,
+ "grade_id": "cell-b8a3711ac0aa9e78",
+ "locked": false,
+ "schema_version": 3,
+ "solution": true,
+ "task": false
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# caschools_MLR_int_results <- ...(..., ..., ...) %>% mutate_if(is.numeric, round, 2)\n",
+ "# caschools_MLR_int_results\n",
+ "\n",
+ "# your code here\n",
+ "fail() # No Answer - remove if you provide an answer"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "deletable": false,
+ "editable": false,
+ "nbgrader": {
+ "cell_type": "code",
+ "checksum": "243d015f2afebda5db3917619f72d6fd",
+ "grade": true,
+ "grade_id": "cell-db1c5162e30c84bd",
+ "locked": true,
+ "points": 1,
+ "schema_version": 3,
+ "solution": false,
+ "task": false
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "test_2.3()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "deletable": false,
+ "editable": false,
+ "nbgrader": {
+ "cell_type": "markdown",
+ "checksum": "08d7282803f182d6211ed4dbf302b04c",
+ "grade": false,
+ "grade_id": "cell-3867f871a7af4b28",
+ "locked": true,
+ "schema_version": 3,
+ "solution": false,
+ "task": false
+ }
+ },
+ "source": [
+ "**Question 2.4**\n",
+ "<br>{points: 1}\n",
+ "\n",
+ "Using a **significance level $\\alpha = 0.10$**, which of the following claims are correct?\n",
+ "\n",
+ "\n",
+ "**A.** There is enough evidence to reject the hypothesis that, for any average income, the expected reading score is the same for schools with KK-06 and schools with KK-08 grades.\n",
+ "\n",
+ "**B.** For any type of school, the change in `read` per unit change in `income` is statistically significant.\n",
+ "\n",
+ "**C.** There is enough evidence to reject the hypothesis that the change in `read` per unit change in `income` is the same for schools with KK-06 and schools with KK-08 grades.\n",
+ "\n",
+ "**D.** There is not enough evidence to reject the hypothesis that the change in `read` per unit change in `income` is the same for schools with KK-06 and schools with KK-08 grades.\n",
+ "\n",
+ "**E.** For schools with KK-06 grade span, the change in `read` per unit change in `income` is statistically significant.\n",
+ "\n",
+ "*Assign your answers to the object `answer2.4`. Your answers have to be included in a single string indicating the correct options **in alphabetical order** and surrounded by quotes (e.g., `\"ABCDE\"` indicates you are selecting all five options).*"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "deletable": false,
+ "nbgrader": {
+ "cell_type": "code",
+ "checksum": "feef0f6cf6e41f07ab507be043fcd7fa",
+ "grade": false,
+ "grade_id": "cell-8040936e931b0c26",
+ "locked": false,
+ "schema_version": 3,
+ "solution": true,
+ "task": false
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# answer2.4 <- \n",
+ "\n",
+ "# your code here\n",
+ "fail() # No Answer - remove if you provide an answer"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "deletable": false,
+ "editable": false,
+ "nbgrader": {
+ "cell_type": "code",
+ "checksum": "3e0e9dc82c3cc0b9ec220187142156a4",
+ "grade": true,
+ "grade_id": "cell-1873e4d29d602968",
+ "locked": true,
+ "points": 1,
+ "schema_version": 3,
+ "solution": false,
+ "task": false
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "test_2.4()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "deletable": false,
+ "editable": false,
+ "nbgrader": {
+ "cell_type": "markdown",
+ "checksum": "3f41bec2efe1206490c1b9db22847a4e",
+ "grade": false,
+ "grade_id": "cell-a4830b2233d003de",
+ "locked": true,
+ "schema_version": 3,
+ "solution": false,
+ "task": false
+ }
+ },
+ "source": [
+ "**Question 2.5**\n",
+ "<br>{points: 1}\n",
+ "\n",
+ "A common practice is not to interpret coefficients that are not statistically significant, since what you observe is not significantly different from 0 and reflects just noise in the data. Alternatively, you can provide an interpretation but with the caveat that the result is not statistically significant. \n",
+ "\n",
+ "Following the second approach, what would be a correct interpretation of the estimated coefficient of the interaction term `income:gradesKK-08` from `caschools_MLR_int_results` in **Question 2.3**? (Remember to comment on the significance of the result.)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "deletable": false,
+ "nbgrader": {
+ "cell_type": "markdown",
+ "checksum": "af5ae4b1617c5eb5fa76f953cc7fc90d",
+ "grade": true,
+ "grade_id": "cell-94b5d3fd08f12a3f",
+ "locked": false,
+ "points": 1,
+ "schema_version": 3,
+ "solution": true,
+ "task": false
+ }
+ },
+ "source": [
+ "> *Your answer goes here.*\n",
+ "\n",
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "deletable": false,
+ "editable": false,
+ "nbgrader": {
+ "cell_type": "markdown",
+ "checksum": "fec03cccfbc7e114c21bfd7fceaf1439",
+ "grade": false,
+ "grade_id": "cell-ad8a6bbdaf066526",
+ "locked": true,
+ "schema_version": 3,
+ "solution": false,
+ "task": false
+ }
+ },
+ "source": [
+ "**Question 2.6**\n",
+ "<br>{points: 1}\n",
+ "\n",
+ "Fit the following 3 models as indicated:\n",
+ "\n",
+ "**A.** a SLR with `read` as the response and `income` as the *only* input variable using only KK-06 schools in `caschools`. Use `tidy` to get estimated parameters and standard errors. Call the results `caschools_SLR_kk06_results`\n",
+ "\n",
+ "**B.** a SLR with `read` as the response and `income` as the *only* input variable using only KK-08 schools in `caschools`. Use `tidy` to get estimated parameters and standard errors. Call the results `caschools_SLR_kk08_results`\n",
+ "\n",
+ "**C.** a MLR with `read` as the response and `income` and `grades` as input variables, *including their interaction*, using `caschools`. Note that you already have the estimated parameters and standard errors in `caschools_MLR_int_results`. Uncomment the line to get the results again here.\n",
+ "\n",
+ "*Fill out those parts indicated with `...`, uncomment the corresponding code in the cell below, and run it.*"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "deletable": false,
+ "nbgrader": {
+ "cell_type": "code",
+ "checksum": "0162042f6abfbd87251fe0c24ba68add",
+ "grade": false,
+ "grade_id": "cell-6e18beb71a4fb71f",
+ "locked": false,
+ "schema_version": 3,
+ "solution": true,
+ "task": false
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# caschools_SLR_kk06_results <- tidy(lm(... ~ ... , \n",
+ "# data= subset(...,... == ...))) %>% mutate_if(is.numeric, round, 2)\n",
+ "# caschools_SLR_kk06_results\n",
+ "\n",
+ "# caschools_SLR_kk08_results <- tidy(lm(... ~ ... , \n",
+ "# data= subset(...,... == ...))) %>% mutate_if(is.numeric, round, 2)\n",
+ "# caschools_SLR_kk08_results\n",
+ "\n",
+ "# caschools_MLR_int_results\n",
+ "\n",
+ "\n",
+ "# your code here\n",
+ "fail() # No Answer - remove if you provide an answer"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "deletable": false,
+ "editable": false,
+ "nbgrader": {
+ "cell_type": "code",
+ "checksum": "3588b0c0ddca3eaaa2cfe7eb68472928",
+ "grade": true,
+ "grade_id": "cell-7211318468890365",
+ "locked": true,
+ "points": 1,
+ "schema_version": 3,
+ "solution": false,
+ "task": false
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "test_2.6.0()\n",
+ "test_2.6.1()\n",
+ "test_2.6.2()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "deletable": false,
+ "editable": false,
+ "nbgrader": {
+ "cell_type": "markdown",
+ "checksum": "7607e9621ddc4c26db6df310297f81dc",
+ "grade": false,
+ "grade_id": "cell-22795bb3ed88a300",
+ "locked": true,
+ "schema_version": 3,
+ "solution": false,
+ "task": false
+ }
+ },
+ "source": [
+ "**Question 2.7**\n",
+ "<br>{points: 1}\n",
+ "\n",
+ "**2.7.0** Using the results from `caschools_SLR_kk06_results` and `caschools_MLR_int_results` in **Question 2.6**, explain why the estimated coefficients of `income` are the same in both models.\n",
+ "\n",
+ "**2.7.1** Using the results from `caschools_SLR_kk08_results` and `caschools_MLR_int_results` in **Question 2.6**, explain why the estimated coefficients of `income` are *not* the same in both models. \n",
+ "\n",
+ "**2.7.2** Explain why the estimated coefficient of `income` in `caschools_SLR_kk08_results` is *not* the same as that of `income:gradesKK-08` in `caschools_MLR_int_results` using the results from **Question 2.6**."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "deletable": false,
+ "nbgrader": {
+ "cell_type": "markdown",
+ "checksum": "8c6973a0f022fe93f2585f89c5365adb",
+ "grade": true,
+ "grade_id": "cell-1bc7a024f2ac0de4",
+ "locked": false,
+ "points": 1,
+ "schema_version": 3,
+ "solution": true,
+ "task": false
+ }
+ },
+ "source": [
+ "> *Your answer goes here.*\n",
+ "\n",
+ ]
+ }
+ ],
+ "metadata": {
+ "jupytext": {
+ "formats": "ipynb,Rmd"
+ },
+ "kernelspec": {
+ "display_name": "R",
+ "language": "R",
+ "name": "ir"
+ },
+ "language_info": {
+ "codemirror_mode": "r",
+ "file_extension": ".r",
+ "mimetype": "text/x-r-source",
+ "name": "R",
+ "pygments_lexer": "r",
+ "version": "4.2.3"
+ },
+ "latex_envs": {
+ "LaTeX_envs_menu_present": true,
+ "autoclose": false,
+ "autocomplete": true,
+ "bibliofile": "biblio.bib",
+ "cite_by": "apalike",
+ "current_citInitial": 1,
+ "eqLabelWithNumbers": true,
+ "eqNumInitial": 1,
+ "hotkeys": {
+ "equation": "Ctrl-E",
+ "itemize": "Ctrl-I"
+ },
+ "labels_anchors": false,
+ "latex_user_defs": false,
+ "report_style_numbering": false,
+ "user_envs_cfg": false
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4