Merge pull request #104 from worldbank/mar-2023-intro

Mar 2023 intro sessions 1 and 2
worldbank · Mar 22, 2023 · 903a4c9 · 903a4c9
2 parents 48e7d14 + 0039f9d
commit 903a4c9
Show file tree

Hide file tree

Showing 32 changed files with 910 additions and 878 deletions.
diff --git a/DataWork/Code/1-intro-to-R-solutions.R b/DataWork/Code/1-intro-to-R-solutions.R
@@ -0,0 +1,44 @@
+## R for Stata Users
+## March 2023
+## Exercise solutions 
+## Session: Introduction to R
+
+## Exercise 1 ====
+# Read a CSV file into a data frame called whr.
+# "/path/to/data/file" is a placeholder: replace it with the location of
+# the data file on your computer before running this line.
+whr <- read.csv("/path/to/data/file")
+# note that this was executed through point-and-click
+# during the actual session
+
+## Exercise 2 ====
+# Subset data: keep only the rows where year equals 2016.
+# The result is printed but not stored, so whr itself is not modified
+subset(whr, year == 2016)
+# Check first 6 observations of whr
+head(whr)
+
+## Exercise 3 ====
+# Subset data and store result in a new df
+whr2016 <- subset(whr, year == 2016)
+# Display head of new df
+head(whr2016)
+# Display head of original df (it is not changed by the subsetting above)
+head(whr)
+
+## Exercise 4 ====
+# Create vector of strings
+str_vec <- c("R", "Python", "SAS", "Excel", "Stata")
+# Create string "scalar" (in R, a character vector of length one)
+str_scalar <- "can be an option to"
+# Concatenation: paste() joins its arguments with a space by default,
+# so this prints "R can be an option to Stata"
+paste(str_vec[1], str_scalar, str_vec[5])
+
+## Exercise 5 ====
+# Create boolean vector: TRUE for observations whose GDP per capita is
+# below the mean of the column.
+# na.rm = TRUE keeps the mean defined when some values are missing;
+# without it a single NA would turn every element of the vector into NA.
+# Observations with a missing GDP per capita are still NA in the result.
+inc_below_avg <- whr$economy_gdp_per_capita <
+  mean(whr$economy_gdp_per_capita, na.rm = TRUE)
+# See head of vector
+head(inc_below_avg)
+
+## Exercise 6 ====
+# Create new column (vector) of zeros
+whr$rank_low <- 0
+# Subset obs with income below average
+# and replace values of rank_low with 1 for those obs
+# (inc_below_avg is the boolean vector created in exercise 5)
+whr$rank_low[inc_below_avg] <- 1
diff --git a/DataWork/Code/2-intro-to-R-programming-solutions.R b/DataWork/Code/2-intro-to-R-programming-solutions.R
@@ -0,0 +1,53 @@
+## R for Stata Users
+## March 2023
+## Exercise solutions 
+## Session: Introduction to R programming
+
+## Exercise 1 ====
+# (no coding needed for exercise)
+
+## Exercise 2 ====
+# (no coding needed for exercise)
+
+## Exercise 3 ====
+library(here)
+# here() builds the file path from the folder names given as arguments
+# NOTE(review): presumably relative to the project root set up in
+# exercise 2 -- confirm
+whr <- read.csv(here("DataWork", "DataSets", "Final", "whr_panel.csv"))
+# note that this will only work if exercise 2
+# was executed correctly
+
+## Exercise 4 ====
+# Load dplyr and purrr. A package must be installed before library()
+# can load it.
+#install.packages("dplyr") # uncomment installation if needed
+#install.packages("purrr") # uncomment installation if needed
+library(dplyr)
+library(purrr)
+
+## Exercise 5 ====
+# Create dataframe: 50,000 columns, each with 400 values drawn at random
+# (with replacement) from the integers 1 to 100
+df <- data.frame(replicate(50000, sample(1:100, 400, replace=TRUE)))
+# Create empty vector
+col_means_loop <- c()
+# Loop and append means to vector (will take a few seconds)
+# Iterating over a data frame goes through its columns one at a time
+# NOTE(review): append() copies the vector on every iteration; presumably
+# kept on purpose as the loop example for this exercise -- confirm
+for (column in df){
+  col_means_loop <- append(col_means_loop, mean(column))
+}
+
+## Exercise 6 ====
+# Apply mean() to every column of df with map(); the result is a list
+# with one element per column
+col_means_map <- map(df, mean)
+# this will only work if you defined df in exercise 5
+
+## Exercise 7 ====
+# Standardize a numeric vector: subtract its mean and divide by its
+# standard deviation. Missing values are ignored when computing both.
+#   x: a numeric vector
+# Returns a vector of the same length as x; missing elements stay NA.
+zscore <- function(x) {
+  # Local names that do not mask the mean() and sd() functions
+  x_mean <- mean(x, na.rm = TRUE)
+  x_sd   <- sd(x, na.rm = TRUE)
+  (x - x_mean) / x_sd
+}
+
+## Exercise 8 ====
+# Keep the two columns to standardize and apply zscore() to each one;
+# the result is a list with one element per selected column
+z_scores <- whr %>%
+  select(health_life_expectancy, freedom) %>%
+  map(zscore)
+# Elements follow the order of the columns given to select()
+whr$hle_st <- z_scores[[1]]
+whr$freedom_st <- z_scores[[2]]
+# this will only run if you created the function
+# zscore() in exercise 7
diff --git a/DataWork/Code/main.R b/DataWork/Code/main.R
@@ -0,0 +1,90 @@
+# ------------------------------------------------------------------------------ #
+#                                                                                #
+#                                     DIME                                       #
+#                        Introduction to R for Stata users                       #
+#                                  MAIN SCRIPT                                   #
+#                                                                                #
+# ------------------------------------------------------------------------------ #
+
+# PURPOSE:    Set-up configurations and run scripts
+
+# NOTES:      Version 2
+
+# WRITTEN BY: Luiza Cardoso de Andrade, Leonardo Viotti
+
+#                                                     Last modified in Mar 2023
+
+# PART 1: Select sections to run ----------------------------------------------
+
+# Set a switch to 1 to run the corresponding lab script in PART 4;
+# with any other value that script is skipped
+Lab2                 <- 0
+Lab3                 <- 0
+Lab4                 <- 0
+Lab5                 <- 0
+Lab6                 <- 0
+
+# PART 2: Load packages   -----------------------------------------------------
+
+# NOTE(review): some of these packages (e.g. rgdal, rgeos, velox) may no
+# longer be available on CRAN -- confirm they still install
+packages  <- c("readstata13","foreign",
+               "doBy", "broom", "dplyr",
+               "stargazer",
+               "ggplot2", "plotly", "ggrepel",
+               "RColorBrewer", "wesanderson",
+               "sp", "rgdal", "rgeos", "raster", "velox",
+               "ggmap", "rasterVis", "leaflet",
+               "htmlwidgets", "geosphere")
+
+# Install every package from the list that is not installed yet.
+# Compare against the installed package names only (the row names of
+# installed.packages()): matching against the whole matrix would also
+# match cells of other columns, such as a "Suggests" entry
+missing_packages <- setdiff(packages, rownames(installed.packages()))
+if (length(missing_packages) > 0) {
+  install.packages(missing_packages, dependencies = TRUE)
+}
+
+# Load all packages -- this is equivalent to using library(package) for each
+# package listed before
+invisible(lapply(packages, library, character.only = TRUE))
+
+# PART 3: Set folder paths ---------------------------------------------------
+
+#-------------#
+# Root folder #
+#-------------#
+
+# Add your username and folder path here (for Windows computers)
+# To find out what your username is, type Sys.getenv("USERNAME")
+if (Sys.getenv("USERNAME") == "luiza") {
+
+  projectFolder  <- "C:/Users/luiza/Documents/GitHub/dime-r-training"
+
+}
+
+# If you're using Mac, just add your folder path, without the if statement
+
+# Stop with a clear message if no root folder was set above; otherwise the
+# file.path() calls below fail with "object 'projectFolder' not found"
+if (!exists("projectFolder")) {
+  stop("projectFolder is not set: add your folder path in PART 3 of main.R",
+       call. = FALSE)
+}
+
+#--------------------#
+# Project subfolders #
+#--------------------#
+
+# file.path() joins the folder names with "/" on every operating system
+# NOTE(review): confirm these folder names ("Data", "Codes", "Output")
+# match the folders that exist inside projectFolder
+rawData           <- file.path(projectFolder, "Data", "Raw")
+finalData         <- file.path(projectFolder, "Data", "Final")
+Code              <- file.path(projectFolder, "Codes")
+Output            <- file.path(projectFolder, "Output")
+
+
+# PART 4: Run selected sections -----------------------------------------------
+
+# Each lab script is run only if its Lab* variable from PART 1 equals 1.
+# Scripts are looked up in the folder stored in Code.
+# NOTE(review): the file names passed to source() have no ".R" extension --
+# confirm they match the names of the script files in that folder
+if (Lab2 == 1) {
+  source(file.path(Code, "Lab 2 - Coding for Reproducible Research"))
+}
+if (Lab3 == 1) {
+  source(file.path(Code, "Lab 3 - Data Processing"))
+}
+if (Lab4 == 1) {
+  source(file.path(Code, "Lab 4 - Descriptive Analysis"))
+}
+if (Lab5 == 1) {
+  source(file.path(Code, "Lab 5 - Data Visualization"))
+}
+if (Lab6 == 1) {
+  source(file.path(Code, "Lab 6 - Spatial Data"))
+}