Skip to content

Commit

Permalink
Merge pull request #104 from worldbank/mar-2023-intro
Browse files Browse the repository at this point in the history
Mar 2023 intro sessions 1 and 2
  • Loading branch information
luisesanmartin authored Mar 22, 2023
2 parents 48e7d14 + 0039f9d commit 903a4c9
Show file tree
Hide file tree
Showing 32 changed files with 910 additions and 878 deletions.
44 changes: 44 additions & 0 deletions DataWork/Code/1-intro-to-R-solutions.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
## R for Stata Users
## March 2023
## Exercise solutions
## Session: Introduction to R

## Exercise 1 ====
# Read the data file into a data frame named whr
# (replace the placeholder path with the location of your copy of the file)
whr <- read.csv("/path/to/data/file")
# note that this was executed through point-and-click
# during the actual session

## Exercise 2 ====
# Subset data: keep the rows where year equals 2016
# (the result is only printed, it is not stored)
subset(whr, year == 2016)
# Check first 6 observations of whr
head(whr)

## Exercise 3 ====
# Subset data and store result in a new df
whr2016 <- subset(whr, year == 2016)
# Display head of new df
head(whr2016)
# Display head of original df, which the subsetting left unchanged
head(whr)

## Exercise 4 ====
# Character vector holding five tool names
str_vec <- c("R", "Python", "SAS", "Excel", "Stata")
# A single string (in R, a character vector of length one)
str_scalar <- "can be an option to"
# Join the first name, the phrase and the fifth name, separated by spaces
paste(str_vec[[1]], str_scalar, str_vec[[5]], sep = " ")

## Exercise 5 ====
# Create boolean vector: TRUE where GDP per capita is below the column mean
# NOTE: mean() is called without na.rm = TRUE, so a missing value in the
# column would make the mean, and therefore every comparison, NA
inc_below_avg <- whr$economy_gdp_per_capita < mean(whr$economy_gdp_per_capita)
# See head of vector
head(inc_below_avg)

## Exercise 6 ====
# Create new column (vector) of zeros
whr$rank_low <- 0
# Subset obs with income below average
# and replace values of rank_low with 1 for those obs
# (uses inc_below_avg from exercise 5)
whr$rank_low[inc_below_avg] <- 1
53 changes: 53 additions & 0 deletions DataWork/Code/2-intro-to-R-programming-solutions.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
## R for Stata Users
## March 2023
## Exercise solutions
## Session: Introduction to R programming

## Exercise 1 ====
# (no coding needed for exercise)

## Exercise 2 ====
# (no coding needed for exercise)

## Exercise 3 ====
library(here)
# Build the path to the data set from its folder components and read it
# into a data frame named whr
whr <- read.csv(here("DataWork", "DataSets", "Final", "whr_panel.csv"))
# note that this will only work if exercise 2
# was executed correctly

## Exercise 4 ====
#install.packages("dplyr") # uncomment installation if needed
#install.packages("purrr") # uncomment installation if needed
# Load both packages: dplyr supplies %>% and select(), purrr supplies map(),
# all of which are used in the exercises that follow
library(dplyr)
library(purrr)

## Exercise 5 ====
# Data frame of random integers between 1 and 100:
# 400 rows and 50,000 columns
df <- data.frame(replicate(50000, sample(1:100, 400, replace = TRUE)))
# Start with an empty result
col_means_loop <- NULL
# Visit each column and add its mean at the end of the result
# (this will take a few seconds)
for (column in df) {
  col_means_loop <- c(col_means_loop, mean(column))
}

## Exercise 6 ====
# Same column means computed with map(), which returns them as a list
# this will only work if you defined df in exercise 5
col_means_map <- map(df, mean)

## Exercise 7 ====
# Standardize a numeric vector: subtract its mean from every element and
# divide by its standard deviation. Missing values are ignored when the mean
# and the standard deviation are computed, and stay NA in the result.
#   x: numeric vector
#   returns: numeric vector of the same length as x
zscore <- function(x) {
  centered <- x - mean(x, na.rm = TRUE)
  spread <- sd(x, na.rm = TRUE)
  centered / spread
}

## Exercise 8 ====
# Standardize both columns in one pass: select() keeps the two variables and
# map() applies zscore() to each, returning a list with one element per column
z_scores <- whr %>%
  select(health_life_expectancy, freedom) %>%
  map(zscore)
# Store each standardized vector back in whr as a new column
whr$hle_st <- z_scores[["health_life_expectancy"]]
whr$freedom_st <- z_scores[["freedom"]]
# this will only run if you created the function
# zscore() in exercise 7
90 changes: 90 additions & 0 deletions DataWork/Code/main.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# ------------------------------------------------------------------------------ #
# #
# DIME #
# Introduction to R for Stata users #
# MAIN SCRIPT #
# #
# ------------------------------------------------------------------------------ #

# PURPOSE: Set-up configurations and run scripts

# NOTES: Version 2

# WRITTEN BY: Luiza Cardoso de Andrade, Leonardo Viotti

# Last modified in Mar 2023

# PART 1: Select sections to run ----------------------------------------------

# One switch per lab: set it to 1 to have PART 4 source that lab's script;
# with any other value the script is skipped
Lab2 <- 0
Lab3 <- 0
Lab4 <- 0
Lab5 <- 0
Lab6 <- 0

# PART 2: Load packages -----------------------------------------------------

# Packages to install (if missing) and load
# NOTE(review): rgdal, rgeos and velox may no longer be available on CRAN --
# confirm that they can still be installed before relying on this list
packages <- c("readstata13", "foreign",
              "doBy", "broom", "dplyr",
              "stargazer",
              "ggplot2", "plotly", "ggrepel",
              "RColorBrewer", "wesanderson",
              "sp", "rgdal", "rgeos", "raster", "velox",
              "ggmap", "rasterVis", "leaflet",
              "htmlwidgets", "geosphere")

# Install whichever of the listed packages are not installed yet.
# Names are compared with the row names of installed.packages(), which are the
# package names; comparing with the whole matrix could also match other cells
# (for example a dependency field that holds nothing but a package name).
missing_packages <- setdiff(packages, rownames(installed.packages()))
if (length(missing_packages) > 0) {
  install.packages(missing_packages, dependencies = TRUE)
}

# Load all packages -- this is equivalent to using library(package) for each
# package listed before
invisible(lapply(packages, library, character.only = TRUE))

# PART 3: Set folder paths ---------------------------------------------------

#-------------#
# Root folder #
#-------------#

# Add your username and folder path here (for Windows computers)
# To find out what your username is, type Sys.getenv("USERNAME")
if (Sys.getenv("USERNAME") == "luiza") {
  projectFolder <- "C:/Users/luiza/Documents/GitHub/dime-r-training"
}

# If you're using Mac, just add your folder path, without the if statement

# Stop with a clear message when no root folder was set above; otherwise the
# file.path() calls below fail with "object 'projectFolder' not found"
if (!exists("projectFolder")) {
  stop("projectFolder is not set: add your project folder path in PART 3",
       call. = FALSE)
}

#--------------------#
# Project subfolders #
#--------------------#

# NOTE(review): these subfolder names (Data/Raw, Data/Final, Codes, Output)
# are taken as given -- confirm that they match the layout of your local copy
rawData <- file.path(projectFolder, "Data", "Raw")
finalData <- file.path(projectFolder, "Data", "Final")
Code <- file.path(projectFolder, "Codes")
Output <- file.path(projectFolder, "Output")


# PART 4: Run selected sections -----------------------------------------------

# Source each lab script whose switch from PART 1 is set to 1; the scripts are
# looked up inside the Code folder defined in PART 3.
# NOTE(review): the file names below carry no ".R" extension -- confirm that
# they match the actual script names on disk
if (Lab2 == 1) {
  source(file.path(Code, "Lab 2 - Coding for Reproducible Research"))
}
if (Lab3 == 1) {
  source(file.path(Code, "Lab 3 - Data Processing"))
}
if (Lab4 == 1) {
  source(file.path(Code, "Lab 4 - Descriptive Analysis"))
}
if (Lab5 == 1) {
  source(file.path(Code, "Lab 5 - Data Visualization"))
}
if (Lab6 == 1) {
  source(file.path(Code, "Lab 6 - Spatial Data"))
}
Loading

0 comments on commit 903a4c9

Please sign in to comment.