Skip to content

Commit

Permalink
accurate critical t-values, allow users to specify weights
Browse files Browse the repository at this point in the history
  • Loading branch information
yhoriuchi committed Aug 8, 2023
1 parent bd7fd8c commit d49a26c
Show file tree
Hide file tree
Showing 6 changed files with 164 additions and 44 deletions.
Binary file modified .DS_Store
Binary file not shown.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import(stringr)
import(tidyr)
import(tidyselect)
importFrom(MASS,mvrnorm)
importFrom(estimatr,lm_robust)
importFrom(methods,is)
importFrom(methods,new)
importFrom(stats,setNames)
147 changes: 108 additions & 39 deletions R/pj_estimate.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#'
#' @import dplyr
#' @import rlang
#' @importFrom estimatr lm_robust
#' @importFrom MASS mvrnorm
#' @keywords internal
#' @param .data A `projoint_data` object
Expand All @@ -18,21 +19,32 @@
#' @param .ignore_position TRUE if you ignore the location of profile (left or right). Relevant only if .structure == "choice_level". Defaults to NULL.
#' @param .n_sims The number of simulations. Relevant only if .method == "simulation"
#' @param .n_boot The number of bootstrapped samples. Relevant only if .method == "bootstrap"
#' @param .weights_1 the weight to estimate IRR (see `lm_robust()`): NULL (default)
#' @param .clusters_1 the clusters to estimate IRR (see `lm_robust()`): NULL (default)
#' @param .se_type_1 the standard error type to estimate IRR (see `lm_robust()`): "classical" (default)
#' @param .weights_2 the weight to estimate MM or AMCE (see `lm_robust()`): NULL (default)
#' @param .clusters_2 the clusters to estimate MM or AMCE (see `lm_robust()`): NULL (default)
#' @param .se_type_2 the standard error type to estimate MM or AMCE (see `lm_robust()`): "classical" (default)
#' @return A data frame of estimates


# .data = out1
# .attribute = "att1"
# .level = "level1"
# .level = "level2"
# .structure = "profile_level"
# .estimand = "mm"
# .estimand = "amce"
# .se_method = "analytical"
# .irr = NULL
# .baseline = NULL
# .baseline = "level1"
# .remove_ties = TRUE
# .ignore_position = NULL
# .n_sims = NULL
# .n_boot = NULL
# .weights_1 <- NULL
# .clusters_1 <- NULL
# .se_type_1 <- "classical"
# .weights_2 <- NULL
# .clusters_2 <- NULL
# .se_type_2 <- "classical"

pj_estimate <- function(
.data,
Expand All @@ -46,7 +58,13 @@ pj_estimate <- function(
.remove_ties = TRUE,
.ignore_position = NULL,
.n_sims = NULL,
.n_boot = NULL
.n_boot = NULL,
.weights_1 = NULL,
.clusters_1 = NULL,
.se_type_1 = "classical",
.weights_2 = NULL,
.clusters_2 = NULL,
.se_type_2 = "classical"
){

.dataframe <- .data@data
Expand All @@ -63,12 +81,13 @@ pj_estimate <- function(
estimate <- NULL
cov_mm_tau <- NULL
reg_tau <- NULL
se <- NULL

# check various settings --------------------------------------------------

structure <- rlang::arg_match0(.structure, c("choice_level", "profile_level"))
estimand <- rlang::arg_match0(.estimand, c("mm", "amce"))
se_method <- rlang::arg_match0(.se_method, c("analytical", "simulation", "bootstrap"))
se_method <- rlang::arg_match0(.se_method, c("analytical", "simulation", "bootstrap"))

if(!is.null(.irr) & !is.numeric(.irr) & length(.irr) == 1){
stop("The .irr argument must be either a numeric scalar or NULL.")
Expand All @@ -81,7 +100,7 @@ pj_estimate <- function(
if (.structure == "profile_level" & !is.null(.ignore_position)){
stop("The .ignore_position argument can be specified only when the .structure argument is choice_level.")
}

if (.structure == "choice_level" & is.null(.ignore_position)){
stop("Specify the .ignore_position argument.")
}
Expand All @@ -101,20 +120,20 @@ pj_estimate <- function(
if(.se_method == "simulation" & is.null(.n_sims)){
stop("Specify the .n_sims arguement for simulation")
}

if(.se_method != "simulation" & !is.null(.n_sims)){
stop("You cannot specify the .n_sims arguement for analytical derivation or bootstrapping")
}

if(.se_method == "bootstrap" & is.null(.n_boot)){
stop("Specify the .n_boot arguement for bootstrapping")
}

if(.se_method != "bootstrap" & !is.null(.n_boot)){
stop("You cannot specify the .n_boot arguement for analytical derivation or simulation")
}


if (.structure == "choice_level" & .estimand == "mm" & .remove_ties == FALSE){
stop("The .remove_ties argument should be TRUE to estimate choice-level MMs.")
}
Expand Down Expand Up @@ -238,18 +257,22 @@ pj_estimate <- function(
)

}

}

# Estimate or specify tau -------------------------------------------------

if (is.null(.irr)){

# run intercept-only regression models
reg_irr <- stats::lm(agree ~ 1, data_for_irr)
reg_irr <- estimatr::lm_robust(agree ~ 1,
weights = .weights_1,
clusters = .clusters_1,
se_type = .se_type_1,
data = data_for_irr) %>% tidy()

irr <- reg_irr$coefficient %>% as.numeric()
var_irr <- vcov(reg_irr) %>% as.numeric()
irr <- reg_irr$estimate[1]
var_irr <- reg_irr$std.error[1]^2

tau <- (1 - sqrt(1 - 2 * (1 - irr))) / 2
var_tau <- 0.25 * (2 * irr - 1)^(-1) * var_irr
Expand All @@ -270,19 +293,33 @@ pj_estimate <- function(

if (estimand == "mm"){

reg_mm <- stats::lm(selected ~ 1, data_for_estimand)
reg_mm <- estimatr::lm_robust(selected ~ 1,
weights = .weights_2,
clusters = .clusters_2,
se_type = .se_type_2,
data = data_for_estimand) %>% tidy()

# the critical t-value
critical_t <- abs((reg_mm$conf.low[1] - reg_mm$estimate[1]) / reg_mm$std.error[1])

mm_uncorrected <- reg_mm$coefficient %>% as.numeric()
var_uncorrected_mm <- vcov(reg_mm) %>% as.numeric()
mm_uncorrected <- reg_mm$estimate[1]
var_uncorrected_mm <- reg_mm$std.error[1]^2

}

if (estimand == "amce") {

reg_amce <- stats::lm(selected ~ x, data_for_estimand)
reg_amce <- estimatr::lm_robust(selected ~ x,
weights = .weights_2,
clusters = .clusters_2,
se_type = .se_type_2,
data = data_for_estimand) %>% tidy()

amce_uncorrected <- reg_amce$coefficient[2] %>% as.numeric()
var_uncorrected_amce <- vcov(reg_amce)[2, 2] %>% as.numeric()
# the critical t-value
critical_t <- abs((reg_amce$conf.low[2] - reg_amce$estimate[2]) / reg_amce$std.error[2])

amce_uncorrected <- reg_amce$estimate[2]
var_uncorrected_amce <- reg_amce$std.error[2]^2

}

Expand Down Expand Up @@ -318,20 +355,20 @@ pj_estimate <- function(

cov_mm0_irr <- cov(d_cov0$selected, d_cov0$agree) / nrow(data_for_irr)
cov_mm1_irr <- cov(d_cov1$selected, d_cov1$agree) / nrow(data_for_irr)

} else {

cov_mm0_irr <- 0
cov_mm1_irr <- 0

}

cov_mm0_tau <- -0.5 * (2 * irr - 1)^(-1/2) * cov_mm0_irr
cov_mm1_tau <- -0.5 * (2 * irr - 1)^(-1/2) * cov_mm1_irr
cov_amce_tau <- cov_mm1_tau - cov_mm0_tau

}

# estimate and correct MMs ------------------------------------------------

if (estimand == "mm"){
Expand All @@ -355,7 +392,9 @@ pj_estimate <- function(
"estimate" = c(mm_uncorrected, mm_corrected),
"se" = c(sqrt(var_uncorrected_mm),
sqrt(var_corrected_mm))) %>%
dplyr::mutate("tau" = tau)
dplyr::mutate("conf.low" = estimate - critical_t * se,
"conf.high" = estimate + critical_t * se,
"tau" = tau)

}

Expand All @@ -381,7 +420,9 @@ pj_estimate <- function(
output <- data.frame("estimand" = c("mm_uncorrected", "mm_corrected"),
"estimate" = c(mm_uncorrected, mean(sim_mm$mm_corrected)),
"se" = c(sqrt(var_uncorrected_mm), sd(sim_mm$mm_corrected))) %>%
dplyr::mutate("tau" = tau)
dplyr::mutate("conf.low" = estimate - critical_t * se,
"conf.high" = estimate + critical_t * se,
"tau" = tau)

}

Expand Down Expand Up @@ -411,14 +452,24 @@ pj_estimate <- function(
relationship = "many-to-many")

# run intercept-only regression models
reg_irr <- stats::lm(agree ~ 1, bs_sample_1)
reg_mm <- stats::lm(selected ~ 1, bs_sample_2)
reg_irr <- estimatr::lm_robust(agree ~ 1,
weights = .weights_1,
clusters = .clusters_1,
se_type = .se_type_1,
data = bs_sample_1) %>% tidy()

reg_mm <- estimatr::lm_robust(selected ~ 1,
weights = .weights_2,
clusters = .clusters_2,
se_type = .se_type_2,
data = bs_sample_2) %>% tidy()

# calculate the means
mm_uncorrected <- reg_mm$coefficient %>% as.numeric()
mm_uncorrected <- reg_mm$estimate[1]

if (is.null(.irr)){

irr <- reg_irr$coefficient %>% as.numeric()
irr <- reg_irr$estimate[1]
tau <- (1 - sqrt(1 - 2 * (1 - irr))) / 2

} else {
Expand All @@ -445,7 +496,9 @@ pj_estimate <- function(
mean(out %>% filter(estimand == "mm_corrected") %>% pull(estimate))),
"se" = c(sd(out %>% filter(estimand == "mm_uncorrected") %>% pull(estimate)),
sd(out %>% filter(estimand == "mm_corrected") %>% pull(estimate)))) %>%
dplyr::mutate("tau" = tau)
dplyr::mutate("conf.low" = estimate - critical_t * se,
"conf.high" = estimate + critical_t * se,
"tau" = tau)
}

}
Expand Down Expand Up @@ -473,7 +526,9 @@ pj_estimate <- function(
"estimate" = c(amce_uncorrected, amce_corrected),
"se" = c(sqrt(var_uncorrected_amce),
sqrt(var_corrected_amce))) %>%
dplyr::mutate("tau" = tau)
dplyr::mutate("conf.low" = estimate - critical_t * se,
"conf.high" = estimate + critical_t * se,
"tau" = tau)

}

Expand All @@ -499,7 +554,9 @@ pj_estimate <- function(
output <- data.frame("estimand" = c("amce_uncorrected", "amce_corrected"),
"estimate" = c(amce_uncorrected, mean(sim_amce$amce_corrected)),
"se" = c(sqrt(var_uncorrected_amce), sd(sim_amce$amce_corrected))) %>%
dplyr::mutate("tau" = tau)
dplyr::mutate("conf.low" = estimate - critical_t * se,
"conf.high" = estimate + critical_t * se,
"tau" = tau)

}

Expand Down Expand Up @@ -529,14 +586,24 @@ pj_estimate <- function(
relationship = "many-to-many")

# run intercept-only regression models
reg_irr <- stats::lm(agree ~ 1, bs_sample_1)
reg_amce <- stats::lm(selected ~ x, bs_sample_2)
reg_irr <- estimatr::lm_robust(agree ~ 1,
weights = .weights_1,
clusters = .clusters_1,
se_type = .se_type_1,
data = bs_sample_1) %>% tidy()

reg_amce <- estimatr::lm_robust(selected ~ x,
weights = .weights_2,
clusters = .clusters_2,
se_type = .se_type_2,
bs_sample_2) %>% tidy()


# calculate the means
amce_uncorrected <- reg_amce$coefficient[2] %>% as.numeric()
amce_uncorrected <- reg_amce$estimate[2]
if (is.null(.irr)){

irr <- reg_irr$coefficient %>% as.numeric()
irr <- reg_irr$estimate[1]
tau <- (1 - sqrt(1 - 2 * (1 - irr))) / 2

} else {
Expand All @@ -563,7 +630,9 @@ pj_estimate <- function(
mean(out %>% filter(estimand == "amce_corrected") %>% pull(estimate))),
"se" = c(sd(out %>% filter(estimand == "amce_uncorrected") %>% pull(estimate)),
sd(out %>% filter(estimand == "amce_corrected") %>% pull(estimate)))) %>%
dplyr::mutate("tau" = tau)
dplyr::mutate("conf.low" = estimate - critical_t * se,
"conf.high" = estimate + critical_t * se,
"tau" = tau)

}
}
Expand Down
18 changes: 16 additions & 2 deletions R/projoint.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#' @importFrom MASS mvrnorm
#' @importFrom methods is
#' @importFrom methods new
#' @importFrom estimatr lm_robust
#' @param .data A `projoint_data` object
#' @param .qoi A `projoint_qoi` object. If NULL, defaults to producing all MMs and all AMCEs.
#' @param .structure Either "profile_level" or "choice_level"
Expand All @@ -18,6 +19,13 @@
#' @param .se_method c("analytic", "simulation", "bootstrap") description
#' @param .n_sims The number of simulations. Relevant only if .se_method == "simulation"
#' @param .n_boot The number of bootstrapped samples. Relevant only if .se_method == "bootstrap"
#' @param .n_boot The number of bootstrapped samples. Relevant only if .method == "bootstrap"
#' @param .weights_1 the weight to estimate IRR (see `lm_robust()`): NULL (default)
#' @param .clusters_1 the clusters to estimate IRR (see `lm_robust()`): NULL (default)
#' @param .se_type_1 the standard error type to estimate IRR (see `lm_robust()`): "classical" (default)
#' @param .weights_2 the weight to estimate MM or AMCE (see `lm_robust()`): NULL (default)
#' @param .clusters_2 the clusters to estimate MM or AMCE (see `lm_robust()`): NULL (default)
#' @param .se_type_2 the standard error type to estimate MM or AMCE (see `lm_robust()`): "classical" (default)
#' @return A `projoint_results` object
#' @export
#' @examples
Expand Down Expand Up @@ -51,13 +59,19 @@ projoint <- function(
.remove_ties = TRUE,
.ignore_position = NULL,
.n_sims = NULL,
.n_boot = NULL
.n_boot = NULL,
.weights_1 = NULL,
.clusters_1 = NULL,
.se_type_1 = "classical",
.weights_2 = NULL,
.clusters_2 = NULL,
.se_type_2 = "classical"
){

# bind variables locally to the function ----------------------------------

.baseline <- NULL

# check various settings --------------------------------------------------
# also see: many checks in pj_estimate()

Expand Down
Loading

0 comments on commit d49a26c

Please sign in to comment.