From 13dd04db903fd03107dab01228f76cf2f02789f6 Mon Sep 17 00:00:00 2001 From: Aivuce <99028139+Aivuce@users.noreply.github.com> Date: Tue, 8 Feb 2022 14:07:42 +0200 Subject: [PATCH 1/2] Create test_vardom_vardomh_2levels.R Izveido jaunu testu. --- .../testthat/test_vardom_vardomh_2levels.R | 218 ++++++++++++++++++ 1 file changed, 218 insertions(+) create mode 100644 vardpoor/tests/testthat/test_vardom_vardomh_2levels.R diff --git a/vardpoor/tests/testthat/test_vardom_vardomh_2levels.R b/vardpoor/tests/testthat/test_vardom_vardomh_2levels.R new file mode 100644 index 0000000..d7c9e21 --- /dev/null +++ b/vardpoor/tests/testthat/test_vardom_vardomh_2levels.R @@ -0,0 +1,218 @@ +require(testthat) +#devtools::load_all(path = "vardpoor") +library(data.table) +library(vardpoor) + + +#### bez perioda ---- +test_fun <- function(n1 = 3000){ + # datu izveide ---- + #g_matrica + g_dat <- data.table(id_m = 1:n1, + x0 = 1L, + x1 = sample(0:2, n1, replace = T), + x2 = sample(0:3, n1, replace = T), + x3 = sample(0:3, n1, replace = T), + x4 = sample(0:3, n1, replace = T), + x5 = sample(0:3, n1, replace = T), + g = rnorm(n1, 1, 0.1), + q = runif(n1)) + + #vardom dati + nn <- round(n1 / 300, 0) #number of PSU (iecirkni) + dat_x <- data.table() + strata <- sample(1:4, n1, replace = T,) + dat_x <- data.table(id_m = 1:n1, + b06 = sample(1:11, n1, replace = T, prob = c(0.1922021, 0.1302067, 0.0641481, 0.0471424, 0.01797438, + 0.007695619, 0.002179529, 0.00131848, 0.0003498009, 0.000107631, 2.690776e-05)), + wd = sample(1:5, n1, replace = T), + strata = strata, + survey = rep(1:4, each = n1/4), + iec = sample(1:nn, n1, replace = T)) + + #vardomh dati + dat_y <- data.table() + l <-1 + for (i in 1:n1){ + if (dat_x[i, b06] == 1){ + rinda <- dat_x[i, ] + rinda <- rinda[, id_p := l] + dat_y <- rbind(dat_y, rinda) + l <- l+1 + } else { + j <- dat_x[i, b06] + for (k in 1:j){ + rinda <- dat_x[i,] + rinda <- rinda[, b06 := k] + rinda <- rinda[, id_p := l] + dat_y <- rbind(dat_y, rinda) + l <- l+1 + } + } + } + rm(i, j, k, l, rinda) + + n2 <- nrow(dat_y) + if ((nrow(dat_y) - sum(dat_x$b06)) !=0) stop("nekorekti datu garumi") + + dat_y[, y := sample(0:1, n2, replace = T)]#piekārto y vērtību + gg <- g_dat[, c("id_m", "g")] + + ycol <- dat_y[, lapply(.SD, sum), + keyby = id_m, + .SDcols = "y"] + dat_yy <- merge(dat_y, gg, by = "id_m") + dat_x$y <- ycol$y # papildina vardom datni + dat_xx <- merge(dat_x, g_dat, by = c("id_m")) + dat_xx[, wc := wd * g] + dat_yy[, wc := wd * g] + rm(dat_x, dat_y, gg, ycol) + + #calculations ---- + + n_h <- data.table(strata = 1:4, pop = sample(500:1500, 4)) + res_1 <- vardom(Y = "y", + H = "strata", + PSU = "iec", + w_final = "wc", + fh_zero = TRUE, + N_h = n_h, + X = paste0("x", 0:5), + g = "g", + q = "q", + dataset = dat_xx)$all_result + + res_2 <- vardomh(Y = "y", + H = "strata", + PSU = "iec", + w_final = "wc", + ID_level1 = "id_m", + ID_level2 = "id_p", + N_h = n_h, + fh_zero = TRUE, + X = paste0("x", 0:5), + X_ID_level1 = "id_m", + g = "g", + q = "q", + dataset = dat_yy, + datasetX = g_dat)$all_result + res_1 <- res_1[, -c("respondent_count", "n_nonzero","pop_size")] # #radītāji atšķirās, jo rēķina no dažādiem līmeņiem + res_2 <- res_2[, -c("respondent_count", "n_nonzero","pop_size")] + names_a <- names(res_2) + return(list(res_2, res_1[, .SD, .SDcols = names_a])) +} + +test_that("test equal for one period, diferent levels", { + results <- test_fun() + expect_equal(results[[1]], results[[2]]) +}) + + +#### 2 periodi, ar kalibrāciju, bez Z ---- + +test_fun2 <- function(n1){ + #g matricas izveide + g_dat <- data.table(id_m = 1:n1, + x0 = 1L, + x1 = sample(0:2, n1, replace = T), + x2 = sample(0:3, n1, replace = T), + x3 = sample(0:3, n1, replace = T), + x4 = sample(0:3, n1, replace = T), + x5 = sample(0:3, n1, replace = T), + g = rnorm(n1, 1, 0.1), + q = runif(n1), + period = rep(1:2, each = n1/2)) + + #vardom dati + nn <- round(n1 / 300, 0) #number of PSU (iecirkni) + dat_x <- data.table() + strata <- sample(1:4, n1, replace = T,) + dat_x <- data.table(id_m = 1:n1, + b06 = sample(1:11, n1, replace = T, prob = c(0.1922021, 0.1302067, 0.0641481, 0.0471424, 0.01797438, + 0.007695619, 0.002179529, 0.00131848, 0.0003498009, 0.000107631, 2.690776e-05)), + wd = sample(1:5, n1, replace = T), + strata = strata, + survey = rep(1:4, each = n1/4), + iec = sample(1:nn, n1, replace = T)) + + #vardomh dati + dat_y <- data.table() + l <- 1 + for (i in 1:n1){ + if (dat_x[i, b06] == 1){ + rinda <- dat_x[i, ] + rinda <- rinda[, id_p := l] + dat_y <- rbind(dat_y, rinda) + l <- l+1 + } else { + j <- dat_x[i, b06] + for (k in 1:j){ + rinda <- dat_x[i,] + rinda <- rinda[, b06 := k] + rinda <- rinda[, id_p := l] + dat_y <- rbind(dat_y, rinda) + l <- l+1 + } + } + } + rm(i, j, k, l, rinda) + + n2 <- nrow(dat_y) + if ((nrow(dat_y) - sum(dat_x$b06)) !=0) stop("nekorekti datu garumi") + + dat_y[, y := sample(0:1, n2, replace = T)]#piekārto y vērtību + gg <- g_dat[, c("id_m", "g", "period")] + + ycol <- dat_y[, lapply(.SD, sum), + keyby = id_m, + .SDcols = "y"] + dat_yy <- merge(dat_y, gg, by = "id_m") + dat_x$y <- ycol$y # papildina vardom datni + dat_xx <- merge(dat_x, g_dat, by = c("id_m")) + dat_xx[, wc := wd * g] + dat_yy[, wc := wd * g] + + n_h <- data.table(period = rep(1:2, each = 4), strata = rep(1:4, 2), pop = rep(sample(500:1500, 4),2)) + + res_1 <- vardom(Y = "y", + H = "strata", + PSU = "iec", + w_final = "wc", + fh_zero = TRUE, + N_h = n_h, + X = paste0("x", 0:5), + g = "g", + q = "q", + dataset = dat_xx, + period = "period")$all_result + + res_2 <- vardomh(Y = "y", + H = "strata", + PSU = "iec", + w_final = "wc", + ID_level1 = "id_m", + ID_level2 = "id_p", + N_h = n_h, + fh_zero = TRUE, + X = paste0("x", 0:5), + X_ID_level1 = "id_m", + g = "g", + q = "q", + dataset = dat_yy, + datasetX = g_dat, + period = "period", + periodX = "period")$all_result + res_1 <- res_1[, -c("respondent_count", "n_nonzero","pop_size")] # #radītāji atšķirās, jo rēķina no dažādiem līmeņiem + res_2 <- res_2[, -c("respondent_count", "n_nonzero","pop_size")] + names_a <- names(res_2) + return(list(res_2, res_1[, .SD, .SDcols = names_a])) +} + + + +test_that("test equal for one period, diferent levels", { + results <- test_fun2(6000) + expect_equal(results[[1]], results[[2]]) +}) + +#res_1[, -c("variable", "respondent_count", "n_nonzero", "pop_size", "n_eff", "period")] - res_2[, -c("variable", "respondent_count", "n_nonzero", "pop_size", "period")] From efab8a2477cd00e2f7f9fdd3258da1f3867c1256 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C4=81rti=C5=86=C5=A1=20Liberts?= Date: Thu, 17 Feb 2022 16:48:41 +0200 Subject: [PATCH 2/2] Test for vardom and vardomh equality. --- .../testthat/test_vardom_vardomh_2levels.R | 150 ++++++++++-------- 1 file changed, 85 insertions(+), 65 deletions(-) diff --git a/vardpoor/tests/testthat/test_vardom_vardomh_2levels.R b/vardpoor/tests/testthat/test_vardom_vardomh_2levels.R index d7c9e21..aad56b3 100644 --- a/vardpoor/tests/testthat/test_vardom_vardomh_2levels.R +++ b/vardpoor/tests/testthat/test_vardom_vardomh_2levels.R @@ -1,13 +1,21 @@ -require(testthat) -#devtools::load_all(path = "vardpoor") -library(data.table) -library(vardpoor) +# Test if vardom and vardomh return the same result with the same information +# where: +# - vardom use the data at level 1 (for example households) +# - vardomh use the data at level 2 (for example persons) +# +# test_fun - test for data without period +# test_fun2 - test for data with period +# library(testthat) +# devtools::load_all(path = "vardpoor") +# library(data.table) +# library(vardpoor) -#### bez perioda ---- -test_fun <- function(n1 = 3000){ - # datu izveide ---- - #g_matrica +#### test without period ---- + +test_fun <- function(n1 = 3000) { + # data generation + # g-matrix g_dat <- data.table(id_m = 1:n1, x0 = 1L, x1 = sample(0:2, n1, replace = T), @@ -18,57 +26,61 @@ test_fun <- function(n1 = 3000){ g = rnorm(n1, 1, 0.1), q = runif(n1)) - #vardom dati - nn <- round(n1 / 300, 0) #number of PSU (iecirkni) - dat_x <- data.table() + # vardom data + nn <- round(n1 / 300, 0) # number of PSUs + # dat_x <- data.table() strata <- sample(1:4, n1, replace = T,) - dat_x <- data.table(id_m = 1:n1, - b06 = sample(1:11, n1, replace = T, prob = c(0.1922021, 0.1302067, 0.0641481, 0.0471424, 0.01797438, - 0.007695619, 0.002179529, 0.00131848, 0.0003498009, 0.000107631, 2.690776e-05)), - wd = sample(1:5, n1, replace = T), - strata = strata, - survey = rep(1:4, each = n1/4), - iec = sample(1:nn, n1, replace = T)) - - #vardomh dati + dat_x <- data.table( + id_m = 1:n1, + b06 = sample(1:11, n1, replace = T, + prob = c(0.1922021, 0.1302067, 0.0641481, 0.0471424, + 0.01797438, 0.007695619, 0.002179529, 0.00131848, + 0.0003498009, 0.000107631, 2.690776e-05)), + wd = sample(1:5, n1, replace = T), + strata = strata, + survey = rep(1:4, each = n1/4), + iec = sample(1:nn, n1, replace = T) + ) + + # vardomh data dat_y <- data.table() - l <-1 - for (i in 1:n1){ - if (dat_x[i, b06] == 1){ - rinda <- dat_x[i, ] + l <- 1 + for (i in 1:n1) { + if (dat_x[i, b06] == 1) { + rinda <- dat_x[i] rinda <- rinda[, id_p := l] dat_y <- rbind(dat_y, rinda) - l <- l+1 + l <- l + 1 } else { j <- dat_x[i, b06] - for (k in 1:j){ - rinda <- dat_x[i,] + for (k in 1:j) { + rinda <- dat_x[i] rinda <- rinda[, b06 := k] rinda <- rinda[, id_p := l] dat_y <- rbind(dat_y, rinda) - l <- l+1 + l <- l + 1 } } } rm(i, j, k, l, rinda) n2 <- nrow(dat_y) - if ((nrow(dat_y) - sum(dat_x$b06)) !=0) stop("nekorekti datu garumi") + if ((nrow(dat_y) - sum(dat_x$b06)) != 0) stop("wrong number of records") - dat_y[, y := sample(0:1, n2, replace = T)]#piekārto y vērtību + dat_y[, y := sample(0:1, n2, replace = T)] # generate y variable gg <- g_dat[, c("id_m", "g")] ycol <- dat_y[, lapply(.SD, sum), keyby = id_m, .SDcols = "y"] dat_yy <- merge(dat_y, gg, by = "id_m") - dat_x$y <- ycol$y # papildina vardom datni + dat_x$y <- ycol$y # add to vardom data dat_xx <- merge(dat_x, g_dat, by = c("id_m")) dat_xx[, wc := wd * g] dat_yy[, wc := wd * g] rm(dat_x, dat_y, gg, ycol) - #calculations ---- + # calculations ---- n_h <- data.table(strata = 1:4, pop = sample(500:1500, 4)) res_1 <- vardom(Y = "y", @@ -96,8 +108,11 @@ test_fun <- function(n1 = 3000){ q = "q", dataset = dat_yy, datasetX = g_dat)$all_result - res_1 <- res_1[, -c("respondent_count", "n_nonzero","pop_size")] # #radītāji atšķirās, jo rēķina no dažādiem līmeņiem + + # Those variables differ if calculated from different levels + res_1 <- res_1[, -c("respondent_count", "n_nonzero","pop_size")] res_2 <- res_2[, -c("respondent_count", "n_nonzero","pop_size")] + names_a <- names(res_2) return(list(res_2, res_1[, .SD, .SDcols = names_a])) } @@ -108,10 +123,10 @@ test_that("test equal for one period, diferent levels", { }) -#### 2 periodi, ar kalibrāciju, bez Z ---- +#### 2 periods, with calibration, without Z ---- test_fun2 <- function(n1){ - #g matricas izveide + # g-matrix g_dat <- data.table(id_m = 1:n1, x0 = 1L, x1 = sample(0:2, n1, replace = T), @@ -121,58 +136,64 @@ test_fun2 <- function(n1){ x5 = sample(0:3, n1, replace = T), g = rnorm(n1, 1, 0.1), q = runif(n1), - period = rep(1:2, each = n1/2)) + period = rep(1:2, each = n1 / 2)) - #vardom dati - nn <- round(n1 / 300, 0) #number of PSU (iecirkni) - dat_x <- data.table() + # vardom data + nn <- round(n1 / 300, 0) # number of PSUs + # dat_x <- data.table() strata <- sample(1:4, n1, replace = T,) - dat_x <- data.table(id_m = 1:n1, - b06 = sample(1:11, n1, replace = T, prob = c(0.1922021, 0.1302067, 0.0641481, 0.0471424, 0.01797438, - 0.007695619, 0.002179529, 0.00131848, 0.0003498009, 0.000107631, 2.690776e-05)), - wd = sample(1:5, n1, replace = T), - strata = strata, - survey = rep(1:4, each = n1/4), - iec = sample(1:nn, n1, replace = T)) - - #vardomh dati + dat_x <- data.table( + id_m = 1:n1, + b06 = sample(1:11, n1, replace = T, + prob = c(0.1922021, 0.1302067, 0.0641481, 0.0471424, + 0.01797438, 0.007695619, 0.002179529, 0.00131848, + 0.0003498009, 0.000107631, 2.690776e-05)), + wd = sample(1:5, n1, replace = T), + strata = strata, + survey = rep(1:4, each = n1/4), + iec = sample(1:nn, n1, replace = T) + ) + + # vardomh data dat_y <- data.table() l <- 1 - for (i in 1:n1){ - if (dat_x[i, b06] == 1){ - rinda <- dat_x[i, ] + for (i in 1:n1) { + if (dat_x[i, b06] == 1) { + rinda <- dat_x[i] rinda <- rinda[, id_p := l] dat_y <- rbind(dat_y, rinda) - l <- l+1 + l <- l + 1 } else { j <- dat_x[i, b06] - for (k in 1:j){ - rinda <- dat_x[i,] + for (k in 1:j) { + rinda <- dat_x[i] rinda <- rinda[, b06 := k] rinda <- rinda[, id_p := l] dat_y <- rbind(dat_y, rinda) - l <- l+1 + l <- l + 1 } } } rm(i, j, k, l, rinda) n2 <- nrow(dat_y) - if ((nrow(dat_y) - sum(dat_x$b06)) !=0) stop("nekorekti datu garumi") + if ((nrow(dat_y) - sum(dat_x$b06)) != 0) stop("wrong number of records") - dat_y[, y := sample(0:1, n2, replace = T)]#piekārto y vērtību + dat_y[, y := sample(0:1, n2, replace = T)] # add y variable gg <- g_dat[, c("id_m", "g", "period")] ycol <- dat_y[, lapply(.SD, sum), keyby = id_m, .SDcols = "y"] dat_yy <- merge(dat_y, gg, by = "id_m") - dat_x$y <- ycol$y # papildina vardom datni + dat_x$y <- ycol$y # add to vardom data dat_xx <- merge(dat_x, g_dat, by = c("id_m")) dat_xx[, wc := wd * g] dat_yy[, wc := wd * g] - - n_h <- data.table(period = rep(1:2, each = 4), strata = rep(1:4, 2), pop = rep(sample(500:1500, 4),2)) + + n_h <- data.table(period = rep(1:2, each = 4), + strata = rep(1:4, 2), + pop = rep(sample(500:1500, 4),2)) res_1 <- vardom(Y = "y", H = "strata", @@ -202,17 +223,16 @@ test_fun2 <- function(n1){ datasetX = g_dat, period = "period", periodX = "period")$all_result - res_1 <- res_1[, -c("respondent_count", "n_nonzero","pop_size")] # #radītāji atšķirās, jo rēķina no dažādiem līmeņiem + + # Those variables differ if calculated from different levels + res_1 <- res_1[, -c("respondent_count", "n_nonzero","pop_size")] res_2 <- res_2[, -c("respondent_count", "n_nonzero","pop_size")] + names_a <- names(res_2) return(list(res_2, res_1[, .SD, .SDcols = names_a])) } - - test_that("test equal for one period, diferent levels", { results <- test_fun2(6000) expect_equal(results[[1]], results[[2]]) }) - -#res_1[, -c("variable", "respondent_count", "n_nonzero", "pop_size", "n_eff", "period")] - res_2[, -c("variable", "respondent_count", "n_nonzero", "pop_size", "period")]