Skip to content

Commit

Permalink
Test for vardom and vardomh equality.
Browse files Browse the repository at this point in the history
  • Loading branch information
Mārtiņš Liberts committed Feb 17, 2022
1 parent 840bef6 commit efab8a2
Showing 1 changed file with 85 additions and 65 deletions.
150 changes: 85 additions & 65 deletions vardpoor/tests/testthat/test_vardom_vardomh_2levels.R
Original file line number Diff line number Diff line change
@@ -1,13 +1,21 @@
require(testthat)
#devtools::load_all(path = "vardpoor")
library(data.table)
library(vardpoor)
# Test if vardom and vardomh return the same result with the same information
# where:
# - vardom use the data at level 1 (for example households)
# - vardomh use the data at level 2 (for example persons)
#
# test_fun - test for data without period
# test_fun2 - test for data with period

# library(testthat)
# devtools::load_all(path = "vardpoor")
# library(data.table)
# library(vardpoor)

#### bez perioda ----
test_fun <- function(n1 = 3000){
# datu izveide ----
#g_matrica
#### test without period ----

test_fun <- function(n1 = 3000) {
# data generation
# g-matrix
g_dat <- data.table(id_m = 1:n1,
x0 = 1L,
x1 = sample(0:2, n1, replace = T),
Expand All @@ -18,57 +26,61 @@ test_fun <- function(n1 = 3000){
g = rnorm(n1, 1, 0.1),
q = runif(n1))

#vardom dati
nn <- round(n1 / 300, 0) #number of PSU (iecirkni)
dat_x <- data.table()
# vardom data
nn <- round(n1 / 300, 0) # number of PSUs
# dat_x <- data.table()
strata <- sample(1:4, n1, replace = T,)
dat_x <- data.table(id_m = 1:n1,
b06 = sample(1:11, n1, replace = T, prob = c(0.1922021, 0.1302067, 0.0641481, 0.0471424, 0.01797438,
0.007695619, 0.002179529, 0.00131848, 0.0003498009, 0.000107631, 2.690776e-05)),
wd = sample(1:5, n1, replace = T),
strata = strata,
survey = rep(1:4, each = n1/4),
iec = sample(1:nn, n1, replace = T))

#vardomh dati
dat_x <- data.table(
id_m = 1:n1,
b06 = sample(1:11, n1, replace = T,
prob = c(0.1922021, 0.1302067, 0.0641481, 0.0471424,
0.01797438, 0.007695619, 0.002179529, 0.00131848,
0.0003498009, 0.000107631, 2.690776e-05)),
wd = sample(1:5, n1, replace = T),
strata = strata,
survey = rep(1:4, each = n1/4),
iec = sample(1:nn, n1, replace = T)
)

# vardomh data
dat_y <- data.table()
l <-1
for (i in 1:n1){
if (dat_x[i, b06] == 1){
rinda <- dat_x[i, ]
l <- 1
for (i in 1:n1) {
if (dat_x[i, b06] == 1) {
rinda <- dat_x[i]
rinda <- rinda[, id_p := l]
dat_y <- rbind(dat_y, rinda)
l <- l+1
l <- l + 1
} else {
j <- dat_x[i, b06]
for (k in 1:j){
rinda <- dat_x[i,]
for (k in 1:j) {
rinda <- dat_x[i]
rinda <- rinda[, b06 := k]
rinda <- rinda[, id_p := l]
dat_y <- rbind(dat_y, rinda)
l <- l+1
l <- l + 1
}
}
}
rm(i, j, k, l, rinda)

n2 <- nrow(dat_y)
if ((nrow(dat_y) - sum(dat_x$b06)) !=0) stop("nekorekti datu garumi")
if ((nrow(dat_y) - sum(dat_x$b06)) != 0) stop("wrong number of records")

dat_y[, y := sample(0:1, n2, replace = T)]#piekārto y vērtību
dat_y[, y := sample(0:1, n2, replace = T)] # generate y variable
gg <- g_dat[, c("id_m", "g")]

ycol <- dat_y[, lapply(.SD, sum),
keyby = id_m,
.SDcols = "y"]
dat_yy <- merge(dat_y, gg, by = "id_m")
dat_x$y <- ycol$y # papildina vardom datni
dat_x$y <- ycol$y # add to vardom data
dat_xx <- merge(dat_x, g_dat, by = c("id_m"))
dat_xx[, wc := wd * g]
dat_yy[, wc := wd * g]
rm(dat_x, dat_y, gg, ycol)

#calculations ----
# calculations ----

n_h <- data.table(strata = 1:4, pop = sample(500:1500, 4))
res_1 <- vardom(Y = "y",
Expand Down Expand Up @@ -96,8 +108,11 @@ test_fun <- function(n1 = 3000){
q = "q",
dataset = dat_yy,
datasetX = g_dat)$all_result
res_1 <- res_1[, -c("respondent_count", "n_nonzero","pop_size")] # #radītāji atšķirās, jo rēķina no dažādiem līmeņiem

# Those variables differ if calculated from different levels
res_1 <- res_1[, -c("respondent_count", "n_nonzero","pop_size")]
res_2 <- res_2[, -c("respondent_count", "n_nonzero","pop_size")]

names_a <- names(res_2)
return(list(res_2, res_1[, .SD, .SDcols = names_a]))
}
Expand All @@ -108,10 +123,10 @@ test_that("test equal for one period, diferent levels", {
})


#### 2 periodi, ar kalibrāciju, bez Z ----
#### 2 periods, with calibration, without Z ----

test_fun2 <- function(n1){
#g matricas izveide
# g-matrix
g_dat <- data.table(id_m = 1:n1,
x0 = 1L,
x1 = sample(0:2, n1, replace = T),
Expand All @@ -121,58 +136,64 @@ test_fun2 <- function(n1){
x5 = sample(0:3, n1, replace = T),
g = rnorm(n1, 1, 0.1),
q = runif(n1),
period = rep(1:2, each = n1/2))
period = rep(1:2, each = n1 / 2))

#vardom dati
nn <- round(n1 / 300, 0) #number of PSU (iecirkni)
dat_x <- data.table()
# vardom data
nn <- round(n1 / 300, 0) # number of PSUs
# dat_x <- data.table()
strata <- sample(1:4, n1, replace = T,)
dat_x <- data.table(id_m = 1:n1,
b06 = sample(1:11, n1, replace = T, prob = c(0.1922021, 0.1302067, 0.0641481, 0.0471424, 0.01797438,
0.007695619, 0.002179529, 0.00131848, 0.0003498009, 0.000107631, 2.690776e-05)),
wd = sample(1:5, n1, replace = T),
strata = strata,
survey = rep(1:4, each = n1/4),
iec = sample(1:nn, n1, replace = T))

#vardomh dati
dat_x <- data.table(
id_m = 1:n1,
b06 = sample(1:11, n1, replace = T,
prob = c(0.1922021, 0.1302067, 0.0641481, 0.0471424,
0.01797438, 0.007695619, 0.002179529, 0.00131848,
0.0003498009, 0.000107631, 2.690776e-05)),
wd = sample(1:5, n1, replace = T),
strata = strata,
survey = rep(1:4, each = n1/4),
iec = sample(1:nn, n1, replace = T)
)

# vardomh data
dat_y <- data.table()
l <- 1
for (i in 1:n1){
if (dat_x[i, b06] == 1){
rinda <- dat_x[i, ]
for (i in 1:n1) {
if (dat_x[i, b06] == 1) {
rinda <- dat_x[i]
rinda <- rinda[, id_p := l]
dat_y <- rbind(dat_y, rinda)
l <- l+1
l <- l + 1
} else {
j <- dat_x[i, b06]
for (k in 1:j){
rinda <- dat_x[i,]
for (k in 1:j) {
rinda <- dat_x[i]
rinda <- rinda[, b06 := k]
rinda <- rinda[, id_p := l]
dat_y <- rbind(dat_y, rinda)
l <- l+1
l <- l + 1
}
}
}
rm(i, j, k, l, rinda)

n2 <- nrow(dat_y)
if ((nrow(dat_y) - sum(dat_x$b06)) !=0) stop("nekorekti datu garumi")
if ((nrow(dat_y) - sum(dat_x$b06)) != 0) stop("wrong number of records")

dat_y[, y := sample(0:1, n2, replace = T)]#piekārto y vērtību
dat_y[, y := sample(0:1, n2, replace = T)] # add y variable
gg <- g_dat[, c("id_m", "g", "period")]

ycol <- dat_y[, lapply(.SD, sum),
keyby = id_m,
.SDcols = "y"]
dat_yy <- merge(dat_y, gg, by = "id_m")
dat_x$y <- ycol$y # papildina vardom datni
dat_x$y <- ycol$y # add to vardom data
dat_xx <- merge(dat_x, g_dat, by = c("id_m"))
dat_xx[, wc := wd * g]
dat_yy[, wc := wd * g]

n_h <- data.table(period = rep(1:2, each = 4), strata = rep(1:4, 2), pop = rep(sample(500:1500, 4),2))

n_h <- data.table(period = rep(1:2, each = 4),
strata = rep(1:4, 2),
pop = rep(sample(500:1500, 4),2))

res_1 <- vardom(Y = "y",
H = "strata",
Expand Down Expand Up @@ -202,17 +223,16 @@ test_fun2 <- function(n1){
datasetX = g_dat,
period = "period",
periodX = "period")$all_result
res_1 <- res_1[, -c("respondent_count", "n_nonzero","pop_size")] # #radītāji atšķirās, jo rēķina no dažādiem līmeņiem

# Those variables differ if calculated from different levels
res_1 <- res_1[, -c("respondent_count", "n_nonzero","pop_size")]
res_2 <- res_2[, -c("respondent_count", "n_nonzero","pop_size")]

names_a <- names(res_2)
return(list(res_2, res_1[, .SD, .SDcols = names_a]))
}



test_that("test equal for one period, diferent levels", {
results <- test_fun2(6000)
expect_equal(results[[1]], results[[2]])
})

#res_1[, -c("variable", "respondent_count", "n_nonzero", "pop_size", "n_eff", "period")] - res_2[, -c("variable", "respondent_count", "n_nonzero", "pop_size", "period")]

0 comments on commit efab8a2

Please sign in to comment.