AAGI-AUS · wvjgsuhp · Feb 12, 2025 · Jul 10, 2024 · Jul 17, 2024 · Aug 12, 2024
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,5 +1,5 @@
 Type: Package
-Package: RankedSetSampling
+Package: InPlotSampling
 Title: Easing the Application of Ranked Set Sampling in Practice
 Version: 0.1.0
 Date: 2021-02-09
@@ -11,15 +11,15 @@ Authors@R: c(
            comment = c(ORCID = "0000-0001-5291-3600")),
     person("Peter", "Kasprzak", , "[email protected]", role = "aut")
   )
-Description: The RankedSetSampling package provides a way for researchers
+Description: The InPlotSampling package provides a way for researchers
     to easily implement Ranked Set Sampling in practice.  Ranked Set
     Sampling was originally described by McIntyre (1952) (reprinted in
     2005) <doi:10.1198/000313005X54180>. This package takes work by Omer
     and Kravchuk (2021) <https://doi.org/10.1007/s13253-021-00439-1> and
     enables easy use of the methods.
 License: MIT + file LICENSE
-URL: https://biometryhub.github.io/RankedSetSampling/
-BugReports: https://github.com/biometryhub/RankedSetSampling/issues
+URL: https://biometryhub.github.io/InPlotSampling/
+BugReports: https://github.com/biometryhub/InPlotSampling/issues
 Depends:
     R (>= 3.5.0)
 Imports: 
@@ -36,4 +36,4 @@ LinkingTo:
 Encoding: UTF-8
 LazyData: true
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.3.1
+RoxygenNote: 7.3.2
diff --git a/NAMESPACE b/NAMESPACE
@@ -6,10 +6,11 @@ export(rss_jps_estimate)
 export(rss_sample)
 export(sbs_pps_estimate)
 export(sbs_pps_sample)
+export(two_stage_cluster_sample)
 importFrom(Rcpp,sourceCpp)
 importFrom(stats,aggregate)
 importFrom(stats,qt)
 importFrom(stats,rnorm)
 importFrom(stats,sd)
 importFrom(stats,var)
-useDynLib(RankedSetSampling, .registration = TRUE)
+useDynLib(InPlotSampling, .registration = TRUE)
diff --git a/R/CoefF.R b/R/CoefF.R
@@ -1,6 +1,6 @@
 #' This function computes the coefficient of variance estimator
 #'
-#' @param H Set size for each raking group.
+#' @param H Set size for each ranking group.
 #' @param n Sample size.
 #'
 #' @return

diff --git a/R/RankedSetSampling-package.R → R/InPlotSampling-package.R b/R/RankedSetSampling-package.R → R/InPlotSampling-package.R
@@ -1,11 +1,11 @@
-#' @name RankedSetSampling
+#' @name InPlotSampling
 "_PACKAGE"
 
 # The following block is used by usethis to automatically manage
 # roxygen namespace tags. Modify with care!
 ## usethis namespace: start
 #' @importFrom Rcpp sourceCpp
-#' @useDynLib RankedSetSampling, .registration = TRUE
+#' @useDynLib InPlotSampling, .registration = TRUE
 ## usethis namespace: end
 NULL
 

diff --git a/R/RcppExports.R b/R/RcppExports.R
@@ -0,0 +1,8 @@
+# Generated by using Rcpp::compileAttributes() -> do not edit by hand
+# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
+
+#' @keywords internal
+pascal <- function(m, popsize, set) {
+    .Call(`_InPlotSampling_pascal`, m, popsize, set)
+}
+
diff --git a/R/jps_estimate.R b/R/jps_estimate.R
@@ -1,7 +1,7 @@
 #' Computes the estimator for JPS data
 #'
 #' @param data The data to use for estimation.
-#' @param set_size Set size for each raking group.
+#' @param set_size Set size for each ranking group.
 #' @param replace Logical (default `TRUE`). Sample with replacement?
 #' @param model_based An inference mode:
 #' - `FALSE`: design based inference

diff --git a/R/jps_estimate_single.R b/R/jps_estimate_single.R
@@ -2,7 +2,7 @@
 #'
 #' @param ranks Ranks of Y.
 #' @param y Response measurements.
-#' @param set_size Set size for each raking group.
+#' @param set_size Set size for each ranking group.
 #' @param N Finite population size.
 #' @param coef Coefficients used in variance computation when sample size is n.
 #' @param coef_del Coefficients used in variance computation when the i-th unit is deleted.

diff --git a/R/jps_sample.R b/R/jps_sample.R
@@ -1,7 +1,12 @@
 #' Generate JPS sampling on the provided population.
 #'
-#' @inheritParams rss_sample
+#' @param pop Population that will be sampled.
+#' @param n Sample size.
+#' @param H Set size for each ranking group.
+#' @param K Number of rankers.
 #' @param tau A parameter which controls ranking quality.
+#' @param replace A boolean which specifies whether to sample with replacement or not.
+#' @param with_index A boolean which specifies whether to return the index of the sampled population.
 #'
 #' @return A matrix with ranks from each ranker.
 #' @export
@@ -36,10 +41,11 @@
 #' #>  [9,]  8.701285  2  1  2
 #' #> [10,] 13.323884  3  3  3
 #'
-jps_sample <- function(pop, n, H, tau, K, replace = FALSE) {
-  verify_jps_params(pop, n, H, tau, K, replace)
+jps_sample <- function(pop, n, H, tau, K, replace = FALSE, with_index = FALSE) {
+  verify_jps_params(pop, n, H, tau, K, replace, with_index)
 
-  sampling_matrix <- matrix(sample(pop, n * H, replace = replace), ncol = H, nrow = n)
+  sampling_indices <- sample(seq_along(pop), n * H, replace = replace)
+  sampling_matrix <- matrix(pop[sampling_indices], ncol = H, nrow = n)
 
   # rank each SRS unit post experimentally
   jps_matrix <- matrix(0, ncol = K + 1, nrow = n)
@@ -55,6 +61,10 @@ jps_sample <- function(pop, n, H, tau, K, replace = FALSE) {
   }
 
   colnames(jps_matrix) <- c("Y", paste0("R", 1:K))
+  if (with_index) {
+    jps_matrix <- cbind(sampling_indices[1:n], jps_matrix)
+    colnames(jps_matrix)[1] <- "i"
+  }
+
   return(jps_matrix)
 }
-#' @export
diff --git a/R/rss_jps_estimate.R b/R/rss_jps_estimate.R
@@ -36,7 +36,7 @@
 #' taus <- sigma * sqrt(1 / rhos^2 - 1)
 #' population <- qnorm((1:population_size) / (population_size + 1), mu, sigma)
 #'
-#' data <- RankedSetSampling::jps_sample(population, n, H, taus, n_rankers, with_replacement)
+#' data <- InPlotSampling::jps_sample(population, n, H, taus, n_rankers, with_replacement)
 #' data <- data[order(data[, 2]), ]
 #'
 #' rss_jps_estimate(
@@ -75,7 +75,7 @@
 #' x <- population + tau * rnorm(population_size, 0, 1)
 #'
 #' population <- cbind(population, x)
-#' data <- RankedSetSampling::rss_sample(population, n, H, n_rankers, with_replacement)
+#' data <- InPlotSampling::rss_sample(population, n, H, n_rankers, with_replacement)
 #' data <- data[order(data[, 2]), ]
 #'
 #' rss_estimates <- rss_jps_estimate(

diff --git a/R/rss_sample.R b/R/rss_sample.R
@@ -50,7 +50,7 @@ rss_sample <- function(pop, n, H, K, replace = FALSE) {
 #'
 #' @param pop Population that will be sampled with an auxiliary parameter in the second column.
 #' @param n Sample size.
-#' @param H Set size for each raking group.
+#' @param H Set size for each ranking group.
 #' @param K Number of rankers.
 #'
 #' @return A matrix with ranks from each ranker.

diff --git a/R/sbs_pps_sample.R b/R/sbs_pps_sample.R
@@ -82,6 +82,7 @@ sbs_pps_sample <- function(population, n, n_cores = getOption("n_cores", 1)) {
 #' - sbs_indices: sbs sample indices
 #' - pps_indices: pps sample indices
 #' - sizes_wo_sbs: measured sizes without sbs sample
+#' @keywords internal
 #'
 get_sbs_pps_sample_indices <- function(population, n, with_unique_pps = FALSE) {
   n_population <- dim(population)[1]

diff --git a/R/two_stage_cluster_sample.R b/R/two_stage_cluster_sample.R
@@ -0,0 +1,117 @@
+#' Generate two-stage cluster sampling on the population provided.
+#'
+#' @param pop Population that will be sampled with these ordered columns:
+#' 1. Parent id: an index to denotes the parent of the record
+#' 2. Parent auxiliary parameter: an auxiliary parameter for ranking parents
+#' 3. Child auxiliary parameter: an auxiliary parameter for ranking children
+#' @param sampling_strategies (first stage sampling strategy, second stage sampling strategy), e.g.,
+#'   `c('srs', 'jps')`.
+#' - `'srs'`: simple random sampling without replacement
+#' - `'jps'`: JPS sampling
+#' @param n Number of samples in the first stage.
+#' @param H Set size for each ranking group in the first stage.
+#' @param replace A boolean which specifies whether to sample with replacement or not in the first stage
+#'   (applicable only for JPS sampling).
+#' @param ni Number(s) of samples in the second stage. Can be a single number or a vector of `n` numbers.
+#' @param Hi Set size for each ranking group in the second stage. Can be a single number or a vector of `n`
+#'   numbers.
+#' @param replace_i A boolean which specifies whether to sample with replacement or not in the second stage
+#'   (applicable only for JPS sampling).
+#'
+#' @return A matrix with ranks from each ranker.
+#' @export
+#'
+#' @examples
+#' set.seed(112)
+#' parent_size <- 300
+#' child_size <- 50
+#' # the number of samples to be ranked in each set
+#' H <- 3
+#'
+#' sampling_strategies <- c("jps", "jps")
+#' replace <- FALSE
+#' mu <- 10
+#' sigma <- 4
+#' n <- 4
+#'
+#' parent_indices <- rep(1:parent_size, child_size)
+#' parent_aux <- abs(qnorm(1:parent_size / (parent_size + 1), mu, sigma) + 5 * rnorm(parent_size, 0, 1))
+#' child_aux <- abs(parent_aux + 10 * rnorm(parent_size * child_size, 0, 1))
+#'
+#' population <- cbind(parent_indices, rep(parent_aux, child_size), child_aux)
+#' two_stage_cluster_sample(population, sampling_strategies, n, H, replace, 6, 3, FALSE)
+#' #>       parent_id parent_rank child_id  child_aux child_rank
+#' #>  [1,]       201           1     7101  2.2349453          1
+#' #>  [2,]       201           1    12801  9.7175545          3
+#' #>  [3,]       201           1     6501  7.9207230          1
+#' #>  [4,]       201           1     9801  5.7644835          2
+#' #>  [5,]       201           1    10701 13.8089335          3
+#' #>  [6,]       201           1     3501  0.3598331          1
+#' #>  [7,]       254           2     8654 17.3059292          3
+#' #>  [8,]       254           2    11354 15.0837335          2
+#' #>  [9,]       254           2     9254  6.0103919          2
+#' #> [10,]       254           2     2954 12.7011502          2
+#' #> [11,]       254           2    14954  5.1158133          2
+#' #> [12,]       254           2    13754  5.8931551          1
+#' #> [13,]        74           1     8474  4.3393349          1
+#' #> [14,]        74           1     9674 15.0512523          2
+#' #> [15,]        74           1     6674 12.9022479          3
+#' #> [16,]        74           1      674  2.9209174          2
+#' #> [17,]        74           1     7274  7.2500468          3
+#' #> [18,]        74           1     6374  7.0925954          1
+#' #> [19,]       223           3     9223 28.4694257          3
+#' #> [20,]       223           3      223  4.4001977          1
+#' #> [21,]       223           3     9823 22.8676415          3
+#' #> [22,]       223           3    11923 26.4531048          3
+#' #> [23,]       223           3      823 20.8714211          2
+#' #> [24,]       223           3     9523  8.1783058          1
+#'
+two_stage_cluster_sample <- function(pop, sampling_strategies, n, H, replace, ni, Hi, replace_i) {
+  verify_two_stage_params(pop, sampling_strategies, n, H, replace, ni, Hi, replace_i)
+
+  pop <- cbind(pop, seq_len(dim(pop)[1]))
+  parent <- unique(pop[, c(1, 2)])
+  first_strategy <- sampling_strategies[1]
+  second_strategy <- sampling_strategies[2]
+
+  # first stage
+  if (first_strategy == "srs") {
+    first_stage_indices <- sample(seq_len(dim(parent)[1]), n)
+    first_stage_sample <- cbind(parent[first_stage_indices, ], 0)
+  } else if (first_strategy == "jps") {
+    first_stage_sample <- jps_sample(parent[, 2], n, H, 0, 1, replace, TRUE)
+
+    first_stage_indices <- first_stage_sample[, 1]
+    first_stage_sample[, 1] <- parent[first_stage_indices, 1]
+  }
+  first_stage_sample <- first_stage_sample[, c(1, 3)]
+
+  if (length(ni) == 1) {
+    ni <- rep(ni, n)
+  }
+  if (length(Hi) == 1) {
+    Hi <- rep(Hi, n)
+  }
+
+  sampling_matrix <- matrix(nrow = 0, ncol = 5)
+  for (i in 1:n) {
+    parent_filter <- pop[, 1] == first_stage_sample[i, 1]
+    children <- pop[parent_filter, c(4, 3)]
+
+    if (second_strategy == "srs") {
+      second_stage_indices <- sample(seq_len(dim(children)[1]), ni[i])
+      second_stage_sample <- cbind(children[second_stage_indices, ], 0)
+    } else if (second_strategy == "jps") {
+      second_stage_sample <- jps_sample(children[, 2], ni[i], Hi[i], 0, 1, replace, TRUE)
+
+      second_stage_indices <- second_stage_sample[, 1]
+      second_stage_sample[, 1] <- children[second_stage_indices, 1]
+    }
+
+    children_sample <- cbind(first_stage_sample[i, 1], first_stage_sample[i, 2], second_stage_sample)
+    sampling_matrix <- rbind(sampling_matrix, children_sample)
+  }
+
+  colnames(sampling_matrix) <- c("parent_id", "parent_rank", "child_id", colnames(pop)[3], "child_rank")
+  return(sampling_matrix)
+}
diff --git a/R/utils.R b/R/utils.R
@@ -68,6 +68,15 @@ is_whole_number <- function(x, tol = default_tolerance) {
   return(abs(x - round(x)) < tol)
 }
 
+is_positive_whole_numbers <- function(x, tol = default_tolerance) {
+  for (i in x) {
+    if (!is_positive_whole_number(i, tol)) {
+      return(FALSE)
+    }
+  }
+  return(TRUE)
+}
+
 must_be <- function(x, valid_values) {
   return(must_be_(valid_values)(x))
 }
@@ -139,9 +148,9 @@ verify_rss_wo_replace_params <- function(pop, n, H, K) {
   }
 }
 
-verify_jps_params <- function(pop, n, H, tau, K, with_replacement) {
+verify_jps_params <- function(pop, n, H, tau, K, replace, with_index) {
   verify_positive_whole_number(n, H, K)
-  verify_boolean(with_replacement)
+  verify_boolean(replace, with_index)
 
   if (n < H) {
     stop("`n` must >= `H`.")
@@ -153,7 +162,44 @@ verify_jps_params <- function(pop, n, H, tau, K, with_replacement) {
   }
 
   n_population <- length(pop)
-  if (!with_replacement) {
+  if (!replace) {
+    if (n_population < n * H) {
+      stop("The number of population must be at least `nH`.")
+    }
+  }
+}
+
+verify_two_stage_params <- function(pop, sampling_strategies, n, H, replace, ni, Hi, replace_i) {
+  verify_positive_whole_number(n, H)
+  verify_boolean(replace, replace_i)
+  verify_positive_whole_numbers(ni, Hi)
+
+  if (length(ni) != 1 && length(ni) != n) {
+    stop("`ni` must be a vector of 1 or `n` values.")
+  }
+
+  if (length(Hi) != 1 && length(Hi) != n) {
+    stop("`Hi` must be a vector of 1 or `n` values.")
+  }
+
+  if (length(sampling_strategies) != 2) {
+    stop("`sampling_strategies` must be a vector of 2 values.")
+  }
+
+  if (!all(sampling_strategies %in% c("srs", "jps"))) {
+    stop("`sampling_strategies` must be a vector of `'srs'` and/or `'jps'`.")
+  }
+
+  if (n < H) {
+    stop("`n` must be at least `H`.")
+  }
+
+  if (!all(ni >= Hi)) {
+    stop("ith value of `ni` must be at least ith value of `Hi`.")
+  }
+
+  n_population <- dim(pop)[[1]]
+  if (!replace && sampling_strategies[1] == "jps") {
     if (n_population < n * H) {
       stop("The number of population must be at least `nH`.")
     }
@@ -224,6 +270,10 @@ verify_positive_whole_number <- function(..., var_names = NULL) {
   verify_data_type(is_positive_whole_number, "a positive whole number", var_names, ...)
 }
 
+verify_positive_whole_numbers <- function(..., var_names = NULL) {
+  verify_data_type(is_positive_whole_numbers, "a vector of positive whole numbers", var_names, ...)
+}
+
 verify_must_be <- function(..., valid_values, var_names = NULL) {
   literal_values <- get_literal_values(valid_values)
   verify_data_type(must_be_(valid_values), literal_values, var_names, ...)