Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dev #15

Merged
merged 12 commits into from
Feb 12, 2025
Merged

Dev #15

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Type: Package
Package: RankedSetSampling
Package: InPlotSampling
Title: Easing the Application of Ranked Set Sampling in Practice
Version: 0.1.0
Date: 2021-02-09
Expand All @@ -11,15 +11,15 @@ Authors@R: c(
comment = c(ORCID = "0000-0001-5291-3600")),
person("Peter", "Kasprzak", , "[email protected]", role = "aut")
)
Description: The RankedSetSampling package provides a way for researchers
Description: The InPlotSampling package provides a way for researchers
to easily implement Ranked Set Sampling in practice. Ranked Set
Sampling was originally described by McIntyre (1952) (reprinted in
2005) <doi:10.1198/000313005X54180>. This package takes work by Omer
and Kravchuk (2021) <https://doi.org/10.1007/s13253-021-00439-1> and
enables easy use of the methods.
License: MIT + file LICENSE
URL: https://biometryhub.github.io/RankedSetSampling/
BugReports: https://github.com/biometryhub/RankedSetSampling/issues
URL: https://biometryhub.github.io/InPlotSampling/
BugReports: https://github.com/biometryhub/InPlotSampling/issues
Depends:
R (>= 3.5.0)
Imports:
Expand All @@ -36,4 +36,4 @@ LinkingTo:
Encoding: UTF-8
LazyData: true
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.3.1
RoxygenNote: 7.3.2
3 changes: 2 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,11 @@ export(rss_jps_estimate)
export(rss_sample)
export(sbs_pps_estimate)
export(sbs_pps_sample)
export(two_stage_cluster_sample)
importFrom(Rcpp,sourceCpp)
importFrom(stats,aggregate)
importFrom(stats,qt)
importFrom(stats,rnorm)
importFrom(stats,sd)
importFrom(stats,var)
useDynLib(RankedSetSampling, .registration = TRUE)
useDynLib(InPlotSampling, .registration = TRUE)
2 changes: 1 addition & 1 deletion R/CoefF.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#' This function computes the coefficient of variance estimator
#'
#' @param H Set size for each raking group.
#' @param H Set size for each ranking group.
#' @param n Sample size.
#'
#' @return
Expand Down
4 changes: 2 additions & 2 deletions R/RankedSetSampling-package.R → R/InPlotSampling-package.R
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
#' @name RankedSetSampling
#' @name InPlotSampling
"_PACKAGE"

# The following block is used by usethis to automatically manage
# roxygen namespace tags. Modify with care!
## usethis namespace: start
#' @importFrom Rcpp sourceCpp
#' @useDynLib RankedSetSampling, .registration = TRUE
#' @useDynLib InPlotSampling, .registration = TRUE
## usethis namespace: end
NULL

Expand Down
8 changes: 8 additions & 0 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

#' @keywords internal
pascal <- function(m, popsize, set) {
.Call(`_InPlotSampling_pascal`, m, popsize, set)
}

2 changes: 1 addition & 1 deletion R/jps_estimate.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#' Computes the estimator for JPS data
#'
#' @param data The data to use for estimation.
#' @param set_size Set size for each raking group.
#' @param set_size Set size for each ranking group.
#' @param replace Logical (default `TRUE`). Sample with replacement?
#' @param model_based An inference mode:
#' - `FALSE`: design based inference
Expand Down
2 changes: 1 addition & 1 deletion R/jps_estimate_single.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
#'
#' @param ranks Ranks of Y.
#' @param y Response measurements.
#' @param set_size Set size for each raking group.
#' @param set_size Set size for each ranking group.
#' @param N Finite population size.
#' @param coef Coefficients used in variance computation when sample size is n.
#' @param coef_del Coefficients used in variance computation when the i-th unit is deleted.
Expand Down
20 changes: 15 additions & 5 deletions R/jps_sample.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
#' Generate JPS sampling on the provided population.
#'
#' @inheritParams rss_sample
#' @param pop Population that will be sampled.
#' @param n Sample size.
#' @param H Set size for each ranking group.
#' @param K Number of rankers.
#' @param tau A parameter which controls ranking quality.
#' @param replace A boolean which specifies whether to sample with replacement or not.
#' @param with_index A boolean which specifies whether to return the index of the sampled population.
#'
#' @return A matrix with ranks from each ranker.
#' @export
Expand Down Expand Up @@ -36,10 +41,11 @@
#' #> [9,] 8.701285 2 1 2
#' #> [10,] 13.323884 3 3 3
#'
jps_sample <- function(pop, n, H, tau, K, replace = FALSE) {
verify_jps_params(pop, n, H, tau, K, replace)
jps_sample <- function(pop, n, H, tau, K, replace = FALSE, with_index = FALSE) {
verify_jps_params(pop, n, H, tau, K, replace, with_index)

sampling_matrix <- matrix(sample(pop, n * H, replace = replace), ncol = H, nrow = n)
sampling_indices <- sample(seq_along(pop), n * H, replace = replace)
sampling_matrix <- matrix(pop[sampling_indices], ncol = H, nrow = n)

# rank each SRS unit post experimentally
jps_matrix <- matrix(0, ncol = K + 1, nrow = n)
Expand All @@ -55,6 +61,10 @@ jps_sample <- function(pop, n, H, tau, K, replace = FALSE) {
}

colnames(jps_matrix) <- c("Y", paste0("R", 1:K))
if (with_index) {
jps_matrix <- cbind(sampling_indices[1:n], jps_matrix)
colnames(jps_matrix)[1] <- "i"
}

return(jps_matrix)
}
#' @export
4 changes: 2 additions & 2 deletions R/rss_jps_estimate.R
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
#' taus <- sigma * sqrt(1 / rhos^2 - 1)
#' population <- qnorm((1:population_size) / (population_size + 1), mu, sigma)
#'
#' data <- RankedSetSampling::jps_sample(population, n, H, taus, n_rankers, with_replacement)
#' data <- InPlotSampling::jps_sample(population, n, H, taus, n_rankers, with_replacement)
#' data <- data[order(data[, 2]), ]
#'
#' rss_jps_estimate(
Expand Down Expand Up @@ -75,7 +75,7 @@
#' x <- population + tau * rnorm(population_size, 0, 1)
#'
#' population <- cbind(population, x)
#' data <- RankedSetSampling::rss_sample(population, n, H, n_rankers, with_replacement)
#' data <- InPlotSampling::rss_sample(population, n, H, n_rankers, with_replacement)
#' data <- data[order(data[, 2]), ]
#'
#' rss_estimates <- rss_jps_estimate(
Expand Down
2 changes: 1 addition & 1 deletion R/rss_sample.R
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ rss_sample <- function(pop, n, H, K, replace = FALSE) {
#'
#' @param pop Population that will be sampled with an auxiliary parameter in the second column.
#' @param n Sample size.
#' @param H Set size for each raking group.
#' @param H Set size for each ranking group.
#' @param K Number of rankers.
#'
#' @return A matrix with ranks from each ranker.
Expand Down
1 change: 1 addition & 0 deletions R/sbs_pps_sample.R
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ sbs_pps_sample <- function(population, n, n_cores = getOption("n_cores", 1)) {
#' - sbs_indices: sbs sample indices
#' - pps_indices: pps sample indices
#' - sizes_wo_sbs: measured sizes without sbs sample
#' @keywords internal
#'
get_sbs_pps_sample_indices <- function(population, n, with_unique_pps = FALSE) {
n_population <- dim(population)[1]
Expand Down
117 changes: 117 additions & 0 deletions R/two_stage_cluster_sample.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
#' Generate two-stage cluster sampling on the population provided.
#'
#' @param pop Population that will be sampled with these ordered columns:
#' 1. Parent id: an index to denotes the parent of the record
#' 2. Parent auxiliary parameter: an auxiliary parameter for ranking parents
#' 3. Child auxiliary parameter: an auxiliary parameter for ranking children
#' @param sampling_strategies (first stage sampling strategy, second stage sampling strategy), e.g.,
#' `c('srs', 'jps')`.
#' - `'srs'`: simple random sampling without replacement
#' - `'jps'`: JPS sampling
#' @param n Number of samples in the first stage.
#' @param H Set size for each ranking group in the first stage.
#' @param replace A boolean which specifies whether to sample with replacement or not in the first stage
#' (applicable only for JPS sampling).
#' @param ni Number(s) of samples in the second stage. Can be a single number or a vector of `n` numbers.
#' @param Hi Set size for each ranking group in the second stage. Can be a single number or a vector of `n`
#' numbers.
#' @param replace_i A boolean which specifies whether to sample with replacement or not in the second stage
#' (applicable only for JPS sampling).
#'
#' @return A matrix with ranks from each ranker.
#' @export
#'
#' @examples
#' set.seed(112)
#' parent_size <- 300
#' child_size <- 50
#' # the number of samples to be ranked in each set
#' H <- 3
#'
#' sampling_strategies <- c("jps", "jps")
#' replace <- FALSE
#' mu <- 10
#' sigma <- 4
#' n <- 4
#'
#' parent_indices <- rep(1:parent_size, child_size)
#' parent_aux <- abs(qnorm(1:parent_size / (parent_size + 1), mu, sigma) + 5 * rnorm(parent_size, 0, 1))
#' child_aux <- abs(parent_aux + 10 * rnorm(parent_size * child_size, 0, 1))
#'
#' population <- cbind(parent_indices, rep(parent_aux, child_size), child_aux)
#' two_stage_cluster_sample(population, sampling_strategies, n, H, replace, 6, 3, FALSE)
#' #> parent_id parent_rank child_id child_aux child_rank
#' #> [1,] 201 1 7101 2.2349453 1
#' #> [2,] 201 1 12801 9.7175545 3
#' #> [3,] 201 1 6501 7.9207230 1
#' #> [4,] 201 1 9801 5.7644835 2
#' #> [5,] 201 1 10701 13.8089335 3
#' #> [6,] 201 1 3501 0.3598331 1
#' #> [7,] 254 2 8654 17.3059292 3
#' #> [8,] 254 2 11354 15.0837335 2
#' #> [9,] 254 2 9254 6.0103919 2
#' #> [10,] 254 2 2954 12.7011502 2
#' #> [11,] 254 2 14954 5.1158133 2
#' #> [12,] 254 2 13754 5.8931551 1
#' #> [13,] 74 1 8474 4.3393349 1
#' #> [14,] 74 1 9674 15.0512523 2
#' #> [15,] 74 1 6674 12.9022479 3
#' #> [16,] 74 1 674 2.9209174 2
#' #> [17,] 74 1 7274 7.2500468 3
#' #> [18,] 74 1 6374 7.0925954 1
#' #> [19,] 223 3 9223 28.4694257 3
#' #> [20,] 223 3 223 4.4001977 1
#' #> [21,] 223 3 9823 22.8676415 3
#' #> [22,] 223 3 11923 26.4531048 3
#' #> [23,] 223 3 823 20.8714211 2
#' #> [24,] 223 3 9523 8.1783058 1
#'
two_stage_cluster_sample <- function(pop, sampling_strategies, n, H, replace, ni, Hi, replace_i) {
verify_two_stage_params(pop, sampling_strategies, n, H, replace, ni, Hi, replace_i)

pop <- cbind(pop, seq_len(dim(pop)[1]))
parent <- unique(pop[, c(1, 2)])
first_strategy <- sampling_strategies[1]
second_strategy <- sampling_strategies[2]

# first stage
if (first_strategy == "srs") {
first_stage_indices <- sample(seq_len(dim(parent)[1]), n)
first_stage_sample <- cbind(parent[first_stage_indices, ], 0)
} else if (first_strategy == "jps") {
first_stage_sample <- jps_sample(parent[, 2], n, H, 0, 1, replace, TRUE)

first_stage_indices <- first_stage_sample[, 1]
first_stage_sample[, 1] <- parent[first_stage_indices, 1]
}
first_stage_sample <- first_stage_sample[, c(1, 3)]

if (length(ni) == 1) {
ni <- rep(ni, n)
}
if (length(Hi) == 1) {
Hi <- rep(Hi, n)
}

sampling_matrix <- matrix(nrow = 0, ncol = 5)
for (i in 1:n) {
parent_filter <- pop[, 1] == first_stage_sample[i, 1]
children <- pop[parent_filter, c(4, 3)]

if (second_strategy == "srs") {
second_stage_indices <- sample(seq_len(dim(children)[1]), ni[i])
second_stage_sample <- cbind(children[second_stage_indices, ], 0)
} else if (second_strategy == "jps") {
second_stage_sample <- jps_sample(children[, 2], ni[i], Hi[i], 0, 1, replace, TRUE)

second_stage_indices <- second_stage_sample[, 1]
second_stage_sample[, 1] <- children[second_stage_indices, 1]
}

children_sample <- cbind(first_stage_sample[i, 1], first_stage_sample[i, 2], second_stage_sample)
sampling_matrix <- rbind(sampling_matrix, children_sample)
}

colnames(sampling_matrix) <- c("parent_id", "parent_rank", "child_id", colnames(pop)[3], "child_rank")
return(sampling_matrix)
}
56 changes: 53 additions & 3 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,15 @@ is_whole_number <- function(x, tol = default_tolerance) {
return(abs(x - round(x)) < tol)
}

is_positive_whole_numbers <- function(x, tol = default_tolerance) {
for (i in x) {
if (!is_positive_whole_number(i, tol)) {
return(FALSE)
}
}
return(TRUE)
}

must_be <- function(x, valid_values) {
return(must_be_(valid_values)(x))
}
Expand Down Expand Up @@ -139,9 +148,9 @@ verify_rss_wo_replace_params <- function(pop, n, H, K) {
}
}

verify_jps_params <- function(pop, n, H, tau, K, with_replacement) {
verify_jps_params <- function(pop, n, H, tau, K, replace, with_index) {
verify_positive_whole_number(n, H, K)
verify_boolean(with_replacement)
verify_boolean(replace, with_index)

if (n < H) {
stop("`n` must >= `H`.")
Expand All @@ -153,7 +162,44 @@ verify_jps_params <- function(pop, n, H, tau, K, with_replacement) {
}

n_population <- length(pop)
if (!with_replacement) {
if (!replace) {
if (n_population < n * H) {
stop("The number of population must be at least `nH`.")
}
}
}

verify_two_stage_params <- function(pop, sampling_strategies, n, H, replace, ni, Hi, replace_i) {
verify_positive_whole_number(n, H)
verify_boolean(replace, replace_i)
verify_positive_whole_numbers(ni, Hi)

if (length(ni) != 1 && length(ni) != n) {
stop("`ni` must be a vector of 1 or `n` values.")
}

if (length(Hi) != 1 && length(Hi) != n) {
stop("`Hi` must be a vector of 1 or `n` values.")
}

if (length(sampling_strategies) != 2) {
stop("`sampling_strategies` must be a vector of 2 values.")
}

if (!all(sampling_strategies %in% c("srs", "jps"))) {
stop("`sampling_strategies` must be a vector of `'srs'` and/or `'jps'`.")
}

if (n < H) {
stop("`n` must be at least `H`.")
}

if (!all(ni >= Hi)) {
stop("ith value of `ni` must be at least ith value of `Hi`.")
}

n_population <- dim(pop)[[1]]
if (!replace && sampling_strategies[1] == "jps") {
if (n_population < n * H) {
stop("The number of population must be at least `nH`.")
}
Expand Down Expand Up @@ -224,6 +270,10 @@ verify_positive_whole_number <- function(..., var_names = NULL) {
verify_data_type(is_positive_whole_number, "a positive whole number", var_names, ...)
}

verify_positive_whole_numbers <- function(..., var_names = NULL) {
verify_data_type(is_positive_whole_numbers, "a vector of positive whole numbers", var_names, ...)
}

verify_must_be <- function(..., valid_values, var_names = NULL) {
literal_values <- get_literal_values(valid_values)
verify_data_type(must_be_(valid_values), literal_values, var_names, ...)
Expand Down
Loading
Loading