Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Devel #72

Merged
merged 2 commits into from
Sep 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -88,8 +88,6 @@ Collate:
'grimmer-map-seq.R'
'grimmer-map-total-n.R'
'grimmer-rsprite2.R'
'import-standalone-obj-type.R'
'import-standalone-types-check.R'
'metadata.R'
'method-audit-seq.R'
'method-audit-total-n.R'
Expand Down
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ The package is now released under the MIT license.

- In the functions around `grim_ratio()`, the `x` argument must now be a string. This is consistent with `grim_map()`, `unround()`, etc.; and it prevents erroneous results that could previously occur by omitting trailing zeros.

- The GRIMMER implementation was debugged, so that `grimmer_map()` etc. may now yield different results in a few cases. In particular, the `items` argument now works correctly, thanks to Aurélien Allard and Lukas Wallrich (#58).

- `is_seq_dispersed()` now correctly returns `FALSE` if different numbers of missing values at the start and end of `x` mean that `x` cannot be dispersed around `from`.

## New features

- The `probability` column (see above) is created by a new function, `grim_probability()`.
Expand Down
296 changes: 147 additions & 149 deletions R/grimmer-rsprite2.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@
# The idea is to take them as a basis for revamping scrutiny's implementation of
# GRIMMER, notably fixing the `items` argument.

# TODO: run the grimmer-replace-names.R script on the copy.

# # All the variable names:
# grimmer_scalar
# x
Expand All @@ -34,156 +32,156 @@

# Helper function ---------------------------------------------------------

# Determine minimum and maximum SDs for given scale ranges, N, and x.
sd_bounds_measure <- function(n, x, min_val, max_val, sd_prec = NULL, items = 1) {

if (is.null(sd_prec)) {
sd_prec <- max(nchar(sub("^[0-9]*", "", x)) - 1, 0)
}

result <- c(-Inf, Inf)

aMax <- min_val # "aMax" means "value of a to produce the max sd"
aMin <- floor(x*items)/items
bMax <- max(max_val, min_val + 1, aMin + 1) # sanity check (just max_val would normally be ok)
bMin <- aMin + 1/items
total <- round(x * n * items)/items

poss_values <- max_val
for (i in seq_len(items)) {
poss_values <- c(poss_values, min_val:(max_val-1) + (1 / items) * (i - 1))
}
poss_values <- sort(poss_values)

for (abm in list(c(aMin, bMin, 1), c(aMax, bMax, 2))) {

a <- abm[1]
b <- abm[2]
m <- abm[3]


k <- round((total - (n * b)) / (a - b))
k <- min(max(k, 1), n - 1) # ensure there is at least one of each of two numbers
vec <- c(rep(a, k), rep(b, n - k))
diff <- sum(vec) - total

if ((diff < 0)) {
vec <- c(rep(a, k - 1), a + abs(diff), rep(b, n - k))
}
else if ((diff > 0)) {
vec <- c(rep(a, k), b - diff, rep(b, n - k - 1))
}

if (round(x(vec), sd_prec) != round(x, sd_prec) | !all(floor(vec*10e9) %in% floor(poss_values*10e9))) {
stop("Error in calculating range of possible standard deviations")
}

result[m] <- round(sd(vec), sd_prec)
}

return(result)
}
# # Determine minimum and maximum SDs for given scale ranges, N, and x.
# sd_bounds_measure <- function(n, x, min_val, max_val, sd_prec = NULL, items = 1) {
#
# if (is.null(sd_prec)) {
# sd_prec <- max(nchar(sub("^[0-9]*", "", x)) - 1, 0)
# }
#
# result <- c(-Inf, Inf)
#
# aMax <- min_val # "aMax" means "value of a to produce the max sd"
# aMin <- floor(x*items)/items
# bMax <- max(max_val, min_val + 1, aMin + 1) # sanity check (just max_val would normally be ok)
# bMin <- aMin + 1/items
# total <- round(x * n * items)/items
#
# poss_values <- max_val
# for (i in seq_len(items)) {
# poss_values <- c(poss_values, min_val:(max_val-1) + (1 / items) * (i - 1))
# }
# poss_values <- sort(poss_values)
#
# for (abm in list(c(aMin, bMin, 1), c(aMax, bMax, 2))) {
#
# a <- abm[1]
# b <- abm[2]
# m <- abm[3]
#
#
# k <- round((total - (n * b)) / (a - b))
# k <- min(max(k, 1), n - 1) # ensure there is at least one of each of two numbers
# vec <- c(rep(a, k), rep(b, n - k))
# diff <- sum(vec) - total
#
# if ((diff < 0)) {
# vec <- c(rep(a, k - 1), a + abs(diff), rep(b, n - k))
# }
# else if ((diff > 0)) {
# vec <- c(rep(a, k), b - diff, rep(b, n - k - 1))
# }
#
# if (round(x(vec), sd_prec) != round(x, sd_prec) | !all(floor(vec*10e9) %in% floor(poss_values*10e9))) {
# stop("Error in calculating range of possible standard deviations")
# }
#
# result[m] <- round(sd(vec), sd_prec)
# }
#
# return(result)
# }



# Main function -----------------------------------------------------------

grimmer_scalar <- function(x, sd, n, m_prec = NULL, sd_prec = NULL, items = 1, min_val = NULL, max_val = NULL) {
if (is.null(m_prec)) {
m_prec <- max(nchar(sub("^[0-9]*", "", x)) - 1, 0)
}

if (is.null(sd_prec)) {
sd_prec <- max(nchar(sub("^[0-9]*", "", sd)) - 1, 0)
}

n_items = n * items

# Applies the GRIM test, and computes the possible x.
sum <- x * n_items
sum_real <- round(sum)
x_real <- sum_real / n_items

#Checks whether x and sd are within possible range
if (!is.null(min_val) & !is.null(max_val)) {
if (x < min_val | x > max_val) {
warning("The x must be between the scale minimum and maximum")
return(FALSE)
}
sd_bounds <- sd_bounds_measure(n, x, min_val, max_val, sd_prec, items)
if (sd < sd_bounds[1] | sd > sd_bounds[2]) {
warning("Given the scale minimum and maximum, the standard deviation has to be between ", sd_bounds[1], " and ", sd_bounds[2], ".")
return(FALSE)
}
}
# Creates functions to round a number consistently up or down, when the last digit is 5
round_down <- function(number, decimals = 2) {
to_round <- number * 10^(decimals + 1) - floor(number * 10^(decimals)) * 10
number_rounded <- ifelse(to_round == 5,
floor(number * 10^decimals) / 10^decimals,
round(number, digits = decimals))
return(number_rounded)
}

round_up <- function(number, decimals = 2) {
to_round <- number * 10^(decimals + 1) - floor(number * 10^(decimals)) * 10
number_rounded <- ifelse(to_round == 5,
ceiling(number * 10^decimals) / 10^decimals,
round(number, digits = decimals))
return(number_rounded)
}

# Applies the GRIM test, to see whether the reconstituted x is the same as the reported x (with both down and up rounding)

consistent_down <- round_down(number = x_real, decimals = m_prec) == x
consistent_up <- round_up(number = x_real, decimals = m_prec) == x

if (!consistent_down & !consistent_up) {
warning("GRIM inconsistent - so GRIMMER test cannot be run. See ?GRIM_test")
return(FALSE)
}

# Computes the lower and upper bounds for the sd.

sd_lower <- ifelse(sd < 5 / (10^(sd_prec+1)), 0, sd - 5 / (10^(sd_prec+1)))
sd_upper <- sd + 5 / (10^(sd_prec+1))

# Computes the lower and upper bounds for the sum of squares of items.

lower_bound <- ((n - 1) * sd_lower^2 + n * x_real^2)*items^2
upper_bound <- ((n - 1) * sd_upper^2 + n * x_real^2)*items^2

# Checks that there is at least an integer between the lower and upperbound

if (ceiling(lower_bound) > floor(upper_bound)) {
return(FALSE)
}

# Takes a vector of all the integers between the lowerbound and upperbound

integers_possible <- ceiling(lower_bound):floor(upper_bound)

# Creates the predicted variance and sd

var_predicted <- (integers_possible/items^2 - n * x_real^2) / (n - 1)
sd_predicted <- sqrt(var_predicted)

# Computes whether one sd_predicted matches the sd (trying to round both down and up)

sd_rounded_down <- round_down(sd_predicted, sd_prec)
sd_rounded_up <- round_up(sd_predicted, sd_prec)

matches_sd <- sd_rounded_down == sd | sd_rounded_up == sd

if (!any(matches_sd)) {
return(FALSE)
}

# Computes whether there is an integer of the correct parity between the lower and upper bounds.
parity <- sum_real %% 2
matches_parity <- integers_possible %% 2 == parity
return(any(matches_sd & matches_parity))

return(TRUE)
}
# grimmer_scalar <- function(x, sd, n, m_prec = NULL, sd_prec = NULL, items = 1, min_val = NULL, max_val = NULL) {
# if (is.null(m_prec)) {
# m_prec <- max(nchar(sub("^[0-9]*", "", x)) - 1, 0)
# }
#
# if (is.null(sd_prec)) {
# sd_prec <- max(nchar(sub("^[0-9]*", "", sd)) - 1, 0)
# }
#
# n_items = n * items
#
# # Applies the GRIM test, and computes the possible x.
# sum <- x * n_items
# sum_real <- round(sum)
# x_real <- sum_real / n_items
#
# #Checks whether x and sd are within possible range
# if (!is.null(min_val) & !is.null(max_val)) {
# if (x < min_val | x > max_val) {
# warning("The x must be between the scale minimum and maximum")
# return(FALSE)
# }
# sd_bounds <- sd_bounds_measure(n, x, min_val, max_val, sd_prec, items)
# if (sd < sd_bounds[1] | sd > sd_bounds[2]) {
# warning("Given the scale minimum and maximum, the standard deviation has to be between ", sd_bounds[1], " and ", sd_bounds[2], ".")
# return(FALSE)
# }
# }
# # Creates functions to round a number consistently up or down, when the last digit is 5
# round_down <- function(number, decimals = 2) {
# to_round <- number * 10^(decimals + 1) - floor(number * 10^(decimals)) * 10
# number_rounded <- ifelse(to_round == 5,
# floor(number * 10^decimals) / 10^decimals,
# round(number, digits = decimals))
# return(number_rounded)
# }
#
# round_up <- function(number, decimals = 2) {
# to_round <- number * 10^(decimals + 1) - floor(number * 10^(decimals)) * 10
# number_rounded <- ifelse(to_round == 5,
# ceiling(number * 10^decimals) / 10^decimals,
# round(number, digits = decimals))
# return(number_rounded)
# }
#
# # Applies the GRIM test, to see whether the reconstituted x is the same as the reported x (with both down and up rounding)
#
# consistent_down <- round_down(number = x_real, decimals = m_prec) == x
# consistent_up <- round_up(number = x_real, decimals = m_prec) == x
#
# if (!consistent_down & !consistent_up) {
# warning("GRIM inconsistent - so GRIMMER test cannot be run. See ?GRIM_test")
# return(FALSE)
# }
#
# # Computes the lower and upper bounds for the sd.
#
# sd_lower <- ifelse(sd < 5 / (10^(sd_prec+1)), 0, sd - 5 / (10^(sd_prec+1)))
# sd_upper <- sd + 5 / (10^(sd_prec+1))
#
# # Computes the lower and upper bounds for the sum of squares of items.
#
# lower_bound <- ((n - 1) * sd_lower^2 + n * x_real^2)*items^2
# upper_bound <- ((n - 1) * sd_upper^2 + n * x_real^2)*items^2
#
# # Checks that there is at least an integer between the lower and upperbound
#
# if (ceiling(lower_bound) > floor(upper_bound)) {
# return(FALSE)
# }
#
# # Takes a vector of all the integers between the lowerbound and upperbound
#
# integers_possible <- ceiling(lower_bound):floor(upper_bound)
#
# # Creates the predicted variance and sd
#
# var_predicted <- (integers_possible/items^2 - n * x_real^2) / (n - 1)
# sd_predicted <- sqrt(var_predicted)
#
# # Computes whether one sd_predicted matches the sd (trying to round both down and up)
#
# sd_rounded_down <- round_down(sd_predicted, sd_prec)
# sd_rounded_up <- round_up(sd_predicted, sd_prec)
#
# matches_sd <- sd_rounded_down == sd | sd_rounded_up == sd
#
# if (!any(matches_sd)) {
# return(FALSE)
# }
#
# # Computes whether there is an integer of the correct parity between the lower and upper bounds.
# parity <- sum_real %% 2
# matches_parity <- integers_possible %% 2 == parity
# return(any(matches_sd & matches_parity))
#
# return(TRUE)
# }

Binary file modified man/figures/README-unnamed-chunk-6-1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading