From 69415b25e687f7a5c7b443929f780fe322b0f103 Mon Sep 17 00:00:00 2001
From: kauedesousa
Date: Thu, 9 Nov 2023 20:55:27 +0100
Subject: [PATCH] v1.0
---
DESCRIPTION | 8 +-
NAMESPACE | 2 +
NEWS.md | 8 +
R/AAA-getDataCM.R | 22 +-
R/getProjectProgress.R | 11 +-
R/getProjectsCM.R | 4 +-
R/randomise.R | 633 ------------------
R/randomize.R | 592 ++++++++++++++++
R/rankTricot.R | 340 +++++-----
README.md | 4 +-
codemeta.json | 122 +++-
docs/404.html | 2 +-
docs/CODE_OF_CONDUCT.html | 2 +-
docs/CONTRIBUTING.html | 2 +-
docs/LICENSE-text.html | 2 +-
docs/articles/Overview.html | 96 ++-
.../figure-html/plrankings-1.png | Bin 0 -> 63903 bytes
docs/articles/index.html | 2 +-
docs/authors.html | 23 +-
docs/index.html | 6 +-
docs/news/index.html | 15 +-
docs/pkgdown.yml | 4 +-
docs/reference/ClimMobTools.html | 2 +-
docs/reference/getDataCM.html | 38 +-
docs/reference/getProjectProgress.html | 30 +-
docs/reference/getProjectsCM.html | 21 +-
docs/reference/getTraitList.html | 19 +-
docs/reference/index.html | 6 +-
docs/reference/randomise.html | 2 +-
docs/reference/randomize.html | 203 ++++++
docs/reference/rankTricot.html | 78 ++-
docs/reference/rmGeoIdentity.html | 41 +-
docs/sitemap.xml | 3 +
inst/CITATION | 20 +
man/getDataCM.Rd | 17 +-
man/getProjectProgress.Rd | 11 +-
man/getProjectsCM.Rd | 4 +-
man/{randomise.Rd => randomize.Rd} | 43 +-
man/rankTricot.Rd | 19 +-
vignettes/Overview.R | 96 +--
vignettes/Overview.Rmd | 25 +-
41 files changed, 1504 insertions(+), 1074 deletions(-)
delete mode 100644 R/randomise.R
create mode 100644 R/randomize.R
create mode 100644 docs/articles/Overview_files/figure-html/plrankings-1.png
create mode 100644 docs/reference/randomize.html
create mode 100644 inst/CITATION
rename man/{randomise.Rd => randomize.Rd} (61%)
diff --git a/DESCRIPTION b/DESCRIPTION
index e0771f4..bcff510 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
Package: ClimMobTools
Type: Package
Title: API Client for the 'ClimMob' Platform
-Version: 0.5.003
+Version: 1.0
Authors@R: c(person("Kauê", "de Sousa",
email = "desousa.kaue@gmail.com",
role = c("aut", "cre"),
@@ -26,17 +26,21 @@ Depends: R (>= 3.5.0)
Imports:
httr,
jsonlite,
+ lpSolve,
Matrix,
methods,
RSpectra,
+ stats,
utils
Suggests:
+ climatrends,
gosset,
knitr,
+ nasapower,
rmarkdown,
sf,
PlackettLuce,
testthat (>= 2.1.0)
-Language: en-GB
+Language: en-US
RoxygenNote: 7.2.1
VignetteBuilder: knitr
diff --git a/NAMESPACE b/NAMESPACE
index 9c874e0..6faa194 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -17,5 +17,7 @@ importFrom(httr,RETRY)
importFrom(httr,accept_json)
importFrom(httr,content)
importFrom(jsonlite,fromJSON)
+importFrom(lpSolve,lp)
importFrom(methods,as)
+importFrom(stats,runif)
importFrom(utils,tail)
diff --git a/NEWS.md b/NEWS.md
index a6af162..6dbb09f 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,11 @@
+ClimMobTools 1.0 (2023-11-10)
+=========================
+
+### BUG FIXES
+
+* Fix an issue in `randomize()` to allocate blocks
+
+
ClimMobTools 0.4.6 (2022-08-11)
=========================
diff --git a/R/AAA-getDataCM.R b/R/AAA-getDataCM.R
index 845220a..f63a3fb 100644
--- a/R/AAA-getDataCM.R
+++ b/R/AAA-getDataCM.R
@@ -29,13 +29,22 @@
#' # This function only works with an API key
#' # the API key can be obtained once a free ClimMob account
#' # is created via https://climmob.net/
+#'
+#' library("ClimMobTools")
+#' my_key <- "ff05a174-28d0-4a40-ab5a-35dc486133a6"
#'
-#' my_key <- "92cec84d-44f5-4858-9ef0-bd872496311c"
+#' getDataCM(key = my_key,
+#' project = "beanaru23",
+#' userowner = "student",
+#' server = "1000farms")
+#'
+#' # get in the wide format
#'
#' getDataCM(key = my_key,
-#' project = "testmark",
-#' userowner = "kauedesousa",
-#' server = "testing")
+#' project = "beanaru23",
+#' userowner = "student",
+#' server = "1000farms",
+#' pivot.wider = TRUE)
#'
#' @seealso ClimMob website \url{https://climmob.net/}
#' @importFrom httr accept_json content RETRY
@@ -87,3 +96,8 @@ getDataCM <- function(key,
return(cmdata)
}
+
+
+
+
+
diff --git a/R/getProjectProgress.R b/R/getProjectProgress.R
index 070e2f3..da34f4c 100644
--- a/R/getProjectProgress.R
+++ b/R/getProjectProgress.R
@@ -17,13 +17,14 @@
#' # This function only works with an API key
#' # the API key can be obtained once a free ClimMob account
#' # is created via https://climmob.net/
-#'
-#' my_key <- "92cec84d-44f5-4858-9ef0-bd872496311c"
#'
+#' library("ClimMobTools")
+#' my_key <- "ff05a174-28d0-4a40-ab5a-35dc486133a6"
+#'
#' getProjectProgress(key = my_key,
-#' project = "testmark",
-#' userowner = "kauedesousa",
-#' server = "testing")
+#' project = "beanaru23",
+#' userowner = "student",
+#' server = "1000FARMS")
#'
#'
#' @seealso ClimMob website \url{https://climmob.net/}
diff --git a/R/getProjectsCM.R b/R/getProjectsCM.R
index cb4c5fb..f5a224a 100644
--- a/R/getProjectsCM.R
+++ b/R/getProjectsCM.R
@@ -69,9 +69,9 @@
#' # the API key can be obtained once a free ClimMob account
#' # is created via https://climmob.net/
#'
-#' my_key <- "92cec84d-44f5-4858-9ef0-bd872496311c"
+#' my_key <- "ff05a174-28d0-4a40-ab5a-35dc486133a6"
#'
-#' getProjectsCM(key = my_key, server = "testing")
+#' getProjectsCM(key = my_key, server = "1000FARMS")
#'
#' @seealso ClimMob website \url{https://climmob.net/}
#' @export
diff --git a/R/randomise.R b/R/randomise.R
deleted file mode 100644
index c04f807..0000000
--- a/R/randomise.R
+++ /dev/null
@@ -1,633 +0,0 @@
-#' Randomised group of items
-#'
-#' Set a randomised group of items for crowdsourcing citizen science.
-#' Generate designs for ranking of options. It is designed for tricot trials
-#' specifically (comparing 3 options), but it will also work with comparisons
-#' of any other number of options.
-#' The design strives for approximate A optimality, this means that it is robust
-#' to missing observations. It also strives for balance for positions of each option.
-#' Options are equally divided between first, second, third, etc. position.
-#' The strategy is to create a "pool" of combinations that does not repeat
-#' combinations and is A-optimal. Then this pool is ordered to make subsets of
-#' consecutive combinations also relatively balanced and A-optimal
-#'
-#' @author Jacob van Etten
-#' @param ncomp an integer for the number of items to be assigned to each package
-#' @param npackages an integer for the number of trial packages to be produced
-#' @param itemnames a character for the name of items tested in the project
-#' @param availability optional, a vector with integers indicating the
-#' number of packages available for each \var{itemnames}
-#' @param proportions optional, a numeric vector with the desired proportions
-#' for each \var{itemnames}
-#' @param ... additional arguments passed to methods
-#' @references
-#' Bailey and Cameron (2004). Combinations of optimal designs.
-#' \url{https://webspace.maths.qmul.ac.uk/l.h.soicher/designtheory.org/library/preprints/optimal.pdf}
-#' @return A dataframe with the randomised design
-#' @examples
-#' ncomp <- 3
-#' npackages <- 20
-#' itemnames <- c("apple","banana","grape","mango", "orange")
-#' availability <- c(5, 8, 50, 50, 50)
-#'
-#' randomise(ncomp = ncomp,
-#' npackages = npackages,
-#' itemnames = itemnames)
-#'
-#' randomise(ncomp = ncomp,
-#' npackages = npackages,
-#' itemnames = itemnames,
-#' availability = availability)
-#'
-#' @aliases randomize
-#' @importFrom Matrix Diagonal
-#' @importFrom methods as
-#' @importFrom RSpectra eigs
-#' @importFrom utils tail
-#' @export
-randomise <- function(npackages,
- itemnames,
- ncomp = 3,
- availability = NULL,
- proportions = NULL,
- ...) {
-
- dots <- list(...)
-
- comp <- dots[["comp"]]
-
- if (is.null(comp)) {
- comp <- 10
- }
-
- nitems <- length(itemnames)
-
- # check inputs
- if (!is.null(availability) & length(availability) != nitems) {
- stop("nitems is different than length of vector with availability")
- }
-
- if (nitems < 3) {
- stop("nitems must be larger than 2")
- }
-
- nneeded <- npackages * ncomp
-
- if (!is.null(availability)) {
-
- maxav <- which.max(availability)[1]
- availability[maxav] <- availability[maxav] + 1
-
- if (sum(availability) < nneeded) {
- stop("availability is not sufficient: smaller than npackages * ncomp \n")
- }
- if (length(availability) != nitems) {
- stop("length of vector availability should be nitems \n")
- }
- }
-
- if (!is.null(proportions)) {
-
- if (length(proportions) != nitems) {
- stop("length of vector proportions should be nitems")
-
- }
-
- if (sum(proportions) != 1) {
-
- proportions <- proportions / sum(proportions)
- warning("sum of proportions is not 1; values have been rescaled \n")
-
- }
- }
-
- # depth1 is the number of rows after the procedure starts to compare options with the Kirchoff index
- depth1 <- floor(nitems / ncomp * 2)
-
- # in the second round, it is also how far it 'looks back' in the sequential balancing
- depth2 <- min(20, floor(nitems / ncomp * 4))
-
- # Varieties indicated by integers
- varieties <- seq_len(nitems)
-
- # Full set of all combinations
- varcombinations <- t((.combn(varieties, ncomp)))
-
- if (is.null(availability) & is.null(proportions)) {
-
- # if the full set of combinations is small and can be covered at least once
- # the set will include each combination at least once
- ncomb <- dim(varcombinations)[1]
- n <- floor(npackages / ncomb)
- nfixed <- ncomb * n
- vars1 <- varcombinations[c(rep(1:(dim(varcombinations)[1]), times = n)), ]
-
- # the remaining combinations are sampled randomly but in a balanced way
- # this means that no combination enters more than once
- nremain <- npackages - nfixed
-
- # create set to get to full number of observers
- vars2 <- matrix(nrow = nremain, ncol = ncomp)
-
- # set up array with set of combinations
- varcomb <- matrix(0, nrow = nitems, ncol = nitems)
-
- if (dim(vars2)[1] > 0.5) {
-
- # select combinations for the vars2 set that optimize design
- for (i in 1:nremain) {
-
- # calculate frequency of each variety
- sumcomb <- (rowSums(varcomb) + colSums(varcomb)) / 2
-
- # priority of each combination is equal to Shannon index of varieties
- # in each combination
- prioritycomb <- apply(varcombinations, 1, function(x){
- .getShannonVector(x, sumcomb, nitems)
- })
-
- # highest priority to be selected is the combination which has the lowest
- # Shannon index
- selected <- which(prioritycomb == min(prioritycomb))
-
- # if there are ties, find out which combination reduces Kirchhoff index most
-
- if (length(selected) > 1 & i > depth1) {
-
- reduce <- max(2, min(comp, round(5000/npackages), round(200/nitems)))
-
- # randomly subsample from selected if there are too many combinations to check
- if (length(selected) > reduce) {
- selected <- sample(selected, reduce)
- }
-
- # get a nitems x nitems matrix with number of connections
-
- # calculate Kirchhoff index and select smallest value
- khi <- vector(length = length(selected))
- for (k in 1:length(selected)) {
-
- evalgraph <- varcomb
- index <- t(.combn(varcombinations[selected[k],], 2))
- evalgraph[index] <- evalgraph[index] + 1
- khi[k] <- .KirchhoffIndex(evalgraph)
-
- }
-
- selected <- selected[which(khi == min(khi))]
-
- }
-
- # if there are still ties between ranks of combinations, selected randomly
- # from the ties
- if (length(selected) > 1) {
- selected <- sample(selected, 1)
- }
-
- # assign the selected combination
- vars2[i,] <- varcombinations[selected,]
-
- varcomb[t(.combn(varcombinations[selected,],2))] <-
- varcomb[t(.combn(varcombinations[selected,],2))] + 1
-
- # remove used combination
- varcombinations <- varcombinations[-selected,]
-
- }
- }
-
- # merge vars1 and vars2 to create the full set of combinations
- vars <- rbind(vars1, vars2)
-
- # calculate allocations available to each item
- allocations <- as.integer(table(vars))
-
- }
-
- if (!is.null(availability) | !is.null(proportions)) {
-
- #create the objects available or proportions is they are not available
- if (is.null(availability)) {
- available <- rep(npackages, times = nitems)
- availability <- rep(npackages, times = nitems)
- } else {
- available <- availability
- }
- if (is.null(proportions)) {
- proportions <- rep(1/nitems, times = nitems) #available / sum(available)
- }
-
- #order vector from low availability to high as .smart.round will favour right size of vector
- #to resolve dilemmas, it will add more of the items that are more abundant
- names(available) <- varieties
- available <- sort(available)
- proportions <- proportions[as.integer(names(available))]
-
- # calculate the packages that are needed - this will round later numbers to higher values
- # if needed to fill the quota
- needed <- .smart.round((nneeded * proportions) / sum(proportions))
-
- # prepare inputs into loop
- allocations <- rep(0, times = nitems)
- names(allocations) <- names(available) #just to check, can be removed
- tremain <- 1
-
- while(tremain > 0) {
-
- allocate <- pmin(available, needed)
- available <- available - allocate
-
- allocations <- allocations + allocate
- tremaining <- nneeded - sum(allocations)
-
- needed <- available > 0
- needed <- needed * (tremaining / sum(needed))
- needed <- .smart.round(needed)
-
- tremain <- tremaining
-
- }
-
- #reorder the vector with allocations back to original order
- allocations <- allocations[match(1:nitems, as.integer(names(available)))] #should be in ascending order
-
- # prepare variables
- ncomb <- dim(varcombinations)[1]
- n <- floor(npackages / ncomb)
- nfixed <- ncomb * n
-
- # create set to get to full number of observers
- vars <- matrix(nrow = npackages, ncol = ncomp)
-
- # set up array with set of combinations
- varcomb <- matrix(0, nrow = nitems, ncol = nitems)
-
- if (dim(vars)[1] > 0.5) {
-
- # select combinations for the vars set that optimize design
- for (i in 1:npackages) {
-
- # calculate frequency of each variety and define input for Shannon function, which prefers
- # low and even values
- varfreq <- (rowSums(varcomb) + colSums(varcomb)) / 2
- sumcomb <- (varfreq / allocations) * mean(allocations) #adjust for unequal required allocations
-
- # priority of each combination is equal to Shannon index of varieties
- # in each combination
- prioritycomb <- apply(varcombinations, 1, function(x){
- .getShannonVector(x, sumcomb, nitems)
- })
-
- # highest priority to be selected is the combination which has the lowest
- # Shannon index
- selected <- which(prioritycomb == min(prioritycomb))
-
- # if there are ties, find out which combination reduces Kirchhoff index most
-
- if (length(selected) > 1 & i > depth1) {
-
- reduce <- max(2, min(length(selected), comp, round(5000/npackages), round(200/nitems)))
-
- # randomly subsample from selected if there are too many combinations to check
- if (length(selected) > reduce) {
- selected <- sample(selected, reduce)
- }
-
- # get a nitems x nitems matrix with number of connections
-
- # calculate Kirchhoff index and select smallest value
- khi <- vector(length = length(selected))
- for (k in 1:length(selected)) {
-
- evalgraph <- varcomb
- index <- t(.combn(varcombinations[selected[k],], 2))
- evalgraph[index] <- evalgraph[index] + 1
- khi[k] <- .KirchhoffIndex(evalgraph)
-
- }
-
- selected <- selected[which(khi == min(khi))]
-
- }
-
- # if there are still ties between ranks of combinations, selected randomly
- # from the ties
- if (length(selected) > 1) {
- selected <- sample(selected, 1)
- }
-
- # assign the selected combination
- vars[i,] <- varcombinations[selected,]
-
- varcomb[t(.combn(varcombinations[selected,],2))] <-
- varcomb[t(.combn(varcombinations[selected,],2))] + 1
-
- }
- }
-
- }
-
- # create empty object to contain ordered combinations of vars
- varOrdered <- matrix(NA, nrow = npackages, ncol = ncomp)
-
- # set up array with set of combinations
- varcomb <- matrix(0, nrow = nitems, ncol = nitems)
-
- # fill first row
- selected <- sample(1:npackages, 1)
- varcomb[t(.combn(vars[selected,],2))] <- 1
- varOrdered[1,] <- vars[selected,]
- vars <- vars[-selected,]
-
- # optimize the order of overall design by repeating a similar procedure to the above
- for (i in 2:(npackages-1)) {
-
- # calculate frequency of each variety
- sumcomb <- (rowSums(varcomb) + colSums(varcomb)) / 2
- sumcomb <- (sumcomb / allocations) * mean(allocations)
-
- # priority of each combination is equal to Shannon index of varieties
- # in each combination
- prioritycomb <- apply(vars, 1, function(x){
- .getShannonVector(x, sumcomb, nitems)
- })
-
- # highest priority to be selected is the combination which has the
- # lowest Shannon index
- selected <- which(prioritycomb == min(prioritycomb))
-
- # if there are ties, find out which combination reduces Kirchhoff index most
- if (length(selected) > 1 & i > depth1) {
-
- reduce <- max(2, min(comp, round(5000/npackages), round(200/nitems)))
-
- # randomly subsample from selected if there are too many combinations to check
- if (length(selected) > reduce) {
- selected <- sample(selected, reduce)
- }
-
- # get a nitems x nitems matrix with number of connections
- sumcombMatrix <- varcomb * 0
-
- # in this case, get matrix to calculate Kirchhoff index only for
- # last depth2 observers
- for (j in max(1,i-depth2):(i-1)) {
- index <- t(.combn(varOrdered[j,],2))
- sumcombMatrix[index] <- sumcombMatrix[index] + 1
-
- }
-
- # calculate Kirchhoff indices for the candidate matrix corresponding
- # to each row in selected
- khi <- vector(length = length(selected))
- for (k in 1:length(selected)) {
-
- evalgraph <- sumcombMatrix
- index <- t(.combn(vars[selected[k],], 2))
- evalgraph[index] <- evalgraph[index] + 1
- khi[k] <- .KirchhoffIndex(evalgraph)
-
- }
-
- # select combination that produces the lowest Kirchhoff index
- selected <- selected[which(khi == min(khi, na.rm=TRUE))]
-
- }
-
- # if there are still ties between ranks of combinations, select one randomly
- if (length(selected) > 1) {
- selected <- sample(selected, 1)
- }
-
- # assign the selected combination
- varOrdered[i,] <- vars[selected,]
- varcomb[t(.combn(vars[selected,], 2))] <- varcomb[t(.combn(vars[selected,], 2))] + 1
-
- # remove used combination
- vars <- vars[-selected, ]
-
- }
-
- # assign last one
- varOrdered[npackages,] <- vars
-
- # Equally distribute positions to achieve order balance
- # First create matrix with frequency of position of each of nitems
- position <- matrix(0, ncol = ncomp, nrow = nitems)
-
- # Sequentially reorder sets to achieve evenness in positions
- # Shannon is good here, because evenness values are proportional
- # the H denominator in the Shannon formula is the same
-
- for (i in 1:npackages) {
-
- varOrdered_all <- .getPerms(varOrdered[i,])
- varOrdered_Shannon <- apply(varOrdered_all, 1, function(x) {
- .getShannonMatrix(x, position)
- })
- varOrdered_i <- varOrdered_all[which(varOrdered_Shannon ==
- min(varOrdered_Shannon))[1],]
- varOrdered[i,] <- varOrdered_i
- pp <- position * 0
- pp[cbind(varOrdered_i,1:ncomp)] <- 1
- position <- position + pp
-
- }
-
- # The varOrdered matrix has the indices of the elements
- # Create the final matrix
- finalresults <- matrix(NA, ncol = ncomp, nrow = npackages)
-
- # loop over the rows and columns of the final matrix and put
- # the elements randomized
- # with the indexes in varOrdered
- for (i in seq_len(npackages)){
- for (j in seq_len(ncomp)){
- finalresults[i,j] <- itemnames[varOrdered[i,j]]
- }
- }
-
- dimnames(finalresults) <- list(seq_len(npackages),
- paste0("item_", LETTERS[1:ncomp]))
-
- finalresults <- as.data.frame(finalresults, stringsAsFactors = FALSE)
-
- r <- table(unlist(finalresults))[itemnames]
-
- if (!all(r <= availability)) {
-
- few <- itemnames[!r <= availability]
- nfew <- availability[!r <= availability]
- nmin <- r[!r <= availability]
-
- stop("You indicated the availability of ", paste(nfew, collapse = ", "), " packages for ",
- paste(few, collapse = ", "), " but you require a minimum of ",
- paste(nmin, collapse = ", "), " for the given items \n" )
-
- }
-
- class(finalresults) <- union("CM_df", class(finalresults))
-
- return(finalresults)
-
-}
-
-#' @inheritParams randomise
-#' @export
-randomize <- function(...){
-
- randomise(...)
-
-}
-
-# Define function for Kirchhoff index
-# This index determines which graph is connected in the most balanced way
-# In this context, lower values (lower resistance) is better
-.KirchhoffIndex <- function(x) {
- # The input matrix only has one triangle filled
- # First we make it symmetric
- x <- x + t(x)
-
- # Then some maths to get the Kirchhoff index
-
- # Using rARPACK:eigs, setting k to n-1 because we don't need the
- # last eigen value
- Laplacian <- methods::as(Matrix::Diagonal(x = colSums(x)) - x,
- "dsyMatrix")
- lambda <-
- try(RSpectra::eigs(Laplacian,
- k = (dim(Laplacian)[1] - 1),
- tol = 0.01,
- retvec = FALSE)$values, silent = TRUE)
- if (inherits(lambda, "try-error"))
- lambda <- Inf
- # RSpectra:eigs is faster than base:eigen
- # lambda <- eigen(Laplacian)$values
- # lambda <- lambda[-length(lambda)]
-
- return(sum(1 / lambda))
-
-}
-
-# get all permutations
-.getPerms <- function(x) {
- if (length(x) == 1) {
- return(x)
- }
- else {
- res <- matrix(nrow = 0, ncol = length(x))
- for (i in seq_along(x)) {
- res <- rbind(res, cbind(x[i], Recall(x[-i])))
- }
- return(res)
- }
-}
-
-# Shannon (as evenness measure)
-.shannon <- function(x){
- sum(ifelse(x == 0, 0, x * log(x)))
-}
-
-# Get Shannon index for order positions
-.getShannonMatrix <- function(x, position) {
- pp <- position * 0
- pp[cbind(x, 1:length(x))] <- 1
- pp <- position + pp
- return(.shannon(as.vector(pp)))
-
-}
-
-.getShannonVector <- function(x, sumcomb, nitems) {
- xi <- rep(0, times = nitems)
- xi[x] <- 1
- return(.shannon(sumcomb + xi))
-
-}
-
-.combn <- function (x, m, FUN = NULL, simplify = TRUE, ...)
-{
- stopifnot(length(m) == 1L, is.numeric(m))
- if (m < 0)
- stop("m < 0", domain = NA)
- if (is.numeric(x) && length(x) == 1L && x > 0 && trunc(x) ==
- x)
- x <- seq_len(x)
- n <- length(x)
- if (n < m)
- stop("n < m", domain = NA)
- x0 <- x
- if (simplify) {
- if (is.factor(x))
- x <- as.integer(x)
- }
- m <- as.integer(m)
- e <- 0
- h <- m
- a <- seq_len(m)
- nofun <- is.null(FUN)
- if (!nofun && !is.function(FUN))
- stop("'FUN' must be a function or NULL")
- len.r <- length(r <- if (nofun) x[a] else FUN(x[a], ...))
- count <- as.integer(round(choose(n, m)))
- if (simplify) {
- dim.use <- if (nofun)
- c(m, count)
- else {
- d <- dim(r)
- if (length(d) > 1L)
- c(d, count)
- else if (len.r > 1L)
- c(len.r, count)
- else c(d, count)
- }
- }
- if (simplify)
- out <- matrix(r, nrow = len.r, ncol = count)
- else {
- out <- vector("list", count)
- out[[1L]] <- r
- }
- if (m > 0) {
- i <- 2L
- nmmp1 <- n - m + 1L
- while (a[1L] != nmmp1) {
- if (e < n - h) {
- h <- 1L
- e <- a[m]
- j <- 1L
- }
- else {
- e <- a[m - h]
- h <- h + 1L
- j <- 1L:h
- }
- a[m - h + j] <- e + j
- r <- if (nofun)
- x[a]
- else FUN(x[a], ...)
- if (simplify)
- out[, i] <- r
- else out[[i]] <- r
- i <- i + 1L
- }
- }
- if (simplify) {
- if (is.factor(x0)) {
- levels(out) <- levels(x0)
- class(out) <- class(x0)
- }
- dim(out) <- dim.use
- }
- out
-}
-
-# Rounding values to closest integer while retaining the same sum
-# From https://stackoverflow.com/questions/32544646/round-vector-of-numerics-to-integer-while-preserving-their-sum
-.smart.round <- function(x) {
- y <- floor(x)
- indices <- utils::tail(order(x-y), round(sum(x)) - sum(y))
- y[indices] <- y[indices] + 1
- y
-}
-
-
diff --git a/R/randomize.R b/R/randomize.R
new file mode 100644
index 0000000..cda8ce8
--- /dev/null
+++ b/R/randomize.R
@@ -0,0 +1,592 @@
+#' Set an experimental incomplete block design
+#'
+#' Generate an incomplete block A-optional design. The function is optimized for
+#' incomplete blocks of three, but it will also work with comparisons of any
+#' other number of options.
+#' The design strives for approximate A optimality, this means that it is robust
+#' to missing observations. It also strives for balance for positions of each option.
+#' Options are equally divided between first, second, third, etc. position.
+#' The strategy is to create a "pool" of combinations that does not repeat
+#' combinations and is A-optimal. Then this pool is ordered to make subsets of
+#' consecutive combinations also relatively balanced and A-optimal
+#'
+#' @author Jacob van Etten
+#' @param ncomp an integer for the number of items to be assigned to each incomplete block
+#' @param npackages an integer for the number of incomplete blocks to be generated
+#' @param itemnames a character for the name of items tested in the experiment
+#' @param availability optional, a vector with integers indicating the
+#' number of plots available for each \var{itemnames}
+#' @param props optional, a numeric vector with the desired proportions
+#' for each \var{itemnames}
+#' @param ... additional arguments passed to methods
+#' @references
+#' Bailey and Cameron (2004). Combinations of optimal designs.
+#' \url{https://webspace.maths.qmul.ac.uk/l.h.soicher/designtheory.org/library/preprints/optimal.pdf}
+#' @return A dataframe with the randomized design
+#' @examples
+#' ncomp = 3
+#' npackages = 20
+#' itemnames = c("apple","banana","grape","mango", "orange")
+#' availability = c(5, 8, 50, 50, 50)
+#'
+#' randomize(ncomp = ncomp,
+#' npackages = npackages,
+#' itemnames = itemnames)
+#'
+#' randomize(ncomp = ncomp,
+#' npackages = npackages,
+#' itemnames = itemnames,
+#' availability = availability)
+#'
+#' @aliases randomise
+#' @importFrom Matrix Diagonal
+#' @importFrom methods as
+#' @importFrom RSpectra eigs
+#' @importFrom utils tail
+#' @importFrom stats runif
+#' @importFrom lpSolve lp
+#' @export
+randomize <- function(npackages,
+ itemnames,
+ ncomp = 3,
+ availability = NULL,
+ props = NULL,
+ ...) {
+
+ dots <- list(...)
+
+ comp <- dots[["comp"]]
+
+ if (is.null(comp)) {
+ comp <- 10
+ }
+
+ nitems <- length(itemnames)
+
+ # depth1 is the number of rows after the procedure starts to compare options with the Kirchoff index
+ depth1 <- floor(nitems / ncomp )
+
+ # Varieties indicated by integers
+ varieties <- seq_len(nitems)
+
+ # Full set of all combinations
+ varcombinations <- .combn(varieties, ncomp)
+
+ # check inputs
+ if (!is.null(availability) & (length(availability) != nitems)) {
+ stop("nitems is different than length of vector with availability")
+ }
+
+ if (nitems < 3) {
+ stop("nitems must be larger than 2")
+ }
+
+ nneeded <- npackages * ncomp
+
+ if (!is.null(availability)) {
+
+ if (sum(availability) < nneeded) {
+ stop("availability is not sufficient: smaller than npackages * ncomp \n")
+ }
+ if (length(availability) != nitems) {
+ stop("length of vector availability should be nitems \n")
+ }
+ }
+
+ if (!is.null(props)) {
+
+ if (length(props) != nitems) {
+ stop("length of vector props should be nitems")
+
+ }
+
+ if (sum(props) != 1) {
+
+ props <- props / sum(props)
+ warning("sum of props is not 1; values have been rescaled \n")
+
+ }
+ }
+
+ #create the objects available or props if they are not available
+ if (is.null(availability)) {
+ availability <- rep(npackages, times = nitems)
+ }
+
+ available <- availability
+
+ if (is.null(props)) {
+ props <- rep(1/nitems, times = nitems) #available / sum(available)
+ }
+
+ #order vector from low availability to high as .smart.round will favour right size of vector
+ #to resolve dilemmas, it will add more of the items that are more abundant
+ names(available) <- varieties
+ available <- sort(available)
+ props <- props[as.integer(names(available))]
+
+ # calculate the packages that are needed - this will round later numbers to higher values
+ # if needed to fill the quota
+ needed <- .smart.round((nneeded * props) / sum(props))
+
+ # prepare inputs into loop
+ allocations <- rep(0, times = nitems)
+ names(allocations) <- names(available) #just to check, can be removed
+ tremain <- 1
+
+ while(tremain > 0) {
+
+ allocate <- pmin(available, needed)
+ available <- available - allocate
+
+ allocations <- allocations + allocate
+ tremaining <- nneeded - sum(allocations)
+
+ needed <- available > 0
+ needed <- needed * (tremaining / sum(needed))
+ needed <- .smart.round(needed)
+
+ tremain <- tremaining
+
+ }
+
+ #reorder the vector with allocations back to original order
+ allocations <- allocations[match(1:nitems, as.integer(names(available)))] #should be in ascending order
+ allocationsMatrix <- makeAllocationsMatrix(allocations)
+
+ # prepare variables
+ ncomb <- dim(varcombinations)[1]
+ n <- floor(npackages / ncomb)
+ nfixed <- ncomb * n
+ n <- ceiling(npackages / ncomb)
+
+ # make a set of variety combinations
+ # repeating the combinations if the unique combinations are not sufficient
+ vars <- varcombinations[c(rep(1:(dim(varcombinations)[1]), times = n)), ]
+
+ # create matrix that will hold the blocks
+ blocks <- matrix(nrow = npackages, ncol = ncomp)
+
+ # set up array with set of combinations
+ varcomb <- matrix(0, nrow = nitems, ncol = nitems)
+
+ if (dim(vars)[1] > 0.5) {
+
+ # select combinations for the vars set that optimize design
+ for (i in 1:npackages) {
+
+ varcombScore <- apply(varcombinations, 1, function(x){
+ .getScoreBlocks(x, varcomb, allocationsMatrix)
+ })
+ # highest priority to be selected is the combination which has the highest score
+ selected <- which(varcombScore >= (max(varcombScore)))
+
+ # if there are ties, find out which combination reduces Kirchhoff index most
+
+ if (length(selected) > 1 & i > depth1) {
+
+ reduce <- max(2, min(length(selected), comp))
+
+ # randomly subsample from selected if there are too many combinations to check
+ if (length(selected) > reduce) {
+ selected <- sample(selected, reduce)
+ }
+
+ # calculate Kirchhoff index and select smallest value
+ khi <- vector(length = length(selected))
+ for (k in 1:length(selected)) {
+
+ evalgraph <- varcomb
+ index <- .combn(varcombinations[selected[k],], 2)
+ evalgraph[index] <- evalgraph[index] + 1
+ evalgraph[cbind(index[,2], index[,1])] <- evalgraph[cbind(index[,2], index[,1])] + 1
+ khi[k] <- .KirchhoffIndex(evalgraph / (allocationsMatrix+diag(nrow(allocationsMatrix))))
+
+ }
+
+ selected <- selected[which(khi == min(khi))]
+
+ }
+
+ # if there are still ties between ranks of combinations, selected randomly
+ # from the ties
+ if (length(selected) > 1) {
+ selected <- sample(selected, 1)
+ }
+
+ # assign the selected combination
+ blocks[i,] <- varcombinations[selected,]
+
+ index <- .combn(varcombinations[selected,],2)
+ varcomb[index] <- varcomb[index] + 1
+ varcomb[cbind(index[,2], index[,1])] <- varcomb[cbind(index[,2], index[,1])] + 1
+
+ }
+ }
+
+ varOrdered <- blocks
+
+ # Equally distribute positions to achieve order balance
+ # First create matrix with frequency of position of each of nitems
+ position <- matrix(0, ncol = ncomp, nrow = nitems)
+
+ # Sequentially reorder sets to achieve evenness in positions
+ # Shannon represents evenness here
+ # the H denominator in the Shannon formula is the same
+
+ for (i in 1:npackages) {
+
+ varOrdered_all <- .getPerms(varOrdered[i,])
+ varOrdered_Shannon <- apply(varOrdered_all, 1, function(x) {
+ .getShannonMatrix(x, position)
+ })
+ varOrdered_i <- varOrdered_all[which(varOrdered_Shannon ==
+ min(varOrdered_Shannon))[1],]
+ varOrdered[i,] <- varOrdered_i
+ pp <- position * 0
+ pp[cbind(varOrdered_i,1:ncomp)] <- 1
+ position <- position + pp
+
+ }
+
+ # The varOrdered matrix has the indices of the elements
+ # Create the final matrix
+ finalresults <- matrix(NA, ncol = ncomp, nrow = npackages)
+
+ # loop over the rows and columns of the final matrix and put
+ # the elements randomized
+ # with the indexes in varOrdered
+ for (i in seq_len(npackages)){
+ for (j in seq_len(ncomp)){
+ finalresults[i,j] <- itemnames[varOrdered[i,j]]
+ }
+ }
+
+ dimnames(finalresults) <- list(seq_len(npackages),
+ paste0("item_", LETTERS[1:ncomp]))
+
+ finalresults <- as.data.frame(finalresults, stringsAsFactors = FALSE)
+
+ r <- table(unlist(finalresults))[itemnames]
+
+ if (!is.null(availability)){
+ if (!all(r <= availability)) {
+
+ few <- itemnames[!r <= availability]
+ nfew <- availability[!r <= availability]
+ nmin <- r[!r <= availability]
+
+ warning("You indicated the availability of ", paste(nfew, collapse = ", "), " packages for ",
+ paste(few, collapse = ", "), " but you require a minimum of ",
+ paste(nmin, collapse = ", "), " for the given items. \nYou could try to run the randomization again to solve this issue." )
+
+ }
+ }
+
+ class(finalresults) <- union("CM_df", class(finalresults))
+
+ return(finalresults)
+
+}
+
+#' @inheritParams randomize
+#' @export
+randomise <- function(...){
+
+ randomize(...)
+
+}
+# Define function for Kirchhoff index
+# This index determines which graph is connected in the most balanced way
+# In this context, lower values (lower resistance) is better
+#' @noRd
+.KirchhoffIndex <- function(x) {
+
+ # Add a tiny bit of noise to avoid zeros
+ noise <- x * 0 + stats::runif(length(x))/length(x)^3
+ noise <- noise + t(noise)
+ x <- x + noise
+
+ # Then some maths to get the Kirchhoff index
+
+ # Using rARPACK:eigs, setting k to n-1 because we don't need the
+ # last eigen value
+ Laplacian <- methods::as(Matrix::Diagonal(x = colSums(x)) - x,
+ "dsyMatrix")
+
+ # RSpectra:eigs is faster than base:eigen
+ # The following would also work if we want to reduce a dependency
+ # lambda <- eigen(Laplacian)$values
+ # lambda <- lambda[-length(lambda)]
+ lambda <-
+ try(RSpectra::eigs(Laplacian,
+ k = (dim(Laplacian)[1] - 1),
+ tol = 0.01,
+ retvec = FALSE)$values, silent = TRUE)
+ if(inherits(lambda, "try-error")){lambda <- Inf}
+
+ return(sum(1 / lambda))
+
+}
+
+# get all permutations
+#' @noRd
+.getPerms <- function(x) {
+ if (length(x) == 1) {
+ return(x)
+ }
+ else {
+ res <- matrix(nrow = 0, ncol = length(x))
+ for (i in seq_along(x)) {
+ res <- rbind(res, cbind(x[i], Recall(x[-i])))
+ }
+ return(res)
+ }
+}
+
+# Shannon (as evenness measure)
+#' @noRd
+.shannon <- function(x){
+ sum(ifelse(x == 0, 0, x * log(x)))
+}
+
+# Get Shannon index for order positions
+#TODO check this function!!!
+#' @noRd
+.getShannonMatrix <- function(x, position) {
+
+ pp <- position * 0
+ pp[cbind(x, 1:length(x))] <- 1
+ pp <- position + pp
+ return(.shannon(as.vector(pp)))
+
+}
+
+#' @noRd
+.getScoreBlocks <- function(x, varcomb, allocationsMatrix) {
+
+ #get combinations from vector of varieties x
+ cb <- .combn(x,2)
+
+ #get progress
+ progress <- sum(varcomb) / sum(allocationsMatrix)
+
+ #get score pairwise
+ score1 <- varcomb[cb]/allocationsMatrix[cb] < progress
+
+ #get score sums
+ score2 <- colSums(varcomb)[x]/colSums(allocationsMatrix)[x] < progress
+
+ #the smallest available amount should be avoided, so this check penalizes it
+ if(any(score2 == FALSE) & min(colSums(varcomb)[x]) == min(colSums(varcomb))){score2[1] <- score2[1]-1}
+
+ #calculate total score
+ score <- sum(score1+score2)
+
+ return(score)
+
+}
+
+#' @noRd
+makeAllocationsMatrix <- function(allocations){
+
+ #prepare basic parameters and empty matrix
+ n1 <- length(allocations)
+ combs <- .combn(1:n1, 2)
+ n2 <- nrow(combs)
+ a <- matrix(0,nrow=n1, ncol=n2)
+
+ #fill matrix with constraints on row/column sums
+ for(i in 1:n1){
+
+ a[i,] <- (combs[,1] == i | combs[,2] == i)
+
+ }
+
+ #include auxiliary variable z to the matrix
+ #this variable will be maximized, pushing all values up equally
+ minShare <- pmin(allocations[combs[,1]], allocations[combs[,2]]) / ((n1-1)/2)
+ f.con <- rbind(a, -diag(n2))
+ f.con <- cbind(f.con, c(rep(0, times=n1), minShare))
+
+ # set up vector with allocations (column sums) and zeros for z constraint
+ f.rhs <- c(allocations, rep(0, times=n2))
+
+ #objective function emphasizes raising the z value, which increases an equal spread
+ f.obj <- c(rep(1, times=n2), max(allocations)^2)
+ f.dir <- rep("<=", times=n1+n2)
+
+ sol <- lpSolve::lp("max", f.obj, f.con, f.dir, f.rhs)
+ x <- sol$solution[1:n2]
+ result <- matrix(0, nrow=n1, ncol=n1)
+ result[combs] <- x
+ result <- result + t(result)
+ return(result)
+
+}
+
+#' @noRd
+.combn <- function (x, m, FUN = NULL, simplify = TRUE, ...)
+{
+ stopifnot(length(m) == 1L, is.numeric(m))
+ if (m < 0)
+ stop("m < 0", domain = NA)
+ if (is.numeric(x) && length(x) == 1L && x > 0 && trunc(x) ==
+ x)
+ x <- seq_len(x)
+ n <- length(x)
+ if (n < m)
+ stop("n < m", domain = NA)
+ x0 <- x
+ if (simplify) {
+ if (is.factor(x))
+ x <- as.integer(x)
+ }
+ m <- as.integer(m)
+ e <- 0
+ h <- m
+ a <- seq_len(m)
+ nofun <- is.null(FUN)
+ if (!nofun && !is.function(FUN))
+ stop("'FUN' must be a function or NULL")
+ len.r <- length(r <- if (nofun) x[a] else FUN(x[a], ...))
+ count <- as.integer(round(choose(n, m)))
+ if (simplify) {
+ dim.use <- if (nofun)
+ c(m, count)
+ else {
+ d <- dim(r)
+ if (length(d) > 1L)
+ c(d, count)
+ else if (len.r > 1L)
+ c(len.r, count)
+ else c(d, count)
+ }
+ }
+ if (simplify)
+ out <- matrix(r, nrow = len.r, ncol = count)
+ else {
+ out <- vector("list", count)
+ out[[1L]] <- r
+ }
+ if (m > 0) {
+ i <- 2L
+ nmmp1 <- n - m + 1L
+ while (a[1L] != nmmp1) {
+ if (e < n - h) {
+ h <- 1L
+ e <- a[m]
+ j <- 1L
+ }
+ else {
+ e <- a[m - h]
+ h <- h + 1L
+ j <- 1L:h
+ }
+ a[m - h + j] <- e + j
+ r <- if (nofun)
+ x[a]
+ else FUN(x[a], ...)
+ if (simplify)
+ out[, i] <- r
+ else out[[i]] <- r
+ i <- i + 1L
+ }
+ }
+ if (simplify) {
+ if (is.factor(x0)) {
+ levels(out) <- levels(x0)
+ class(out) <- class(x0)
+ }
+ dim(out) <- dim.use
+ }
+ return(t(out))
+}
+
+# Rounding values to closest integer while retaining the same sum
+# From https://stackoverflow.com/questions/32544646/round-vector-of-numerics-to-integer-while-preserving-their-sum
+#' @noRd
+.smart.round <- function(x) {
+ y <- floor(x)
+ indices <- utils::tail(order(x-y), round(sum(x)) - sum(y))
+ y[indices] <- y[indices] + 1
+ y
+}
+
+# #-----------------Run an example-------------------
+#
+# ncomp = 3
+# npackages = 238
+# vars = 15
+# proportions = rep(1, vars)/vars
+# itemnames = c(LETTERS[1:vars])
+# availability = rep(ceiling(238*3/vars), times=vars)
+# availability[2] = availability[2]*2
+# availability[5] = availability[5]/2
+#
+# a = randomise(ncomp = ncomp,
+# npackages = npackages,
+# itemnames = itemnames,
+# availability = availability)
+#
+#
+# a
+#
+# #----------Diagnostics-------------
+# #the only input assumed here is table a
+# #get item names from the table
+# ua = sort(unique(c(t(a))))
+# ncomp = ncol(a)
+#
+# comb_balance = matrix(0, nrow=length(ua), ncol=length(ua))
+#
+# for(i in 1:nrow(a)){
+#
+# j = t(combn(match(a[i,], ua),2))
+# comb_balance[j] = comb_balance[j] + 1
+#
+# }
+#
+# cb = comb_balance + t(comb_balance)
+# cb
+# #show total number of times options are included in packages
+# cb = rbind(cb, rowSums(cb)/2)
+#
+# #show result nicely
+# rownames(cb) = c(ua, "Total")
+# colnames(cb) = ua
+# cb
+# #check the distances between packages that contain the same option
+# #as a measure of sequential balance
+# d = matrix(NA, nrow = length(itemnames), ncol=ncomp)
+#
+# for(i in 1:length(ua)) {
+# s = apply(a, 1, function(x){sum(x==ua[i])})
+# di = diff(which(s==1))
+# hist(di)
+# d[i,] = c(mean(di), sd(di), max(di))
+# }
+# colnames(d) = c("mean", "sd", "max")
+# rownames(d) = ua
+#
+# # print d nicely
+# format(as.data.frame(d), digits=3)
+#
+# #check if options are equally distributed across columns
+# f = matrix(NA, nrow=length(ua), ncol=ncomp)
+# rownames(f) = ua
+# f
+# for(i in 1:ncomp){
+#
+# tai = table(a[,i])
+# f[names(tai),i] = tai
+#
+# }
+#
+# f
+# comb_balance
+# # connection graph
+# g = graph_from_adjacency_matrix(comb_balance+t(comb_balance), mode = "lower", weighted = "weight")
+# g
+# plot(g, edge.width = E(g)$weight, edge.label = E(g)$weight)
+
diff --git a/R/rankTricot.R b/R/rankTricot.R
index 8fe32b8..9ceb0a1 100644
--- a/R/rankTricot.R
+++ b/R/rankTricot.R
@@ -1,6 +1,6 @@
#' Build Plackett-Luce rankings from tricot dataset
#'
-#' Create an object of class "rankings" from tricot data.
+#' Create an object of class "rankings" from tricot data
#'
#' @author Kauê de Sousa and Jacob van Etten, with ideas from Heather Turner
#' @param data a data.frame with columns specified by items and input values
@@ -9,6 +9,8 @@
#' @param input a character or numerical vector for indexing the column(s)
#' containing the values in \code{data} to be ranked
#' @param group logical, if \code{TRUE} return an object of class "grouped_rankings"
+#' @param validate.rankings logical, if \code{TRUE} implements a check on ranking consistency
+#' looking for possible ties, NA or letters other than A, B, C. These entries are set to 0
#' @param additional.rank optional, a data frame for the comparisons between
#' tricot items and the local item
#' @param ... additional arguments passed to methods. See details
@@ -31,7 +33,7 @@
#'
#' # first build rankings with only tricot items
#' # and return an object of class 'rankings'
-#' R <- rankTricot(data = beans,
+#' R = rankTricot(data = beans,
#' items = c(1:3),
#' input = c(4:5))
#' head(R)
@@ -41,7 +43,7 @@
#' # pass the comparison with local item as an additional rankings, then
#' # each of the 3 varieties are compared separately with the local item
#' # and return an object of class grouped_rankings
-#' G <- rankTricot(data = beans,
+#' G = rankTricot(data = beans,
#' items = c(1:3),
#' input = c(4:5),
#' group = TRUE,
@@ -51,219 +53,194 @@
#' }
#'
#' @export
-rankTricot <- function(data, items, input,
- group = FALSE,
- additional.rank = NULL,
- ...) {
+rankTricot = function(data,
+ items,
+ input,
+ group = FALSE,
+ validate.rankings = FALSE,
+ additional.rank = NULL,
+ ...) {
# if tibble coerce into a data.frame
if (.is_tibble(data)) {
- data <- as.data.frame(data, stringsAsFactors = FALSE)
+ data = as.data.frame(data, stringsAsFactors = FALSE)
}
- items <- data[, items]
+ items = data[, items]
- input <- data[, input]
+ input = data[, input]
# get nrow
- n <- nrow(data)
+ n = nrow(data)
# get extra arguments
- dots <- list(...)
+ dots = list(...)
# if all data is required
- full.output <- dots[["full.output"]]
-
- # check number of comparisons to decide which way data will
- # be handled, if type is tricot or if it contains more comparisons
- ncomp <- ncol(items)
+ full.output = dots[["full.output"]]
- # with 3 comparisons
- if (ncomp == 3) {
-
- n <- nrow(items)
-
- # check for more than two missing labels in items
- mi <- rowSums(apply(items, 2, is.na))
- if (any(mi > 1)) {
- stop("Cannot handle more than 2 NAs per row in 'items',
+ n = nrow(items)
+
+ # check for more than two missing labels in items
+ mi = rowSums(apply(items, 2, is.na))
+ if (any(mi > 1)) {
+ stop("Cannot handle more than 2 NAs per row in 'items',
more than 2 NAs where found in rows ",
- paste(which(mi > 1), collapse = ", "), "\n")
- }
-
- # if there is one NA per row in items and observations
- # with only two items add a pseudo-item which will be removed later
- if (any(mi == 1)) {
- items[is.na(items)] <- "pseudoitem"
- }
-
- # data frame with items as matrix
- im <- as.matrix(items)
-
- # get the names of items
- itemnames <- unique(as.vector(im))
-
- # a Sparse matrix where rows are the observations
- # and columns the item names
- r <- matrix(0, nrow = n, ncol = length(itemnames))
- colnames(r) <- itemnames
+ paste(which(mi > 1), collapse = ", "), "\n")
+ }
+
+ # if there is one NA per row in items and observations
+ # with only two items add a pseudo-item which will be removed later
+ if (any(mi == 1)) {
+ items[is.na(items)] = "pseudoitem"
+ }
+
+ # validate rankings, and set to 0 if required
+ keep = .validate_rankings(input)
+
+ out = which(keep == FALSE)
+
+ # data frame with items as matrix
+ im = as.matrix(items)
+
+ # get the names of items
+ itemnames = unique(as.vector(im))
+
+ # a Sparse matrix where rows are the observations
+ # and columns the item names
+ r = matrix(0, nrow = n, ncol = length(itemnames))
+ colnames(r) = itemnames
+
+ # run over the rows filling the rankings that were observed
+ for(j in seq_len(n)){
- # run over the rows filling the rankings that were observed
- for(j in seq_len(n)){
-
- r[j, im[j,]] <- .setorder(as.vector(unlist(input[j,])))
+ r[j, im[j,]] = .setorder(as.vector(unlist(input[j,])))
- }
+ }
+
+ R = PlackettLuce::as.rankings(r)
+
+ # if ranking validation was required, rankings that did not passed the
+ # validation are set to 0, this does not affect the final length
+ # of the rankings
+ if (isTRUE(validate.rankings)) {
- a <- list(x = r)
+ R[!keep] = 0
- R <- do.call("as.rankings", args = a)
-
- # if full output is required, for internal use
- # put r into the ordering format
- if (isTRUE(full.output)) {
- r2 <- matrix("", nrow = n, ncol = 3)
- colnames(r2) <- c("best", "middle", "worst")
- r[r==0] <- NA
- for(j in seq_len(n)) {
- jr <- sort(r[j, !is.na(r[j, ])])
- if (sum(jr == 2) > 1) {
- names(jr)[jr == 2] <- paste(names(jr[jr == 2]), collapse = ", ")
- }
- r2[j, ] <- names(jr)
- }
- r <- r2
+ }
+
+ if (length(out) > 0) {
+ messag = paste0("Ties, NA's or letters different than A, B, C, were identified in rows ",
+ paste(out, collapse = ", "), "\n")
+ if (isFALSE(validate.rankings)) {
+ messag = paste(messag, "Use validate.rankings = TRUE to ignore these entries\n")
}
-
+ warning(messag)
}
- # with 4 or more comparisons
- if (ncomp >= 4) {
-
- r <- .pivot_tetra(i = items, r = input)
-
- # get item names
- itemnames <- sort(unique(as.vector(r)))
-
- # make a PlackettLuce rankings
- a <- list(x = r,
- input = "ordering",
- items = itemnames)
- R <- do.call("as.rankings", args = a)
-
+ # if full output is required, for internal use
+ # put r into the ordering format
+ if (isTRUE(full.output)) {
+ r2 = matrix("", nrow = n, ncol = 3)
+ colnames(r2) = c("best", "middle", "worst")
+ r[r==0] = NA
+ for(j in seq_len(n)) {
+ jr = sort(r[j, !is.na(r[j, ])])
+ if (sum(jr == 2) > 1) {
+ names(jr)[jr == 2] = paste(names(jr[jr == 2]), collapse = ", ")
+ }
+ r2[j, ] = names(jr)
+ }
+ r = r2
}
# if pseudo-item were added, it is removed
- pseudo <- grepl("pseudoitem", itemnames)
+ pseudo = grepl("pseudoitem", itemnames)
if (any(pseudo)) {
- R <- R[, !pseudo]
+ R = R[, !pseudo]
}
# check if additional rankings are required
if (!is.null(additional.rank)) {
# add comparisons with local rankings
- R <- .additional_rankings(i = items, R = R, add = additional.rank)
+ R = .additional_rankings(i = items, R = R, add = additional.rank)
}
# and into a grouped_rankings
- gi <- rep(seq_len(n), (nrow(R) / n))
-
- a <- list(x = R,
- index = gi)
-
- G <- do.call("group", args = a)
+ gi = rep(seq_len(n), (nrow(R) / n))
+ G = PlackettLuce::group(R, index = gi)
# check if all data is required
if (isTRUE(full.output)) {
- R <- list(PLranking = R, PLgrouped = G, myrank = r)
+ R = list(PLranking = R, PLgrouped = G, myrank = r)
}
# return a grouped_rankings if required
if (group) {
- R <- G
+ R = G
}
return(R)
}
+
+#' Validate rankings
+#'
+#' This check ranking consistency making sure that
+#' no NAs or ties are mantained in the final PlackettLuce ranking
+#'
+#' @param x data.frame with two columns indicating the tricot rankings
+#' @noRd
+.validate_rankings = function(x) {
+
+ ABC = apply(x, 1, function(y) {
+ all(y %in% LETTERS[1:3])
+ })
+
+ noNA = apply(x, 1, function(y) {
+ all(!is.na(y))
+ })
+
+ noDups = apply(x, 1, function(y) {
+ all(!duplicated(y))
+ })
+
+ keep = as.vector(ABC & noNA & noDups)
+
+ return(keep)
+
+}
+
+
#' Set the order of tricot rankings
-#' @param x a vector of length 3 with the tricot letters A, B, C,
-#' a Tie will be assigned the value 2
+#'
+#' This function set the indices to place the order of best worst
+#' technologies indicates in the tricot approach
+#'
+#' @param x a vector of length 2 with the LETTERS A, B or C, or Tie
+#' first element in the vector indicates the best technology,
+#' second element indicates the worst technology
#' @examples
-#' x <- c("C", "Tie")
+#' x = c("C", "Tie")
#' gosset:::.setorder(x)
#'
-#' x <- c("A", "B")
+#' x = c("A", "B")
#' gosset:::.setorder(x)
#' @noRd
-.setorder <- function(x){
- s <- rep(2, times = 3) #default value is 2
- L <- LETTERS[1:3]
+.setorder = function(x){
+ # default value is 2
+ s = rep(2, times = 3)
+ L = LETTERS[1:3]
# works backwards from C to A to give most importance
# to item(s) listed as better
- s[L %in% strsplit(x[2], split="")] <- 3
- s[L %in% strsplit(x[1], split="")] <- 1
+ s[L %in% strsplit(x[2], split = "")] = 3
+ s[L %in% strsplit(x[1], split = "")] = 1
- #avoid skipped numbers
- # s <- order(s)
return(s)
-}
-
-#' Tetra rankings
-#' This function deals with object in the tetra approach
-#' in ClimMob when four or more items are tested by each participant
-#' @param i is a dataframe with items
-#' @param r is a dataframe with rankings
-#' @noRd
-.pivot_tetra <- function(i, r){
-
- # fix names in r data
- names(r) <- paste0("PosItem", 1:ncol(r))
-
- # get the number of possible rankings
- nrank <- ncol(r)
- # number of rows
- n <- nrow(r)
-
- # handle NAs
- r[r==0] <- NA
-
- for (z in seq_len(nrank)) {
- rm <- is.na(i[, z]) | is.na(r[, z])
- i[rm , z] <- NA
- r[rm , z] <- NA
- }
- # check for more than two missing labels in items
- mi <- rowSums(t(apply(i, 1, is.na)))
- mi <- nrank - mi
- if( any(mi <= 1) ) {
- stop("Cannot handle more than 2 NAs per row in 'items' \n")
- }
-
- # if there is accepted NAs in items
- # put a label as pseudoitem which will be removed later
- if ( any(mi < nrank) ) {
- i <- t(apply(i, 1, function(p) {
- pi <- length(p[is.na(p)])
- p[is.na(p)] <- paste0("pseudoitem", 1:pi)
- p
- }))
- }
-
- # add a very high value if there is accepted NAs in rankings
- if (any(mi < nrank)) {
- r[is.na(r)] <- 999999
- }
-
- decoded <- .decode_ranking(i, r)
-
- return(decoded)
-
-}
-
+}
#' this function adds additional ranks, generally when a local item
#' is tested against the tricot items
@@ -273,18 +250,18 @@ rankTricot <- function(data, items, input,
#' indication whether the tricot items performed "Better" or "Worse"
#' compared to the local item
#' @noRd
-.additional_rankings <- function(i, R, add){
+.additional_rankings = function(i, R, add){
- n <- nrow(add)
+ n = nrow(add)
- ncomp <- ncol(i)
+ ncomp = ncol(i)
# convert it into characters
- add[1:ncol(add)] <- lapply(add[1:ncol(add)], as.character)
+ add[1:ncol(add)] = lapply(add[1:ncol(add)], as.character)
- add <- as.matrix(add)
+ add = as.matrix(add)
- i <- as.matrix(i)
+ i = as.matrix(i)
# treat these comparisons as additional rankings.
# first we convert the orderings of the items to
@@ -296,9 +273,9 @@ rankTricot <- function(data, items, input,
# make sure that values in add are integers
# where 1 means Better and 2 means Worse
- add <- apply(add, 2, function(x) {
- x <- ifelse(x == "Better" | x == 1, 1,
- ifelse(x == "Worse" | x == 2, 2, NA))
+ add = apply(add, 2, function(x) {
+ x = ifelse(x == "Better" | x == 1, 1,
+ ifelse(x == "Worse" | x == 2, 2, NA))
x
})
@@ -308,36 +285,31 @@ rankTricot <- function(data, items, input,
}
# add local to itemnames
- itemnames <- dimnames(R)[[2]]
- itemnames <- unique(c("Local Check", itemnames))
+ itemnames = dimnames(R)[[2]]
+ itemnames = unique(c("Local", itemnames))
- paired <- list()
+ paired = list()
for (p in seq_len(ncomp)) {
- ordering <- matrix("Local Check", nrow = n, ncol = 2)
- worse <- add[, p] == 2
+ ordering = matrix("Local", nrow = n, ncol = 2)
+ worse = add[, p] == 2
# name of winner
- ordering[!worse, 1] <- i[, p][!worse]
+ ordering[!worse, 1] = i[, p][!worse]
# name of loser
- ordering[worse, 2] <- i[, p][worse]
- paired[[p]] <- ordering
+ ordering[worse, 2] = i[, p][worse]
+ paired[[p]] = ordering
}
# we then convert these orderings to sub-rankings of the full set of items
# and combine them with the rankings
- paired <- lapply(paired, function(x) {
- a <- list(x = x,
- input = "ordering",
- items = itemnames)
- x <- do.call("as.rankings", args = a)
+ paired = lapply(paired, function(x) {
+ x = PlackettLuce::as.rankings(x, input = "ordering", items = itemnames)
})
- paired <- do.call("rbind", paired)
+ paired = do.call("rbind", paired)
- R <- rbind(R, paired)
+ R = rbind(R, paired)
return(R)
}
-
-
diff --git a/README.md b/README.md
index 778116d..330683b 100644
--- a/README.md
+++ b/README.md
@@ -4,9 +4,7 @@ ClimMobTools
[![CRAN](https://www.r-pkg.org/badges/version/ClimMobTools)](https://cran.r-project.org/package=ClimMobTools)
-[![CRANchecks](https://cranchecks.info/badges/worst/ClimMobTools)](https://cran.r-project.org/web/checks/check_results_ClimMobTools.html)
-[![codecov](https://codecov.io/gh/agrdatasci/ClimMobTools/master.svg)](https://codecov.io/github/agrdatasci/ClimMobTools?branch=master)
-[![lifecycle](https://img.shields.io/badge/lifecycle-maturing-blue.svg)](https://www.tidyverse.org/lifecycle/#maturing)
+[![CRANchecks](https://badges.cranchecks.info/worst/ClimMobTools.svg)](https://cran.r-project.org/web/checks/check_results_ClimMobTools.html)
[![Downloads](https://cranlogs.r-pkg.org/badges/ClimMobTools)](https://cran.r-project.org/package=ClimMobTools)
diff --git a/codemeta.json b/codemeta.json
index a870025..70506d2 100644
--- a/codemeta.json
+++ b/codemeta.json
@@ -7,7 +7,7 @@
"codeRepository": "https://agrdatasci.github.io/ClimMobTools/",
"issueTracker": "https://github.com/agrdatasci/ClimMobTools/issues",
"license": "https://spdx.org/licenses/MIT",
- "version": "0.5.3",
+ "version": "1.0",
"programmingLanguage": {
"@type": "ComputerLanguage",
"name": "R",
@@ -50,6 +50,18 @@
}
],
"softwareSuggestions": [
+ {
+ "@type": "SoftwareApplication",
+ "identifier": "climatrends",
+ "name": "climatrends",
+ "provider": {
+ "@id": "https://cran.r-project.org",
+ "@type": "Organization",
+ "name": "Comprehensive R Archive Network (CRAN)",
+ "url": "https://cran.r-project.org"
+ },
+ "sameAs": "https://CRAN.R-project.org/package=climatrends"
+ },
{
"@type": "SoftwareApplication",
"identifier": "gosset",
@@ -74,6 +86,18 @@
},
"sameAs": "https://CRAN.R-project.org/package=knitr"
},
+ {
+ "@type": "SoftwareApplication",
+ "identifier": "nasapower",
+ "name": "nasapower",
+ "provider": {
+ "@id": "https://cran.r-project.org",
+ "@type": "Organization",
+ "name": "Comprehensive R Archive Network (CRAN)",
+ "url": "https://cran.r-project.org"
+ },
+ "sameAs": "https://CRAN.R-project.org/package=nasapower"
+ },
{
"@type": "SoftwareApplication",
"identifier": "rmarkdown",
@@ -156,6 +180,18 @@
"sameAs": "https://CRAN.R-project.org/package=jsonlite"
},
"4": {
+ "@type": "SoftwareApplication",
+ "identifier": "lpSolve",
+ "name": "lpSolve",
+ "provider": {
+ "@id": "https://cran.r-project.org",
+ "@type": "Organization",
+ "name": "Comprehensive R Archive Network (CRAN)",
+ "url": "https://cran.r-project.org"
+ },
+ "sameAs": "https://CRAN.R-project.org/package=lpSolve"
+ },
+ "5": {
"@type": "SoftwareApplication",
"identifier": "Matrix",
"name": "Matrix",
@@ -167,12 +203,12 @@
},
"sameAs": "https://CRAN.R-project.org/package=Matrix"
},
- "5": {
+ "6": {
"@type": "SoftwareApplication",
"identifier": "methods",
"name": "methods"
},
- "6": {
+ "7": {
"@type": "SoftwareApplication",
"identifier": "RSpectra",
"name": "RSpectra",
@@ -184,12 +220,88 @@
},
"sameAs": "https://CRAN.R-project.org/package=RSpectra"
},
- "7": {
+ "8": {
+ "@type": "SoftwareApplication",
+ "identifier": "stats",
+ "name": "stats"
+ },
+ "9": {
"@type": "SoftwareApplication",
"identifier": "utils",
"name": "utils"
},
"SystemRequirements": null
},
- "fileSize": "1217.505KB"
+ "fileSize": "1248.669KB",
+ "citation": [
+ {
+ "@type": "ScholarlyArticle",
+ "datePublished": "2023",
+ "author": [
+ {
+ "@type": "Person",
+ "givenName": "Carlos",
+ "familyName": "Quirós"
+ },
+ {
+ "@type": "Person",
+ "givenName": "Kauê",
+ "familyName": "de Sousa"
+ },
+ {
+ "@type": "Person",
+ "givenName": "Jonathan",
+ "familyName": "Steinke"
+ },
+ {
+ "@type": "Person",
+ "givenName": "Brandon",
+ "familyName": "Madriz"
+ },
+ {
+ "@type": "Person",
+ "givenName": "Marie-Angélique",
+ "familyName": "Laporte"
+ },
+ {
+ "@type": "Person",
+ "givenName": "Elizabeth",
+ "familyName": "Arnaud"
+ },
+ {
+ "@type": "Person",
+ "givenName": "Rhys",
+ "familyName": "Manners"
+ },
+ {
+ "@type": "Person",
+ "givenName": "Berta",
+ "familyName": "Ortiz-Crespo"
+ },
+ {
+ "@type": "Person",
+ "givenName": "Anna",
+ "familyName": "Müller"
+ },
+ {
+ "@type": "Person",
+ "givenName": "Jacob",
+ "familyName": "van Etten"
+ }
+ ],
+ "name": "{ClimMob: Software to Support Experimental Citizen Science in Agriculture}",
+ "identifier": "10.2139/ssrn.4463406",
+ "url": "http://dx.doi.org/10.2139/ssrn.4463406",
+ "@id": "https://doi.org/10.2139/ssrn.4463406",
+ "sameAs": "https://doi.org/10.2139/ssrn.4463406",
+ "isPartOf": {
+ "@type": "PublicationIssue",
+ "datePublished": "2023",
+ "isPartOf": {
+ "@type": ["PublicationVolume", "Periodical"],
+ "name": "{SSRN} Electronic Journal"
+ }
+ }
+ }
+ ]
}
diff --git a/docs/404.html b/docs/404.html
index 1e38c7a..df50c42 100644
--- a/docs/404.html
+++ b/docs/404.html
@@ -39,7 +39,7 @@
ClimMobTools
- 0.5
+ 1.0
diff --git a/docs/CODE_OF_CONDUCT.html b/docs/CODE_OF_CONDUCT.html
index 79921c7..089567b 100644
--- a/docs/CODE_OF_CONDUCT.html
+++ b/docs/CODE_OF_CONDUCT.html
@@ -17,7 +17,7 @@
ClimMobTools
- 0.5
+ 1.0
diff --git a/docs/CONTRIBUTING.html b/docs/CONTRIBUTING.html
index ffb6f1f..ecbacd0 100644
--- a/docs/CONTRIBUTING.html
+++ b/docs/CONTRIBUTING.html
@@ -17,7 +17,7 @@
ClimMobTools
- 0.5
+ 1.0
diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html
index 5fc9910..67edb31 100644
--- a/docs/LICENSE-text.html
+++ b/docs/LICENSE-text.html
@@ -17,7 +17,7 @@
ClimMobTools
- 0.5
+ 1.0
diff --git a/docs/articles/Overview.html b/docs/articles/Overview.html
index d2fe252..670b3b9 100644
--- a/docs/articles/Overview.html
+++ b/docs/articles/Overview.html
@@ -40,7 +40,7 @@
ClimMobTools
- 0.5
+ 1.0
@@ -90,10 +90,10 @@ ClimMobTools: API Client for the ‘ClimMob’
Kauê de
Sousa
- Department of Agricultural Sciences, Inland Norway University of
-Applied Sciences, Hamar, Norway
Digital Inclusion, Bioversity
-International, Montpelier,
-France
Jacob
+ Digital Inclusion, Bioversity International, Montpelier, France
+
Department of Agricultural Sciences, Inland Norway University of
+Applied Sciences, Hamar,
+Norway
Jacob
van Etten
Digital Inclusion, Bioversity International, Montpelier,
@@ -135,12 +135,16 @@ Usage
an API key from the ClimMob user’s account.
library("ClimMobTools")
+library("PlackettLuce")
+library("climatrends")
+library("nasapower")
# the API key
key <- "d39a3c66-5822-4930-a9d4-50e7da041e77"
dat <- getDataCM(key = key,
project = "breadwheat",
+ userowner = "gosset",
pivot.wider = TRUE)
@@ -154,10 +158,7 @@ Tricot data with environmenta
Here we use function temperature()
to compute the
temperature indices for the first 80 days after planting.
-library("climatrends")
-library("nasapower")
-
-dat$plantingdate <- as.Date(dat$plantingdate, format = "%Y-%m-%d")
+dat$plantingdate <- as.Date(dat$plantingdate, format = "%Y-%m-%d")
dat$lon <- as.numeric(dat$farm_geo_longitude)
dat$lat <- as.numeric(dat$farm_geo_latitude)
@@ -165,7 +166,20 @@ Tricot data with environmenta
day.one = dat[, "plantingdate"],
span = 80)
-temp
+temp
+#> maxDT minDT maxNT minNT DTR SU TR CFD WSDI CSDI T10p T90p
+#> <dbl> <dbl> <dbl> <dbl> <int> <int> <int> <int> <int> <int> <dbl> <dbl>
+#> 1: 30.75 20.65 17.67 4.63 15 2 0 0 3 2 5.50 27.74
+#> 2: 29.15 20.65 16.52 4.63 15 0 0 0 4 2 5.50 27.46
+#> 3: 32.76 20.65 18.49 4.63 15 7 0 0 8 2 5.50 29.16
+#> 4: 29.15 20.65 16.52 4.63 15 0 0 0 3 2 5.50 27.27
+#> 5: 29.15 20.65 16.52 4.63 15 0 0 0 4 2 5.50 26.22
+#> ---
+#> 489: 29.15 20.65 16.52 4.63 15 0 0 0 3 2 5.50 27.27
+#> 490: 29.15 20.65 16.52 4.63 15 0 0 0 3 2 5.50 27.27
+#> 491: 32.76 20.65 17.67 4.63 15 5 0 0 6 2 5.50 28.41
+#> 492: 29.15 20.65 16.52 4.63 15 0 0 0 3 2 5.50 27.27
+#> 493: 29.15 20.65 16.52 4.63 15 0 0 0 3 2 5.50 27.27