diff --git a/DESCRIPTION b/DESCRIPTION index 9f76543..17681ad 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -22,7 +22,7 @@ Imports: utils, sp, sf, raster, - reshape2, + data.table, plyr, cluster, Rcpp, @@ -35,7 +35,7 @@ Suggests: knitr, rmarkdown, testthat VignetteBuilder: knitr -RoxygenNote: 7.1.1 +RoxygenNote: 7.2.3 Collate: 'RcppExports.R' 'clhs-internal.R' diff --git a/NAMESPACE b/NAMESPACE index 4db6d3e..88ccc8c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -11,6 +11,7 @@ import(Rcpp) import(RcppArmadillo) import(ggplot2) importFrom(cluster,daisy) +importFrom(data.table,melt) importFrom(graphics,hist) importFrom(grid,grid.layout) importFrom(grid,grid.newpage) @@ -27,7 +28,6 @@ importFrom(raster,nlayers) importFrom(raster,raster) importFrom(raster,rasterToPoints) importFrom(raster,stack) -importFrom(reshape2,melt) importFrom(sf,st_coordinates) importFrom(sf,st_geometry_type) importFrom(sf,st_set_geometry) diff --git a/R/RcppExports.R b/R/RcppExports.R index 2dbd9a1..24c66ce 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -25,7 +25,7 @@ #' @return list with sampled data, indices, objective values, cost value, and final continuous weights for each sample NULL -CppLHS <- function(xA, cost, strata, include, idx, factors, i_fact, nsample, cost_mode, iter, wCont, wFact, wCorr, etaMat, temperature, tdecrease, length_cycle) { - .Call('_clhs_CppLHS', PACKAGE = 'clhs', xA, cost, strata, include, idx, factors, i_fact, nsample, cost_mode, iter, wCont, wFact, wCorr, etaMat, temperature, tdecrease, length_cycle) +CppLHS <- function(xA, cost, strata, latlon, include, latlon_inc, idx, factors, continuous, i_fact, nsample, min_dist, cost_mode, iter, wCont, wFact, wCorr, wDist, etaMat, temperature, tdecrease, length_cycle) { + .Call('_clhs_CppLHS', PACKAGE = 'clhs', xA, cost, strata, latlon, include, latlon_inc, idx, factors, continuous, i_fact, nsample, min_dist, cost_mode, iter, wCont, wFact, wCorr, wDist, etaMat, temperature, tdecrease, length_cycle) } diff --git a/R/clhs-data.frame.R b/R/clhs-data.frame.R index 4e97bf2..35cf777 100644 --- a/R/clhs-data.frame.R +++ b/R/clhs-data.frame.R @@ -14,9 +14,11 @@ clhs.data.frame <- function( cost = NULL, # Number or name of the attribute used as a cost iter = 10000, # Number of max iterations use.cpp = TRUE, # use C++ code for metropolis-hasting loop? + latlon = NULL, + min.dist = NULL, temp = 1, # initial temperature tdecrease = 0.95, # temperature decrease rate - weights = list(numeric = 1, factor = 1, correlation = 1), # weight for continuous data , weight for correlation among data, weight for object data + weights = list(numeric = 1, factor = 1, correlation = 1, distance = 0.1), # weight for continuous data , weight for correlation among data, weight for object data eta = 1, obj.limit = -Inf, # Stopping criterion length.cycle = 10, # Number of cycles done at each constant temperature value @@ -67,7 +69,7 @@ clhs.data.frame <- function( for(i in 1:ncol(data_factor)){ data_factor[,i] <- as.numeric(data_factor[,i]) } - data <- as.matrix(cbind(data_factor,data_continuous)) + data <- as.matrix(cbind(data_factor,data_continuous))##here's the bug factIdx <- 1:n_factor ncont <- ncol(data_continuous) } else { @@ -96,24 +98,46 @@ clhs.data.frame <- function( } ) + if(is.null(latlon)){ + latlon <- matrix(c(0,0),nrow = 1, ncol = 2) + min.dist = 0 + }else{ + latlon <- as.matrix(latlon) + } + if(is.null(include)){ dat <- data inc <- dat[0,] ssize <- size + spat_inc <- matrix(c(0,0),nrow = 1, ncol = 2) + spat <- latlon }else{ + if(nrow(latlon) > 1){ + spat <- latlon[-include,] + spat_inc <- latlon[include,,drop = FALSE] + } + idx <- 1:nrow(data) + idx_new <- idx[-include] dat <- data[-include,] inc <- data[include,,drop = FALSE] ##keep as matrix if just one row ssize <- size - length(include) can.include <- 1:nrow(dat) } + + areContinuous = TRUE + if(length(continuous_strata) == 0){ + continuous_strata = matrix(c(0,0),nrow = 1, ncol = 2) + areContinuous = FALSE + } can.include <- can.include-1 ##convert to zero based for C - res <- CppLHS(xA = dat, cost = costVec, strata = continuous_strata, include = inc, idx = can.include, - factors = areFactors, i_fact = factIdx-1, nsample = ssize, cost_mode = costFlag, iter = iter, - wCont = weights$numeric, wFact = weights$factor, wCorr = weights$correlation, etaMat = eMat, + res <- CppLHS(xA = dat, cost = costVec, strata = continuous_strata, latlon = spat, include = inc, latlon_inc = spat_inc, idx = can.include, + factors = areFactors, continuous = areContinuous, i_fact = factIdx-1, nsample = ssize, min_dist = min.dist, cost_mode = costFlag, iter = iter, + wCont = weights$numeric, wFact = weights$factor, wCorr = weights$correlation, wDist = weights$distance, etaMat = eMat, temperature = temp, tdecrease = tdecrease, length_cycle = length.cycle) res$index_samples <- res$index_samples + 1 ##fix indexing difference if(!is.null(include)){ - res$index_samples <- c(res$index_samples,include) + samples2 <- idx_new[res$index_samples] + res$index_samples <- c(samples2,include) ##this is the problem } res$sampled_data <- x[res$index_samples,] diff --git a/R/clhs.R b/R/clhs.R index 33c2bdc..ef90cc8 100644 --- a/R/clhs.R +++ b/R/clhs.R @@ -53,6 +53,8 @@ #' @param use.cpp TRUE or FALSE. If set to TRUE, annealing process uses C++ code. #' This is ~ 150 times faster than the R version, but is less stable and currently #' doesn't accept track or obj.limit parameters. Default to TRUE. +#' @param latlon A dataframe or matrix with two columns containing the spatial coordinates, if minimum distance between points is required. Default to NULL. +#' @param min.dist Numeric value of minimum distance between sample points. #' @param temp The initial temperature at which the simulated annealing #' begins. Defaults to 1. #' @param tdecrease A number between 0 and 1, giving the rate at which @@ -134,4 +136,5 @@ #' #' @include clhs-data.frame.R #' @export -clhs <- function(x, size, must.include, can.include, cost, iter, use.cpp, temp, tdecrease, weights, eta, obj.limit, length.cycle, simple, progress, track, use.coords, ...) UseMethod("clhs") \ No newline at end of file +clhs <- function(x, size, must.include, can.include, cost, iter, use.cpp, latlon, min.dist, temp, tdecrease, weights, eta, obj.limit, length.cycle, simple, progress, track, use.coords, ...) UseMethod("clhs") + diff --git a/R/plot.R b/R/plot.R index 6fa02eb..84de5d6 100644 --- a/R/plot.R +++ b/R/plot.R @@ -38,7 +38,7 @@ #' #' #' @import ggplot2 -#' @importFrom reshape2 melt +#' @importFrom data.table melt #' @importFrom plyr dlply ddply #' @importFrom grid pushViewport viewport grid.newpage grid.layout #' @importFrom utils packageVersion diff --git a/README.md b/README.md index dbbd868..e6df440 100644 --- a/README.md +++ b/README.md @@ -1,17 +1,11 @@ -[![R build status](https://github.com/pierreroudier/clhs/workflows/R-CMD-check/badge.svg)](https://github.com/pierreroudier/clhs/actions) -[![CRAN_Status_Badge](http://www.r-pkg.org/badges/version/clhs)](https://cran.r-project.org/package=clhs) -[![Total_Downloads](http://cranlogs.r-pkg.org/badges/grand-total/clhs)](https://cran.r-project.org/package=clhs) +# clhs development -# clhs - -A faster (C++) implementation of the conditioned Latin Hypercube Sampling method - -## Scope +This repo is a fork of the main clhs package, used for development (especially of the C++ functions). The original C++ version is on CRAN, but there are multiple updates to the development version, including bug fixes and specification of minimum distance between points. ## Installation -The C++ method is not yet on CRAN. +The CRAN package currently contains a bug if the C++ version is used along with `must.include`. -You can install it using the `devtools` package to install `clhs`: +You can install the development version using the `devtools` package: -`devtools::install_github("pierreroudier/clhs")` +`devtools::install_github("kdaust/clhs")` diff --git a/man/clhs.Rd b/man/clhs.Rd index bce0c08..759de35 100644 --- a/man/clhs.Rd +++ b/man/clhs.Rd @@ -17,6 +17,8 @@ clhs( cost, iter, use.cpp, + latlon, + min.dist, temp, tdecrease, weights, @@ -61,6 +63,10 @@ Metropolis-Hastings annealing process. Defaults to 10000.} This is ~ 150 times faster than the R version, but is less stable and currently doesn't accept track or obj.limit parameters. Default to TRUE.} +\item{latlon}{A dataframe or matrix with two columns containing the spatial coordinates, if minimum distance between points is required. Default to NULL.} + +\item{min.dist}{Numeric value of minimum distance between sample points.} + \item{temp}{The initial temperature at which the simulated annealing begins. Defaults to 1.} diff --git a/src/CppLHS.cpp b/src/CppLHS.cpp index 00ab455..b96af8b 100644 --- a/src/CppLHS.cpp +++ b/src/CppLHS.cpp @@ -91,10 +91,15 @@ NumericMatrix c_cor(NumericMatrix mat) { //pair list to store frequency table for factors typedef std::pair ptype; +//structure to store histogram results +struct tableResult { + NumericVector values; + IntegerVector oversampled; +}; //this function creates a frequency table of sampled factor data //it takes a vector v of sampled data, and a frequency table for the full factor //it returns the absolute values of the original table - the sampled frequencies -NumericVector table_cpp(const Rcpp::NumericVector & v, const NumericVector full){ +tableResult table_cpp(const Rcpp::NumericVector & v, const NumericVector full){ std::vector data = as>(v); unsigned int nTot = v.size(); // Create a map @@ -133,8 +138,21 @@ NumericVector table_cpp(const Rcpp::NumericVector & v, const NumericVector full) // Rcout << "Full: " << full << "\n"; //Rcout << "Curr Facts: " << result_vals << "\n" << "Orig: " << full << "\n"; - result_vals = abs(result_vals - full); - return (result_vals); + //result_vals = abs(result_vals - full); + result_vals = result_vals - full; + int maxv = which_max(result_vals); + double oversampled = strtod(tempLevs[maxv], NULL); + + IntegerVector isbad (v.size()); + for (int i = 0; i < v.size(); i++) { + if(data[i] == oversampled){ + isbad[i] = 1; + } + } + + struct tableResult out = {abs(result_vals), isbad}; + //which samples contribute to oversampled values? + return (out); } //structure to store histogram results @@ -180,13 +198,15 @@ histResult hist(NumericVector x, NumericVector breaks){ //based on C_bincount fr struct objResult { double objRes; std::vector obj_cont_res; + std::vector obj_distance; //store distance }; //objective function - calculates objective value for current sample //returns objResult struct -objResult obj_fn(arma::mat x, NumericMatrix strata, arma::mat include, bool factors, +objResult obj_fn(arma::mat x, arma::mat ll_curr, NumericMatrix strata, arma::mat include, + arma::mat latlon_inc,bool factors, bool continuous, arma::uvec i_fact, NumericMatrix cor_full, Rcpp::List fact_full, - double wCont, double wFact, double wCorr, arma::mat etaMat){ + double wCont, double wFact, double wCorr, double wDist, double min_dist, arma::mat etaMat){ int nsamps = x.n_rows; arma::mat x_all = join_vert(x,include);//join with include - does nothing if no include NumericMatrix fact_all; @@ -200,6 +220,7 @@ objResult obj_fn(arma::mat x, NumericMatrix strata, arma::mat include, bool fact int num_vars = x_all.n_cols; int num_obs = strata.nrow(); + double obj_corr = 0.0; NumericVector hist_cnt; IntegerMatrix hist_out(num_obs - 1, num_vars); IntegerMatrix hist_pos(num_obs - 1, num_vars); @@ -210,62 +231,123 @@ objResult obj_fn(arma::mat x, NumericMatrix strata, arma::mat include, bool fact NumericVector strata_curr; NumericVector hist_temp; NumericVector obj_cont; - NumericVector obj_position; + NumericVector obj_position (nsamps); NumericMatrix t2; std::vector obj_cont2; - //send each column to bincount function - for(int i = 0; i < num_vars; i++){ - //Rcout << "Hist idex is " << i << "\n"; - data = wrap(x_all.col(i)); - strata_curr = strata(_,i); - struct histResult hRes = hist(data,strata_curr); - bin_counts = hRes.counts; - sample_pos = bin_counts[hRes.position]; - hist_pos(_,i) = sample_pos; - hist_out(_,i) = bin_counts; + if(continuous){ + //send each column to bincount function + for(int i = 0; i < num_vars; i++){ + //Rcout << "Hist idex is " << i << "\n"; + data = wrap(x_all.col(i)); + strata_curr = strata(_,i); + struct histResult hRes = hist(data,strata_curr); + bin_counts = hRes.counts; + sample_pos = bin_counts[hRes.position]; + hist_pos(_,i) = sample_pos; + hist_out(_,i) = bin_counts; + } + //Rcout << "Histout: " << hist_out << "\n"; + //convert to arma mat because subtraction is faster + arma::mat hist2 = as(hist_out); + t2 = wrap(arma::abs(hist2 - etaMat));//subtract eta - either input matrix, or all 1 + obj_cont = rowSums(t2); + + //this is the sample.weights part in the R code + //basically the number of counts corresponding to each sample + arma::mat histPos2 = as(hist_pos); + t2 = wrap(arma::abs(histPos2 - etaMat)); + obj_position = rowSums(t2); + //Rcout << "Full ObjCont: " << obj_cont << "\n"; + //correlation matrix for current sample + NumericMatrix cor_new = c_cor(wrap(x_all)); + obj_corr = sum(abs(cor_full - cor_new)); } - //Rcout << "Histout: " << hist_out << "\n"; - //convert to arma mat because subtraction is faster - arma::mat hist2 = as(hist_out); - t2 = wrap(arma::abs(hist2 - etaMat));//subtract eta - either input matrix, or all 1 - obj_cont = rowSums(t2); - - //this is the sample.weights part in the R code - //basically the number of counts corresponding to each sample - arma::mat histPos2 = as(hist_pos); - t2 = wrap(arma::abs(histPos2 - etaMat)); - obj_position = rowSums(t2); - //Rcout << "Full ObjCont: " << obj_cont << "\n"; - //send factor data to get tabulated - NumericVector factRes(num_vars); + + NumericVector totalbad_fact (nsamps); + int num_vars2 = fact_full.size(); + NumericVector factRes(num_vars2); if(factors){ - int num_vars2 = fact_full.size(); - NumericVector temp(x_all.n_rows); + + NumericVector temp(fact_all.rows()); double total; + for(int i = 0; i < num_vars2; i++){ temp = fact_full[i]; total = sum(temp); temp = temp/total; - factRes[i] = sum(table_cpp(fact_all(_,i),temp)); + struct tableResult tRes = table_cpp(fact_all(_,i),temp); + total = sum(tRes.values); + //Rcout << "CurrIt: " << total << "\n"; + factRes[i] = total; + + for(int j = 0; j < totalbad_fact.size(); j++){ //sum up variables - if oversampled in all vars, will be higher + totalbad_fact[j] += tRes.oversampled[j]; + } } + //Rcout << "Factor Vector: " << factRes << "\n"; } + //Rcout << "FactRes: " << factRes << "\n"; - //correlation matrix for current sample - NumericMatrix cor_new = c_cor(wrap(x_all)); + + //Now calculate distances if applicable + double xdist, ydist; + double totdist; + //int numOver; + int ll_len = ll_curr.n_rows; + std::vector dist_all(ll_len,0); + double totalBad = 0.0; + //Rcout << "Length: " << ll_len << "\n"; + if(ll_len > 1){ + for(int i = 0; i < ll_len; i++){ + for(int j = i+1; j < ll_len; j++){ + //Rcout << "i: " << i << " j: " << j << "\n"; + xdist = ll_curr(i,0) - ll_curr(j,0); + ydist = ll_curr(i,1) - ll_curr(j,1); + totdist = sqrt(pow(xdist,2) + pow(ydist,2)); + //Rcout << "Reg distance: " << totdist << "\n"; + if(totdist < min_dist){ + dist_all[i]++; + totalBad += min_dist - totdist; + } + //Rcout << "Regular: " << dist_all << "\n"; + } + } + if(latlon_inc.n_rows > 1){//if there is latlon data for must.include + for(int i = 0; i < ll_len; i++){ + for(int j = 0; j < latlon_inc.n_rows; j++){ + xdist = ll_curr(i,0) - latlon_inc(j,0); + ydist = ll_curr(i,1) - latlon_inc(j,1); + totdist = sqrt(pow(xdist,2) + pow(ydist,2)); + //Rcout << "Include distance: " << totdist << "\n"; + if(totdist < min_dist){ + //Rcout << "Adding to dist_all \n"; + dist_all[i]++; + totalBad += min_dist - totdist; + } + // Rcout << "Include: "; + // for(int i: dist_all) + // Rcout << i << " "; + // Rcout << "\n"; + } + } + } + } - double obj_cor = sum(abs(cor_full - cor_new)); + int total_dist = accumulate(dist_all.begin(), dist_all.end(),0); + //Rcout << "Factor final: " << factRes << "\n"; //combined objective values - since corr_mat is lower tri, have to multiply by 2 - double objFinal = sum(obj_cont)*wCont + obj_cor*2*wCorr + sum(factRes)*wFact; + double objFinal = sum(obj_cont)*wCont + obj_corr*2*wCorr + sum(factRes)*wFact + totalBad*wDist; //Rcout << "FinalObj" << objFinal << "\n"; - obj_position = obj_position[Rcpp::Range(0,nsamps-1)]; //don't think I need this + //obj_position = obj_position[Rcpp::Range(0,nsamps-1)]; //don't think I need this + obj_position = obj_position + totalbad_fact; //add factor positions to continuous //note that now the continuous objective is only used for calculating the objective value //so we only return the objective_positions (i.e. sample.weights) - obj_cont2 = as>(obj_position); - struct objResult out = {objFinal, obj_cont2}; + obj_cont2 = as>(obj_position); //maybe add here? + struct objResult out = {objFinal, obj_cont2, dist_all}; return(out); } @@ -294,10 +376,10 @@ objResult obj_fn(arma::mat x, NumericMatrix strata, arma::mat include, bool fact //' @return list with sampled data, indices, objective values, cost value, and final continuous weights for each sample // [[Rcpp::export]] -List CppLHS(arma::mat xA, NumericVector cost, NumericMatrix strata, - arma::mat include, std::vector idx, bool factors, arma::uvec i_fact, - int nsample, bool cost_mode, int iter, double wCont, - double wFact, double wCorr, arma::mat etaMat, +List CppLHS(arma::mat xA, NumericVector cost, NumericMatrix strata, arma::mat latlon, + arma::mat include, arma::mat latlon_inc, std::vector idx, bool factors, bool continuous, arma::uvec i_fact, + int nsample, double min_dist, bool cost_mode, int iter, double wCont, + double wFact, double wCorr, double wDist, arma::mat etaMat, double temperature, double tdecrease, int length_cycle){ //initialise objects @@ -313,9 +395,12 @@ List CppLHS(arma::mat xA, NumericVector cost, NumericMatrix strata, IntegerVector prev_unsampled; NumericVector prev_contObj; arma::mat x_curr; + arma::mat ll_curr; std::vector delta_cont; std::vector delta_cont_prev; + std::vector bad_dist; + std::vector bad_dist_prev; int idx_removed; int idx_added; int spl_removed; @@ -326,6 +411,7 @@ List CppLHS(arma::mat xA, NumericVector cost, NumericMatrix strata, //std::vector idx(ndata); //std::iota(idx.begin(),idx.end(),0);//populate idx with seq(0:ndata) std::vector::iterator it_worse; + std::vector::iterator it_worse_int; int i_worse; IntegerVector idx2; NumericVector temp; @@ -337,12 +423,21 @@ List CppLHS(arma::mat xA, NumericVector cost, NumericMatrix strata, i_unsampled = vector_diff(idx,i_sampled); arma::uvec arm_isamp = arma::conv_to::from(i_sampled);//index needs to be aram::uvec to extract rows x_curr = xA.rows(arm_isamp); // is this efficient? + + if(latlon.n_rows > 1){ + ll_curr = latlon.rows(arm_isamp); + }else{ + ll_curr = latlon.row(0); //does this work + } //Rcout << "Finish initial sample \n"; arma::mat x_cont = xA; if(factors){ x_cont.shed_cols(i_fact);// remove factors for correlation } - NumericMatrix cor_full = c_cor(wrap(x_cont));//full correlation matrix + NumericMatrix cor_full; + if(continuous){ + cor_full = c_cor(wrap(x_cont));//full correlation matrix + } //setup table list for factors Rcpp::List factTab(i_fact.size()); @@ -354,11 +449,14 @@ List CppLHS(arma::mat xA, NumericVector cost, NumericMatrix strata, } } + //Rcout << "Starting function \n"; //initial objective value - struct objResult res = obj_fn(x_curr,strata,include,factors,i_fact,cor_full,factTab,wCont,wFact,wCorr,etaMat); + struct objResult res = obj_fn(x_curr,ll_curr,strata,include,latlon_inc,factors,continuous,i_fact, + cor_full,factTab,wCont,wFact,wCorr,wDist,min_dist,etaMat); obj = res.objRes; delta_cont = res.obj_cont_res; - //Rcout << "Finished function; obj = "<< obj << "\n"; + bad_dist = res.obj_distance; + NumericVector cost_values(iter, NA_REAL);//store cost @@ -377,6 +475,7 @@ List CppLHS(arma::mat xA, NumericVector cost, NumericMatrix strata, i_sampled_prev = i_sampled; i_unsampled_prev = i_unsampled; delta_cont_prev = delta_cont; + bad_dist_prev = bad_dist; //Rcout << "Deltacont: " << wrap(delta_cont) << "\n"; if(cost_mode){ @@ -412,15 +511,36 @@ List CppLHS(arma::mat xA, NumericVector cost, NumericMatrix strata, i_unsampled.erase(i_unsampled.begin()+idx_added); i_unsampled.push_back(spl_removed); // temp43 = wrap(i_sampled); - // Rcout << "i_samplednew: " << temp43 << "\n"; + // now remove worst for distance + if(latlon.n_rows > 1){//if using lat long + it_worse_int = std::max_element(bad_dist.begin(),bad_dist.end()); //returns max element + i_worse = std::distance(bad_dist.begin(), it_worse_int); //find location of worst + spl_removed = i_sampled[i_worse]; //this is the problem + //Rcout << "spl_removed " << spl_removed << "\n"; + idx_added = Rcpp::sample(i_unsampled.size(), 1, false)[0]; + idx_added--; + i_sampled.erase(i_sampled.begin()+i_worse); + i_sampled.push_back(i_unsampled[idx_added]); + i_unsampled.erase(i_unsampled.begin()+idx_added); + i_unsampled.push_back(spl_removed); + } } //Rcout << "sending to function \n"; arm_isamp = arma::conv_to::from(i_sampled); x_curr = xA.rows(arm_isamp); // is this efficient? + if(latlon.n_rows > 1){ + ll_curr = latlon.rows(arm_isamp); + }else{ + ll_curr = latlon.row(0); //does this work + } //calculate objective value - res = obj_fn(x_curr,strata,include,factors,i_fact,cor_full,factTab,wCont,wFact,wCorr,etaMat); //test this + res = obj_fn(x_curr,ll_curr,strata,include,latlon_inc,factors,continuous,i_fact, + cor_full,factTab,wCont,wFact,wCorr,wDist,min_dist,etaMat); obj = res.objRes; + //Rcout << "Finished function; obj = "<< obj << "\n"; delta_cont = res.obj_cont_res; + //Rcout << "Badsamples = "<< wrap(delta_cont) << "\n"; + bad_dist = res.obj_distance; NumericVector dcTemp = wrap(delta_cont); //update variables delta_obj = obj - prev_obj; @@ -443,6 +563,7 @@ List CppLHS(arma::mat xA, NumericVector cost, NumericMatrix strata, i_unsampled = i_unsampled_prev; obj = prev_obj; delta_cont = delta_cont_prev; + bad_dist = bad_dist_prev; if(cost_mode){ opCost = prev_opCost; } @@ -457,11 +578,13 @@ List CppLHS(arma::mat xA, NumericVector cost, NumericMatrix strata, } arm_isamp = arma::conv_to::from(i_sampled); x_curr = xA.rows(arm_isamp); + Rcout << "Finished function; obj = "<< obj << "\n"; Rcout << "vroom vroom \n"; //create list to return return List::create(_["sampled_data"] = x_curr, _["index_samples"] = i_sampled, _["obj"] = obj_values, _["cost"] = cost_values, - _["final_obj_continuous"] = delta_cont); + _["final_quality"] = delta_cont, + _["final_obj_distance"] = bad_dist); } \ No newline at end of file diff --git a/src/CppTesting.cpp b/src/CppTesting.cpp new file mode 100644 index 0000000..3d2f1d7 --- /dev/null +++ b/src/CppTesting.cpp @@ -0,0 +1,76 @@ +#include +#include +#include +#include +#include +//[[Rcpp::depends(RcppArmadillo)]] + +using namespace Rcpp; +using namespace std; +// [[Rcpp::plugins("cpp17")]] + + +// IntegerVector mywhich(const NumericVector v){ +// IntegerVector out = Rcpp::match(v, 2.0); +// return(out) +// } + +//pair list to store frequency table for factors +typedef std::pair ptype; + + +LogicalVector table_cpp_(const Rcpp::NumericVector & v, const NumericVector full){ + std::vector data = as>(v); + unsigned int nTot = v.size(); + // Create a map + Rcpp::CharacterVector tempLevs = full.attr("names");//need to made sure new vectors has same levels in same order + std::vector levels(tempLevs.size()); + for(int i = 0; i < tempLevs.size(); i++){ + levels[i] = std::strtod(tempLevs[i], NULL); + } + + std::map Elt; + Elt.clear(); + + // Initialize with all zero - this might be slow + for (unsigned int i = 0; i != levels.size(); ++i) { + Elt[levels[i]] = 0; + } + //count frequencies + for (int i = 0; i != v.size(); ++i) { + Elt[data[i]] += 1; + } + unsigned int n_obs = Elt.size(); + + std::vector sorted_Elt(Elt.begin(), Elt.end()); + Rcpp::NumericVector result_vals(n_obs); + + unsigned int count = 0; + double temp; + //Use iterators to access objects in map + for(std::vector::iterator it = sorted_Elt.begin(); it != sorted_Elt.end(); ++it){ + temp = it->second; + result_vals(count) = temp/nTot; + count++; + } + // Rcout << "Names: " << tempLevs << "\n"; + // Rcout << "Small: " << result_vals << "\n"; + // Rcout << "Full: " << full << "\n"; + //Rcout << "Curr Facts: " << result_vals << "\n" << "Orig: " << full << "\n"; + + //result_vals = abs(result_vals - full); + result_vals = result_vals - full; + int maxv = which_max(result_vals); + double oversampled = strtod(tempLevs[maxv], NULL); + + LogicalVector isbad (v.size()); + for (int i = 0; i < v.size(); i++) { + if(data[i] == oversampled){ + isbad[i] = 1; + } + } + //which samples contribute to oversampled values? + return (isbad); +} + + diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 2512ddf..2ccba03 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -6,36 +6,46 @@ using namespace Rcpp; +#ifdef RCPP_USE_GLOBAL_ROSTREAM +Rcpp::Rostream& Rcpp::Rcout = Rcpp::Rcpp_cout_get(); +Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); +#endif + // CppLHS -List CppLHS(arma::mat xA, NumericVector cost, NumericMatrix strata, arma::mat include, std::vector idx, bool factors, arma::uvec i_fact, int nsample, bool cost_mode, int iter, double wCont, double wFact, double wCorr, arma::mat etaMat, double temperature, double tdecrease, int length_cycle); -RcppExport SEXP _clhs_CppLHS(SEXP xASEXP, SEXP costSEXP, SEXP strataSEXP, SEXP includeSEXP, SEXP idxSEXP, SEXP factorsSEXP, SEXP i_factSEXP, SEXP nsampleSEXP, SEXP cost_modeSEXP, SEXP iterSEXP, SEXP wContSEXP, SEXP wFactSEXP, SEXP wCorrSEXP, SEXP etaMatSEXP, SEXP temperatureSEXP, SEXP tdecreaseSEXP, SEXP length_cycleSEXP) { +List CppLHS(arma::mat xA, NumericVector cost, NumericMatrix strata, arma::mat latlon, arma::mat include, arma::mat latlon_inc, std::vector idx, bool factors, bool continuous, arma::uvec i_fact, int nsample, double min_dist, bool cost_mode, int iter, double wCont, double wFact, double wCorr, double wDist, arma::mat etaMat, double temperature, double tdecrease, int length_cycle); +RcppExport SEXP _clhs_CppLHS(SEXP xASEXP, SEXP costSEXP, SEXP strataSEXP, SEXP latlonSEXP, SEXP includeSEXP, SEXP latlon_incSEXP, SEXP idxSEXP, SEXP factorsSEXP, SEXP continuousSEXP, SEXP i_factSEXP, SEXP nsampleSEXP, SEXP min_distSEXP, SEXP cost_modeSEXP, SEXP iterSEXP, SEXP wContSEXP, SEXP wFactSEXP, SEXP wCorrSEXP, SEXP wDistSEXP, SEXP etaMatSEXP, SEXP temperatureSEXP, SEXP tdecreaseSEXP, SEXP length_cycleSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< arma::mat >::type xA(xASEXP); Rcpp::traits::input_parameter< NumericVector >::type cost(costSEXP); Rcpp::traits::input_parameter< NumericMatrix >::type strata(strataSEXP); + Rcpp::traits::input_parameter< arma::mat >::type latlon(latlonSEXP); Rcpp::traits::input_parameter< arma::mat >::type include(includeSEXP); + Rcpp::traits::input_parameter< arma::mat >::type latlon_inc(latlon_incSEXP); Rcpp::traits::input_parameter< std::vector >::type idx(idxSEXP); Rcpp::traits::input_parameter< bool >::type factors(factorsSEXP); + Rcpp::traits::input_parameter< bool >::type continuous(continuousSEXP); Rcpp::traits::input_parameter< arma::uvec >::type i_fact(i_factSEXP); Rcpp::traits::input_parameter< int >::type nsample(nsampleSEXP); + Rcpp::traits::input_parameter< double >::type min_dist(min_distSEXP); Rcpp::traits::input_parameter< bool >::type cost_mode(cost_modeSEXP); Rcpp::traits::input_parameter< int >::type iter(iterSEXP); Rcpp::traits::input_parameter< double >::type wCont(wContSEXP); Rcpp::traits::input_parameter< double >::type wFact(wFactSEXP); Rcpp::traits::input_parameter< double >::type wCorr(wCorrSEXP); + Rcpp::traits::input_parameter< double >::type wDist(wDistSEXP); Rcpp::traits::input_parameter< arma::mat >::type etaMat(etaMatSEXP); Rcpp::traits::input_parameter< double >::type temperature(temperatureSEXP); Rcpp::traits::input_parameter< double >::type tdecrease(tdecreaseSEXP); Rcpp::traits::input_parameter< int >::type length_cycle(length_cycleSEXP); - rcpp_result_gen = Rcpp::wrap(CppLHS(xA, cost, strata, include, idx, factors, i_fact, nsample, cost_mode, iter, wCont, wFact, wCorr, etaMat, temperature, tdecrease, length_cycle)); + rcpp_result_gen = Rcpp::wrap(CppLHS(xA, cost, strata, latlon, include, latlon_inc, idx, factors, continuous, i_fact, nsample, min_dist, cost_mode, iter, wCont, wFact, wCorr, wDist, etaMat, temperature, tdecrease, length_cycle)); return rcpp_result_gen; END_RCPP } static const R_CallMethodDef CallEntries[] = { - {"_clhs_CppLHS", (DL_FUNC) &_clhs_CppLHS, 17}, + {"_clhs_CppLHS", (DL_FUNC) &_clhs_CppLHS, 22}, {NULL, NULL, 0} };