From 2e29a32e171c9e968c5afb1f68d6e4cd369d6744 Mon Sep 17 00:00:00 2001 From: Sebastian Krantz Date: Wed, 8 Jun 2022 11:33:23 +0200 Subject: [PATCH 1/9] Try Makevars hack to trick CRAN static code analysis. --- src/Makevars | 7 ------- src/Makevars.in | 9 +++++++++ src/Makevars.win | 14 +++++++++----- 3 files changed, 18 insertions(+), 12 deletions(-) delete mode 100644 src/Makevars create mode 100644 src/Makevars.in diff --git a/src/Makevars b/src/Makevars deleted file mode 100644 index 69ecbf36..00000000 --- a/src/Makevars +++ /dev/null @@ -1,7 +0,0 @@ -PKG_CFLAGS = $(SHLIB_OPENMP_CFLAGS) -PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS) -DSTRICT_R_HEADERS -PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) -# CXX_STD = CXX11 -# To build with OpenMP on MAC: -# PKG_CPPFLAGS=-Xclang -fopenmp -# PKG_LIBS=-lomp diff --git a/src/Makevars.in b/src/Makevars.in new file mode 100644 index 00000000..47acaa3a --- /dev/null +++ b/src/Makevars.in @@ -0,0 +1,9 @@ +PKG_CFLAGS = @PKG_CFLAGS@ @openmp_cflags@ +PKG_CXXFLAGS = @PKG_CFLAGS@ @openmp_cxxflags@ +PKG_LIBS = @PKG_LIBS@ @openmp_cxxflags@ + +all: $(SHLIB) + @echo PKG_CFLAGS = $(PKG_CFLAGS) + @echo PKG_CXXFLAGS = $(PKG_CXXFLAGS) -DSTRICT_R_HEADERS + @echo PKG_LIBS = $(PKG_LIBS) + mv $(SHLIB) collapse$(SHLIB_EXT) diff --git a/src/Makevars.win b/src/Makevars.win index e33d650a..fe51b989 100644 --- a/src/Makevars.win +++ b/src/Makevars.win @@ -1,5 +1,9 @@ -PKG_CFLAGS = $(SHLIB_OPENMP_CFLAGS) -O3 -PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS) -DSTRICT_R_HEADERS -PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) -# CXX_STD = CXX11 -# PKG_CFLAGS += -O3 +PKG_CFLAGS = @PKG_CFLAGS@ @openmp_cflags@ +PKG_CXXFLAGS = @PKG_CFLAGS@ @openmp_cxxflags@ +PKG_LIBS = @PKG_LIBS@ @openmp_cxxflags@ + +all: $(SHLIB) + @echo PKG_CFLAGS = $(PKG_CFLAGS) -O3 + @echo PKG_CXXFLAGS = $(PKG_CXXFLAGS) -DSTRICT_R_HEADERS + @echo PKG_LIBS = $(PKG_LIBS) + mv $(SHLIB) collapse$(SHLIB_EXT) From 44842ef86ff3a595a10a5573a182c9fe32b02670 Mon Sep 17 00:00:00 2001 From: Sebastian Krantz Date: Wed, 8 Jun 2022 
11:37:06 +0200 Subject: [PATCH 2/9] Try Makevars hack to trick CRAN static code analysis - 2. --- src/Makevars.in | 2 +- src/Makevars.win | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Makevars.in b/src/Makevars.in index 47acaa3a..71e11ca8 100644 --- a/src/Makevars.in +++ b/src/Makevars.in @@ -6,4 +6,4 @@ all: $(SHLIB) @echo PKG_CFLAGS = $(PKG_CFLAGS) @echo PKG_CXXFLAGS = $(PKG_CXXFLAGS) -DSTRICT_R_HEADERS @echo PKG_LIBS = $(PKG_LIBS) - mv $(SHLIB) collapse$(SHLIB_EXT) + if [ "$(SHLIB)" != "collapse$(SHLIB_EXT)" ]; then mv $(SHLIB) collapse$(SHLIB_EXT); fi diff --git a/src/Makevars.win b/src/Makevars.win index fe51b989..2d5a162b 100644 --- a/src/Makevars.win +++ b/src/Makevars.win @@ -6,4 +6,4 @@ all: $(SHLIB) @echo PKG_CFLAGS = $(PKG_CFLAGS) -O3 @echo PKG_CXXFLAGS = $(PKG_CXXFLAGS) -DSTRICT_R_HEADERS @echo PKG_LIBS = $(PKG_LIBS) - mv $(SHLIB) collapse$(SHLIB_EXT) + if [ "$(SHLIB)" != "collapse$(SHLIB_EXT)" ]; then mv $(SHLIB) collapse$(SHLIB_EXT); fi From fa178187adf941ee72d241aa4dcaaf2eaabc3853 Mon Sep 17 00:00:00 2001 From: Sebastian Krantz Date: Wed, 8 Jun 2022 11:40:52 +0200 Subject: [PATCH 3/9] Try Makevars hack to trick CRAN static code analysis - 3. 
--- src/Makevars.in | 3 ++- src/Makevars.win | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Makevars.in b/src/Makevars.in index 71e11ca8..0d6c4375 100644 --- a/src/Makevars.in +++ b/src/Makevars.in @@ -3,7 +3,8 @@ PKG_CXXFLAGS = @PKG_CFLAGS@ @openmp_cxxflags@ PKG_LIBS = @PKG_LIBS@ @openmp_cxxflags@ all: $(SHLIB) - @echo PKG_CFLAGS = $(PKG_CFLAGS) + @echo PKG_CFLAGS = $(PKG_CFLAGS) -O3 @echo PKG_CXXFLAGS = $(PKG_CXXFLAGS) -DSTRICT_R_HEADERS @echo PKG_LIBS = $(PKG_LIBS) if [ "$(SHLIB)" != "collapse$(SHLIB_EXT)" ]; then mv $(SHLIB) collapse$(SHLIB_EXT); fi + if [ "$(OS)" != "Windows_NT" ] && [ `uname -s` = 'Darwin' ]; then install_name_tool -id data_table$(SHLIB_EXT) data_table$(SHLIB_EXT); fi diff --git a/src/Makevars.win b/src/Makevars.win index 2d5a162b..0d6c4375 100644 --- a/src/Makevars.win +++ b/src/Makevars.win @@ -7,3 +7,4 @@ all: $(SHLIB) @echo PKG_CXXFLAGS = $(PKG_CXXFLAGS) -DSTRICT_R_HEADERS @echo PKG_LIBS = $(PKG_LIBS) if [ "$(SHLIB)" != "collapse$(SHLIB_EXT)" ]; then mv $(SHLIB) collapse$(SHLIB_EXT); fi + if [ "$(OS)" != "Windows_NT" ] && [ `uname -s` = 'Darwin' ]; then install_name_tool -id data_table$(SHLIB_EXT) data_table$(SHLIB_EXT); fi From 265e2fce37f2750a8b0c3bd43e93d22b9a00d305 Mon Sep 17 00:00:00 2001 From: Sebastian Krantz Date: Wed, 8 Jun 2022 12:31:59 +0200 Subject: [PATCH 4/9] Try Makevars hack to trick CRAN static code analysis - 4. 
--- src/Makevars.in | 2 +- src/Makevars.win | 15 +++++---------- 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/src/Makevars.in b/src/Makevars.in index 0d6c4375..41240037 100644 --- a/src/Makevars.in +++ b/src/Makevars.in @@ -7,4 +7,4 @@ all: $(SHLIB) @echo PKG_CXXFLAGS = $(PKG_CXXFLAGS) -DSTRICT_R_HEADERS @echo PKG_LIBS = $(PKG_LIBS) if [ "$(SHLIB)" != "collapse$(SHLIB_EXT)" ]; then mv $(SHLIB) collapse$(SHLIB_EXT); fi - if [ "$(OS)" != "Windows_NT" ] && [ `uname -s` = 'Darwin' ]; then install_name_tool -id data_table$(SHLIB_EXT) data_table$(SHLIB_EXT); fi + if [ "$(OS)" != "Windows_NT" ] && [ `uname -s` = 'Darwin' ]; then install_name_tool -id collapse$(SHLIB_EXT) collapse$(SHLIB_EXT); fi diff --git a/src/Makevars.win b/src/Makevars.win index 0d6c4375..81f402e8 100644 --- a/src/Makevars.win +++ b/src/Makevars.win @@ -1,10 +1,5 @@ -PKG_CFLAGS = @PKG_CFLAGS@ @openmp_cflags@ -PKG_CXXFLAGS = @PKG_CFLAGS@ @openmp_cxxflags@ -PKG_LIBS = @PKG_LIBS@ @openmp_cxxflags@ - -all: $(SHLIB) - @echo PKG_CFLAGS = $(PKG_CFLAGS) -O3 - @echo PKG_CXXFLAGS = $(PKG_CXXFLAGS) -DSTRICT_R_HEADERS - @echo PKG_LIBS = $(PKG_LIBS) - if [ "$(SHLIB)" != "collapse$(SHLIB_EXT)" ]; then mv $(SHLIB) collapse$(SHLIB_EXT); fi - if [ "$(OS)" != "Windows_NT" ] && [ `uname -s` = 'Darwin' ]; then install_name_tool -id data_table$(SHLIB_EXT) data_table$(SHLIB_EXT); fi +## -- compiling for OpenMP +PKG_CFLAGS = -fopenmp -O3 +PKG_CXXFLAGS = -fopenmp -DSTRICT_R_HEADERS +## -- linking for OpenMP +PKG_LIBS = -fopenmp -lgomp From db76f35a44084515ec5fd4371bb5a38d286db09c Mon Sep 17 00:00:00 2001 From: Sebastian Krantz Date: Wed, 8 Jun 2022 13:24:42 +0200 Subject: [PATCH 5/9] Try Makevars hack to trick CRAN static code analysis - 5. 
--- src/Makevars | 6 ++++++ src/Makevars.in | 10 ---------- src/Makevars.win | 1 + 3 files changed, 7 insertions(+), 10 deletions(-) create mode 100644 src/Makevars delete mode 100644 src/Makevars.in diff --git a/src/Makevars b/src/Makevars new file mode 100644 index 00000000..026e03d4 --- /dev/null +++ b/src/Makevars @@ -0,0 +1,6 @@ +## -- compiling for OpenMP +PKG_CFLAGS = $($(subst OPENMP,OPENMP_CFLAGS,SHLIB_OPENMP)) +PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS) -DSTRICT_R_HEADERS +# CXX_STD = CXX11 +## -- linking for OpenMP +PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) diff --git a/src/Makevars.in b/src/Makevars.in deleted file mode 100644 index 41240037..00000000 --- a/src/Makevars.in +++ /dev/null @@ -1,10 +0,0 @@ -PKG_CFLAGS = @PKG_CFLAGS@ @openmp_cflags@ -PKG_CXXFLAGS = @PKG_CFLAGS@ @openmp_cxxflags@ -PKG_LIBS = @PKG_LIBS@ @openmp_cxxflags@ - -all: $(SHLIB) - @echo PKG_CFLAGS = $(PKG_CFLAGS) -O3 - @echo PKG_CXXFLAGS = $(PKG_CXXFLAGS) -DSTRICT_R_HEADERS - @echo PKG_LIBS = $(PKG_LIBS) - if [ "$(SHLIB)" != "collapse$(SHLIB_EXT)" ]; then mv $(SHLIB) collapse$(SHLIB_EXT); fi - if [ "$(OS)" != "Windows_NT" ] && [ `uname -s` = 'Darwin' ]; then install_name_tool -id collapse$(SHLIB_EXT) collapse$(SHLIB_EXT); fi diff --git a/src/Makevars.win b/src/Makevars.win index 81f402e8..d1e6fe91 100644 --- a/src/Makevars.win +++ b/src/Makevars.win @@ -1,5 +1,6 @@ ## -- compiling for OpenMP PKG_CFLAGS = -fopenmp -O3 PKG_CXXFLAGS = -fopenmp -DSTRICT_R_HEADERS +# CXX_STD = CXX11 ## -- linking for OpenMP PKG_LIBS = -fopenmp -lgomp From 393fd0cdd8fb8c036f123c1ef03a7b9e28347b3d Mon Sep 17 00:00:00 2001 From: Sebastian Krantz Date: Wed, 8 Jun 2022 13:46:10 +0200 Subject: [PATCH 6/9] Globally enforcing C++ 11. 
--- src/Makevars | 3 ++- src/Makevars.win | 3 ++- src/fbstats.cpp | 2 -- src/fdiff_fgrowth.cpp | 1 - src/flag.cpp | 1 - src/fnth_fmedian.cpp | 1 - src/mrtl_mctl.cpp | 1 - src/psmat.cpp | 1 - src/pwnobs.cpp | 1 - src/qF_qG.cpp | 1 - src/seqid_groupid.cpp | 1 - src/varying.cpp | 1 - 12 files changed, 4 insertions(+), 13 deletions(-) diff --git a/src/Makevars b/src/Makevars index 026e03d4..0e6c1f8f 100644 --- a/src/Makevars +++ b/src/Makevars @@ -1,6 +1,7 @@ ## -- compiling for OpenMP PKG_CFLAGS = $($(subst OPENMP,OPENMP_CFLAGS,SHLIB_OPENMP)) PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS) -DSTRICT_R_HEADERS -# CXX_STD = CXX11 +## -- using C++ 11 +CXX_STD = CXX11 ## -- linking for OpenMP PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) diff --git a/src/Makevars.win b/src/Makevars.win index d1e6fe91..7d940fb3 100644 --- a/src/Makevars.win +++ b/src/Makevars.win @@ -1,6 +1,7 @@ ## -- compiling for OpenMP PKG_CFLAGS = -fopenmp -O3 PKG_CXXFLAGS = -fopenmp -DSTRICT_R_HEADERS -# CXX_STD = CXX11 +## -- using C++ 11 +CXX_STD = CXX11 ## -- linking for OpenMP PKG_LIBS = -fopenmp -lgomp diff --git a/src/fbstats.cpp b/src/fbstats.cpp index fcf52f9c..1fa5c780 100644 --- a/src/fbstats.cpp +++ b/src/fbstats.cpp @@ -1,4 +1,3 @@ -// [[Rcpp::plugins(cpp11)]] #include using namespace Rcpp; @@ -977,7 +976,6 @@ SEXP fbstatslCpp(const List& x, bool ext = false, int ng = 0, const IntegerVecto // -// // [[Rcpp::plugins(cpp11)]] // #include // #include // using namespace Rcpp; diff --git a/src/fdiff_fgrowth.cpp b/src/fdiff_fgrowth.cpp index ef5266c8..88bab290 100644 --- a/src/fdiff_fgrowth.cpp +++ b/src/fdiff_fgrowth.cpp @@ -1,4 +1,3 @@ -// [[Rcpp::plugins(cpp11)]] #include using namespace Rcpp; diff --git a/src/flag.cpp b/src/flag.cpp index ea75228a..30e029a5 100644 --- a/src/flag.cpp +++ b/src/flag.cpp @@ -1,4 +1,3 @@ -// [[Rcpp::plugins(cpp11)]] #include using namespace Rcpp; diff --git a/src/fnth_fmedian.cpp b/src/fnth_fmedian.cpp index 05e6e895..088dea19 100644 --- a/src/fnth_fmedian.cpp +++ 
b/src/fnth_fmedian.cpp @@ -1,4 +1,3 @@ -// [[Rcpp::plugins(cpp11)]] #include // #define STRICT_R_HEADERS // Now defined globally in Makevars #include diff --git a/src/mrtl_mctl.cpp b/src/mrtl_mctl.cpp index 8a84c4c0..e4cf7371 100644 --- a/src/mrtl_mctl.cpp +++ b/src/mrtl_mctl.cpp @@ -1,4 +1,3 @@ -// // [[Rcpp::plugins(cpp11)]] #include using namespace Rcpp; diff --git a/src/psmat.cpp b/src/psmat.cpp index 9f818dc5..724d943e 100644 --- a/src/psmat.cpp +++ b/src/psmat.cpp @@ -1,4 +1,3 @@ -// [[Rcpp::plugins(cpp11)]] #include using namespace Rcpp; diff --git a/src/pwnobs.cpp b/src/pwnobs.cpp index 3511cfaa..0e366d39 100644 --- a/src/pwnobs.cpp +++ b/src/pwnobs.cpp @@ -1,4 +1,3 @@ -// [[Rcpp::plugins(cpp11)]] #include using namespace Rcpp; diff --git a/src/qF_qG.cpp b/src/qF_qG.cpp index b6b2aff9..e25afa6f 100644 --- a/src/qF_qG.cpp +++ b/src/qF_qG.cpp @@ -1,4 +1,3 @@ -// // [[Rcpp::plugins(cpp11)]] #include using namespace Rcpp; diff --git a/src/seqid_groupid.cpp b/src/seqid_groupid.cpp index 59d3d3ee..f27dd5d3 100644 --- a/src/seqid_groupid.cpp +++ b/src/seqid_groupid.cpp @@ -1,4 +1,3 @@ -// [[Rcpp::plugins(cpp11)]] #include using namespace Rcpp; diff --git a/src/varying.cpp b/src/varying.cpp index 4a2df562..ddecd0fe 100644 --- a/src/varying.cpp +++ b/src/varying.cpp @@ -1,4 +1,3 @@ -// [[Rcpp::plugins(cpp11)]] #include using namespace Rcpp; From 8f6795e0f94bcd3f39a77c34a24583063d475dba Mon Sep 17 00:00:00 2001 From: Sebastian Krantz Date: Wed, 8 Jun 2022 14:04:09 +0200 Subject: [PATCH 7/9] Avoiding -Wmaybe-uninitialized warnings. 
--- src/kit_dup.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/kit_dup.c b/src/kit_dup.c index e2fe702b..f6caf400 100644 --- a/src/kit_dup.c +++ b/src/kit_dup.c @@ -11,7 +11,7 @@ // **************************************** SEXP dupVecIndex(SEXP x) { const int n = length(x); - int K, tx = TYPEOF(x), x_min = INT_MAX, x_max = INT_MIN, anyNA = 0; + int K = 0, tx = TYPEOF(x), x_min = INT_MAX, x_max = INT_MIN, anyNA = 0; size_t M; // if(n >= INT_MAX) error("Length of 'x' is too large. (Long vector not supported yet)"); // 1073741824 if (tx == STRSXP || tx == REALSXP || tx == CPLXSXP ) { @@ -198,7 +198,7 @@ SEXP dupVecIndex(SEXP x) { SEXP dupVecIndexKeepNA(SEXP x) { const int n = length(x); - int K, tx = TYPEOF(x); + int K = 0, tx = TYPEOF(x); size_t M; // if(n >= INT_MAX) error("Length of 'x' is too large. (Long vector not supported yet)"); // 1073741824 if (tx == STRSXP || tx == REALSXP || tx == CPLXSXP ) { @@ -620,7 +620,7 @@ SEXP groupAtVec(SEXP X, SEXP starts, SEXP naincl) { SEXP funiqueC(SEXP x) { const int n = length(x); if(n <= 1) return x; - int K, tx = TYPEOF(x); + int K = 0, tx = TYPEOF(x); size_t M; // if(n >= INT_MAX) error("Length of 'x' is too large. (Long vector not supported yet)"); // 1073741824 if (tx == STRSXP || tx == REALSXP || tx == CPLXSXP) { @@ -648,7 +648,7 @@ SEXP funiqueC(SEXP x) { int *restrict st = (int*)R_alloc((tx == LGLSXP || tx == 1000) ? (int)M : n, sizeof(int)); int g = 0; size_t id = 0; - SEXP res; + SEXP res = R_NilValue; switch (tx) { case LGLSXP: case 1000: // This is for factors or logical vectors where the size of the table is known @@ -673,7 +673,7 @@ SEXP funiqueC(SEXP x) { } Free(h); if(g == n) return x; - res = PROTECT(allocVector(tx == LGLSXP ? LGLSXP : INTSXP, g)); + PROTECT(res = allocVector(tx == LGLSXP ? 
LGLSXP : INTSXP, g)); int *restrict pres = INTEGER(res); for(int i = 0; i != g; ++i) pres[i] = px[st[i]]; } break; @@ -706,7 +706,7 @@ SEXP funiqueC(SEXP x) { } Free(h); if(g == n) return x; - res = PROTECT(allocVector(INTSXP, g)); + PROTECT(res = allocVector(INTSXP, g)); int *restrict pres = INTEGER(res); for(int i = 0; i != g; ++i) pres[i] = px[st[i]]; } break; @@ -726,7 +726,7 @@ SEXP funiqueC(SEXP x) { } Free(h); if(g == n) return x; - res = PROTECT(allocVector(REALSXP, g)); + PROTECT(res = allocVector(REALSXP, g)); double *restrict pres = REAL(res); for(int i = 0; i != g; ++i) pres[i] = px[st[i]]; } break; @@ -757,7 +757,7 @@ SEXP funiqueC(SEXP x) { } Free(h); if(g == n) return x; - res = PROTECT(allocVector(CPLXSXP, g)); + PROTECT(res = allocVector(CPLXSXP, g)); Rcomplex *restrict pres = COMPLEX(res); for(int i = 0; i != g; ++i) pres[i] = px[st[i]]; } break; @@ -775,7 +775,7 @@ SEXP funiqueC(SEXP x) { } Free(h); if(g == n) return x; - res = PROTECT(allocVector(STRSXP, g)); + PROTECT(res = allocVector(STRSXP, g)); SEXP *restrict pres = STRING_PTR(res); for(int i = 0; i != g; ++i) pres[i] = px[st[i]]; } break; From b6d6dc35491b6c6db8d2c2006e39e6e25832983f Mon Sep 17 00:00:00 2001 From: Sebastian Krantz Date: Wed, 8 Jun 2022 14:27:18 +0200 Subject: [PATCH 8/9] Small documentation fixes. --- man/collapse-options.Rd | 2 +- man/fast-data-manipulation.Rd | 4 ++-- man/fast-grouping-ordering.Rd | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/man/collapse-options.Rd b/man/collapse-options.Rd index b46063b2..b49c8e99 100644 --- a/man/collapse-options.Rd +++ b/man/collapse-options.Rd @@ -19,7 +19,7 @@ Note that none of these options will impact internal \emph{collapse} code, but they may change the way your programs run. \code{"manip"} is probably the safest option to start with. 
Specifying \code{"fast-fun"}, \code{"fast-stat-fun"}, \code{"fast-trfm-fun"} or \code{"all"} are ambitious as they replace basic R functions like \code{sum} and \code{max}, introducing \emph{collapse}'s \code{na.rm = TRUE} default and different behavior for matrices and data frames. These options also change some internal macros so that base R functions like \code{sum} or \code{max} called inside \code{fsummarise}, \code{fmutate} or \code{collap} will also receive vectorized execution. In other words, if you put \code{options(collapse_mask = "all")} before loading the package, and you have a collapse-compatible line of \emph{dplyr} code like \code{wlddev |> group_by(region, income) |> summarise(across(PCGDP:POP, sum))}, this will now receive fully optimized execution. Note however that because of \code{collapse}'s \code{na.rm = TRUE} default, the result will be different unless you add \code{na.rm = FALSE}. -In General, this option is for your convenience, if you want to write visually more appealing code or you want to translate existing \emph{dplyr} codes to \emph{collapse}. Use with care! \bold{Note} that the option takes effect upon loading the package (code is in the \code{.onLoad} file), not upon attaching it, so it needs to be set before any function from the package is accessed in any way by any code you run. 
A safe way to enable it is by using a \code{\link{.Rprofile}} file in your user or project directory (see also \href{https://www.statmethods.net/interface/customizing.html}{here} or \href{https://support.rstudio.com/hc/en-us/articles/360047157094-Managing-R-with-Rprofile-Renviron-Rprofile-site-Renviron-site-rsession-conf-and-repos-conf}{here}, the user-level file is located at \code{file.path(Sys.getenv("HOME"), ".Rprofile")} and can be edited using \code{file.edit(Sys.getenv("HOME"), ".Rprofile")}), or by using a \href{https://fastverse.github.io/fastverse/articles/fastverse_intro.html#custom-fastverse-configurations-for-projects}{\code{.fastverse}} configuration file in the project directory. +In general, this option is for your convenience, if you want to write visually more appealing code or you want to translate existing \emph{dplyr} code to \emph{collapse}. Use with care! \bold{Note} that the option takes effect upon loading the package (code is in the \code{.onLoad} function), not upon attaching it, so it needs to be set before any function from the package is accessed in any way by any code you run. A safe way to enable it is by using a \code{\link{.Rprofile}} file in your user or project directory (see also \href{https://www.statmethods.net/interface/customizing.html}{here} or \href{https://support.rstudio.com/hc/en-us/articles/360047157094-Managing-R-with-Rprofile-Renviron-Rprofile-site-Renviron-site-rsession-conf-and-repos-conf}{here}, the user-level file is located at \code{file.path(Sys.getenv("HOME"), ".Rprofile")} and can be edited using \code{file.edit(file.path(Sys.getenv("HOME"), ".Rprofile"))}), or by using a \href{https://fastverse.github.io/fastverse/articles/fastverse_intro.html#custom-fastverse-configurations-for-projects}{\code{.fastverse}} configuration file in the project directory. 
diff --git a/man/fast-data-manipulation.Rd b/man/fast-data-manipulation.Rd index 40631187..a704a790 100644 --- a/man/fast-data-manipulation.Rd +++ b/man/fast-data-manipulation.Rd @@ -34,7 +34,7 @@ \code{\link[=fselect]{fselect(<-)}} \tab\tab No methods, for data frames \tab\tab Fast select or replace columns (non-standard evaluation) \cr \code{\link[=get_vars]{get_vars(<-)}}, \code{\link[=num_vars]{num_vars(<-)}}, \code{\link[=cat_vars]{cat_vars(<-)}}, \code{\link[=char_vars]{char_vars(<-)}}, \code{\link[=fact_vars]{fact_vars(<-)}}, \code{\link[=logi_vars]{logi_vars(<-)}}, \code{\link[=date_vars]{date_vars(<-)}} \tab\tab No methods, for data frames \tab\tab Fast select or replace columns \cr \code{\link[=add_vars]{add_vars(<-)}} \tab\tab No methods, for data frames \tab\tab Fast add columns \cr - \code{\link{fsubset}} \tab\tab \code{default, matrix, data.frame} \tab\tab Fast subset data (non-standard evaluation) \cr + \code{\link{fsubset}} \tab\tab \code{default, matrix, data.frame, pseries, pdata.frame} \tab\tab Fast subset data (non-standard evaluation) \cr \code{\link{ss}} \tab\tab No methods, for data frames \tab\tab Fast subset data frames \cr \code{\link{fsummarise}} \tab\tab No methods, for data frames \tab\tab Fast data aggregation \cr @@ -42,7 +42,7 @@ \code{\link{fmutate}}, \code{\link[=ftransform]{(f/set)ftransform(<-)}} \tab\tab No methods, for data frames \tab\tab Compute, modify or delete columns (non-standard evaluation) \cr %\code{\link{settransform}} \tab\tab No methods, for data frames \tab\tab Compute, modify or delete columns by reference (non-standard evaluation) \cr \code{\link[=fcompute]{fcompute(v)}} \tab\tab No methods, for data frames \tab\tab Compute or modify columns, returned in a new data frame (non-standard evaluation) \cr - \code{\link[=roworder]{roworder(v)}} \tab\tab No methods, for data frames \tab\tab Reorder rows and return data frame (standard and non-standard evaluation) \cr + \code{\link[=roworder]{roworder(v)}} \tab\tab No 
methods, for data frames incl. pdata.frame \tab\tab Reorder rows and return data frame (standard and non-standard evaluation) \cr \code{\link[=colorder]{colorder(v)}} \tab\tab No methods, for data frames \tab\tab Reorder columns and return data frame (standard and non-standard evaluation) \cr \code{\link[=frename]{(f/set)rename}}, \code{\link[=frename]{(set)relabel}} \tab\tab No methods, for all objects with 'names' attribute \tab\tab Rename and return object / relabel columns in a data frame. \cr } diff --git a/man/fast-grouping-ordering.Rd b/man/fast-grouping-ordering.Rd index 1a397cf7..25ceb2b6 100644 --- a/man/fast-grouping-ordering.Rd +++ b/man/fast-grouping-ordering.Rd @@ -36,11 +36,11 @@ \section{Table of Functions}{ \tabular{lllll}{\emph{ Function / S3 Generic } \tab\tab \emph{ Methods } \tab\tab \emph{ Description } \cr \code{\link[=radixorder]{radixorder(v)}} \tab\tab No methods, for data frames and vectors \tab\tab Radix-based ordering + grouping information \cr - \code{\link[=roworder]{roworder(v)}} \tab\tab No methods, for data frames \tab\tab Row sorting/reordering \cr + \code{\link[=roworder]{roworder(v)}} \tab\tab No methods, for data frames incl. 
pdata.frame \tab\tab Row sorting/reordering \cr \code{\link{group}} \tab\tab No methods, for data frames and vectors \tab\tab Hash-based grouping + grouping information \cr \code{\link{GRP}} \tab\tab \code{default, GRP, factor, qG, grouped_df, pseries, pdata.frame} \tab\tab Fast grouping and a flexible grouping object \cr \code{\link{fgroup_by}} \tab\tab No methods, for data frames \tab\tab Fast grouped data frame \cr - \code{\link{funique}}, \code{\link{fnunique}} \tab\tab \code{default, data.frame, sf, pseries, pdata.frame} \tab\tab Fast (number of) unique values/rows \cr + \code{\link{funique}}, \code{\link{fnunique}} \tab\tab \code{default, data.frame, sf, pseries, pdata.frame, list} \tab\tab Fast (number of) unique values/rows \cr \code{\link{qF}} \tab\tab No methods, for vectors \tab\tab Quick factor generation \cr \code{\link{qG}} \tab\tab No methods, for vectors \tab\tab Quick grouping of vectors and a 'factor-light' class \cr \code{\link{fdroplevels}} \tab\tab \code{factor, data.frame, list} \tab\tab Fast removal of unused factor levels \cr From 10f1322d525459c054f8b830a657e5c969019e63 Mon Sep 17 00:00:00 2001 From: Sebastian Krantz Date: Wed, 8 Jun 2022 14:28:02 +0200 Subject: [PATCH 9/9] This is v1.8.4, sent to CRAN. 
--- DESCRIPTION | 2 +- NEWS.md | 4 + docs/404.html | 2 +- docs/LICENSE-text.html | 2 +- docs/articles/collapse_and_data.table.html | 2 +- docs/articles/collapse_and_dplyr.html | 2 +- docs/articles/collapse_and_plm.html | 2 +- docs/articles/collapse_and_sf.html | 2 +- docs/articles/collapse_intro.html | 2 +- docs/articles/index.html | 2 +- docs/authors.html | 2 +- docs/index.html | 2 +- docs/news/collapse1.7.digest.html | 2 +- docs/news/index.html | 1742 ++++++++++++++--- docs/reference/BY.html | 2 +- docs/reference/GGDC10S.html | 2 +- docs/reference/GRP.html | 2 +- docs/reference/TRA.html | 2 +- docs/reference/across.html | 2 +- docs/reference/arithmetic.html | 2 +- docs/reference/collap.html | 2 +- docs/reference/collapse-options.html | 11 +- docs/reference/collapse-package.html | 2 +- docs/reference/collapse-renamed.html | 2 +- docs/reference/colorder.html | 2 +- docs/reference/dapply.html | 2 +- docs/reference/data-transformations.html | 2 +- docs/reference/descr.html | 2 +- docs/reference/efficient-programming.html | 2 +- docs/reference/extract_list.html | 2 +- docs/reference/fFtest.html | 2 +- docs/reference/fast-data-manipulation.html | 15 +- docs/reference/fast-grouping-ordering.html | 11 +- .../reference/fast-statistical-functions.html | 2 +- docs/reference/fbetween_fwithin.html | 2 +- docs/reference/fcumsum.html | 2 +- docs/reference/fdiff.html | 2 +- docs/reference/fdroplevels.html | 2 +- docs/reference/ffirst_flast.html | 2 +- docs/reference/fgrowth.html | 2 +- docs/reference/fhdbetween_fhdwithin.html | 2 +- docs/reference/flag.html | 2 +- docs/reference/flm.html | 2 +- docs/reference/fmean.html | 2 +- docs/reference/fmedian.html | 2 +- docs/reference/fmin_fmax.html | 2 +- docs/reference/fmode.html | 2 +- docs/reference/fndistinct.html | 2 +- docs/reference/fnobs.html | 2 +- docs/reference/fnth.html | 2 +- docs/reference/fprod.html | 2 +- docs/reference/frename.html | 2 +- docs/reference/fscale.html | 2 +- docs/reference/fsubset.html | 2 +- 
docs/reference/fsum.html | 2 +- docs/reference/fsummarise.html | 2 +- docs/reference/ftransform.html | 2 +- docs/reference/funique.html | 2 +- docs/reference/fvar_fsd.html | 2 +- docs/reference/group.html | 2 +- docs/reference/groupid.html | 2 +- docs/reference/index.html | 2 +- docs/reference/indexing.html | 2 +- docs/reference/is.unlistable.html | 2 +- docs/reference/is_unlistable.html | 2 +- docs/reference/ldepth.html | 2 +- docs/reference/list-processing.html | 2 +- docs/reference/pad.html | 2 +- docs/reference/psacf.html | 2 +- docs/reference/psmat.html | 2 +- docs/reference/pwcor_pwcov_pwnobs.html | 2 +- docs/reference/qF.html | 2 +- docs/reference/qsu.html | 2 +- docs/reference/qtab.html | 2 +- docs/reference/quick-conversion.html | 2 +- docs/reference/radixorder.html | 2 +- docs/reference/rapply2d.html | 2 +- docs/reference/recode-replace.html | 2 +- docs/reference/roworder.html | 2 +- docs/reference/rsplit.html | 2 +- docs/reference/select_replace_vars.html | 2 +- docs/reference/seqid.html | 2 +- docs/reference/small-helpers.html | 2 +- docs/reference/summary-statistics.html | 2 +- docs/reference/t_list.html | 2 +- docs/reference/time-series-panel-series.html | 2 +- docs/reference/timeid.html | 2 +- docs/reference/unlist2d.html | 2 +- docs/reference/varying.html | 2 +- docs/reference/wlddev.html | 2 +- 90 files changed, 1589 insertions(+), 364 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index bc4fd437..f7f32f8d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: collapse Title: Advanced and Fast Data Transformation -Version: 1.8.3 +Version: 1.8.4 Authors@R: c( person("Sebastian", "Krantz", role = c("aut", "cre"), email = "sebastian.krantz@graduateinstitute.ch"), diff --git a/NEWS.md b/NEWS.md index 785f8371..30856a5c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +# collapse 1.8.4 + +* Makevars text substitution hack to have CRAN accept a package that combines C, C++ and OpenMP. 
Thanks also to @MichaelChirico for pointing me in the right direction. + # collapse 1.8.3 * Significant speed improvement in `qF/qG` (factor-generation) for character vectors with more than 100,000 obs and many levels if `sort = TRUE` (the default). For details see the `method` argument of `?qF`. diff --git a/docs/404.html b/docs/404.html index 0b74fc34..52f9b30f 100644 --- a/docs/404.html +++ b/docs/404.html @@ -32,7 +32,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html index bee7ad95..3082fb0b 100644 --- a/docs/LICENSE-text.html +++ b/docs/LICENSE-text.html @@ -17,7 +17,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/articles/collapse_and_data.table.html b/docs/articles/collapse_and_data.table.html index d57e69c9..d23873df 100644 --- a/docs/articles/collapse_and_data.table.html +++ b/docs/articles/collapse_and_data.table.html @@ -31,7 +31,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/articles/collapse_and_dplyr.html b/docs/articles/collapse_and_dplyr.html index f5b0cf12..5a199771 100644 --- a/docs/articles/collapse_and_dplyr.html +++ b/docs/articles/collapse_and_dplyr.html @@ -31,7 +31,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/articles/collapse_and_plm.html b/docs/articles/collapse_and_plm.html index 265940cf..6e91555c 100644 --- a/docs/articles/collapse_and_plm.html +++ b/docs/articles/collapse_and_plm.html @@ -31,7 +31,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/articles/collapse_and_sf.html b/docs/articles/collapse_and_sf.html index 4ce5f256..a38999b5 100644 --- a/docs/articles/collapse_and_sf.html +++ b/docs/articles/collapse_and_sf.html @@ -31,7 +31,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/articles/collapse_intro.html b/docs/articles/collapse_intro.html index fcc9c149..09d2c026 100644 --- a/docs/articles/collapse_intro.html +++ b/docs/articles/collapse_intro.html @@ -31,7 +31,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/articles/index.html b/docs/articles/index.html index 36144571..ae691354 
100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -71,7 +71,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/authors.html b/docs/authors.html index ab0c1fd6..de174c2a 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -17,7 +17,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/index.html b/docs/index.html index ee2fc4de..2317bfd5 100644 --- a/docs/index.html +++ b/docs/index.html @@ -60,7 +60,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/news/collapse1.7.digest.html b/docs/news/collapse1.7.digest.html index 1f068a88..1189d0a2 100644 --- a/docs/news/collapse1.7.digest.html +++ b/docs/news/collapse1.7.digest.html @@ -18,7 +18,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/news/index.html b/docs/news/index.html index 89e4f5f6..82c67db7 100644 --- a/docs/news/index.html +++ b/docs/news/index.html @@ -19,7 +19,7 @@ collapse - 1.8.3 + 1.8.4 @@ -61,128 +61,488 @@

Changelog

Source: NEWS.md +
+ +
  • Makevars text substitution hack to have CRAN accept a package that +combines C, C++ and OpenMP. Thanks also to @MichaelChirico for pointing +me in the right direction.
  • +
-
  • Significant speed improvement in qF/qG (factor-generation) for character vectors with more than 100,000 obs and many levels if sort = TRUE (the default). For details see the method argument of ?qF.

  • -
  • Optimizations in fmode and fndistinct for singleton groups.

  • +
    • Significant speed improvement in qF/qG +(factor-generation) for character vectors with more than 100,000 obs and +many levels if sort = TRUE (the default). For details see +the method argument of ?qF.

    • +
    • Optimizations in fmode and fndistinct +for singleton groups.

-
  • Fixed some rchk issues found by Thomas Kalibera from CRAN.

  • +
    • Fixed some rchk issues found by Thomas Kalibera from +CRAN.

    • faster funique.default method.

    • -
    • group now also internally optimizes on ‘qG’ objects.

    • +
    • group now also internally optimizes on ‘qG’ +objects.

-
  • Added function fnunique (yet another alternative to data.table::uniqueN, kit::uniqLen or dplyr::n_distinct, and principally a simple wrapper for attr(group(x), "N.groups")). At present fnunique generally outperforms the others on data frames.

  • -
  • finteraction has an additional argument factor = TRUE. Setting factor = FALSE returns a ‘qG’ object, which is more efficient if just an integer id but no factor object itself is required.

  • -
  • Operators (see .OPERATOR_FUN) have been improved a bit such that id-variables selected in the .data.frame (by, w or t arguments) or .pdata.frame methods (variables in the index) are not computed upon even if they are numeric (since the default is cols = is.numeric). In general, if cols is a function used to select columns of a certain data type, id variables are excluded from computation even if they are of that data type. It is still possible to compute on id variables by explicitly selecting them using names or indices passed to cols, or including them in the lhs of a formula passed to by.

  • +
    • Added function fnunique (yet another alternative to +data.table::uniqueN, kit::uniqLen or +dplyr::n_distinct, and principally a simple wrapper for +attr(group(x), "N.groups")). At present +fnunique generally outperforms the others on data +frames.

    • +
    • finteraction has an additional argument +factor = TRUE. Setting factor = FALSE returns +a ‘qG’ object, which is more efficient if just an integer id but no +factor object itself is required.

    • +
    • Operators (see .OPERATOR_FUN) have been improved a +bit such that id-variables selected in the .data.frame +(by, w or t arguments) or +.pdata.frame methods (variables in the index) are not +computed upon even if they are numeric (since the default is +cols = is.numeric). In general, if cols is a +function used to select columns of a certain data type, id variables are +excluded from computation even if they are of that data type. It is +still possible to compute on id variables by explicitly selecting them +using names or indices passed to cols, or including them in +the lhs of a formula passed to by.

    • -

      Further efforts to facilitate adding the group-count in fsummarise and fmutate:

      -
      • if options(collapse_mask = "all") before loading the package, an additional function n() is exported that works just like dplyr:::n(). (Note that internal optimization flags for n are always on, so if you really want the function to be called n() without setting options(collapse_mask = "all"), you could also do n <- GRPN or n <- collapse:::n)
      • -
      • otherwise the same can now always be done using GRPN(). The previous uses of GRPN are unaltered i.e. GRPN can also: -
        • fetch group sizes directly from a grouping object or grouped data frame i.e. data |> gby(id) |> GRPN() or data %>% gby(id) %>% ftransform(N = GRPN(.)) (note the dot).
        • -
        • compute group sizes on the fly, for example fsubset(data, GRPN(id) > 10L) or fsubset(data, GRPN(list(id1, id2)) > 10L) or GRPN(data, by = ~ id1 + id2).
        • +

          Further efforts to facilitate adding the group-count in +fsummarise and fmutate:

          +
          • if options(collapse_mask = "all") before loading the +package, an additional function n() is exported that works +just like dplyr:::n(). (Note that internal optimization +flags for n are always on, so if you really want the +function to be called n() without setting +options(collapse_mask = "all"), you could also do +n <- GRPN or n <- collapse:::n)
          • +
          • otherwise the same can now always be done using GRPN(). +The previous uses of GRPN are unaltered +i.e. GRPN can also: +
            • fetch group sizes directly from a grouping object or grouped data frame +i.e. data |> gby(id) |> GRPN() or +data %>% gby(id) %>% ftransform(N = GRPN(.)) (note +the dot).
            • +
            • compute group sizes on the fly, for example +fsubset(data, GRPN(id) > 10L) or +fsubset(data, GRPN(list(id1, id2)) > 10L) or +GRPN(data, by = ~ id1 + id2).
-

collapse 1.8.0, released in mid-May 2022, brings enhanced support for indexed computations on time series and panel data by introducing flexible ‘indexed_frame’ and ‘indexed_series’ classes and surrounding infrastructure, sets a modest start to OpenMP multithreading as well as data transformation by reference in statistical functions, and enhances the package's descriptive statistics toolset.

+

collapse 1.8.0, released in mid-May 2022, brings enhanced +support for indexed computations on time series and panel data by +introducing flexible ‘indexed_frame’ and ‘indexed_series’ classes and +surrounding infrastructure, sets a modest start to OpenMP multithreading +as well as data transformation by reference in statistical functions, +and enhances the package's descriptive statistics toolset.

Changes to functionality

-
  • Functions Recode, replace_non_finite, deprecated since collapse v1.1.0 and is.regular, deprecated since collapse v1.5.1 and clashing with a more important function in the zoo package, are now removed.

  • -
  • Fast Statistical Functions operating on numeric data (such as fmean, fmedian, fsum, fmin, fmax, …) now preserve attributes in more cases. Previously these functions did not preserve attributes for simple computations using the default method, and only preserved attributes in grouped computations if !is.object(x) (see NEWS section for collapse 1.4.0). This meant that fmin and fmax did not preserve the attributes of Date or POSIXct objects, and none of these functions preserved ‘units’ objects (used a lot by the sf package). Now, attributes are preserved if !inherits(x, "ts"), that is the new default of these functions is to generally keep attributes, except for ‘ts’ objects where doing so obviously causes an unwanted error (note that ‘xts’ and others are handled by the matrix or data.frame method where other principles apply, see NEWS for 1.4.0). An exception are the functions fnobs and fndistinct where the previous default is kept.

  • -
  • Time Series Functions flag, fdiff, fgrowth and psacf/pspacf/psccf (and the operators L/F/D/Dlog/G) now internally process time objects passed to the t argument (where is.object(t) && is.numeric(unclass(t))) via a new function called timeid which turns them into integer vectors based on the greatest common divisor (GCD) (see below). Previously such objects were converted to factor. This can change behavior of code e.g. a ‘Date’ variable representing monthly data may be regular when converted to factor, but is now irregular and regarded as daily data (with a GCD of 1) because of the different day counts of the months. Users should fix such code by either calling qG on the time variable (for grouping / factor-conversion) or using appropriate classes e.g. zoo::yearmon. Note that plain numeric vectors where !is.object(t) are still used directly for indexation without passing them through timeid (which can still be applied manually if desired).

  • -
  • BY now has an argument reorder = TRUE, which casts elements in the original order if NROW(result) == NROW(x) (like fmutate). Previously the result was just in order of the groups, regardless of the length of the output. To obtain the former outcome users need to set reorder = FALSE.

  • -
  • options("collapse_DT_alloccol") was removed, the default is now fixed at 100. The reason is that data.table automatically expands the range of overallocated columns if required (so the option is not really necessary), and calling R options from C slows down C code and can cause problems in parallel code.

  • +
    • Functions Recode, replace_non_finite, +deprecated since collapse v1.1.0 and is.regular, +deprecated since collapse v1.5.1 and clashing with a more +important function in the zoo package, are now +removed.

    • +
    • Fast Statistical Functions operating on numeric data +(such as fmean, fmedian, fsum, +fmin, fmax, …) now preserve attributes in more +cases. Previously these functions did not preserve attributes for simple +computations using the default method, and only preserved attributes in +grouped computations if !is.object(x) (see NEWS section for +collapse 1.4.0). This meant that fmin and fmax +did not preserve the attributes of Date or POSIXct objects, and none of +these functions preserved ‘units’ objects (used a lot by the sf +package). Now, attributes are preserved if +!inherits(x, "ts"), that is the new default of these +functions is to generally keep attributes, except for ‘ts’ objects where +doing so obviously causes an unwanted error (note that ‘xts’ and others +are handled by the matrix or data.frame method where other principles +apply, see NEWS for 1.4.0). An exception are the functions +fnobs and fndistinct where the previous +default is kept.

    • +
    • Time Series Functions flag, +fdiff, fgrowth and +psacf/pspacf/psccf (and the operators +L/F/D/Dlog/G) now internally process time objects passed to +the t argument (where +is.object(t) && is.numeric(unclass(t))) via a new +function called timeid which turns them into integer +vectors based on the greatest common divisor (GCD) (see below). +Previously such objects were converted to factor. This can change +behavior of code e.g. a ‘Date’ variable representing monthly data may be +regular when converted to factor, but is now irregular and regarded as +daily data (with a GCD of 1) because of the different day counts of the +months. Users should fix such code by either calling qG +on the time variable (for grouping / factor-conversion) or using +appropriate classes e.g. zoo::yearmon. Note that plain +numeric vectors where !is.object(t) are still used directly +for indexation without passing them through timeid (which +can still be applied manually if desired).

    • +
    • BY now has an argument reorder = TRUE, +which casts elements in the original order if +NROW(result) == NROW(x) (like fmutate). +Previously the result was just in order of the groups, regardless of the +length of the output. To obtain the former outcome users need to set +reorder = FALSE.

    • +
    • options("collapse_DT_alloccol") was removed, the +default is now fixed at 100. The reason is that data.table +automatically expands the range of overallocated columns if required (so +the option is not really necessary), and calling R options from C slows +down C code and can cause problems in parallel code.

Bug Fixes

-
  • Fixed a bug in fcumsum that caused a segfault during grouped operations on larger data, due to flawed internal memory allocation. Thanks @Gulde91 for reporting #237.

  • -
  • Fixed a bug in across caused by two function(x) statements being passed in a list e.g. mtcars |> fsummarise(acr(mpg, list(ssdd = function(x) sd(x), mu = function(x) mean(x)))). Thanks @trang1618 for reporting #233.

  • -
  • Fixed an issue in across() when logical vectors were used to select column on grouped data e.g. mtcars %>% gby(vs, am) %>% smr(acr(startsWith(names(.), "c"), fmean)) now works without error.

  • -
  • qsu gives proper output for length 1 vectors e.g. qsu(1).

  • -
  • collapse depends on R > 3.3.0, due to the use of newer C-level macros introduced then. The earlier indication of R > 2.1.0 was only based on R-level code and misleading. Thanks @ben-schwen for reporting #236. I will try to maintain this dependency for as long as possible, without being too restrained by development in R’s C API and the ALTREP system in particular, which collapse might utilize in the future.

  • +
    • Fixed a bug in fcumsum that caused a segfault during +grouped operations on larger data, due to flawed internal memory +allocation. Thanks @Gulde91 for reporting #237.

    • +
    • Fixed a bug in across caused by two +function(x) statements being passed in a list +e.g. mtcars |> fsummarise(acr(mpg, list(ssdd = function(x) sd(x), mu = function(x) mean(x)))). +Thanks @trang1618 for reporting #233.

    • +
    • Fixed an issue in across() when logical vectors were +used to select column on grouped data +e.g. mtcars %>% gby(vs, am) %>% smr(acr(startsWith(names(.), "c"), fmean)) +now works without error.

    • +
    • qsu gives proper output for length 1 vectors +e.g. qsu(1).

    • +
    • collapse depends on R > 3.3.0, due to the use of +newer C-level macros introduced then. The earlier indication of R > +2.1.0 was only based on R-level code and misleading. Thanks @ben-schwen +for reporting #236. I will try to maintain this dependency for as long +as possible, without being too restrained by development in R’s C API +and the ALTREP system in particular, which collapse might +utilize in the future.

Additions

  • -

    Introduction of ‘indexed_frame’, ‘indexed_series’ and ‘index_df’ classes: fast and flexible indexed time series and panel data classes that inherit from plm’s ‘pdata.frame’, ‘pseries’ and ‘pindex’ classes. These classes take full advantage of collapse’s computational infrastructure, are class-agnostic i.e. they can be superimposed upon any data frame or vector/matrix like object while maintaining most of the functionality of that object, support both time series and panel data, natively handle irregularity, and support ad-hoc computations inside arbitrary data masking functions and model formulas. This infrastructure consists of additional functions and methods, and modification of some existing functions and ‘pdata.frame’ / ‘pseries’ methods.

    -
    • New functions: findex_by/iby, findex/ix, unindex, reindex, is_irregular, to_plm.

    • -
    • New methods: [.indexed_series, [.indexed_frame, [<-.indexed_frame, $.indexed_frame, $<-.indexed_frame, [[.indexed_frame, [[<-.indexed_frame, [.index_df, fsubset.pseries, fsubset.pdata.frame, funique.pseries, funique.pdata.frame, roworder(v) (internal), na_omit (internal), print.indexed_series, print.indexed_frame, print.index_df, Math.indexed_series, Ops.indexed_series.

    • -
    • Modification of ‘pseries’ and ‘pdata.frame’ methods for functions flag/L/F, fdiff/D/Dlog, fgrowth/G, fcumsum, psmat, psacf/pspacf/psccf, fscale/STD, fbetween/B, fwithin/W, fhdbetween/HDB, fhdwithin/HDW, qsu and varying to take advantage of ‘indexed_frame’ and ‘indexed_series’ while continuing to work as before with ‘pdata.frame’ and ‘pseries’.

    • -

    For more information and details see help("indexing").

    +

    Introduction of ‘indexed_frame’, ‘indexed_series’ and ‘index_df’ +classes: fast and flexible indexed time series and panel data classes +that inherit from plm’s ‘pdata.frame’, ‘pseries’ and ‘pindex’ +classes. These classes take full advantage of collapse’s +computational infrastructure, are class-agnostic i.e. they can be +superimposed upon any data frame or vector/matrix like object while +maintaining most of the functionality of that object, support both time +series and panel data, natively handle irregularity, and support ad-hoc +computations inside arbitrary data masking functions and model formulas. +This infrastructure consists of additional functions and methods, and +modification of some existing functions and ‘pdata.frame’ / ‘pseries’ +methods.

    +
    • New functions: findex_by/iby, +findex/ix, unindex, reindex, +is_irregular, to_plm.

    • +
    • New methods: [.indexed_series, +[.indexed_frame, [<-.indexed_frame, +$.indexed_frame, $<-.indexed_frame, +[[.indexed_frame, [[<-.indexed_frame, +[.index_df, fsubset.pseries, +fsubset.pdata.frame, funique.pseries, +funique.pdata.frame, roworder(v) (internal), +na_omit (internal), print.indexed_series, +print.indexed_frame, print.index_df, +Math.indexed_series, +Ops.indexed_series.

    • +
    • Modification of ‘pseries’ and ‘pdata.frame’ methods for functions +flag/L/F, fdiff/D/Dlog, +fgrowth/G, fcumsum, psmat, +psacf/pspacf/psccf, fscale/STD, +fbetween/B, fwithin/W, +fhdbetween/HDB, fhdwithin/HDW, +qsu and varying to take advantage of +‘indexed_frame’ and ‘indexed_series’ while continuing to work as before +with ‘pdata.frame’ and ‘pseries’.

    • +

    For more information and details see +help("indexing").

  • -
  • Added function timeid: Generation of an integer-id/time-factor from time or date sequences represented by integer or double vectors (such as ‘Date’, ‘POSIXct’, ‘ts’, ‘yearmon’, ‘yearquarter’ or plain integers / doubles) by a numerically quite robust greatest common divisor method (see below). This function is used internally in findex_by, reindex and also in evaluation of the t argument to functions like flag/fdiff/fgrowth whenever is.object(t) && is.numeric(unclass(t)) (see also note above).

  • -
  • Programming helper function vgcd to efficiently compute the greatest common divisor from a vector of positive integer or double values (which should ideally be unique and sorted as well, timeid uses vgcd(sort(unique(diff(sort(unique(na_rm(x)))))))). Precision for doubles is up to 6 digits.

  • -
  • Programming helper function frange: A significantly faster alternative to base::range, which calls both min and max. Note that frange inherits collapse’s global na.rm = TRUE default.

  • -
  • Added function qtab/qtable: A versatile and computationally more efficient alternative to base::table. Notably, it also supports tabulations with frequency weights, and computation of a statistic over combinations of variables. Objects are of class ‘qtab’ that inherits from ‘table’. Thus all ‘table’ methods apply to it.

  • -
  • TRA was rewritten in C, and now has an additional argument set = TRUE which toggles data transformation by reference. The function setTRA was added as a shortcut which additionally returns the result invisibly. Since TRA is usually accessed internally through the like-named argument to Fast Statistical Functions, passing set = TRUE to those functions yields an internal call to setTRA. For example fmedian(num_vars(iris), g = iris$Species, TRA = "-", set = TRUE) subtracts the species-wise median from the numeric variables in the iris dataset, modifying the data in place and returning the result invisibly. Similarly the argument can be added in other workflows such as iris |> fgroup_by(Species) |> fmutate(across(1:2, fmedian, set = TRUE)) or mtcars |> ftransform(mpg = mpg %+=% hp, wt = fsd(wt, cyl, TRA = "replace_fill", set = TRUE)). Note that such chains must be ended by invisible() if no printout is wanted.

  • -
  • Exported helper function greorder, the companion to gsplit to reorder output in fmutate (and now also in BY): let g be a ‘GRP’ object (or something coercible such as a vector) and x a vector, then greorder orders data in y = unlist(gsplit(x, g)) such that identical(greorder(y, g), x).

  • +
  • Added function timeid: Generation of an +integer-id/time-factor from time or date sequences represented by +integer or double vectors (such as ‘Date’, ‘POSIXct’, ‘ts’, ‘yearmon’, +‘yearquarter’ or plain integers / doubles) by a numerically quite robust +greatest common divisor method (see below). This function is used +internally in findex_by, reindex and also in +evaluation of the t argument to functions like +flag/fdiff/fgrowth whenever +is.object(t) && is.numeric(unclass(t)) (see also +note above).

  • +
  • Programming helper function vgcd to efficiently +compute the greatest common divisor from a vector of positive integer or +double values (which should ideally be unique and sorted as well, +timeid uses +vgcd(sort(unique(diff(sort(unique(na_rm(x)))))))). +Precision for doubles is up to 6 digits.

  • +
  • Programming helper function frange: A significantly +faster alternative to base::range, which calls both +min and max. Note that frange +inherits collapse’s global na.rm = TRUE +default.

  • +
  • Added function qtab/qtable: A versatile and +computationally more efficient alternative to base::table. +Notably, it also supports tabulations with frequency weights, and +computation of a statistic over combinations of variables. Objects are +of class ‘qtab’ that inherits from ‘table’. Thus all ‘table’ methods +apply to it.

  • +
  • TRA was rewritten in C, and now has an additional +argument set = TRUE which toggles data transformation by +reference. The function setTRA was added as a shortcut +which additionally returns the result invisibly. Since TRA +is usually accessed internally through the like-named argument to +Fast Statistical Functions, passing set = TRUE to +those functions yields an internal call to setTRA. For +example +fmedian(num_vars(iris), g = iris$Species, TRA = "-", set = TRUE) +subtracts the species-wise median from the numeric variables in the iris +dataset, modifying the data in place and returning the result invisibly. +Similarly the argument can be added in other workflows such as +iris |> fgroup_by(Species) |> fmutate(across(1:2, fmedian, set = TRUE)) +or +mtcars |> ftransform(mpg = mpg %+=% hp, wt = fsd(wt, cyl, TRA = "replace_fill", set = TRUE)). +Note that such chains must be ended by invisible() if no +printout is wanted.

  • +
  • Exported helper function greorder, the companion to +gsplit to reorder output in fmutate (and now +also in BY): let g be a ‘GRP’ object (or +something coercible such as a vector) and x a vector, then +greorder orders data in +y = unlist(gsplit(x, g)) such that +identical(greorder(y, g), x).

Improvements

-
  • fmean, fprod, fmode and fndistinct were rewritten in C, providing performance improvements, particularly in fmode and fndistinct, and improvements for integers in fmean and fprod.

  • -
  • OpenMP multithreading in fsum, fmean, fmedian, fnth, fmode and fndistinct, implemented via an additional nthreads argument. The default is to use 1 thread, which internally calls a serial version of the code in fsum and fmean (thus no change in the default behavior). The plan is to slowly roll this out over all statistical functions and then introduce options to set alternative global defaults. Multi-threading internally works differently for different functions, see the nthreads argument documentation of a particular function. Unfortunately I currently cannot guarantee thread safety, as parallelization of complex loops entails some tricky bugs and I have limited time to sort these out. So please report bugs, and if you happen to have experience with OpenMP please consider examining the code and making some suggestions.

  • -
  • TRA has an additional option "replace_NA", e.g. wlddev |> fgroup_by(iso3c) |> fmutate(across(PCGDP:POP, fmedian, TRA = "replace_NA")) performs median value imputation of missing values. Similarly for a matrix X <- matrix(na_insert(rnorm(1e7)), ncol = 100), fmedian(X, TRA = "replace_NA", set = TRUE) (column-wise median imputation by reference).

  • -
  • All Fast Statistical Functions support zero group sizes (e.g. grouping with a factor that has unused levels will always produce an output of length nlevels(x) with 0 or NA elements for the unused levels). Previously this produced an error message with counting/ordinal functions fmode, fndistinct, fnth and fmedian.

  • -
  • ‘GRP’ objects now also contain a ‘group.starts’ item in the 8’th slot that gives the first positions of the unique groups, and is returned alongside the groups whenever return.groups = TRUE. This now benefits ffirst when invoked with na.rm = FALSE, e.g. wlddev %>% fgroup_by(country) %>% ffirst(na.rm = FALSE) is now just as efficient as funique(wlddev, cols = "country"). Note that no additional computing cost is incurred by preserving the ‘group.starts’ information.

  • -
  • Conversion methods GRP.factor, GRP.qG, GRP.pseries, GRP.pdata.frame and GRP.grouped_df now also efficiently check if grouping vectors are sorted (the information is stored in the “ordered” element of ‘GRP’ objects). This leads to performance improvements in gsplit / greorder and dependent functions such as BY and rsplit if factors are sorted.

  • -
  • descr() received some performance improvements (up to 2x for categorical data), and has an additional argument sort.table, allowing frequency tables for categorical variables to be sorted by frequency ("freq") or by table values ("value"). The new default is ("freq"), which presents tables in decreasing order of frequency. A method [.descr was added allowing ‘descr’ objects to be subset like a list. The print method was also enhanced, and by default now prints 14 values with the highest frequency and groups the remaining values into a single ... %s Others category. Furthermore, if there are any missing values in the column, the percentage of values missing is now printed behind Statistics. Additional arguments reverse and stepwise allow printing in reverse order and/or one variable at a time.

  • -
  • whichv (and operators %==%, %!=%) now also support comparisons of equal-length arguments e.g. 1:3 %==% 1:3. Note that this should not be used to compare 2 factors.

  • -
  • Added some code to the .onLoad function that checks for the existence of a .fastverse configuration file containing a setting for _opt_collapse_mask: If found the code makes sure that the option takes effect before the package is loaded. This means that inside projects using the fastverse and options("collapse_mask") to replace base R / dplyr functions, collapse cannot be loaded without the masking being applied, making it more secure to utilize this feature. For more information about function masking see help("collapse-options") and for .fastverse configuration files see the fastverse vignette.

  • -
  • Added hidden .list methods for fhdwithin/HDW and fhdbetween/HDB. As for the other .FAST_FUN this is just a wrapper for the data frame method and meant to be used on unclassed data frames.

  • +
    • fmean, fprod, fmode and +fndistinct were rewritten in C, providing performance +improvements, particularly in fmode and +fndistinct, and improvements for integers in +fmean and fprod.

    • +
    • OpenMP multithreading in fsum, fmean, +fmedian, fnth, fmode and +fndistinct, implemented via an additional +nthreads argument. The default is to use 1 thread, which +internally calls a serial version of the code in fsum and +fmean (thus no change in the default behavior). The plan is +to slowly roll this out over all statistical functions and then +introduce options to set alternative global defaults. Multi-threading +internally works differently for different functions, see the +nthreads argument documentation of a particular function. +Unfortunately I currently cannot guarantee thread safety, as +parallelization of complex loops entails some tricky bugs and I have +limited time to sort these out. So please report bugs, and if you happen +to have experience with OpenMP please consider examining the code and +making some suggestions.

    • +
    • TRA has an additional option +"replace_NA", +e.g. wlddev |> fgroup_by(iso3c) |> fmutate(across(PCGDP:POP, fmedian, TRA = "replace_NA")) +performs median value imputation of missing values. Similarly for a +matrix X <- matrix(na_insert(rnorm(1e7)), ncol = 100), +fmedian(X, TRA = "replace_NA", set = TRUE) (column-wise +median imputation by reference).

    • +
    • All Fast Statistical Functions support zero group sizes +(e.g. grouping with a factor that has unused levels will always produce +an output of length nlevels(x) with 0 or +NA elements for the unused levels). Previously this +produced an error message with counting/ordinal functions +fmode, fndistinct, fnth and +fmedian.

    • +
    • ‘GRP’ objects now also contain a ‘group.starts’ item in the 8’th +slot that gives the first positions of the unique groups, and is +returned alongside the groups whenever +return.groups = TRUE. This now benefits ffirst +when invoked with na.rm = FALSE, +e.g. wlddev %>% fgroup_by(country) %>% ffirst(na.rm = FALSE) +is now just as efficient as +funique(wlddev, cols = "country"). Note that no additional +computing cost is incurred by preserving the ‘group.starts’ +information.

    • +
    • Conversion methods GRP.factor, GRP.qG, +GRP.pseries, GRP.pdata.frame and +GRP.grouped_df now also efficiently check if grouping +vectors are sorted (the information is stored in the “ordered” element +of ‘GRP’ objects). This leads to performance improvements in +gsplit / greorder and dependent functions such +as BY and rsplit if factors are +sorted.

    • +
    • descr() received some performance improvements (up +to 2x for categorical data), and has an additional argument +sort.table, allowing frequency tables for categorical +variables to be sorted by frequency ("freq") or by table +values ("value"). The new default is ("freq"), +which presents tables in decreasing order of frequency. A method +[.descr was added allowing ‘descr’ objects to be subset +like a list. The print method was also enhanced, and by default now +prints 14 values with the highest frequency and groups the remaining +values into a single ... %s Others category. Furthermore, +if there are any missing values in the column, the percentage of values +missing is now printed behind Statistics. Additional +arguments reverse and stepwise allow printing +in reverse order and/or one variable at a time.

    • +
    • whichv (and operators %==%, +%!=%) now also support comparisons of equal-length +arguments e.g. 1:3 %==% 1:3. Note that this should not be +used to compare 2 factors.

    • +
    • Added some code to the .onLoad function that checks +for the existence of a .fastverse configuration file +containing a setting for _opt_collapse_mask: If found the +code makes sure that the option takes effect before the package is +loaded. This means that inside projects using the fastverse and +options("collapse_mask") to replace base R / dplyr +functions, collapse cannot be loaded without the masking being +applied, making it more secure to utilize this feature. For more +information about function masking see +help("collapse-options") and for .fastverse +configuration files see the fastverse +vignette.

    • +
    • Added hidden .list methods for +fhdwithin/HDW and fhdbetween/HDB. As for the +other .FAST_FUN this is just a wrapper for the data frame +method and meant to be used on unclassed data frames.

    • ss() supports unnamed lists / data frames.

    • -
    • The t and w arguments in ‘grouped_df’ methods (NSE) and where formula input is allowed, support ad-hoc transformations. E.g. wlddev %>% gby(iso3c) %>% flag(t = qG(date)) or L(wlddev, 1, ~ iso3c, ~qG(date)), similarly qsu(wlddev, w = ~ log(POP)), wlddev %>% gby(iso3c) %>% collapg(w = log(POP)) or wlddev %>% gby(iso3c) %>% nv() %>% fmean(w = log(POP)).

    • -
    • Small improvements to group() algorithm, avoiding some cases where the hash function performed badly, particularly with integers.

    • -
    • Function GRPnames now has a sep argument to choose a separator other than ".".

    • +
    • The t and w arguments in ‘grouped_df’ +methods (NSE) and where formula input is allowed, support ad-hoc +transformations. E.g. +wlddev %>% gby(iso3c) %>% flag(t = qG(date)) or +L(wlddev, 1, ~ iso3c, ~qG(date)), similarly +qsu(wlddev, w = ~ log(POP)), +wlddev %>% gby(iso3c) %>% collapg(w = log(POP)) or +wlddev %>% gby(iso3c) %>% nv() %>% fmean(w = log(POP)).

    • +
    • Small improvements to group() algorithm, avoiding +some cases where the hash function performed badly, particularly with +integers.

    • +
    • Function GRPnames now has a sep +argument to choose a separator other than ".".

-
  • Corrected a C-level bug in gsplit that could lead R to crash in some instances (gsplit is used internally in fsummarise, fmutate, BY and collap to perform computations with base R (non-optimized) functions).

  • -
  • Ensured that BY.grouped_df always (by default) returns grouping columns in aggregations i.e. iris |> gby(Species) |> nv() |> BY(sum) now gives the same as iris |> gby(Species) |> nv() |> fsum().

  • -
  • A . was added to the first argument of functions fselect, fsubset, colorder and fgroup_by, i.e. fselect(x, ...) -> fselect(.x, ...). The reason for this is that over time I added the option to select-rename columns e.g. fselect(mtcars, cylinders = cyl), which was not offered when these functions were created. This presents problems if columns should be renamed into x, e.g. fselect(mtcars, x = cyl) failed, see #221. Renaming the first argument to .x somewhat guards against such situations. I think this change is worthwhile to implement, because it makes the package more robust going forward, and usually the first argument of these functions is never invoked explicitly. I really hope this breaks nobody’s code.

  • -
  • Added a function GRPN to make it easy to add a column of group sizes e.g. mtcars %>% fgroup_by(cyl,vs,am) %>% ftransform(Sizes = GRPN(.)) or mtcars %>% ftransform(Sizes = GRPN(list(cyl, vs, am))) or GRPN(mtcars, by = ~cyl+vs+am).

  • -
  • Added [.pwcor and [.pwcov, to be able to subset correlation/covariance matrices without losing the print formatting.

  • +
    • Corrected a C-level bug in gsplit that could lead R +to crash in some instances (gsplit is used internally in +fsummarise, fmutate, BY and +collap to perform computations with base R (non-optimized) +functions).

    • +
    • Ensured that BY.grouped_df always (by default) +returns grouping columns in aggregations +i.e. iris |> gby(Species) |> nv() |> BY(sum) now +gives the same as +iris |> gby(Species) |> nv() |> fsum().

    • +
    • A . was added to the first argument of functions +fselect, fsubset, colorder and +fgroup_by, +i.e. fselect(x, ...) -> fselect(.x, ...). The reason for +this is that over time I added the option to select-rename columns +e.g. fselect(mtcars, cylinders = cyl), which was not +offered when these functions were created. This presents problems if +columns should be renamed into x, +e.g. fselect(mtcars, x = cyl) failed, see #221. +Renaming the first argument to .x somewhat guards against +such situations. I think this change is worthwhile to implement, because +it makes the package more robust going forward, and usually the first +argument of these functions is never invoked explicitly. I really hope +this breaks nobody’s code.

    • +
    • Added a function GRPN to make it easy to add a +column of group sizes +e.g. mtcars %>% fgroup_by(cyl,vs,am) %>% ftransform(Sizes = GRPN(.)) +or +mtcars %>% ftransform(Sizes = GRPN(list(cyl, vs, am))) +or GRPN(mtcars, by = ~cyl+vs+am).

    • +
    • Added [.pwcor and [.pwcov, to be able +to subset correlation/covariance matrices without losing the print +formatting.

-
  • Also ensuring tidyverse examples are in \donttest{} and building without the dplyr testing file to avoid issues with static code analysis on CRAN.

  • -
  • 20-50% Speed improvement in gsplit (and therefore in fsummarise, fmutate, collap and BY when invoked with base R functions) when grouping with GRP(..., sort = TRUE, return.order = TRUE). To enable this by default, the default for argument return.order in GRP was set to sort, which retains the ordering vector (needed for the optimization). Retaining the ordering vector uses up some memory which can possibly adversely affect computations with big data, but with big data sort = FALSE usually gives faster results anyway, and you can also always set return.order = FALSE (also in fgroup_by, collap), so this default gives the best of both worlds.

  • -
  • An ancient deprecated argument sort.row (replaced by sort in 2020) is now removed from collap. Also arguments return.order and method were added to collap providing full control of the grouping that happens internally.
  • +
    • Also ensuring tidyverse examples are in \donttest{} +and building without the dplyr testing file to avoid issues +with static code analysis on CRAN.

    • +
    • 20-50% Speed improvement in gsplit (and therefore in +fsummarise, fmutate, collap and +BY when invoked with base R functions) when +grouping with GRP(..., sort = TRUE, return.order = TRUE). +To enable this by default, the default for argument +return.order in GRP was set to +sort, which retains the ordering vector (needed for the +optimization). Retaining the ordering vector uses up some memory which +can possibly adversely affect computations with big data, but with big +data sort = FALSE usually gives faster results anyway, and +you can also always set return.order = FALSE (also in +fgroup_by, collap), so this default gives the +best of both worlds.

    • +
    • An ancient deprecated argument sort.row (replaced by +sort in 2020) is now removed from collap. Also +arguments return.order and method were added +to collap providing full control of the grouping that +happens internally.
-
  • Tests needed to be adjusted for the upcoming release of dplyr 1.0.8 which involves an API change in mutate. fmutate will not take over these changes i.e. fmutate(..., .keep = "none") will continue to work like dplyr::transmute. Furthermore, no more tests involving dplyr are run on CRAN, and I will also not follow along with any future dplyr API changes.

  • -
  • The C-API macro installTrChar (used in the new massign function) was replaced with installChar to maintain backwards compatibility with R versions prior to 3.6.0. Thanks @tedmoorman #213.

  • -
  • Minor improvements to group(), providing increased performance for doubles and also increased performance when the second grouping variable is integer, which turned out to be very slow in some instances.

  • +
    • Tests needed to be adjusted for the upcoming release of +dplyr 1.0.8 which involves an API change in +mutate. fmutate will not take over these +changes i.e. fmutate(..., .keep = "none") will continue to +work like dplyr::transmute. Furthermore, no more tests +involving dplyr are run on CRAN, and I will also not follow +along with any future dplyr API changes.

    • +
    • The C-API macro installTrChar (used in the new +massign function) was replaced with +installChar to maintain backwards compatibility with R +versions prior to 3.6.0. Thanks @tedmoorman #213.

    • +
    • Minor improvements to group(), providing increased +performance for doubles and also increased performance when the second +grouping variable is integer, which turned out to be very slow in some +instances.

-
  • Removed tests involving the weights package (which is not available on R-devel CRAN checks).

  • -
  • fgroup_by is more flexible, supporting computing columns e.g. fgroup_by(GGDC10S, Variable, Decade = floor(Year / 10) * 10) and various programming options e.g. fgroup_by(GGDC10S, 1:3), fgroup_by(GGDC10S, c("Variable", "Country")), or fgroup_by(GGDC10S, is.character). You can also use column sequences e.g. fgroup_by(GGDC10S, Country:Variable, Year), but this should not be mixed with computing columns. Compute expressions may also not include the : function.

  • -
  • More memory efficient attribute handling in C/C++ (using C-API macro SHALLOW_DUPLICATE_ATTRIB instead of DUPLICATE_ATTRIB) in most places.

  • +
    • Removed tests involving the weights package (which is +not available on R-devel CRAN checks).

    • +
    • fgroup_by is more flexible, supporting computing +columns +e.g. fgroup_by(GGDC10S, Variable, Decade = floor(Year / 10) * 10) +and various programming options +e.g. fgroup_by(GGDC10S, 1:3), +fgroup_by(GGDC10S, c("Variable", "Country")), or +fgroup_by(GGDC10S, is.character). You can also use column +sequences e.g. fgroup_by(GGDC10S, Country:Variable, Year), +but this should not be mixed with computing columns. Compute expressions +may also not include the : function.

    • +
    • More memory efficient attribute handling in C/C++ (using C-API +macro SHALLOW_DUPLICATE_ATTRIB instead of +DUPLICATE_ATTRIB) in most places.

-
  • Ensured that the base pipe |> is not used in tests or examples, to avoid errors on CRAN checks with older versions of R.

  • -
  • Also adjusted psacf / pspacf / psccf to take advantage of the faster grouping by group.

  • +
    • Ensured that the base pipe |> is not used in +tests or examples, to avoid errors on CRAN checks with older versions of +R.

    • +
    • Also adjusted psacf / pspacf / +psccf to take advantage of the faster grouping by +group.

  • Fixed minor C/C++ issues flagged in CRAN checks.

  • -
  • Added option ties = "last" to fmode.

  • -
  • Added argument stable.algo to qsu. Setting stable.algo = FALSE toggles a faster calculation of the standard deviation, yielding 2x speedup on large datasets.

  • -
  • Fast Statistical Functions now internally use group for grouping data if both g and TRA arguments are used, yielding efficiency gains on unsorted data.

  • -
  • Ensured that fmutate and fsummarise can be called if collapse is not attached.

  • +
  • Added option ties = "last" to +fmode.

  • +
  • Added argument stable.algo to qsu. +Setting stable.algo = FALSE toggles a faster calculation of +the standard deviation, yielding 2x speedup on large datasets.

  • +
  • Fast Statistical Functions now internally use +group for grouping data if both g and +TRA arguments are used, yielding efficiency gains on +unsorted data.

  • +
  • Ensured that fmutate and fsummarise can +be called if collapse is not attached.

-

collapse 1.7.0, released mid January 2022, brings major improvements in the computational backend of the package, its data manipulation capabilities, and a whole set of new functions that enable more flexible and memory efficient R programming - significantly enhancing the language itself. For the vast majority of codes, updating to 1.7 should not cause any problems.

+

collapse 1.7.0, released mid January 2022, brings major +improvements in the computational backend of the package, its data +manipulation capabilities, and a whole set of new functions that enable +more flexible and memory efficient R programming - significantly +enhancing the language itself. For the vast majority of codes, updating +to 1.7 should not cause any problems.

Changes to functionality

-
  • num_vars is now implemented in C, yielding a massive performance increase over checking columns using vapply(x, is.numeric, logical(1)). It selects columns where (is.double(x) || is.integer(x)) && !is.object(x). This provides the same results for most common classes found in data frames (e.g. factors and date columns are not numeric), however it is possible for users to define methods for is.numeric for other objects, which will not be respected by num_vars anymore. A prominent example are base R’s ‘ts’ objects i.e. is.numeric(AirPassengers) returns TRUE, but is.object(AirPassengers) is also TRUE so the above yields FALSE, implying - if you happened to work with data frames of ‘ts’ columns - that num_vars will now not select those anymore. Please make me aware if there are other important classes that are found in data frames and where is.numeric returns TRUE. num_vars is also used internally in collap so this might affect your aggregations.

  • -
  • In flag, fdiff and fgrowth, if a plain numeric vector is passed to the t argument such that is.double(t) && !is.object(t), it is coerced to integer using as.integer(t) and directly used as time variable, rather than applying ordered grouping first. This is to avoid the inefficiency of grouping, and owes to the fact that in most data imported into R with various packages, the time (year) variables are coded as double although they should be integer (I also don’t know of any cases where time needs to be indexed by a non-date variable with decimal places). Note that the algorithm internally handles irregularity in the time variable so this is not a problem. Should this break any code, kindly raise an issue on GitHub.

  • -
  • The function setrename now truly renames objects by reference (without creating a shallow copy). The same is true for vlabels<- (which was rewritten in C) and a new function setrelabel. Thus additional care needs to be taken (with use inside functions etc.) as the renaming will take global effects unless a shallow copy of the data was created by some prior operation inside the function. If in doubt, better use frename which creates a shallow copy.

  • -
  • Some improvements to the BY function, both in terms of performance and security. Performance is enhanced through a new C function gsplit, providing split-apply-combine computing speeds competitive with dplyr on a much broader range of R objects. Regarding Security: if the result of the computation has the same length as the original data, names / rownames and grouping columns (for grouped data) are only added to the result object if known to be valid, i.e. if the data was originally sorted by the grouping columns (information recorded by GRP.default(..., sort = TRUE), which is called internally on non-factor/GRP/qG objects). This is because BY does not reorder data after the split-apply-combine step (unlike dplyr::mutate); data are simply recombined in the order of the groups. Because of this, in general, BY should be used to compute summary statistics (unless data are sorted before grouping). The added security makes this explicit.

  • -
  • Added a method length.GRP giving the length of a grouping object. This could break code calling length on a grouping object before (which just returned the length of the list).

  • -
  • Functions renamed in collapse 1.6.0 will now print a message telling you to use the updated names. The functions under the old names will stay around for 1-3 more years.

  • +
    • num_vars is now implemented in C, yielding a massive +performance increase over checking columns using +vapply(x, is.numeric, logical(1)). It selects columns where +(is.double(x) || is.integer(x)) && !is.object(x). +This provides the same results for most common classes found in data +frames (e.g. factors and date columns are not numeric), however it is +possible for users to define methods for is.numeric for +other objects, which will not be respected by num_vars +anymore. A prominent example are base R’s ‘ts’ objects +i.e. is.numeric(AirPassengers) returns TRUE, +but is.object(AirPassengers) is also TRUE so +the above yields FALSE, implying - if you happened to work +with data frames of ‘ts’ columns - that num_vars will now +not select those anymore. Please make me aware if there are other +important classes that are found in data frames and where +is.numeric returns TRUE. num_vars +is also used internally in collap so this might affect your +aggregations.

    • +
    • In flag, fdiff and +fgrowth, if a plain numeric vector is passed to the +t argument such that +is.double(t) && !is.object(t), it is coerced to +integer using as.integer(t) and directly used as time +variable, rather than applying ordered grouping first. This is to avoid +the inefficiency of grouping, and owes to the fact that in most data +imported into R with various packages, the time (year) variables are +coded as double although they should be integer (I also don’t know of +any cases where time needs to be indexed by a non-date variable with +decimal places). Note that the algorithm internally handles irregularity +in the time variable so this is not a problem. Should this break any +code, kindly raise an issue on GitHub.

    • +
    • The function setrename now truly renames objects by +reference (without creating a shallow copy). The same is true for +vlabels<- (which was rewritten in C) and a new function +setrelabel. Thus additional care needs to be taken (with +use inside functions etc.) as the renaming will take global effects +unless a shallow copy of the data was created by some prior operation +inside the function. If in doubt, better use frename which +creates a shallow copy.

    • +
    • Some improvements to the BY function, both in terms +of performance and security. Performance is enhanced through a new C +function gsplit, providing split-apply-combine computing +speeds competitive with dplyr on a much broader range of R +objects. Regarding Security: if the result of the computation has the +same length as the original data, names / rownames and grouping columns +(for grouped data) are only added to the result object if known to be +valid, i.e. if the data was originally sorted by the grouping columns +(information recorded by GRP.default(..., sort = TRUE), +which is called internally on non-factor/GRP/qG objects). This is +because BY does not reorder data after the +split-apply-combine step (unlike dplyr::mutate); data are +simply recombined in the order of the groups. Because of this, in +general, BY should be used to compute summary statistics +(unless data are sorted before grouping). The added security makes this +explicit.

    • +
    • Added a method length.GRP giving the length of a +grouping object. This could break code calling length on a +grouping object before (which just returned the length of the +list).

    • +
    • Functions renamed in collapse 1.6.0 will now print a message +telling you to use the updated names. The functions under the old names +will stay around for 1-3 more years.

    • The passing of argument order instead of sort in function GRP (from a very early version of collapse), is now disabled.
    • +-->
      • The passing of argument order instead of +sort in function GRP (from a very early +version of collapse), is now disabled.

Bug Fixes

-
  • Fixed a bug in some functions using Welford’s Online Algorithm (fvar, fsd, fscale and qsu) to calculate variances, occurring when initial or final zero weights caused the running sum of weights in the algorithm to be zero, yielding a division by zero and NA as output although a value was expected. These functions now skip zero weights alongside missing weights, which also implies that you can pass a logical vector to the weights argument to very efficiently calculate statistics on a subset of data (e.g. using qsu).
  • +
    • Fixed a bug in some functions using Welford’s Online Algorithm +(fvar, fsd, fscale and +qsu) to calculate variances, occurring when initial or +final zero weights caused the running sum of weights in the algorithm to +be zero, yielding a division by zero and NA as output +although a value was expected. These functions now skip zero weights +alongside missing weights, which also implies that you can pass a +logical vector to the weights argument to very efficiently calculate +statistics on a subset of data (e.g. using qsu).

Additions

Basic Computational Infrastructure
-
  • Function group was added, providing a low-level interface to a new unordered grouping algorithm based on hashing in C and optimized for R’s data structures. The algorithm was heavily inspired by the great kit package of Morgan Jacob, and now feeds into the package through multiple central functions (including GRP / fgroup_by, funique and qF) when invoked with argument sort = FALSE. It is also used in internal groupings performed in data transformation functions such as fwithin (when no factor or ‘GRP’ object is provided to the g argument). The speed of the algorithm is very promising (often superior to radixorder), and it could be used in more places still. I welcome any feedback on its performance on different datasets.

  • -
  • Function gsplit provides an efficient alternative to split based on grouping objects. It is used as a new backend to rsplit (which also supports data frame) as well as BY, collap, fsummarise and fmutate - for more efficient grouped operations with functions external to the package.

  • -
  • Added multiple functions to facilitate memory efficient programming (written in C). These include elementary mathematical operations by reference (setop, %+=%, %-=%, %*=%, %/=%), supporting computations involving integers and doubles on vectors, matrices and data frames (including row-wise operations via setop) with no copies at all. Furthermore a set of functions which check a single value against a vector without generating logical vectors: whichv, whichNA (operators %==% and %!=% which return indices and are significantly faster than ==, especially inside functions like fsubset), anyv and allv (allNA was already added before). Finally, functions setv and copyv speed up operations involving the replacement of a value (x[x == 5] <- 6) or of a sequence of values from a equally sized object (x[x == 5] <- y[x == 5], or x[ind] <- y[ind] where ind could be pre-computed vectors or indices) in vectors and data frames without generating any logical vectors or materializing vector subsets.

  • -
  • Function vlengths was added as a more efficient alternative to lengths (without method dispatch, simply coded in C).

  • -
  • Function massign provides a multivariate version of assign (written in C, and supporting all basic vector types). In addition the operator %=% was added as an efficient multiple assignment operator. (It is called %=% and not %<-% to facilitate the translation of Matlab or Python codes into R, and because the zeallot package already provides multiple-assignment operators (%<-% and %->%), which are significantly more versatile, but orders of magnitude slower than %=%)

  • +
    • Function group was added, providing a low-level +interface to a new unordered grouping algorithm based on hashing in C +and optimized for R’s data structures. The algorithm was heavily +inspired by the great kit package of Morgan Jacob, and now +feeds into the package through multiple central functions (including +GRP / fgroup_by, funique and +qF) when invoked with argument sort = FALSE. +It is also used in internal groupings performed in data transformation +functions such as fwithin (when no factor or ‘GRP’ object +is provided to the g argument). The speed of the algorithm +is very promising (often superior to radixorder), and it +could be used in more places still. I welcome any feedback on its +performance on different datasets.

    • +
    • Function gsplit provides an efficient alternative to +split based on grouping objects. It is used as a new +backend to rsplit (which also supports data frame) as well +as BY, collap, fsummarise and +fmutate - for more efficient grouped operations with +functions external to the package.

    • +
    • Added multiple functions to facilitate memory efficient +programming (written in C). These include elementary mathematical +operations by reference (setop, %+=%, +%-=%, %*=%, %/=%), supporting +computations involving integers and doubles on vectors, matrices and +data frames (including row-wise operations via setop) with +no copies at all. Furthermore a set of functions which check a single +value against a vector without generating logical vectors: +whichv, whichNA (operators %==% +and %!=% which return indices and are significantly faster +than ==, especially inside functions like +fsubset), anyv and allv +(allNA was already added before). Finally, functions +setv and copyv speed up operations involving +the replacement of a value (x[x == 5] <- 6) or of a +sequence of values from a equally sized object +(x[x == 5] <- y[x == 5], or +x[ind] <- y[ind] where ind could be +pre-computed vectors or indices) in vectors and data frames without +generating any logical vectors or materializing vector subsets.

    • +
    • Function vlengths was added as a more efficient +alternative to lengths (without method dispatch, simply +coded in C).

    • +
    • Function massign provides a multivariate version of +assign (written in C, and supporting all basic vector +types). In addition the operator %=% was added as an +efficient multiple assignment operator. (It is called %=% +and not %<-% to facilitate the translation of Matlab or +Python codes into R, and because the zeallot package +already provides multiple-assignment operators (%<-% and +%->%), which are significantly more versatile, but +orders of magnitude slower than %=%)

High-Level Features
-
  • Fully fledged fmutate function that provides functionality analogous to dplyr::mutate (sequential evaluation of arguments, including arbitrary tagged expressions and across statements). fmutate is optimized to work together with the packages Fast Statistical and Data Transformation Functions, yielding fast, vectorized execution, but also benefits from gsplit for other operations.

  • -
  • across() function implemented for use inside fsummarise and fmutate. It is also optimized for Fast Statistical and Data Transformation Functions, but performs well with other functions too. It has an additional argument .apply = FALSE which will apply functions to the entire subset of the data instead of individual columns, and thus allows for nesting tibbles and estimating models or correlation matrices by groups etc. across() also supports an arbitrary number of additional arguments which are split and evaluated by groups if necessary. Multiple across() statements can be combined with tagged vector expressions in a single call to fsummarise or fmutate. Thus the computational framework is pretty general and similar to data.table, although less efficient with big datasets.

  • -
  • Added functions relabel and setrelabel to make interactive dealing with variable labels a bit easier. Note that both functions operate by reference. (Through vlabels<- which is implemented in C. Taking a shallow copy of the data frame is useless in this case because variable labels are attributes of the columns, not of the frame). The only difference between the two is that setrelabel returns the result invisibly.

  • -
  • Function shortcuts rnm and mtt added for frename and fmutate. across can also be abbreviated using acr.

  • -
  • Added two options that can be invoked before loading of the package to change the namespace: options(collapse_mask = c(...)) can be set to export copies of selected (or all) functions in the package that start with f removing the leading f e.g. fsubset -> subset (both fsubset and subset will be exported). This allows masking base R and dplyr functions (even basic functions such as sum, mean, unique etc. if desired) with collapse’s fast functions, facilitating the optimization of existing codes and allowing you to work with collapse using a more natural namespace. The package has been internally insulated against such changes, but of course they might have major effects on existing codes. Also options(collapse_F_to_FALSE = FALSE) can be invoked to get rid of the lead operator F, which masks base::F (an issue raised by some people who like to use T/F instead of TRUE/FALSE). Read the help page ?collapse-options for more information.

  • +
    • Fully fledged fmutate function that provides +functionality analogous to dplyr::mutate (sequential +evaluation of arguments, including arbitrary tagged expressions and +across statements). fmutate is optimized to +work together with the packages Fast Statistical and Data +Transformation Functions, yielding fast, vectorized execution, but +also benefits from gsplit for other operations.

    • +
    • across() function implemented for use inside +fsummarise and fmutate. It is also optimized +for Fast Statistical and Data Transformation Functions, but +performs well with other functions too. It has an additional argument +.apply = FALSE which will apply functions to the entire +subset of the data instead of individual columns, and thus allows for +nesting tibbles and estimating models or correlation matrices by groups +etc. across() also supports an arbitrary number of +additional arguments which are split and evaluated by groups if +necessary. Multiple across() statements can be combined +with tagged vector expressions in a single call to +fsummarise or fmutate. Thus the computational +framework is pretty general and similar to data.table, although +less efficient with big datasets.

    • +
    • Added functions relabel and setrelabel +to make interactive dealing with variable labels a bit easier. Note that +both functions operate by reference. (Through vlabels<- +which is implemented in C. Taking a shallow copy of the data frame is +useless in this case because variable labels are attributes of the +columns, not of the frame). The only difference between the two is that +setrelabel returns the result invisibly.

    • +
    • Function shortcuts rnm and mtt added +for frename and fmutate. across +can also be abbreviated using acr.

    • +
    • Added two options that can be invoked before loading of the +package to change the namespace: +options(collapse_mask = c(...)) can be set to export copies +of selected (or all) functions in the package that start with +f removing the leading f +e.g. fsubset -> subset (both +fsubset and subset will be exported). This +allows masking base R and dplyr functions (even basic functions such as +sum, mean, unique etc. if +desired) with collapse’s fast functions, facilitating the +optimization of existing codes and allowing you to work with +collapse using a more natural namespace. The package has been +internally insulated against such changes, but of course they might have +major effects on existing codes. Also +options(collapse_F_to_FALSE = FALSE) can be invoked to get +rid of the lead operator F, which masks +base::F (an issue raised by some people who like to use +T/F instead of +TRUE/FALSE). Read the help page +?collapse-options for more information.

Improvements

-
  • Package loads faster (because I don’t fetch functions from some other C/C++ heavy packages in .onLoad anymore, which implied unnecessary loading of a lot of DLLs).

  • -
  • fsummarise is now also fully featured supporting evaluation of arbitrary expressions and across() statements. Note that mixing Fast Statistical Functions with other functions in a single expression can yield unintended outcomes, read more at ?fsummarise.

  • -
  • funique benefits from group in the default sort = FALSE configuration, providing extra speed and unique values in first-appearance order in both the default and the data frame method, for all data types.

  • -
  • Function ss supports both empty i or j.

  • -
  • The printout of fgroup_by also shows minimum and maximum group size for unbalanced groupings.

  • -
  • In ftransformv/settransformv and fcomputev, the vars argument is also evaluated inside the data frame environment, allowing NSE specifications using column names e.g. ftransformv(data, c(col1, col2:coln), FUN).

  • -
  • qF with option sort = FALSE now generates factors with levels in first-appearance order (instead of a random order assigned by the hash function), and can also be called on an existing factor to recast the levels in first-appearance order. It is also faster with sort = FALSE (thanks to group).

  • -
  • finteraction has argument sort = FALSE to also take advantage of group.

  • -
  • rsplit has improved performance through gsplit, and an additional argument use.names, which can be used to return an unnamed list.

  • -
  • Speedup in vtypes and functions num_vars, cat_vars, char_vars, logi_vars and fact_vars. Note that num_vars behaves slightly differently as discussed above.

  • -
  • vlabels(<-) / setLabels rewritten in C, giving a ~20x speed improvement. Note that they now operate by reference.

  • -
  • vlabels, vclasses and vtypes have a use.names argument. The default is TRUE (as before).

  • -
  • colorder can rename columns on the fly and also has a new mode pos = "after" to place all selected columns after the first selected one, e.g.: colorder(mtcars, cyl, vs_new = vs, am, pos = "after"). The pos = "after" option was also added to roworderv.

  • -
  • add_stub and rm_stub have an additional cols argument to apply a stub to certain columns only e.g. add_stub(mtcars, "new_", cols = 6:9).

  • -
  • namlab has additional arguments N and Ndistinct, allowing to display number of observations and distinct values next to variable names, labels and classes, to get a nice and quick overview of the variables in a large dataset.

  • -
  • copyMostAttrib only copies the "row.names" attribute when known to be valid.

  • -
  • na_rm can now be used to efficiently remove empty or NULL elements from a list.

  • -
  • flag, fdiff and fgrowth produce fewer messages (i.e. no message if you don’t use a time variable in grouped operations, and messages about computations on highly irregular panel data only if data length exceeds 10 million obs.).

  • -
  • The print methods of pwcor and pwcov now have a return argument, allowing users to obtain the formatted correlation matrix, for exporting purposes.

  • -
  • replace_NA, recode_num and recode_char have improved performance and an additional argument set to take advantage of setv to change (some) data by reference. For replace_NA, this feature is mature and setting set = TRUE will modify all selected columns in place and return the data invisibly. For recode_num and recode_char only a part of the transformations are done by reference, thus users will still have to assign the data to preserve changes. In the future, this will be improved so that set = TRUE toggles all transformations to be done by reference.

  • +
    • Package loads faster (because I don’t fetch functions from some +other C/C++ heavy packages in .onLoad anymore, which +implied unnecessary loading of a lot of DLLs).

    • +
    • fsummarise is now also fully featured supporting +evaluation of arbitrary expressions and across() +statements. Note that mixing Fast Statistical Functions with +other functions in a single expression can yield unintended outcomes, +read more at ?fsummarise.

    • +
    • funique benefits from group in the +default sort = FALSE, configuration, providing extra speed +and unique values in first-appearance order in both the default and the +data frame method, for all data types.

    • +
    • Function ss supports both empty i or +j.

    • +
    • The printout of fgroup_by also shows minimum and +maximum group size for unbalanced groupings.

    • +
    • In ftransformv/settransformv and +fcomputev, the vars argument is also evaluated +inside the data frame environment, allowing NSE specifications using +column names +e.g. ftransformv(data, c(col1, col2:coln), FUN).

    • +
    • qF with option sort = FALSE now +generates factors with levels in first-appearance order (instead of a +random order assigned by the hash function), and can also be called on +an existing factor to recast the levels in first-appearance order. It is +also faster with sort = FALSE (thanks to +group).

    • +
    • finteraction has argument sort = FALSE +to also take advantage of group.

    • +
    • rsplit has improved performance through +gsplit, and an additional argument use.names, +which can be used to return an unnamed list.

    • +
    • Speedup in vtypes and functions +num_vars, cat_vars, char_vars, +logi_vars and fact_vars. Note that +num_vars behaves slightly differently as discussed +above.

    • +
    • vlabels(<-) / setLabels rewritten in +C, giving a ~20x speed improvement. Note that they now operate by +reference.

    • +
    • vlabels, vclasses and +vtypes have a use.names argument. The default +is TRUE (as before).

    • +
    • colorder can rename columns on the fly and also has +a new mode pos = "after" to place all selected columns +after the first selected one, e.g.: +colorder(mtcars, cyl, vs_new = vs, am, pos = "after"). The +pos = "after" option was also added to +roworderv.

    • +
    • add_stub and rm_stub have an additional +cols argument to apply a stub to certain columns only +e.g. add_stub(mtcars, "new_", cols = 6:9).

    • +
    • namlab has additional arguments N and +Ndistinct, allowing to display number of observations and +distinct values next to variable names, labels and classes, to get a +nice and quick overview of the variables in a large dataset.

    • +
    • copyMostAttrib only copies the +"row.names" attribute when known to be valid.

    • +
    • na_rm can now be used to efficiently remove empty or +NULL elements from a list.

    • +
    • flag, fdiff and fgrowth +produce fewer messages (i.e. no message if you don’t use a time variable +in grouped operations, and messages about computations on highly +irregular panel data only if data length exceeds 10 million +obs.).

    • +
    • The print methods of pwcor and pwcov +now have a return argument, allowing users to obtain the +formatted correlation matrix, for exporting purposes.

    • +
    • replace_NA, recode_num and +recode_char have improved performance and an additional +argument set to take advantage of setv to +change (some) data by reference. For replace_NA, this +feature is mature and setting set = TRUE will modify all +selected columns in place and return the data invisibly. For +recode_num and recode_char only a part of the +transformations are done by reference, thus users will still have to +assign the data to preserve changes. In the future, this will be +improved so that set = TRUE toggles all transformations to +be done by reference.

    -
    • The plot method for panel series matrices and arrays plot.psmat was improved slightly. It now supports custom colours and drawing of a grid.

    • -
    • settransform and settransformv can now be called without attaching the package e.g. collapse::settransform(data, ...). These errored before when collapse is not loaded because they are simply wrappers around data <- ftransform(data, ...). I’d like to note from a discussion that avoiding shallow copies with <- (e.g. via :=) does not appear to yield noticeable performance gains. Where data.table is faster on big data this mostly has to do with parallelism and sometimes with algorithms, generally not memory efficiency.

    • -
    • Functions setAttrib, copyAttrib and copyMostAttrib only make a shallow copy of lists, not of atomic vectors (which amounts to doing a full copy and is inefficient). Thus atomic objects are now modified in-place.

    • -
    • Small improvements: Calling qF(x, ordered = FALSE) on an ordered factor will remove the ordered class, the operators L, F, D, Dlog, G, B, W, HDB, HDW and functions like pwcor now work on unnamed matrices or data frames.

    • +
      • The plot method for panel series matrices and arrays +plot.psmat was improved slightly. It now supports custom +colours and drawing of a grid.

      • +
      • settransform and settransformv can now +be called without attaching the package +e.g. collapse::settransform(data, ...). These errored +before when collapse is not loaded because they are simply +wrappers around data <- ftransform(data, ...). I’d like +to note from a discussion +that avoiding shallow copies with <- (e.g. via +:=) does not appear to yield noticeable performance gains. +Where data.table is faster on big data this mostly has to do +with parallelism and sometimes with algorithms, generally not memory +efficiency.

      • +
      • Functions setAttrib, copyAttrib and +copyMostAttrib only make a shallow copy of lists, not of +atomic vectors (which amounts to doing a full copy and is inefficient). +Thus atomic objects are now modified in-place.

      • +
      • Small improvements: Calling qF(x, ordered = FALSE) +on an ordered factor will remove the ordered class, the operators +L, F, D, Dlog, +G, B, W, HDB, +HDW and functions like pwcor now work on +unnamed matrices or data frames.

-
  • A test that occasionally fails on Mac is removed, and all unit testing is now removed from CRAN. collapse has close to 10,000 unit tests covering all central pieces of code. Half of these tests depend on generated data, and for some reason there is always a test or two that occasionally fails on some operating system (usually not Windows), requiring me to submit a patch. This is not constructive to either the development or the use of this package, therefore tests are now removed from CRAN. They are still run on codecov.io, and every new release is thoroughly tested on Windows.
  • +
    • A test that occasionally fails on Mac is removed, and all unit +testing is now removed from CRAN. collapse has close to 10,000 +unit tests covering all central pieces of code. Half of these tests +depend on generated data, and for some reason there is always a test or +two that occasionally fails on some operating system (usually not +Windows), requiring me to submit a patch. This is not constructive to +either the development or the use of this package, therefore tests are +now removed from CRAN. They are still run on codecov.io, and every new +release is thoroughly tested on Windows.

Changes to Functionality

-
  • The first argument of ftransform was renamed to .data from X. This was done to enable the user to transform columns named “X”. For the same reason the first argument of frename was renamed to .x from x (not .data to make it explicit that .x can be any R object with a “names” attribute). It is not possible to deprecate X and x without at the same time undoing the benefits of the argument renaming, thus this change is immediate and code breaking in rare cases where the first argument is explicitly set.

  • -
  • The function is.regular to check whether an R object is atomic or list-like is deprecated and will be removed before the end of the year. This was done to avoid a namespace clash with the zoo package (#127).

  • +
    • The first argument of ftransform was renamed to +.data from X. This was done to enable the user +to transform columns named “X”. For the same reason the first argument +of frename was renamed to .x from +x (not .data to make it explicit that +.x can be any R object with a “names” attribute). It is not +possible to deprecate X and x without at the +same time undoing the benefits of the argument renaming, thus this +change is immediate and code breaking in rare cases where the first +argument is explicitly set.

    • +
    • The function is.regular to check whether an R object +is atomic or list-like is deprecated and will be removed before the end +of the year. This was done to avoid a namespace clash with the +zoo package (#127).

Bug Fixes

@@ -342,94 +1159,273 @@

Bug Fixes#99). +unlist2d produced a subsetting error if an empty list +was present in the list-tree. This is now fixed, empty or +NULL elements in the list-tree are simply ignored +(#99).

Additions

-
  • A function fsummarize was added to facilitate translating dplyr / data.table code to collapse. Like collap, it is only very fast when used with the Fast Statistical Functions.

  • -
  • A function t_list is made available to efficiently transpose lists of lists.

  • +
    • A function fsummarize was added to facilitate +translating dplyr / data.table code to +collapse. Like collap, it is only very fast when +used with the Fast Statistical Functions.

    • +
    • A function t_list is made available to efficiently +transpose lists of lists.

Improvements

-
  • C files are compiled -O3 on Windows, which gives a boost of around 20% for the grouping mechanism applied to character data.
  • +
    • C files are compiled -O3 on Windows, which gives a boost of around +20% for the grouping mechanism applied to character data.

A small patch for 1.5.0 that:

-
  • Fixes a numeric precision issue when grouping doubles (e.g. before qF(wlddev$LIFEEX) gave an error, now it works).

  • -
  • Fixes a minor issue with fhdwithin when applied to pseries and fill = FALSE.

  • +
    • Fixes a numeric precision issue when grouping doubles +(e.g. before qF(wlddev$LIFEEX) gave an error, now it +works).

    • +
    • Fixes a minor issue with fhdwithin when applied to +pseries and fill = FALSE.

-

collapse 1.5.0, released early January 2021, presents important refinements and some additional functionality.

+

collapse 1.5.0, released early January 2021, presents +important refinements and some additional functionality.

Back to CRAN

-
  • I apologize for inconveniences caused by the temporary archival of collapse from December 19, 2020. This archival was caused by the archival of the important lfe package on the 4th of December. collapse depended on lfe for higher-dimensional centering, providing the fhdbetween / fhdwithin functions for generalized linear projecting / partialling out. To remedy the damage caused by the removal of lfe, I had to rewrite fhdbetween / fhdwithin to take advantage of the demeaning algorithm provided by fixest, which has some quite different mechanics. Beforehand, I made some significant changes to fixest::demean itself to make this integration happen. The CRAN deadline was the 18th of December, and I realized too late that I would not make this. A request to CRAN for extension was declined, so collapse got archived on the 19th. I have learned from this experience, and collapse is now sufficiently insulated that it will not be taken off CRAN even if all suggested packages were removed from CRAN.
  • +
    • I apologize for inconveniences caused by the temporary archival of +collapse from December 19, 2020. This archival was caused by +the archival of the important lfe package on the 4th of +December. collapse depended on lfe for +higher-dimensional centering, providing the +fhdbetween / fhdwithin functions for generalized linear +projecting / partialling out. To remedy the damage caused by the removal +of lfe, I had to rewrite fhdbetween / fhdwithin to +take advantage of the demeaning algorithm provided by fixest, +which has some quite different mechanics. Beforehand, I made some +significant changes to fixest::demean itself to make this +integration happen. The CRAN deadline was the 18th of December, and I +realized too late that I would not make this. A request to CRAN for +extension was declined, so collapse got archived on the 19th. I +have learned from this experience, and collapse is now +sufficiently insulated that it will not be taken off CRAN even if all +suggested packages were removed from CRAN.

Bug Fixes

-
  • Segfaults in several Fast Statistical Functions when passed numeric(0) are fixed (thanks to @eshom and @acylam, #101). The default behavior is that all collapse functions return numeric(0) again, except for fnobs, fndistinct which return 0L, and fvar, fsd which return NA_real_.
  • +
    • Segfaults in several Fast Statistical Functions when passed +numeric(0) are fixed (thanks to @eshom and @acylam, #101). The +default behavior is that all collapse functions return +numeric(0) again, except for fnobs, +fndistinct which return 0L, and +fvar, fsd which return +NA_real_.

Changes to Functionality

-
  • Functions fhdwithin / HDW and fhdbetween / HDB have been reworked, delivering higher performance and greater functionality: For higher-dimensional centering and heterogeneous slopes, the demean function from the fixest package is imported (conditional on the availability of that package). The linear prediction and partialling out functionality is now built around flm and also allows for weights and different fitting methods.

  • -
  • In collap, the default behavior of give.names = "auto" was altered when used together with the custom argument. Before the function name was always added to the column names. Now it is only added if a column is aggregated with two different functions. I apologize if this breaks any code dependent on the new names, but this behavior just better reflects most common use (applying only one function per column), as well as STATA’s collapse.

  • -
  • For list processing functions like get_elem, has_elem etc. the default for the argument DF.as.list was changed from TRUE to FALSE. This means if a nested list contains data frames, these data frames will not be searched for matching elements. This default also reflects the more common usage of these functions (extracting entire data frames or computed quantities from nested lists rather than searching / subsetting lists of data frames). The change also delivers a considerable performance gain.

  • -
  • Vignettes were outsourced to the website. This nearly halves the size of the source package, and should induce users to appreciate the built-in documentation. The website also makes for much more convenient reading and navigation of these book-style vignettes.
  • +
    • Functions fhdwithin / HDW and +fhdbetween / HDB have been reworked, delivering higher +performance and greater functionality: For higher-dimensional centering +and heterogeneous slopes, the demean function from the +fixest package is imported (conditional on the availability of +that package). The linear prediction and partialling out functionality +is now built around flm and also allows for weights and +different fitting methods.

    • +
    • In collap, the default behavior of +give.names = "auto" was altered when used together with the +custom argument. Before the function name was always added +to the column names. Now it is only added if a column is aggregated with +two different functions. I apologize if this breaks any code dependent +on the new names, but this behavior just better reflects most common use +(applying only one function per column), as well as STATA’s +collapse.

    • +
    • For list processing functions like get_elem, +has_elem etc. the default for the argument +DF.as.list was changed from TRUE to +FALSE. This means if a nested list contains data frames, +these data frames will not be searched for matching elements. This +default also reflects the more common usage of these functions +(extracting entire data frames or computed quantities from nested lists +rather than searching / subsetting lists of data frames). The change +also delivers a considerable performance gain.

    • +
    • Vignettes were outsourced to the website. +This nearly halves the size of the source package, and should induce +users to appreciate the built-in documentation. The website also makes +for much more convenient reading and navigation of these book-style +vignettes.

Additions

-
  • Added a set of 10 operators %rr%, %r+%, %r-%, %r*%, %r/%, %cr%, %c+%, %c-%, %c*%, %c/% to facilitate and speed up row- and column-wise arithmetic operations involving a vector and a matrix / data frame / list. For example X %r*% v efficiently multiplies every row of X with v. Note that more advanced functionality is already provided in TRA(), dapply() and the Fast Statistical Functions, but these operators are intuitive and very convenient to use in matrix or matrix-style code, or in piped expressions.

  • -
  • Added function missing_cases (opposite of complete.cases and faster for data frames / lists).

  • +
    • Added a set of 10 operators %rr%, %r+%, +%r-%, %r*%, %r/%, +%cr%, %c+%, %c-%, +%c*%, %c/% to facilitate and speed up row- and +column-wise arithmetic operations involving a vector and a matrix / data +frame / list. For example X %r*% v efficiently multiplies +every row of X with v. Note that more advanced +functionality is already provided in TRA(), +dapply() and the Fast Statistical Functions, but +these operators are intuitive and very convenient to use in matrix or +matrix-style code, or in piped expressions.

    • +
    • Added function missing_cases (opposite of +complete.cases and faster for data frames / +lists).

    • Added function allNA for atomic vectors.

    • -
    • New vignette about using collapse together with data.table, available online.

    • +
    • New vignette about using collapse together with +data.table, available online.

Improvements

-
  • Time series functions and operators flag / L / F, fdiff / D / Dlog and fgrowth / G now natively support irregular time series and panels, and feature a ‘complete approach’ i.e. values are shifted around taking full account of the underlying time-dimension!
  • -
  • Functions pwcor and pwcov can now compute weighted correlations on the pairwise or complete observations, supported by C-code that is (conditionally) imported from the weights package.

  • +
    • Time series functions and operators flag / L / F, +fdiff / D / Dlog and fgrowth / G now natively +support irregular time series and panels, and feature a ‘complete +approach’ i.e. values are shifted around taking full account of the +underlying time-dimension!
    • +
    • Functions pwcor and pwcov can now +compute weighted correlations on the pairwise or complete observations, +supported by C-code that is (conditionally) imported from the +weights package.

    • fFtest now also supports weights.

    • -
    • collap now provides an easy workaround to aggregate some columns using weights and others without. The user may simply append the names of Fast Statistical Functions with _uw to disable weights. Example: collapse::collap(mtcars, ~ cyl, custom = list(fmean_uw = 3:4, fmean = 8:10), w = ~ wt) aggregates columns 3 through 4 using a simple mean and columns 8 through 10 using the weighted mean.

    • -
    • The parallelism in collap using parallel::mclapply has been reworked to operate at the column-level, and not at the function level as before. It is still not available for Windows though. The default number of cores was set to mc.cores = 2L, which now gives an error on Windows if parallel = TRUE.

    • -
    • function recode_char now has additional options ignore.case and fixed (passed to grepl), for enhanced recoding character data based on regular expressions.

    • -
    • rapply2d now has classes argument permitting more flexible use.

    • -
    • na_rm and some other internal functions were rewritten in C. na_rm is now 2x faster than x[!is.na(x)] with missing values and 10x faster without missing values.

    • +
    • collap now provides an easy workaround to aggregate +some columns using weights and others without. The user may simply +append the names of Fast Statistical Functions with +_uw to disable weights. Example: +collapse::collap(mtcars, ~ cyl, custom = list(fmean_uw = 3:4, fmean = 8:10), w = ~ wt) +aggregates columns 3 through 4 using a simple mean and columns 8 through +10 using the weighted mean.

    • +
    • The parallelism in collap using +parallel::mclapply has been reworked to operate at the +column-level, and not at the function level as before. It is still not +available for Windows though. The default number of cores was set to +mc.cores = 2L, which now gives an error on Windows if +parallel = TRUE.

    • +
    • function recode_char now has additional options +ignore.case and fixed (passed to +grepl), for enhanced recoding character data based on +regular expressions.

    • +
    • rapply2d now has classes argument +permitting more flexible use.

    • +
    • na_rm and some other internal functions were +rewritten in C. na_rm is now 2x faster than +x[!is.na(x)] with missing values and 10x faster without +missing values.

-
  • An improvement to the [.GRP_df method enabling the use of most data.table methods (such as :=) on a grouped data.table created with fgroup_by.

  • +
    • An improvement to the [.GRP_df method enabling the +use of most data.table methods (such as :=) on a +grouped data.table created with +fgroup_by.

    • Some documentation updates by Kevin Tappe.

collapse 1.4.1 is a small patch for 1.4.0 that:

-
  • fixes clang-UBSAN and rchk issues in 1.4.0 (minor bugs in compiled code resulting, in this case, from trying to coerce a NaN value to integer, and failing to protect a shallow copy of a variable).

  • -
  • Adds a method [.GRP_df that allows robust subsetting of grouped objects created with fgroup_by (thanks to Patrice Kiener for flagging this).

  • +
    • fixes clang-UBSAN and rchk issues in 1.4.0 (minor bugs in +compiled code resulting, in this case, from trying to coerce a +NaN value to integer, and failing to protect a shallow copy +of a variable).

    • +
    • Adds a method [.GRP_df that allows robust subsetting +of grouped objects created with fgroup_by (thanks to +Patrice Kiener for flagging this).

-

collapse 1.4.0, released early November 2020, presents some important refinements, particularly in the domain of attribute handling, as well as some additional functionality. The changes make collapse smarter, more broadly compatible and more secure, and should not break existing code.

+

collapse 1.4.0, released early November 2020, presents some +important refinements, particularly in the domain of attribute handling, +as well as some additional functionality. The changes make +collapse smarter, more broadly compatible and more secure, and +should not break existing code.

Changes to Functionality

-
  • Deep Matrix Dispatch / Extended Time Series Support: The default methods of all statistical and transformation functions dispatch to the matrix method if is.matrix(x) && !inherits(x, "matrix") evaluates to TRUE. This specification avoids invoking the default method on classed matrix-based objects (such as multivariate time series of the xts / zoo class) not inheriting a ‘matrix’ class, while still allowing the user to manually call the default method on matrices (objects with implicit or explicit ‘matrix’ class). The change implies that collapse’s generic statistical functions are now well suited to transform xts / zoo and many other time series and matrix-based classes.

  • -
  • Fully Non-Destructive Piped Workflow: fgroup_by(x, ...) now only adds a class grouped_df, not classes table_df, tbl, grouped_df, and preserves all classes of x. This implies that workflows such as x %>% fgroup_by(...) %>% fmean etc. yields an object xAG of the same class and attributes as x, not a tibble as before. collapse aims to be as broadly compatible, class-agnostic and attribute preserving as possible.

  • +
    • Deep Matrix Dispatch / Extended Time Series Support: The +default methods of all statistical and transformation functions dispatch +to the matrix method if +is.matrix(x) && !inherits(x, "matrix") evaluates to +TRUE. This specification avoids invoking the default method +on classed matrix-based objects (such as multivariate time series of the +xts / zoo class) not inheriting a ‘matrix’ class, +while still allowing the user to manually call the default method on +matrices (objects with implicit or explicit ‘matrix’ class). The change +implies that collapse’s generic statistical functions are now +well suited to transform xts / zoo and many other time +series and matrix-based classes.

    • +
    • Fully Non-Destructive Piped Workflow: +fgroup_by(x, ...) now only adds a class +grouped_df, not classes table_df, tbl, +grouped_df, and preserves all classes of x. This +implies that workflows such as +x %>% fgroup_by(...) %>% fmean etc. yields an object +xAG of the same class and attributes as x, not +a tibble as before. collapse aims to be as broadly compatible, +class-agnostic and attribute preserving as possible.

    • -Thorough and Controlled Object Conversions: Quick conversion functions qDF, qDT and qM now have additional arguments keep.attr and class providing precise user control over object conversions in terms of classes and other attributes assigned / maintained. The default (keep.attr = FALSE) yields hard conversions removing all but essential attributes from the object. E.g. before qM(EuStockMarkets) would just have returned EuStockMarkets (because is.matrix(EuStockMarkets) is TRUE) whereas now the time series class and ‘tsp’ attribute are removed. qM(EuStockMarkets, keep.attr = TRUE) returns EuStockMarkets as before.
    • +Thorough and Controlled Object Conversions: Quick +conversion functions qDF, qDT and +qM now have additional arguments keep.attr and +class providing precise user control over object +conversions in terms of classes and other attributes assigned / +maintained. The default (keep.attr = FALSE) yields +hard conversions removing all but essential attributes from the +object. E.g. before qM(EuStockMarkets) would just have +returned EuStockMarkets (because +is.matrix(EuStockMarkets) is TRUE) whereas now +the time series class and ‘tsp’ attribute are removed. +qM(EuStockMarkets, keep.attr = TRUE) returns +EuStockMarkets as before.
    • -

      Smarter Attribute Handling: Drawing on the guidance given in the R Internals manual, the following standards for optimal non-destructive attribute handling are formalized and communicated to the user:

      -
      • The default and matrix methods of the Fast Statistical Functions preserve attributes of the input in grouped aggregations (‘names’, ‘dim’ and ‘dimnames’ are suitably modified). If inputs are classed objects (e.g. factors, time series, checked by is.object), the class and other attributes are dropped. Simple (non-grouped) aggregations of vectors and matrices do not preserve attributes, unless drop = FALSE in the matrix method. An exemption is made in the default methods of functions ffirst, flast and fmode, which always preserve the attributes (as the input could well be a factor or date variable).

      • -
      • The data frame methods are unaltered: All attributes of the data frame and columns in the data frame are preserved unless the computation result from each column is a scalar (not computing by groups) and drop = TRUE (the default).

      • -
      • Transformations with functions like flag, fwithin, fscale etc. are also unaltered: All attributes of the input are preserved in the output (regardless of whether the input is a vector, matrix, data.frame or related classed object). The same holds for transformation options modifying the input (“-”, “-+”, “/”, “+”, “*”, “%%”, “-%%”) when using TRA() function or the TRA = "..." argument to the Fast Statistical Functions.

      • -
      • For TRA ‘replace’ and ‘replace_fill’ options, the data type of the STATS is preserved, not of x. This provides better results particularly with functions like fnobs and fndistinct. E.g. previously fnobs(letters, TRA = "replace") would have returned the observation counts coerced to character, because letters is character. Now the result is integer typed. For attribute handling this means that the attributes of x are preserved unless x is a classed object and the data types of x and STATS do not match. An exemption to this rule is made if x is a factor and an integer (non-factor) replacement is offered to STATS. In that case the attributes of x are copied exempting the ‘class’ and ‘levels’ attribute, e.g. so that fnobs(iris$Species, TRA = "replace") gives an integer vector, not a (malformed) factor. In the unlikely event that STATS is a classed object, the attributes of STATS are preserved and the attributes of x discarded.

      • +

        Smarter Attribute Handling: Drawing on the guidance +given in the R Internals manual, the following standards for optimal +non-destructive attribute handling are formalized and communicated to +the user:

        +
        • The default and matrix methods of the Fast Statistical +Functions preserve attributes of the input in grouped aggregations +(‘names’, ‘dim’ and ‘dimnames’ are suitably modified). If inputs are +classed objects (e.g. factors, time series, checked by +is.object), the class and other attributes are dropped. +Simple (non-grouped) aggregations of vectors and matrices do not +preserve attributes, unless drop = FALSE in the matrix +method. An exemption is made in the default methods of functions +ffirst, flast and fmode, which +always preserve the attributes (as the input could well be a factor or +date variable).

        • +
        • The data frame methods are unaltered: All attributes of the data +frame and columns in the data frame are preserved unless the computation +result from each column is a scalar (not computing by groups) and +drop = TRUE (the default).

        • +
        • Transformations with functions like flag, +fwithin, fscale etc. are also unaltered: All +attributes of the input are preserved in the output (regardless of +whether the input is a vector, matrix, data.frame or related classed +object). The same holds for transformation options modifying the input +(“-”, “-+”, “/”, “+”, “*”, “%%”, “-%%”) when using TRA() +function or the TRA = "..." argument to the Fast +Statistical Functions.

        • +
        • For TRA ‘replace’ and ‘replace_fill’ options, the +data type of the STATS is preserved, not of x. This provides better +results particularly with functions like fnobs and +fndistinct. E.g. previously +fnobs(letters, TRA = "replace") would have returned the +observation counts coerced to character, because letters is +character. Now the result is integer typed. For attribute handling this +means that the attributes of x are preserved unless x is a classed +object and the data types of x and STATS do not match. An exemption to +this rule is made if x is a factor and an integer (non-factor) +replacement is offered to STATS. In that case the attributes of x are +copied exempting the ‘class’ and ‘levels’ attribute, e.g. so that +fnobs(iris$Species, TRA = "replace") gives an integer +vector, not a (malformed) factor. In the unlikely event that STATS is a +classed object, the attributes of STATS are preserved and the attributes +of x discarded.

      @@ -437,149 +1433,370 @@

      Changes to Functionality
      • -Reduced Dependency Burden: The dependency on the lfe package was made optional. Functions fhdwithin / fhdbetween can only perform higher-dimensional centering if lfe is available. Linear prediction and centering with a single factor (among a list of covariates) is still possible without installing lfe. This change means that collapse now only depends on base R and Rcpp and is supported down to R version 2.10.
      • +Reduced Dependency Burden: The dependency on the +lfe package was made optional. Functions fhdwithin +/ fhdbetween can only perform higher-dimensional centering +if lfe is available. Linear prediction and centering with a +single factor (among a list of covariates) is still possible without +installing lfe. This change means that collapse now +only depends on base R and Rcpp and is supported down to R +version 2.10.

Additions

-
  • Added function rsplit for efficient (recursive) splitting of vectors and data frames.

  • -
  • Added function fdroplevels for very fast missing level removal + added argument drop to qF and GRP.factor, the default is drop = FALSE. The addition of fdroplevels also enhances the speed of the fFtest function.

  • -
  • fgrowth supports annualizing / compounding growth rates through added power argument.

  • -
  • A function flm was added for bare bones (weighted) linear regression fitting using different efficient methods: 4 from base R (.lm.fit, solve, qr, chol), using fastLm from RcppArmadillo (if installed), or fastLm from RcppEigen (if installed).

  • -
  • Added function qTBL to quickly convert R objects to tibble.

  • -
  • helpers setAttrib, copyAttrib and copyMostAttrib exported for fast attribute handling in R (similar to attributes<-(), these functions return a shallow copy of the first argument with the set of attributes replaced, but do not perform checks for attribute validity like attributes<-(). This can yield large performance gains with big objects).

  • -
  • helper cinv added wrapping the expression chol2inv(chol(x)) (efficient inverse of a symmetric, positive definite matrix via Choleski factorization).

  • -
  • A shortcut gby is now available to abbreviate the frequently used fgroup_by function.

  • -
  • A print method for grouped data frames of any class was added.

  • +
    • Added function rsplit for efficient (recursive) +splitting of vectors and data frames.

    • +
    • Added function fdroplevels for very fast missing +level removal + added argument drop to qF and +GRP.factor, the default is drop = FALSE. The +addition of fdroplevels also enhances the speed of the +fFtest function.

    • +
    • fgrowth supports annualizing / compounding growth +rates through added power argument.

    • +
    • A function flm was added for bare bones (weighted) +linear regression fitting using different efficient methods: 4 from base +R (.lm.fit, solve, qr, +chol), using fastLm from +RcppArmadillo (if installed), or fastLm from +RcppEigen (if installed).

    • +
    • Added function qTBL to quickly convert R objects to +tibble.

    • +
    • helpers setAttrib, copyAttrib and +copyMostAttrib exported for fast attribute handling in R +(similar to attributes<-(), these functions return a +shallow copy of the first argument with the set of attributes replaced, +but do not perform checks for attribute validity like +attributes<-(). This can yield large performance gains +with big objects).

    • +
    • helper cinv added wrapping the expression +chol2inv(chol(x)) (efficient inverse of a symmetric, +positive definite matrix via Choleski factorization).

    • +
    • A shortcut gby is now available to abbreviate the +frequently used fgroup_by function.

    • +
    • A print method for grouped data frames of any class was +added.

Improvements

-
  • Faster internal methods for factors for funique, fmode and fndistinct.
  • -
  • The grouped_df methods for flag, fdiff, fgrowth now also support multiple time variables to identify a panel e.g. data %>% fgroup_by(region, person_id) %>% flag(1:2, list(month, day)).

  • -
  • More security features for fsubset.data.frame / ss, ss is now internal generic and also supports subsetting matrices.

  • -
  • In some functions (like na_omit), passing double values (e.g. 1 instead of integer 1L) or negative indices to the cols argument produced an error or unexpected behavior. This is now fixed in all functions.

  • -
  • Fixed a bug in helper function all_obj_equal occurring if objects are not all equal.

  • -
  • Some performance improvements through increased use of pointers and C API functions.

  • +
    • Faster internal methods for factors for funique, +fmode and fndistinct.
    • +
    • The grouped_df methods for flag, +fdiff, fgrowth now also support multiple time +variables to identify a panel +e.g. data %>% fgroup_by(region, person_id) %>% flag(1:2, list(month, day)).

    • +
    • More security features for fsubset.data.frame / +ss, ss is now internal generic and also +supports subsetting matrices.

    • +
    • In some functions (like na_omit), passing double +values (e.g. 1 instead of integer 1L) or +negative indices to the cols argument produced an error or +unexpected behavior. This is now fixed in all functions.

    • +
    • Fixed a bug in helper function all_obj_equal +occurring if objects are not all equal.

    • +
    • Some performance improvements through increased use of pointers +and C API functions.

-

collapse 1.3.2, released mid September 2020:

-
  • Fixed a small bug in fndistinct for grouped distinct value counts on logical vectors.

  • -
  • Additional security for ftransform, which now efficiently checks the names of the data and replacement arguments for uniqueness, and also allows computing and transforming list-columns.

  • -
  • Added function ftransformv to facilitate transforming selected columns with function - a very efficient replacement for dplyr::mutate_if and dplyr::mutate_at.

  • -
  • frename now allows additional arguments to be passed to a renaming function.

  • +

    collapse 1.3.2, released mid September 2020: +

    +
    • Fixed a small bug in fndistinct for grouped distinct +value counts on logical vectors.

    • +
    • Additional security for ftransform, which now +efficiently checks the names of the data and replacement arguments for +uniqueness, and also allows computing and transforming +list-columns.

    • +
    • Added function ftransformv to facilitate +transforming selected columns with function - a very efficient +replacement for dplyr::mutate_if and +dplyr::mutate_at.

    • +
    • frename now allows additional arguments to be passed +to a renaming function.

-

collapse 1.3.1, released end of August 2020, is a patch for v1.3.0 that takes care of some unit test failures on certain operating systems (mostly because of numeric precision issues). It provides no changes to the code or functionality.

+

collapse 1.3.1, released end of August 2020, is a patch for v1.3.0 +that takes care of some unit test failures on certain operating systems +(mostly because of numeric precision issues). It provides no changes to +the code or functionality.

-

collapse 1.3.0, released mid August 2020:

+

collapse 1.3.0, released mid August 2020: +

Changes to Functionality

-
  • dapply and BY now drop all unnecessary attributes if return = "matrix" or return = "data.frame" are explicitly requested (the default return = "same" still seeks to preserve the input data structure).

  • -
  • unlist2d now saves integer rownames if row.names = TRUE and a list of matrices without rownames is passed, and id.factor = TRUE generates a normal factor not an ordered factor. It is however possible to write id.factor = "ordered" to get an ordered factor id.

  • -
  • fdiff argument logdiff renamed to log, and taking logs is now done in R (reduces size of C++ code and does not generate as many NaN’s). logdiff may still be used, but it may be deactivated in the future. Also in the matrix and data.frame methods for flag, fdiff and fgrowth, columns are only stub-renamed if more than one lag/difference/growth rate is computed.

  • +
    • dapply and BY now drop all unnecessary +attributes if return = "matrix" or +return = "data.frame" are explicitly requested (the default +return = "same" still seeks to preserve the input data +structure).

    • +
    • unlist2d now saves integer rownames if +row.names = TRUE and a list of matrices without rownames is +passed, and id.factor = TRUE generates a normal factor not +an ordered factor. It is however possible to write +id.factor = "ordered" to get an ordered factor id.

    • +
    • fdiff argument logdiff renamed to +log, and taking logs is now done in R (reduces size of C++ +code and does not generate as many NaN’s). logdiff may +still be used, but it may be deactivated in the future. Also in the +matrix and data.frame methods for flag, fdiff +and fgrowth, columns are only stub-renamed if more than one +lag/difference/growth rate is computed.

Additions

-
  • Added fnth for fast (grouped, weighted) n’th element/quantile computations.

  • -
  • Added roworder(v) and colorder(v) for fast row and column reordering.

  • -
  • Added frename and setrename for fast and flexible renaming (by reference).

  • -
  • Added function fungroup, as replacement for dplyr::ungroup, intended for use with fgroup_by.

  • -
  • fmedian now supports weights, computing a decently fast (grouped) weighted median based on radix ordering.

  • -
  • fmode now has the option to compute min and max mode, the default is still simply the first mode.

  • -
  • fwithin now supports quasi-demeaning (added argument theta) and can thus be used to manually estimate random-effects models.

  • -
  • funique is now generic with a default vector and data.frame method, providing fast unique values and rows of data. The default was changed to sort = FALSE.

  • -
  • The shortcut gvr was created for get_vars(..., regex = TRUE).

  • -
  • A helper .c was introduced for non-standard concatenation (i.e. .c(a, b) == c("a", "b")).

  • +
    • Added fnth for fast (grouped, weighted) n’th +element/quantile computations.

    • +
    • Added roworder(v) and colorder(v) for +fast row and column reordering.

    • +
    • Added frename and setrename for fast +and flexible renaming (by reference).

    • +
    • Added function fungroup, as replacement for +dplyr::ungroup, intended for use with +fgroup_by.

    • +
    • fmedian now supports weights, computing a decently +fast (grouped) weighted median based on radix ordering.

    • +
    • fmode now has the option to compute min and max +mode, the default is still simply the first mode.

    • +
    • fwithin now supports quasi-demeaning (added argument +theta) and can thus be used to manually estimate +random-effects models.

    • +
    • funique is now generic with a default vector and +data.frame method, providing fast unique values and rows of data. The +default was changed to sort = FALSE.

    • +
    • The shortcut gvr was created for +get_vars(..., regex = TRUE).

    • +
    • A helper .c was introduced for non-standard +concatenation (i.e. .c(a, b) == c("a", "b")).

Improvements

-
  • fmode and fndistinct have become a bit faster.

  • -
  • fgroup_by now preserves data.table’s.

  • -
  • ftransform now also supports a data.frame as replacement argument, which automatically replaces matching columns and adds unmatched ones. Also ftransform<- was created as a more formal replacement method for this feature.

  • -
  • collap columns selected through cols argument are returned in the order selected if keep.col.order = FALSE. Argument sort.row is depreciated, and replace by argument sort. In addition the decreasing and na.last arguments were added and handed down to GRP.default.

  • -
  • radixorder ‘sorted’ attribute is now always attached.

  • -
  • stats::D which is masked when collapse is attached, is now preserved through methods D.expression and D.call.

  • -
  • GRP option call = FALSE to omit a call to match.call -> minor performance improvement.

  • -
  • Several small performance improvements through rewriting some internal helper functions in C and reworking some R code.

  • -
  • Performance improvements for some helper functions, setRownames / setColnames, na_insert etc.

  • -
  • Increased scope of testing statistical functions. The functionality of the package is now secured by 7700 unit tests covering all central bits and pieces.

  • +
    • fmode and fndistinct have become a bit +faster.

    • +
    • fgroup_by now preserves +data.table’s.

    • +
    • ftransform now also supports a data.frame as +replacement argument, which automatically replaces matching columns and +adds unmatched ones. Also ftransform<- was created as a +more formal replacement method for this feature.

    • +
    • collap columns selected through cols +argument are returned in the order selected if +keep.col.order = FALSE. Argument sort.row is +deprecated, and replaced by argument sort. In addition the +decreasing and na.last arguments were added +and handed down to GRP.default.

    • +
    • radixorder ‘sorted’ attribute is now always +attached.

    • +
    • stats::D which is masked when collapse is attached, +is now preserved through methods D.expression and +D.call.

    • +
    • GRP option call = FALSE to omit a call +to match.call -> minor performance improvement.

    • +
    • Several small performance improvements through rewriting some +internal helper functions in C and reworking some R code.

    • +
    • Performance improvements for some helper functions, +setRownames / setColnames, +na_insert etc.

    • +
    • Increased scope of testing statistical functions. The +functionality of the package is now secured by 7700 unit tests covering +all central bits and pieces.

-

collapse 1.2.1, released end of May 2020:

-
  • Minor fixes for 1.2.0 issues that prevented correct installation on Mac OS X and a vignette rebuilding error on solaris.

  • -
  • fmode.grouped_df with groups and weights now saves the sum of the weights instead of the max (this makes more sense as the max only applies if all elements are unique).

  • +

    collapse 1.2.1, released end of May 2020: +

    +
    • Minor fixes for 1.2.0 issues that prevented correct installation +on Mac OS X and a vignette rebuilding error on solaris.

    • +
    • fmode.grouped_df with groups and weights now saves +the sum of the weights instead of the max (this makes more sense as the +max only applies if all elements are unique).

-

collapse 1.2.0, released mid May 2020:

+

collapse 1.2.0, released mid May 2020: +

Changes to Functionality

-
  • grouped_df methods for fast statistical functions now always attach the grouping variables to the output in aggregations, unless argument keep.group_vars = FALSE. (formerly grouping variables were only attached if also present in the data. Code hinged on this feature should be adjusted)

  • -
  • qF ordered argument default was changed to ordered = FALSE, and the NA level is only added if na.exclude = FALSE. Thus qF now behaves exactly like as.factor.

  • -
  • Recode is depreciated in favor of recode_num and recode_char, it will be removed soon. Similarly replace_non_finite was renamed to replace_Inf.

  • -
  • In mrtl and mctl the argument ret was renamed return and now takes descriptive character arguments (the previous version was a direct C++ export and unsafe, code written with these functions should be adjusted).

  • -
  • GRP argument order is depreciated in favor of argument decreasing. order can still be used but will be removed at some point.

  • +
    • grouped_df methods for fast statistical functions now +always attach the grouping variables to the output in aggregations, +unless argument keep.group_vars = FALSE. (formerly grouping +variables were only attached if also present in the data. Code hinged on +this feature should be adjusted)

    • +
    • qF ordered argument default was changed +to ordered = FALSE, and the NA level is only +added if na.exclude = FALSE. Thus qF now +behaves exactly like as.factor.

    • +
    • Recode is deprecated in favor of +recode_num and recode_char, it will be removed +soon. Similarly replace_non_finite was renamed to +replace_Inf.

    • +
    • In mrtl and mctl the argument +ret was renamed return and now takes +descriptive character arguments (the previous version was a direct C++ +export and unsafe, code written with these functions should be +adjusted).

    • +
    • GRP argument order is deprecated in +favor of argument decreasing. order can still +be used but will be removed at some point.

Bug Fixes

-
  • Fixed a bug in flag where unused factor levels caused a group size error.
  • +
    • Fixed a bug in flag where unused factor levels caused a +group size error.

Additions

  • Added a suite of functions for fast data manipulation:

    • -fselect selects variables from a data frame and is equivalent but much faster than dplyr::select.
    • +fselect selects variables from a data frame and is +equivalent but much faster than dplyr::select.
    • -fsubset is a much faster version of base::subset to subset vectors, matrices and data.frames. The function ss was also added as a faster alternative to [.data.frame.
    • +fsubset is a much faster version of +base::subset to subset vectors, matrices and data.frames. +The function ss was also added as a faster alternative to +[.data.frame.
    • -ftransform is a much faster update of base::transform, to transform data frames by adding, modifying or deleting columns. The function settransform does all of that by reference.
    • +ftransform is a much faster update of +base::transform, to transform data frames by adding, +modifying or deleting columns. The function settransform +does all of that by reference.
    • -fcompute is equivalent to ftransform but returns a new data frame containing only the columns computed from an existing one.
    • +fcompute is equivalent to ftransform but +returns a new data frame containing only the columns computed from an +existing one.
    • -na_omit is a much faster and enhanced version of base::na.omit.
    • +na_omit is a much faster and enhanced version of +base::na.omit.
    • -replace_NA efficiently replaces missing values in multi-type data.
    • +replace_NA efficiently replaces missing values in +multi-type data.
  • -
  • Added function fgroup_by as a much faster version of dplyr::group_by based on collapse grouping. It attaches a ‘GRP’ object to a data frame, but only works with collapse’s fast functions. This allows dplyr like manipulations that are fully collapse based and thus significantly faster, i.e. data %>% fgroup_by(g1,g2) %>% fselect(cola,colb) %>% fmean. Note that data %>% dplyr::group_by(g1,g2) %>% dplyr::select(cola,colb) %>% fmean still works, in which case the dplyr ‘group’ object is converted to ‘GRP’ as before. However data %>% fgroup_by(g1,g2) %>% dplyr::summarize(...) does not work.

  • -
  • Added function varying to efficiently check the variation of multi-type data over a dimension or within groups.

  • -
  • Added function radixorder, same as base::order(..., method = "radix") but more accessible and with built-in grouping features.

  • -
  • Added functions seqid and groupid for generalized run-length type id variable generation from grouping and time variables. seqid in particular strongly facilitates lagging / differencing irregularly spaced panels using flag, fdiff etc.

  • -
  • fdiff now supports quasi-differences i.e. xt − ρxt − 1 and quasi-log differences i.e. log(xt) − ρlog(xt − 1). an arbitrary ρ can be supplied.

  • -
  • Added a Dlog operator for faster access to log-differences.

  • +
  • Added function fgroup_by as a much faster version of +dplyr::group_by based on collapse grouping. It +attaches a ‘GRP’ object to a data frame, but only works with +collapse’s fast functions. This allows dplyr like +manipulations that are fully collapse based and thus +significantly faster, +i.e. data %>% fgroup_by(g1,g2) %>% fselect(cola,colb) %>% fmean. +Note that +data %>% dplyr::group_by(g1,g2) %>% dplyr::select(cola,colb) %>% fmean +still works, in which case the dplyr ‘group’ object is +converted to ‘GRP’ as before. However +data %>% fgroup_by(g1,g2) %>% dplyr::summarize(...) +does not work.

  • +
  • Added function varying to efficiently check the +variation of multi-type data over a dimension or within groups.

  • +
  • Added function radixorder, same as +base::order(..., method = "radix") but more accessible and +with built-in grouping features.

  • +
  • Added functions seqid and groupid for +generalized run-length type id variable generation from grouping and +time variables. seqid in particular strongly facilitates +lagging / differencing irregularly spaced panels using +flag, fdiff etc.

  • +
  • fdiff now supports quasi-differences i.e. $x_t - \rho x_{t-1}$ +and quasi-log differences i.e. $\log(x_t) - \rho \log(x_{t-1})$. +An arbitrary $\rho$ can be +supplied.

  • +
  • Added a Dlog operator for faster access to +log-differences.

Improvements

-
  • Faster grouping with GRP and faster factor generation with added radix method + automatic dispatch between hash and radix method. qF is now ~ 5x faster than as.factor on character and around 30x faster on numeric data. Also qG was enhanced.

  • +
    • Faster grouping with GRP and faster factor +generation with added radix method + automatic dispatch between hash and +radix method. qF is now ~ 5x faster than +as.factor on character and around 30x faster on numeric +data. Also qG was enhanced.

    • Further slight speed tweaks here and there.

    • -
    • collap now provides more control for weighted aggregations with additional arguments w, keep.w and wFUN to aggregate the weights as well. The defaults are keep.w = TRUE and wFUN = fsum. A specialty of collap remains that keep.by and keep.w also work for external objects passed, so code of the form collap(data, by, FUN, catFUN, w = data$weights) will now have an aggregated weights vector in the first column.

    • -
    • qsu now also allows weights to be passed in formula i.e. qsu(data, by = ~ group, pid = ~ panelid, w = ~ weights).

    • -
    • fgrowth has a scale argument, the default is scale = 100 which provides growth rates in percentage terms (as before), but this may now be changed.

    • -
    • All statistical and transformation functions now have a hidden list method, so they can be applied to unclassed list-objects as well. An error is however provided in grouped operations with unequal-length columns.

    • +
    • collap now provides more control for weighted +aggregations with additional arguments w, +keep.w and wFUN to aggregate the weights as +well. The defaults are keep.w = TRUE and +wFUN = fsum. A specialty of collap remains +that keep.by and keep.w also work for external +objects passed, so code of the form +collap(data, by, FUN, catFUN, w = data$weights) will now +have an aggregated weights vector in the first +column.

    • +
    • qsu now also allows weights to be passed in formula +i.e. qsu(data, by = ~ group, pid = ~ panelid, w = ~ weights).

    • +
    • fgrowth has a scale argument, the +default is scale = 100 which provides growth rates in +percentage terms (as before), but this may now be changed.

    • +
    • All statistical and transformation functions now have a hidden +list method, so they can be applied to unclassed list-objects as well. +An error is however provided in grouped operations with unequal-length +columns.

-

collapse 1.1.0 released early April 2020:

-
  • Fixed remaining gcc10, LTO and valgrind issues in C/C++ code, and added some more tests (there are now ~ 5300 tests ensuring that collapse statistical functions perform as expected).

  • -
  • Fixed the issue that supplying an unnamed list to GRP(), i.e. GRP(list(v1, v2)) would give an error. Unnamed lists are now automatically named ‘Group.1’, ‘Group.2’, etc…

  • -
  • Fixed an issue where aggregating by a single id in collap() (i.e. collap(data, ~ id1)), the id would be coded as factor in the aggregated data.frame. All variables including id’s now retain their class and attributes in the aggregated data.

  • -
  • Added weights (w) argument to fsum and fprod.

  • -
  • Added an argument mean = 0 to fwithin / W. This allows simple and grouped centering on an arbitrary mean, 0 being the default. For grouped centering mean = "overall.mean" can be specified, which will center data on the overall mean of the data. The logical argument add.global.mean = TRUE used to toggle this in collapse 1.0.0 is therefore depreciated.

  • -
  • Added arguments mean = 0 (the default) and sd = 1 (the default) to fscale / STD. These arguments now allow to (group) scale and center data to an arbitrary mean and standard deviation. Setting mean = FALSE will just scale data while preserving the mean(s). Special options for grouped scaling are mean = "overall.mean" (same as fwithin / W), and sd = "within.sd", which will scale the data such that the standard deviation of each group is equal to the within- standard deviation (= the standard deviation computed on the group-centered data). Thus group scaling a panel-dataset with mean = "overall.mean" and sd = "within.sd" harmonizes the data across all groups in terms of both mean and variance. The fast algorithm for variance calculation toggled with stable.algo = FALSE was removed from fscale. Welford’s numerically stable algorithm used by default is fast enough for all practical purposes. The fast algorithm is still available for fvar and fsd.

  • -
  • Added the modulus (%%) and subtract modulus (-%%) operations to TRA().

  • -
  • Added the function finteraction, for fast interactions, and as_character_factor to coerce a factor, or all factors in a list, to character (analogous to as_numeric_factor). Also exported the function ckmatch, for matching with error message showing non-matched elements.

  • +

    collapse 1.1.0 released early April 2020: +

    +
    • Fixed remaining gcc10, LTO and valgrind issues in C/C++ code, and +added some more tests (there are now ~ 5300 tests ensuring that +collapse statistical functions perform as expected).

    • +
    • Fixed the issue that supplying an unnamed list to +GRP(), i.e. GRP(list(v1, v2)) would give an +error. Unnamed lists are now automatically named ‘Group.1’, ‘Group.2’, +etc…

    • +
    • Fixed an issue where aggregating by a single id in +collap() (i.e. collap(data, ~ id1)), the id +would be coded as factor in the aggregated data.frame. All variables +including id’s now retain their class and attributes in the aggregated +data.

    • +
    • Added weights (w) argument to fsum and +fprod.

    • +
    • Added an argument mean = 0 to +fwithin / W. This allows simple and grouped centering on an +arbitrary mean, 0 being the default. For grouped centering +mean = "overall.mean" can be specified, which will center +data on the overall mean of the data. The logical argument +add.global.mean = TRUE used to toggle this in +collapse 1.0.0 is therefore deprecated.

    • +
    • Added arguments mean = 0 (the default) and +sd = 1 (the default) to fscale / STD. These +arguments now allow to (group) scale and center data to an arbitrary +mean and standard deviation. Setting mean = FALSE will just +scale data while preserving the mean(s). Special options for grouped +scaling are mean = "overall.mean" (same as +fwithin / W), and sd = "within.sd", which will +scale the data such that the standard deviation of each group is equal +to the within- standard deviation (= the standard deviation computed on +the group-centered data). Thus group scaling a panel-dataset with +mean = "overall.mean" and sd = "within.sd" +harmonizes the data across all groups in terms of both mean and +variance. The fast algorithm for variance calculation toggled with +stable.algo = FALSE was removed from fscale. +Welford’s numerically stable algorithm used by default is fast enough +for all practical purposes. The fast algorithm is still available for +fvar and fsd.

    • +
    • Added the modulus (%%) and subtract modulus +(-%%) operations to TRA().

    • +
    • Added the function finteraction, for fast +interactions, and as_character_factor to coerce a factor, +or all factors in a list, to character (analogous to +as_numeric_factor). Also exported the function +ckmatch, for matching with error message showing +non-matched elements.

-
  • First version of the package featuring only the functions collap and qsu based on code shared by Sebastian Krantz on R-devel, February 2019.

  • -
  • Major rework of the package using Rcpp and data.table internals, introduction of fast statistical functions and operators and expansion of the scope of the package to a broad set of data transformation and exploration tasks. Several iterations of enhancing speed of R code. Seamless integration of collapse with dplyr, plm and data.table. CRAN release of collapse 1.0.0 on 19th March 2020.

  • +
    • First version of the package featuring only the functions +collap and qsu based on code shared by +Sebastian Krantz on R-devel, February 2019.

    • +
    • Major rework of the package using Rcpp and data.table internals, +introduction of fast statistical functions and operators and expansion +of the scope of the package to a broad set of data transformation and +exploration tasks. Several iterations of enhancing speed of R code. +Seamless integration of collapse with dplyr, +plm and data.table. CRAN release of collapse +1.0.0 on 19th March 2020.

@@ -595,7 +1812,8 @@

collapse Package Options

  • "fast-fun" adds the functions contained in the macro: .FAST_FUN.

  • "fast-stat-fun" adds the functions contained in the macro: .FAST_STAT_FUN.

  • "fast-trfm-fun" adds the functions contained in: setdiff(.FAST_FUN, .FAST_STAT_FUN).

  • -
  • "all" turns on all of the above, and additionally exports a function n() for use in summarise and mutate.

  • +
  • "all" turns on all of the above, and additionally exports a function n() for use in summarise and mutate.

  • Note that none of these options will impact internal collapse code, but they may change the way your programs run. "manip" is probably the safest option to start with. Specifying "fast-fun", "fast-stat-fun", "fast-trfm-fun" or "all" are ambitious as they replace basic R functions like sum and max, introducing collapse's na.rm = TRUE default and different behavior for matrices and data frames. These options also change some internal macros so that base R functions like sum or max called inside fsummarise, fmutate or collap will also receive vectorized execution. In other words, if you put options(collapse_mask = "all") before loading the package, and you have a collapse-compatible line of dplyr code like wlddev |> group_by(region, income) |> summarise(across(PCGDP:POP, sum)), this will now receive fully optimized execution. Note however that because of collapse's na.rm = TRUE default, the result will be different unless you add na.rm = FALSE.

    -

    In General, this option is for your convenience, if you want to write visually more appealing code or you want to translate existing dplyr codes to collapse. Use with care! Note that the option takes effect upon loading the package (code is in the .onLoad file), not upon attaching it, so it needs to be set before any function from the package is accessed in any way by any code you run. A safe way to enable it is by using a .Rprofile file in your user or project directory (see also here or here, the user-level file is located at file.path(Sys.getenv("HOME"), ".Rprofile") and can be edited using file.edit(Sys.getenv("HOME"), ".Rprofile")), or by using a .fastverse configuration file in the project directory.

    +

    In general, this option is for your convenience, if you want to write visually more appealing code or you want to translate existing dplyr code to collapse. Use with care! Note that the option takes effect upon loading the package (code is in the .onLoad function), not upon attaching it, so it needs to be set before any function from the package is accessed in any way by any code you run. A safe way to enable it is by using a .Rprofile file in your user or project directory (see also here or here; the user-level file is located at file.path(Sys.getenv("HOME"), ".Rprofile") and can be edited using file.edit(file.path(Sys.getenv("HOME"), ".Rprofile"))), or by using a .fastverse configuration file in the project directory.

    @@ -120,7 +120,8 @@

    See also

    -

    Site built with pkgdown 2.0.2.

    +

    Site built with pkgdown +2.0.2.

    diff --git a/docs/reference/collapse-package.html b/docs/reference/collapse-package.html index 1b1f7f9b..e06371dd 100644 --- a/docs/reference/collapse-package.html +++ b/docs/reference/collapse-package.html @@ -25,7 +25,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/reference/collapse-renamed.html b/docs/reference/collapse-renamed.html index 794866b6..daf86768 100644 --- a/docs/reference/collapse-renamed.html +++ b/docs/reference/collapse-renamed.html @@ -72,7 +72,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/reference/colorder.html b/docs/reference/colorder.html index 0de42b05..b811c226 100644 --- a/docs/reference/colorder.html +++ b/docs/reference/colorder.html @@ -19,7 +19,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/reference/dapply.html b/docs/reference/dapply.html index 1395d63e..44faa238 100644 --- a/docs/reference/dapply.html +++ b/docs/reference/dapply.html @@ -19,7 +19,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/reference/data-transformations.html b/docs/reference/data-transformations.html index 4f778b1a..90d53835 100644 --- a/docs/reference/data-transformations.html +++ b/docs/reference/data-transformations.html @@ -48,7 +48,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/reference/descr.html b/docs/reference/descr.html index a1023d53..23c94839 100644 --- a/docs/reference/descr.html +++ b/docs/reference/descr.html @@ -19,7 +19,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/reference/efficient-programming.html b/docs/reference/efficient-programming.html index 017c6649..a2991fe0 100644 --- a/docs/reference/efficient-programming.html +++ b/docs/reference/efficient-programming.html @@ -19,7 +19,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/reference/extract_list.html b/docs/reference/extract_list.html index 9610f83c..5403482d 100644 --- a/docs/reference/extract_list.html +++ b/docs/reference/extract_list.html @@ -25,7 +25,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/reference/fFtest.html b/docs/reference/fFtest.html index 
fa28c3c6..a39e6b30 100644 --- a/docs/reference/fFtest.html +++ b/docs/reference/fFtest.html @@ -19,7 +19,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/reference/fast-data-manipulation.html b/docs/reference/fast-data-manipulation.html index 3770511a..32c559bf 100644 --- a/docs/reference/fast-data-manipulation.html +++ b/docs/reference/fast-data-manipulation.html @@ -1,7 +1,7 @@ - Fast Data Manipulation — fast-data-manipulation • collapseFast Data Manipulation — fast-data-manipulation • collapse collapse - 1.8.3 + 1.8.4 @@ -78,10 +78,10 @@

    Fast Data Manipulation

    collapse provides the following functions for fast manipulation of (mostly) data frames.

    • fselect is a much faster alternative to dplyr::select to select columns using expressions involving column names. get_vars is a more versatile and programmer friendly function to efficiently select and replace columns by names, indices, logical vectors, regular expressions or using functions to identify columns.

    • The functions num_vars, cat_vars, char_vars, fact_vars, logi_vars and date_vars are convenience functions to efficiently select and replace columns by data type.

    • add_vars efficiently adds new columns at any position within a data frame (default at the end). This can be done via replacement (i.e. add_vars(data) <- newdata) or returning the appended data (i.e. add_vars(data, newdata1, newdata2, ...)). Because of the latter, add_vars is also a more efficient alternative to cbind.data.frame.

    • -
    • fsubset is a much faster version of subset to efficiently subset vectors, matrices and data frames. If the non-standard evaluation offered by fsubset is not needed, the function ss is a much faster and also more secure alternative to [.data.frame.

    • +
    • fsubset is a much faster version of subset to efficiently subset vectors, matrices and data frames. If the non-standard evaluation offered by fsubset is not needed, the function ss is a much faster and also more secure alternative to [.data.frame.

    • fsummarise is a much faster version of dplyr::summarise when used together with the Fast Statistical Functions and fgroup_by, with which it also supports super fast weighted aggregation.

    • fmutate is a much faster version of dplyr::mutate when used together with the Fast Statistical Functions as well as fast Data Transformation Functions and fgroup_by.

    • -
    • ftransform is a much faster version of transform, which also supports list input and nested pipelines. settransform does all of that by reference, i.e. it modifies the data frame in the global environment. fcompute is similar to ftransform but only returns modified and computed columns in a new data frame.

    • +
    • ftransform is a much faster version of transform, which also supports list input and nested pipelines. settransform does all of that by reference, i.e. it modifies the data frame in the global environment. fcompute is similar to ftransform but only returns modified and computed columns in a new data frame.

    • roworder is a fast substitute for dplyr::arrange, but the syntax is inspired by data.table::setorder.

    • colorder efficiently reorders columns in a data frame, see also data.table::setcolorder.

    • frename is a fast substitute for dplyr::rename, to efficiently rename various objects. setrename renames objects by reference. relabel and setrelabel do the same thing for variable labels (see also vlabels).

    • @@ -90,8 +90,8 @@

      Fast Data Manipulation

      Table of Functions

      -

      Function / S3 Generic Methods Description
      fselect(<-)No methods, for data framesFast select or replace columns (non-standard evaluation)
      get_vars(<-), num_vars(<-), cat_vars(<-), char_vars(<-), fact_vars(<-), logi_vars(<-), date_vars(<-)No methods, for data framesFast select or replace columns
      add_vars(<-)No methods, for data framesFast add columns
      fsubsetdefault, matrix, data.frameFast subset data (non-standard evaluation)
      ssNo methods, for data framesFast subset data frames
      fsummariseNo methods, for data framesFast data aggregation
      fmutate, (f/set)ftransform(<-)No methods, for data framesCompute, modify or delete columns (non-standard evaluation)
      - fcompute(v)No methods, for data framesCompute or modify columns, returned in a new data frame (non-standard evaluation)
      roworder(v)No methods, for data framesReorder rows and return data frame (standard and non-standard evaluation)
      colorder(v)No methods, for data framesReorder columns and return data frame (standard and non-standard evaluation)
      (f/set)rename, (set)relabelNo methods, for all objects with 'names' attributeRename and return object / relabel columns in a data frame.
      +

      Function / S3 Generic Methods Description
      fselect(<-)No methods, for data framesFast select or replace columns (non-standard evaluation)
      get_vars(<-), num_vars(<-), cat_vars(<-), char_vars(<-), fact_vars(<-), logi_vars(<-), date_vars(<-)No methods, for data framesFast select or replace columns
      add_vars(<-)No methods, for data framesFast add columns
      fsubsetdefault, matrix, data.frame, pseries, pdata.frameFast subset data (non-standard evaluation)
      ssNo methods, for data framesFast subset data frames
      fsummariseNo methods, for data framesFast data aggregation
      fmutate, (f/set)ftransform(<-)No methods, for data framesCompute, modify or delete columns (non-standard evaluation)
      + fcompute(v)No methods, for data framesCompute or modify columns, returned in a new data frame (non-standard evaluation)
      roworder(v)No methods, for data frames incl. pdata.frameReorder rows and return data frame (standard and non-standard evaluation)
      colorder(v)No methods, for data framesReorder columns and return data frame (standard and non-standard evaluation)
      (f/set)rename, (set)relabelNo methods, for all objects with 'names' attributeRename and return object / relabel columns in a data frame.

      See also

      @@ -109,7 +109,8 @@

      See also

      -

      Site built with pkgdown 2.0.2.

      +

      Site built with pkgdown +2.0.2.

      diff --git a/docs/reference/fast-grouping-ordering.html b/docs/reference/fast-grouping-ordering.html index 193abe92..403afbd5 100644 --- a/docs/reference/fast-grouping-ordering.html +++ b/docs/reference/fast-grouping-ordering.html @@ -34,7 +34,7 @@ collapse - 1.8.3 + 1.8.4 @@ -78,12 +78,12 @@

      Fast Grouping and Ordering

      -

      collapse provides the following functions to efficiently group and order data:

      • radixorder, provides fast radix-ordering through direct access to the method order(..., method = "radix"), as well as the possibility to return some attributes very useful for grouping data and finding unique elements. radixorderv exists as a programmers alternative. The function roworder(v) efficiently reorders a data frame based on an ordering computed by radixorderv. +

        collapse provides the following functions to efficiently group and order data:

        • radixorder provides fast radix-ordering through direct access to the method order(..., method = "radix"), as well as the possibility to return some attributes very useful for grouping data and finding unique elements. radixorderv exists as a programmer's alternative. The function roworder(v) efficiently reorders a data frame based on an ordering computed by radixorderv.

        • group provides fast grouping in first-appearance order of rows, based on a hashing algorithm in C. Objects have class 'qG', see below.

        • GRP creates collapse grouping objects of class 'GRP' based on radixorderv or group. 'GRP' objects form the central building block for grouped operations and programming in collapse and are very efficient inputs to all collapse functions supporting grouped operations.

        • fgroup_by provides a fast replacement for dplyr::group_by, creating a grouped data frame (or data.table / tibble etc.) with a 'GRP' object attached. This grouped frame can be used for grouped operations using collapse's fast functions.

        • -
        • funique is a faster version of unique. The data frame method also allows selecting unique rows according to a subset of the columns. fnunique efficiently calculates the number of unique values/rows.

        • +
        • funique is a faster version of unique. The data frame method also allows selecting unique rows according to a subset of the columns. fnunique efficiently calculates the number of unique values/rows.

        • qF, shorthand for 'quick-factor' implements very fast factor generation from atomic vectors using either radix ordering method = "radix" or hashing method = "hash". Factors can also be used for efficient grouped programming with collapse functions, especially if they are generated using qF(x, na.exclude = FALSE) which assigns a level to missing values and attaches a class 'na.included' ensuring that no additional missing value checks are executed by collapse functions.

        • qG, shorthand for 'quick-group', generates a kind of factor-light without the levels attribute but instead an attribute providing the number of levels. Optionally the levels / groups can be attached, but without converting them to character. Objects have a class 'qG', which is also recognized in the collapse ecosystem.

        • fdroplevels is a substantially faster replacement for droplevels.

        • @@ -96,7 +96,7 @@

          Fast Grouping and Ordering

          Table of Functions

          -

          Function / S3 Generic Methods Description
          radixorder(v)No methods, for data frames and vectorsRadix-based ordering + grouping information
          roworder(v)No methods, for data framesRow sorting/reordering
          groupNo methods, for data frames and vectorsHash-based grouping + grouping information
          GRPdefault, GRP, factor, qG, grouped_df, pseries, pdata.frameFast grouping and a flexible grouping object
          fgroup_byNo methods, for data framesFast grouped data frame
          funique, fnuniquedefault, data.frame, sf, pseries, pdata.frameFast (number of) unique values/rows
          qFNo methods, for vectorsQuick factor generation
          qGNo methods, for vectorsQuick grouping of vectors and a 'factor-light' class
          fdroplevelsfactor, data.frame, listFast removal of unused factor levels
          finteractionNo methods, for data frames and vectorsFast interactions
          groupidNo methods, for vectorsRun-length type group-id
          seqidNo methods, for integer vectorsRun-length type integer sequence-id
          timeidNo methods, for integer or double vectorsInteger-id from time/date sequences
          +

          Function / S3 Generic Methods Description
          radixorder(v)No methods, for data frames and vectorsRadix-based ordering + grouping information
          roworder(v)No methods, for data frames incl. pdata.frameRow sorting/reordering
          groupNo methods, for data frames and vectorsHash-based grouping + grouping information
          GRPdefault, GRP, factor, qG, grouped_df, pseries, pdata.frameFast grouping and a flexible grouping object
          fgroup_byNo methods, for data framesFast grouped data frame
          funique, fnuniquedefault, data.frame, sf, pseries, pdata.frame, listFast (number of) unique values/rows
          qFNo methods, for vectorsQuick factor generation
          qGNo methods, for vectorsQuick grouping of vectors and a 'factor-light' class
          fdroplevelsfactor, data.frame, listFast removal of unused factor levels
          finteractionNo methods, for data frames and vectorsFast interactions
          groupidNo methods, for vectorsRun-length type group-id
          seqidNo methods, for integer vectorsRun-length type integer sequence-id
          timeidNo methods, for integer or double vectorsInteger-id from time/date sequences
      -

      Site built with pkgdown 2.0.2.

      +

      Site built with pkgdown +2.0.2.

      diff --git a/docs/reference/fast-statistical-functions.html b/docs/reference/fast-statistical-functions.html index 9c5b3623..a4d0ba0c 100644 --- a/docs/reference/fast-statistical-functions.html +++ b/docs/reference/fast-statistical-functions.html @@ -19,7 +19,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/reference/fbetween_fwithin.html b/docs/reference/fbetween_fwithin.html index 89ef558c..fce6b723 100644 --- a/docs/reference/fbetween_fwithin.html +++ b/docs/reference/fbetween_fwithin.html @@ -21,7 +21,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/reference/fcumsum.html b/docs/reference/fcumsum.html index 127cb14d..848ceebf 100644 --- a/docs/reference/fcumsum.html +++ b/docs/reference/fcumsum.html @@ -19,7 +19,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/reference/fdiff.html b/docs/reference/fdiff.html index b71d4c70..0ba83c64 100644 --- a/docs/reference/fdiff.html +++ b/docs/reference/fdiff.html @@ -21,7 +21,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/reference/fdroplevels.html b/docs/reference/fdroplevels.html index acc77ed1..3283a962 100644 --- a/docs/reference/fdroplevels.html +++ b/docs/reference/fdroplevels.html @@ -19,7 +19,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/reference/ffirst_flast.html b/docs/reference/ffirst_flast.html index 634e43ea..e1c7c3ae 100644 --- a/docs/reference/ffirst_flast.html +++ b/docs/reference/ffirst_flast.html @@ -19,7 +19,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/reference/fgrowth.html b/docs/reference/fgrowth.html index 6cee9015..42bb4619 100644 --- a/docs/reference/fgrowth.html +++ b/docs/reference/fgrowth.html @@ -21,7 +21,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/reference/fhdbetween_fhdwithin.html b/docs/reference/fhdbetween_fhdwithin.html index 1aab6d9f..1569734b 100644 --- a/docs/reference/fhdbetween_fhdwithin.html +++ b/docs/reference/fhdbetween_fhdwithin.html @@ -20,7 +20,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/reference/flag.html b/docs/reference/flag.html index 58fd7b52..03b209c7 
100644 --- a/docs/reference/flag.html +++ b/docs/reference/flag.html @@ -20,7 +20,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/reference/flm.html b/docs/reference/flm.html index a6ce7d84..c7e490d6 100644 --- a/docs/reference/flm.html +++ b/docs/reference/flm.html @@ -19,7 +19,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/reference/fmean.html b/docs/reference/fmean.html index db99d0fc..dffab384 100644 --- a/docs/reference/fmean.html +++ b/docs/reference/fmean.html @@ -20,7 +20,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/reference/fmedian.html b/docs/reference/fmedian.html index 51ff75d3..fa0270d9 100644 --- a/docs/reference/fmedian.html +++ b/docs/reference/fmedian.html @@ -19,7 +19,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/reference/fmin_fmax.html b/docs/reference/fmin_fmax.html index 2f449d37..f14a9059 100644 --- a/docs/reference/fmin_fmax.html +++ b/docs/reference/fmin_fmax.html @@ -19,7 +19,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/reference/fmode.html b/docs/reference/fmode.html index f96b5fc6..68737528 100644 --- a/docs/reference/fmode.html +++ b/docs/reference/fmode.html @@ -20,7 +20,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/reference/fndistinct.html b/docs/reference/fndistinct.html index b7e2e318..cc9351ce 100644 --- a/docs/reference/fndistinct.html +++ b/docs/reference/fndistinct.html @@ -19,7 +19,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/reference/fnobs.html b/docs/reference/fnobs.html index 2e7f3406..ccc131d7 100644 --- a/docs/reference/fnobs.html +++ b/docs/reference/fnobs.html @@ -19,7 +19,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/reference/fnth.html b/docs/reference/fnth.html index e36a2ce2..bc58b237 100644 --- a/docs/reference/fnth.html +++ b/docs/reference/fnth.html @@ -19,7 +19,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/reference/fprod.html b/docs/reference/fprod.html index d3780260..5e5ae5c7 100644 --- a/docs/reference/fprod.html +++ b/docs/reference/fprod.html @@ -19,7 +19,7 @@ collapse - 1.8.3 + 1.8.4 
diff --git a/docs/reference/frename.html b/docs/reference/frename.html index 2786dce5..aa1fa533 100644 --- a/docs/reference/frename.html +++ b/docs/reference/frename.html @@ -19,7 +19,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/reference/fscale.html b/docs/reference/fscale.html index e39cbbb3..3128d59f 100644 --- a/docs/reference/fscale.html +++ b/docs/reference/fscale.html @@ -19,7 +19,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/reference/fsubset.html b/docs/reference/fsubset.html index c942edb5..e3768433 100644 --- a/docs/reference/fsubset.html +++ b/docs/reference/fsubset.html @@ -20,7 +20,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/reference/fsum.html b/docs/reference/fsum.html index e0d7c873..502bca3f 100644 --- a/docs/reference/fsum.html +++ b/docs/reference/fsum.html @@ -19,7 +19,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/reference/fsummarise.html b/docs/reference/fsummarise.html index fb661357..e7cbddee 100644 --- a/docs/reference/fsummarise.html +++ b/docs/reference/fsummarise.html @@ -19,7 +19,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/reference/ftransform.html b/docs/reference/ftransform.html index 60ef0f77..334e46a6 100644 --- a/docs/reference/ftransform.html +++ b/docs/reference/ftransform.html @@ -20,7 +20,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/reference/funique.html b/docs/reference/funique.html index 20c8206a..0786420e 100644 --- a/docs/reference/funique.html +++ b/docs/reference/funique.html @@ -21,7 +21,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/reference/fvar_fsd.html b/docs/reference/fvar_fsd.html index 94d23e28..6a60fbe6 100644 --- a/docs/reference/fvar_fsd.html +++ b/docs/reference/fvar_fsd.html @@ -19,7 +19,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/reference/group.html b/docs/reference/group.html index 946cb4a3..2f5a624f 100644 --- a/docs/reference/group.html +++ b/docs/reference/group.html @@ -19,7 +19,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/reference/groupid.html 
b/docs/reference/groupid.html index 42f4a4c9..f3fbf6e6 100644 --- a/docs/reference/groupid.html +++ b/docs/reference/groupid.html @@ -19,7 +19,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/reference/index.html b/docs/reference/index.html index eb20ed94..6b45bf89 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -19,7 +19,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/reference/indexing.html b/docs/reference/indexing.html index c2c5d2ff..c970087e 100644 --- a/docs/reference/indexing.html +++ b/docs/reference/indexing.html @@ -19,7 +19,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/reference/is.unlistable.html b/docs/reference/is.unlistable.html index 903b4e84..ebdf60c2 100644 --- a/docs/reference/is.unlistable.html +++ b/docs/reference/is.unlistable.html @@ -19,7 +19,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/reference/is_unlistable.html b/docs/reference/is_unlistable.html index 1304d203..b30db13e 100644 --- a/docs/reference/is_unlistable.html +++ b/docs/reference/is_unlistable.html @@ -19,7 +19,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/reference/ldepth.html b/docs/reference/ldepth.html index 9037d4f3..5b4411b2 100644 --- a/docs/reference/ldepth.html +++ b/docs/reference/ldepth.html @@ -19,7 +19,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/reference/list-processing.html b/docs/reference/list-processing.html index 14103e36..ad564565 100644 --- a/docs/reference/list-processing.html +++ b/docs/reference/list-processing.html @@ -41,7 +41,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/reference/pad.html b/docs/reference/pad.html index a77028ee..03d2459c 100644 --- a/docs/reference/pad.html +++ b/docs/reference/pad.html @@ -19,7 +19,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/reference/psacf.html b/docs/reference/psacf.html index ce3cea5e..f9c686b7 100644 --- a/docs/reference/psacf.html +++ b/docs/reference/psacf.html @@ -19,7 +19,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/reference/psmat.html b/docs/reference/psmat.html 
index 5c968ed6..56d52df6 100644 --- a/docs/reference/psmat.html +++ b/docs/reference/psmat.html @@ -19,7 +19,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/reference/pwcor_pwcov_pwnobs.html b/docs/reference/pwcor_pwcov_pwnobs.html index a0cac7ca..07786d4f 100644 --- a/docs/reference/pwcor_pwcov_pwnobs.html +++ b/docs/reference/pwcor_pwcov_pwnobs.html @@ -19,7 +19,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/reference/qF.html b/docs/reference/qF.html index d89daf99..bd9ba47b 100644 --- a/docs/reference/qF.html +++ b/docs/reference/qF.html @@ -22,7 +22,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/reference/qsu.html b/docs/reference/qsu.html index d2d5f850..cad3507d 100644 --- a/docs/reference/qsu.html +++ b/docs/reference/qsu.html @@ -20,7 +20,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/reference/qtab.html b/docs/reference/qtab.html index d644426f..0f166bf9 100644 --- a/docs/reference/qtab.html +++ b/docs/reference/qtab.html @@ -19,7 +19,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/reference/quick-conversion.html b/docs/reference/quick-conversion.html index 427868d7..d3471b27 100644 --- a/docs/reference/quick-conversion.html +++ b/docs/reference/quick-conversion.html @@ -26,7 +26,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/reference/radixorder.html b/docs/reference/radixorder.html index bdbe7e17..bc2a13e4 100644 --- a/docs/reference/radixorder.html +++ b/docs/reference/radixorder.html @@ -19,7 +19,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/reference/rapply2d.html b/docs/reference/rapply2d.html index 0e5d8e50..c30c80e4 100644 --- a/docs/reference/rapply2d.html +++ b/docs/reference/rapply2d.html @@ -26,7 +26,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/reference/recode-replace.html b/docs/reference/recode-replace.html index a316c9de..d88643b8 100644 --- a/docs/reference/recode-replace.html +++ b/docs/reference/recode-replace.html @@ -25,7 +25,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/reference/roworder.html b/docs/reference/roworder.html 
index 17734bc7..08229133 100644 --- a/docs/reference/roworder.html +++ b/docs/reference/roworder.html @@ -19,7 +19,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/reference/rsplit.html b/docs/reference/rsplit.html index 73241c2f..9d11b296 100644 --- a/docs/reference/rsplit.html +++ b/docs/reference/rsplit.html @@ -19,7 +19,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/reference/select_replace_vars.html b/docs/reference/select_replace_vars.html index fd2566ef..11a3f7e2 100644 --- a/docs/reference/select_replace_vars.html +++ b/docs/reference/select_replace_vars.html @@ -19,7 +19,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/reference/seqid.html b/docs/reference/seqid.html index 18bcab06..5030357a 100644 --- a/docs/reference/seqid.html +++ b/docs/reference/seqid.html @@ -20,7 +20,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/reference/small-helpers.html b/docs/reference/small-helpers.html index 8813d536..147bab59 100644 --- a/docs/reference/small-helpers.html +++ b/docs/reference/small-helpers.html @@ -19,7 +19,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/reference/summary-statistics.html b/docs/reference/summary-statistics.html index d06caa48..f3be8ba0 100644 --- a/docs/reference/summary-statistics.html +++ b/docs/reference/summary-statistics.html @@ -27,7 +27,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/reference/t_list.html b/docs/reference/t_list.html index 1b73ae90..ed9e2dff 100644 --- a/docs/reference/t_list.html +++ b/docs/reference/t_list.html @@ -19,7 +19,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/reference/time-series-panel-series.html b/docs/reference/time-series-panel-series.html index 118d4cdf..b59cf5a5 100644 --- a/docs/reference/time-series-panel-series.html +++ b/docs/reference/time-series-panel-series.html @@ -28,7 +28,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/reference/timeid.html b/docs/reference/timeid.html index a868dc2a..06401080 100644 --- a/docs/reference/timeid.html +++ b/docs/reference/timeid.html @@ -19,7 +19,7 @@ 
collapse - 1.8.3 + 1.8.4 diff --git a/docs/reference/unlist2d.html b/docs/reference/unlist2d.html index 880755b7..ebb02210 100644 --- a/docs/reference/unlist2d.html +++ b/docs/reference/unlist2d.html @@ -20,7 +20,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/reference/varying.html b/docs/reference/varying.html index 9e92e688..3e5b7a6f 100644 --- a/docs/reference/varying.html +++ b/docs/reference/varying.html @@ -19,7 +19,7 @@ collapse - 1.8.3 + 1.8.4 diff --git a/docs/reference/wlddev.html b/docs/reference/wlddev.html index 46f2ae96..a01c6d6d 100644 --- a/docs/reference/wlddev.html +++ b/docs/reference/wlddev.html @@ -20,7 +20,7 @@ collapse - 1.8.3 + 1.8.4