Skip to content

Commit

Permalink
Merge pull request #778 from tjmahr/master
Browse files Browse the repository at this point in the history
WIP deprecating drake_cache_log()
  • Loading branch information
wlandau-lilly authored Mar 10, 2019
2 parents fe21d5c + 97d59a5 commit 15402d3
Show file tree
Hide file tree
Showing 12 changed files with 96 additions and 106 deletions.
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@
- Deprecate and rename `predict_load_balancing()` to `predict_workers()`.
- Deprecate `this_cache()` and defer to `get_cache()` and `storr::storr_rds()` for simplicity.
- Change the default value of `hover` to `FALSE` in visualization functions. Improves speed. Also a breaking change.
- Deprecate `drake_cache_log_file()`. We recommend using `make()` with the `cache_log_file` argument to create the cache log. Doing so ensures that the log is always up to date with `make()` results.


# Version 6.2.1
Expand Down
57 changes: 5 additions & 52 deletions R/api-cache.R
Original file line number Diff line number Diff line change
Expand Up @@ -290,67 +290,20 @@ recover_cache_ <- function(
cache
}

#' @title Generate a flat text log file
#' to represent the state of the cache.
#' @description
#' This functionality is like
#' `make(..., cache_log_file = TRUE)`,
#' but separated and more customizable.
#' The `drake_cache_log_file()` function writes a flat text file
#' to represent the state of all the targets and imports in the cache.
#' If you call it after each [make()]
#' and put the log file under version control,
#' you can track the changes to your results over time.
#' This way, your data is versioned alongside your code
#' in an easy-to-view format. Hopefully, this functionality
#' is a step toward better data versioning tools.
#' @seealso [drake_cache_log()], [make()], [get_cache()]
#' @export
#' @return There is no return value, but a log file is generated.
#' @param file character scalar, name of the flat text log file.
#'
#' @inheritParams cached
#'
#' @param jobs Number of jobs/workers for parallel processing.
#'
#' @param targets_only Logical, whether to output information
#' only on the targets in your workflow plan data frame.
#' If `targets_only` is `FALSE`, the output will
#' include the hashes of both targets and imports.
#'
#' @examples
#' \dontrun{
#' test_with_dir("Quarantine side effects.", {
#' if (suppressWarnings(require("knitr"))) {
#' # Load drake's canonical example.
#' load_mtcars_example() # Get the code with drake_example()
#' # Run the project and save a flat text log file.
#' make(my_plan)
#' drake_cache_log_file() # writes drake_cache.log
#' # The above 2 lines are equivalent to make(my_plan, cache_log_file = TRUE) # nolint
#' # At this point, put drake_cache.log under version control
#' # (e.g. with 'git add drake_cache.log') alongside your code.
#' # Now, every time you run your project, your commit history
#' # of drake_cache.log is a changelog of the project's results.
#' # It shows which targets and imports changed on every commit.
#' # It is extremely difficult to track your results this way
#' # by putting the raw '.drake/' cache itself under version control.
#' }
#' })
#' }
drake_cache_log_file <- function(
# Generate a flat text log file to represent the state of the cache.
drake_cache_log_file_ <- function(
file = "drake_cache.log",
path = getwd(),
search = TRUE,
cache = drake::get_cache(path = path, search = search, verbose = verbose),
verbose = 1L,
jobs = 1,
jobs = 1L,
targets_only = FALSE
) {
if (!length(file) || identical(file, FALSE)) {
return(invisible())
} else if (identical(file, TRUE)) {
file <- formals(drake_cache_log_file)$file
file <- formals(drake_cache_log_file_)$file
}
out <- drake_cache_log(
path = path,
Expand Down Expand Up @@ -398,7 +351,7 @@ drake_cache_log_file <- function(
#' of your cache. To define your own hash algorithm,
#' you can create your own `storr` cache and give it a hash algorithm
#' (e.g. `storr_rds(hash_algorithm = "murmur32")`)
#' @seealso [drake_cache_log_file()], [cached()], [get_cache()]
#' @seealso [cached()], [get_cache()]
#' @export
#' @return Data frame of the hash keys of the targets and imports
#' in the cache
Expand Down
2 changes: 1 addition & 1 deletion R/exec-session.R
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ initialize_session <- function(config) {
}

conclude_session <- function(config) {
drake_cache_log_file(
drake_cache_log_file_(
file = config$cache_log_file,
cache = config$cache,
jobs = config$jobs
Expand Down
7 changes: 3 additions & 4 deletions R/preprocess-config.R
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@
#' Assign target-level retries with an optional `retries`
#' column in `plan`.
#'
#' @param force Logical. If `FALSE` (default) then `drake`
#' @param force Logical. If `FALSE` (default) then `drake`
#' imposes checks if the cache was created with an old
#' and incompatible version of drake.
#' If there is an incompatibility, `make()` stops to
Expand Down Expand Up @@ -198,8 +198,7 @@
#' @param cache_log_file Name of the cache log file to write.
#' If `TRUE`, the default file name is used (`drake_cache.log`).
#' If `NULL`, no file is written.
#' If activated, this option uses
#' [drake_cache_log_file()] to write a flat text file
#' If activated, this option writes a flat text file
#' to represent the state of the cache
#' (fingerprints of all the targets and imports).
#' If you put the log file under version control, your commit history
Expand Down Expand Up @@ -311,7 +310,7 @@
#' should wait for the workers to post before assigning them
#' targets. Should usually be `TRUE`. Set to `FALSE`
#' for `make(parallelism = "future_lapply", jobs = n)`
#' (`n > 1`) when combined with `future::plan(future::sequential)`.
#' (`n > 1`) when combined with `future::plan(future::sequential)`.
#' This argument only applies to parallel computing with persistent workers
#' (`make(parallelism = x)`, where `x` could be `"mclapply"`,
#' `"parLapply"`, or `"future_lapply"`).
Expand Down
45 changes: 43 additions & 2 deletions R/utils-deprecate.R
Original file line number Diff line number Diff line change
Expand Up @@ -1510,7 +1510,7 @@ in_progress <- function(
running(path, search, cache, verbose )
}

#' @title Deprecated. Load an existing drake files system cache
#' @title Deprecated. Load an existing drake files system cache
#' if it exists or create a new one otherwise.
#' @description Deprecated on 2019-01-13.
#' @export
Expand Down Expand Up @@ -1546,7 +1546,7 @@ recover_cache <- function(
force, verbose, fetch_cache, console_log_file)
}

#' @title Deprecated. For drake caches, list the `storr` cache
#' @title Deprecated. For drake caches, list the `storr` cache
#' namespaces that store target-level information.
#' @description Deprecated on 2019-01-13.
#' @export
Expand Down Expand Up @@ -1860,3 +1860,44 @@ this_cache <- function(
console_log_file = console_log_file
)
}

#' @title Deprecated. Generate a flat text log file to represent the state of
#'   the cache.
#' @description Deprecated on 2019-03-09.
#' @details Calling this function to create a log file and later calling
#'   `make()` makes the log file out of date. Therefore, we recommend using
#'   `make()` with the `cache_log_file` argument to create the cache log.
#'   Doing so ensures that the log is always up to date with `make()` results.
#'
#'   This wrapper emits a deprecation warning and then forwards all of its
#'   arguments, unchanged, to the internal helper `drake_cache_log_file_()`.
#' @seealso [drake_cache_log()], [make()], [get_cache()]
#' @export
#' @inheritParams cached
#' @param file character scalar, name of the flat text log file.
#' @param jobs Number of jobs/workers for parallel processing.
#' @param targets_only Logical, whether to output information only on the
#'   targets in your workflow plan data frame. If `targets_only` is `FALSE`, the
#'   output will include the hashes of both targets and imports.
#' @keywords internal
#' @return There is no return value, but a log file is generated.
#' @examples
#' # Deprecated
drake_cache_log_file <- function(
  file = "drake_cache.log",
  path = getwd(),
  search = TRUE,
  cache = drake::get_cache(path = path, search = search, verbose = verbose),
  verbose = 1L,
  jobs = 1L,
  targets_only = FALSE
) {
  .Deprecated(
    new = "",
    package = "drake",
    msg = paste(
      "`drake_cache_log_file()` is deprecated.",
      "To ensure cache log is always up to date, create the cache log using",
      "`make()` with the `cache_log_file` argument."
    )
  )

  # Forward every argument by name so the call does not depend on the
  # positional order of the helper's parameters. In particular, `verbose`
  # must be passed through as-is (a misspelled name here would only fail
  # lazily, when the helper first forces the argument).
  drake_cache_log_file_(
    file = file,
    path = path,
    search = search,
    cache = cache,
    verbose = verbose,
    jobs = jobs,
    targets_only = targets_only
  )
}
1 change: 0 additions & 1 deletion _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,6 @@ reference:
- '`new_cache`'
- '`find_cache`'
- '`drake_cache_log`'
- '`drake_cache_log_file`'
- '`drake_gc`'
- '`drake_get_session_info`'
- '`get_cache`'
Expand Down
2 changes: 1 addition & 1 deletion man/drake_cache_log.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

54 changes: 16 additions & 38 deletions man/drake_cache_log_file.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 1 addition & 2 deletions man/drake_config.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 1 addition & 2 deletions man/make.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

26 changes: 23 additions & 3 deletions tests/testthat/test-cache.R
Original file line number Diff line number Diff line change
Expand Up @@ -257,9 +257,6 @@ test_with_dir("cache functions work from various working directories", {
n_a <- nrow(all_hashes)
n_s <- nrow(some_hashes)
expect_true(n_a > n_s && n_s > 0)
expect_false(file.exists("log.txt"))
drake_cache_log_file(file = "log.txt")
expect_true(file.exists("log.txt"))

# drake_gc() should not remove any important targets/imports.
x <- cached()
Expand Down Expand Up @@ -518,3 +515,26 @@ test_with_dir("can filter progress", {
progress(progress = "stuck"),
"should be one of")
})

test_with_dir("make() writes a cache log file", {
  skip_on_cran() # CRAN gets whitelist tests only (check time limits).

  # Parse the log file written by make() into a data frame.
  read_log <- function() {
    read.table("log.txt", header = TRUE, stringsAsFactors = FALSE)
  }

  # First build: the log file must not exist beforehand and must exist after.
  first_plan <- drake_plan(a = TRUE, b = TRUE)
  expect_false(file.exists("log.txt"))
  make(first_plan, cache_log_file = "log.txt")
  expect_true(file.exists("log.txt"))

  # The log lists both targets, in order.
  first_log <- read_log()
  expect_equal(first_log$type, rep("target", 2))
  expect_equal(first_log$name, c("a", "b"))

  # Second build: only the command for `b` differs from the first plan.
  second_plan <- drake_plan(a = TRUE, b = FALSE)
  make(second_plan, cache_log_file = "log.txt")
  second_log <- read_log()

  # The hash of the untouched target is stable across builds ...
  expect_equal(first_log$hash[1], second_log$hash[1])

  # ... while the hash of the modified target changes.
  expect_false(first_log$hash[2] == second_log$hash[2])
})
1 change: 1 addition & 0 deletions tests/testthat/test-deprecate.R
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,7 @@ test_with_dir("deprecate misc utilities", {
expect_warning(prune_drake_graph(config$graph, "small"))
expect_warning(predict_load_balancing(config), regexp = "deprecated")
expect_warning(tmp <- this_cache(), regexp = "deprecated")
expect_warning(drake_cache_log_file(), regexp = "deprecated")
})

test_with_dir("deprecated arguments", {
Expand Down

0 comments on commit 15402d3

Please sign in to comment.