initial commit :)

andrewGhazi · Jun 30, 2024 · 12c0c00 · 12c0c00
commit 12c0c00
Show file tree

Hide file tree

Showing 23 changed files with 1,250 additions and 0 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -0,0 +1,11 @@
+^.*\.Rproj$
+^\.git$
+^\.github$
+^\.gitignore$
+^\.Rbuildignore$
+^\.RData$
+^\.Rhistory$
+^\.Rproj\.user$
+^NOTICE$
+^LICENSE\.md$
+^README\.Rmd$
diff --git a/.github/workflows/check.yaml b/.github/workflows/check.yaml
@@ -0,0 +1,51 @@
+# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
+# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
+#
+# Runs the R package check on every push and pull request.
+on: [push, pull_request]
+
+name: check
+
+jobs:
+  check:
+    runs-on: ${{ matrix.config.os }}
+
+    name: ${{ matrix.config.os }} (${{ matrix.config.r }})
+
+    strategy:
+      fail-fast: false
+      matrix:
+        # Single configuration: latest Ubuntu runner with the release version of R.
+        config:
+          - {os: ubuntu-latest, r: 'release'}
+
+    env:
+      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
+      NOT_CRAN: true
+      R_KEEP_PKG_SOURCE: yes
+
+    steps:
+      - uses: actions/checkout@v3
+
+      - uses: r-lib/actions/setup-pandoc@v2
+
+      # NOTE(review): the matrix above defines no `http-user-agent` key, so the
+      # expression below presumably expands to an empty value -- confirm.
+      - uses: r-lib/actions/setup-r@v2
+        with:
+          r-version: ${{ matrix.config.r }}
+          extra-repositories: 'https://mc-stan.org/r-packages/'
+          http-user-agent: ${{ matrix.config.http-user-agent }}
+          use-public-rspm: true
+
+      # cmdstanr and CmdStan are installed before the package dependencies
+      # step, which also installs the package itself (`local::.`).
+      - name: Install CmdStan
+        shell: Rscript {0}
+        run: |
+          install.packages("cmdstanr", repos = c("https://mc-stan.org/r-packages/", getOption("repos")))
+          cmdstanr::check_cmdstan_toolchain(fix = TRUE)
+          cmdstanr::install_cmdstan()
+
+      - uses: r-lib/actions/setup-r-dependencies@v2
+        with:
+          extra-packages: any::rcmdcheck, local::.
+          needs: check
+          cache-version: 2
+
+      - uses: r-lib/actions/check-r-package@v2
+        with:
+          upload-snapshots: true
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,11 @@
+*~
+.Rapp.history
+.RData
+.Rhistory
+.Rproj.user/
+inst/stan/**
+!inst/stan/**/*.*
+inst/stan/**/*.exe
+inst/stan/**/*.EXE
+*.dll
+dyingforacup.Rproj
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -0,0 +1,23 @@
+Package: dyingforacup
+Type: Package
+Title: Optimize Coffee Brewing With Bayesian Optimization
+Version: 0.0.1
+Author: You
+Maintainer: The package maintainer <[email protected]>
+Description: Suggest the next coffee brewing parameters to try using Bayesian
+  optimization with a Gaussian process fit by pre-compiled Stan models.
+Depends:
+  R (>= 4.0.0)
+Imports:
+    cli,
+    collapse,
+    data.table,
+    instantiate,
+    rlang
+Additional_repositories:
+  https://mc-stan.org/r-packages/
+SystemRequirements: CmdStan (https://mc-stan.org/users/interfaces/cmdstan)
+Encoding: UTF-8
+LazyData: true
+RoxygenNote: 7.3.1
+License: GPL (>= 3)
diff --git a/LICENSE.md b/LICENSE.md
diff --git a/NAMESPACE b/NAMESPACE
@@ -0,0 +1,15 @@
+# Generated by roxygen2: do not edit by hand
+
+export(run_gp)
+export(suggest_next)
+import(collapse)
+importFrom(data.table,":=")
+importFrom(data.table,.BY)
+importFrom(data.table,.EACHI)
+importFrom(data.table,.GRP)
+importFrom(data.table,.I)
+importFrom(data.table,.N)
+importFrom(data.table,.NGRP)
+importFrom(data.table,.SD)
+importFrom(data.table,data.table)
+importFrom(instantiate,stan_package_model)
diff --git a/NOTICE b/NOTICE
@@ -0,0 +1,34 @@
+This package includes components from other open-source software. The projects and licenses are listed below.
+
+* CmdStan (https://github.com/stan-dev/cmdstan) by Stan Developers and their Assignees.
+
+BSD 3-Clause License
+=====================
+
+Copyright (c) 2014, Stan
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice, this
+  list of conditions and the following disclaimer in the documentation and/or
+  other materials provided with the distribution.
+
+* Neither the name of the {organization} nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/R/check_input.R b/R/check_input.R
@@ -0,0 +1,13 @@
+#' Check input data.frame
+#' @description
+#' Aborts unless the input has a `rating` column, then reports every other
+#' column name as a detected brew parameter.
+#'
+#' @param dat object whose `colnames()` are inspected
+#' @param call environment handed to `cli::cli_abort()` as its `call` argument
+check_df = function(dat, call = rlang::caller_env()) {
+  cn = colnames(dat)
+  has_rating = "rating" %in% cn
+
+  if (!has_rating) {
+    cli::cli_abort("No `rating` column detected in input.", call = call)
+  }
+
+  # Minimum row count check is currently disabled:
+  # if (nrow(dat) < 2) cli::cli_abort("Input needs at least two existing observations.", call = call)
+
+  # Kept as the last expression so the return value is unchanged.
+  cli::cli_alert("Detected brew parameters {.val {cn[!(cn %in% 'rating')]}}")
+}
diff --git a/R/dyingforacup-package.R b/R/dyingforacup-package.R
@@ -0,0 +1,20 @@
+#' @keywords internal
+#' @name dyingforacup
+#' @description A package using `instantiate` for coffee optimization.
+#' @family help
+#' @importFrom instantiate stan_package_model
+#' @import collapse
+"_PACKAGE"
+
+## usethis namespace: start
+#' @importFrom data.table :=
+#' @importFrom data.table .BY
+#' @importFrom data.table .EACHI
+#' @importFrom data.table .GRP
+#' @importFrom data.table .I
+#' @importFrom data.table .N
+#' @importFrom data.table .NGRP
+#' @importFrom data.table .SD
+#' @importFrom data.table data.table
+## usethis namespace: end
+NULL
diff --git a/R/model.R b/R/model.R
@@ -0,0 +1,33 @@
+#' @title Fit the GP model.
+#' @family models
+#' @description Fit the GP Stan model (`gp_mod`, shipped with this package)
+#'   and return the posterior draws.
+#' @returns a draws data frame, i.e. the result of
+#'   `fit$draws(format = "data.frame")`
+#' @param X a matrix of scaled brew parameters, one row per observation
+#' @param y Numeric vector of ratings (NORMAL scale, not 0-10!), one element
+#'   per row of `X`
+#' @param X_pred a matrix of grid points in scaled brew parameter space to
+#'   evaluate the GP at; must have the same number of columns as `X`
+#' @param ... Named arguments to the `sample()` method of CmdStan model
+#'   objects: <https://mc-stan.org/cmdstanr/reference/model-method-sample.html>
+#' @param verbose logical indicating whether to print messages. Currently
+#'   unused by the function body.
+#' @examples
+#' if (instantiate::stan_cmdstan_exists()) {
+#'   X = matrix(rnorm(15), ncol = 3)
+#'   X_pred = matrix(rnorm(30), ncol = 3)
+#'   # run_gp_model() is not exported, hence `:::`
+#'   dyingforacup:::run_gp_model(X, y = rnorm(5), X_pred = X_pred)
+#' }
+run_gp_model = function(X, y, X_pred, ..., verbose) {
+
+  # Fail early with a readable message instead of a Stan data error.
+  # isTRUE() also covers inputs without dimensions (nrow()/ncol() of NULL).
+  if (!isTRUE(nrow(X) == length(y))) {
+    cli::cli_abort("`X` must have one row per element of `y` ({nrow(X)} vs {length(y)}).")
+  }
+
+  if (!isTRUE(ncol(X) == ncol(X_pred))) {
+    cli::cli_abort("`X` and `X_pred` must have the same number of columns ({ncol(X)} vs {ncol(X_pred)}).")
+  }
+
+  model = instantiate::stan_package_model(
+    name = "gp_mod",
+    package = "dyingforacup"
+  )
+
+  # Names must match the data block of the Stan model.
+  data_list = list(N      = length(y),
+                   N_pred = nrow(X_pred),
+                   D      = ncol(X),
+                   x      = X,
+                   x_pred = X_pred,
+                   y      = y)
+
+  fit = model$sample(data = data_list,
+                     ...)
+
+  fit$draws(format = "data.frame")
+}
diff --git a/R/run.R b/R/run.R
@@ -0,0 +1,83 @@
+#' Fit the GP to a data frame of rated brews
+#'
+#' @description
+#' Centers and scales the three brew parameters, builds the grid of candidate
+#' settings and fits the GP to the observed ratings via `run_gp_model()`.
+#'
+#' @param dat data frame with columns `grinder_setting`, `temp`, `bloom_time`
+#'   and `rating`
+#' @param ... passed on to `run_gp_model()`
+#' @returns An unnamed list of length two: the result of `run_gp_model()` and
+#'   the matrix of scaled grid points (columns `gc`, `tc`, `bc`) the GP was
+#'   evaluated at.
+#' @export
+run_gp = function(dat, ...) {
+
+  check_df(dat)
+
+  # check_df() only verifies `rating`; the transformations below also need
+  # these columns, so report a missing one explicitly.
+  brew_cols = c("grinder_setting", "temp", "bloom_time")
+  absent = brew_cols[!(brew_cols %in% colnames(dat))]
+  if (length(absent) > 0) {
+    cli::cli_abort("Input is missing required brew parameter column(s): {.val {absent}}")
+  }
+
+  # TODO adapt centering/scaling, generalize to arbitrary # of parameters
+  dat = dat |>
+    mtt(gs_cent = (grinder_setting - 9) / 5 * 3,
+        temp_cent = (temp - 190) / (20) * 3,
+        bloom_cent = (bloom_time - 30) / 30 * 3) |>
+    qDT()
+
+  # Candidate settings in original units with their scaled counterparts.
+  # The scaling formulas mirror the ones applied to `dat` above.
+  g_map = data.table(g = seq(4, 14, by = .5)) |>
+    mtt(gc = (g - 9) / 5 * 3)
+
+  t_map = data.table(t = seq(170, 210, by = 5),
+                     tc = (seq(170, 210, by = 5) - 190) / 20 * 3)
+
+  b_map = data.table(b = seq(0, 60, by = 10),
+                     bc = ((seq(0, 60, by = 10) - 30) / 30) * 3 )
+
+  # Every combination of scaled grinder / temperature / bloom values.
+  x_grid = expand.grid(gc = g_map$gc,
+                       tc = t_map$tc,
+                       bc = b_map$bc) |>
+    qM()
+
+  X = dat |> slt(gs_cent, temp_cent, bloom_cent) |> qM()
+
+  list(run_gp_model(X, dat$rating, x_grid, ...),
+       x_grid)
+}
+
+
+#' Suggest the next brew to try
+#'
+#' @description
+#' Fits the GP with `run_gp()` and returns the grid point whose summed
+#' posterior improvement over the best observed rating (plus a fixed offset
+#' of 0.25) is largest.
+#'
+#' @param dat data frame of rated brews, passed to `run_gp()`
+#' @param x_grid Ignored: it is overwritten by the grid that `run_gp()`
+#'   returns. Kept so existing calls keep working.
+#' @param ... passed on to `run_gp()`
+#' @returns A one-row data.table holding the selected grid point in scaled
+#'   units (`gc`, `tc`, `bc`) joined with the corresponding original-unit
+#'   values (`g`, `t`, `b`).
+#' @export
+suggest_next = function(dat, x_grid, ...) {
+
+  run_res = run_gp(dat, ...)
+  gp_res = run_res[[1]]
+  x_grid = run_res[[2]]
+
+  obs_max = max(dat$rating)
+
+  # Draws of the columns matching "f_star", shifted by the best observed
+  # rating and a fixed offset.
+  # NOTE(review): presumably one f_star column per row of x_grid, in the same
+  # order -- confirm against the Stan model.
+  offset = .25
+  minus_max = qM(gp_res |> get_vars("f_star", regex = TRUE)) - obs_max - offset
+
+  # Zero out draws that do not beat the shifted maximum.
+  w = 1*(minus_max > 0)
+
+  acq = minus_max * w
+
+  # Column-wise sum over draws, then the index of the best column.
+  max_pred_dens = fsum(acq) |> which.max()
+
+  if (max_pred_dens == 1) cli::cli_warn("Selected the first grid point as maximum of the acquisition function. You may need to run the chains for longer.")
+
+  # Disabled diagnostic plot of the acquisition surface at the selected
+  # grinder setting:
+  # pred_g = x_grid[max_pred_dens,,drop=FALSE][,"gc"]
+  # acq_post = data.table(variable = colnames(acq),
+  #                       mean = acq |> colMeans(),
+  #                       i = seq_len(ncol(acq)))
+  # post_range = acq_post$mean |> range()
+  # qDT(x_grid) |> mtt(i = 1:nrow(x_grid)) |>
+  #   sbt(dplyr::near(gc, pred_g)) |>
+  #   join(acq_post, on = "i", validate = "1:1") |>
+  #   ggplot(aes(tc, bc)) +
+  #   geom_tile(aes(fill = mean)) +
+  #   scale_fill_viridis_c(limits = post_range)
+
+  # Lookup tables from scaled values back to original units. These must stay
+  # in sync with the grid definitions in run_gp().
+  g_map = data.table(g = seq(4, 14, by = .5)) |>
+    mtt(gc = (g - 9) / 5 * 3)
+
+  t_map = data.table(t = seq(170, 210, by = 5),
+                     tc = (seq(170, 210, by = 5) - 190) / 20 * 3)
+
+  b_map = data.table(b = seq(0, 60, by = 10),
+                     bc = ((seq(0, 60, by = 10) - 30) / 30) * 3 )
+
+  x_grid[max_pred_dens,,drop=FALSE] |>
+    qDT() |>
+    join(g_map, verbose = FALSE) |>
+    join(t_map, verbose = FALSE) |>
+    join(b_map, verbose = FALSE)
+
+}
diff --git a/README.Rmd b/README.Rmd
@@ -0,0 +1,63 @@
+---
+output: github_document
+---
+
+<!-- README.md is generated from README.Rmd. Please edit that file -->
+
+```{r, include = FALSE}
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#>",
+  fig.path = "man/figures/README-",
+  out.width = "100%"
+)
+```
+
+# dyingforacup
+
+[DO YOU FOLKS LIKE COFFEE?!](https://www.youtube.com/watch?v=RJC9DXQAd7U)
+
+<!-- badges: start -->
+<!-- badges: end -->
+
+This is the package I use to optimize my coffee brewing with [Bayesian Optimization](https://www.youtube.com/watch?v=wZODGJzKmD0)
+
+## Installation
+
+You need to install [`cmdstanr`](https://mc-stan.org/cmdstanr/index.html) to use this package:
+
+```{r eval=FALSE}
+# we recommend running this in a fresh R session or restarting your current session
+install.packages("cmdstanr", repos = c("https://stan-dev.r-universe.dev", getOption("repos")))
+```
+
+You can install the development version of `dyingforacup` like so:
+
+```{r eval=FALSE}
+remotes::install_github('andrewGhazi/dyingforacup', type = "source")
+```
+
+## Example
+
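+Give `suggest_next()` (or `run_gp()`) a data frame of brew parameters (`grinder_setting`, `temp`, `bloom_time`) with a `rating` column. Any further arguments are passed on to the sampler: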
+
+```{r eval=FALSE}
+library(dyingforacup)
+
+
+dat = tibble::tribble(
+  ~grinder_setting, ~temp, ~bloom_time, ~rating,
+  # 8, 193, 25, 1.1,
+  7, 195, 20, -.7,
+  9, 179, 45, -1,
+  9, 195, 25, -.5,
+) 
+
+
+suggest_next(dat,
+             iter_sampling = 4000, 
+             refresh = 1250, 
+             show_exceptions = FALSE, 
+             adapt_delta = .95, 
+             parallel_chains = 4)
+```