Merge pull request #133 from bayesmix-dev/development

Development
bayesmix-dev · May 18, 2022 · d1d8a68 · d1d8a68
2 parents 40ce3cb + 4591a44
commit d1d8a68
Show file tree

Hide file tree

Showing 150 changed files with 9,530 additions and 5,739 deletions.
diff --git a/.gitignore b/.gitignore
@@ -16,7 +16,17 @@ sftp-config.json
 *.local.*
 # MacOS storage files
 .DS_Store
+.dockerignore
+.ipynb_checkpoints/
+docs/_build/
+resources/benchmarks/datasets
+resources/2d
 #CLion cash
 .idea/
 # Build debug folder
 cmake-build-debug/
+
+# .old folders
+src/hierarchies/updaters/.old/
+test/.old/
+examples/gamma_hierarchy/.old/
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -90,6 +90,7 @@ set(INCLUDE_PATHS
   ${CMAKE_CURRENT_LIST_DIR}/lib/math
   ${CMAKE_CURRENT_LIST_DIR}/lib/math/lib/boost_1.72.0
   ${CMAKE_CURRENT_LIST_DIR}/lib/math/lib/eigen_3.3.9
+  ${CMAKE_CURRENT_LIST_DIR}/lib/math/lib/sundials_5.5.0/include
   # TBB already included
   ${CMAKE_CURRENT_BINARY_DIR}
   ${protobuf_SOURCE_DIR}/src

diff --git a/README.md b/README.md
@@ -8,17 +8,15 @@ Current state of the software:
 
 - `bayesmix` performs inference for mixture models of the kind
 
+<img src="https://latex.codecogs.com/svg.image?\begin{align*}y_1,\dots,y_n&space;&\sim&space;\int&space;k(\cdot&space;\mid&space;\theta)P(\mathrm{d}\theta)\\P&space;&\sim&space;\Pi\end{align*}&space;" title="\begin{align*}y_1,\dots,y_n &\sim \int k(\cdot \mid \theta)P(\mathrm{d}\theta)\\P &\sim \Pi\end{align*} " />
+
+<!---
 <a href="https://www.codecogs.com/eqnedit.php?latex=y_1,&space;\ldots,&space;y_n&space;\sim&space;\int&space;k(\cdot&space;\mid&space;\theta)&space;P(d\theta)" target="_blank"><img src="https://latex.codecogs.com/gif.latex?y_1,&space;\ldots,&space;y_n&space;\sim&space;\int&space;k(\cdot&space;\mid&space;\theta)&space;P(d\theta)" title="y_1, \ldots, y_n \sim \int k(\cdot \mid \theta) \Pi(d\theta)" /></a>
 
 <a href="https://www.codecogs.com/eqnedit.php?latex=P&space;\sim&space;\Pi" target="_blank"><img src="https://latex.codecogs.com/gif.latex?P&space;\sim&space;\Pi" title="\Pi \sim P" /></a>
+-->
 
-where P is either the Dirichlet process or the Pitman--Yor process
-
-- We currently support univariate and multivariate location-scale mixture of Gaussian densities
-
-- Inference is carried out using algorithms such as Algorithm 2 in [Neal (2000)](http://www.stat.columbia.edu/npbayes/papers/neal_sampling.pdf)
-
-- Serialization of the MCMC chains is possible using Google's [Protocol Buffers](https://developers.google.com/protocol-buffers) aka `protobuf`
+For descriptions of the models supported in our library, discussion of software design, and examples, please refer to the following paper: https://arxiv.org/abs/2205.08144
 
 # Installation
 
@@ -29,7 +27,7 @@ where P is either the Dirichlet process or the Pitman--Yor process
 To install and use `bayesmix`, please `cd` to the folder to which you wish to install it, and clone this repository with the following command-line instruction:
 
 ```shell
-git clone --recurse-submodule [email protected]:bayesmix-dev/bayesmix.git
+git clone --recurse-submodules [email protected]:bayesmix-dev/bayesmix.git
 ```
 
 Then, by using `cd bayesmix`, you will enter the newly downloaded folder.
@@ -39,8 +37,8 @@ To build the executable for the main file `run_mcmc.cc`, please use the followin
 ```shell
 mkdir build
 cd build
-cmake .. -DDISABLE_DOCS=on -DDISABLE_BENCHMARKS=on -DDISABLE_TESTS=on
-make run
+cmake .. -DDISABLE_DOCS=ON -DDISABLE_BENCHMARKS=ON -DDISABLE_TESTS=ON
+make run_mcmc
 cd ..
 ```
 
@@ -97,3 +95,4 @@ Documentation is available at https://bayesmix.readthedocs.io.
 # Contributions are welcome!
 
 Please check out [CONTRIBUTING.md](CONTRIBUTING.md) for details on how to collaborate with us.
+You can also head to our [issues page](https://github.com/bayesmix-dev/bayesmix/issues) to check for useful enhancements needed.
diff --git a/benchmarks/lpd_grid.cc b/benchmarks/lpd_grid.cc
@@ -1,7 +1,7 @@
 #include <benchmark/benchmark.h>
 
-#include <Eigen/Dense>
 #include <iostream>
+#include <stan/math/rev.hpp>
 
 #include "src/utils/distributions.h"
 #include "utils.h"
@@ -37,7 +37,6 @@ Eigen::VectorXd lpdf_naive(const Eigen::MatrixXd &x,
   return out;
 }
 
-
 Eigen::VectorXd lpdf_fully_optimized(const Eigen::MatrixXd &x,
                                      const Eigen::VectorXd &mean,
                                      const Eigen::MatrixXd &prec_chol,
@@ -84,7 +83,6 @@ static void BM_gauss_lpdf_naive(benchmark::State &state) {
   }
 }
 
-
 static void BM_gauss_lpdf_fully_optimized(benchmark::State &state) {
   int dim = state.range(0);
   Eigen::VectorXd mean = Eigen::VectorXd::Zero(dim);
@@ -99,7 +97,6 @@ static void BM_gauss_lpdf_fully_optimized(benchmark::State &state) {
   }
 }
 
-
 BENCHMARK(BM_gauss_lpdf_cov)->RangeMultiplier(2)->Range(2, 2 << 4);
 BENCHMARK(BM_gauss_lpdf_cov)->RangeMultiplier(2)->Range(2, 2 << 4);
 BENCHMARK(BM_gauss_lpdf_naive)->RangeMultiplier(2)->Range(2, 2 << 4);

diff --git a/benchmarks/nnw_marg_lpdf.cc b/benchmarks/nnw_marg_lpdf.cc
@@ -1,7 +1,7 @@
 #include <benchmark/benchmark.h>
 
-#include <Eigen/Dense>
 #include <iostream>
+#include <stan/math/rev.hpp>
 
 #include "utils.h"
 

diff --git a/docs/Doxyfile.in b/docs/Doxyfile.in
@@ -1819,7 +1819,7 @@ PAPER_TYPE             = a4
 # If left blank no extra packages will be included.
 # This tag requires that the tag GENERATE_LATEX is set to YES.
 
-EXTRA_PACKAGES         =
+EXTRA_PACKAGES         = bm
 
 # The LATEX_HEADER tag can be used to specify a personal LaTeX header for the
 # generated LaTeX document. The header should contain everything until the first
@@ -1893,7 +1893,7 @@ USE_PDFLATEX           = YES
 # The default value is: NO.
 # This tag requires that the tag GENERATE_LATEX is set to YES.
 
-LATEX_BATCHMODE        = NO
+LATEX_BATCHMODE        = YES
 
 # If the LATEX_HIDE_INDICES tag is set to YES then doxygen will not include the
 # index chapters (such as File Index, Compound Index, etc.) in the output.

diff --git a/docs/algorithms.rst b/docs/algorithms.rst
@@ -20,6 +20,9 @@ Algorithms
 .. doxygenclass:: Neal8Algorithm
    :project: bayesmix
    :members:
+.. doxygenclass:: SplitAndMergeAlgorithm
+   :project: bayesmix
+   :members:
 .. doxygenclass:: ConditionalAlgorithm
    :project: bayesmix
    :members:

diff --git a/docs/conf.py b/docs/conf.py
@@ -57,8 +57,6 @@ def configureDoxyfile(input_dir, output_dir):
 
 html_theme = 'haiku'
 
-html_static_path = ['_static']
-
 highlight_language = 'cpp'
 
 imgmath_latex = 'latex'
diff --git a/docs/hierarchies.rst b/docs/hierarchies.rst
@@ -6,24 +6,38 @@ Hierarchies
 In our algorithms, we store a vector of hierarchies, each of which represent a parameter :math:`\theta_h`.
 The hierarchy implements all the methods needed to update :math:`\theta_h`: sampling from the prior distribution :math:`P_0`, the full-conditional distribution (given the data {:math:`y_i` such that :math:`c_i = h`} ) and so on.
 
+In BayesMix, each choice of :math:`G_0` is implemented in a different ``PriorModel`` object and each choice of :math:`k(\cdot \mid \cdot)` in a ``Likelihood`` object, so that it is straightforward to create a new ``Hierarchy`` using one of the already implemented priors or likelihoods.
+The sampling from the full conditional of :math:`\theta_h` is performed in an ``Updater`` class.
+`State` classes  are used to store  parameters :math:`\theta_h` s of every mixture component.
+Their main purpose is to handle serialization and de-serialization of the state
+
+.. toctree::
+   :maxdepth: 1
+   :caption: API: hierarchies submodules
+
+   likelihoods
+   prior_models
+   updaters
+   states
+
 
 -------------------------
 Main operations performed
 -------------------------
 
 A hierarchy must be able to perform the following operations:
 
-1. Sample from the prior distribution: generate :math:`\theta_h \sim P_0` [``sample_prior``]
-2. Sample from the 'full conditional' distribution: generate theta_h from the distribution :math:`p(\theta_h \mid \cdots ) \propto P_0(\theta_h) \prod_{i: c_i = h} k(y_i | \theta_h)` [``sample_full_conditional``]
-3. Update the hyperparameters involved in :math:`P_0` [``update_hypers``]
-4. Evaluate the likelihood in one point, i.e. :math:`k(x | \theta_h)` for theta_h the current value of the parameters [``like_lpdf``]
-5. When :math:`k` and :math:`P_0` are conjugate, we must also be able to compute the marginal/prior predictive distribution in one point, i.e. :math:`m(x) = \int k(x | \theta) P_0(d\theta)`, and the conditional predictive distribution :math:`m(x | \textbf{y} ) = \int k(x | \theta) P_0(d\theta | \{y_i: c_i = h\})` [``prior_pred_lpdf``, ``conditional_pred_lpdf``]
+a. Sample from the prior distribution: generate :math:`\theta_h \sim P_0` [``sample_prior``]
+b. Sample from the 'full conditional' distribution: generate theta_h from the distribution :math:`p(\theta_h \mid \cdots ) \propto P_0(\theta_h) \prod_{i: c_i = h} k(y_i | \theta_h)` [``sample_full_conditional``]
+c. Update the hyperparameters involved in :math:`P_0` [``update_hypers``]
+d. Evaluate the likelihood in one point, i.e. :math:`k(x | \theta_h)` for theta_h the current value of the parameters [``like_lpdf``]
+e. When :math:`k` and :math:`P_0` are conjugate, we must also be able to compute the marginal/prior predictive distribution in one point, i.e. :math:`m(x) = \int k(x | \theta) P_0(d\theta)`, and the conditional predictive distribution :math:`m(x | \textbf{y} ) = \int k(x | \theta) P_0(d\theta | \{y_i: c_i = h\})` [``prior_pred_lpdf``, ``conditional_pred_lpdf``]
 
 Moreover, the following utilities are needed:
 
-6. write the current state :math:`\theta_h` into a appropriately defined Protobuf message [``write_state_to_proto``]
-7. restore theta_h from a given Protobuf message [``set_state_from_proto``]
-8. write the values of the hyperparameters in :math:`P_0` to a Protobuf message [``write_hypers_to_proto``]
+f. write the current state :math:`\theta_h` into a appropriately defined Protobuf message [``write_state_to_proto``]
+g. restore theta_h from a given Protobuf message [``set_state_from_proto``]
+h. write the values of the hyperparameters in :math:`P_0` to a Protobuf message [``write_hypers_to_proto``]
 
 
 In each hierarchy, we also keep track of which data points are allocated to the hierarchy.
@@ -44,19 +58,18 @@ A template class ``BaseHierarchy`` inherits from ``AbstractHierarchy`` and imple
 
 Instead, child classes must implement:
 
-1. ``like_lpdf``: evaluates :math:`k(x | \theta_h)`
-2. ``marg_lpdf``: evaluates m(x) given some parameters :math:`\theta_h` (could be both the hyperparameters in :math:`P_0` or the paramters given by the full conditionals)
-3. ``draw``: samples from :math:`P_0` given the parameters
-4. ``clear_summary_statistics``: clears all the summary statistics
-5. ``update_hypers``: performs the update of parameters in :math:`P_0` given all the :math:`\theta_h` (passed as a vector of protobuf Messages)
-6. ``initialize_state``: initializes the current :math:`\theta_h` given the hyperparameters in :math:`P_0`
-7. ``initialize_hypers``: initializes the hyperparameters in :math:`P_0` given their hyperprior
-8. ``update_summary_statistics``: updates the summary statistics when an observation is allocated or de-allocated from the hierarchy
-9. ``get_posterior_parameters``: returns the paramters of the full conditional distribution **possible only when** :math:`P_0` **and** :math:`k` **are conjugate**
-10. ``set_state_from_proto``
-11. ``write_state_to_proto``
-12. ``write_hypers_to_proto``
-
+a. ``like_lpdf``: evaluates :math:`k(x | \theta_h)`
+b. ``marg_lpdf``: evaluates m(x) given some parameters :math:`\theta_h` (could be both the hyperparameters in :math:`P_0` or the paramters given by the full conditionals)
+c. ``draw``: samples from :math:`P_0` given the parameters
+d. ``clear_summary_statistics``: clears all the summary statistics
+e. ``update_hypers``: performs the update of parameters in :math:`P_0` given all the :math:`\theta_h` (passed as a vector of protobuf Messages)
+f. ``initialize_state``: initializes the current :math:`\theta_h` given the hyperparameters in :math:`P_0`
+g. ``initialize_hypers``: initializes the hyperparameters in :math:`P_0` given their hyperprior
+h. ``update_summary_statistics``: updates the summary statistics when an observation is allocated or de-allocated from the hierarchy
+i. ``get_posterior_parameters``: returns the paramters of the full conditional distribution **possible only when** :math:`P_0` **and** :math:`k` **are conjugate**
+j. ``set_state_from_proto``
+k. ``write_state_to_proto``
+l. ``write_hypers_to_proto``
 
 Note that not all of these members are declared virtual in ``AbstractHierarchy`` or ``BaseHierarchy``: this is because virtual members are only the ones that must be called from outside the ``Hierarchy``, the other ones are handled via CRTP. Not having them virtual saves a lot of lookups in the vtables.
 The ``BaseHierarchy`` class takes 4 template parameters:
@@ -66,22 +79,24 @@ The ``BaseHierarchy`` class takes 4 template parameters:
 3. ``Hyperparams`` is usually a struct representing the parameters in :math:`P_0`
 4. ``Prior`` must be a protobuf object encoding the prior parameters.
 
+.. Finally, a ``ConjugateHierarchy`` takes care of the implementation of some methods that are specific to conjugate models.
 
-Finally, a ``ConjugateHierarchy`` takes care of the implementation of some methods that are specific to conjugate models.
 
--------
-Classes
--------
+----------------
+Abstract Classes
+----------------
 
 .. doxygenclass:: AbstractHierarchy
    :project: bayesmix
    :members:
 .. doxygenclass:: BaseHierarchy
    :project: bayesmix
    :members:
-.. doxygenclass:: ConjugateHierarchy
-   :project: bayesmix
-   :members:
+
+---------------------------------
+Classes for Conjugate Hierarchies
+---------------------------------
+
 .. doxygenclass:: NNIGHierarchy
    :project: bayesmix
    :members:
@@ -91,3 +106,17 @@ Classes
 .. doxygenclass:: LinRegUniHierarchy
    :project: bayesmix
    :members:
+
+-------------------------------------
+Classes for Non-Conjugate Hierarchies
+-------------------------------------
+
+.. doxygenclass:: NNxIGHierarchy
+   :project: bayesmix
+   :members:
+.. doxygenclass:: LapNIGHierarchy
+   :project: bayesmix
+   :members:
+.. doxygenclass:: FAHierarchy
+   :project: bayesmix
+   :members:
diff --git a/docs/index.rst b/docs/index.rst
@@ -32,7 +32,8 @@ There are currently three submodules to the ``bayesmix`` library, represented by
 Further, we employ Protocol buffers for several purposes, including serialization. The list of all protos with their docs is available in the ``protos`` link below.
 
 .. toctree::
-   :maxdepth: 1
+   :maxdepth: 2
+   :titlesonly:
    :caption: API: library submodules
 
    algorithms
@@ -43,11 +44,15 @@ Further, we employ Protocol buffers for several purposes, including serializatio
    utils
 
 
-
 Tutorials
 =========
 
-:doc:`tutorial`
+.. toctree::
+   :maxdepth: 1
+
+   tutorial
+
+.. :doc:`tutorial`
 
 
 Python interface

diff --git a/docs/likelihoods.rst b/docs/likelihoods.rst
@@ -0,0 +1,85 @@
+bayesmix/hierarchies/likelihoods
+
+Likelihoods
+===========
+
+The ``Likelihood`` sub-module represents the likelihood we have assumed for the data in a given cluster. Each ``Likelihood`` class represents the sampling model
+
+.. math::
+    y_1, \ldots, y_k \mid \bm{\tau} \stackrel{\small\mathrm{iid}}{\sim} f(\cdot \mid \bm{\tau})
+
+for a specific choice of the probability density function :math:`f`.
+
+-------------------------
+Main operations performed
+-------------------------
+
+A likelihood object must be able to perform the following operations:
+
+a. First of all, we require the ``lpdf()`` and ``lpdf\_grid()`` methods, which simply evaluate the loglikelihood in a given point or in a grid of points (also in case of a \emph{dependent} likelihood, i.e., with covariates associated to each observation) [``lpdf()`` and ``lpdf_grid``]
+b. In case you want to rely on a Metropolis-like updater, the likelihood needs to evaluation of the likelihood of the whole cluster starting from the vector of unconstrained parameters [``cluster_lpdf_from_unconstrained()``]. Observe that the ``AbstractLikelihood`` class provides two such methods, one returning a ``double`` and one returning a ``stan::math::var``. The latter is used to automatically compute the gradient of the likelihood via Stan's automatic differentiation, if needed. In practice, users do not need to implement both methods separately and can implement only one templated method
+c. manage the insertion and deletion of a datum in the cluster [``add_datum`` and ``remove_datum``]
+d. update the summary statistics associated to the likelihood [``update_summary_statistics``]. Summary statistics (when available) are used to evaluate the likelihood function on the whole cluster, as well as to perform the posterior updates of :math:`\bm{\tau}`. This usually gives a substantial speedup
+
+--------------
+Code structure
+--------------
+
+In principle, the ``Likelihood`` classes are responsible only of evaluating the log-likelihood function given a specific choice of parameters :math:`\bm{\tau}`.
+Therefore, a simple inheritance structure would seem appropriate. However, the nature of the parameters :math:`\bm{\tau}` can be very different across different models (think for instance of the difference between the univariate normal and the multivariate normal paramters). As such, we employ CRTP to manage the polymorphic nature of ``Likelihood`` classes.
+
+The class ``AbstractLikelihood`` defines the API, i.e. all the methods that need to be called from outside of a ``Likelihood`` class.
+A template class ``BaseLikelihood`` inherits from ``AbstractLikelihood`` and implements some of the necessary virtual methods, which need not be implemented by the child classes.
+
+Instead, child classes **must** implement:
+
+a. ``compute_lpdf``: evaluates :math:`k(x \mid \theta_h)`
+b. ``update_sum_stats``: updates the summary statistics when an observation is allocated or de-allocated from the hierarchy
+c. ``clear_summary_statistics``: clears all the summary statistics
+d. ``is_dependent``: defines if the given likelihood depends on covariates
+e. ``is_multivariate``: defines if the given likelihood is for multivariate data
+
+In case the likelihood needs to be used in a Metropolis-like updater, child classes **should** also implement:
+
+f. ``cluster_lpdf_from_unconstrained``: evaluates :math:`\prod_{i: c_i = h} k(x_i \mid \tilde{\theta}_h)`, where :math:`\tilde{\theta}_h` is the vector of unconstrained parameters.
+
+----------------
+Abstract Classes
+----------------
+
+.. doxygenclass:: AbstractLikelihood
+    :project: bayesmix
+    :members:
+.. doxygenclass:: BaseLikelihood
+    :project: bayesmix
+    :members:
+
+----------------------------------
+Classes for Univariate Likelihoods
+----------------------------------
+
+.. doxygenclass:: UniNormLikelihood
+    :project: bayesmix
+    :members:
+    :protected-members:
+.. doxygenclass:: UniLinRegLikelihood
+    :project: bayesmix
+    :members:
+    :protected-members:
+.. doxygenclass:: LaplaceLikelihood
+    :project: bayesmix
+    :members:
+    :protected-members:
+
+------------------------------------
+Classes for Multivariate Likelihoods
+------------------------------------
+
+.. doxygenclass:: MultiNormLikelihood
+    :project: bayesmix
+    :members:
+    :protected-members:
+.. doxygenclass:: FALikelihood
+    :project: bayesmix
+    :members:
+    :protected-members: