Skip to content

Commit

Permalink
add OpenMP support to matrix mult bottleneck sections
Browse files Browse the repository at this point in the history
  • Loading branch information
MikeDMorgan committed Feb 2, 2024
1 parent e1f4c81 commit f57d57f
Show file tree
Hide file tree
Showing 11 changed files with 107 additions and 27 deletions.
3 changes: 1 addition & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,7 @@ URL: https://marionilab.github.io/miloR
BugReports: https://github.com/MarioniLab/miloR/issues
biocViews: SingleCell, MultipleComparison, FunctionalGenomics, Software
LinkingTo: Rcpp,
RcppArmadillo,
RcppEigen
RcppArmadillo
Depends: R (>= 4.0.0),
edgeR
Imports: BiocNeighbors,
Expand Down
Binary file modified data/sim_nbglmm.RData
Binary file not shown.
2 changes: 1 addition & 1 deletion src/Makevars
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#PKG_CXXFLAGS = $(CXX11STD) -Wall -pedantic
CXX11STD = CXX11
PKG_CXXFLAGS = $(CFLAGS) $(CXX11STD)
PKG_CXXFLAGS = $(CFLAGS) $(CXX11STD) $(SHLIB_OPENMP_CXXFLAGS)
PKG_LIBS = $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS)
ALL_CXXFLAGS = $(R_XTRA_CXXFLAGS) $(PKG_CXXFLAGS) $(CXXPICFLAGS) $(SHLIB_CXXFLAGS) $(CXXFLAGS)
1 change: 0 additions & 1 deletion src/RcppExports.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
// Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

#include <RcppArmadillo.h>
#include <RcppEigen.h>
#include <Rcpp.h>

using namespace Rcpp;
Expand Down
3 changes: 2 additions & 1 deletion src/computeMatrices.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include<Rcpp.h>
#include "utils.h"
// [[Rcpp::depends(RcppArmadillo)]]
// [[Rcpp::plugins(openmp)]]
using namespace Rcpp;

arma::vec computeYStar(arma::mat X, arma::vec curr_beta, arma::mat Z, arma::mat Dinv, arma::vec curr_u, arma::vec y,
Expand Down Expand Up @@ -96,7 +97,7 @@ arma::mat computeVStar(arma::mat Z, arma::mat G, arma::mat W){
}


arma::mat computePREML (arma::mat Vsinv, arma::mat X){
arma::mat computePREML (const arma::mat& Vsinv, const arma::mat& X){
int n = Vsinv.n_cols;

arma::mat P(n, n);
Expand Down
2 changes: 1 addition & 1 deletion src/computeMatrices.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ arma::mat computeW (double disp, arma::mat Dinv, std::string vardist);
arma::mat computeWNB(double disp, arma::mat Dinv);
arma::mat computeWPoisson(arma::mat Dinv);
arma::mat computeVStar (arma::mat Z, arma::mat G, arma::mat W);
arma::mat computePREML (arma::mat Vsinv, arma::mat X);
arma::mat computePREML (const arma::mat& Vsinv, const arma::mat& X);
arma::mat initialiseG (Rcpp::List rlevels, arma::vec sigmas);
arma::mat initialiseG_G (Rcpp::List u_indices, arma::vec sigmas, arma::mat Kin);
arma::mat invGmat (Rcpp::List rlevels, arma::vec sigmas);
Expand Down
2 changes: 0 additions & 2 deletions src/fitGeneticPLGlmm.cpp
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
#include<RcppArmadillo.h>
#include<RcppEigen.h>
#include<string>
// [[Rcpp::depends(RcppArmadillo)]]
// [[Rcpp::depends(RcppEigen)]]
#include "paramEst.h"
#include "computeMatrices.h"
#include "invertPseudoVar.h"
Expand Down
2 changes: 0 additions & 2 deletions src/inference.cpp
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
#include "inference.h"
#include<RcppArmadillo.h>
#include<RcppEigen.h>
// [[Rcpp::depends(RcppArmadillo)]]
// [[Rcpp::depends(RcppEigen)]]
// using namespace Rcpp;

// All functions used for inference
Expand Down
97 changes: 83 additions & 14 deletions src/paramEst.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@
#include "computeMatrices.h"
#include "utils.h"
#include<RcppArmadillo.h>
#include<RcppEigen.h>
#include<cmath>

// [[Rcpp::depends(RcppArmadillo)]]
// [[Rcpp::depends(RcppEigen)]]
// [[Rcpp::plugins(openmp)]]
// using namespace Rcpp;

// All functions used in parameter estimation
Expand Down Expand Up @@ -43,9 +43,11 @@ arma::mat sigmaInfoREML_arma (const Rcpp::List& pvstari, const arma::mat& P){

// this is a symmetric matrix so only need to fill the upper or
// lower triangle to make it O(n^2/2) rather than O(n^2)
#pragma omp parallel for
for(int i=0; i < c; i++){
const arma::mat& _ipP = pvstari[i]; // this is P * \d Var/ \dsigma

#pragma omp parallel for
for(int j=i; j < c; j++){
const arma::mat& P_jp = pvstari[j]; // this is P * \d Var/ \dsigma
arma::mat a_ij(_ipP * P_jp); // this is the biggest bottleneck - it takes >2s!
Expand Down Expand Up @@ -247,9 +249,19 @@ arma::vec estHasemanElstonGenetic(const arma::mat& Z, const arma::mat& PREML,
unsigned int n = ystar.size();
unsigned int c = u_indices.size(); // number of variance components
unsigned long nsq = n * (n + 1)/2; //size of vectorised components using just upper or lower triangle of covariance matrix

unsigned int i, j; // Declare loop variables i and j for OpenMP
double _ycovij; // Declare temp_value
arma::mat Ycovar(n, n);
Ycovar = PREML * (ystar * ystar.t()) * PREML; // project onto REML P matrix

// direct computation of Ycovar with OpenMP?
#pragma omp parallel for private(i, j, _ycovij)
for (int i = 0; i < n; i++) {
for (int j = i; j < n; j++) {
arma::mat _tmpMat = arma::trans(PREML.row(j)) % (ystar * ystar(j));
double _ycovij = arma::dot(PREML.row(i), _tmpMat);
Ycovar(i, j) = _ycovij;
}
}

// select the upper triangular elements, including the diagonal
arma::uvec upper_indices = trimatu_ind(arma::size(Ycovar));
Expand Down Expand Up @@ -279,14 +291,21 @@ arma::vec estHasemanElston(const arma::mat& Z, const arma::mat& PREML, const Rcp
unsigned int n = ystar.size();
unsigned int c = u_indices.size(); // number of variance components
unsigned long nsq = n * (n + 1)/2; //size of vectorised components using just upper or lower triangle of covariance matrix
unsigned int i, j; // Declare loop variables i and j for OpenMP
double _ycovij; // Declare temp_value

// sparsify just the multiplication steps.
// arma::mat Ycovar(n, n);
arma::mat Ycovar(n, n);
arma::mat YT(ystar * ystar.t());

Ycovar = PREML * YT * PREML; // project onto REML P matrix
// Ycovar = PREML * (ystar * ystar.t()) * PREML; // project onto REML P matrix
// direct computation of Ycovar with OpenMP?
#pragma omp parallel for private(i, j, _ycovij)
for (int i = 0; i < n; i++) {
for (int j = i; j < n; j++) {
arma::mat _tmpMat = arma::trans(PREML.row(j)) % (ystar * ystar(j));
double _ycovij = arma::dot(PREML.row(i), _tmpMat);
Ycovar(i, j) = _ycovij;
}
}

// select the upper triangular elements, including the diagonal
arma::uvec upper_indices = trimatu_ind(arma::size(Ycovar));
Expand Down Expand Up @@ -351,14 +370,22 @@ arma::vec estHasemanElstonConstrained(const arma::mat& Z, const arma::mat& PREML
unsigned int n = ystar.size();
unsigned int c = u_indices.size(); // number of variance components
unsigned long nsq = n * (n + 1)/2; //size of vectorised components using just upper or lower triangle of covariance matrix
unsigned int i, j; // Declare loop variables i and j for OpenMP
double _ycovij; // Declare temp_value

// sparsification doesn't seem to help much
// arma::mat Ycovar(n, n);
arma::mat Ycovar(n, n);
arma::mat YT(ystar * ystar.t());

Ycovar = PREML * YT * PREML; // project onto REML P matrix
// Ycovar = PREML * (ystar * ystar.t()) * PREML; // project onto REML P matrix
// direct computation of Ycovar with OpenMP?
#pragma omp parallel for private(i, j, _ycovij)
for (int i = 0; i < n; i++) {
for (int j = i; j < n; j++) {
arma::mat _tmpMat = arma::trans(PREML.row(j)) % (ystar * ystar(j));
double _ycovij = arma::dot(PREML.row(i), _tmpMat);
Ycovar(i, j) = _ycovij;
}
}

// select the upper triangular elements, including the diagonal
arma::uvec upper_indices = trimatu_ind(arma::size(Ycovar));
arma::vec Ybig = Ycovar(upper_indices);
Expand Down Expand Up @@ -454,9 +481,19 @@ arma::vec estHasemanElstonConstrainedGenetic(const arma::mat& Z, const arma::mat
unsigned int n = ystar.size();
unsigned int c = u_indices.size(); // number of variance components
unsigned long nsq = n * (n + 1)/2; //size of vectorised components using just upper or lower triangle of covariance matrix
unsigned int i, j; // Declare loop variables i and j for OpenMP
double _ycovij; // Declare temp_value

arma::mat Ycovar(n, n);
Ycovar = PREML * (ystar * ystar.t()) * PREML; // project onto REML P matrix
// direct computation of Ycovar with OpenMP?
#pragma omp parallel for private(i, j, _ycovij)
for (int i = 0; i < n; i++) {
for (int j = i; j < n; j++) {
arma::mat _tmpMat = arma::trans(PREML.row(j)) % (ystar * ystar(j));
double _ycovij = arma::dot(PREML.row(i), _tmpMat);
Ycovar(i, j) = _ycovij;
}
}

// select the upper triangular elements, including the diagonal
arma::uvec upper_indices = trimatu_ind(arma::size(Ycovar));
Expand Down Expand Up @@ -648,6 +685,8 @@ arma::mat vectoriseZ(const arma::mat& Z, const Rcpp::List& u_indices, const arma
int c = u_indices.size();
int n = Z.n_rows;
unsigned long nsq = n * (n + 1)/2;
int j, k, l, a;
double temp_value;

Rcpp::List _Zelements(1);
arma::mat bigI(n, n, arma::fill::eye); // this should be the identity which we vectorise
Expand All @@ -665,10 +704,40 @@ arma::mat vectoriseZ(const arma::mat& Z, const Rcpp::List& u_indices, const arma
unsigned int qmin = arma::min(u_idx);
unsigned int qmax = arma::max(u_idx);
_subZ = Z.cols(qmin-1, qmax-1);
arma::mat _subZT = _subZ * _subZ.t();

arma::mat _ZZT(n, n);
_ZZT = P * (_subZ * _subZ.t()) * P; // REML projection is v.slow
arma::mat _pZZT(n, n);
// Can we turn this into a for loop and use OpenMP?
for (int j = 0; j < n; j++) {
// j = rows of P
#pragma omp parallel for reduction(+:temp_value)
for (int k = 0; k < n; k++) {
// k = columns of P
temp_value = 0.0;
for(int a=0; a < n; a++){
temp_value += P(j, a) * _subZT(a, k);
}
_pZZT(j, k) = temp_value; // Apply P again for symmetry
}

}

// another loop?
for (int j = 0; j < n; j++) {
// j = rows of P
#pragma omp parallel for reduction(+:temp_value)
for (int k = 0; k < n; k++) {
// k = columns of P
temp_value = 0.0;
for(int a=0; a < n; a++){
temp_value += _pZZT(j, a) * P(a, k);
}
_ZZT(j, k) = temp_value; // Apply P again for symmetry
}

}
// _ZZT = _pZZT * P; // REML projection is v.slow
// vectorise
arma::vec _vecZ = _ZZT(upper_indices);
arma::mat _vecZZT = _Zelements(0);
Expand Down
3 changes: 1 addition & 2 deletions src/paramEst.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,8 @@
#define PARAMEST_H

#include<RcppArmadillo.h>
#include<RcppEigen.h>
// [[Rcpp::depends(RcppArmadillo)]]
// [[Rcpp::depends(RcppEigen)]]
// [[Rcpp::plugins(openmp)]]

arma::vec sigmaScoreREML_arma (const Rcpp::List& pvstar_i, const arma::vec& ystar,
const arma::mat& P, const arma::vec& curr_beta,
Expand Down
19 changes: 18 additions & 1 deletion src/pseudovarPartial.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,29 @@ List pseudovarPartial_P(List V_partial, const arma::mat& P){
// A Rcpp specific implementation that uses positional indexing rather than character indexes
// don't be tempted to sparsify this - the overhead of casting is too expensive
unsigned int items = V_partial.size();
unsigned int n = P.n_cols;
List outlist(items);
double temp_value;

for(unsigned int i = 0; i < items; i++){
// Need to output an S4 object - arma::sp_mat uses implicit interconversion for support dg Matrices
arma::mat _omat = V_partial(i);
arma::mat omat(P * _omat);
arma::mat omat(n, n);

// Can we turn this into a for loop and use OpenMP?
for (int j = 0; j < n; j++) {
// j = rows of P
#pragma omp parallel for reduction(+:temp_value)
for (int k = 0; k < n; k++) {
// k = columns of P
temp_value = 0.0;
for(int a=0; a < n; a++){
temp_value += P(j, a) * _omat(a, k);
}
omat(j, k) = temp_value; // Apply P again for symmetry
}
}

outlist[i] = omat;
}

Expand Down

0 comments on commit f57d57f

Please sign in to comment.