Batch processing of Chemtab Source Computation (#506)

* commiting clarifying comments and & function definitions + some helpful utility functions for debugging * fixed problem with previous commit where consts weren't being used where needed * first draft of dummy batch api changes essentially passing to dummy batch function which manually calls the unbatched version, but good test case. * added dummy batched version of every function, each of which relies on the single dummy batched ChemTabModelComputeFunction * commiting first working draft of batched processing (implemented on chemistry source only for now) * clarified/corrected confusing & outdated comment * commiting reformatted code & version bump * reworked version so this would be a patch increment since it is just optimization * Updated doxygen comments and addressed various style issues with the PR
UBCHREST · Jan 28, 2025 · e0535af · e0535af
1 parent a230510
commit e0535af
Show file tree

Hide file tree

Showing 4 changed files with 191 additions and 63 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -4,7 +4,7 @@ cmake_minimum_required(VERSION 3.18.4)
 include(config/petscCompilers.cmake)
 
 # Set the project details
-project(ablateLibrary VERSION 0.12.35)
+project(ablateLibrary VERSION 0.12.36)
 
 # Load the Required 3rd Party Libaries
 pkg_check_modules(PETSc REQUIRED IMPORTED_TARGET GLOBAL PETSc)

diff --git a/src/eos/chemTab.cpp b/src/eos/chemTab.cpp
@@ -1,7 +1,5 @@
 #include "chemTab.hpp"
-
 #include <eos/tChem.hpp>
-#include <fstream>
 
 #ifdef WITH_TENSORFLOW
 #include <yaml-cpp/yaml.h>
@@ -90,9 +88,8 @@ ablate::eos::ChemTab::~ChemTab() {
     TF_DeleteSession(session, status);
     TF_DeleteSessionOptions(sessionOpts);
     TF_DeleteStatus(status);
-    free(sourceEnergyScaler);
-    for (std::size_t i = 0; i < speciesNames.size(); i++) free(Wmat[i]);
 
+    for (std::size_t i = 0; i < speciesNames.size(); i++) free(Wmat[i]);
     free(Wmat);
 }
 
@@ -153,11 +150,49 @@ void ablate::eos::ChemTab::LoadBasisVectors(std::istream &inputStream, std::size
 #define safe_free(ptr) \
     if (ptr != NULL) free(ptr)
 
-void ablate::eos::ChemTab::ChemTabModelComputeFunction(PetscReal density, const PetscReal densityProgressVariable[], PetscReal *predictedSourceEnergy, PetscReal *progressVariableSource,
+void ablate::eos::ChemTab::ChemTabModelComputeFunction(PetscReal density, const PetscReal densityProgressVariables[], PetscReal *densityEnergySource, PetscReal *densityProgressVariableSource,
                                                        PetscReal *densityMassFractions) const {
+    ChemTabModelComputeFunction(&density, &densityProgressVariables, &densityEnergySource, &densityProgressVariableSource, &densityMassFractions, 1);
+}
+
+// Verified to work 11/7/23
+// NOTE: id arg is to index a potentially batched outputValues argument
+void ablate::eos::ChemTab::ExtractModelOutputsAtPoint(const PetscReal density, PetscReal *densityEnergySource, PetscReal *densityProgressVariableSource, PetscReal *densityMassFractions,
+                                                      const std::array<TF_Tensor *, 2> &outputValues, size_t id) const {
+    // store physical variables (e.g. souener & mass fractions)
+    float *outputArray;                                                                        // Dwyer: as counterintuitive as it may be static dependents come second, it did pass its tests!
+    outputArray = ((float *)TF_TensorData(outputValues[1])) + id * (speciesNames.size() + 1);  // also increment by id for batch processing
+    auto p = (PetscReal)outputArray[0];
+    if (densityEnergySource != nullptr) *densityEnergySource += p * density;
+
+    // store inverted mass fractions
+    if (densityMassFractions) {
+        for (size_t i = 0; i < speciesNames.size(); i++) {
+            densityMassFractions[i] = (PetscReal)outputArray[i + 1] * density;  // i+1 b/c i==0 is souener!
+        }
+    }
+
+    // store CPV sources
+    outputArray = ((float *)TF_TensorData(outputValues[0])) + id * (progressVariablesNames.size() - 1);
+    if (densityProgressVariableSource != nullptr) {
+        // densityProgressVariableSource[0] = 0;  // Zmix source is always 0!
+
+        // -1 b/c we don't want to go out of bounds with the +1 below, also to prevent integer overflow
+        for (size_t i = 0; i < (progressVariablesNames.size() - 1); ++i) {
+            densityProgressVariableSource[i + 1] += (PetscReal)outputArray[i] * density;  // +1 b/c we are manually filling in Zmix source value (to 0)
+        }
+    }
+}
+
+#define safe_id(array, i) (array ? array[i] : nullptr)
+
+void ablate::eos::ChemTab::ChemTabModelComputeFunction(const PetscReal density[], const PetscReal *const *const densityProgressVariables, PetscReal **densityEnergySource,
+                                                       PetscReal **densityProgressVariableSource, PetscReal **densityMassFractions, size_t batch_size) const {
     //********* Get Input tensor
     const std::size_t numInputs = 1;
 
+    // WTF is t0? & What is index 0? <-- t0 is input graph op & index 0 is
+    // index of the input (which is only 0 b/c there is only 1 input)
     TF_Output t0 = {TF_GraphOperationByName(graph, "serving_default_input_1"), 0};
 
     if (t0.oper == nullptr) throw std::runtime_error("ERROR: Failed TF_GraphOperationByName serving_default_input_1");
@@ -166,60 +201,43 @@ void ablate::eos::ChemTab::ChemTabModelComputeFunction(PetscReal density, const
     //********* Get Output tensor
     const std::size_t numOutputs = 2;
 
+    // NOTE: these names actually do make sense even though implicitly t_sourceenergy also includes inverse outputs
     TF_Output t_sourceenergy = {TF_GraphOperationByName(graph, "StatefulPartitionedCall"), 0};
     TF_Output t_sourceterms = {TF_GraphOperationByName(graph, "StatefulPartitionedCall"), 1};
 
     if (t_sourceenergy.oper == nullptr) throw std::runtime_error("ERROR: Failed TF_GraphOperationByName StatefulPartitionedCall:0");
     if (t_sourceterms.oper == nullptr) throw std::runtime_error("ERROR: Failed TF_GraphOperationByName StatefulPartitionedCall:1");
     std::array<TF_Output, numOutputs> output = {t_sourceenergy, t_sourceterms};
 
-    //********* Allocate data for inputs & outputs
+    //********* Allocate input_data for inputs & outputs
     std::array<TF_Tensor *, numInputs> inputValues = {nullptr};
     std::array<TF_Tensor *, numOutputs> outputValues = {nullptr, nullptr};
 
     std::size_t ndims = 2;
 
     // according to Varun this should work for including Zmix
     auto ninputs = progressVariablesNames.size();
-    int64_t dims[] = {1, (int)ninputs};
-    float data[ninputs];
+    int64_t dims[] = {(int)batch_size, (int)ninputs};
+    float input_data[batch_size][ninputs];  // NOTE: we changed the 1st dimension from 1 --> batch_size
 
-    for (std::size_t i = 0; i < ninputs; i++) {
-        data[i] = (float)(densityProgressVariable[i] / density);
-    }
+    // NOTE: this should be modified sufficiently to support batch inputs!
+    for (size_t i = 0; i < batch_size; i++)
+        for (std::size_t j = 0; j < ninputs; j++) input_data[i][j] = (float)(densityProgressVariables[i][j] / density[i]);
 
-    std::size_t ndata = ninputs * sizeof(float);
-    TF_Tensor *input_tensor = TF_NewTensor(TF_FLOAT, dims, (int)ndims, data, ndata, &NoOpDeallocator, nullptr);
+    std::size_t ndata = sizeof(input_data);
+    TF_Tensor *input_tensor = TF_NewTensor(TF_FLOAT, dims, (int)ndims, input_data, ndata, &NoOpDeallocator, nullptr);
     if (input_tensor == nullptr) throw std::runtime_error("ERROR: Failed TF_NewTensor");
 
+    // there is only 1 input tensor per model eval (unlike e.g. outputs)
     inputValues[0] = input_tensor;
 
     TF_SessionRun(session, nullptr, input.data(), inputValues.data(), (int)numInputs, output.data(), outputValues.data(), (int)numOutputs, nullptr, 0, nullptr, status);
     if (TF_GetCode(status) != TF_OK) throw std::runtime_error(TF_Message(status));
     //********** Extract source predictions
 
-    // store physical variables (e.g. souener & mass fractions)
-    float *outputArray;  // Dwyer: as counter intuitive as it may be static dependents come second, it did pass its tests!
-    outputArray = (float *)TF_TensorData(outputValues[1]);
-    auto p = (PetscReal)outputArray[0];
-    if (predictedSourceEnergy != nullptr) *predictedSourceEnergy += p * density;
-
-    // store inverted mass fractions
-    if (densityMassFractions) {
-        for (size_t i = 0; i < speciesNames.size(); i++) {
-            densityMassFractions[i] = (PetscReal)outputArray[i + 1] * density;  // i+1 b/c i==0 is souener!
-        }
-    }
-
-    // store CPV sources
-    outputArray = (float *)TF_TensorData(outputValues[0]);
-    if (progressVariableSource != nullptr) {
-        // progressVariableSource[0] = 0;  // Zmix source is always 0!  We
-
-        // -1 b/c we don't want to go out of bounds with the +1 below, also int is to prevent integer overflow
-        for (size_t i = 0; i < (progressVariablesNames.size() - 1); ++i) {
-            progressVariableSource[i + 1] += (PetscReal)outputArray[i] * density;  // +1 b/c we are manually filling in Zmix source value (to 0)
-        }
+    // reiterate the same extraction process for each member of the batch
+    for (size_t i = 0; i < batch_size; i++) {
+        ExtractModelOutputsAtPoint(density[i], safe_id(densityEnergySource, i), safe_id(densityProgressVariableSource, i), safe_id(densityMassFractions, i), outputValues, i);
     }
 
     // free allocated vectors
@@ -231,21 +249,35 @@ void ablate::eos::ChemTab::ChemTabModelComputeFunction(PetscReal density, const
     }
 }
 
-void ablate::eos::ChemTab::ComputeMassFractions(const PetscReal *progressVariables, PetscReal *densityMassFractions, PetscReal density) const {
-    // call model using generalized invocation method (usable for inversion & source computation)
-    ChemTabModelComputeFunction(density, progressVariables, nullptr, nullptr, densityMassFractions);
-}
-
-void ablate::eos::ChemTab::ComputeMassFractions(const std::vector<PetscReal> &progressVariables, std::vector<PetscReal> &massFractions, PetscReal density) const {
+void ablate::eos::ChemTab::ComputeMassFractions(std::vector<PetscReal> &progressVariables, std::vector<PetscReal> &massFractions, PetscReal density) const {
     if (progressVariables.size() != progressVariablesNames.size()) {
         throw std::invalid_argument("The Progress variable size is expected to be " + std::to_string(progressVariablesNames.size()));
     }
     if (massFractions.size() != speciesNames.size()) {
         throw std::invalid_argument("The Species names for massFractions is expected to be " + std::to_string(progressVariablesNames.size()));
     }
+    // the naming is wrong on purpose so that it will conform to tests.
     ComputeMassFractions(progressVariables.data(), massFractions.data(), density);
+    // ComputeProgressVariables(massFractions.data(), progressVariables.data());
+}
+
+void ablate::eos::ChemTab::ComputeMassFractions(const PetscReal *densityProgressVariables, PetscReal *densityMassFractions, const PetscReal density) const {
+    // call model using generalized invocation method (usable for inversion & source computation)
+    ChemTabModelComputeFunction(density, densityProgressVariables, nullptr, nullptr, densityMassFractions);
+}
+
+void ablate::eos::ChemTab::ComputeMassFractions(const PetscReal *const *densityProgressVariables, PetscReal **densityMassFractions, const PetscReal density[], size_t n) const {
+    ChemTabModelComputeFunction(density, densityProgressVariables, nullptr, nullptr, densityMassFractions, n);
 }
 
+// Batched Version
+void ablate::eos::ChemTab::ComputeProgressVariables(const PetscReal *const *massFractions, PetscReal *const *progressVariables, size_t n) const {
+    for (size_t i = 0; i < n; i++) {
+        ComputeProgressVariables(massFractions[i], progressVariables[i]);
+    }
+}
+
+// Apparently only used for tests!
 void ablate::eos::ChemTab::ComputeProgressVariables(const std::vector<PetscReal> &massFractions, std::vector<PetscReal> &progressVariables) const {
     if (progressVariables.size() != progressVariablesNames.size()) {
         throw std::invalid_argument("The Progress variable size is expected to be " + std::to_string(progressVariablesNames.size()));
@@ -256,6 +288,7 @@ void ablate::eos::ChemTab::ComputeProgressVariables(const std::vector<PetscReal>
     ComputeProgressVariables(massFractions.data(), progressVariables.data());
 }
 
+// This is real one used elsewhere probably because it is faster
 void ablate::eos::ChemTab::ComputeProgressVariables(const PetscReal *massFractions, PetscReal *progressVariables) const {
     // c = W'y
     for (size_t i = 0; i < progressVariablesNames.size(); i++) {
@@ -267,13 +300,38 @@ void ablate::eos::ChemTab::ComputeProgressVariables(const PetscReal *massFractio
     }
 }
 
-void ablate::eos::ChemTab::ChemistrySource(PetscReal density, const PetscReal densityProgressVariable[], PetscReal *densityEnergySource, PetscReal *progressVariableSource) const {
+inline double L2_norm(PetscReal *array, int n) {
+    double norm = 0;
+    for (int i = 0; i < n; i++) {
+        norm += pow(array[i], 2);
+    }
+    norm = pow(norm / n, 0.5);
+    return norm;
+}
+
+inline void print_array(std::string prefix, PetscReal *array, const int n) {
+    std::cout << prefix;
+    for (int i = 0; i < n; i++) {
+        std::cout << array[i] << ", ";
+    }
+    std::cout << std::endl;
+}
+
+void ablate::eos::ChemTab::ChemistrySource(const PetscReal density, const PetscReal densityProgressVariables[], PetscReal *densityEnergySource, PetscReal *densityProgressVariableSource) const {
     // call model using generalized invocation method (usable for inversion & source computation)
-    ChemTabModelComputeFunction(density, densityProgressVariable, densityEnergySource, progressVariableSource, nullptr);
+    ChemTabModelComputeFunction(density, densityProgressVariables, densityEnergySource, densityProgressVariableSource, nullptr);
+}
+
+// Batched Version
+void ablate::eos::ChemTab::ChemistrySource(const PetscReal *const density, const PetscReal *const *const densityProgressVariables, PetscReal **densityEnergySource,
+                                           PetscReal **densityProgressVariableSource, size_t n) const {
+    // call model using generalized invocation method (usable for inversion & source computation)
+    ChemTabModelComputeFunction(density, densityProgressVariables, densityEnergySource, densityProgressVariableSource, nullptr, n);
 }
 
 void ablate::eos::ChemTab::View(std::ostream &stream) const { stream << "EOS: " << type << std::endl; }
 
+// How does this work? should we be overriding SourceCalc class methods or this method here?
 std::shared_ptr<ablate::eos::ChemistryModel::SourceCalculator> ablate::eos::ChemTab::CreateSourceCalculator(const std::vector<domain::Field> &fields, const ablate::domain::Range &cellRange) {
     // Look for the euler field
     auto eulerField = std::find_if(fields.begin(), fields.end(), [](const auto &field) { return field.name == ablate::finiteVolume::CompressibleFlowFields::EULER_FIELD; });
@@ -332,6 +390,7 @@ ablate::eos::EOSFunction ablate::eos::ChemTab::GetFieldFunctionFunction(const st
             // Compute the mass fractions from progress
             std::vector<PetscReal> yi(speciesNames.size());
 
+            // TODO: change for batch processing!! (ask Matt about it)
             // compute the progress variables and put into conserved for now
             ComputeMassFractions(progress, yi.data());
 
@@ -377,6 +436,7 @@ ablate::eos::EOSFunction ablate::eos::ChemTab::GetFieldFunctionFunction(const st
             // Compute the mass fractions from progress
             PetscReal yi[speciesNames.size()];
 
+            // TODO: change for batch processing!! (ask Matt about it)
             // compute the progress variables and put into conserved for now
             ComputeMassFractions(progress, yi);
 
@@ -426,6 +486,7 @@ PetscErrorCode ablate::eos::ChemTab::ComputeMassFractions(PetscReal time, PetscI
     // Get the density from euler
     PetscReal density = u[uOff[EULER] + finiteVolume::CompressibleFlowFields::RHO];
 
+    // TODO: change for batch processing!! (ask Matt about it)
     // call the compute mass fractions
     chemTab->ComputeMassFractions(u + uOff[DENSITY_PROGRESS], u + uOff[DENSITY_YI], density);
 
@@ -444,6 +505,8 @@ ablate::eos::ChemTab::ChemTabSourceCalculator::ChemTabSourceCalculator(PetscInt
                                                                        std::shared_ptr<ChemTab> chemTabModel)
     : densityOffset(densityOffset), densityEnergyOffset(densityEnergyOffset), densityProgressVariableOffset(densityProgressVariableOffset), chemTabModel(std::move(chemTabModel)) {}
 
+// NOTE: I'm not sure however I believe that this could be the ONLY place that needs updating for Batch processing??
+// Comments seem to indicate it is the case...
 void ablate::eos::ChemTab::ChemTabSourceCalculator::AddSource(const ablate::domain::Range &cellRange, Vec locX, Vec locFVec) {
     // get access to the xArray, fArray
     PetscScalar *fArray;
@@ -452,23 +515,45 @@ void ablate::eos::ChemTab::ChemTabSourceCalculator::AddSource(const ablate::doma
     VecGetArrayRead(locX, &xArray) >> utilities::PetscUtilities::checkError;
 
     // Get the solution dm
-    DM dm;
+    DM dm;  // NOTE: DM is topological space (i.e. grid)
     VecGetDM(locFVec, &dm) >> utilities::PetscUtilities::checkError;
 
+    // Here we store the batched pointers needed for multiple chemTabModel->ChemistrySource() calls
+    PetscInt buffer_len = cellRange.end - cellRange.start;
+    PetscScalar allDensity[buffer_len];
+    const PetscScalar *allDensityCPV[buffer_len];
+    PetscScalar *allDensityEnergySource[buffer_len];
+    PetscScalar *allDensityCPVSource[buffer_len];
+    // PetscScalar* allSourceAtCell[buffer_len]; // apparently these can't be used b/c of secret offsets
+    // const PetscScalar* allSolutionAtCell[buffer_len]; // apparently these can't be used b/c of secret offsets
+
+    // NOTE: this clunky approach has been verified to work for batch processsing
     // March over each cell in the range
     for (PetscInt c = cellRange.start; c < cellRange.end; ++c) {
         const PetscInt iCell = cellRange.points ? cellRange.points[c] : c;
+        // Dwyer: iCell is the "point"
 
-        // Get the current state variables for this cell
         PetscScalar *sourceAtCell = nullptr;
         DMPlexPointLocalRef(dm, iCell, fArray, &sourceAtCell) >> utilities::PetscUtilities::checkError;
 
-        // Get the current state variables for this cell
+        // Get the current state variables for this cell (CPVs)
         const PetscScalar *solutionAtCell = nullptr;
         DMPlexPointLocalRead(dm, iCell, xArray, &solutionAtCell) >> utilities::PetscUtilities::checkError;
-
-        chemTabModel->ChemistrySource(solutionAtCell[densityOffset], solutionAtCell + densityProgressVariableOffset, sourceAtCell + densityEnergyOffset, sourceAtCell + densityProgressVariableOffset);
+        // NOTE: DMPlexPointLocalRef() is Read/Write while DMPlexPointLocalRead() is Read only
+
+        // store this cell's attributes into arg vectors
+        size_t index = c - cellRange.start;
+        allDensity[index] = solutionAtCell[densityOffset];
+        allDensityCPV[index] = solutionAtCell + densityProgressVariableOffset;
+        allDensityEnergySource[index] = sourceAtCell + densityEnergyOffset;
+        allDensityCPVSource[index] = sourceAtCell + densityProgressVariableOffset;
     }
+
+    // Using batch overloaded version
+    // TODO: consider/optimize the problem that is likely requires loop to copy these arrays into TF inputs??
+    chemTabModel->ChemistrySource(allDensity, allDensityCPV, allDensityEnergySource, allDensityCPVSource, buffer_len);
+    // NOTE: These "offsets" are pointers since they are CONSTANT class attributes!
+
     // cleanup
     VecRestoreArray(locFVec, &fArray) >> utilities::PetscUtilities::checkError;
     VecRestoreArrayRead(locX, &xArray) >> utilities::PetscUtilities::checkError;