From 43b5138dcecd92aa919a26afca9ef15f6937b22d Mon Sep 17 00:00:00 2001
From: Michael Zingale <michael.zingale@stonybrook.edu>
Date: Mon, 18 Dec 2023 14:37:06 -0500
Subject: [PATCH 1/2] add a method to compute the T derivative of an NSE table
 quantity (#1407)

this works by differentiating the cubic interpolant
---
 nse_tabular/nse_table.H                       | 153 +++++++++++++++++-
 .../test_nse_interp/ci-benchmarks/aprox19.out |  14 +-
 unit_test/test_nse_interp/nse_cell.H          |  29 ++++
 3 files changed, 188 insertions(+), 8 deletions(-)
diff --git a/nse_tabular/nse_table.H b/nse_tabular/nse_table.H
index 7b9cfb6e99..360465aa15 100644
--- a/nse_tabular/nse_table.H
+++ b/nse_tabular/nse_table.H
@@ -121,6 +121,10 @@ int nse_get_ye_index(const Real ye) {
     return ic0 + 1;
 }
 
+///
+/// given 4 points (xs, fs), with spacing dx, return the interplated
+/// value of f at point x by fitting a cubic to the points
+///
 AMREX_GPU_HOST_DEVICE AMREX_INLINE
 Real cubic(const Real* xs, const Real* fs, const Real dx, const Real x) {
 
@@ -129,15 +133,40 @@ Real cubic(const Real* xs, const Real* fs, const Real dx, const Real x) {
     // to the data (xs, fs)
     // we take x_i to be x[1]
 
-    Real a = (3 * fs[1] - 3 * fs[2] + fs[3] - fs[0]) / (6 * std::pow(dx, 3));
+    Real a = (3 * fs[1] - 3 * fs[2] + fs[3] - fs[0]) / (6 * amrex::Math::powi<3>(dx));
     Real b = (-2 * fs[1] + fs[2] + fs[0]) / (2 * dx * dx);
     Real c = (-3 * fs[1] + 6 * fs[2] - fs[3] - 2 * fs[0]) / (6 * dx);
     Real d = fs[1];
 
-    return a * std::pow(x - xs[1], 3) + b * std::pow(x - xs[1], 2) + c * (x - xs[1]) + d;
+    return a * amrex::Math::powi<3>(x - xs[1]) +
+           b * amrex::Math::powi<2>(x - xs[1]) + c * (x - xs[1]) + d;
 
 }
 
+///
+/// given 4 points (xs, fs), with spacing dx between the xs, return
+/// the derivative of f at point x by fitting a cubic to the
+/// points and differentiating the interpolant
+///
+AMREX_GPU_HOST_DEVICE AMREX_INLINE
+Real cubic_deriv(const Real* xs, const Real* fs, const Real dx, const Real x) {
+
+    // fit a cubic of the form
+    // f(x) = a (x - x_i)**3 + b (x - x_i)**2 + c (x - x_i) + d
+    // to the data (xs, fs)
+    // we take x_i to be x[1]
+    // then return dfdx = 3 a (x - x_i)**2 + 2 b (x - x_i) + c
+
+    Real a = (3 * fs[1] - 3 * fs[2] + fs[3] - fs[0]) / (6 * amrex::Math::powi<3>(dx));
+    Real b = (-2 * fs[1] + fs[2] + fs[0]) / (2 * dx * dx);
+    Real c = (-3 * fs[1] + 6 * fs[2] - fs[3] - 2 * fs[0]) / (6 * dx);
+    //Real d = fs[1];
+
+    return 3.0_rt * a * amrex::Math::powi<2>(x - xs[1]) + 2.0_rt * b * (x - xs[1]) + c;
+
+}
+
+
 template <typename T>
 AMREX_GPU_HOST_DEVICE AMREX_INLINE
 Real trilinear(const int ir1, const int it1, const int ic1,
@@ -236,6 +265,68 @@ Real tricubic(const int ir0, const int it0, const int ic0,
 
 }
 
+///
+/// take the temperature derivative of a table quantity by differentiating
+/// the cubic interpolant
+///
+template <typename T>
+AMREX_GPU_HOST_DEVICE AMREX_INLINE
+Real tricubic_dT(const int ir0, const int it0, const int ic0,
+                 const Real rho, const Real temp, const Real ye, const T& data) {
+
+    Real yes[] = {nse_table_ye(ic0),
+                  nse_table_ye(ic0+1),
+                  nse_table_ye(ic0+2),
+                  nse_table_ye(ic0+3)};
+
+    Real Ts[] = {nse_table_logT(it0),
+                 nse_table_logT(it0+1),
+                 nse_table_logT(it0+2),
+                 nse_table_logT(it0+3)};
+
+    Real rhos[] = {nse_table_logrho(ir0),
+                   nse_table_logrho(ir0+1),
+                   nse_table_logrho(ir0+2),
+                   nse_table_logrho(ir0+3)};
+
+    // first do the 16 ye interpolations
+
+    // the first index will be rho and the second will be T
+    Real d1[4][4];
+
+    for (int ii = 0; ii < 4; ++ii) {
+        for (int jj = 0; jj < 4; ++jj) {
+
+            Real _d[] = {data(nse_idx(ir0+ii, it0+jj, ic0)),
+                         data(nse_idx(ir0+ii, it0+jj, ic0+1)),
+                         data(nse_idx(ir0+ii, it0+jj, ic0+2)),
+                         data(nse_idx(ir0+ii, it0+jj, ic0+3))};
+
+            // note that the ye values are monotonically decreasing,
+            // so the "dx" needs to be negative
+            d1[ii][jj] = cubic(yes, _d, -nse_table_size::dye, ye);
+        }
+    }
+
+    // now do the 4 rho interpolations (one in each T plane)
+
+    Real d2[4];
+
+    for (int jj = 0; jj < 4; ++jj) {
+
+        Real _d[] = {d1[0][jj], d1[1][jj], d1[2][jj], d1[3][jj]};
+        d2[jj] = cubic(rhos, _d, nse_table_size::dlogrho, rho);
+    }
+
+    // finally do the remaining interpolation over T, but return
+    // the derivative of the interpolant
+
+    Real val = cubic_deriv(Ts, d2, nse_table_size::dlogT, temp);
+
+    return val;
+
+}
+
 AMREX_GPU_HOST_DEVICE AMREX_INLINE
 void nse_interp(nse_table_t& nse_state, bool skip_X_fill=false) {
 
@@ -300,13 +391,13 @@ void nse_interp(nse_table_t& nse_state, bool skip_X_fill=false) {
         // so we offset one to the left and also ensure that we don't go off the table
 
         int ir0 = nse_get_logrho_index(rholog) - 1;
-        ir0 = amrex::max(1, amrex::min(nse_table_size::nden-3, ir0));
+        ir0 = std::clamp(ir0, 1, nse_table_size::nden-3);
 
         int it0 = nse_get_logT_index(tlog) - 1;
-        it0 = amrex::max(1, amrex::min(nse_table_size::ntemp-3, it0));
+        it0 = std::clamp(it0, 1, nse_table_size::ntemp-3);
 
         int ic0 = nse_get_ye_index(yet) - 1;
-        ic0 = amrex::max(1, amrex::min(nse_table_size::nye-3, ic0));
+        ic0 = std::clamp(ic0, 1, nse_table_size::nye-3);
 
         nse_state.abar = tricubic(ir0, it0, ic0, rholog, tlog, yet, abartab);
         nse_state.bea = tricubic(ir0, it0, ic0, rholog, tlog, yet, beatab);
@@ -329,4 +420,56 @@ void nse_interp(nse_table_t& nse_state, bool skip_X_fill=false) {
 
 }
 
+
+///
+/// compute the temperature derivative of the table quantity data
+/// at the point T, rho, ye by using cubic interpolation
+///
+template <typename T>
+AMREX_GPU_HOST_DEVICE AMREX_INLINE
+Real
+nse_interp_dT(const Real temp, const Real rho, const Real ye, const T& data) {
+
+    Real rholog = std::log10(rho);
+    {
+        Real rmin = nse_table_size::logrho_min;
+        Real rmax = nse_table_size::logrho_max;
+
+        rholog = std::clamp(rholog, rmin, rmax);
+    }
+
+    Real tlog = std::log10(temp);
+    {
+        Real tmin = nse_table_size::logT_min;
+        Real tmax = nse_table_size::logT_max;
+
+        tlog = std::clamp(tlog, tmin, tmax);
+    }
+
+    Real yet = ye;
+    {
+        Real yemin = nse_table_size::ye_min;
+        Real yemax = nse_table_size::ye_max;
+
+        yet = std::clamp(yet, yemin, yemax);
+    }
+
+    int ir0 = nse_get_logrho_index(rholog) - 1;
+    ir0 = std::clamp(ir0, 1, nse_table_size::nden-3);
+
+    int it0 = nse_get_logT_index(tlog) - 1;
+    it0 = std::clamp(it0, 1, nse_table_size::ntemp-3);
+
+    int ic0 = nse_get_ye_index(yet) - 1;
+    ic0 = std::clamp(ic0, 1, nse_table_size::nye-3);
+
+    // note: this is returning the derivative wrt log10(T), so we need to
+    // convert to d/dT
+
+    Real ddatadT = tricubic_dT(ir0, it0, ic0, rholog, tlog, yet, data) / (std::log(10.0_rt) * temp);
+
+    return ddatadT;
+
+}
+
 #endif
diff --git a/unit_test/test_nse_interp/ci-benchmarks/aprox19.out b/unit_test/test_nse_interp/ci-benchmarks/aprox19.out
index a881ca2f75..42a1b41381 100644
--- a/unit_test/test_nse_interp/ci-benchmarks/aprox19.out
+++ b/unit_test/test_nse_interp/ci-benchmarks/aprox19.out
@@ -1,5 +1,5 @@
-Initializing AMReX (23.11-5-gd36463103dae)...
-AMReX (23.11-5-gd36463103dae) initialized
+Initializing AMReX (23.12-8-g43d71da32fa4)...
+AMReX (23.12-8-g43d71da32fa4) initialized
 starting the single zone burn...
 reading the NSE table (C++) ...
 rho, T, Ye = 1230000000 5180000000 0.472
@@ -57,4 +57,12 @@ X(Fe54) = 0.9104458693
 X(Ni56) = 0.0003104223608
 X(n) = 2.004738472e-08
 X(p) = 9.620335872e-06
-AMReX (23.11-5-gd36463103dae) finalized
+
+testing temperature derivatives of cubic
+first finite-difference derivatives
+dAbar/dT = -1.070778042e-09
+dbea/dT = -6.886297725e-12
+now using derivative of the interpolant
+dAbar/dT = -1.070778122e-09
+dbea/dT = -6.886272826e-12
+AMReX (23.12-8-g43d71da32fa4) finalized
diff --git a/unit_test/test_nse_interp/nse_cell.H b/unit_test/test_nse_interp/nse_cell.H
index e6ea2e3421..1ab91c2c87 100644
--- a/unit_test/test_nse_interp/nse_cell.H
+++ b/unit_test/test_nse_interp/nse_cell.H
@@ -139,4 +139,33 @@ void nse_cell_c()
     }
 
 
+    std::cout << std::endl;
+    std::cout << "testing temperature derivatives of cubic" << std::endl;
+
+    std::cout << "first finite-difference derivatives" << std::endl;
+
+    Real abar_old = nse_state.abar;
+    Real bea_old = nse_state.bea;
+    Real T_old = nse_state.T;
+
+    const Real eps = 1.e-8_rt;
+    nse_state.T *= (1.0_rt + eps);
+
+    nse_interp(nse_state);
+
+    std::cout << "dAbar/dT = " << (nse_state.abar - abar_old) / (nse_state.T - T_old) << std::endl;
+    std::cout << "dbea/dT = " << (nse_state.bea - bea_old) / (nse_state.T - T_old) << std::endl;
+
+    std::cout << "now using derivative of the interpolant" << std::endl;
+
+    Real dabardT = nse_interp_dT(temperature, density, ye,
+                                 nse_table::abartab);
+
+    Real dbeadT = nse_interp_dT(temperature, density, ye,
+                                 nse_table::beatab);
+
+    std::cout << "dAbar/dT = " << dabardT << std::endl;
+    std::cout << "dbea/dT = " << dbeadT << std::endl;
+
+
 }

From 01577f32112fc8ad4c6ccc509c222d0e41fee062 Mon Sep 17 00:00:00 2001
From: Michael Zingale <michael.zingale@stonybrook.edu>
Date: Mon, 18 Dec 2023 16:42:45 -0500
Subject: [PATCH 2/2] Revert "Start moving towards zero-based indexing with
 linpack (#536)" (#1412)

This reverts commit 1f0c666f18c0418a3ab0aa817de00f8fd52ebac7.

Co-authored-by: Max Katz <maxpkatz@gmail.com>
---
 util/linpack.H | 39 +++++++++++++++++----------------------
 1 file changed, 17 insertions(+), 22 deletions(-)

diff --git a/util/linpack.H b/util/linpack.H
index bdb826a915..5fc8c8baf1 100644
--- a/util/linpack.H
+++ b/util/linpack.H
@@ -8,38 +8,35 @@
 
 template <int num_eqs>
 AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
-void dgesl (RArray2D& a1, IArray1D& pivot1, RArray1D& b1)
+void dgesl (RArray2D& a, IArray1D& pivot, RArray1D& b)
 {
-    auto const& a = reinterpret_cast<Array2D<Real, 0, num_eqs-1, 0, num_eqs-1>&>(a1);
-    auto const& pivot = reinterpret_cast<Array1D<short, 0, num_eqs-1>&>(pivot1);
-    auto& b = reinterpret_cast<Array1D<Real, 0, num_eqs-1>&>(b1);
 
     int nm1 = num_eqs - 1;
 
     // solve a * x = b
     // first solve l * y = b
     if (nm1 >= 1) {
-        for (int k = 0; k < nm1; ++k) {
-            int l = pivot(k) - 1;
+        for (int k = 1; k <= nm1; ++k) {
+            int l = pivot(k);
             Real t = b(l);
             if (l != k) {
                 b(l) = b(k);
                 b(k) = t;
             }
 
-            for (int j = k+1; j < num_eqs; ++j) {
+            for (int j = k+1; j <= num_eqs; ++j) {
                 b(j) += t * a(j,k);
             }
         }
     }
 
     // now solve u * x = y
-    for (int kb = 0; kb < num_eqs; ++kb) {
+    for (int kb = 1; kb <= num_eqs; ++kb) {
 
-        int k = num_eqs - kb - 1;
+        int k = num_eqs + 1 - kb;
         b(k) = b(k) / a(k,k);
         Real t = -b(k);
-        for (int j = 0; j < k; ++j) {
+        for (int j = 1; j <= k-1; ++j) {
             b(j) += t * a(j,k);
         }
     }
@@ -50,10 +47,8 @@ void dgesl (RArray2D& a1, IArray1D& pivot1, RArray1D& b1)
 
 template <int num_eqs>
 AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
-void dgefa (RArray2D& a1, IArray1D& pivot1, int& info)
+void dgefa (RArray2D& a, IArray1D& pivot, int& info)
 {
-    auto& a = reinterpret_cast<Array2D<Real, 0, num_eqs-1, 0, num_eqs-1>&>(a1);
-    auto& pivot = reinterpret_cast<Array1D<short, 0, num_eqs-1>&>(pivot1);
 
     // dgefa factors a matrix by gaussian elimination.
     // a is returned in the form a = l * u where
@@ -69,19 +64,19 @@ void dgefa (RArray2D& a1, IArray1D& pivot1, int& info)
 
     if (nm1 >= 1) {
 
-        for (int k = 0; k < nm1; ++k) {
+        for (int k = 1; k <= nm1; ++k) {
 
             // find l = pivot index
             int l = k;
             Real dmax = std::abs(a(k,k));
-            for (int i = k+1; i < num_eqs; ++i) {
+            for (int i = k+1; i <= num_eqs; ++i) {
                 if (std::abs(a(i,k)) > dmax) {
                     l = i;
                     dmax = std::abs(a(i,k));
                 }
             }
 
-            pivot(k) = l + 1;
+            pivot(k) = l;
 
             // zero pivot implies this column already triangularized
             if (a(l,k) != 0.0e0_rt) {
@@ -95,25 +90,25 @@ void dgefa (RArray2D& a1, IArray1D& pivot1, int& info)
 
                 // compute multipliers
                 t = -1.0e0_rt / a(k,k);
-                for (int j = k+1; j < num_eqs; ++j) {
+                for (int j = k+1; j <= num_eqs; ++j) {
                     a(j,k) *= t;
                 }
 
                 // row elimination with column indexing
-                for (int j = k+1; j < num_eqs; ++j) {
+                for (int j = k+1; j <= num_eqs; ++j) {
                     t = a(l,j);
                     if (l != k) {
                         a(l,j) = a(k,j);
                         a(k,j) = t;
                     }
-                    for (int i = k+1; i < num_eqs; ++i) {
+                    for (int i = k+1; i <= num_eqs; ++i) {
                         a(i,j) += t * a(i,k);
                     }
                 }
             }
             else {
 
-                info = k+1;
+                info = k;
 
             }
 
@@ -121,9 +116,9 @@ void dgefa (RArray2D& a1, IArray1D& pivot1, int& info)
 
     }
 
-    pivot(num_eqs-1) = num_eqs;
+    pivot(num_eqs) = num_eqs;
 
-    if (a(num_eqs-1,num_eqs-1) == 0.0e0_rt) {
+    if (a(num_eqs,num_eqs) == 0.0e0_rt) {
         info = num_eqs;
     }