Merge branch 'treewalk_cuda1' into treewalk_cuda

MP-Gadget · Oct 15, 2024 · 4949b39 · 4949b39
2 parents 7ad04f5 + bb26889
commit 4949b39
Show file tree

Hide file tree

Showing 12 changed files with 139 additions and 55 deletions.
diff --git a/Makefile.rules b/Makefile.rules
@@ -15,17 +15,25 @@ ifneq ($(findstring -DUSE_CFITSIO, $(OPT)),)
     FITSIO_LIBS ?= $(shell pkg-config --libs cfitsio)
 endif
 
+ifneq ($(findstring -DUSE_CUDA, $(OPT)),)
+    CUDA_INCL ?= 
+    CUDA_LIBS ?= -lcudart
+    NVCC ?= nvcc
+    NVOPTIMIZE ?= -O3
+endif
+
 OPTIONS = $(OPTIMIZE) $(OPT)
 GADGET_TESTDATA_ROOT = $(CURDIR)/../
 
-CFLAGS = $(OPTIONS) $(GSL_INCL) $(FITSIO_INCL)
+CFLAGS = $(OPTIONS) $(GSL_INCL) $(FITSIO_INCL) $(CUDA_INCL)
 CFLAGS += -I../depends/include
 CFLAGS += -I../
 CFLAGS += "-DLOW_PRECISION=$(LOW_PRECISION)"
 #For tests
 TCFLAGS = $(CFLAGS) -DGADGET_TESTDATA_ROOT=\"$(GADGET_TESTDATA_ROOT)\"
 
 BUNDLEDLIBS = -lbigfile-mpi -lbigfile -lpfft_omp -lfftw3_mpi -lfftw3_omp -lfftw3
+
 LIBS  = -lm $(CUDA_LIBS) $(GSL_LIBS) $(FITSIO_LIBS)
 LIBS += -L../depends/lib $(BUNDLEDLIBS)
 V ?= 0
@@ -41,3 +49,7 @@ V ?= 0
 	if test "x$(V)" = "x1" ; then echo $$cmd; fi; \
 	mkdir -p `dirname $@`; \
 	echo Compiling $<; $$cmd
+
+# Rule to compile .cu files (using nvcc)
+.objs/%.o: %.cu
+	$(NVCC) $(NVOPTIMIZE) -c $< -o $@
diff --git a/Options.mk.example b/Options.mk.example
@@ -1,5 +1,10 @@
 #These variables are set to useful defaults, but may be overriden if needed
-#MPICC=mpicc
+#MPICC=mpic++
+#MPICCDEP=mpicc
+
+#NVCC=nvcc
+#NVOPTIMIZE = -O3 -arch=sm_61 # specify architecture according to you GPU model, sm_90 shall be used for Vista's H100
+
 #GSL_LIBS=
 #GSL_INCL=
 #This is a good optimized build default for gcc
@@ -12,7 +17,7 @@ OPTIMIZE =  -fopenmp -O3 -g -Wall -ffast-math
 #OPT += -DDEBUG      # print a lot of debugging messages
 #Disable openmp locking. This means no threading.
 #OPT += -DNO_OPENMP_SPINLOCK
-
+#OPT += -DUSE_CUDA  #Enable GPU-specific CUDA code
 #-----------
 #OPT += -DEXCUR_REION  # reionization with excursion set
 

diff --git a/depends/Makefile b/depends/Makefile
@@ -3,7 +3,8 @@ include $(CONFIG)
 
 .PHONY: depends
 .INTERMEDIATE: pfft
-MPICC ?= mpicc
+# MPICC ?= mpicc
+MPICCDEP ?= mpicc
 OPTIMIZE ?= -O2 -g -fopenmp -Wall
 LIBRARIES=lib/libbigfile-mpi.a
 FFTLIBRARIES=lib/libpfft_omp.a lib/libfftw3_mpi.a lib/libfftw3_omp.a
@@ -14,13 +15,13 @@ lib/libbigfile-mpi.a: bigfile/src/bigfile-mpi.c
 	mkdir -p lib; \
 	mkdir -p include; \
 	cd bigfile/src; \
-	make install PREFIX=$(PWD) CC="$(MPICC)" MPICC="$(MPICC)" CFLAGS="$(OPTIMIZE)" AR="$(AR)"
+	make install PREFIX=$(PWD) CC="$(MPICCDEP)" MPICC="$(MPICCDEP)" CFLAGS="$(OPTIMIZE)" AR="$(AR)"
 
 pfft: install_pfft.sh
 	mkdir -p lib; \
 	mkdir -p include; \
 	#Using -ipo causes icc to crash.
-	MPICC="$(MPICC)" CC="$(MPICC)" CFLAGS="$(filter-out -ipo,$(OPTIMIZE)) -I $(PWD)/include -L$(PWD)/lib" AR="$(AR)" RANLIB=$(RANLIB) \
+	MPICC="$(MPICCDEP)" CC="$(MPICCDEP)" CFLAGS="$(filter-out -ipo,$(OPTIMIZE)) -I $(PWD)/include -L$(PWD)/lib" AR="$(AR)" RANLIB=$(RANLIB) \
         sh $(PWD)/install_pfft.sh $(PWD)/
 
 clean: clean-fast clean-fft

diff --git a/libgadget/cosmology.c b/libgadget/cosmology.c
@@ -1,11 +1,11 @@
 #include <math.h>
-#include <gsl/gsl_integration.h>
 #include <gsl/gsl_errno.h>
 #include <gsl/gsl_odeiv2.h>
 
 #include "cosmology.h"
 #include "physconst.h"
 #include "utils.h"
+#include "timefac.h"
 
 /*Stefan-Boltzmann constant in cgs units*/
 #define  STEFAN_BOLTZMANN 5.670373e-5
@@ -236,19 +236,22 @@ double function_of_k_eval(FunctionOfK * fk, double k)
     }
 }
 
-double function_of_k_tophat_sigma(FunctionOfK * fk, double R)
+// Adapted function to use Tanh-Sinh adaptive integration
+double function_of_k_tophat_sigma(FunctionOfK *fk, double R)
 {
-    gsl_integration_workspace * w = gsl_integration_workspace_alloc (1000);
+    // Create the parameter structure
     struct sigma2_params params = {fk, R};
-    double result,abserr;
-    gsl_function F;
-    F.function = &sigma2_int;
-    F.params = &params;
-
-    /* note: 500/R is here chosen as integration boundary (infinity) */
-    gsl_integration_qags (&F, 0, 500. / R, 0, 1e-4,1000,w,&result, &abserr);
-    //   printf("gsl_integration_qng in TopHatSigma2. Result %g, error: %g, intervals: %lu\n",result, abserr,w->size);
-    gsl_integration_workspace_free (w);
+    double abserr;  // To hold the estimated error
+
+    // Define the integrand as a lambda function wrapping the original `sigma2_int`
+    auto integrand = [&params](double k) -> double {
+        return sigma2_int(k, (void*)&params);
+    };
+
+    // Perform the Tanh-Sinh adaptive integration
+    double result = tanh_sinh_integrate_adaptive(integrand, 0, 500.0 / R, &abserr, 1e-4);
+
+    // Return the square root of the result
     return sqrt(result);
 }
 

diff --git a/libgadget/gravpm.c b/libgadget/gravpm.c
@@ -62,8 +62,8 @@ gravpm_force(PetaPM * pm, DomainDecomp * ddecomp, Cosmology * CP, double Time, d
     PetaPMParticleStruct pstruct = {
         P,
         sizeof(P[0]),
-        (char*) &P[0].Pos[0]  - (char*) P,
-        (char*) &P[0].Mass  - (char*) P,
+        static_cast<size_t>((char*) &P[0].Pos[0]  - (char*) P),
+        static_cast<size_t>((char*) &P[0].Mass  - (char*) P),
         /* Regions allocated inside _prepare*/
         NULL,
         /* By default all particles are active. For hybrid neutrinos set below.*/

diff --git a/libgadget/lenstools.c b/libgadget/lenstools.c
@@ -298,6 +298,7 @@ void savePotentialPlane(double *data, int rows, int cols, const char * const fil
     double Lbox_Mpc = Lbox * UnitLength_in_cm / CM_PER_MPC;  // Box size in Mpc/h
     double comoving_distance_Mpc = comoving_distance * UnitLength_in_cm / CM_PER_MPC;
     double Ode0 = CP->OmegaLambda > 0 ? CP->OmegaLambda : CP->Omega_fld;
+    char unit[] = "rad2    ";  // Mutable string for the UNIT keyword
     // Insert a blank line as a separator
     fits_write_record(fptr, "        ", &status);
     // Add headers to the FITS file
@@ -313,7 +314,7 @@ void savePotentialPlane(double *data, int rows, int cols, const char * const fil
     fits_update_key(fptr, TDOUBLE, "CHI", (&comoving_distance_Mpc), "Comoving distance in Mpc/h", &status);
     fits_update_key(fptr, TDOUBLE, "SIDE", &(Lbox_Mpc), "Side length in Mpc/h", &status);
     fits_update_key(fptr, TLONGLONG, "NPART", &num_particles, "Number of particles on the plane", &status);
-    fits_update_key(fptr, TSTRING, "UNIT", "rad2    ", "Pixel value unit", &status);
+    fits_update_key(fptr, TSTRING, "UNIT", unit, "Pixel value unit", &status);
 
     // Write the 2D array of doubles to the image
     long fpixel[2] = {1, 1};  // first pixel to write (1-based indexing)

diff --git a/libgadget/timefac.c b/libgadget/timefac.c
@@ -3,14 +3,56 @@
 #include <stdlib.h>
 #include <string.h>
 #include <math.h>
-#include <gsl/gsl_integration.h>
 
 #include "physconst.h"
 #include "timefac.h"
 #include "timebinmgr.h"
 #include "utils.h"
 
-#define WORKSIZE 10000
+#include <stdio.h>
+#include <math.h>
+#include <boost/math/quadrature/gauss.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>  // For isnan and isinf
+#include <functional>
+#include <boost/math/quadrature/tanh_sinh.hpp>
+
+// Function to perform tanh-sinh integration with adaptive max_refinements
+double tanh_sinh_integrate_adaptive(std::function<double(double)> func, double a, double b, double* estimated_error, double rel_tol, int max_refinements_limit, int init_refine, int step) {
+    double result_prev = 0.0;
+    double result_current = 0.0;
+    *estimated_error = 1.0;  // Start with a large relative error
+    int max_refine = init_refine;
+
+    // Loop until reaching the max refinements limit or satisfying the tolerance
+    for (; max_refine <= max_refinements_limit; max_refine += step) {
+        // Create a Tanh-Sinh integrator with the current max_refinements
+        boost::math::quadrature::tanh_sinh<double> integrator(max_refine);
+
+        // Perform the integration
+        result_current = integrator.integrate(func, a, b);
+
+        // If this is not the first iteration, compute the relative error
+        if (max_refine > init_refine) {
+            *estimated_error = fabs(result_current - result_prev) / fabs(result_current);
+
+            // Check if the relative error is within the target tolerance
+            if (*estimated_error < rel_tol) {
+                break;  // Stop refining if the result is within the tolerance
+            }
+        }
+
+        // Update the previous result for the next iteration
+        result_prev = result_current;
+    }
+
+    // If we exited the loop without achieving the desired tolerance, print a warning
+    if (*estimated_error > rel_tol) {
+        message(1, "Warning: Tanh-Sinh integration did not reach the desired tolerance of %g. Final relative error: %g\n", rel_tol, *estimated_error);
+    }
+
+    // Return the final result
+    return result_current;
+}
 
 /* Integrand for the drift table*/
 static double drift_integ(double a, void *param)
@@ -41,21 +83,26 @@ static double hydrokick_integ(double a, void *param)
   return 1 / (h * pow(a, 3 * GAMMA_MINUS1) * a);
 }
 
-/*Do the integral required to get a factor.*/
-static double get_exact_factor(Cosmology * CP, inttime_t t0, inttime_t t1, double (*factor) (double, void *))
+// Function to compute a factor using Tanh-Sinh adaptive integration
+static double get_exact_factor(Cosmology *CP, inttime_t t0, inttime_t t1, double (*factor)(double, void *))
 {
-    double result, abserr;
-    if(t0 == t1)
+    if (t0 == t1) {
         return 0;
+    }
+
+    // Calculate the scale factors
     double a0 = exp(loga_from_ti(t0));
     double a1 = exp(loga_from_ti(t1));
-    gsl_function F;
-    gsl_integration_workspace *workspace;
-    workspace = gsl_integration_workspace_alloc(WORKSIZE);
-    F.function = factor;
-    F.params = CP;
-    gsl_integration_qag(&F, a0, a1, 0, 1.0e-8, WORKSIZE, GSL_INTEG_GAUSS61, workspace, &result, &abserr);
-    gsl_integration_workspace_free(workspace);
+    double abserr;
+
+    // Define the integrand as a lambda function, wrapping the existing factor function
+    auto integrand = [CP, factor](double a) {
+        return factor(a, (void*)CP);
+    };
+
+    // Call the adaptive Tanh-Sinh integrator
+    double result = tanh_sinh_integrate_adaptive(integrand, a0, a1, &abserr);
+
     return result;
 }
 
@@ -79,25 +126,27 @@ double get_exact_hydrokick_factor(Cosmology * CP, inttime_t ti0, inttime_t ti1)
 /* Integrand for comoving distance */
 static double comoving_distance_integ(double a, void *param)
 {
-    Cosmology *CP = (Cosmology *) param;
-    double h = hubble_function(CP, a);
-    return 1. / (h * a * a); 
+    // Cosmology *CP = (Cosmology *) param;
+    // double h = hubble_function(CP, a);
+    // return 1. / (h * a * a); 
+    return gravkick_integ(a, param);
 }
 
-/* Function to compute the comoving distance between two scale factors */
+/* Function to compute comoving distance using the adaptive integrator */
 double compute_comoving_distance(Cosmology *CP, double a0, double a1, const double UnitVelocity_in_cm_per_s)
-{
+{   
+    // relative error tolerance
+    // double epsrel = 1e-8;
     double result, abserr;
-    gsl_function F;
-    gsl_integration_workspace *workspace = gsl_integration_workspace_alloc(WORKSIZE);
-
-    F.function = comoving_distance_integ;
-    F.params = CP;
+    // Define the integrand as a lambda function, wrapping comoving_distance_integ
+    auto integrand = [CP](double a) {
+        return comoving_distance_integ(a, (void*)CP);
+    };
 
-    // Using GSL to perform the integration
-    gsl_integration_qag(&F, a0, a1, 0, 1.0e-8, WORKSIZE, GSL_INTEG_GAUSS61, workspace, &result, &abserr);
-    gsl_integration_workspace_free(workspace);
+    // Call the generic adaptive integration function
+    // result = adaptive_integrate(integrand, a0, a1, &abserr);
+    result = tanh_sinh_integrate_adaptive(integrand, a0, a1, &abserr);
 
-    return (LIGHTCGS/UnitVelocity_in_cm_per_s) * result;
+    // Convert the result using the provided units
+    return (LIGHTCGS / UnitVelocity_in_cm_per_s) * result;
 }
-
diff --git a/libgadget/timefac.h b/libgadget/timefac.h
@@ -4,10 +4,22 @@
 #include "types.h"
 #include "cosmology.h"
 #include "timebinmgr.h"
+#include <functional>  // For std::function
 
 /* Get the exact drift and kick factors at given time by integrating. */
 double get_exact_drift_factor(Cosmology * CP, inttime_t ti0, inttime_t ti1);
 double get_exact_hydrokick_factor(Cosmology * CP, inttime_t ti0, inttime_t ti1);
 double get_exact_gravkick_factor(Cosmology * CP, inttime_t ti0, inttime_t ti1);
 double compute_comoving_distance(Cosmology *CP, double a0, double a1, const double UnitVelocity_in_cm_per_s);
+double tanh_sinh_integrate_adaptive(
+    std::function<double(double)> func, 
+    double a, 
+    double b, 
+    double* estimated_error, 
+    double rel_tol = 1e-8, 
+    int max_refinements_limit = 30, 
+    int init_refine = 5, 
+    int step = 5
+);
+
 #endif
diff --git a/libgadget/utils/spinlocks.c b/libgadget/utils/spinlocks.c
@@ -54,7 +54,7 @@ struct SpinLocks * init_spinlocks(int NumLock)
     spin.SpinLocks = (pthread_spinlock_t *) mymalloc("SpinLocks", NumLock * sizeof(pthread_spinlock_t));
     #pragma omp parallel for
 #else
-    spin.SpinLocks = mymalloc("SpinLocks", NumLock * sizeof(omp_lock_t));
+    spin.SpinLocks = (omp_lock_t*)mymalloc("SpinLocks", NumLock * sizeof(omp_lock_t));
 #endif
     for(i = 0; i < NumLock; i ++) {
 #ifndef NO_OPENMP_SPINLOCK

diff --git a/libgadget/uvbg.c b/libgadget/uvbg.c
@@ -131,7 +131,7 @@ void save_uvbg_grids(int SnapshotFileCount, char * OutputDir, PetaPM * pm)
     //TODO: think about the cartesian communicator in the PetaPM struct
     //and the mapping between ranks, indices and positions
 
-    size_t dims[2] = {grid_n, 1};
+    size_t dims[2] = {(size_t)grid_n, 1};
     //J21 block
     BigArray arr = {0};
     big_array_init(&arr, UVBGgrids.J21, "=f4", 2, dims, NULL);

diff --git a/libgenic/glass.c b/libgenic/glass.c
@@ -183,8 +183,8 @@ static void glass_force(PetaPM * pm, double t_f, struct ic_part_data * ICP, cons
     PetaPMParticleStruct pstruct = {
         ICP,
         sizeof(ICP[0]),
-        (char*) &ICP[0].Pos[0]  - (char*) ICP,
-        (char*) &ICP[0].Mass  - (char*) ICP,
+        (size_t)((char*) &ICP[0].Pos[0]  - (char*) ICP),
+        (size_t)((char*) &ICP[0].Mass  - (char*) ICP),
         NULL,
         NULL,
         NumPart,

diff --git a/libgenic/zeldovich.c b/libgenic/zeldovich.c
@@ -155,8 +155,9 @@ void displacement_fields(PetaPM * pm, enum TransferType Type, struct ic_part_dat
     PetaPMParticleStruct pstruct = {
         curICP,
         sizeof(curICP[0]),
-        ((char*) &curICP[0].Pos[0]) - (char*) curICP,
-        ((char*) &curICP[0].Mass) - (char*) curICP,
+        (size_t)(((char*) &curICP[0].Pos[0]) - (char*) curICP),
+        (size_t)(((char*) &curICP[0].Mass) - (char*) curICP),
+
         NULL,
         NULL,
         NumPart,