Skip to content

Commit

Permalink
Merge branch 'treewalk_cuda1' into treewalk_cuda
Browse files Browse the repository at this point in the history
  • Loading branch information
astro-YYH authored Oct 15, 2024
2 parents 7ad04f5 + bb26889 commit 4949b39
Show file tree
Hide file tree
Showing 12 changed files with 139 additions and 55 deletions.
14 changes: 13 additions & 1 deletion Makefile.rules
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,25 @@ ifneq ($(findstring -DUSE_CFITSIO, $(OPT)),)
FITSIO_LIBS ?= $(shell pkg-config --libs cfitsio)
endif

ifneq ($(findstring -DUSE_CUDA, $(OPT)),)
CUDA_INCL ?=
CUDA_LIBS ?= -lcudart
NVCC ?= nvcc
NVOPTIMIZE ?= -O3
endif

OPTIONS = $(OPTIMIZE) $(OPT)
GADGET_TESTDATA_ROOT = $(CURDIR)/../

CFLAGS = $(OPTIONS) $(GSL_INCL) $(FITSIO_INCL)
CFLAGS = $(OPTIONS) $(GSL_INCL) $(FITSIO_INCL) $(CUDA_INCL)
CFLAGS += -I../depends/include
CFLAGS += -I../
CFLAGS += "-DLOW_PRECISION=$(LOW_PRECISION)"
#For tests
TCFLAGS = $(CFLAGS) -DGADGET_TESTDATA_ROOT=\"$(GADGET_TESTDATA_ROOT)\"

BUNDLEDLIBS = -lbigfile-mpi -lbigfile -lpfft_omp -lfftw3_mpi -lfftw3_omp -lfftw3

LIBS = -lm $(CUDA_LIBS) $(GSL_LIBS) $(FITSIO_LIBS)
LIBS += -L../depends/lib $(BUNDLEDLIBS)
V ?= 0
Expand All @@ -41,3 +49,7 @@ V ?= 0
if test "x$(V)" = "x1" ; then echo $$cmd; fi; \
mkdir -p `dirname $@`; \
echo Compiling $<; $$cmd

# Rule to compile .cu files (using nvcc)
.objs/%.o: %.cu
$(NVCC) $(NVOPTIMIZE) -c $< -o $@
9 changes: 7 additions & 2 deletions Options.mk.example
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
#These variables are set to useful defaults, but may be overriden if needed
#MPICC=mpicc
#MPICC=mpic++
#MPICCDEP=mpicc

#NVCC=nvcc
#NVOPTIMIZE = -O3 -arch=sm_61 # specify architecture according to you GPU model, sm_90 shall be used for Vista's H100

#GSL_LIBS=
#GSL_INCL=
#This is a good optimized build default for gcc
Expand All @@ -12,7 +17,7 @@ OPTIMIZE = -fopenmp -O3 -g -Wall -ffast-math
#OPT += -DDEBUG # print a lot of debugging messages
#Disable openmp locking. This means no threading.
#OPT += -DNO_OPENMP_SPINLOCK

#OPT += -DUSE_CUDA #Enable GPU-specific CUDA code
#-----------
#OPT += -DEXCUR_REION # reionization with excursion set

Expand Down
7 changes: 4 additions & 3 deletions depends/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@ include $(CONFIG)

.PHONY: depends
.INTERMEDIATE: pfft
MPICC ?= mpicc
# MPICC ?= mpicc
MPICCDEP ?= mpicc
OPTIMIZE ?= -O2 -g -fopenmp -Wall
LIBRARIES=lib/libbigfile-mpi.a
FFTLIBRARIES=lib/libpfft_omp.a lib/libfftw3_mpi.a lib/libfftw3_omp.a
Expand All @@ -14,13 +15,13 @@ lib/libbigfile-mpi.a: bigfile/src/bigfile-mpi.c
mkdir -p lib; \
mkdir -p include; \
cd bigfile/src; \
make install PREFIX=$(PWD) CC="$(MPICC)" MPICC="$(MPICC)" CFLAGS="$(OPTIMIZE)" AR="$(AR)"
make install PREFIX=$(PWD) CC="$(MPICCDEP)" MPICC="$(MPICCDEP)" CFLAGS="$(OPTIMIZE)" AR="$(AR)"

pfft: install_pfft.sh
mkdir -p lib; \
mkdir -p include; \
#Using -ipo causes icc to crash.
MPICC="$(MPICC)" CC="$(MPICC)" CFLAGS="$(filter-out -ipo,$(OPTIMIZE)) -I $(PWD)/include -L$(PWD)/lib" AR="$(AR)" RANLIB=$(RANLIB) \
MPICC="$(MPICCDEP)" CC="$(MPICCDEP)" CFLAGS="$(filter-out -ipo,$(OPTIMIZE)) -I $(PWD)/include -L$(PWD)/lib" AR="$(AR)" RANLIB=$(RANLIB) \
sh $(PWD)/install_pfft.sh $(PWD)/

clean: clean-fast clean-fft
Expand Down
27 changes: 15 additions & 12 deletions libgadget/cosmology.c
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
#include <math.h>
#include <gsl/gsl_integration.h>
#include <gsl/gsl_errno.h>
#include <gsl/gsl_odeiv2.h>

#include "cosmology.h"
#include "physconst.h"
#include "utils.h"
#include "timefac.h"

/*Stefan-Boltzmann constant in cgs units*/
#define STEFAN_BOLTZMANN 5.670373e-5
Expand Down Expand Up @@ -236,19 +236,22 @@ double function_of_k_eval(FunctionOfK * fk, double k)
}
}

double function_of_k_tophat_sigma(FunctionOfK * fk, double R)
// Adapted function to use Tanh-Sinh adaptive integration
double function_of_k_tophat_sigma(FunctionOfK *fk, double R)
{
gsl_integration_workspace * w = gsl_integration_workspace_alloc (1000);
// Create the parameter structure
struct sigma2_params params = {fk, R};
double result,abserr;
gsl_function F;
F.function = &sigma2_int;
F.params = &params;

/* note: 500/R is here chosen as integration boundary (infinity) */
gsl_integration_qags (&F, 0, 500. / R, 0, 1e-4,1000,w,&result, &abserr);
// printf("gsl_integration_qng in TopHatSigma2. Result %g, error: %g, intervals: %lu\n",result, abserr,w->size);
gsl_integration_workspace_free (w);
double abserr; // To hold the estimated error

// Define the integrand as a lambda function wrapping the original `sigma2_int`
auto integrand = [&params](double k) -> double {
return sigma2_int(k, (void*)&params);
};

// Perform the Tanh-Sinh adaptive integration
double result = tanh_sinh_integrate_adaptive(integrand, 0, 500.0 / R, &abserr, 1e-4);

// Return the square root of the result
return sqrt(result);
}

Expand Down
4 changes: 2 additions & 2 deletions libgadget/gravpm.c
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,8 @@ gravpm_force(PetaPM * pm, DomainDecomp * ddecomp, Cosmology * CP, double Time, d
PetaPMParticleStruct pstruct = {
P,
sizeof(P[0]),
(char*) &P[0].Pos[0] - (char*) P,
(char*) &P[0].Mass - (char*) P,
static_cast<size_t>((char*) &P[0].Pos[0] - (char*) P),
static_cast<size_t>((char*) &P[0].Mass - (char*) P),
/* Regions allocated inside _prepare*/
NULL,
/* By default all particles are active. For hybrid neutrinos set below.*/
Expand Down
3 changes: 2 additions & 1 deletion libgadget/lenstools.c
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,7 @@ void savePotentialPlane(double *data, int rows, int cols, const char * const fil
double Lbox_Mpc = Lbox * UnitLength_in_cm / CM_PER_MPC; // Box size in Mpc/h
double comoving_distance_Mpc = comoving_distance * UnitLength_in_cm / CM_PER_MPC;
double Ode0 = CP->OmegaLambda > 0 ? CP->OmegaLambda : CP->Omega_fld;
char unit[] = "rad2 "; // Mutable string for the UNIT keyword
// Insert a blank line as a separator
fits_write_record(fptr, " ", &status);
// Add headers to the FITS file
Expand All @@ -313,7 +314,7 @@ void savePotentialPlane(double *data, int rows, int cols, const char * const fil
fits_update_key(fptr, TDOUBLE, "CHI", (&comoving_distance_Mpc), "Comoving distance in Mpc/h", &status);
fits_update_key(fptr, TDOUBLE, "SIDE", &(Lbox_Mpc), "Side length in Mpc/h", &status);
fits_update_key(fptr, TLONGLONG, "NPART", &num_particles, "Number of particles on the plane", &status);
fits_update_key(fptr, TSTRING, "UNIT", "rad2 ", "Pixel value unit", &status);
fits_update_key(fptr, TSTRING, "UNIT", unit, "Pixel value unit", &status);

// Write the 2D array of doubles to the image
long fpixel[2] = {1, 1}; // first pixel to write (1-based indexing)
Expand Down
105 changes: 77 additions & 28 deletions libgadget/timefac.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,56 @@
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <gsl/gsl_integration.h>

#include "physconst.h"
#include "timefac.h"
#include "timebinmgr.h"
#include "utils.h"

#define WORKSIZE 10000
#include <stdio.h>
#include <math.h>
#include <boost/math/quadrature/gauss.hpp>
#include <boost/math/special_functions/fpclassify.hpp> // For isnan and isinf
#include <functional>
#include <boost/math/quadrature/tanh_sinh.hpp>

// Function to perform tanh-sinh integration with adaptive max_refinements
double tanh_sinh_integrate_adaptive(std::function<double(double)> func, double a, double b, double* estimated_error, double rel_tol, int max_refinements_limit, int init_refine, int step) {
double result_prev = 0.0;
double result_current = 0.0;
*estimated_error = 1.0; // Start with a large relative error
int max_refine = init_refine;

// Loop until reaching the max refinements limit or satisfying the tolerance
for (; max_refine <= max_refinements_limit; max_refine += step) {
// Create a Tanh-Sinh integrator with the current max_refinements
boost::math::quadrature::tanh_sinh<double> integrator(max_refine);

// Perform the integration
result_current = integrator.integrate(func, a, b);

// If this is not the first iteration, compute the relative error
if (max_refine > init_refine) {
*estimated_error = fabs(result_current - result_prev) / fabs(result_current);

// Check if the relative error is within the target tolerance
if (*estimated_error < rel_tol) {
break; // Stop refining if the result is within the tolerance
}
}

// Update the previous result for the next iteration
result_prev = result_current;
}

// If we exited the loop without achieving the desired tolerance, print a warning
if (*estimated_error > rel_tol) {
message(1, "Warning: Tanh-Sinh integration did not reach the desired tolerance of %g. Final relative error: %g\n", rel_tol, *estimated_error);
}

// Return the final result
return result_current;
}

/* Integrand for the drift table*/
static double drift_integ(double a, void *param)
Expand Down Expand Up @@ -41,21 +83,26 @@ static double hydrokick_integ(double a, void *param)
return 1 / (h * pow(a, 3 * GAMMA_MINUS1) * a);
}

/*Do the integral required to get a factor.*/
static double get_exact_factor(Cosmology * CP, inttime_t t0, inttime_t t1, double (*factor) (double, void *))
// Function to compute a factor using Tanh-Sinh adaptive integration
static double get_exact_factor(Cosmology *CP, inttime_t t0, inttime_t t1, double (*factor)(double, void *))
{
double result, abserr;
if(t0 == t1)
if (t0 == t1) {
return 0;
}

// Calculate the scale factors
double a0 = exp(loga_from_ti(t0));
double a1 = exp(loga_from_ti(t1));
gsl_function F;
gsl_integration_workspace *workspace;
workspace = gsl_integration_workspace_alloc(WORKSIZE);
F.function = factor;
F.params = CP;
gsl_integration_qag(&F, a0, a1, 0, 1.0e-8, WORKSIZE, GSL_INTEG_GAUSS61, workspace, &result, &abserr);
gsl_integration_workspace_free(workspace);
double abserr;

// Define the integrand as a lambda function, wrapping the existing factor function
auto integrand = [CP, factor](double a) {
return factor(a, (void*)CP);
};

// Call the adaptive Tanh-Sinh integrator
double result = tanh_sinh_integrate_adaptive(integrand, a0, a1, &abserr);

return result;
}

Expand All @@ -79,25 +126,27 @@ double get_exact_hydrokick_factor(Cosmology * CP, inttime_t ti0, inttime_t ti1)
/* Integrand for comoving distance */
static double comoving_distance_integ(double a, void *param)
{
Cosmology *CP = (Cosmology *) param;
double h = hubble_function(CP, a);
return 1. / (h * a * a);
// Cosmology *CP = (Cosmology *) param;
// double h = hubble_function(CP, a);
// return 1. / (h * a * a);
return gravkick_integ(a, param);
}

/* Function to compute the comoving distance between two scale factors */
/* Function to compute comoving distance using the adaptive integrator */
double compute_comoving_distance(Cosmology *CP, double a0, double a1, const double UnitVelocity_in_cm_per_s)
{
{
// relative error tolerance
// double epsrel = 1e-8;
double result, abserr;
gsl_function F;
gsl_integration_workspace *workspace = gsl_integration_workspace_alloc(WORKSIZE);

F.function = comoving_distance_integ;
F.params = CP;
// Define the integrand as a lambda function, wrapping comoving_distance_integ
auto integrand = [CP](double a) {
return comoving_distance_integ(a, (void*)CP);
};

// Using GSL to perform the integration
gsl_integration_qag(&F, a0, a1, 0, 1.0e-8, WORKSIZE, GSL_INTEG_GAUSS61, workspace, &result, &abserr);
gsl_integration_workspace_free(workspace);
// Call the generic adaptive integration function
// result = adaptive_integrate(integrand, a0, a1, &abserr);
result = tanh_sinh_integrate_adaptive(integrand, a0, a1, &abserr);

return (LIGHTCGS/UnitVelocity_in_cm_per_s) * result;
// Convert the result using the provided units
return (LIGHTCGS / UnitVelocity_in_cm_per_s) * result;
}

12 changes: 12 additions & 0 deletions libgadget/timefac.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,22 @@
#include "types.h"
#include "cosmology.h"
#include "timebinmgr.h"
#include <functional> // For std::function

/* Get the exact drift and kick factors at given time by integrating. */
double get_exact_drift_factor(Cosmology * CP, inttime_t ti0, inttime_t ti1);
double get_exact_hydrokick_factor(Cosmology * CP, inttime_t ti0, inttime_t ti1);
double get_exact_gravkick_factor(Cosmology * CP, inttime_t ti0, inttime_t ti1);
double compute_comoving_distance(Cosmology *CP, double a0, double a1, const double UnitVelocity_in_cm_per_s);
double tanh_sinh_integrate_adaptive(
std::function<double(double)> func,
double a,
double b,
double* estimated_error,
double rel_tol = 1e-8,
int max_refinements_limit = 30,
int init_refine = 5,
int step = 5
);

#endif
2 changes: 1 addition & 1 deletion libgadget/utils/spinlocks.c
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ struct SpinLocks * init_spinlocks(int NumLock)
spin.SpinLocks = (pthread_spinlock_t *) mymalloc("SpinLocks", NumLock * sizeof(pthread_spinlock_t));
#pragma omp parallel for
#else
spin.SpinLocks = mymalloc("SpinLocks", NumLock * sizeof(omp_lock_t));
spin.SpinLocks = (omp_lock_t*)mymalloc("SpinLocks", NumLock * sizeof(omp_lock_t));
#endif
for(i = 0; i < NumLock; i ++) {
#ifndef NO_OPENMP_SPINLOCK
Expand Down
2 changes: 1 addition & 1 deletion libgadget/uvbg.c
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ void save_uvbg_grids(int SnapshotFileCount, char * OutputDir, PetaPM * pm)
//TODO: think about the cartesian communicator in the PetaPM struct
//and the mapping between ranks, indices and positions

size_t dims[2] = {grid_n, 1};
size_t dims[2] = {(size_t)grid_n, 1};
//J21 block
BigArray arr = {0};
big_array_init(&arr, UVBGgrids.J21, "=f4", 2, dims, NULL);
Expand Down
4 changes: 2 additions & 2 deletions libgenic/glass.c
Original file line number Diff line number Diff line change
Expand Up @@ -183,8 +183,8 @@ static void glass_force(PetaPM * pm, double t_f, struct ic_part_data * ICP, cons
PetaPMParticleStruct pstruct = {
ICP,
sizeof(ICP[0]),
(char*) &ICP[0].Pos[0] - (char*) ICP,
(char*) &ICP[0].Mass - (char*) ICP,
(size_t)((char*) &ICP[0].Pos[0] - (char*) ICP),
(size_t)((char*) &ICP[0].Mass - (char*) ICP),
NULL,
NULL,
NumPart,
Expand Down
5 changes: 3 additions & 2 deletions libgenic/zeldovich.c
Original file line number Diff line number Diff line change
Expand Up @@ -155,8 +155,9 @@ void displacement_fields(PetaPM * pm, enum TransferType Type, struct ic_part_dat
PetaPMParticleStruct pstruct = {
curICP,
sizeof(curICP[0]),
((char*) &curICP[0].Pos[0]) - (char*) curICP,
((char*) &curICP[0].Mass) - (char*) curICP,
(size_t)(((char*) &curICP[0].Pos[0]) - (char*) curICP),
(size_t)(((char*) &curICP[0].Mass) - (char*) curICP),

NULL,
NULL,
NumPart,
Expand Down

0 comments on commit 4949b39

Please sign in to comment.