Skip to content

Commit

Permalink
PR: Speed and performance improvement and Last commit fix
Browse files Browse the repository at this point in the history
1. Speed improvement: optimization of Ewald Fourier wave vector initialization
* Example speed improvement (measured on an RTX 4090):
   * CO2-MFI: 9.7 seconds to 9.0 seconds
   * NU-2000: 17.3 seconds to 11.6 seconds
2. Fix for the last commit, which changed function names but pushed the wrong files
* Fixed in this commit
  • Loading branch information
Zhaoli2042 authored Oct 12, 2024
2 parents 7e33578 + 670b446 commit 2d9eb66
Show file tree
Hide file tree
Showing 8 changed files with 211 additions and 78 deletions.
245 changes: 189 additions & 56 deletions src_clean/Ewald_Energy_Functions.h

Large diffs are not rendered by default.

10 changes: 5 additions & 5 deletions src_clean/VDW_Coulomb.cu
Original file line number Diff line number Diff line change
Expand Up @@ -627,7 +627,7 @@ double CPU_EwaldDifference(Boxsize& Box, Atoms& New, Atoms& Old, ForceField& FF,
return ewaldE;
}

__global__ void Calculate_Single_Body_Energy_SEPARATE_HostGuest_VDWReal(Boxsize Box, Atoms* System, Atoms Old, Atoms New, ForceField FF, double* BlockEnergy, size_t ComponentID, size_t totalAtoms, size_t chainsize, bool* flag, int3 Nblocks, bool Do_New, bool Do_Old, int3 NComps)
__global__ void Calculate_Single_Body_Energy_VDWReal(Boxsize Box, Atoms* System, Atoms Old, Atoms New, ForceField FF, double* BlockEnergy, size_t ComponentID, size_t totalAtoms, size_t chainsize, bool* flag, int3 Nblocks, bool Do_New, bool Do_Old, int3 NComps)
{
//divide species into Host-Host, Host-Guest, and Guest-Guest//
//However, Host-Host and Guest-Guest are mutually exclusive//
Expand Down Expand Up @@ -844,7 +844,7 @@ __global__ void Calculate_Single_Body_Energy_SEPARATE_HostGuest_VDWReal(Boxsize
//}
}

__global__ void Calculate_Single_Body_Energy_SEPARATE_HostGuest_VDWReal_LambdaChange(Boxsize Box, Atoms* System, Atoms Old, Atoms New, ForceField FF, double* BlockEnergy, size_t ComponentID, size_t totalAtoms, size_t chainsize, bool* flag, int3 Nblocks, bool Do_New, bool Do_Old, int3 NComps, double2 newScale)
__global__ void Calculate_Single_Body_Energy_VDWReal_LambdaChange(Boxsize Box, Atoms* System, Atoms Old, Atoms New, ForceField FF, double* BlockEnergy, size_t ComponentID, size_t totalAtoms, size_t chainsize, bool* flag, int3 Nblocks, bool Do_New, bool Do_Old, int3 NComps, double2 newScale)
{
//divide species into Host-Host, Host-Guest, and Guest-Guest//
//However, Host-Host and Guest-Guest are mutually exclusive//
Expand Down Expand Up @@ -1182,7 +1182,7 @@ __global__ void Energy_difference_LambdaChange(Boxsize Box, Atoms* System, Atoms
}
}

__global__ void Calculate_Multiple_Trial_Energy_SEPARATE_HostGuest_VDWReal(Boxsize Box, Atoms* System, Atoms NewMol, ForceField FF, double* Blocksum, size_t ComponentID, size_t totalAtoms, bool* flag, size_t totalthreads, size_t chainsize, size_t NblockForTrial, size_t HG_Nblock, int3 NComps, int2* ExcludeList)
__global__ void Calculate_Multiple_Trial_Energy_VDWReal(Boxsize Box, Atoms* System, Atoms NewMol, ForceField FF, double* Blocksum, size_t ComponentID, size_t totalAtoms, bool* flag, size_t totalthreads, size_t chainsize, size_t NblockForTrial, size_t HG_Nblock, int3 NComps, int2* ExcludeList)
{
//Dividing Nblocks into Nblocks for host-guest and for guest-guest//
//NblockForTrial = HG_Nblock + GG_Nblock;
Expand Down Expand Up @@ -1460,7 +1460,7 @@ __device__ void determine_comp_and_Molindex_from_thread(Atoms* System, size_t& M
}
}

__global__ void TotalVDWCoul(Boxsize Box, Atoms* System, ForceField FF, double* Blocksum, bool* flag, size_t InteractionPerThread, bool UseOffset, int3 BLOCK, int3 NComponents, size_t NFrameworkAtoms, size_t NAdsorbateAtoms, size_t NFrameworkZero_ExtraFramework, bool ConsiderIntra)
__global__ void TotalVDWRealCoulomb(Boxsize Box, Atoms* System, ForceField FF, double* Blocksum, bool* flag, size_t InteractionPerThread, bool UseOffset, int3 BLOCK, int3 NComponents, size_t NFrameworkAtoms, size_t NAdsorbateAtoms, size_t NFrameworkZero_ExtraFramework, bool ConsiderIntra)
{
extern __shared__ double sdata[]; //shared memory for partial sum//

Expand Down Expand Up @@ -1644,7 +1644,7 @@ MoveEnergy Total_VDW_Coulomb_Energy(Simulations& Sim, Components& SystemComponen
//Set Overlap Flag//
cudaMemset(Sim.device_flag, false, sizeof(bool));

TotalVDWCoul<<<Nblock, Nthread, 2 * Nthread * sizeof(double)>>>(Sim.Box, Sim.d_a, FF, Sim.Blocksum, Sim.device_flag, InteractionPerThread, UseOffset, BLOCKS, SystemComponents.NComponents, NHostAtom, NGuestAtom, NFrameworkZero_ExtraFramework, ConsiderIntra);
TotalVDWRealCoulomb<<<Nblock, Nthread, 2 * Nthread * sizeof(double)>>>(Sim.Box, Sim.d_a, FF, Sim.Blocksum, Sim.device_flag, InteractionPerThread, UseOffset, BLOCKS, SystemComponents.NComponents, NHostAtom, NGuestAtom, NFrameworkZero_ExtraFramework, ConsiderIntra);
checkCUDAErrorEwald("WRONG TOTAL VDW+REAL ENERGY\n");

cudaDeviceSynchronize();
Expand Down
8 changes: 4 additions & 4 deletions src_clean/fxn_main.h
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ inline void Prepare_Widom(WidomStruct& Widom, Boxsize Box, Simulations& Sims, Co

inline void Allocate_Copy_Ewald_Vector(Boxsize& device_Box, Components& SystemComponents)
{
fprintf(SystemComponents.OUTPUT, "****** Allocating Ewald WaveVectors (INITIAL STAGE ONLY) ******\n");
fprintf(SystemComponents.OUTPUT, "****** Allocating Ewald WaveVectors + StructureFactors (INITIAL STAGE ONLY) ******\n");
//Zhao's note: This only works if the box size is not changed, eik_xy might not be useful if box size is not changed//
size_t eikx_size = SystemComponents.eik_x.size() * 2;
size_t eiky_size = SystemComponents.eik_y.size() * 2; //added times 2 for box volume move//
Expand Down Expand Up @@ -243,11 +243,11 @@ inline void Allocate_Copy_Ewald_Vector(Boxsize& device_Box, Components& SystemCo
AdsorbateEik[i].real = 0.0; AdsorbateEik[i].imag = 0.0;
FrameworkEik[i].real = 0.0; FrameworkEik[i].imag = 0.0;
}
if(i < 10) fprintf(SystemComponents.OUTPUT, "Wave Vector %zu is %.5f %.5f\n", i, AdsorbateEik[i].real, AdsorbateEik[i].imag);
if(i < 10) fprintf(SystemComponents.OUTPUT, "Structure Factor %zu is %.5f %.5f\n", i, AdsorbateEik[i].real, AdsorbateEik[i].imag);
}
cudaMemcpy(device_Box.AdsorbateEik, AdsorbateEik, AdsorbateEiksize * sizeof(Complex), cudaMemcpyHostToDevice); checkCUDAError("error copying Complex");
cudaMemcpy(device_Box.FrameworkEik, FrameworkEik, AdsorbateEiksize * sizeof(Complex), cudaMemcpyHostToDevice); checkCUDAError("error copying Complex");
fprintf(SystemComponents.OUTPUT, "****** DONE Allocating Ewald WaveVectors (INITIAL STAGE ONLY) ******\n");
fprintf(SystemComponents.OUTPUT, "****** DONE Allocating Ewald WaveVectors + StructureFactors(INITIAL STAGE ONLY) ******\n");
}

inline void Check_Simulation_Energy(Boxsize& Box, Atoms* System, ForceField FF, ForceField device_FF, Components& SystemComponents, int SIMULATIONSTAGE, size_t Numsim, Simulations& Sim, bool UseGPU)
Expand Down Expand Up @@ -303,7 +303,7 @@ inline void Check_Simulation_Energy(Boxsize& Box, Atoms* System, ForceField FF,
cudaDeviceSynchronize();
//Zhao's note: if doing initial energy, initialize and copy host Ewald to device//
if(SIMULATIONSTAGE == INITIAL) Allocate_Copy_Ewald_Vector(Sim.Box, SystemComponents);
Check_WaveVector_CPUGPU(Sim.Box, SystemComponents); //Check WaveVector on the CPU and GPU//
Check_StructureFactor_CPUGPU(Sim.Box, SystemComponents); //Check StructureFactor on the CPU and GPU//
cudaDeviceSynchronize();
}
//Calculate Tail Correction Energy//
Expand Down
8 changes: 4 additions & 4 deletions src_clean/mc_cbcfc.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ static inline MoveEnergy CBCF_LambdaChange(Components& SystemComponents, Simulat
int3 NBlocks = {(int) HH_Nblock, (int) HG_Nblock, (int) GG_Nblock}; //x: HH_Nblock, y: HG_Nblock, z: GG_Nblock;
bool Do_New = true; bool Do_Old = true;

Calculate_Single_Body_Energy_SEPARATE_HostGuest_VDWReal_LambdaChange<<<Total_Nblock, Nthread, Nthread * 2 * sizeof(double)>>>(Sims.Box, Sims.d_a, Sims.Old, Sims.New, FF, Sims.Blocksum, SelectedComponent, Atomsize, Molsize, Sims.device_flag, NBlocks, Do_New, Do_Old, SystemComponents.NComponents, newScale);
Calculate_Single_Body_Energy_VDWReal_LambdaChange<<<Total_Nblock, Nthread, Nthread * 2 * sizeof(double)>>>(Sims.Box, Sims.d_a, Sims.Old, Sims.New, FF, Sims.Blocksum, SelectedComponent, Atomsize, Molsize, Sims.device_flag, NBlocks, Do_New, Do_Old, SystemComponents.NComponents, newScale);

cudaMemcpy(SystemComponents.flag, Sims.device_flag, sizeof(bool), cudaMemcpyDeviceToHost);

Expand Down Expand Up @@ -330,7 +330,7 @@ static inline MoveEnergy CBCFMove(Components& SystemComponents, Simulations& Sim
SystemComponents.Tmmc[SelectedComponent].currentBin = newBin;
if(!FF.noCharges && SystemComponents.hasPartialCharge[SelectedComponent])
{
Update_Ewald_Vector(Sims.Box, false, SystemComponents, SelectedComponent);
Update_Vector_Ewald(Sims.Box, false, SystemComponents, SelectedComponent);
}
final_energy = energy;
}
Expand Down Expand Up @@ -406,7 +406,7 @@ static inline MoveEnergy CBCFMove(Components& SystemComponents, Simulations& Sim
SystemComponents.Tmmc[SelectedComponent].currentBin = newBin;
if(!FF.noCharges && SystemComponents.hasPartialCharge[SelectedComponent])
{
Update_Ewald_Vector(Sims.Box, false, SystemComponents, SelectedComponent);
Update_Vector_Ewald(Sims.Box, false, SystemComponents, SelectedComponent);
}
energy.take_negative();
energy += second_step_energy;
Expand Down Expand Up @@ -463,7 +463,7 @@ static inline MoveEnergy CBCFMove(Components& SystemComponents, Simulations& Sim
SystemComponents.Tmmc[SelectedComponent].currentBin = newBin;
if(!FF.noCharges && SystemComponents.hasPartialCharge[SelectedComponent])
{
Update_Ewald_Vector(Sims.Box, false, SystemComponents, SelectedComponent);
Update_Vector_Ewald(Sims.Box, false, SystemComponents, SelectedComponent);
}
final_energy = energy;
}
Expand Down
4 changes: 2 additions & 2 deletions src_clean/mc_single_particle.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ static inline MoveEnergy SingleBodyMove(Components& SystemComponents, Simulation
//printf("NHostAtom: %zu, HH_Nblock: %zu, HG_Nblock: %zu, NGuestAtom: %zu, GG_Nblock: %zu\n", NHostAtom, HH_Nblock, HG_Nblock, NGuestAtom, GG_Nblock);
if(Atomsize != 0)
{
Calculate_Single_Body_Energy_SEPARATE_HostGuest_VDWReal<<<Total_Nblock, Nthread, Nthread * 2 * sizeof(double)>>>(Sims.Box, Sims.d_a, Sims.Old, Sims.New, FF, Sims.Blocksum, SelectedComponent, Atomsize, Molsize, Sims.device_flag, NBlocks, Do_New, Do_Old, SystemComponents.NComponents);
Calculate_Single_Body_Energy_VDWReal<<<Total_Nblock, Nthread, Nthread * 2 * sizeof(double)>>>(Sims.Box, Sims.d_a, Sims.Old, Sims.New, FF, Sims.Blocksum, SelectedComponent, Atomsize, Molsize, Sims.device_flag, NBlocks, Do_New, Do_Old, SystemComponents.NComponents);

cudaMemcpy(SystemComponents.flag, Sims.device_flag, sizeof(bool), cudaMemcpyDeviceToHost);
}
Expand Down Expand Up @@ -212,7 +212,7 @@ static inline MoveEnergy SingleBodyMove(Components& SystemComponents, Simulation
SystemComponents.Moves[SelectedComponent].Record_Move_Accept(MoveType);
if(!FF.noCharges && SystemComponents.hasPartialCharge[SelectedComponent])
{
Update_Ewald_Vector(Sims.Box, false, SystemComponents, SelectedComponent);
Update_Vector_Ewald(Sims.Box, false, SystemComponents, SelectedComponent);
}
}
else {tot.zero(); };
Expand Down
6 changes: 3 additions & 3 deletions src_clean/mc_swap_moves.h
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ static inline MoveEnergy Reinsertion(Components& SystemComponents, Simulations&
Update_Reinsertion_data<<<1,SystemComponents.Moleculesize[SelectedComponent]>>>(Sims.d_a, temp, SelectedComponent, UpdateLocation); checkCUDAError("error Updating Reinsertion data");
cudaFree(temp);
if(!FF.noCharges && SystemComponents.hasPartialCharge[SelectedComponent])
Update_Ewald_Vector(Sims.Box, false, SystemComponents, SelectedComponent);
Update_Vector_Ewald(Sims.Box, false, SystemComponents, SelectedComponent);
SystemComponents.Tmmc[SelectedComponent].Update(1.0, NMol, REINSERTION); //Update for TMMC, since Macrostate not changed, just add 1.//
//energy.print();
return energy;
Expand Down Expand Up @@ -186,7 +186,7 @@ static inline MoveEnergy CreateMolecule(Components& SystemComponents, Simulation
Update_insertion_data<<<1,1>>>(Sims.d_a, Sims.Old, Sims.New, SelectedTrial, SelectedComponent, UpdateLocation, (int) SystemComponents.Moleculesize[SelectedComponent]);
if(!FF.noCharges && SystemComponents.hasPartialCharge[SelectedComponent])
{
Update_Ewald_Vector(Sims.Box, false, SystemComponents, SelectedComponent);
Update_Vector_Ewald(Sims.Box, false, SystemComponents, SelectedComponent);
}
Update_NumberOfMolecules(SystemComponents, Sims.d_a, SelectedComponent, INSERTION);
return energy;
Expand Down Expand Up @@ -612,7 +612,7 @@ static inline MoveEnergy IdentitySwapMove(Components& SystemComponents, Simulati
cudaFree(temp);
//Zhao's note: BUG!!!!, Think about if OLD/NEW Component belong to different type (framework/adsorbate)//
if(!FF.noCharges && ((SystemComponents.hasPartialCharge[NEWComponent]) ||(SystemComponents.hasPartialCharge[OLDComponent])))
Update_Ewald_Vector(Sims.Box, false, SystemComponents, NEWComponent);
Update_Vector_Ewald(Sims.Box, false, SystemComponents, NEWComponent);
//energy.print();
return energy;
}
Expand Down
4 changes: 2 additions & 2 deletions src_clean/mc_utilities.h
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,7 @@ static inline void AcceptInsertion(Components& SystemComponents, Simulations& Si
Update_NumberOfMolecules(SystemComponents, Sims.d_a, SelectedComponent, INSERTION); //true = Insertion//
if(!noCharges && SystemComponents.hasPartialCharge[SelectedComponent])
{
Update_Ewald_Vector(Sims.Box, false, SystemComponents, SelectedComponent);
Update_Vector_Ewald(Sims.Box, false, SystemComponents, SelectedComponent);
}
}

Expand All @@ -288,7 +288,7 @@ static inline void AcceptDeletion(Components& SystemComponents, Simulations& Sim
Update_NumberOfMolecules(SystemComponents, Sims.d_a, SelectedComponent, DELETION); //false = Deletion//
if(!noCharges && SystemComponents.hasPartialCharge[SelectedComponent])
{
Update_Ewald_Vector(Sims.Box, false, SystemComponents, SelectedComponent);
Update_Vector_Ewald(Sims.Box, false, SystemComponents, SelectedComponent);
}
//Zhao's note: the last molecule can be the fractional molecule, (fractional molecule ID is stored on the host), we need to update it as well (at least check it)//
//The function below will only be processed if the system has a fractional molecule and the transferred molecule is NOT the fractional one //
Expand Down
4 changes: 2 additions & 2 deletions src_clean/mc_widom.h
Original file line number Diff line number Diff line change
Expand Up @@ -326,7 +326,7 @@ static inline double Widom_Move_FirstBead_PARTIAL(Components& SystemComponents,
size_t HGGG_Nblock = HG_Nblock + GG_Nblock;
if(Atomsize != 0)
{
Calculate_Multiple_Trial_Energy_SEPARATE_HostGuest_VDWReal<<<HGGG_Nblock * NumberOfTrials, HGGG_Nthread, 2 * HGGG_Nthread * sizeof(double)>>>(Sims.Box, Sims.d_a, Sims.New, FF, Sims.Blocksum, SelectedComponent, Atomsize, Sims.device_flag, threadsNeeded,1, HGGG_Nblock, HG_Nblock, SystemComponents.NComponents, Sims.ExcludeList); checkCUDAError("Error calculating energies (PARTIAL SUM HGGG)");
Calculate_Multiple_Trial_Energy_VDWReal<<<HGGG_Nblock * NumberOfTrials, HGGG_Nthread, 2 * HGGG_Nthread * sizeof(double)>>>(Sims.Box, Sims.d_a, Sims.New, FF, Sims.Blocksum, SelectedComponent, Atomsize, Sims.device_flag, threadsNeeded,1, HGGG_Nblock, HG_Nblock, SystemComponents.NComponents, Sims.ExcludeList); checkCUDAError("Error calculating energies (PARTIAL SUM HGGG)");
cudaMemcpy(SystemComponents.flag, Sims.device_flag, NumberOfTrials*sizeof(bool), cudaMemcpyDeviceToHost);
}
//printf("OldNBlock: %zu, HG_Nblock: %zu, GG_Nblock: %zu, HGGG_Nblock: %zu\n", Nblock, HG_Nblock, GG_Nblock, HGGG_Nblock);
Expand Down Expand Up @@ -458,7 +458,7 @@ static inline double Widom_Move_Chain_PARTIAL(Components& SystemComponents, Simu
//Setup calculation for separated HG + GG interactions//
if(Atomsize != 0)
{
Calculate_Multiple_Trial_Energy_SEPARATE_HostGuest_VDWReal<<<HGGG_Nblock * Widom.NumberWidomTrialsOrientations, HGGG_Nthread, 2 * HGGG_Nthread * sizeof(double)>>>(Sims.Box, Sims.d_a, Sims.New, FF, Sims.Blocksum, SelectedComponent, Atomsize, Sims.device_flag, threadsNeeded, chainsize, HGGG_Nblock, HG_Nblock, SystemComponents.NComponents, Sims.ExcludeList); checkCUDAError("Error calculating energies (PARTIAL SUM HGGG Orientation)");
Calculate_Multiple_Trial_Energy_VDWReal<<<HGGG_Nblock * Widom.NumberWidomTrialsOrientations, HGGG_Nthread, 2 * HGGG_Nthread * sizeof(double)>>>(Sims.Box, Sims.d_a, Sims.New, FF, Sims.Blocksum, SelectedComponent, Atomsize, Sims.device_flag, threadsNeeded, chainsize, HGGG_Nblock, HG_Nblock, SystemComponents.NComponents, Sims.ExcludeList); checkCUDAError("Error calculating energies (PARTIAL SUM HGGG Orientation)");

cudaMemcpy(SystemComponents.flag, Sims.device_flag, Widom.NumberWidomTrialsOrientations*sizeof(bool), cudaMemcpyDeviceToHost);
}
Expand Down

0 comments on commit 2d9eb66

Please sign in to comment.