Skip to content

Commit

Permalink
PR: Small optimization of code
Browse files Browse the repository at this point in the history
* Examples:
   * CO2-MFI: 9.7 → 8.15 seconds
   * Bae-Mixture: 108.8 → 94.8 seconds
   * NU-2000: 17.20 → 7.5 seconds
   * NPTMC: 223.73 → 175.2 seconds
   * NVT-Gibbs (100 + 100 cycles): 35.3 → 31.45 seconds
  • Loading branch information
Zhaoli2042 authored Oct 15, 2024
2 parents d115f5c + d31b7b6 commit 45d5c9d
Show file tree
Hide file tree
Showing 5 changed files with 64 additions and 70 deletions.
21 changes: 7 additions & 14 deletions src_clean/Ewald_Energy_Functions.h
Original file line number Diff line number Diff line change
Expand Up @@ -299,12 +299,12 @@ __global__ void Fourier_Ewald_Diff(Boxsize Box, Complex* SameTypeEik, Complex* C
}
if((ksqr > 1e-10) && (ksqr < Box.ReciprocalCutOff))
{
double3 ax; ax.x = Box.InverseCell[0]; ax.y = Box.InverseCell[3]; ax.z = Box.InverseCell[6];
double3 ay; ay.x = Box.InverseCell[1]; ay.y = Box.InverseCell[4]; ay.z = Box.InverseCell[7];
double3 az; az.x = Box.InverseCell[2]; az.y = Box.InverseCell[5]; az.z = Box.InverseCell[8];
double3 ax = {Box.InverseCell[0], Box.InverseCell[3], Box.InverseCell[6]};
double3 ay = {Box.InverseCell[1], Box.InverseCell[4], Box.InverseCell[7]};
double3 az = {Box.InverseCell[2], Box.InverseCell[5], Box.InverseCell[8]};
size_t numberOfAtoms = Oldsize + Newsize;
Complex cksum_old; cksum_old.real = 0.0; cksum_old.imag = 0.0;
Complex cksum_new; cksum_new.real = 0.0; cksum_new.imag = 0.0;
Complex cksum_old = {0.0, 0.0};
Complex cksum_new = {0.0, 0.0};
double3 kvec_x = ax * 2.0 * M_PI * (double) kx;
double3 kvec_y = ay * 2.0 * M_PI * (double) ky;
double3 kvec_z = az * 2.0 * M_PI * (double) kz;
Expand Down Expand Up @@ -399,21 +399,14 @@ __global__ void Update_StructureFactor_Stored(Complex* Eik, Complex* Temp_Eik, s
}
void Update_Vector_Ewald(Boxsize& Box, bool CPU, Components& SystemComponents, size_t SelectedComponent)
{
//else //Update on the GPU//
//{
size_t numberOfStructureFactors = (Box.kmax.x + 1) * (2 * Box.kmax.y + 1) * (2 * Box.kmax.z + 1);
size_t Nblock = 0; size_t Nthread = 0; Setup_threadblock(numberOfStructureFactors, &Nblock, &Nthread);
Complex* Stored_Eik;
if(SelectedComponent < SystemComponents.NComponents.y)
{
Stored_Eik = Box.FrameworkEik;
std::swap(Box.FrameworkEik, Box.tempEik);
}
else
{
Stored_Eik = Box.AdsorbateEik;
std::swap(Box.AdsorbateEik, Box.tempEik);
}
Update_StructureFactor_Stored<<<Nblock, Nthread>>>(Stored_Eik, Box.tempEik, numberOfStructureFactors);
//}
}

////////////////////////////////////////////////
Expand Down
2 changes: 2 additions & 0 deletions src_clean/data_struct.h
Original file line number Diff line number Diff line change
Expand Up @@ -889,6 +889,8 @@ struct Components
double Pressure=0.0;
double Beta; // Inverse Temperature

double3* tempMolStorage; // temporary storage for reinsertion move//

size_t EnergyEvalTimes = 0;

bool* flag; // flags for checking overlaps (on host), device version in Simulations struct//
Expand Down
5 changes: 5 additions & 0 deletions src_clean/fxn_main.h
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,11 @@ inline void Prepare_Widom(WidomStruct& Widom, Boxsize Box, Simulations& Sims, Co
size_t fourier_size = SystemComponents.EikAllocateSize;
if(fourier_size > vdw_real_size) blocksum_size = fourier_size;

//Allocate temporary space for reinsertion//
size_t MaxAdsorbateMolsize = *std::max_element(SystemComponents.Moleculesize.begin() + 1, SystemComponents.Moleculesize.end());;
cudaMalloc(&SystemComponents.tempMolStorage, MaxAdsorbateMolsize * 2 * sizeof(double3));
printf("Allocated %zu double3 for reinsertion!\n", MaxAdsorbateMolsize * 2);

cudaMallocHost(&Sims.Blocksum, blocksum_size*sizeof(double));

cudaMallocManaged(&Sims.ExcludeList, 10 * sizeof(int2));
Expand Down
27 changes: 10 additions & 17 deletions src_clean/mc_swap_moves.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,7 @@ static inline MoveEnergy Reinsertion(Components& SystemComponents, Simulations&
}

//Store The New Locations//
double3* temp;
cudaMalloc(&temp, sizeof(double3) * SystemComponents.Moleculesize[SelectedComponent]);
StoreNewLocation_Reinsertion<<<1,1>>>(Sims.Old, Sims.New, temp, SelectedTrial, SystemComponents.Moleculesize[SelectedComponent]);
StoreNewLocation_Reinsertion<<<1,1>>>(Sims.Old, Sims.New, SystemComponents.tempMolStorage, SelectedTrial, SystemComponents.Moleculesize[SelectedComponent]);
/////////////
// RETRACE //
/////////////
Expand All @@ -98,7 +96,7 @@ static inline MoveEnergy Reinsertion(Components& SystemComponents, Simulations&
bool EwaldPerformed = false;
if(!FF.noCharges && SystemComponents.hasPartialCharge[SelectedComponent])
{
double2 EwaldE = GPU_EwaldDifference_Reinsertion(Sims.Box, Sims.d_a, Sims.Old, temp, FF, Sims.Blocksum, SystemComponents, SelectedComponent, UpdateLocation);
double2 EwaldE = GPU_EwaldDifference_Reinsertion(Sims.Box, Sims.d_a, Sims.Old, SystemComponents.tempMolStorage, FF, Sims.Blocksum, SystemComponents, SelectedComponent, UpdateLocation);

energy.GGEwaldE = EwaldE.x;
energy.HGEwaldE = EwaldE.y;
Expand All @@ -109,8 +107,8 @@ static inline MoveEnergy Reinsertion(Components& SystemComponents, Simulations&
//Put it after Ewald summation, the required positions are already in place (done by the preparation parts of Ewald Summation)//
if(SystemComponents.UseDNNforHostGuest)
{
if(!EwaldPerformed) Prepare_DNN_InitialPositions_Reinsertion(Sims.d_a, Sims.Old, temp, SystemComponents, SelectedComponent, UpdateLocation);
energy.DNN_E = DNN_Prediction_Reinsertion(SystemComponents, Sims, SelectedComponent, temp);
if(!EwaldPerformed) Prepare_DNN_InitialPositions_Reinsertion(Sims.d_a, Sims.Old, SystemComponents.tempMolStorage, SystemComponents, SelectedComponent, UpdateLocation);
energy.DNN_E = DNN_Prediction_Reinsertion(SystemComponents, Sims, SelectedComponent, SystemComponents.tempMolStorage);
//Correction of DNN - HostGuest energy to the Rosenbluth weight//
double correction = energy.DNN_Correction();
if(fabs(correction) > SystemComponents.DNNDrift) //If there is a huge drift in the energy correction between DNN and Classical HostGuest//
Expand All @@ -131,8 +129,8 @@ static inline MoveEnergy Reinsertion(Components& SystemComponents, Simulations&
{ // accept the move
SystemComponents.Moves[SelectedComponent].ReinsertionAccepted ++;
//size_t UpdateLocation = SystemComponents.Moleculesize[SelectedComponent] * SelectedMolInComponent;
Update_Reinsertion_data<<<1,SystemComponents.Moleculesize[SelectedComponent]>>>(Sims.d_a, temp, SelectedComponent, UpdateLocation); checkCUDAError("error Updating Reinsertion data");
cudaFree(temp);
Update_Reinsertion_data<<<1,SystemComponents.Moleculesize[SelectedComponent]>>>(Sims.d_a, SystemComponents.tempMolStorage, SelectedComponent, UpdateLocation); checkCUDAError("error Updating Reinsertion data");

if(!FF.noCharges && SystemComponents.hasPartialCharge[SelectedComponent])
Update_Vector_Ewald(Sims.Box, false, SystemComponents, SelectedComponent);
SystemComponents.Tmmc[SelectedComponent].Update(1.0, NMol, REINSERTION); //Update for TMMC, since Macrostate not changed, just add 1.//
Expand All @@ -141,7 +139,6 @@ static inline MoveEnergy Reinsertion(Components& SystemComponents, Simulations&
}
else
{
cudaFree(temp);
SystemComponents.Tmmc[SelectedComponent].Update(1.0, NMol, REINSERTION); //Update for TMMC, since Macrostate not changed, just add 1.//
energy.zero();
return energy;
Expand Down Expand Up @@ -523,9 +520,7 @@ static inline MoveEnergy IdentitySwapMove(Components& SystemComponents, Simulati
energy += temp_energy;
}
// Store The New Locations //
double3* temp;
cudaMalloc(&temp, sizeof(double3) * SystemComponents.Moleculesize[NEWComponent]);
StoreNewLocation_Reinsertion<<<1,1>>>(Sims.Old, Sims.New, temp, SelectedTrial, SystemComponents.Moleculesize[NEWComponent]);
StoreNewLocation_Reinsertion<<<1,1>>>(Sims.Old, Sims.New, SystemComponents.tempMolStorage, SelectedTrial, SystemComponents.Moleculesize[NEWComponent]);

/////////////
// RETRACE //
Expand All @@ -549,7 +544,7 @@ static inline MoveEnergy IdentitySwapMove(Components& SystemComponents, Simulati
size_t UpdateLocation = SystemComponents.Moleculesize[OLDComponent] * OLDMolInComponent;
if(!FF.noCharges)
{
double2 EwaldE = GPU_EwaldDifference_IdentitySwap(Sims.Box, Sims.d_a, Sims.Old, temp, FF, Sims.Blocksum, SystemComponents, OLDComponent, NEWComponent, UpdateLocation);
double2 EwaldE = GPU_EwaldDifference_IdentitySwap(Sims.Box, Sims.d_a, Sims.Old, SystemComponents.tempMolStorage, FF, Sims.Blocksum, SystemComponents, OLDComponent, NEWComponent, UpdateLocation);
energy.GGEwaldE = EwaldE.x;
energy.HGEwaldE = EwaldE.y;
Rosenbluth *= std::exp(-SystemComponents.Beta * (EwaldE.x + EwaldE.y));
Expand Down Expand Up @@ -597,7 +592,7 @@ static inline MoveEnergy IdentitySwapMove(Components& SystemComponents, Simulati
}

UpdateLocation = SystemComponents.Moleculesize[NEWComponent] * NEWMolInComponent;
Update_IdentitySwap_Insertion_data<<<1,1>>>(Sims.d_a, temp, NEWComponent, UpdateLocation, NEWMolInComponent, SystemComponents.Moleculesize[NEWComponent]); checkCUDAError("error Updating Identity Swap Insertion data");
Update_IdentitySwap_Insertion_data<<<1,1>>>(Sims.d_a, SystemComponents.tempMolStorage, NEWComponent, UpdateLocation, NEWMolInComponent, SystemComponents.Moleculesize[NEWComponent]); checkCUDAError("error Updating Identity Swap Insertion data");

Update_NumberOfMolecules(SystemComponents, Sims.d_a, NEWComponent, INSERTION);
Update_NumberOfMolecules(SystemComponents, Sims.d_a, OLDComponent, DELETION);
Expand All @@ -607,9 +602,8 @@ static inline MoveEnergy IdentitySwapMove(Components& SystemComponents, Simulati
//Regarding the UpdateLocation, in the code it is the old molecule position//
//if the OLDComponent = NEWComponent, The new location will be filled into the old position//
//So just use what is already in the code//
Update_Reinsertion_data<<<1,SystemComponents.Moleculesize[OLDComponent]>>>(Sims.d_a, temp, OLDComponent, UpdateLocation);
Update_Reinsertion_data<<<1,SystemComponents.Moleculesize[OLDComponent]>>>(Sims.d_a, SystemComponents.tempMolStorage, OLDComponent, UpdateLocation);
}
cudaFree(temp);
//Zhao's note: BUG!!!!, Think about if OLD/NEW Component belong to different type (framework/adsorbate)//
if(!FF.noCharges && ((SystemComponents.hasPartialCharge[NEWComponent]) ||(SystemComponents.hasPartialCharge[OLDComponent])))
Update_Vector_Ewald(Sims.Box, false, SystemComponents, NEWComponent);
Expand All @@ -618,7 +612,6 @@ static inline MoveEnergy IdentitySwapMove(Components& SystemComponents, Simulati
}
else
{
cudaFree(temp);
energy.zero();
return energy;
}
Expand Down
79 changes: 40 additions & 39 deletions src_clean/mc_widom.h
Original file line number Diff line number Diff line change
Expand Up @@ -177,26 +177,36 @@ __global__ void get_random_trial_orientation(Boxsize Box, Atoms* d_a, Atoms Mol,
//Deletion: MolID = selected ID
const size_t i = blockIdx.x * blockDim.x + threadIdx.x;

__shared__ double3 OldPos;
__shared__ double Chosenscale;
__shared__ double ChosenscaleCoul;
//Record First Bead Information//
//For all orientations//
if(i == 0)
{
Mol.pos[0] = NewMol.pos[FirstBeadTrial];
Mol.scale[0] = NewMol.scale[FirstBeadTrial];
Mol.charge[0] = NewMol.charge[FirstBeadTrial];
Mol.scaleCoul[0] = NewMol.scaleCoul[FirstBeadTrial];
Chosenscale = NewMol.scale[FirstBeadTrial];
ChosenscaleCoul = NewMol.scaleCoul[FirstBeadTrial];
Mol.scale[0] = Chosenscale;
Mol.scaleCoul[0] = ChosenscaleCoul;
Mol.Type[0] = NewMol.Type[FirstBeadTrial];
Mol.MolID[0] = NewMol.MolID[FirstBeadTrial];
OldPos = NewMol.pos[FirstBeadTrial];
Mol.pos[0] = OldPos;
}
__syncthreads();
size_t trial = i / chainsize;
size_t a = i % chainsize;
//Quaternions uses 3 random seeds//
size_t random_index = i + offset;
const Atoms AllData = d_a[SelectedComponent];
size_t random_index = trial + offset;
const Atoms& AllData = d_a[SelectedComponent];
//different from translation (where we copy a whole molecule), here we duplicate the properties of the first bead of a molecule
// so use start_position, not real_pos
//Zhao's note: when there are zero molecule for the species, we need to use some preset values
//the first values always have some numbers. The xyz are not correct, but type and charge are correct. Use those.
double scale = 0.0; double scaleCoul = 0.0;
for(size_t a = 0; a < chainsize; a++)
{
//double scale = 0.0; double scaleCoul = 0.0;
//for(size_t a = 0; a < chainsize; a++)
//{
double3 Vec;
Vec = AllData.pos[1+a] - AllData.pos[0];
switch(MoveType)
Expand All @@ -205,56 +215,46 @@ __global__ void get_random_trial_orientation(Boxsize Box, Atoms* d_a, Atoms Mol,
//Zhao's note: It depends on whether Identity_swap needs to be operated on fractional molecules//
//FOR NOW, JUST PUT IT ALONGSIDE WITH CBMC_INSERTION //
/////////////////////////////////////////////////////////////////////////////////////////////////
case CBMC_INSERTION: case IDENTITY_SWAP_NEW: //Insertion (whole/fractional Molecule)//
case CBMC_INSERTION: case IDENTITY_SWAP_NEW: case REINSERTION_INSERTION: //Insertion (whole/fractional Molecule)//
{
scale = proposed_scale.x; scaleCoul = proposed_scale.y;
//scale = proposed_scale.x; scaleCoul = proposed_scale.y;
Rotate_Quaternions(Vec, random[random_index]);
NewMol.pos[i*chainsize+a] = Mol.pos[0] + Vec;
break;
}
case CBMC_DELETION: //Deletion (whole/fractional molecule)//
{
scale = AllData.scale[start_position+a]; scaleCoul = AllData.scaleCoul[start_position+a];
if(i==0) //if deletion, the first trial position is the old position of the selected molecule//
{
NewMol.pos[i*chainsize+a] = AllData.pos[start_position+a];
}
else
{
Rotate_Quaternions(Vec, random[random_index]);
NewMol.pos[i*chainsize+a] = Mol.pos[0] + Vec;
}
//printf("CHAIN: trial: %lu, xyz: %.5f %.5f %.5f\n", i, NewMol.pos[i*chainsize+a].x, NewMol.pos[i*chainsize+a].y, NewMol.pos[i*chainsize+a].z);
//if(i == 0) printf("i=0, start_position: %lu\n", start_position);
NewMol.pos[i] = OldPos + Vec;
break;
}
/*
case REINSERTION_INSERTION: //Reinsertion-Insertion//
{
scale = AllData.scale[start_position+a]; scaleCoul = AllData.scaleCoul[start_position+a];
//scale = AllData.scale[start_position+a]; scaleCoul = AllData.scaleCoul[start_position+a];
Rotate_Quaternions(Vec, random[random_index]);
NewMol.pos[i*chainsize+a] = Mol.pos[0] + Vec;
NewMol.pos[i] = OldPos + Vec;
break;
}
*/
case CBMC_DELETION: //Deletion (whole/fractional molecule)//
case REINSERTION_RETRACE: case IDENTITY_SWAP_OLD: //Reinsertion-Retrace, but also works for Identity swap (old molecule) for multiple orientations (not first bead) //
{
scale = AllData.scale[start_position+a]; scaleCoul = AllData.scaleCoul[start_position+a];
if(i==0) //if deletion, the first trial position is the old position of the selected molecule//
//scale = AllData.scale[start_position+a]; scaleCoul = AllData.scaleCoul[start_position+a];
if(trial==0) //if deletion, the first trial position is the old position of the selected molecule//
{
NewMol.pos[i*chainsize+a] = AllData.pos[start_position+a];
NewMol.pos[i] = AllData.pos[start_position+a];
}
else
{
Rotate_Quaternions(Vec, random[random_index]);
NewMol.pos[i*chainsize+a] = Mol.pos[0] + Vec;
NewMol.pos[i] = OldPos + Vec;
}
break;
}
}
NewMol.scale[i*chainsize+a] = scale; NewMol.charge[i*chainsize+a] = AllData.charge[start_position+a];
NewMol.scaleCoul[i*chainsize+a] = scaleCoul;
NewMol.Type[i*chainsize+a] = AllData.Type[start_position+a]; NewMol.MolID[i*chainsize+a] = MolID;
}
device_flag[i] = false;
NewMol.scale[i] = Chosenscale;
NewMol.charge[i] = AllData.charge[start_position+a];
NewMol.scaleCoul[i] = ChosenscaleCoul;
NewMol.Type[i] = AllData.Type[start_position+a];
NewMol.MolID[i] = MolID;
//}
if(a == 0)
device_flag[trial] = false;
}


Expand Down Expand Up @@ -438,7 +438,8 @@ static inline double Widom_Move_Chain_PARTIAL(Components& SystemComponents, Simu

Random.Check(Widom.NumberWidomTrialsOrientations);
//Get the first bead positions, and setup trial orientations//
get_random_trial_orientation<<<1,Widom.NumberWidomTrialsOrientations>>>(Sims.Box, Sims.d_a, Sims.Old, Sims.New, Sims.device_flag, Random.device_random, Random.offset, FirstBeadTrial, start_position, SelectedComponent, SelectedMolID, chainsize, MoveType, proposed_scale, SystemComponents.CURRENTCYCLE); checkCUDAError("error getting random trials orientations");
size_t trialO_Nthread=0; size_t trialO_Nblock=0; Setup_threadblock(Widom.NumberWidomTrialsOrientations*chainsize, &trialO_Nblock, &trialO_Nthread);
get_random_trial_orientation<<<trialO_Nblock,trialO_Nthread>>>(Sims.Box, Sims.d_a, Sims.Old, Sims.New, Sims.device_flag, Random.device_random, Random.offset, FirstBeadTrial, start_position, SelectedComponent, SelectedMolID, chainsize, MoveType, proposed_scale, SystemComponents.CURRENTCYCLE); checkCUDAError("error getting random trials orientations");

Random.Update(Widom.NumberWidomTrialsOrientations);

Expand Down

0 comments on commit 45d5c9d

Please sign in to comment.