Skip to content

Commit

Permalink
Merge branch 'nc_gs_test_GPU' into 'master'
Browse files Browse the repository at this point in the history
modifications in compute_vxc - compute_nvxc and process potential to compute...

See merge request npneq/inq!1141
  • Loading branch information
xavierandrade committed Oct 1, 2024
2 parents cbe3f11 + 96ed05a commit 122a887
Show file tree
Hide file tree
Showing 2 changed files with 188 additions and 108 deletions.
259 changes: 165 additions & 94 deletions src/hamiltonian/xc_term.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,78 +89,22 @@ class xc_term {

////////////////////////////////////////////////////////////////////////////////////////////

template <typename SpinDensityType, typename VXC, typename VKS>
void process_potential(SpinDensityType const & spin_density, VXC const & vxc, VKS & vks) const {

if (spin_density.set_size() == 4) {
gpu::run(vxc.basis().local_size(),
[spi = begin(spin_density.matrix()), vx = begin(vxc.matrix()), vk = begin(vks.matrix())] GPU_LAMBDA (auto ip){
auto bxc = 0.5*(vx[ip][0] - vx[ip][1]);
auto vxc = 0.5*(vx[ip][0] + vx[ip][1]);
auto mag = observables::local_magnetization(spi[ip], 4);
auto dpol = mag.length();
if (fabs(dpol) > 1.e-7) {
auto e_mag = mag/dpol;
vk[ip][0] += vxc + bxc*e_mag[2];
vk[ip][1] += vxc - bxc*e_mag[2];
vk[ip][2] += bxc*e_mag[0];
vk[ip][3] += bxc*e_mag[1];
}
else {
vk[ip][0] += vxc;
vk[ip][1] += vxc;
}
});
}
else {
assert(spin_density.set_size() == 1 or spin_density.set_size() == 2);
gpu::run(vxc.local_set_size(), vxc.basis().local_size(),
[vx = begin(vxc.matrix()), vk = begin(vks.matrix())] GPU_LAMBDA (auto is, auto ip){
vk[ip][is] += vx[ip][is];
});
}
template <typename VXC, typename VKS>
void process_potential(VXC const & vxc, VKS & vks) const {

gpu::run(vxc.local_set_size(), vxc.basis().local_size(),
[vx = begin(vxc.matrix()), vk = begin(vks.matrix())] GPU_LAMBDA (auto is, auto ip){
vk[ip][is] += vx[ip][is];
});
}

///////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////

template <typename SpinDensityType, typename VXC>
double compute_nvxc(SpinDensityType const & spin_density, VXC const & vfunc) const {
double compute_nvxc(SpinDensityType const & spin_density, VXC const & vxc) const {

auto nvxc_ = 0.0;
if (spin_density.set_size() == 4) {
basis::field_set<basis::real_space, double> vxc(spin_density.skeleton());
vxc.fill(0.0);
gpu::run(vfunc.basis().local_size(),
[spi = begin(spin_density.matrix()), vxi = begin(vfunc.matrix()), vxf = begin(vxc.matrix())] GPU_LAMBDA (auto ip){
auto b = 0.5*(vxi[ip][0] - vxi[ip][1]);
auto v = 0.5*(vxi[ip][0] + vxi[ip][1]);
auto mag = observables::local_magnetization(spi[ip], 4);
auto dpol = mag.length();
if (fabs(dpol) > 1.e-7) {
auto e_mag = mag/dpol;
// Vxc = [vxc+bxc^z, bxc^-; bxc^+, vxc-bxc^z]
vxf[ip][0] = v + b*e_mag[2];
vxf[ip][1] = v - b*e_mag[2];
vxf[ip][2] = 2.0*b*e_mag[0];
vxf[ip][3] = 2.0*b*e_mag[1];
}
else {
vxf[ip][0] = v;
vxf[ip][1] = v;
}
});
basis::field<basis::real_space, double> rfield(spin_density.basis());
rfield.fill(0.0);
gpu::run(spin_density.local_set_size(), spin_density.basis().local_size(),
[spi = begin(spin_density.matrix()), vx = begin(vxc.matrix()), rf = begin(rfield.linear())] GPU_LAMBDA (auto is, auto ip){
rf[ip] += vx[ip][is] * spi[ip][is];
});
nvxc_ += operations::integral(rfield);
}
else {
nvxc_ = operations::integral_product_sum(spin_density, vfunc);
}
nvxc_ += operations::integral_product_sum(spin_density, vxc);
return nvxc_;
}

Expand All @@ -169,14 +113,17 @@ class xc_term {
template <typename SpinDensityType, typename CoreDensityType, typename VKSType>
void operator()(SpinDensityType const & spin_density, CoreDensityType const & core_density, VKSType & vks, double & exc, double & nvxc) const {

exc = 0.0;
exc = 0.0;
nvxc = 0.0;
if(not any_true_functional()) return;

auto full_density = process_density(spin_density, core_density);

double efunc = 0.0;

basis::field_set<basis::real_space, double> vxc(spin_density.skeleton());
vxc.fill(0.0);

basis::field_set<basis::real_space, double> vfunc(full_density.skeleton());
auto density_gradient = std::optional<decltype(operations::gradient(full_density))>{};
if(any_requires_gradient()) density_gradient.emplace(operations::gradient(full_density));
Expand All @@ -185,39 +132,29 @@ class xc_term {
if(not func.true_functional()) continue;

evaluate_functional(func, full_density, density_gradient, efunc, vfunc);
exc += efunc;
process_potential(spin_density, vfunc, vks);
compute_vxc(spin_density, vfunc, vxc);

nvxc += compute_nvxc(spin_density, vfunc);
exc += efunc;
}

process_potential(vxc, vks);
nvxc += compute_nvxc(spin_density, vxc);
}

////////////////////////////////////////////////////////////////////////////////////////////

template <typename SpinDensityType, typename CoreDensityType, typename VxcType>
void compute_vxc(SpinDensityType const & spin_density, CoreDensityType const & core_density, VxcType & vxc) {

if(not any_true_functional()) return;
auto full_density = process_density(spin_density, core_density);
double efunc = 0.0;
basis::field_set<basis::real_space, double> vfunc(full_density.skeleton());
vfunc.fill(0.0);
vxc.fill(0.0);
auto density_gradient = std::optional<decltype(operations::gradient(full_density))>{};
if(any_requires_gradient()) density_gradient.emplace(operations::gradient(full_density));

for(auto & func : functionals_) {
if(not func.true_functional()) continue;
evaluate_functional(func, full_density, density_gradient, efunc, vfunc);
if (spin_density.set_size() == 4) {
gpu::run(vfunc.basis().local_size(),
template <typename SpinDensityType, typename VxcType, typename VXC>
static void compute_vxc(SpinDensityType const & spin_density, VxcType const & vfunc, VXC & vxc) {

if (spin_density.set_size() == 4) {
gpu::run(vfunc.basis().local_size(),
[spi = begin(spin_density.matrix()), vxi = begin(vfunc.matrix()), vxf = begin(vxc.matrix())] GPU_LAMBDA (auto ip){
auto b = 0.5*(vxi[ip][0] - vxi[ip][1]);
auto v = 0.5*(vxi[ip][0] + vxi[ip][1]);
auto mag = observables::local_magnetization(spi[ip], 4);
auto dpol = mag.length();
if (fabs(dpol) > 1.e-7) {
auto e_mag = mag / dpol;
auto e_mag = mag/dpol;
vxf[ip][0] += v + b*e_mag[2];
vxf[ip][1] += v - b*e_mag[2];
vxf[ip][2] += b*e_mag[0];
Expand All @@ -228,14 +165,86 @@ class xc_term {
vxf[ip][1] += v;
}
});
}
else {
assert(spin_density.set_size() == 1 or spin_density.set_size() == 2);
gpu::run(vfunc.local_set_size(), vfunc.basis().local_size(),
[vxi = begin(vfunc.matrix()), vxf = begin(vxc.matrix())] GPU_LAMBDA (auto is, auto ip){
vxf[ip][is] += vxi[ip][is];
});
}
}

////////////////////////////////////////////////////////////////////////////////////////////

template<class CommType, typename CoreDensityType, typename SpinDensityType, class occupations_array_type, class kpin_type>
void eval_psi_vxc_psi(CommType & comm, CoreDensityType const & core_density, SpinDensityType const & spin_density, occupations_array_type const & occupations, kpin_type const & kpin, double & nvx, double & ex) {

if (not any_true_functional()) {
nvx += 0.0;
ex += 0.0;
}
else {
auto full_density = process_density(spin_density, core_density);
double efunc = 0.0;
basis::field_set<basis::real_space, double> vxc(spin_density.skeleton());
vxc.fill(0.0);

basis::field_set<basis::real_space, double> vfunc(full_density.skeleton());
auto density_gradient = std::optional<decltype(operations::gradient(full_density))>{};
if (any_requires_gradient()) density_gradient.emplace(operations::gradient(full_density));

for (auto & func : functionals_){
if (not func.true_functional()) continue;

evaluate_functional(func, full_density, density_gradient, efunc, vfunc);
compute_vxc(spin_density, vfunc, vxc);
ex += efunc;
}
else {
assert(spin_density.set_size() == 1 or spin_density.set_size() == 2);
gpu::run(vfunc.local_set_size(), vfunc.basis().local_size(),
[vxi = begin(vfunc.matrix()), vxf = begin(vxc.matrix())] GPU_LAMBDA (auto is, auto ip){
vxf[ip][is] += vxi[ip][is];
});

basis::field<basis::real_space, double> rfield(vxc.basis());
rfield.fill(0.0);
int iphi = 0;
for (auto & phi : kpin) {
compute_psi_vxc_psi_ofr(occupations[iphi], phi, vxc, rfield);
iphi++;
}

rfield.all_reduce(comm);
nvx += operations::integral(rfield);
}
}

////////////////////////////////////////////////////////////////////////////////////////////

template<class occupations_array_type, class field_set_type, typename VxcType>
void compute_psi_vxc_psi_ofr(occupations_array_type const & occupations, field_set_type const & phi, VxcType const & vxc, basis::field<basis::real_space, double> & rfield) {

assert(std::get<1>(sizes(phi.spinor_array())) == phi.spinor_dim());
assert(std::get<2>(sizes(phi.spinor_array())) == phi.local_spinor_set_size());

if (vxc.set_size() == 1) {
gpu::run(phi.local_spinor_set_size(), phi.basis().local_size(),
[ph = begin(phi.matrix()), rf = begin(rfield.linear()), vx=begin(vxc.matrix()), occ=begin(occupations)] GPU_LAMBDA (auto ist, auto ip) {
rf[ip] += occ[ist] * vx[ip][0] * norm(ph[ip][ist]);
});
}
else if (vxc.set_size() == 2) {
gpu::run(phi.local_set_size(), phi.basis().local_size(),
[ph = begin(phi.matrix()), rf = begin(rfield.linear()), vx = begin(vxc.matrix()), occ = begin(occupations), spi = phi.spin_index()] GPU_LAMBDA (auto ist, auto ip) {
rf[ip] += occ[ist] * vx[ip][spi] * norm(ph[ip][ist]);
});
}
else {
assert(vxc.set_size() == 4);
gpu::run(phi.local_spinor_set_size(), phi.basis().local_size(),
[ph = begin(phi.spinor_array()), rf = begin(rfield.linear()), vx = begin(vxc.matrix()), occ = begin(occupations)] GPU_LAMBDA (auto ist, auto ip) {
auto offdiag = vx[ip][2] + complex{0.0, 1.0}*vx[ip][3];
auto cross = 2.0*occ[ist]*real(offdiag*ph[ip][1][ist]*conj(ph[ip][0][ist]));
rf[ip] += occ[ist]*vx[ip][0]*norm(ph[ip][0][ist]);
rf[ip] += occ[ist]*vx[ip][1]*norm(ph[ip][1][ist]);
rf[ip] += cross;
});
}
}

Expand Down Expand Up @@ -480,6 +489,68 @@ TEST_CASE(INQ_TEST_FILE, INQ_TEST_TAG){
CHECK(pbe.any_true_functional() == true);

}


SECTION("nvxc calculation unpolarized") {
auto par = input::parallelization(comm);
auto ions = systems::ions(systems::cell::cubic(10.0_b));
ions.insert("H", {0.0_b, 0.0_b, 0.0_b});
auto electrons = systems::electrons(par, ions, options::electrons{}.cutoff(30.0_Ha).extra_states(2).spin_unpolarized());
ground_state::initial_guess(ions, electrons);
auto result = ground_state::calculate(ions, electrons, options::theory{}.lda(), options::ground_state{}.steepest_descent().energy_tolerance(1.e-8_Ha).max_steps(100));
auto nvxc = result.energy.nvxc();
auto exc = result.energy.xc();
Approx target = Approx(nvxc).epsilon(1.e-10);
Approx target2= Approx(exc).epsilon(1.e-10);

hamiltonian::xc_term xc_(options::theory{}.lda(), electrons.spin_density().set_size());
auto core_density_ = electrons.atomic_pot().nlcc_density(electrons.states_comm(), electrons.spin_density().basis(), ions);
auto nvxc2 = 0.0;
auto exc2 = 0.0;
xc_.eval_psi_vxc_psi(electrons.kpin_states_comm(), core_density_, electrons.spin_density(), electrons.occupations(), electrons.kpin(), nvxc2, exc2);
CHECK(nvxc2 == target);
CHECK(exc2 == target2);
}

SECTION("nvxc calculation spin polarized") {
auto par = input::parallelization(comm);
auto ions = systems::ions(systems::cell::cubic(10.0_b));
ions.insert("H", {0.0_b, 0.0_b, 0.0_b});
auto electrons = systems::electrons(par, ions, options::electrons{}.cutoff(30.0_Ha).extra_states(2).spin_polarized());
ground_state::initial_guess(ions, electrons);
auto result = ground_state::calculate(ions, electrons, options::theory{}.lda(), options::ground_state{}.steepest_descent().energy_tolerance(1.e-8_Ha).max_steps(100));
auto nvxc = result.energy.nvxc();
auto exc = result.energy.xc();
Approx target = Approx(nvxc).epsilon(1.e-10);
Approx target2 = Approx(exc).epsilon(1.e-10);

hamiltonian::xc_term xc_(options::theory{}.lda(), electrons.spin_density().set_size());
auto core_density_ = electrons.atomic_pot().nlcc_density(electrons.states_comm(), electrons.spin_density().basis(), ions);
auto nvxc2 = 0.0;
auto exc2 = 0.0;
xc_.eval_psi_vxc_psi(electrons.kpin_states_comm(), core_density_, electrons.spin_density(), electrons.occupations(), electrons.kpin(), nvxc2, exc2);
CHECK(nvxc2 == target);
CHECK(exc2 == target2);
}

SECTION("nvxc calculation spin non collinear") {
auto par = input::parallelization(comm);
auto ions = systems::ions(systems::cell::cubic(10.0_b));
ions.insert("H", {0.0_b, 0.0_b, 0.0_b});
auto electrons = systems::electrons(par, ions, options::electrons{}.cutoff(30.0_Ha).extra_states(2).spin_non_collinear());
ground_state::initial_guess(ions, electrons);
auto result = ground_state::calculate(ions, electrons, options::theory{}.lda(), options::ground_state{}.steepest_descent().energy_tolerance(1.e-8_Ha).max_steps(100));
auto nvxc = result.energy.nvxc();
auto exc = result.energy.xc();
Approx target = Approx(nvxc).epsilon(1.e-10);
Approx target2 = Approx(exc).epsilon(1.e-10);

hamiltonian::xc_term xc_(options::theory{}.lda(), electrons.spin_density().set_size());
auto core_density_ = electrons.atomic_pot().nlcc_density(electrons.states_comm(), electrons.spin_density().basis(), ions);
auto nvxc2 = 0.0;
auto exc2 = 0.0;
xc_.eval_psi_vxc_psi(electrons.kpin_states_comm(), core_density_, electrons.spin_density(), electrons.occupations(), electrons.kpin(), nvxc2, exc2);
CHECK(nvxc2 == target);
CHECK(exc2 == target2);
}
}
#endif
37 changes: 23 additions & 14 deletions tests/non_collinear_gs.cpp
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
/* -*- indent-tabs-mode: t -*- */

#include <inq/inq.hpp>
#include <stdio.h>

using namespace inq;
using namespace inq::magnitude;

inq::utils::match match(3.0e-4);

template <class EnvType>
auto compute_GS(EnvType const & env, systems::ions const & ions) {
int main (int argc, char ** argv) {
auto env = input::environment{};

auto ions = systems::ions(systems::cell::cubic(10.0_b));
ions.insert("H", {0.0_b, 0.0_b, 0.0_b});

auto electrons = systems::electrons(env.par(), ions, options::electrons{}.cutoff(30.0_Ha).extra_states(2).spin_polarized());
ground_state::initial_guess(ions, electrons);
Expand All @@ -22,20 +24,27 @@ auto compute_GS(EnvType const & env, systems::ions const & ions) {
auto mag_nc = observables::total_magnetization(electrons_nc.spin_density());

match.check("total energy", result_col.energy.total(), result_nc.energy.total());
match.check("total magnetization", mag.length(), mag_nc.length());
}


int main (int argc, char ** argv) {
auto env = input::environment{};

auto ions = systems::ions(systems::cell::cubic(10.0_b));
ions.insert("H", {0.0_b, 0.0_b, 0.0_b});
compute_GS(env, ions);
match.check("nvxc", result_col.energy.nvxc(), result_nc.energy.nvxc());
match.check("total magnetization", mag.length(), mag_nc.length());

auto d = 1.21_A;
ions = systems::ions(systems::cell::cubic(10.0_b));
ions.insert("O", {0.0_b, 0.0_b, d/2});
ions.insert("O", {0.0_b, 0.0_b,-d/2});
compute_GS(env, ions);

electrons = systems::electrons(env.par(), ions, options::electrons{}.cutoff(30.0_Ha).extra_states(2).spin_polarized());
ground_state::initial_guess(ions, electrons);
result_col = ground_state::calculate(ions, electrons, options::theory{}.lda(), inq::options::ground_state{}.steepest_descent().energy_tolerance(1.e-8_Ha).max_steps(1000));
mag = observables::total_magnetization(electrons.spin_density());

electrons_nc = systems::electrons(env.par(), ions, options::electrons{}.cutoff(30.0_Ha).extra_states(2).spin_non_collinear());
ground_state::initial_guess(ions, electrons_nc);
result_nc = ground_state::calculate(ions, electrons_nc, options::theory{}.lda(), inq::options::ground_state{}.steepest_descent().energy_tolerance(1.e-8_Ha).max_steps(10000).mixing(0.1));
mag_nc = observables::total_magnetization(electrons_nc.spin_density());

match.check("total energy", result_col.energy.total(), result_nc.energy.total());
match.check("nvxc", result_col.energy.nvxc(), result_nc.energy.nvxc());
match.check("total magnetization", mag.length(), mag_nc.length());

return match.fail();
}

0 comments on commit 122a887

Please sign in to comment.