Merge branch 'nc_gs_test_GPU' into 'master'

modifications in compute_vxc - compute_nvxc and process potential to compute... See merge request npneq/inq!1141
LLNL · Oct 1, 2024 · 122a887 · 122a887
2 parents cbe3f11 + 96ed05a
commit 122a887
Show file tree

Hide file tree

Showing 2 changed files with 188 additions and 108 deletions.
diff --git a/src/hamiltonian/xc_term.hpp b/src/hamiltonian/xc_term.hpp
@@ -89,78 +89,22 @@ class xc_term {
 
 	////////////////////////////////////////////////////////////////////////////////////////////
 
-	template <typename SpinDensityType, typename VXC, typename VKS>
-	void process_potential(SpinDensityType const & spin_density, VXC const & vxc, VKS & vks) const {
-
-		if (spin_density.set_size() == 4) {
-			gpu::run(vxc.basis().local_size(),
-						 [spi = begin(spin_density.matrix()), vx = begin(vxc.matrix()), vk = begin(vks.matrix())] GPU_LAMBDA (auto ip){
-							auto bxc = 0.5*(vx[ip][0] - vx[ip][1]);
-							auto vxc = 0.5*(vx[ip][0] + vx[ip][1]);
-							auto mag = observables::local_magnetization(spi[ip], 4);
-							auto dpol = mag.length();
-							if (fabs(dpol) > 1.e-7) {
-								auto e_mag = mag/dpol;
-								vk[ip][0] += vxc + bxc*e_mag[2];
-								vk[ip][1] += vxc - bxc*e_mag[2];
-								vk[ip][2] += bxc*e_mag[0];
-								vk[ip][3] += bxc*e_mag[1];
-							}
-							else {
-								vk[ip][0] += vxc;
-								vk[ip][1] += vxc;
-							}
-						 });
-		}
-		else {
-			assert(spin_density.set_size() == 1 or spin_density.set_size() == 2);
-			gpu::run(vxc.local_set_size(), vxc.basis().local_size(),
-						 [vx = begin(vxc.matrix()), vk = begin(vks.matrix())] GPU_LAMBDA (auto is, auto ip){
-							vk[ip][is] += vx[ip][is];
-						 });
-		}
+	template <typename VXC, typename VKS>
+	void process_potential(VXC const & vxc, VKS & vks) const {
 
+		gpu::run(vxc.local_set_size(), vxc.basis().local_size(),
+			[vx = begin(vxc.matrix()), vk = begin(vks.matrix())] GPU_LAMBDA (auto is, auto ip){
+				vk[ip][is] += vx[ip][is];
+			});
 	}
 
-  ///////////////////////////////////////////////////////////////////////////////////////////
+	///////////////////////////////////////////////////////////////////////////////////////////
 
 	template <typename SpinDensityType, typename VXC>
-	double compute_nvxc(SpinDensityType const & spin_density, VXC const & vfunc) const {
+	double compute_nvxc(SpinDensityType const & spin_density, VXC const & vxc) const {
 
 		auto nvxc_ = 0.0;
-		if (spin_density.set_size() == 4) {
-			basis::field_set<basis::real_space, double> vxc(spin_density.skeleton());
-			vxc.fill(0.0);
-			gpu::run(vfunc.basis().local_size(),
-						 [spi = begin(spin_density.matrix()), vxi = begin(vfunc.matrix()), vxf = begin(vxc.matrix())] GPU_LAMBDA (auto ip){
-							auto b = 0.5*(vxi[ip][0] - vxi[ip][1]);
-							auto v = 0.5*(vxi[ip][0] + vxi[ip][1]);
-							auto mag = observables::local_magnetization(spi[ip], 4);
-							auto dpol = mag.length();
-							if (fabs(dpol) > 1.e-7) {
-								auto e_mag = mag/dpol;
-								// Vxc = [vxc+bxc^z, bxc^-; bxc^+, vxc-bxc^z]
-								vxf[ip][0] = v + b*e_mag[2];
-								vxf[ip][1] = v - b*e_mag[2];
-								vxf[ip][2] = 2.0*b*e_mag[0];
-								vxf[ip][3] = 2.0*b*e_mag[1];
-							}
-							else {
-								vxf[ip][0] = v;
-								vxf[ip][1] = v;
-							}
-						 });
-			basis::field<basis::real_space, double> rfield(spin_density.basis());
-			rfield.fill(0.0);
-			gpu::run(spin_density.local_set_size(), spin_density.basis().local_size(),
-						[spi = begin(spin_density.matrix()), vx = begin(vxc.matrix()), rf = begin(rfield.linear())] GPU_LAMBDA (auto is, auto ip){
-							rf[ip] += vx[ip][is] * spi[ip][is];
-						});
-			nvxc_ += operations::integral(rfield);
-		}
-		else {
-			nvxc_ = operations::integral_product_sum(spin_density, vfunc);
-		}
+		nvxc_ += operations::integral_product_sum(spin_density, vxc);
 		return nvxc_;
 	}
 
@@ -169,14 +113,17 @@ class xc_term {
   template <typename SpinDensityType, typename CoreDensityType, typename VKSType>
   void operator()(SpinDensityType const & spin_density, CoreDensityType const & core_density, VKSType & vks, double & exc, double & nvxc) const {
 
-    exc = 0.0;
+    	exc = 0.0;
 		nvxc = 0.0;
 		if(not any_true_functional()) return;
 
 		auto full_density = process_density(spin_density, core_density);
 
 		double efunc = 0.0;
 
+		basis::field_set<basis::real_space, double> vxc(spin_density.skeleton());
+		vxc.fill(0.0);
+
 		basis::field_set<basis::real_space, double> vfunc(full_density.skeleton());
 		auto density_gradient = std::optional<decltype(operations::gradient(full_density))>{};
 		if(any_requires_gradient()) density_gradient.emplace(operations::gradient(full_density));
@@ -185,39 +132,29 @@ class xc_term {
 			if(not func.true_functional()) continue;
 
 			evaluate_functional(func, full_density, density_gradient, efunc, vfunc);
-			exc += efunc;
-			process_potential(spin_density, vfunc, vks);
+			compute_vxc(spin_density, vfunc, vxc);
 
-			nvxc += compute_nvxc(spin_density, vfunc);
+			exc += efunc;
 		}
+
+		process_potential(vxc, vks);
+		nvxc += compute_nvxc(spin_density, vxc);
 	}
 
 	////////////////////////////////////////////////////////////////////////////////////////////
 
-	template <typename SpinDensityType, typename CoreDensityType, typename VxcType>
-	void compute_vxc(SpinDensityType const & spin_density, CoreDensityType const & core_density, VxcType & vxc) {
-
-		if(not any_true_functional()) return;
-		auto full_density = process_density(spin_density, core_density);
-		double efunc = 0.0;
-		basis::field_set<basis::real_space, double> vfunc(full_density.skeleton());
-		vfunc.fill(0.0);
-		vxc.fill(0.0);
-		auto density_gradient = std::optional<decltype(operations::gradient(full_density))>{};
-		if(any_requires_gradient()) density_gradient.emplace(operations::gradient(full_density));
-
-		for(auto & func : functionals_) {
-			if(not func.true_functional()) continue;
-			evaluate_functional(func, full_density, density_gradient, efunc, vfunc);
-			if (spin_density.set_size() == 4) {
-				gpu::run(vfunc.basis().local_size(),
+	template <typename SpinDensityType, typename VxcType, typename VXC>
+	static void compute_vxc(SpinDensityType const & spin_density, VxcType const & vfunc, VXC & vxc) {
+
+		if (spin_density.set_size() == 4) {
+			gpu::run(vfunc.basis().local_size(),
 				[spi = begin(spin_density.matrix()), vxi = begin(vfunc.matrix()), vxf = begin(vxc.matrix())] GPU_LAMBDA (auto ip){
 					auto b = 0.5*(vxi[ip][0] - vxi[ip][1]);
 					auto v = 0.5*(vxi[ip][0] + vxi[ip][1]);
 					auto mag = observables::local_magnetization(spi[ip], 4);
 					auto dpol = mag.length();
 					if (fabs(dpol) > 1.e-7) {
-						auto e_mag = mag / dpol;
+						auto e_mag = mag/dpol;
 						vxf[ip][0] += v + b*e_mag[2];
 						vxf[ip][1] += v - b*e_mag[2];
 						vxf[ip][2] += b*e_mag[0];
@@ -228,14 +165,86 @@ class xc_term {
 						vxf[ip][1] += v;
 					}
 				});
+		}
+		else {
+			assert(spin_density.set_size() == 1 or spin_density.set_size() == 2);
+			gpu::run(vfunc.local_set_size(), vfunc.basis().local_size(),
+				[vxi = begin(vfunc.matrix()), vxf = begin(vxc.matrix())] GPU_LAMBDA (auto is, auto ip){
+					vxf[ip][is] += vxi[ip][is];
+				});
+		}
+	}
+
+	////////////////////////////////////////////////////////////////////////////////////////////
+
+	template<class CommType, typename CoreDensityType, typename SpinDensityType, class occupations_array_type, class kpin_type>
+	void eval_psi_vxc_psi(CommType & comm, CoreDensityType const & core_density, SpinDensityType const & spin_density, occupations_array_type const & occupations, kpin_type const & kpin, double & nvx, double & ex) {
+
+		if (not any_true_functional()) {
+			nvx += 0.0;
+			ex += 0.0;
+		}
+		else {
+			auto full_density = process_density(spin_density, core_density);
+			double efunc = 0.0;
+			basis::field_set<basis::real_space, double> vxc(spin_density.skeleton());
+			vxc.fill(0.0);
+
+			basis::field_set<basis::real_space, double> vfunc(full_density.skeleton());
+			auto density_gradient = std::optional<decltype(operations::gradient(full_density))>{};
+			if (any_requires_gradient()) density_gradient.emplace(operations::gradient(full_density));
+
+			for (auto & func : functionals_){
+				if (not func.true_functional()) continue;
+
+				evaluate_functional(func, full_density, density_gradient, efunc, vfunc);
+				compute_vxc(spin_density, vfunc, vxc);
+				ex += efunc;
 			}
-			else {
-				assert(spin_density.set_size() == 1 or spin_density.set_size() == 2);
-				gpu::run(vfunc.local_set_size(), vfunc.basis().local_size(),
-					[vxi = begin(vfunc.matrix()), vxf = begin(vxc.matrix())] GPU_LAMBDA (auto is, auto ip){
-						vxf[ip][is] += vxi[ip][is];
-					});
+
+			basis::field<basis::real_space, double> rfield(vxc.basis());
+			rfield.fill(0.0);
+			int iphi = 0;
+			for (auto & phi : kpin) {
+				compute_psi_vxc_psi_ofr(occupations[iphi], phi, vxc, rfield);
+				iphi++;
 			}
+
+			rfield.all_reduce(comm);
+			nvx += operations::integral(rfield);
+		}
+	}
+
+	////////////////////////////////////////////////////////////////////////////////////////////
+
+	template<class occupations_array_type, class field_set_type, typename VxcType>
+	void compute_psi_vxc_psi_ofr(occupations_array_type const & occupations, field_set_type const & phi, VxcType const & vxc, basis::field<basis::real_space, double> & rfield) {
+
+		assert(std::get<1>(sizes(phi.spinor_array())) == phi.spinor_dim());
+		assert(std::get<2>(sizes(phi.spinor_array())) == phi.local_spinor_set_size());
+
+		if (vxc.set_size() == 1) {
+			gpu::run(phi.local_spinor_set_size(), phi.basis().local_size(),
+				[ph = begin(phi.matrix()), rf = begin(rfield.linear()), vx=begin(vxc.matrix()), occ=begin(occupations)] GPU_LAMBDA (auto ist, auto ip) {
+					rf[ip] += occ[ist] * vx[ip][0] * norm(ph[ip][ist]);
+				});
+		}
+		else if (vxc.set_size() == 2) {
+			gpu::run(phi.local_set_size(), phi.basis().local_size(),
+				[ph = begin(phi.matrix()), rf = begin(rfield.linear()), vx = begin(vxc.matrix()), occ = begin(occupations), spi = phi.spin_index()] GPU_LAMBDA (auto ist, auto ip) {
+					rf[ip] += occ[ist] * vx[ip][spi] * norm(ph[ip][ist]);
+				});
+		}
+		else {
+			assert(vxc.set_size() == 4);
+			gpu::run(phi.local_spinor_set_size(), phi.basis().local_size(),
+				[ph = begin(phi.spinor_array()), rf = begin(rfield.linear()), vx = begin(vxc.matrix()), occ = begin(occupations)] GPU_LAMBDA (auto ist, auto ip) {
+					auto offdiag = vx[ip][2] + complex{0.0, 1.0}*vx[ip][3];
+					auto cross = 2.0*occ[ist]*real(offdiag*ph[ip][1][ist]*conj(ph[ip][0][ist]));
+					rf[ip] += occ[ist]*vx[ip][0]*norm(ph[ip][0][ist]);
+					rf[ip] += occ[ist]*vx[ip][1]*norm(ph[ip][1][ist]);
+					rf[ip] += cross;
+				});
 		}
 	}
 
@@ -480,6 +489,68 @@ TEST_CASE(INQ_TEST_FILE, INQ_TEST_TAG){
 		CHECK(pbe.any_true_functional() == true);
 
 	}
-
+
+	SECTION("nvxc calculation unpolarized") {
+		auto par = input::parallelization(comm);
+		auto ions = systems::ions(systems::cell::cubic(10.0_b));
+		ions.insert("H", {0.0_b, 0.0_b, 0.0_b});
+		auto electrons = systems::electrons(par, ions, options::electrons{}.cutoff(30.0_Ha).extra_states(2).spin_unpolarized());
+		ground_state::initial_guess(ions, electrons);
+		auto result = ground_state::calculate(ions, electrons, options::theory{}.lda(), options::ground_state{}.steepest_descent().energy_tolerance(1.e-8_Ha).max_steps(100));
+		auto nvxc = result.energy.nvxc();
+		auto exc = result.energy.xc();
+		Approx target = Approx(nvxc).epsilon(1.e-10);
+		Approx target2= Approx(exc).epsilon(1.e-10);
+
+		hamiltonian::xc_term xc_(options::theory{}.lda(), electrons.spin_density().set_size());
+		auto core_density_ = electrons.atomic_pot().nlcc_density(electrons.states_comm(), electrons.spin_density().basis(), ions);
+		auto nvxc2 = 0.0;
+		auto exc2 = 0.0;
+		xc_.eval_psi_vxc_psi(electrons.kpin_states_comm(), core_density_, electrons.spin_density(), electrons.occupations(), electrons.kpin(), nvxc2, exc2);
+		CHECK(nvxc2 == target);
+		CHECK(exc2 == target2);
+	}
+
+	SECTION("nvxc calculation spin polarized") {
+		auto par = input::parallelization(comm);
+		auto ions = systems::ions(systems::cell::cubic(10.0_b));
+		ions.insert("H", {0.0_b, 0.0_b, 0.0_b});
+		auto electrons = systems::electrons(par, ions, options::electrons{}.cutoff(30.0_Ha).extra_states(2).spin_polarized());
+		ground_state::initial_guess(ions, electrons);
+		auto result = ground_state::calculate(ions, electrons, options::theory{}.lda(), options::ground_state{}.steepest_descent().energy_tolerance(1.e-8_Ha).max_steps(100));
+		auto nvxc = result.energy.nvxc();
+		auto exc = result.energy.xc();
+		Approx target = Approx(nvxc).epsilon(1.e-10);
+		Approx target2 = Approx(exc).epsilon(1.e-10);
+
+		hamiltonian::xc_term xc_(options::theory{}.lda(), electrons.spin_density().set_size());
+		auto core_density_ = electrons.atomic_pot().nlcc_density(electrons.states_comm(), electrons.spin_density().basis(), ions);
+		auto nvxc2 = 0.0;
+		auto exc2 = 0.0;
+		xc_.eval_psi_vxc_psi(electrons.kpin_states_comm(), core_density_, electrons.spin_density(), electrons.occupations(), electrons.kpin(), nvxc2, exc2);
+		CHECK(nvxc2 == target);
+		CHECK(exc2 == target2);
+	}
+
+	SECTION("nvxc calculation spin non collinear") {
+		auto par = input::parallelization(comm);
+		auto ions = systems::ions(systems::cell::cubic(10.0_b));
+		ions.insert("H", {0.0_b, 0.0_b, 0.0_b});
+		auto electrons = systems::electrons(par, ions, options::electrons{}.cutoff(30.0_Ha).extra_states(2).spin_non_collinear());
+		ground_state::initial_guess(ions, electrons);
+		auto result = ground_state::calculate(ions, electrons, options::theory{}.lda(), options::ground_state{}.steepest_descent().energy_tolerance(1.e-8_Ha).max_steps(100));
+		auto nvxc = result.energy.nvxc();
+		auto exc = result.energy.xc();
+		Approx target = Approx(nvxc).epsilon(1.e-10);
+		Approx target2 = Approx(exc).epsilon(1.e-10);
+
+		hamiltonian::xc_term xc_(options::theory{}.lda(), electrons.spin_density().set_size());
+		auto core_density_ = electrons.atomic_pot().nlcc_density(electrons.states_comm(), electrons.spin_density().basis(), ions);
+		auto nvxc2 = 0.0;
+		auto exc2 = 0.0;
+		xc_.eval_psi_vxc_psi(electrons.kpin_states_comm(), core_density_, electrons.spin_density(), electrons.occupations(), electrons.kpin(), nvxc2, exc2);
+		CHECK(nvxc2 == target);
+		CHECK(exc2 == target2);
+	}
 }
 #endif
diff --git a/tests/non_collinear_gs.cpp b/tests/non_collinear_gs.cpp
@@ -1,15 +1,17 @@
 /* -*- indent-tabs-mode: t -*- */
 
 #include <inq/inq.hpp>
-#include <stdio.h>
 
 using namespace inq;
 using namespace inq::magnitude;
 
 inq::utils::match match(3.0e-4);
 
-template <class EnvType>
-auto compute_GS(EnvType const & env, systems::ions const & ions) {
+int main (int argc, char ** argv) {
+	auto env = input::environment{};
+
+	auto ions = systems::ions(systems::cell::cubic(10.0_b));
+	ions.insert("H", {0.0_b, 0.0_b, 0.0_b});
 
 	auto electrons = systems::electrons(env.par(), ions, options::electrons{}.cutoff(30.0_Ha).extra_states(2).spin_polarized());
 	ground_state::initial_guess(ions, electrons);
@@ -22,20 +24,27 @@ auto compute_GS(EnvType const & env, systems::ions const & ions) {
 	auto mag_nc = observables::total_magnetization(electrons_nc.spin_density());
 
 	match.check("total energy",								result_col.energy.total(),	 result_nc.energy.total());
-	match.check("total magnetization",				mag.length(),								 mag_nc.length());
-}
-
-
-int main (int argc, char ** argv) {
-	auto env = input::environment{};
-
-	auto ions = systems::ions(systems::cell::cubic(10.0_b));
-	ions.insert("H", {0.0_b, 0.0_b, 0.0_b});
-	compute_GS(env, ions);
+	match.check("nvxc",                                     result_col.energy.nvxc(),    result_nc.energy.nvxc());
+	match.check("total magnetization",				        mag.length(),				 mag_nc.length());
 
 	auto d = 1.21_A;
 	ions = systems::ions(systems::cell::cubic(10.0_b));
 	ions.insert("O", {0.0_b, 0.0_b, d/2});
 	ions.insert("O", {0.0_b, 0.0_b,-d/2});
-	compute_GS(env, ions);
+
+	electrons = systems::electrons(env.par(), ions, options::electrons{}.cutoff(30.0_Ha).extra_states(2).spin_polarized());
+    ground_state::initial_guess(ions, electrons);
+    result_col = ground_state::calculate(ions, electrons, options::theory{}.lda(), inq::options::ground_state{}.steepest_descent().energy_tolerance(1.e-8_Ha).max_steps(1000));
+    mag = observables::total_magnetization(electrons.spin_density());
+
+    electrons_nc = systems::electrons(env.par(), ions, options::electrons{}.cutoff(30.0_Ha).extra_states(2).spin_non_collinear());
+    ground_state::initial_guess(ions, electrons_nc);
+    result_nc = ground_state::calculate(ions, electrons_nc, options::theory{}.lda(), inq::options::ground_state{}.steepest_descent().energy_tolerance(1.e-8_Ha).max_steps(10000).mixing(0.1));
+    mag_nc = observables::total_magnetization(electrons_nc.spin_density());
+
+    match.check("total energy",               result_col.energy.total(),   result_nc.energy.total());
+    match.check("nvxc",                       result_col.energy.nvxc(),    result_nc.energy.nvxc());
+    match.check("total magnetization",        mag.length(),                mag_nc.length());
+
+	return match.fail();
 }