From a8a38bd65078731fd1a9847dd1d71ca491283b6e Mon Sep 17 00:00:00 2001
From: kvkarandashev <kvkarandashev@gmail.com>
Date: Sun, 31 Mar 2024 19:19:19 +0200
Subject: [PATCH 1/7] FCHL PBC + OpenMP revision.

---
 src/qmllib/representations/facsf.f90          | 266 ++++++++++--------
 src/qmllib/representations/representations.py |  22 ++
 tests/verification_fchl_acsf_pbc.py           | 125 ++++++++
 3 files changed, 289 insertions(+), 124 deletions(-)
 create mode 100644 tests/verification_fchl_acsf_pbc.py

diff --git a/src/qmllib/representations/facsf.f90 b/src/qmllib/representations/facsf.f90
index 76163791..30ca6985 100644
--- a/src/qmllib/representations/facsf.f90
+++ b/src/qmllib/representations/facsf.f90
@@ -634,7 +634,7 @@ end subroutine fgenerate_acsf_and_gradients
 
 
 subroutine fgenerate_fchl_acsf(coordinates, nuclear_charges, elements, &
-                          & Rs2, Rs3, Ts, eta2, eta3, zeta, rcut, acut, natoms, rep_size, &
+                          & Rs2, Rs3, Ts, eta2, eta3, zeta, rcut, acut, natoms, natoms_tot, rep_size, &
                           & two_body_decay, three_body_decay, three_body_weight, rep)
 
     use acsf_utils, only: decay, calc_angle, calc_cos_angle
@@ -652,7 +652,7 @@ subroutine fgenerate_fchl_acsf(coordinates, nuclear_charges, elements, &
     double precision, intent(in) :: zeta
     double precision, intent(in) :: rcut
     double precision, intent(in) :: acut
-    integer, intent(in) :: natoms
+    integer, intent(in) :: natoms, natoms_tot
     integer, intent(in) :: rep_size
     double precision, intent(in) :: two_body_decay
     double precision, intent(in) :: three_body_decay
@@ -660,12 +660,14 @@ subroutine fgenerate_fchl_acsf(coordinates, nuclear_charges, elements, &
 
     double precision, intent(out), dimension(natoms, rep_size) :: rep
 
+    integer:: two_body_rep_size
     integer :: i, j, k, l, n, m, o, p, q, s, z, nelements, nbasis2, nbasis3, nabasis
     integer, allocatable, dimension(:) :: element_types
     double precision :: rij, rik, angle, cos_1, cos_2, cos_3, invcut
     ! double precision :: angle_1, angle_2, angle_3
     double precision, allocatable, dimension(:) :: radial, angular, a, b, c
-    double precision, allocatable, dimension(:, :) :: distance_matrix, rdecay, rep3
+    double precision, allocatable, dimension(:, :) :: distance_matrix, rdecay
+    double precision, allocatable, dimension(:, :) :: rep_2body
 
     double precision :: mu, sigma, ksi3
 
@@ -683,36 +685,36 @@ subroutine fgenerate_fchl_acsf(coordinates, nuclear_charges, elements, &
     ! number of element types
     nelements = size(elements)
     ! Allocate temporary
-    allocate(element_types(natoms))
+    allocate(element_types(natoms_tot))
 
     ! Store element index of every atom
-    ! !$OMP PARALLEL DO
-    do i = 1, natoms
+    !$OMP PARALLEL DO
+    do i = 1, natoms_tot
         do j = 1, nelements
-            if (nuclear_charges(i) .eq. elements(j)) then
+            if (nuclear_charges(modulo(i-1, natoms)+1) .eq. elements(j)) then
                 element_types(i) = j
                 continue
             endif
         enddo
     enddo
-    ! !$OMP END PARALLEL DO
+    !$OMP END PARALLEL DO
 
 
     ! Get distance matrix
     ! Allocate temporary
-    allocate(distance_matrix(natoms, natoms))
+    allocate(distance_matrix(natoms_tot, natoms_tot))
     distance_matrix = 0.0d0
 
 
-    !  !$OMP PARALLEL DO PRIVATE(rij)
-    do i = 1, natoms
-        do j = i+1, natoms
+    !$OMP PARALLEL DO PRIVATE(rij) COLLAPSE(1)
+    do i = 1, natoms_tot
+        do j = i+1, natoms_tot
             rij = norm2(coordinates(j,:) - coordinates(i,:))
             distance_matrix(i, j) = rij
             distance_matrix(j, i) = rij
         enddo
     enddo
-    ! !$OMP END PARALLEL DO
+    !$OMP END PARALLEL DO
 
     ! number of basis functions in the two body term
     nbasis2 = size(Rs2)
@@ -721,41 +723,45 @@ subroutine fgenerate_fchl_acsf(coordinates, nuclear_charges, elements, &
     invcut = 1.0d0 / rcut
 
     ! pre-calculate the radial decay in the two body terms
-    rdecay = decay(distance_matrix, invcut, natoms)
+    rdecay = decay(distance_matrix, invcut, natoms_tot)
 
     ! Allocate temporary
     allocate(radial(nbasis2))
-
+    two_body_rep_size=nbasis2*nelements
+    allocate(rep_2body(natoms, two_body_rep_size))
+    rep=0.0d0
+    rep_2body=0.0d0
     radial = 0.0d0
-    ! !$OMP PARALLEL DO PRIVATE(n,m,rij,radial) REDUCTION(+:rep)
+    !$OMP PARALLEL DO PRIVATE(n,m,rij,radial,mu,sigma) COLLAPSE(1) REDUCTION(+:rep_2body) SCHEDULE(dynamic)
     do i = 1, natoms
-        ! index of the element of atom i
-        m = element_types(i)
-        do j = i + 1, natoms
+        do j = i + 1, natoms_tot
+            ! index of the element of atom i
+            m = element_types(i) ! moved incide j loop to enable COLLAPSE
             ! index of the element of atom j
             n = element_types(j)
             ! distance between atoms i and j
             rij = distance_matrix(i,j)
-            if (rij <= rcut) then
-
-                ! two body term of the representation
-                mu    = log(rij / sqrt(1.0d0 + eta2  / rij**2))
-                sigma = sqrt(log(1.0d0 + eta2  / rij**2))
-                radial(:) = 0.0d0
-
-                do k = 1, nbasis2
-                   radial(k) = 1.0d0/(sigma* sqrt(2.0d0*pi) * Rs2(k)) * rdecay(i,j) &
+            if (rij > rcut) cycle
+            ! two body term of the representation
+            mu    = log(rij / sqrt(1.0d0 + eta2  / rij**2))
+            sigma = sqrt(log(1.0d0 + eta2  / rij**2))
+            radial(:) = 0.0d0
+
+            do k = 1, nbasis2
+                radial(k) = 1.0d0/(sigma* sqrt(2.0d0*pi) * Rs2(k)) * rdecay(i,j) &
                               & * exp( - (log(Rs2(k)) - mu)**2 / (2.0d0 * sigma**2) ) / rij**two_body_decay
-                enddo
+            enddo
 
-                rep(i, (n-1)*nbasis2 + 1:n*nbasis2) = rep(i, (n-1)*nbasis2 + 1:n*nbasis2) + radial
-                rep(j, (m-1)*nbasis2 + 1:m*nbasis2) = rep(j, (m-1)*nbasis2 + 1:m*nbasis2) + radial
-            endif
+            rep_2body(i, (n-1)*nbasis2 + 1:n*nbasis2) = rep_2body(i, (n-1)*nbasis2 + 1:n*nbasis2) + radial
+            if (j<=natoms) rep_2body(j, (m-1)*nbasis2 + 1:m*nbasis2) = rep_2body(j, (m-1)*nbasis2 + 1:m*nbasis2) + radial
         enddo
     enddo
-    ! !$OMP END PARALLEL DO
+    !$OMP END PARALLEL DO
+
+    rep(:, 1:two_body_rep_size)=rep_2body(:, :)
 
     deallocate(radial)
+    deallocate(rep_2body)
 
     ! number of radial basis functions in the three body term
     nbasis3 = size(Rs3)
@@ -765,23 +771,20 @@ subroutine fgenerate_fchl_acsf(coordinates, nuclear_charges, elements, &
     ! Inverse of the three body cutoff
     invcut = 1.0d0 / acut
     ! pre-calculate the radial decay in the three body terms
-    rdecay = decay(distance_matrix, invcut, natoms)
+    rdecay = decay(distance_matrix, invcut, natoms_tot)
 
     ! Allocate temporary
-    allocate(rep3(natoms,rep_size))
     allocate(a(3))
     allocate(b(3))
     allocate(c(3))
     allocate(radial(nbasis3))
     allocate(angular(nabasis))
 
-    rep3 = 0.0d0
-
     ! This could probably be done more efficiently if it's a bottleneck
     ! Also the order is a bit wobbly compared to the tensorflow implementation
-    ! !$OMP PARALLEL DO PRIVATE(rij, n, rik, m, a, b, c, angle, radial, angular, &
-    ! !$OMP cos_1, cos_2, cos_3, mu, sigma, o, ksi3, &
-    ! !$OMP p, q, s, z) REDUCTION(+:rep3) COLLAPSE(2) SCHEDULE(dynamic)
+    !$OMP PARALLEL DO PRIVATE(rij, n, rik, m, a, b, c, angle, radial, angular, &
+    !$OMP cos_1, cos_2, cos_3, mu, sigma, o, ksi3, &
+    !$OMP p, q, s, z) COLLAPSE(1) SCHEDULE(dynamic)
     do i = 1, natoms
         do j = 1, natoms - 1
             if (i .eq. j) cycle
@@ -832,25 +835,22 @@ subroutine fgenerate_fchl_acsf(coordinates, nuclear_charges, elements, &
                 ! The highest of the element indices for atoms j and k
                 q = max(n,m) - 1
                 ! calculate the indices that the three body terms should be added to
-                s = nelements * nbasis2 + nbasis3 * nabasis * (-(p * (p + 1))/2 + q + nelements * p) + 1
+                s = two_body_rep_size + nbasis3 * nabasis * (-(p * (p + 1))/2 + q + nelements * p) + 1
 
                 do l = 1, nbasis3
                     ! calculate the indices that the three body terms should be added to
                     z = s + (l-1) * nabasis
                     ! Add the contributions from atoms i,j and k
-                    rep3(i, z:z + nabasis - 1) = rep3(i, z:z + nabasis - 1) + angular * radial(l) * ksi3
+                    rep(i, z:z + nabasis - 1) = rep(i, z:z + nabasis - 1) + angular * radial(l) * ksi3
                 enddo
             enddo
         enddo
     enddo
-    ! !$OMP END PARALLEL DO
-
-    rep = rep + rep3
+    !$OMP END PARALLEL DO
 
     deallocate(element_types)
     deallocate(rdecay)
     deallocate(distance_matrix)
-    deallocate(rep3)
     deallocate(a)
     deallocate(b)
     deallocate(c)
@@ -860,8 +860,8 @@ subroutine fgenerate_fchl_acsf(coordinates, nuclear_charges, elements, &
 end subroutine fgenerate_fchl_acsf
 
 subroutine fgenerate_fchl_acsf_and_gradients(coordinates, nuclear_charges, elements, &
-                    & Rs2, Rs3, Ts, eta2, eta3, zeta, rcut, acut, natoms, rep_size, &
-                    & two_body_decay, three_body_decay, three_body_weight, rep, grad)
+    & Rs2, Rs3, Ts, eta2, eta3, zeta, rcut, acut, natoms, natoms_tot, rep_size, &
+    & two_body_decay, three_body_decay, three_body_weight, rep, grad)
 
     use acsf_utils, only: decay, calc_angle, calc_cos_angle
 
@@ -888,11 +888,15 @@ subroutine fgenerate_fchl_acsf_and_gradients(coordinates, nuclear_charges, eleme
     double precision, allocatable, dimension(:) :: exp_ln
     double precision, allocatable, dimension(:) :: log_Rs2
 
-    integer, intent(in) :: natoms
+    integer, intent(in) :: natoms, natoms_tot ! natoms_tot includes "virtual" atoms created to satisfy periodic boundary conditions.
     integer, intent(in) :: rep_size
     double precision, intent(out), dimension(natoms, rep_size) :: rep
     double precision, intent(out), dimension(natoms, rep_size, natoms, 3) :: grad
 
+!   Introduced to make OpenMP parallelization easier.
+    double precision, dimension(:, :, :), allocatable:: add_rep
+    double precision, dimension(:, :, :, :), allocatable :: add_grad
+
     integer :: i, j, k, l, m, n,  p, q, s, t, z, nelements, nbasis2, nbasis3, nabasis, twobody_size
     integer, allocatable, dimension(:) :: element_types
     double precision :: rij, rik, angle, dot, rij2, rik2, invrij, invrik, invrij2, invrik2, invcut
@@ -920,45 +924,44 @@ subroutine fgenerate_fchl_acsf_and_gradients(coordinates, nuclear_charges, eleme
     if (natoms /= size(nuclear_charges, dim=1)) then
         write(*,*) "ERROR: Atom Centered Symmetry Functions creation"
         write(*,*) natoms, "coordinates, but", &
-            & size(nuclear_charges, dim=1), "atom_types!"
+        & size(nuclear_charges, dim=1), "atom_types!"
         stop
     endif
 
-
     ! Number of unique elements
     nelements = size(elements)
     ! Allocate temporary
-    allocate(element_types(natoms))
+    allocate(element_types(natoms_tot))
 
     ! Store element index of every atom
-    ! !$OMP PARALLEL DO
-    do i = 1, natoms
+    !$OMP PARALLEL DO SCHEDULE(dynamic)
+    do i = 1, natoms_tot
         do j = 1, nelements
-            if (nuclear_charges(i) .eq. elements(j)) then
+            if (nuclear_charges(modulo(i-1, natoms)+1) .eq. elements(j)) then
                 element_types(i) = j
-                continue
+                exit
             endif
         enddo
     enddo
-    ! !$OMP END PARALLEL DO
+    !$OMP END PARALLEL DO
 
 
 
     ! Get distance matrix
     ! Allocate temporary
-    allocate(distance_matrix(natoms, natoms))
-    allocate(sq_distance_matrix(natoms, natoms))
-    allocate(inv_distance_matrix(natoms, natoms))
-    allocate(inv_sq_distance_matrix(natoms, natoms))
+    allocate(distance_matrix(natoms_tot, natoms_tot))
+    allocate(sq_distance_matrix(natoms_tot, natoms_tot))
+    allocate(inv_distance_matrix(natoms_tot, natoms_tot))
+    allocate(inv_sq_distance_matrix(natoms_tot, natoms_tot))
     distance_matrix = 0.0d0
     sq_distance_matrix = 0.0d0
     inv_distance_matrix = 0.0d0
     inv_sq_distance_matrix = 0.0d0
 
 
-    ! !$OMP PARALLEL DO PRIVATE(rij,rij2,invrij,invrij2) SCHEDULE(dynamic)
-    do i = 1, natoms
-        do j = i+1, natoms
+    !$OMP PARALLEL DO PRIVATE(rij,rij2,invrij,invrij2) COLLAPSE(1)
+    do i = 1, natoms_tot
+        do j = i+1, natoms_tot
             rij = norm2(coordinates(j,:) - coordinates(i,:))
             distance_matrix(i, j) = rij
             distance_matrix(j, i) = rij
@@ -973,7 +976,7 @@ subroutine fgenerate_fchl_acsf_and_gradients(coordinates, nuclear_charges, eleme
             inv_sq_distance_matrix(j, i) = invrij2
         enddo
     enddo
-    ! !$OMP END PARALLEL DO
+    !$OMP END PARALLEL DO
 
 
     ! Number of two body basis functions
@@ -982,7 +985,7 @@ subroutine fgenerate_fchl_acsf_and_gradients(coordinates, nuclear_charges, eleme
     ! Inverse of the two body cutoff distance
     invcut = 1.0d0 / rcut
     ! Pre-calculate the two body decay
-    rdecay = decay(distance_matrix, invcut, natoms)
+    rdecay = decay(distance_matrix, invcut, natoms_tot)
 
     ! Allocate temporary
     allocate(radial_base(nbasis2))
@@ -995,21 +998,23 @@ subroutine fgenerate_fchl_acsf_and_gradients(coordinates, nuclear_charges, eleme
     grad =0.0d0
     rep = 0.0d0
 
+    allocate(add_rep(nbasis2, natoms, natoms), add_grad(nbasis2, 3, natoms, natoms))
+    add_rep=0.0d0
+    add_grad=0.0d0
     log_Rs2(:) = log(Rs2(:))
 
-    ! !$OMP PARALLEL DO PRIVATE(m,n,rij,invrij,radial_base,radial,radial_part,part) REDUCTION(+:rep,grad) &
-    ! !$OMP SCHEDULE(dynamic)
+    !$OMP PARALLEL DO PRIVATE(m, n, rij, invrij, mu, sigma, exp_s2, exp_ln,&
+    !$OMP scaling, radial_base, radial, dx, part, dscal, ddecay) COLLAPSE(1) SCHEDULE(dynamic)
     do i = 1, natoms
-        ! The element index of atom i
-        m = element_types(i)
-        do j = i + 1, natoms
+        do j = i + 1, natoms_tot
+            ! The element index of atom i
+            m = element_types(i) ! moved inside j loop to allow COLLAPSE
             ! The element index of atom j
             n = element_types(j)
             ! Distance between atoms i and j
             rij = distance_matrix(i,j)
             if (rij <= rcut) then
                 invrij = inv_distance_matrix(i,j)
-
                 mu    = log(rij / sqrt(1.0d0 + eta2  * inv_sq_distance_matrix(i, j)))
                 sigma = sqrt(log(1.0d0 + eta2  * inv_sq_distance_matrix(i, j)))
                 exp_s2 = exp(sigma**2)
@@ -1023,35 +1028,47 @@ subroutine fgenerate_fchl_acsf_and_gradients(coordinates, nuclear_charges, eleme
                 radial(:) = radial_base(:) * scaling * rdecay(i,j)
 
                 rep(i, (n-1)*nbasis2 + 1:n*nbasis2) = rep(i, (n-1)*nbasis2 + 1:n*nbasis2) + radial
-                rep(j, (m-1)*nbasis2 + 1:m*nbasis2) = rep(j, (m-1)*nbasis2 + 1:m*nbasis2) + radial
-
-
+                if (j<=natoms) add_rep(:, i, j)=radial
                 do k = 1, 3
-
                     dx = -(coordinates(i,k) - coordinates(j,k))
-
                     part(:) = ((log_Rs2(:) - mu) * (-dx *(rij**2 * exp_s2 + eta2) / (rij * sqrt(exp_s2))**3) &
-                        &* sqrt(exp_s2) / (sigma**2 * rij) + (log_Rs2(:) - mu) ** 2 * eta2 * dx / &
-                        &(sigma**4 * rij**4 * exp_s2)) * exp_ln / (Rs2(:) * sigma  * sqrt(pi) * 2) &
-                        &- exp_ln  * eta2 * dx / (Rs2(:) * sigma**3 *sqrt(pi) * rij**4 * exp_s2 * 2.0d0)
+                                &* sqrt(exp_s2) / (sigma**2 * rij) + (log_Rs2(:) - mu) ** 2 * eta2 * dx / &
+                                &(sigma**4 * rij**4 * exp_s2)) * exp_ln / (Rs2(:) * sigma  * sqrt(pi) * 2) &
+                                &- exp_ln  * eta2 * dx / (Rs2(:) * sigma**3 *sqrt(pi) * rij**4 * exp_s2 * 2.0d0)
 
                     dscal = two_body_decay * dx / rij**(two_body_decay+2.0d0)
                     ddecay = dx * 0.5d0 * pi * sin(pi*rij * invcut) * invcut * invrij
 
                     part(:) = part(:) * scaling * rdecay(i,j) + radial_base(:) * dscal * rdecay(i,j) &
-                        & + radial_base(:) * scaling * ddecay
+                                    & + radial_base(:) * scaling * ddecay
 
                     ! The gradients wrt coordinates
                     grad(i, (n-1)*nbasis2 + 1:n*nbasis2, i, k) = grad(i, (n-1)*nbasis2 + 1:n*nbasis2, i, k) + part
-                    grad(i, (n-1)*nbasis2 + 1:n*nbasis2, j, k) = grad(i, (n-1)*nbasis2 + 1:n*nbasis2, j, k) - part
-                    grad(j, (m-1)*nbasis2 + 1:m*nbasis2, j, k) = grad(j, (m-1)*nbasis2 + 1:m*nbasis2, j, k) - part
-                    grad(j, (m-1)*nbasis2 + 1:m*nbasis2, i, k) = grad(j, (m-1)*nbasis2 + 1:m*nbasis2, i, k) + part
-
+                    if (j<=natoms) then
+                        grad(i, (n-1)*nbasis2 + 1:n*nbasis2, j, k) = grad(i, (n-1)*nbasis2 + 1:n*nbasis2, j, k) - part
+                        grad(j, (m-1)*nbasis2 + 1:m*nbasis2, i, k) = grad(j, (m-1)*nbasis2 + 1:m*nbasis2, i, k) + part
+                        add_grad(:, k, i, j)=part
+                    endif
                 enddo
             endif
         enddo
     enddo
-    ! !$OMP END PARALLEL DO
+    !$OMP END PARALLEL DO
+
+    !$OMP PARALLEL DO PRIVATE(m) SCHEDULE(dynamic)
+    do j = 2, natoms
+        do i = 1, j-1
+            m = element_types(i)
+            if (distance_matrix(i,j)>rcut) continue
+            rep(j, (m-1)*nbasis2 + 1:m*nbasis2) = rep(j, (m-1)*nbasis2 + 1:m*nbasis2) + add_rep(:, i, j)
+            do k=1, 3
+                grad(j, (m-1)*nbasis2 + 1:m*nbasis2, j, k) = grad(j, (m-1)*nbasis2 + 1:m*nbasis2, j, k) - add_grad(:, k, i, j)
+            enddo
+        enddo
+    enddo
+    !$OMP END PARALLEL DO
+
+    deallocate(add_rep, add_grad)
 
     deallocate(radial_base)
     deallocate(radial)
@@ -1069,7 +1086,7 @@ subroutine fgenerate_fchl_acsf_and_gradients(coordinates, nuclear_charges, eleme
     ! Inverse of the three body cutoff distance
     invcut = 1.0d0 / acut
     ! Pre-calculate the three body decay
-    rdecay = decay(distance_matrix, invcut, natoms)
+    rdecay = decay(distance_matrix, invcut, natoms_tot)
 
     ! Allocate temporary
     allocate(atom_rep(rep_size - twobody_size))
@@ -1114,17 +1131,17 @@ subroutine fgenerate_fchl_acsf_and_gradients(coordinates, nuclear_charges, eleme
     allocate(d_atm_extra_j(3))
     allocate(d_atm_extra_k(3))
 
-    ! ! This could probably be done more efficiently if it's a bottleneck
-    ! ! The order is a bit wobbly compared to the tensorflow implementation
-    ! !$OMP PARALLEL DO PRIVATE(atom_rep,atom_grad,a,b,c,radial,angular_base, &
-    ! !$OMP angular,d_angular,rij,n,rij2,invrij,invrij2,d_angular_d_i, &
-    ! !$OMP d_angular_d_j,d_angular_d_k,rik,m,rik2,invrik,invrik2,angle, &
-    ! !$OMP p,q,dot,d_radial,d_radial_d_i,d_radial_d_j,d_radial_d_k,s,z, &
-    ! !$OMP d_ijdecay,d_ikdecay) SCHEDULE(dynamic)
+    !$OMP PARALLEL DO PRIVATE(atom_rep, atom_grad, rij, n, rij2, invrij, invrij2,&
+    !$OMP rik, m, rik2, invrik, invrjk, invrik2, a, b, c, angle, cos_i, cos_k,&
+    !$OMP cos_j, radial_base, radial, p, q, dot, angular, d_angular, d_angular_d_j,&
+    !$OMP d_angular_d_k, d_angular_d_i, d_radial, d_radial_d_j, d_radial_d_k,&
+    !$OMP d_radial_d_i, d_ijdecay, d_ikdecay, invr_atm, atm, atm_i, atm_j, atm_k, vi,&
+    !$OMP vj, vk, d_atm_ii, d_atm_ij, d_atm_ik, d_atm_ji, d_atm_jj, d_atm_jk, d_atm_ki,&
+    !$OMP d_atm_kj, d_atm_kk, d_atm_extra_i, d_atm_extra_j, d_atm_extra_k, s, z) SCHEDULE(dynamic)
     do i = 1, natoms
         atom_rep = 0.0d0
         atom_grad = 0.0d0
-        do j = 1, natoms - 1
+        do j = 1, natoms_tot - 1
             if (i .eq. j) cycle
             ! distance between atom i and j
             rij = distance_matrix(i,j)
@@ -1137,7 +1154,7 @@ subroutine fgenerate_fchl_acsf_and_gradients(coordinates, nuclear_charges, eleme
             invrij = inv_distance_matrix(i,j)
             ! inverse squared distance between atom i and j
             invrij2 = inv_sq_distance_matrix(i,j)
-            do k = j + 1, natoms
+            do k = j + 1, natoms_tot
                 if (i .eq. k) cycle
                 ! distance between atom i and k
                 rik = distance_matrix(i,k)
@@ -1250,8 +1267,8 @@ subroutine fgenerate_fchl_acsf_and_gradients(coordinates, nuclear_charges, eleme
                             & angular * radial(l) * (atm_i * d_atm_ii(t) + atm_j * d_atm_ij(t) &
                             & + atm_k * d_atm_ik(t) + d_atm_extra_i(t)) * three_body_weight * rdecay(i,j) * rdecay(i,k) + &
                             & angular * radial(l) * (d_ijdecay(t) * rdecay(i,k) + rdecay(i,j) * d_ikdecay(t)) * atm
-
                         ! Add up all gradient contributions wrt atom j
+                        if (j<=natoms) &
                         atom_grad(z:z + nabasis - 1, j, t) = atom_grad(z:z + nabasis - 1, j, t) + &
                             & d_angular * d_angular_d_j(t) * radial(l) * atm * rdecay(i,j) * rdecay(i,k) + &
                             & angular * d_radial(l) * d_radial_d_j(t) * atm * rdecay(i,j) * rdecay(i,k) + &
@@ -1260,6 +1277,7 @@ subroutine fgenerate_fchl_acsf_and_gradients(coordinates, nuclear_charges, eleme
                             & angular * radial(l) * d_ijdecay(t) * rdecay(i,k) * atm
 
                         ! Add up all gradient contributions wrt atom k
+                        if (k<=natoms) &
                         atom_grad(z:z + nabasis - 1, k, t) = atom_grad(z:z + nabasis - 1, k, t) + &
                             & d_angular * d_angular_d_k(t) * radial(l) * atm * rdecay(i,j) * rdecay(i,k) + &
                             & angular * d_radial(l) * d_radial_d_k(t) * atm * rdecay(i,j) * rdecay(i,k) + &
@@ -1273,33 +1291,33 @@ subroutine fgenerate_fchl_acsf_and_gradients(coordinates, nuclear_charges, eleme
         enddo
         rep(i, twobody_size + 1:) = rep(i, twobody_size + 1:) + atom_rep
         grad(i, twobody_size + 1:,:,:) = grad(i, twobody_size + 1:,:,:) + atom_grad
-    enddo
-    ! !$OMP END PARALLEL DO
-
-    deallocate(rdecay)
-    deallocate(element_types)
-    deallocate(distance_matrix)
-    deallocate(inv_distance_matrix)
-    deallocate(sq_distance_matrix)
-    deallocate(inv_sq_distance_matrix)
-    deallocate(atom_rep)
-    deallocate(atom_grad)
-    deallocate(a)
-    deallocate(b)
-    deallocate(c)
-    deallocate(radial)
-    deallocate(angular_base)
-    deallocate(angular)
-    deallocate(d_angular)
-    deallocate(d_angular_d_i)
-    deallocate(d_angular_d_j)
-    deallocate(d_angular_d_k)
-    deallocate(d_radial)
-    deallocate(d_radial_d_i)
-    deallocate(d_radial_d_j)
-    deallocate(d_radial_d_k)
-    deallocate(d_ijdecay)
-    deallocate(d_ikdecay)
+enddo
+!$OMP END PARALLEL DO
+
+deallocate(rdecay)
+deallocate(element_types)
+deallocate(distance_matrix)
+deallocate(inv_distance_matrix)
+deallocate(sq_distance_matrix)
+deallocate(inv_sq_distance_matrix)
+deallocate(atom_rep)
+deallocate(atom_grad)
+deallocate(a)
+deallocate(b)
+deallocate(c)
+deallocate(radial)
+deallocate(angular_base)
+deallocate(angular)
+deallocate(d_angular)
+deallocate(d_angular_d_i)
+deallocate(d_angular_d_j)
+deallocate(d_angular_d_k)
+deallocate(d_radial)
+deallocate(d_radial_d_i)
+deallocate(d_radial_d_j)
+deallocate(d_radial_d_k)
+deallocate(d_ijdecay)
+deallocate(d_ikdecay)
 
 
 end subroutine fgenerate_fchl_acsf_and_gradients
diff --git a/src/qmllib/representations/representations.py b/src/qmllib/representations/representations.py
index 78bf8b39..22be4bd4 100644
--- a/src/qmllib/representations/representations.py
+++ b/src/qmllib/representations/representations.py
@@ -752,6 +752,7 @@ def generate_fchl_acsf(
     three_body_weight: float = 13.4,
     pad: Union[int, bool] = False,
     gradients: bool = False,
+    cell: Union[ndarray, None] = None,
 ) -> Union[Tuple[ndarray, ndarray], ndarray]:
     """
 
@@ -787,6 +788,8 @@ def generate_fchl_acsf(
     :type acut: float
     :param gradients: To return gradients or not
     :type gradients: boolean
+    :param cell: parameters of the unit cell if periodic boundary conditions are used.
+    :type cell: numpy array
     :return: Atom-centered symmetry functions representation
     :rtype: numpy array
     """
@@ -803,6 +806,23 @@ def generate_fchl_acsf(
     # Normalization constant for three-body
     three_body_weight = np.sqrt(eta3 / np.pi) * three_body_weight
 
+    # If periodic boundary conditions are used add neighboring cells that can influence the center cell.
+    natoms_tot = natoms
+    if cell is not None:
+        nExtend = (np.floor(max(rcut, acut) / np.linalg.norm(cell, 2, axis=0)) + 1).astype(int)
+        true_coords = coordinates
+        for i in range(-nExtend[0], nExtend[0] + 1):
+            for j in range(-nExtend[1], nExtend[1] + 1):
+                for k in range(-nExtend[2], nExtend[2] + 1):
+                    if not (i == 0 and j == 0 and k == 0):
+                        true_coords = np.append(
+                            true_coords,
+                            coordinates + i * cell[0, :] + j * cell[1, :] + k * cell[2, :],
+                            axis=0,
+                        )
+                        natoms_tot += natoms
+        coordinates = true_coords
+
     if gradients is False:
 
         rep = fgenerate_fchl_acsf(
@@ -818,6 +838,7 @@ def generate_fchl_acsf(
             rcut,
             acut,
             natoms,
+            natoms_tot,
             descr_size,
             two_body_decay,
             three_body_decay,
@@ -853,6 +874,7 @@ def generate_fchl_acsf(
             rcut,
             acut,
             natoms,
+            natoms_tot,
             descr_size,
             two_body_decay,
             three_body_decay,
diff --git a/tests/verification_fchl_acsf_pbc.py b/tests/verification_fchl_acsf_pbc.py
new file mode 100644
index 00000000..596e7ebb
--- /dev/null
+++ b/tests/verification_fchl_acsf_pbc.py
@@ -0,0 +1,125 @@
+"""
+Runs the same calculation using the 'cell' keyword for generate_fchl_acsf and by straightforward
+'cell cloning' done inside the test script.
+"""
+import os
+import random
+from copy import deepcopy
+
+import numpy as np
+
+from qmllib.representations import generate_fchl_acsf
+from qmllib.utils.xyz_format import read_xyz
+
+np.set_printoptions(linewidth=666, edgeitems=10)
+
+REP_PARAMS = dict()
+REP_PARAMS["elements"] = [1, 6, 7, 8, 16]
+REP_PARAMS["rcut"] = 5.0
+random.seed(1)
+cell_added_cutoff = 0.1
+
+
+#   For given molecular coordinates generate a cell just large enough to contain the molecule.
+def suitable_cell(coords):
+    max_coords = None
+    min_coords = None
+    for atom_coords in coords:
+        if max_coords is None:
+            max_coords = deepcopy(atom_coords)
+            min_coords = deepcopy(atom_coords)
+        else:
+            max_coords = np.maximum(max_coords, atom_coords)
+            min_coords = np.minimum(min_coords, atom_coords)
+    return np.diag((max_coords - min_coords) * (1.0 + cell_added_cutoff))
+
+
+def generate_fchl_acsf_brute_pbc(nuclear_charges, coordinates, cell, gradients=False):
+    num_atoms = len(nuclear_charges)
+    all_coords = deepcopy(coordinates)
+    all_charges = deepcopy(nuclear_charges)
+    nExtend = (np.floor(REP_PARAMS["rcut"] / np.linalg.norm(cell, 2, axis=0)) + 1).astype(int)
+    print("Checked nExtend:", nExtend, ", gradient calculation:", gradients)
+    for i in range(-nExtend[0], nExtend[0] + 1):
+        for j in range(-nExtend[1], nExtend[1] + 1):
+            for k in range(-nExtend[2], nExtend[2] + 1):
+                if not (i == 0 and j == 0 and k == 0):
+                    all_coords = np.append(
+                        all_coords,
+                        coordinates + i * cell[0, :] + j * cell[1, :] + k * cell[2, :],
+                        axis=0,
+                    )
+                    all_charges = np.append(all_charges, nuclear_charges)
+    if gradients:
+        rep, drep = generate_fchl_acsf(all_charges, all_coords, gradients=True, **REP_PARAMS)
+    else:
+        rep = generate_fchl_acsf(all_charges, all_coords, gradients=False, **REP_PARAMS)
+    rep = rep[:num_atoms, :]
+    if gradients:
+        drep = drep[:num_atoms, :, :num_atoms, :]
+        return rep, drep
+    else:
+        return rep
+
+
+def ragged_array_close(arr1, arr2, error_msg):
+    for el1, el2 in zip(arr1, arr2):
+        assert np.allclose(el1, el2), error_msg
+
+
+def test_fchl_acsf_pbc():
+
+    qm7_dir = os.path.dirname(os.path.realpath(__file__)) + "/assets/qm7"
+    os.chdir(qm7_dir)
+    all_xyzs = os.listdir()
+    test_xyzs = random.sample(all_xyzs, 3)
+
+    reps_no_grad1 = []
+    reps_no_grad2 = []
+
+    reps_wgrad1 = []
+    reps_wgrad2 = []
+    dreps1 = []
+    dreps2 = []
+
+    for xyz in test_xyzs:
+        print("Tested xyz:", xyz)
+        coords, atoms = read_xyz(xyz)
+        cell = suitable_cell(coords)
+        reps_no_grad1.append(generate_fchl_acsf_brute_pbc(atoms, coords, cell, gradients=False))
+        reps_no_grad2.append(
+            generate_fchl_acsf(atoms, coords, cell=cell, gradients=False, **REP_PARAMS)
+        )
+
+        rep_wgrad1, drep1 = generate_fchl_acsf_brute_pbc(atoms, coords, cell, gradients=True)
+        rep_wgrad2, drep2 = generate_fchl_acsf(
+            atoms, coords, cell=cell, gradients=True, **REP_PARAMS
+        )
+
+        reps_wgrad1.append(rep_wgrad1)
+        reps_wgrad2.append(rep_wgrad2)
+
+        dreps1.append(drep1)
+        dreps2.append(drep2)
+
+    ragged_array_close(
+        reps_no_grad1,
+        reps_no_grad2,
+        "Error in PBC implementation for generate_fchl_acsf without gradients.",
+    )
+    ragged_array_close(
+        reps_wgrad1,
+        reps_wgrad2,
+        "Error in PBC implementation for generate_fchl_acsf with gradients (representation).",
+    )
+    ragged_array_close(
+        dreps1,
+        dreps2,
+        "Error in PBC implementation for generate_fchl_acsf with gradients (gradient of representation).",
+    )
+    print("Passed")
+
+
+if __name__ == "__main__":
+
+    test_fchl_acsf_pbc()

From e8c12d292870fd12a60185ebb3d3037e46815bb5 Mon Sep 17 00:00:00 2001
From: kvkarandashev <kvkarandashev@gmail.com>
Date: Mon, 1 Apr 2024 16:49:17 +0200
Subject: [PATCH 2/7] Revision of FCHL's PBC and OpenMP, and the verification
 script.

---
 src/qmllib/representations/facsf.f90          | 125 +++++++++---------
 src/qmllib/representations/representations.py |  15 ++-
 tests/verification_fchl_acsf_pbc.py           |   7 +-
 3 files changed, 78 insertions(+), 69 deletions(-)

diff --git a/src/qmllib/representations/facsf.f90 b/src/qmllib/representations/facsf.f90
index 30ca6985..dc51b3d0 100644
--- a/src/qmllib/representations/facsf.f90
+++ b/src/qmllib/representations/facsf.f90
@@ -652,7 +652,7 @@ subroutine fgenerate_fchl_acsf(coordinates, nuclear_charges, elements, &
     double precision, intent(in) :: zeta
     double precision, intent(in) :: rcut
     double precision, intent(in) :: acut
-    integer, intent(in) :: natoms, natoms_tot
+    integer, intent(in) :: natoms, natoms_tot ! natoms_tot includes "virtual" atoms created to satisfy periodic boundary conditions.
     integer, intent(in) :: rep_size
     double precision, intent(in) :: two_body_decay
     double precision, intent(in) :: three_body_decay
@@ -661,13 +661,14 @@ subroutine fgenerate_fchl_acsf(coordinates, nuclear_charges, elements, &
     double precision, intent(out), dimension(natoms, rep_size) :: rep
 
     integer:: two_body_rep_size
-    integer :: i, j, k, l, n, m, o, p, q, s, z, nelements, nbasis2, nbasis3, nabasis
+    integer :: i, j, k, l, n, m, o, p, q, s, z, nelements, nbasis2, nbasis3, nabasis, j_id, j_id_max
     integer, allocatable, dimension(:) :: element_types
     double precision :: rij, rik, angle, cos_1, cos_2, cos_3, invcut
     ! double precision :: angle_1, angle_2, angle_3
     double precision, allocatable, dimension(:) :: radial, angular, a, b, c
     double precision, allocatable, dimension(:, :) :: distance_matrix, rdecay
-    double precision, allocatable, dimension(:, :) :: rep_2body
+    double precision, allocatable, dimension(:, :) :: saved_radial
+    integer, allocatable, dimension(:):: saved_j
 
     double precision :: mu, sigma, ksi3
 
@@ -688,12 +689,12 @@ subroutine fgenerate_fchl_acsf(coordinates, nuclear_charges, elements, &
     allocate(element_types(natoms_tot))
 
     ! Store element index of every atom
-    !$OMP PARALLEL DO
+    !$OMP PARALLEL DO SCHEDULE(dynamic)
     do i = 1, natoms_tot
         do j = 1, nelements
             if (nuclear_charges(modulo(i-1, natoms)+1) .eq. elements(j)) then
                 element_types(i) = j
-                continue
+                exit
             endif
         enddo
     enddo
@@ -706,7 +707,7 @@ subroutine fgenerate_fchl_acsf(coordinates, nuclear_charges, elements, &
     distance_matrix = 0.0d0
 
 
-    !$OMP PARALLEL DO PRIVATE(rij) COLLAPSE(1)
+    !$OMP PARALLEL DO PRIVATE(rij) SCHEDULE(dynamic)
     do i = 1, natoms_tot
         do j = i+1, natoms_tot
             rij = norm2(coordinates(j,:) - coordinates(i,:))
@@ -727,21 +728,21 @@ subroutine fgenerate_fchl_acsf(coordinates, nuclear_charges, elements, &
 
     ! Allocate temporary
     allocate(radial(nbasis2))
-    two_body_rep_size=nbasis2*nelements
-    allocate(rep_2body(natoms, two_body_rep_size))
+    allocate(saved_radial(nbasis2, natoms_tot), saved_j(natoms_tot))
     rep=0.0d0
-    rep_2body=0.0d0
-    radial = 0.0d0
-    !$OMP PARALLEL DO PRIVATE(n,m,rij,radial,mu,sigma) COLLAPSE(1) REDUCTION(+:rep_2body) SCHEDULE(dynamic)
+    !$OMP PARALLEL DO PRIVATE(n,m,rij,radial,mu,sigma,saved_radial, saved_j, j_id_max) SCHEDULE(dynamic)
     do i = 1, natoms
+        ! index of the element of atom i
+        m = element_types(i)
+        j_id_max=0
         do j = i + 1, natoms_tot
-            ! index of the element of atom i
-            m = element_types(i) ! moved incide j loop to enable COLLAPSE
-            ! index of the element of atom j
-            n = element_types(j)
             ! distance between atoms i and j
             rij = distance_matrix(i,j)
             if (rij > rcut) cycle
+            j_id_max=j_id_max+1
+            saved_j(j_id_max)=j
+            ! index of the element of atom j
+            n = element_types(j)
             ! two body term of the representation
             mu    = log(rij / sqrt(1.0d0 + eta2  / rij**2))
             sigma = sqrt(log(1.0d0 + eta2  / rij**2))
@@ -751,17 +752,21 @@ subroutine fgenerate_fchl_acsf(coordinates, nuclear_charges, elements, &
                 radial(k) = 1.0d0/(sigma* sqrt(2.0d0*pi) * Rs2(k)) * rdecay(i,j) &
                               & * exp( - (log(Rs2(k)) - mu)**2 / (2.0d0 * sigma**2) ) / rij**two_body_decay
             enddo
-
-            rep_2body(i, (n-1)*nbasis2 + 1:n*nbasis2) = rep_2body(i, (n-1)*nbasis2 + 1:n*nbasis2) + radial
-            if (j<=natoms) rep_2body(j, (m-1)*nbasis2 + 1:m*nbasis2) = rep_2body(j, (m-1)*nbasis2 + 1:m*nbasis2) + radial
+            saved_radial(:, j_id_max)=radial(:)
+        enddo
+        !$OMP CRITICAL
+        do j_id = 1, j_id_max
+            j=saved_j(j_id)
+            n=element_types(j)
+            rep(i, (n-1)*nbasis2 + 1:n*nbasis2) = rep(i, (n-1)*nbasis2 + 1:n*nbasis2) + saved_radial(:, j_id)
+            if (j<=natoms) rep(j, (m-1)*nbasis2 + 1:m*nbasis2) = rep(j, (m-1)*nbasis2 + 1:m*nbasis2) + saved_radial(:, j_id)
         enddo
+        !$OMP END CRITICAL
     enddo
     !$OMP END PARALLEL DO
 
-    rep(:, 1:two_body_rep_size)=rep_2body(:, :)
-
     deallocate(radial)
-    deallocate(rep_2body)
+    deallocate(saved_radial, saved_j)
 
     ! number of radial basis functions in the three body term
     nbasis3 = size(Rs3)
@@ -779,21 +784,22 @@ subroutine fgenerate_fchl_acsf(coordinates, nuclear_charges, elements, &
     allocate(c(3))
     allocate(radial(nbasis3))
     allocate(angular(nabasis))
+    two_body_rep_size=nbasis2*nelements
 
     ! This could probably be done more efficiently if it's a bottleneck
     ! Also the order is a bit wobbly compared to the tensorflow implementation
     !$OMP PARALLEL DO PRIVATE(rij, n, rik, m, a, b, c, angle, radial, angular, &
     !$OMP cos_1, cos_2, cos_3, mu, sigma, o, ksi3, &
-    !$OMP p, q, s, z) COLLAPSE(1) SCHEDULE(dynamic)
+    !$OMP p, q, s, z, l) SCHEDULE(dynamic)
     do i = 1, natoms
-        do j = 1, natoms - 1
+        do j = 1, natoms_tot - 1
             if (i .eq. j) cycle
             ! distance between atoms i and j
             rij = distance_matrix(i,j)
             if (rij > acut)  cycle
             ! index of the element of atom j
             n = element_types(j)
-            do k = j + 1, natoms
+            do k = j + 1, natoms_tot
                 if (i .eq. k) cycle
                 if (j .eq. k) cycle
                 ! distance between atoms i and k
@@ -959,7 +965,7 @@ subroutine fgenerate_fchl_acsf_and_gradients(coordinates, nuclear_charges, eleme
     inv_sq_distance_matrix = 0.0d0
 
 
-    !$OMP PARALLEL DO PRIVATE(rij,rij2,invrij,invrij2) COLLAPSE(1)
+    !$OMP PARALLEL DO PRIVATE(rij,rij2,invrij,invrij2) SCHEDULE(dynamic)
     do i = 1, natoms_tot
         do j = i+1, natoms_tot
             rij = norm2(coordinates(j,:) - coordinates(i,:))
@@ -1004,53 +1010,52 @@ subroutine fgenerate_fchl_acsf_and_gradients(coordinates, nuclear_charges, eleme
     log_Rs2(:) = log(Rs2(:))
 
     !$OMP PARALLEL DO PRIVATE(m, n, rij, invrij, mu, sigma, exp_s2, exp_ln,&
-    !$OMP scaling, radial_base, radial, dx, part, dscal, ddecay) COLLAPSE(1) SCHEDULE(dynamic)
+    !$OMP scaling, radial_base, radial, dx, part, dscal, ddecay) SCHEDULE(dynamic)
     do i = 1, natoms
+        ! The element index of atom i
+        m = element_types(i) ! moved inside loop to enable COLLAPSE
         do j = i + 1, natoms_tot
-            ! The element index of atom i
-            m = element_types(i) ! moved inside j loop to allow COLLAPSE
+            rij = distance_matrix(i,j)
+            if (rij > rcut) continue
             ! The element index of atom j
             n = element_types(j)
             ! Distance between atoms i and j
-            rij = distance_matrix(i,j)
-            if (rij <= rcut) then
-                invrij = inv_distance_matrix(i,j)
-                mu    = log(rij / sqrt(1.0d0 + eta2  * inv_sq_distance_matrix(i, j)))
-                sigma = sqrt(log(1.0d0 + eta2  * inv_sq_distance_matrix(i, j)))
-                exp_s2 = exp(sigma**2)
-                exp_ln = exp(-(log_Rs2(:) - mu)**2 / sigma**2 * 0.5d0) * sqrt(2.0d0)
+            invrij = inv_distance_matrix(i,j)
+            mu    = log(rij / sqrt(1.0d0 + eta2  * inv_sq_distance_matrix(i, j)))
+            sigma = sqrt(log(1.0d0 + eta2  * inv_sq_distance_matrix(i, j)))
+            exp_s2 = exp(sigma**2)
+            exp_ln = exp(-(log_Rs2(:) - mu)**2 / sigma**2 * 0.5d0) * sqrt(2.0d0)
 
-                scaling = 1.0d0 / rij**two_body_decay
+            scaling = 1.0d0 / rij**two_body_decay
 
 
-                radial_base(:) = 1.0d0/(sigma* sqrt(2.0d0*pi) * Rs2(:)) * exp(-(log_Rs2(:) - mu)**2 / (2.0d0 * sigma**2))
+            radial_base(:) = 1.0d0/(sigma* sqrt(2.0d0*pi) * Rs2(:)) * exp(-(log_Rs2(:) - mu)**2 / (2.0d0 * sigma**2))
 
-                radial(:) = radial_base(:) * scaling * rdecay(i,j)
+            radial(:) = radial_base(:) * scaling * rdecay(i,j)
 
-                rep(i, (n-1)*nbasis2 + 1:n*nbasis2) = rep(i, (n-1)*nbasis2 + 1:n*nbasis2) + radial
-                if (j<=natoms) add_rep(:, i, j)=radial
-                do k = 1, 3
-                    dx = -(coordinates(i,k) - coordinates(j,k))
-                    part(:) = ((log_Rs2(:) - mu) * (-dx *(rij**2 * exp_s2 + eta2) / (rij * sqrt(exp_s2))**3) &
-                                &* sqrt(exp_s2) / (sigma**2 * rij) + (log_Rs2(:) - mu) ** 2 * eta2 * dx / &
-                                &(sigma**4 * rij**4 * exp_s2)) * exp_ln / (Rs2(:) * sigma  * sqrt(pi) * 2) &
-                                &- exp_ln  * eta2 * dx / (Rs2(:) * sigma**3 *sqrt(pi) * rij**4 * exp_s2 * 2.0d0)
+            rep(i, (n-1)*nbasis2 + 1:n*nbasis2) = rep(i, (n-1)*nbasis2 + 1:n*nbasis2) + radial
+            if (j<=natoms) add_rep(:, i, j)=radial
+            do k = 1, 3
+                dx = -(coordinates(i,k) - coordinates(j,k))
+                part(:) = ((log_Rs2(:) - mu) * (-dx *(rij**2 * exp_s2 + eta2) / (rij * sqrt(exp_s2))**3) &
+                            &* sqrt(exp_s2) / (sigma**2 * rij) + (log_Rs2(:) - mu) ** 2 * eta2 * dx / &
+                            &(sigma**4 * rij**4 * exp_s2)) * exp_ln / (Rs2(:) * sigma  * sqrt(pi) * 2) &
+                            &- exp_ln  * eta2 * dx / (Rs2(:) * sigma**3 *sqrt(pi) * rij**4 * exp_s2 * 2.0d0)
 
-                    dscal = two_body_decay * dx / rij**(two_body_decay+2.0d0)
-                    ddecay = dx * 0.5d0 * pi * sin(pi*rij * invcut) * invcut * invrij
+                dscal = two_body_decay * dx / rij**(two_body_decay+2.0d0)
+                ddecay = dx * 0.5d0 * pi * sin(pi*rij * invcut) * invcut * invrij
 
-                    part(:) = part(:) * scaling * rdecay(i,j) + radial_base(:) * dscal * rdecay(i,j) &
-                                    & + radial_base(:) * scaling * ddecay
+                part(:) = part(:) * scaling * rdecay(i,j) + radial_base(:) * dscal * rdecay(i,j) &
+                                & + radial_base(:) * scaling * ddecay
 
-                    ! The gradients wrt coordinates
-                    grad(i, (n-1)*nbasis2 + 1:n*nbasis2, i, k) = grad(i, (n-1)*nbasis2 + 1:n*nbasis2, i, k) + part
-                    if (j<=natoms) then
-                        grad(i, (n-1)*nbasis2 + 1:n*nbasis2, j, k) = grad(i, (n-1)*nbasis2 + 1:n*nbasis2, j, k) - part
-                        grad(j, (m-1)*nbasis2 + 1:m*nbasis2, i, k) = grad(j, (m-1)*nbasis2 + 1:m*nbasis2, i, k) + part
-                        add_grad(:, k, i, j)=part
-                    endif
-                enddo
-            endif
+                ! The gradients wrt coordinates
+                grad(i, (n-1)*nbasis2 + 1:n*nbasis2, i, k) = grad(i, (n-1)*nbasis2 + 1:n*nbasis2, i, k) + part
+                if (j<=natoms) then
+                    grad(i, (n-1)*nbasis2 + 1:n*nbasis2, j, k) = grad(i, (n-1)*nbasis2 + 1:n*nbasis2, j, k) - part
+                    grad(j, (m-1)*nbasis2 + 1:m*nbasis2, i, k) = grad(j, (m-1)*nbasis2 + 1:m*nbasis2, i, k) + part
+                    add_grad(:, k, i, j)=part
+                endif
+            enddo
         enddo
     enddo
     !$OMP END PARALLEL DO
@@ -1058,8 +1063,8 @@ subroutine fgenerate_fchl_acsf_and_gradients(coordinates, nuclear_charges, eleme
     !$OMP PARALLEL DO PRIVATE(m) SCHEDULE(dynamic)
     do j = 2, natoms
         do i = 1, j-1
+            if (distance_matrix(i,j)>rcut) cycle
             m = element_types(i)
-            if (distance_matrix(i,j)>rcut) continue
             rep(j, (m-1)*nbasis2 + 1:m*nbasis2) = rep(j, (m-1)*nbasis2 + 1:m*nbasis2) + add_rep(:, i, j)
             do k=1, 3
                 grad(j, (m-1)*nbasis2 + 1:m*nbasis2, j, k) = grad(j, (m-1)*nbasis2 + 1:m*nbasis2, j, k) - add_grad(:, k, i, j)
diff --git a/src/qmllib/representations/representations.py b/src/qmllib/representations/representations.py
index 22be4bd4..c727f2aa 100644
--- a/src/qmllib/representations/representations.py
+++ b/src/qmllib/representations/representations.py
@@ -814,13 +814,14 @@ def generate_fchl_acsf(
         for i in range(-nExtend[0], nExtend[0] + 1):
             for j in range(-nExtend[1], nExtend[1] + 1):
                 for k in range(-nExtend[2], nExtend[2] + 1):
-                    if not (i == 0 and j == 0 and k == 0):
-                        true_coords = np.append(
-                            true_coords,
-                            coordinates + i * cell[0, :] + j * cell[1, :] + k * cell[2, :],
-                            axis=0,
-                        )
-                        natoms_tot += natoms
+                    if i == 0 and j == 0 and k == 0:
+                        continue
+                    true_coords = np.append(
+                        true_coords,
+                        coordinates + i * cell[0, :] + j * cell[1, :] + k * cell[2, :],
+                        axis=0,
+                    )
+                    natoms_tot += natoms
         coordinates = true_coords
 
     if gradients is False:
diff --git a/tests/verification_fchl_acsf_pbc.py b/tests/verification_fchl_acsf_pbc.py
index 596e7ebb..e9d75058 100644
--- a/tests/verification_fchl_acsf_pbc.py
+++ b/tests/verification_fchl_acsf_pbc.py
@@ -16,6 +16,7 @@
 REP_PARAMS = dict()
 REP_PARAMS["elements"] = [1, 6, 7, 8, 16]
 REP_PARAMS["rcut"] = 5.0
+REP_PARAMS["acut"] = 5.0
 random.seed(1)
 cell_added_cutoff = 0.1
 
@@ -38,7 +39,9 @@ def generate_fchl_acsf_brute_pbc(nuclear_charges, coordinates, cell, gradients=F
     num_atoms = len(nuclear_charges)
     all_coords = deepcopy(coordinates)
     all_charges = deepcopy(nuclear_charges)
-    nExtend = (np.floor(REP_PARAMS["rcut"] / np.linalg.norm(cell, 2, axis=0)) + 1).astype(int)
+    nExtend = (
+        np.floor(max(REP_PARAMS["rcut"], REP_PARAMS["acut"]) / np.linalg.norm(cell, 2, axis=0)) + 1
+    ).astype(int)
     print("Checked nExtend:", nExtend, ", gradient calculation:", gradients)
     for i in range(-nExtend[0], nExtend[0] + 1):
         for j in range(-nExtend[1], nExtend[1] + 1):
@@ -72,7 +75,7 @@ def test_fchl_acsf_pbc():
     qm7_dir = os.path.dirname(os.path.realpath(__file__)) + "/assets/qm7"
     os.chdir(qm7_dir)
     all_xyzs = os.listdir()
-    test_xyzs = random.sample(all_xyzs, 3)
+    test_xyzs = random.sample(all_xyzs, 10)
 
     reps_no_grad1 = []
     reps_no_grad2 = []

From 6a2d6390d263c92b2d45ad313178811fde0ba5fa Mon Sep 17 00:00:00 2001
From: kvkarandashev <kvkarandashev@gmail.com>
Date: Thu, 11 Apr 2024 15:22:07 +0200
Subject: [PATCH 3/7] FCHL gradients for PBC fixed.

Verification script fixed and made more thorough.
---
 Makefile                             |   49 +-
 src/qmllib/representations/facsf.f90 | 2475 +++++++++++++-------------
 tests/verification_fchl_acsf_pbc.py  |  194 +-
 3 files changed, 1387 insertions(+), 1331 deletions(-)

diff --git a/Makefile b/Makefile
index a27cfeaa..dbbe9eac 100644
--- a/Makefile
+++ b/Makefile
@@ -5,20 +5,24 @@ pip=./env/bin/pip
 pytest=pytest
 j=1
 
+version_file=src/qmllib/version.py
+
 .PHONY: build
 
-all: env setup
+all: env
+
+## Setup
 
 env:
 	${mamba} env create -f ./environment_dev.yaml -p ./env --quiet
 	${python} -m pre_commit install
 	${python} -m pip install -e .
 
-setup: ./.git/hooks/pre-commit
-
 ./.git/hooks/pre-commit:
 	${python} -m pre_commit install
 
+## Development
+
 format:
 	${python} -m pre_commit run --all-files
 
@@ -37,9 +41,42 @@ compile:
 
 build:
 	${python} -m build --sdist --skip-dependency-check  .
-	@# ${python} -m pip wheel --no-deps -v .
-	@# ${python} -m pip wheel -v .
-	@#ls *.whl
+
+upload:
+	${python} -m twine upload ./dist/*.tar.gz
+
+## Version
+
+VERSION=$(shell cat ${version_file} | egrep -o "([0-9]{1,}\.)+[0-9]{1,}")
+VERSION_PATCH=$(shell echo ${VERSION} | cut -d'.' -f3)
+VERSION_MINOR=$(shell echo ${VERSION} | cut -d'.' -f2)
+VERSION_MAJOR=$(shell echo ${VERSION} | cut -d'.' -f1)
+GIT_COMMIT=$(shell git rev-parse --short HEAD)
+
+bump-version-dev:
+	test ! -z "${VERSION}"
+	test ! -z "${GIT_COMMIT}"
+	exit 1
+	# Not Implemented
+
+bump-version-patch:
+	test ! -z "${VERSION_PATCH}"
+	echo "__version__ = \"${VERSION_MAJOR}.${VERSION_MINOR}.$(shell awk 'BEGIN{print ${VERSION_PATCH}+1}')\"" > ${version_file}
+
+bump-version-minor:
+	test ! -z "${VERSION_MINOR}"
+	echo "__version__ = \"${VERSION_MAJOR}.$(shell awk 'BEGIN{print ${VERSION_MINOR}+1}').0\"" > ${version_file}
+
+bump-version-major:
+	test ! -z "${VERSION_MAJOR}"
+	echo "__version__ = \"$(shell awk 'BEGIN{print ${VERSION_MAJOR}+1}').0.0\"" > ${version_file}
+
+commit-version-tag:
+	git tag --list | grep -qix "${VERSION}"
+	git commit -m "Release ${VERSION}" --no-verify ${version_file}
+	git tag 'v${VERSION}'
+
+## Clean
 
 clean:
 	find ./src/ -type f \
diff --git a/src/qmllib/representations/facsf.f90 b/src/qmllib/representations/facsf.f90
index dc51b3d0..7d60efd0 100644
--- a/src/qmllib/representations/facsf.f90
+++ b/src/qmllib/representations/facsf.f90
@@ -1,292 +1,278 @@
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
 module acsf_utils
 
-    implicit none
+   implicit none
 
 contains
 
-function decay(r, invrc, natoms) result(f)
+   function decay(r, invrc, natoms) result(f)
 
-    implicit none
+      implicit none
 
-    double precision, intent(in), dimension(:,:) :: r
-    double precision, intent(in) :: invrc
-    integer, intent(in) :: natoms
-    double precision, dimension(natoms, natoms) :: f
+      double precision, intent(in), dimension(:, :) :: r
+      double precision, intent(in) :: invrc
+      integer, intent(in) :: natoms
+      double precision, dimension(natoms, natoms) :: f
 
-    double precision, parameter :: pi = 4.0d0 * atan(1.0d0)
+      double precision, parameter :: pi = 4.0d0*atan(1.0d0)
 
-    ! Decaying function reaching 0 at rc
-    f = 0.5d0 * (cos(pi * r * invrc) + 1.0d0)
+      ! Decaying function reaching 0 at rc
+      f = 0.5d0*(cos(pi*r*invrc) + 1.0d0)
 
+   end function decay
 
-end function decay
+   function calc_angle(a, b, c) result(angle)
 
-function calc_angle(a, b, c) result(angle)
+      implicit none
 
-    implicit none
+      double precision, intent(in), dimension(3) :: a
+      double precision, intent(in), dimension(3) :: b
+      double precision, intent(in), dimension(3) :: c
 
-    double precision, intent(in), dimension(3) :: a
-    double precision, intent(in), dimension(3) :: b
-    double precision, intent(in), dimension(3) :: c
+      double precision, dimension(3) :: v1
+      double precision, dimension(3) :: v2
 
-    double precision, dimension(3) :: v1
-    double precision, dimension(3) :: v2
+      double precision :: cos_angle
+      double precision :: angle
 
-    double precision :: cos_angle
-    double precision :: angle
+      v1 = a - b
+      v2 = c - b
 
-    v1 = a - b
-    v2 = c - b
+      v1 = v1/norm2(v1)
+      v2 = v2/norm2(v2)
 
-    v1 = v1 / norm2(v1)
-    v2 = v2 / norm2(v2)
+      cos_angle = dot_product(v1, v2)
 
-    cos_angle = dot_product(v1,v2)
+      ! Clipping
+      if (cos_angle > 1.0d0) cos_angle = 1.0d0
+      if (cos_angle < -1.0d0) cos_angle = -1.0d0
 
-    ! Clipping
-    if (cos_angle > 1.0d0) cos_angle = 1.0d0
-    if (cos_angle < -1.0d0) cos_angle = -1.0d0
+      angle = acos(cos_angle)
 
-    angle = acos(cos_angle)
+   end function calc_angle
 
-end function calc_angle
+   function calc_cos_angle(a, b, c) result(cos_angle)
 
-function calc_cos_angle(a, b, c) result(cos_angle)
+      implicit none
 
-    implicit none
+      double precision, intent(in), dimension(3) :: a
+      double precision, intent(in), dimension(3) :: b
+      double precision, intent(in), dimension(3) :: c
 
-    double precision, intent(in), dimension(3) :: a
-    double precision, intent(in), dimension(3) :: b
-    double precision, intent(in), dimension(3) :: c
+      double precision, dimension(3) :: v1
+      double precision, dimension(3) :: v2
 
-    double precision, dimension(3) :: v1
-    double precision, dimension(3) :: v2
+      double precision :: cos_angle
 
-    double precision :: cos_angle
+      v1 = a - b
+      v2 = c - b
 
-    v1 = a - b
-    v2 = c - b
+      v1 = v1/norm2(v1)
+      v2 = v2/norm2(v2)
 
-    v1 = v1 / norm2(v1)
-    v2 = v2 / norm2(v2)
+      cos_angle = dot_product(v1, v2)
 
-    cos_angle = dot_product(v1,v2)
+   end function calc_cos_angle
 
-end function calc_cos_angle
+   function true_atom_id(atom_id, natoms) result(tatom_id)
+      integer, intent(in):: atom_id, natoms
+      integer:: tatom_id
 
-end module acsf_utils
+      if (atom_id <= natoms) then
+         tatom_id = atom_id
+      else
+         tatom_id = modulo(atom_id - 1, natoms) + 1
+      end if
 
+   end function
+
+end module acsf_utils
 
 subroutine fgenerate_acsf(coordinates, nuclear_charges, elements, &
                           & Rs2, Rs3, Ts, eta2, eta3, zeta, rcut, acut, natoms, rep_size, rep)
 
-    use acsf_utils, only: decay, calc_angle
-
-    implicit none
-
-    double precision, intent(in), dimension(:, :) :: coordinates
-    integer, intent(in), dimension(:) :: nuclear_charges
-    integer, intent(in), dimension(:) :: elements
-    double precision, intent(in), dimension(:) :: Rs2
-    double precision, intent(in), dimension(:) :: Rs3
-    double precision, intent(in), dimension(:) :: Ts
-    double precision, intent(in) :: eta2
-    double precision, intent(in) :: eta3
-    double precision, intent(in) :: zeta
-    double precision, intent(in) :: rcut
-    double precision, intent(in) :: acut
-    integer, intent(in) :: natoms
-    integer, intent(in) :: rep_size
-    double precision, intent(out), dimension(natoms, rep_size) :: rep
-
-    integer :: i, j, k, l, n, m, p, q, s, z, nelements, nbasis2, nbasis3, nabasis
-    integer, allocatable, dimension(:) :: element_types
-    double precision :: rij, rik, angle, invcut
-    double precision, allocatable, dimension(:) :: radial, angular, a, b, c
-    double precision, allocatable, dimension(:, :) :: distance_matrix, rdecay,  rep_subset
-
-    double precision, parameter :: pi = 4.0d0 * atan(1.0d0)
-
-    if (natoms /= size(nuclear_charges, dim=1)) then
-        write(*,*) "ERROR: Atom Centered Symmetry Functions creation"
-        write(*,*) natoms, "coordinates, but", &
-            & size(nuclear_charges, dim=1), "atom_types!"
-        stop
-    endif
-
-
-    ! number of element types
-    nelements = size(elements)
-    ! Allocate temporary
-    allocate(element_types(natoms))
-
-    ! Store element index of every atom
-    !$OMP PARALLEL DO
-    do i = 1, natoms
-        do j = 1, nelements
-            if (nuclear_charges(i) .eq. elements(j)) then
-                element_types(i) = j
-                cycle
-            endif
-        enddo
-    enddo
-    !$OMP END PARALLEL DO
-
-
-    ! Get distance matrix
-    ! Allocate temporary
-    allocate(distance_matrix(natoms, natoms))
-    distance_matrix = 0.0d0
-
-
-    !$OMP PARALLEL DO PRIVATE(rij)
-    do i = 1, natoms
-        do j = i+1, natoms
-            rij = norm2(coordinates(j,:) - coordinates(i,:))
-            distance_matrix(i, j) = rij
-            distance_matrix(j, i) = rij
-        enddo
-    enddo
-    !$OMP END PARALLEL DO
-
-    ! number of basis functions in the two body term
-    nbasis2 = size(Rs2)
-
-    ! Inverse of the two body cutoff
-    invcut = 1.0d0 / rcut
-    ! pre-calculate the radial decay in the two body terms
-    rdecay = decay(distance_matrix, invcut, natoms)
-
-    ! Allocate temporary
-    allocate(radial(nbasis2))
-    allocate(rep_subset(natoms, nelements * nbasis2))
-
-    rep_subset = 0.0d0
-
-    !$OMP PARALLEL DO PRIVATE(n,m,rij,radial) COLLAPSE(2) REDUCTION(+:rep_subset) SCHEDULE(dynamic)
-    do i = 1, natoms
-        do j = 1, natoms
-            if (j .le. i) cycle
-            rij = distance_matrix(i,j)
-            if (rij <= rcut) then
-                ! index of the element of atom i
-                m = element_types(i)
-                ! index of the element of atom j
-                n = element_types(j)
-                ! distance between atoms i and j
-                ! two body term of the representation
-                radial = exp(-eta2*(rij - Rs2)**2) * rdecay(i,j)
-                rep_subset(i, (n-1)*nbasis2 + 1:n*nbasis2) = rep_subset(i, (n-1)*nbasis2 + 1:n*nbasis2) + radial
-                rep_subset(j, (m-1)*nbasis2 + 1:m*nbasis2) = rep_subset(j, (m-1)*nbasis2 + 1:m*nbasis2) + radial
-            endif
-        enddo
-    enddo
-    !$OMP END PARALLEL DO
-
-    rep(:, 1:nelements * nbasis2) = rep_subset(:,:)
-
-    deallocate(radial)
-    deallocate(rep_subset)
-
-    ! number of radial basis functions in the three body term
-    nbasis3 = size(Rs3)
-    ! number of radial basis functions in the three body term
-    nabasis = size(Ts)
-
-    ! Inverse of the three body cutoff
-    invcut = 1.0d0 / acut
-    ! pre-calculate the radial decay in the three body terms
-    rdecay = decay(distance_matrix, invcut, natoms)
-
-    ! Allocate temporary
-    allocate(rep_subset(natoms,rep_size - nelements * nbasis2))
-    allocate(a(3))
-    allocate(b(3))
-    allocate(c(3))
-    allocate(radial(nbasis3))
-    allocate(angular(nabasis))
-
-    rep_subset = 0.0d0
-
-    ! This could probably be done more efficiently if it's a bottleneck
-    ! Also the order is a bit wobbly compared to the tensorflow implementation
-    !$OMP PARALLEL DO PRIVATE(rij, n, rik, m, a, b, c, angle, radial, angular, &
-    !$OMP p, q, s, z) REDUCTION(+:rep_subset) COLLAPSE(2) SCHEDULE(dynamic)
-    do i = 1, natoms
-        do j = 1, natoms - 1
-            if (i .eq. j) cycle
-            ! distance between atoms i and j
-            rij = distance_matrix(i,j)
-            if (rij > acut) cycle
+   use acsf_utils, only: decay, calc_angle
+
+   implicit none
+
+   double precision, intent(in), dimension(:, :) :: coordinates
+   integer, intent(in), dimension(:) :: nuclear_charges
+   integer, intent(in), dimension(:) :: elements
+   double precision, intent(in), dimension(:) :: Rs2
+   double precision, intent(in), dimension(:) :: Rs3
+   double precision, intent(in), dimension(:) :: Ts
+   double precision, intent(in) :: eta2
+   double precision, intent(in) :: eta3
+   double precision, intent(in) :: zeta
+   double precision, intent(in) :: rcut
+   double precision, intent(in) :: acut
+   integer, intent(in) :: natoms
+   integer, intent(in) :: rep_size
+   double precision, intent(out), dimension(natoms, rep_size) :: rep
+
+   integer :: i, j, k, l, n, m, p, q, s, z, nelements, nbasis2, nbasis3, nabasis
+   integer, allocatable, dimension(:) :: element_types
+   double precision :: rij, rik, angle, invcut
+   double precision, allocatable, dimension(:) :: radial, angular, a, b, c
+   double precision, allocatable, dimension(:, :) :: distance_matrix, rdecay, rep_subset
+
+   double precision, parameter :: pi = 4.0d0*atan(1.0d0)
+
+   if (natoms /= size(nuclear_charges, dim=1)) then
+      write (*, *) "ERROR: Atom Centered Symmetry Functions creation"
+      write (*, *) natoms, "coordinates, but", &
+          & size(nuclear_charges, dim=1), "atom_types!"
+      stop
+   end if
+
+   ! number of element types
+   nelements = size(elements)
+   ! Allocate temporary
+   allocate (element_types(natoms))
+
+   ! Store element index of every atom
+   !$OMP PARALLEL DO
+   do i = 1, natoms
+      do j = 1, nelements
+         if (nuclear_charges(i) .eq. elements(j)) then
+            element_types(i) = j
+            cycle
+         end if
+      end do
+   end do
+   !$OMP END PARALLEL DO
+
+   ! Get distance matrix
+   ! Allocate temporary
+   allocate (distance_matrix(natoms, natoms))
+   distance_matrix = 0.0d0
+
+   !$OMP PARALLEL DO PRIVATE(rij)
+   do i = 1, natoms
+      do j = i + 1, natoms
+         rij = norm2(coordinates(j, :) - coordinates(i, :))
+         distance_matrix(i, j) = rij
+         distance_matrix(j, i) = rij
+      end do
+   end do
+   !$OMP END PARALLEL DO
+
+   ! number of basis functions in the two body term
+   nbasis2 = size(Rs2)
+
+   ! Inverse of the two body cutoff
+   invcut = 1.0d0/rcut
+   ! pre-calculate the radial decay in the two body terms
+   rdecay = decay(distance_matrix, invcut, natoms)
+
+   ! Allocate temporary
+   allocate (radial(nbasis2))
+   allocate (rep_subset(natoms, nelements*nbasis2))
+
+   rep_subset = 0.0d0
+
+   !$OMP PARALLEL DO PRIVATE(n,m,rij,radial) COLLAPSE(2) REDUCTION(+:rep_subset) SCHEDULE(dynamic)
+   do i = 1, natoms
+      do j = 1, natoms
+         if (j .le. i) cycle
+         rij = distance_matrix(i, j)
+         if (rij <= rcut) then
+            ! index of the element of atom i
+            m = element_types(i)
             ! index of the element of atom j
             n = element_types(j)
-            do k = j + 1, natoms
-                if (i .eq. k) cycle
-                ! distance between atoms i and k
-                rik = distance_matrix(i,k)
-                if (rik > acut) cycle
-                ! index of the element of atom k
-                m = element_types(k)
-                ! coordinates of atoms j, i, k
-                a = coordinates(j,:)
-                b = coordinates(i,:)
-                c = coordinates(k,:)
-                ! angle between atoms i, j and k centered on i
-                angle = calc_angle(a,b,c)
-                ! The radial part of the three body terms including decay
-                radial = exp(-eta3*(0.5d0 * (rij+rik) - Rs3)**2) * rdecay(i,j) * rdecay(i,k)
-                ! The angular part of the three body terms
-                angular = 2.0d0 * ((1.0d0 + cos(angle - Ts)) * 0.5d0) ** zeta
-                ! The lowest of the element indices for atoms j and k
-                p = min(n,m) - 1
-                ! The highest of the element indices for atoms j and k
-                q = max(n,m) - 1
-                ! calculate the indices that the three body terms should be added to
-                s = nbasis3 * nabasis * (-(p * (p + 1))/2 + q + nelements * p) + 1
-                do l = 1, nbasis3
-                    ! calculate the indices that the three body terms should be added to
-                    z = s + (l-1) * nabasis
-                    ! Add the contributions from atoms i,j and k
-                    rep_subset(i, z:z + nabasis - 1) = rep_subset(i, z:z + nabasis - 1) + angular * radial(l)
-                enddo
-            enddo
-        enddo
-    enddo
-    !$OMP END PARALLEL DO
-
-    rep(:, nelements * nbasis2 + 1:) = rep_subset(:,:)
-
-    deallocate(element_types)
-    deallocate(rdecay)
-    deallocate(distance_matrix)
-    deallocate(rep_subset)
-    deallocate(a)
-    deallocate(b)
-    deallocate(c)
-    deallocate(radial)
-    deallocate(angular)
+            ! distance between atoms i and j
+            ! two body term of the representation
+            radial = exp(-eta2*(rij - Rs2)**2)*rdecay(i, j)
+            rep_subset(i, (n - 1)*nbasis2 + 1:n*nbasis2) = rep_subset(i, (n - 1)*nbasis2 + 1:n*nbasis2) + radial
+            rep_subset(j, (m - 1)*nbasis2 + 1:m*nbasis2) = rep_subset(j, (m - 1)*nbasis2 + 1:m*nbasis2) + radial
+         end if
+      end do
+   end do
+   !$OMP END PARALLEL DO
+
+   rep(:, 1:nelements*nbasis2) = rep_subset(:, :)
+
+   deallocate (radial)
+   deallocate (rep_subset)
+
+   ! number of radial basis functions in the three body term
+   nbasis3 = size(Rs3)
+   ! number of radial basis functions in the three body term
+   nabasis = size(Ts)
+
+   ! Inverse of the three body cutoff
+   invcut = 1.0d0/acut
+   ! pre-calculate the radial decay in the three body terms
+   rdecay = decay(distance_matrix, invcut, natoms)
+
+   ! Allocate temporary
+   allocate (rep_subset(natoms, rep_size - nelements*nbasis2))
+   allocate (a(3))
+   allocate (b(3))
+   allocate (c(3))
+   allocate (radial(nbasis3))
+   allocate (angular(nabasis))
+
+   rep_subset = 0.0d0
+
+   ! This could probably be done more efficiently if it's a bottleneck
+   ! Also the order is a bit wobbly compared to the tensorflow implementation
+   !$OMP PARALLEL DO PRIVATE(rij, n, rik, m, a, b, c, angle, radial, angular, &
+   !$OMP p, q, s, z) REDUCTION(+:rep_subset) COLLAPSE(2) SCHEDULE(dynamic)
+   do i = 1, natoms
+      do j = 1, natoms - 1
+         if (i .eq. j) cycle
+         ! distance between atoms i and j
+         rij = distance_matrix(i, j)
+         if (rij > acut) cycle
+         ! index of the element of atom j
+         n = element_types(j)
+         do k = j + 1, natoms
+            if (i .eq. k) cycle
+            ! distance between atoms i and k
+            rik = distance_matrix(i, k)
+            if (rik > acut) cycle
+            ! index of the element of atom k
+            m = element_types(k)
+            ! coordinates of atoms j, i, k
+            a = coordinates(j, :)
+            b = coordinates(i, :)
+            c = coordinates(k, :)
+            ! angle between atoms i, j and k centered on i
+            angle = calc_angle(a, b, c)
+            ! The radial part of the three body terms including decay
+            radial = exp(-eta3*(0.5d0*(rij + rik) - Rs3)**2)*rdecay(i, j)*rdecay(i, k)
+            ! The angular part of the three body terms
+            angular = 2.0d0*((1.0d0 + cos(angle - Ts))*0.5d0)**zeta
+            ! The lowest of the element indices for atoms j and k
+            p = min(n, m) - 1
+            ! The highest of the element indices for atoms j and k
+            q = max(n, m) - 1
+            ! calculate the indices that the three body terms should be added to
+            s = nbasis3*nabasis*(-(p*(p + 1))/2 + q + nelements*p) + 1
+            do l = 1, nbasis3
+               ! calculate the indices that the three body terms should be added to
+               z = s + (l - 1)*nabasis
+               ! Add the contributions from atoms i,j and k
+               rep_subset(i, z:z + nabasis - 1) = rep_subset(i, z:z + nabasis - 1) + angular*radial(l)
+            end do
+         end do
+      end do
+   end do
+   !$OMP END PARALLEL DO
+
+   rep(:, nelements*nbasis2 + 1:) = rep_subset(:, :)
+
+   deallocate (element_types)
+   deallocate (rdecay)
+   deallocate (distance_matrix)
+   deallocate (rep_subset)
+   deallocate (a)
+   deallocate (b)
+   deallocate (c)
+   deallocate (radial)
+   deallocate (angular)
 
 end subroutine fgenerate_acsf
 
@@ -294,1035 +280,1014 @@ subroutine fgenerate_acsf_and_gradients(coordinates, nuclear_charges, elements,
                           & Rs2, Rs3, Ts, eta2, eta3, zeta, rcut, acut, natoms, &
                           & rep_size, rep, grad)
 
-    use acsf_utils, only: decay, calc_angle
-
-    implicit none
-
-    double precision, intent(in), dimension(:, :) :: coordinates
-    integer, intent(in), dimension(:) :: nuclear_charges
-    integer, intent(in), dimension(:) :: elements
-    double precision, intent(in), dimension(:) :: Rs2
-    double precision, intent(in), dimension(:) :: Rs3
-    double precision, intent(in), dimension(:) :: Ts
-    double precision, intent(in) :: eta2
-    double precision, intent(in) :: eta3
-    double precision, intent(in) :: zeta
-    double precision, intent(in) :: rcut
-    double precision, intent(in) :: acut
-    integer, intent(in) :: natoms
-    integer, intent(in) :: rep_size
-    double precision, intent(out), dimension(natoms, rep_size) :: rep
-    double precision, intent(out), dimension(natoms, rep_size, natoms, 3) :: grad
-
-    integer :: i, j, k, l, n, m, p, q, s, t, z, nelements, nbasis2, nbasis3, nabasis, twobody_size
-    integer, allocatable, dimension(:) :: element_types
-    double precision :: rij, rik, angle, dot, rij2, rik2, invrij, invrik, invrij2, invrik2, invcut
-    double precision, allocatable, dimension(:) :: radial_base, radial, angular, a, b, c, atom_rep
-    double precision, allocatable, dimension(:) :: angular_base, d_radial, d_radial_d_i
-    double precision, allocatable, dimension(:) :: d_radial_d_j, d_radial_d_k, d_ijdecay, d_ikdecay
-    double precision, allocatable, dimension(:) :: d_angular, part, radial_part
-    double precision, allocatable, dimension(:) :: d_angular_d_i, d_angular_d_j, d_angular_d_k
-    double precision, allocatable, dimension(:, :) :: distance_matrix, rdecay, sq_distance_matrix
-    double precision, allocatable, dimension(:, :) :: inv_distance_matrix, inv_sq_distance_matrix
-    double precision, allocatable, dimension(:, :) :: rep_subset
-    double precision, allocatable, dimension(:, :, :) :: atom_grad
-    double precision, allocatable, dimension(:, :, :, :) :: grad_subset
-
-    double precision, parameter :: pi = 4.0d0 * atan(1.0d0)
-
-    if (natoms /= size(nuclear_charges, dim=1)) then
-        write(*,*) "ERROR: Atom Centered Symmetry Functions creation"
-        write(*,*) natoms, "coordinates, but", &
-            & size(nuclear_charges, dim=1), "atom_types!"
-        stop
-    endif
-
-
-    ! Number of unique elements
-    nelements = size(elements)
-    ! Allocate temporary
-    allocate(element_types(natoms))
-
-    ! Store element index of every atom
-    !$OMP PARALLEL DO
-    do i = 1, natoms
-        do j = 1, nelements
-            if (nuclear_charges(i) .eq. elements(j)) then
-                element_types(i) = j
-                cycle
-            endif
-        enddo
-    enddo
-    !$OMP END PARALLEL DO
-
-
-
-    ! Get distance matrix
-    ! Allocate temporary
-    allocate(distance_matrix(natoms, natoms))
-    allocate(sq_distance_matrix(natoms, natoms))
-    allocate(inv_distance_matrix(natoms, natoms))
-    allocate(inv_sq_distance_matrix(natoms, natoms))
-    distance_matrix = 0.0d0
-    sq_distance_matrix = 0.0d0
-    inv_distance_matrix = 0.0d0
-    inv_sq_distance_matrix = 0.0d0
-
-
-    !$OMP PARALLEL DO PRIVATE(rij,rij2,invrij,invrij2) SCHEDULE(dynamic)
-    do i = 1, natoms
-        do j = 1, natoms
-            if (j .le. i) cycle
-            rij = norm2(coordinates(j,:) - coordinates(i,:))
-            distance_matrix(i, j) = rij
-            distance_matrix(j, i) = rij
-            rij2 = rij * rij
-            sq_distance_matrix(i, j) = rij2
-            sq_distance_matrix(j, i) = rij2
-            invrij = 1.0d0 / rij
-            inv_distance_matrix(i, j) = invrij
-            inv_distance_matrix(j, i) = invrij
-            invrij2 = invrij * invrij
-            inv_sq_distance_matrix(i, j) = invrij2
-            inv_sq_distance_matrix(j, i) = invrij2
-        enddo
-    enddo
-    !$OMP END PARALLEL DO
-
-
-    ! Number of two body basis functions
-    nbasis2 = size(Rs2)
-
-    ! Inverse of the two body cutoff distance
-    invcut = 1.0d0 / rcut
-    ! Pre-calculate the two body decay
-    rdecay = decay(distance_matrix, invcut, natoms)
-
-    ! Allocate temporary
-    allocate(radial_base(nbasis2))
-    allocate(radial(nbasis2))
-    allocate(radial_part(nbasis2))
-    allocate(part(nbasis2))
-    allocate(rep_subset(natoms, nelements * nbasis2))
-    allocate(grad_subset(natoms, nelements * nbasis2, natoms, 3))
-
-    grad_subset = 0.0d0
-    rep_subset = 0.0d0
-
-    !$OMP PARALLEL DO PRIVATE(m,n,rij,invrij,radial_base,radial,radial_part,part) REDUCTION(+:rep_subset,grad_subset) &
-    !$OMP SCHEDULE(dynamic)
-    do i = 1, natoms
-        ! The element index of atom i
-        m = element_types(i)
-        do j = i + 1, natoms
-            ! The element index of atom j
-            n = element_types(j)
-            ! Distance between atoms i and j
-            rij = distance_matrix(i,j)
-            if (rij <= rcut) then
-                invrij = inv_distance_matrix(i,j)
-                ! part of the two body terms
-                radial_base = exp(-eta2*(rij - Rs2)**2)
-                ! The full two body term between atom i and j
-                radial = radial_base * rdecay(i,j)
-                ! Add the contributions from atoms i and j
-                rep_subset(i, (n-1)*nbasis2 + 1:n*nbasis2) = rep_subset(i, (n-1)*nbasis2 + 1:n*nbasis2) + radial
-                rep_subset(j, (m-1)*nbasis2 + 1:m*nbasis2) = rep_subset(j, (m-1)*nbasis2 + 1:m*nbasis2) + radial
-                ! Part of the gradients that can be reused for x,y and z coordinates
-                radial_part = - radial_base * invrij * (2.0d0 * eta2 * (rij - Rs2) * rdecay(i,j) + &
-                    & 0.5d0 * pi * sin(pi*rij * invcut) * invcut)
-                do k = 1, 3
-                    ! The gradients wrt coordinates
-                    part = radial_part * (coordinates(i,k) - coordinates(j,k))
-                    grad_subset(i, (n-1)*nbasis2 + 1:n*nbasis2, i, k) = &
-                        grad_subset(i, (n-1)*nbasis2 + 1:n*nbasis2, i, k) + part
-                    grad_subset(i, (n-1)*nbasis2 + 1:n*nbasis2, j, k) = &
-                        grad_subset(i, (n-1)*nbasis2 + 1:n*nbasis2, j, k) - part
-                    grad_subset(j, (m-1)*nbasis2 + 1:m*nbasis2, j, k) = &
-                        grad_subset(j, (m-1)*nbasis2 + 1:m*nbasis2, j, k) - part
-                    grad_subset(j, (m-1)*nbasis2 + 1:m*nbasis2, i, k) = &
-                        grad_subset(j, (m-1)*nbasis2 + 1:m*nbasis2, i, k) + part
-                enddo
-            endif
-        enddo
-    enddo
-    !$OMP END PARALLEL DO
-
-    rep(:,:nelements*nbasis2) = rep_subset(:,:)
-    grad(:,:nelements*nbasis2,:,:) = grad_subset(:,:,:,:)
-
-    deallocate(radial_base)
-    deallocate(radial)
-    deallocate(radial_part)
-    deallocate(part)
-    deallocate(rep_subset)
-    deallocate(grad_subset)
-
-
-    ! Number of radial basis functions in the three body term
-    nbasis3 = size(Rs3)
-    ! Number of angular basis functions in the three body term
-    nabasis = size(Ts)
-    ! Size of two body terms
-    twobody_size = nelements * nbasis2
-
-    ! Inverse of the three body cutoff distance
-    invcut = 1.0d0 / acut
-    ! Pre-calculate the three body decay
-    rdecay = decay(distance_matrix, invcut, natoms)
-
-    ! Allocate temporary
-    allocate(atom_rep(rep_size - twobody_size))
-    allocate(atom_grad(rep_size - twobody_size, natoms, 3))
-    allocate(a(3))
-    allocate(b(3))
-    allocate(c(3))
-    allocate(radial(nbasis3))
-    allocate(angular_base(nabasis))
-    allocate(angular(nabasis))
-    allocate(d_angular(nabasis))
-    allocate(d_angular_d_i(3))
-    allocate(d_angular_d_j(3))
-    allocate(d_angular_d_k(3))
-    allocate(d_radial(nbasis3))
-    allocate(d_radial_d_i(3))
-    allocate(d_radial_d_j(3))
-    allocate(d_radial_d_k(3))
-    allocate(d_ijdecay(3))
-    allocate(d_ikdecay(3))
-
-    ! This could probably be done more efficiently if it's a bottleneck
-    ! The order is a bit wobbly compared to the tensorflow implementation
-    !$OMP PARALLEL DO PRIVATE(atom_rep,atom_grad,a,b,c,radial,angular_base, &
-    !$OMP angular,d_angular,rij,n,rij2,invrij,invrij2,d_angular_d_i, &
-    !$OMP d_angular_d_j,d_angular_d_k,rik,m,rik2,invrik,invrik2,angle, &
-    !$OMP p,q,dot,d_radial,d_radial_d_i,d_radial_d_j,d_radial_d_k,s,z, &
-    !$OMP d_ijdecay,d_ikdecay) SCHEDULE(dynamic)
-    do i = 1, natoms
-        atom_rep = 0.0d0
-        atom_grad = 0.0d0
-        do j = 1, natoms - 1
-            if (i .eq. j) cycle
-            ! distance between atom i and j
-            rij = distance_matrix(i,j)
-            if (rij > acut) cycle
-            ! index of the element of atom j
-            n = element_types(j)
-            ! squared distance between atom i and j
-            rij2 = sq_distance_matrix(i,j)
-            ! inverse distance between atom i and j
-            invrij = inv_distance_matrix(i,j)
-            ! inverse squared distance between atom i and j
-            invrij2 = inv_sq_distance_matrix(i,j)
-            do k = j + 1, natoms
-                if (i .eq. k) cycle
-                ! distance between atom i and k
-                rik = distance_matrix(i,k)
-                if (rik > acut) cycle
-                ! index of the element of atom k
-                m = element_types(k)
-                ! squared distance between atom i and k
-                rik2 = sq_distance_matrix(i,k)
-                ! inverse distance between atom i and k
-                invrik = inv_distance_matrix(i,k)
-                ! inverse squared distance between atom i and k
-                invrik2 = inv_sq_distance_matrix(i,k)
-                ! coordinates of atoms j, i, k
-                a = coordinates(j,:)
-                b = coordinates(i,:)
-                c = coordinates(k,:)
-                ! angle between atom i, j and k, centered on i
-                angle = calc_angle(a,b,c)
-                ! part of the radial part of the 3body terms
-                radial = exp(-eta3*(0.5d0 * (rij+rik) - Rs3)**2)
-                ! used in the angular part of the 3body terms and in gradients
-                angular_base = ((1.0d0 + cos(angle - Ts)) * 0.5d0)
-                ! angular part of the 3body terms
-                angular = 2.0d0 * angular_base ** zeta
-                ! the lowest index of the elements of j,k
-                p = min(n,m) - 1
-                ! the highest index of the elements of j,k
-                q = max(n,m) - 1
-                ! Dot product between the vectors connecting atom i,j and i,k
-                dot = dot_product(a-b,c-b)
-                ! Part of the derivative of the angular basis functions wrt coordinates (dim(nabasis))
-                ! including decay
-                d_angular = (zeta * angular * sin(angle-Ts) * rdecay(i,j) * rdecay(i,k)) / &
-                    ! & (2.0d0 * sqrt(rij2 * rik2 - dot**2) * angular_base)
-                    & (2.0d0 * max(1d-10, sqrt(abs(rij2 * rik2 - dot**2)) * angular_base))
-                ! write(*,*) angular_base
-                ! Part of the derivative of the angular basis functions wrt atom j (dim(3))
-                d_angular_d_j = c - b + dot * ((b - a) * invrij2)
-                ! Part of the derivative of the angular basis functions wrt atom k (dim(3))
-                d_angular_d_k = a - b + dot * ((b - c) * invrik2)
-                ! Part of the derivative of the angular basis functions wrt atom i (dim(3))
-                d_angular_d_i = - (d_angular_d_j + d_angular_d_k)
-                ! Part of the derivative of the radial basis functions wrt coordinates (dim(nbasis3))
-                ! including decay
-                d_radial = radial * eta3 * (0.5d0 * (rij+rik) - Rs3) * rdecay(i,j) * rdecay(i,k)
-                ! Part of the derivative of the radial basis functions wrt atom j (dim(3))
-                d_radial_d_j = (b - a) * invrij
-                ! Part of the derivative of the radial basis functions wrt atom k (dim(3))
-                d_radial_d_k = (b - c) * invrik
-                ! Part of the derivative of the radial basis functions wrt atom i (dim(3))
-                d_radial_d_i = - (d_radial_d_j + d_radial_d_k)
-                ! Part of the derivative of the i,j decay functions wrt coordinates (dim(3))
-                d_ijdecay = - pi * (b - a) * sin(pi * rij * invcut) * 0.5d0 * invrij * invcut
-                ! Part of the derivative of the i,k decay functions wrt coordinates (dim(3))
-                d_ikdecay = - pi * (b - c) * sin(pi * rik * invcut) * 0.5d0 * invrik * invcut
-
-                ! Get index of where the contributions of atoms i,j,k should be added
-                s = nbasis3 * nabasis * (-(p * (p + 1))/2 + q + nelements * p) + 1
-                do l = 1, nbasis3
-                    ! Get index of where the contributions of atoms i,j,k should be added
-                    z = s + (l-1) * nabasis
-                    ! Add the contributions for atoms i,j,k
-                    atom_rep(z:z + nabasis - 1) = atom_rep(z:z + nabasis - 1) + angular * radial(l) * rdecay(i,j) * rdecay(i,k)
-                    do t = 1, 3
-                        ! Add up all gradient contributions wrt atom i
-                        atom_grad(z:z + nabasis - 1, i, t) = atom_grad(z:z + nabasis - 1, i, t) + &
-                            & d_angular * d_angular_d_i(t) * radial(l) + &
-                            & angular * d_radial(l) * d_radial_d_i(t) + &
-                            & angular * radial(l) * (d_ijdecay(t) * rdecay(i,k) + rdecay(i,j) * d_ikdecay(t))
-                        ! Add up all gradient contributions wrt atom j
-                        atom_grad(z:z + nabasis - 1, j, t) = atom_grad(z:z + nabasis - 1, j, t) + &
-                            & d_angular * d_angular_d_j(t) * radial(l) + &
-                            & angular * d_radial(l) * d_radial_d_j(t) - &
-                            & angular * radial(l) * d_ijdecay(t) * rdecay(i,k)
-                        ! Add up all gradient contributions wrt atom k
-                        atom_grad(z:z + nabasis - 1, k, t) = atom_grad(z:z + nabasis - 1, k, t) + &
-                            & d_angular * d_angular_d_k(t) * radial(l) + &
-                            & angular * d_radial(l) * d_radial_d_k(t) - &
-                            & angular * radial(l) * rdecay(i,j) * d_ikdecay(t)
-                    enddo
-                enddo
-            enddo
-        enddo
-        rep(i, twobody_size + 1:) = atom_rep
-        grad(i, twobody_size + 1:,:,:) = atom_grad
-    enddo
-    !$OMP END PARALLEL DO
-
-
-    deallocate(rdecay)
-    deallocate(element_types)
-    deallocate(distance_matrix)
-    deallocate(inv_distance_matrix)
-    deallocate(sq_distance_matrix)
-    deallocate(inv_sq_distance_matrix)
-    deallocate(atom_rep)
-    deallocate(atom_grad)
-    deallocate(a)
-    deallocate(b)
-    deallocate(c)
-    deallocate(radial)
-    deallocate(angular_base)
-    deallocate(angular)
-    deallocate(d_angular)
-    deallocate(d_angular_d_i)
-    deallocate(d_angular_d_j)
-    deallocate(d_angular_d_k)
-    deallocate(d_radial)
-    deallocate(d_radial_d_i)
-    deallocate(d_radial_d_j)
-    deallocate(d_radial_d_k)
-    deallocate(d_ijdecay)
-    deallocate(d_ikdecay)
-
+   use acsf_utils, only: decay, calc_angle
+
+   implicit none
+
+   double precision, intent(in), dimension(:, :) :: coordinates
+   integer, intent(in), dimension(:) :: nuclear_charges
+   integer, intent(in), dimension(:) :: elements
+   double precision, intent(in), dimension(:) :: Rs2
+   double precision, intent(in), dimension(:) :: Rs3
+   double precision, intent(in), dimension(:) :: Ts
+   double precision, intent(in) :: eta2
+   double precision, intent(in) :: eta3
+   double precision, intent(in) :: zeta
+   double precision, intent(in) :: rcut
+   double precision, intent(in) :: acut
+   integer, intent(in) :: natoms
+   integer, intent(in) :: rep_size
+   double precision, intent(out), dimension(natoms, rep_size) :: rep
+   double precision, intent(out), dimension(natoms, rep_size, natoms, 3) :: grad
+
+   integer :: i, j, k, l, n, m, p, q, s, t, z, nelements, nbasis2, nbasis3, nabasis, twobody_size
+   integer, allocatable, dimension(:) :: element_types
+   double precision :: rij, rik, angle, dot, rij2, rik2, invrij, invrik, invrij2, invrik2, invcut
+   double precision, allocatable, dimension(:) :: radial_base, radial, angular, a, b, c, atom_rep
+   double precision, allocatable, dimension(:) :: angular_base, d_radial, d_radial_d_i
+   double precision, allocatable, dimension(:) :: d_radial_d_j, d_radial_d_k, d_ijdecay, d_ikdecay
+   double precision, allocatable, dimension(:) :: d_angular, part, radial_part
+   double precision, allocatable, dimension(:) :: d_angular_d_i, d_angular_d_j, d_angular_d_k
+   double precision, allocatable, dimension(:, :) :: distance_matrix, rdecay, sq_distance_matrix
+   double precision, allocatable, dimension(:, :) :: inv_distance_matrix, inv_sq_distance_matrix
+   double precision, allocatable, dimension(:, :) :: rep_subset
+   double precision, allocatable, dimension(:, :, :) :: atom_grad
+   double precision, allocatable, dimension(:, :, :, :) :: grad_subset
+
+   double precision, parameter :: pi = 4.0d0*atan(1.0d0)
+
+   if (natoms /= size(nuclear_charges, dim=1)) then
+      write (*, *) "ERROR: Atom Centered Symmetry Functions creation"
+      write (*, *) natoms, "coordinates, but", &
+          & size(nuclear_charges, dim=1), "atom_types!"
+      stop
+   end if
+
+   ! Number of unique elements
+   nelements = size(elements)
+   ! Allocate temporary
+   allocate (element_types(natoms))
+
+   ! Store element index of every atom
+   !$OMP PARALLEL DO
+   do i = 1, natoms
+      do j = 1, nelements
+         if (nuclear_charges(i) .eq. elements(j)) then
+            element_types(i) = j
+            cycle
+         end if
+      end do
+   end do
+   !$OMP END PARALLEL DO
+
+   ! Get distance matrix
+   ! Allocate temporary
+   allocate (distance_matrix(natoms, natoms))
+   allocate (sq_distance_matrix(natoms, natoms))
+   allocate (inv_distance_matrix(natoms, natoms))
+   allocate (inv_sq_distance_matrix(natoms, natoms))
+   distance_matrix = 0.0d0
+   sq_distance_matrix = 0.0d0
+   inv_distance_matrix = 0.0d0
+   inv_sq_distance_matrix = 0.0d0
+
+   !$OMP PARALLEL DO PRIVATE(rij,rij2,invrij,invrij2) SCHEDULE(dynamic)
+   do i = 1, natoms
+      do j = 1, natoms
+         if (j .le. i) cycle
+         rij = norm2(coordinates(j, :) - coordinates(i, :))
+         distance_matrix(i, j) = rij
+         distance_matrix(j, i) = rij
+         rij2 = rij*rij
+         sq_distance_matrix(i, j) = rij2
+         sq_distance_matrix(j, i) = rij2
+         invrij = 1.0d0/rij
+         inv_distance_matrix(i, j) = invrij
+         inv_distance_matrix(j, i) = invrij
+         invrij2 = invrij*invrij
+         inv_sq_distance_matrix(i, j) = invrij2
+         inv_sq_distance_matrix(j, i) = invrij2
+      end do
+   end do
+   !$OMP END PARALLEL DO
+
+   ! Number of two body basis functions
+   nbasis2 = size(Rs2)
+
+   ! Inverse of the two body cutoff distance
+   invcut = 1.0d0/rcut
+   ! Pre-calculate the two body decay
+   rdecay = decay(distance_matrix, invcut, natoms)
+
+   ! Allocate temporary
+   allocate (radial_base(nbasis2))
+   allocate (radial(nbasis2))
+   allocate (radial_part(nbasis2))
+   allocate (part(nbasis2))
+   allocate (rep_subset(natoms, nelements*nbasis2))
+   allocate (grad_subset(natoms, nelements*nbasis2, natoms, 3))
+
+   grad_subset = 0.0d0
+   rep_subset = 0.0d0
+
+   !$OMP PARALLEL DO PRIVATE(m,n,rij,invrij,radial_base,radial,radial_part,part) REDUCTION(+:rep_subset,grad_subset) &
+   !$OMP SCHEDULE(dynamic)
+   do i = 1, natoms
+      ! The element index of atom i
+      m = element_types(i)
+      do j = i + 1, natoms
+         ! The element index of atom j
+         n = element_types(j)
+         ! Distance between atoms i and j
+         rij = distance_matrix(i, j)
+         if (rij <= rcut) then
+            invrij = inv_distance_matrix(i, j)
+            ! part of the two body terms
+            radial_base = exp(-eta2*(rij - Rs2)**2)
+            ! The full two body term between atom i and j
+            radial = radial_base*rdecay(i, j)
+            ! Add the contributions from atoms i and j
+            rep_subset(i, (n - 1)*nbasis2 + 1:n*nbasis2) = rep_subset(i, (n - 1)*nbasis2 + 1:n*nbasis2) + radial
+            rep_subset(j, (m - 1)*nbasis2 + 1:m*nbasis2) = rep_subset(j, (m - 1)*nbasis2 + 1:m*nbasis2) + radial
+            ! Part of the gradients that can be reused for x,y and z coordinates
+            radial_part = -radial_base*invrij*(2.0d0*eta2*(rij - Rs2)*rdecay(i, j) + &
+                & 0.5d0*pi*sin(pi*rij*invcut)*invcut)
+            do k = 1, 3
+               ! The gradients wrt coordinates
+               part = radial_part*(coordinates(i, k) - coordinates(j, k))
+               grad_subset(i, (n - 1)*nbasis2 + 1:n*nbasis2, i, k) = &
+                  grad_subset(i, (n - 1)*nbasis2 + 1:n*nbasis2, i, k) + part
+               grad_subset(i, (n - 1)*nbasis2 + 1:n*nbasis2, j, k) = &
+                  grad_subset(i, (n - 1)*nbasis2 + 1:n*nbasis2, j, k) - part
+               grad_subset(j, (m - 1)*nbasis2 + 1:m*nbasis2, j, k) = &
+                  grad_subset(j, (m - 1)*nbasis2 + 1:m*nbasis2, j, k) - part
+               grad_subset(j, (m - 1)*nbasis2 + 1:m*nbasis2, i, k) = &
+                  grad_subset(j, (m - 1)*nbasis2 + 1:m*nbasis2, i, k) + part
+            end do
+         end if
+      end do
+   end do
+   !$OMP END PARALLEL DO
+
+   rep(:, :nelements*nbasis2) = rep_subset(:, :)
+   grad(:, :nelements*nbasis2, :, :) = grad_subset(:, :, :, :)
+
+   deallocate (radial_base)
+   deallocate (radial)
+   deallocate (radial_part)
+   deallocate (part)
+   deallocate (rep_subset)
+   deallocate (grad_subset)
+
+   ! Number of radial basis functions in the three body term
+   nbasis3 = size(Rs3)
+   ! Number of angular basis functions in the three body term
+   nabasis = size(Ts)
+   ! Size of two body terms
+   twobody_size = nelements*nbasis2
+
+   ! Inverse of the three body cutoff distance
+   invcut = 1.0d0/acut
+   ! Pre-calculate the three body decay
+   rdecay = decay(distance_matrix, invcut, natoms)
+
+   ! Allocate temporary
+   allocate (atom_rep(rep_size - twobody_size))
+   allocate (atom_grad(rep_size - twobody_size, natoms, 3))
+   allocate (a(3))
+   allocate (b(3))
+   allocate (c(3))
+   allocate (radial(nbasis3))
+   allocate (angular_base(nabasis))
+   allocate (angular(nabasis))
+   allocate (d_angular(nabasis))
+   allocate (d_angular_d_i(3))
+   allocate (d_angular_d_j(3))
+   allocate (d_angular_d_k(3))
+   allocate (d_radial(nbasis3))
+   allocate (d_radial_d_i(3))
+   allocate (d_radial_d_j(3))
+   allocate (d_radial_d_k(3))
+   allocate (d_ijdecay(3))
+   allocate (d_ikdecay(3))
+
+   ! This could probably be done more efficiently if it's a bottleneck
+   ! The order is a bit wobbly compared to the tensorflow implementation
+   !$OMP PARALLEL DO PRIVATE(atom_rep,atom_grad,a,b,c,radial,angular_base, &
+   !$OMP angular,d_angular,rij,n,rij2,invrij,invrij2,d_angular_d_i, &
+   !$OMP d_angular_d_j,d_angular_d_k,rik,m,rik2,invrik,invrik2,angle, &
+   !$OMP p,q,dot,d_radial,d_radial_d_i,d_radial_d_j,d_radial_d_k,s,z, &
+   !$OMP d_ijdecay,d_ikdecay) SCHEDULE(dynamic)
+   do i = 1, natoms
+      atom_rep = 0.0d0
+      atom_grad = 0.0d0
+      do j = 1, natoms - 1
+         if (i .eq. j) cycle
+         ! distance between atom i and j
+         rij = distance_matrix(i, j)
+         if (rij > acut) cycle
+         ! index of the element of atom j
+         n = element_types(j)
+         ! squared distance between atom i and j
+         rij2 = sq_distance_matrix(i, j)
+         ! inverse distance between atom i and j
+         invrij = inv_distance_matrix(i, j)
+         ! inverse squared distance between atom i and j
+         invrij2 = inv_sq_distance_matrix(i, j)
+         do k = j + 1, natoms
+            if (i .eq. k) cycle
+            ! distance between atom i and k
+            rik = distance_matrix(i, k)
+            if (rik > acut) cycle
+            ! index of the element of atom k
+            m = element_types(k)
+            ! squared distance between atom i and k
+            rik2 = sq_distance_matrix(i, k)
+            ! inverse distance between atom i and k
+            invrik = inv_distance_matrix(i, k)
+            ! inverse squared distance between atom i and k
+            invrik2 = inv_sq_distance_matrix(i, k)
+            ! coordinates of atoms j, i, k
+            a = coordinates(j, :)
+            b = coordinates(i, :)
+            c = coordinates(k, :)
+            ! angle between atom i, j and k, centered on i
+            angle = calc_angle(a, b, c)
+            ! part of the radial part of the 3body terms
+            radial = exp(-eta3*(0.5d0*(rij + rik) - Rs3)**2)
+            ! used in the angular part of the 3body terms and in gradients
+            angular_base = ((1.0d0 + cos(angle - Ts))*0.5d0)
+            ! angular part of the 3body terms
+            angular = 2.0d0*angular_base**zeta
+            ! the lowest index of the elements of j,k
+            p = min(n, m) - 1
+            ! the highest index of the elements of j,k
+            q = max(n, m) - 1
+            ! Dot product between the vectors connecting atom i,j and i,k
+            dot = dot_product(a - b, c - b)
+            ! Part of the derivative of the angular basis functions wrt coordinates (dim(nabasis))
+            ! including decay
+            d_angular = (zeta*angular*sin(angle - Ts)*rdecay(i, j)*rdecay(i, k))/ &
+                        ! & (2.0d0 * sqrt(rij2 * rik2 - dot**2) * angular_base)
+                        (2.0d0*max(1d-10, sqrt(abs(rij2*rik2 - dot**2))*angular_base))
+            ! write(*,*) angular_base
+            ! Part of the derivative of the angular basis functions wrt atom j (dim(3))
+            d_angular_d_j = c - b + dot*((b - a)*invrij2)
+            ! Part of the derivative of the angular basis functions wrt atom k (dim(3))
+            d_angular_d_k = a - b + dot*((b - c)*invrik2)
+            ! Part of the derivative of the angular basis functions wrt atom i (dim(3))
+            d_angular_d_i = -(d_angular_d_j + d_angular_d_k)
+            ! Part of the derivative of the radial basis functions wrt coordinates (dim(nbasis3))
+            ! including decay
+            d_radial = radial*eta3*(0.5d0*(rij + rik) - Rs3)*rdecay(i, j)*rdecay(i, k)
+            ! Part of the derivative of the radial basis functions wrt atom j (dim(3))
+            d_radial_d_j = (b - a)*invrij
+            ! Part of the derivative of the radial basis functions wrt atom k (dim(3))
+            d_radial_d_k = (b - c)*invrik
+            ! Part of the derivative of the radial basis functions wrt atom i (dim(3))
+            d_radial_d_i = -(d_radial_d_j + d_radial_d_k)
+            ! Part of the derivative of the i,j decay functions wrt coordinates (dim(3))
+            d_ijdecay = -pi*(b - a)*sin(pi*rij*invcut)*0.5d0*invrij*invcut
+            ! Part of the derivative of the i,k decay functions wrt coordinates (dim(3))
+            d_ikdecay = -pi*(b - c)*sin(pi*rik*invcut)*0.5d0*invrik*invcut
+
+            ! Get index of where the contributions of atoms i,j,k should be added
+            s = nbasis3*nabasis*(-(p*(p + 1))/2 + q + nelements*p) + 1
+            do l = 1, nbasis3
+               ! Get index of where the contributions of atoms i,j,k should be added
+               z = s + (l - 1)*nabasis
+               ! Add the contributions for atoms i,j,k
+               atom_rep(z:z + nabasis - 1) = atom_rep(z:z + nabasis - 1) + angular*radial(l)*rdecay(i, j)*rdecay(i, k)
+               do t = 1, 3
+                  ! Add up all gradient contributions wrt atom i
+                  atom_grad(z:z + nabasis - 1, i, t) = atom_grad(z:z + nabasis - 1, i, t) + &
+                      & d_angular*d_angular_d_i(t)*radial(l) + &
+                      & angular*d_radial(l)*d_radial_d_i(t) + &
+                      & angular*radial(l)*(d_ijdecay(t)*rdecay(i, k) + rdecay(i, j)*d_ikdecay(t))
+                  ! Add up all gradient contributions wrt atom j
+                  atom_grad(z:z + nabasis - 1, j, t) = atom_grad(z:z + nabasis - 1, j, t) + &
+                      & d_angular*d_angular_d_j(t)*radial(l) + &
+                      & angular*d_radial(l)*d_radial_d_j(t) - &
+                      & angular*radial(l)*d_ijdecay(t)*rdecay(i, k)
+                  ! Add up all gradient contributions wrt atom k
+                  atom_grad(z:z + nabasis - 1, k, t) = atom_grad(z:z + nabasis - 1, k, t) + &
+                      & d_angular*d_angular_d_k(t)*radial(l) + &
+                      & angular*d_radial(l)*d_radial_d_k(t) - &
+                      & angular*radial(l)*rdecay(i, j)*d_ikdecay(t)
+               end do
+            end do
+         end do
+      end do
+      rep(i, twobody_size + 1:) = atom_rep
+      grad(i, twobody_size + 1:, :, :) = atom_grad
+   end do
+   !$OMP END PARALLEL DO
+
+   deallocate (rdecay)
+   deallocate (element_types)
+   deallocate (distance_matrix)
+   deallocate (inv_distance_matrix)
+   deallocate (sq_distance_matrix)
+   deallocate (inv_sq_distance_matrix)
+   deallocate (atom_rep)
+   deallocate (atom_grad)
+   deallocate (a)
+   deallocate (b)
+   deallocate (c)
+   deallocate (radial)
+   deallocate (angular_base)
+   deallocate (angular)
+   deallocate (d_angular)
+   deallocate (d_angular_d_i)
+   deallocate (d_angular_d_j)
+   deallocate (d_angular_d_k)
+   deallocate (d_radial)
+   deallocate (d_radial_d_i)
+   deallocate (d_radial_d_j)
+   deallocate (d_radial_d_k)
+   deallocate (d_ijdecay)
+   deallocate (d_ikdecay)
 
 end subroutine fgenerate_acsf_and_gradients
 
-
 subroutine fgenerate_fchl_acsf(coordinates, nuclear_charges, elements, &
-                          & Rs2, Rs3, Ts, eta2, eta3, zeta, rcut, acut, natoms, natoms_tot, rep_size, &
-                          & two_body_decay, three_body_decay, three_body_weight, rep)
-
-    use acsf_utils, only: decay, calc_angle, calc_cos_angle
-
-    implicit none
-
-    double precision, intent(in), dimension(:, :) :: coordinates
-    integer, intent(in), dimension(:) :: nuclear_charges
-    integer, intent(in), dimension(:) :: elements
-    double precision, intent(in), dimension(:) :: Rs2
-    double precision, intent(in), dimension(:) :: Rs3
-    double precision, intent(in), dimension(:) :: Ts
-    double precision, intent(in) :: eta2
-    double precision, intent(in) :: eta3
-    double precision, intent(in) :: zeta
-    double precision, intent(in) :: rcut
-    double precision, intent(in) :: acut
-    integer, intent(in) :: natoms, natoms_tot ! natoms_tot includes "virtual" atoms created to satisfy periodic boundary conditions.
-    integer, intent(in) :: rep_size
-    double precision, intent(in) :: two_body_decay
-    double precision, intent(in) :: three_body_decay
-    double precision, intent(in) :: three_body_weight
-
-    double precision, intent(out), dimension(natoms, rep_size) :: rep
-
-    integer:: two_body_rep_size
-    integer :: i, j, k, l, n, m, o, p, q, s, z, nelements, nbasis2, nbasis3, nabasis, j_id, j_id_max
-    integer, allocatable, dimension(:) :: element_types
-    double precision :: rij, rik, angle, cos_1, cos_2, cos_3, invcut
-    ! double precision :: angle_1, angle_2, angle_3
-    double precision, allocatable, dimension(:) :: radial, angular, a, b, c
-    double precision, allocatable, dimension(:, :) :: distance_matrix, rdecay
-    double precision, allocatable, dimension(:, :) :: saved_radial
-    integer, allocatable, dimension(:):: saved_j
-
-    double precision :: mu, sigma, ksi3
-
-    double precision, parameter :: pi = 4.0d0 * atan(1.0d0)
-
-
-    if (natoms /= size(nuclear_charges, dim=1)) then
-        write(*,*) "ERROR: Atom Centered Symmetry Functions creation"
-        write(*,*) natoms, "coordinates, but", &
-            & size(nuclear_charges, dim=1), "atom_types!"
-        stop
-    endif
-
-
-    ! number of element types
-    nelements = size(elements)
-    ! Allocate temporary
-    allocate(element_types(natoms_tot))
-
-    ! Store element index of every atom
-    !$OMP PARALLEL DO SCHEDULE(dynamic)
-    do i = 1, natoms_tot
-        do j = 1, nelements
-            if (nuclear_charges(modulo(i-1, natoms)+1) .eq. elements(j)) then
-                element_types(i) = j
-                exit
-            endif
-        enddo
-    enddo
-    !$OMP END PARALLEL DO
-
-
-    ! Get distance matrix
-    ! Allocate temporary
-    allocate(distance_matrix(natoms_tot, natoms_tot))
-    distance_matrix = 0.0d0
-
-
-    !$OMP PARALLEL DO PRIVATE(rij) SCHEDULE(dynamic)
-    do i = 1, natoms_tot
-        do j = i+1, natoms_tot
-            rij = norm2(coordinates(j,:) - coordinates(i,:))
-            distance_matrix(i, j) = rij
-            distance_matrix(j, i) = rij
-        enddo
-    enddo
-    !$OMP END PARALLEL DO
-
-    ! number of basis functions in the two body term
-    nbasis2 = size(Rs2)
-
-    ! Inverse of the two body cutoff
-    invcut = 1.0d0 / rcut
-
-    ! pre-calculate the radial decay in the two body terms
-    rdecay = decay(distance_matrix, invcut, natoms_tot)
-
-    ! Allocate temporary
-    allocate(radial(nbasis2))
-    allocate(saved_radial(nbasis2, natoms_tot), saved_j(natoms_tot))
-    rep=0.0d0
-    !$OMP PARALLEL DO PRIVATE(n,m,rij,radial,mu,sigma,saved_radial, saved_j, j_id_max) SCHEDULE(dynamic)
-    do i = 1, natoms
-        ! index of the element of atom i
-        m = element_types(i)
-        j_id_max=0
-        do j = i + 1, natoms_tot
-            ! distance between atoms i and j
-            rij = distance_matrix(i,j)
-            if (rij > rcut) cycle
-            j_id_max=j_id_max+1
-            saved_j(j_id_max)=j
-            ! index of the element of atom j
-            n = element_types(j)
+    & Rs2, Rs3, Ts, eta2, eta3, zeta, rcut, acut, natoms, natoms_tot, rep_size, &
+    & two_body_decay, three_body_decay, three_body_weight, rep)
+
+   use acsf_utils, only: decay, calc_angle, calc_cos_angle, true_atom_id
+
+   implicit none
+
+   double precision, intent(in), dimension(:, :) :: coordinates
+   integer, intent(in), dimension(:) :: nuclear_charges
+   integer, intent(in), dimension(:) :: elements
+   double precision, intent(in), dimension(:) :: Rs2
+   double precision, intent(in), dimension(:) :: Rs3
+   double precision, intent(in), dimension(:) :: Ts
+   double precision, intent(in) :: eta2
+   double precision, intent(in) :: eta3
+   double precision, intent(in) :: zeta
+   double precision, intent(in) :: rcut
+   double precision, intent(in) :: acut
+   integer, intent(in) :: natoms, natoms_tot ! natoms_tot includes "virtual" atoms created to satisfy periodic boundary conditions.
+   integer, intent(in) :: rep_size
+   double precision, intent(in) :: two_body_decay
+   double precision, intent(in) :: three_body_decay
+   double precision, intent(in) :: three_body_weight
+
+   double precision, intent(out), dimension(natoms, rep_size) :: rep
+!   Introduced to make OpenMP parallelization easier.
+
+   integer:: i, j, k, l, n, m, o, p, q, s, z, nelements, nbasis2, nbasis3, nabasis, j_id, max_j_id
+   integer, allocatable, dimension(:) :: element_types, saved_j
+   double precision :: rij, rik, angle, cos_1, cos_2, cos_3, invcut
+   double precision, allocatable, dimension(:) :: radial, angular, a, b, c
+   double precision, allocatable, dimension(:, :):: saved_radial
+   double precision, allocatable, dimension(:, :) :: distance_matrix, rdecay
+
+   double precision :: mu, sigma, ksi3
+
+   double precision, parameter :: pi = 4.0d0*atan(1.0d0)
+
+   if (natoms /= size(nuclear_charges, dim=1)) then
+      write (*, *) "ERROR: Atom Centered Symmetry Functions creation"
+      write (*, *) natoms, "coordinates, but", &
+      & size(nuclear_charges, dim=1), "atom_types!"
+      stop
+   end if
+
+! number of element types
+   nelements = size(elements)
+! Allocate temporary
+   allocate (element_types(natoms_tot))
+
+! Store element index of every atom
+!$OMP PARALLEL DO SCHEDULE(dynamic)
+   do i = 1, natoms_tot
+      do j = 1, nelements
+      if (nuclear_charges(true_atom_id(i, natoms)) .eq. elements(j)) then
+         element_types(i) = j
+         exit
+      end if
+      end do
+   end do
+!$OMP END PARALLEL DO
+
+! Get distance matrix
+! Allocate temporary
+   allocate (distance_matrix(natoms_tot, natoms_tot))
+   distance_matrix = 0.0d0
+
+!$OMP PARALLEL DO PRIVATE(rij) SCHEDULE(dynamic)
+   do i = 1, natoms_tot
+   do j = i + 1, natoms_tot
+      rij = norm2(coordinates(j, :) - coordinates(i, :))
+      distance_matrix(i, j) = rij
+      distance_matrix(j, i) = rij
+   end do
+   end do
+!$OMP END PARALLEL DO
+
+! number of basis functions in the two body term
+   nbasis2 = size(Rs2)
+
+! Inverse of the two body cutoff
+   invcut = 1.0d0/rcut
+
+! pre-calculate the radial decay in the two body terms
+   rdecay = decay(distance_matrix, invcut, natoms_tot)
+
+! Allocate temporary
+   allocate (radial(nbasis2), saved_radial(nbasis2, natoms_tot), saved_j(natoms_tot))
+
+   radial = 0.0d0
+!$OMP PARALLEL DO PRIVATE(n,m,rij,radial,mu,sigma, saved_radial, saved_j) SCHEDULE(dynamic)
+   do i = 1, natoms
+! index of the element of atom i
+      max_j_id = 0
+      do j = i + 1, natoms_tot
+! distance between atoms i and j
+         rij = distance_matrix(i, j)
+         if (rij <= rcut) then
+            max_j_id = max_j_id + 1
+            saved_j(max_j_id) = j
             ! two body term of the representation
-            mu    = log(rij / sqrt(1.0d0 + eta2  / rij**2))
-            sigma = sqrt(log(1.0d0 + eta2  / rij**2))
+            mu = log(rij/sqrt(1.0d0 + eta2/rij**2))
+            sigma = sqrt(log(1.0d0 + eta2/rij**2))
             radial(:) = 0.0d0
 
             do k = 1, nbasis2
-                radial(k) = 1.0d0/(sigma* sqrt(2.0d0*pi) * Rs2(k)) * rdecay(i,j) &
-                              & * exp( - (log(Rs2(k)) - mu)**2 / (2.0d0 * sigma**2) ) / rij**two_body_decay
-            enddo
-            saved_radial(:, j_id_max)=radial(:)
-        enddo
-        !$OMP CRITICAL
-        do j_id = 1, j_id_max
-            j=saved_j(j_id)
-            n=element_types(j)
-            rep(i, (n-1)*nbasis2 + 1:n*nbasis2) = rep(i, (n-1)*nbasis2 + 1:n*nbasis2) + saved_radial(:, j_id)
-            if (j<=natoms) rep(j, (m-1)*nbasis2 + 1:m*nbasis2) = rep(j, (m-1)*nbasis2 + 1:m*nbasis2) + saved_radial(:, j_id)
-        enddo
-        !$OMP END CRITICAL
-    enddo
-    !$OMP END PARALLEL DO
-
-    deallocate(radial)
-    deallocate(saved_radial, saved_j)
-
-    ! number of radial basis functions in the three body term
-    nbasis3 = size(Rs3)
-    ! number of radial basis functions in the three body term
-    nabasis = size(Ts)
-
-    ! Inverse of the three body cutoff
-    invcut = 1.0d0 / acut
-    ! pre-calculate the radial decay in the three body terms
-    rdecay = decay(distance_matrix, invcut, natoms_tot)
-
-    ! Allocate temporary
-    allocate(a(3))
-    allocate(b(3))
-    allocate(c(3))
-    allocate(radial(nbasis3))
-    allocate(angular(nabasis))
-    two_body_rep_size=nbasis2*nelements
-
-    ! This could probably be done more efficiently if it's a bottleneck
-    ! Also the order is a bit wobbly compared to the tensorflow implementation
-    !$OMP PARALLEL DO PRIVATE(rij, n, rik, m, a, b, c, angle, radial, angular, &
-    !$OMP cos_1, cos_2, cos_3, mu, sigma, o, ksi3, &
-    !$OMP p, q, s, z, l) SCHEDULE(dynamic)
-    do i = 1, natoms
-        do j = 1, natoms_tot - 1
-            if (i .eq. j) cycle
-            ! distance between atoms i and j
-            rij = distance_matrix(i,j)
-            if (rij > acut)  cycle
-            ! index of the element of atom j
-            n = element_types(j)
-            do k = j + 1, natoms_tot
-                if (i .eq. k) cycle
-                if (j .eq. k) cycle
-                ! distance between atoms i and k
-                rik = distance_matrix(i,k)
-                if (rik > acut) cycle
-                ! index of the element of atom k
-                m = element_types(k)
-                ! coordinates of atoms j, i, k
-                a = coordinates(j,:)
-                b = coordinates(i,:)
-                c = coordinates(k,:)
-                ! angle between atoms i, j and k centered on i
-                angle   = calc_angle(a,b,c)
-                cos_1 = calc_cos_angle(a,b,c)
-                cos_2 = calc_cos_angle(a,c,b)
-                cos_3 = calc_cos_angle(b,a,c)
-
-                ! The radial part of the three body terms including decay
-                radial = exp(-eta3*(0.5d0 * (rij+rik) - Rs3)**2) * rdecay(i,j) * rdecay(i,k)
-
-                ksi3 = (1.0d0 + 3.0d0 * cos_1 * cos_2 * cos_3) &
-                     & / (distance_matrix(i,k) * distance_matrix(i,j) * distance_matrix(j,k) &
-                 & )**three_body_decay * three_body_weight
-
-                angular = 0.0d0
-                do l = 1, nabasis/2
-
-                    o = l*2-1
-                    angular(2*l-1) = angular(2*l-1) + 2*cos(o * angle) &
-                        & * exp(-(zeta * o)**2 /2)
-
-                    angular(2*l) = angular(2*l) + 2*sin(o * angle) &
-                        & * exp(-(zeta * o)**2 /2)
-
-                enddo
-
-                ! The lowest of the element indices for atoms j and k
-                p = min(n,m) - 1
-                ! The highest of the element indices for atoms j and k
-                q = max(n,m) - 1
-                ! calculate the indices that the three body terms should be added to
-                s = two_body_rep_size + nbasis3 * nabasis * (-(p * (p + 1))/2 + q + nelements * p) + 1
-
-                do l = 1, nbasis3
-                    ! calculate the indices that the three body terms should be added to
-                    z = s + (l-1) * nabasis
-                    ! Add the contributions from atoms i,j and k
-                    rep(i, z:z + nabasis - 1) = rep(i, z:z + nabasis - 1) + angular * radial(l) * ksi3
-                enddo
-            enddo
-        enddo
-    enddo
-    !$OMP END PARALLEL DO
-
-    deallocate(element_types)
-    deallocate(rdecay)
-    deallocate(distance_matrix)
-    deallocate(a)
-    deallocate(b)
-    deallocate(c)
-    deallocate(radial)
-    deallocate(angular)
+               radial(k) = 1.0d0/(sigma*sqrt(2.0d0*pi)*Rs2(k))*rdecay(i, j) &
+                          & *exp(-(log(Rs2(k)) - mu)**2/(2.0d0*sigma**2))/rij**two_body_decay
+            end do
+            saved_radial(:, max_j_id) = radial(:)
+         end if
+      end do
+      m = element_types(i)
+      !$OMP CRITICAL
+      do j_id = 1, max_j_id
+         j = saved_j(j_id)
+         ! index of the element of atom j
+         n = element_types(j)
+         rep(i, (n - 1)*nbasis2 + 1:n*nbasis2) = rep(i, (n - 1)*nbasis2 + 1:n*nbasis2) + saved_radial(:, j_id)
+         if (j <= natoms) then
+            rep(j, (m - 1)*nbasis2 + 1:m*nbasis2) = rep(j, (m - 1)*nbasis2 + 1:m*nbasis2) + saved_radial(:, j_id)
+         end if
+      end do
+      !$OMP END CRITICAL
+   end do
+!$OMP END PARALLEL DO
+   deallocate (radial, saved_radial, saved_j)
+
+! number of radial basis functions in the three body term
+   nbasis3 = size(Rs3)
+! number of radial basis functions in the three body term
+   nabasis = size(Ts)
+
+! Inverse of the three body cutoff
+   invcut = 1.0d0/acut
+! pre-calculate the radial decay in the three body terms
+   rdecay = decay(distance_matrix, invcut, natoms_tot)
+
+! Allocate temporary
+   allocate (a(3))
+   allocate (b(3))
+   allocate (c(3))
+   allocate (radial(nbasis3))
+   allocate (angular(nabasis))
+
+!$OMP PARALLEL DO PRIVATE(rij, n, rik, m, a, b, c, angle, cos_1, cos_2, cos_3,&
+!$OMP radial, ksi3, angular, o, p, q, s, z) SCHEDULE(dynamic)
+   do i = 1, natoms
+   do j = 1, natoms_tot - 1
+      if (i .eq. j) cycle
+! distance between atoms i and j
+      rij = distance_matrix(i, j)
+      if (rij > acut) cycle
+! index of the element of atom j
+      n = element_types(j)
+      do k = j + 1, natoms_tot
+         if (i .eq. k) cycle
+         if (j .eq. k) cycle
+! distance between atoms i and k
+         rik = distance_matrix(i, k)
+         if (rik > acut) cycle
+! index of the element of atom k
+         m = element_types(k)
+! coordinates of atoms j, i, k
+         a = coordinates(j, :)
+         b = coordinates(i, :)
+         c = coordinates(k, :)
+! angle between atoms i, j and k centered on i
+         angle = calc_angle(a, b, c)
+         cos_1 = calc_cos_angle(a, b, c)
+         cos_2 = calc_cos_angle(a, c, b)
+         cos_3 = calc_cos_angle(b, a, c)
+
+! The radial part of the three body terms including decay
+         radial = exp(-eta3*(0.5d0*(rij + rik) - Rs3)**2)*rdecay(i, j)*rdecay(i, k)
+
+         ksi3 = (1.0d0 + 3.0d0*cos_1*cos_2*cos_3) &
+         & /(distance_matrix(i, k)*distance_matrix(i, j)*distance_matrix(j, k) &
+         & )**three_body_decay*three_body_weight
+
+         angular = 0.0d0
+         do l = 1, nabasis/2
+
+            o = l*2 - 1
+            angular(2*l - 1) = angular(2*l - 1) + 2*cos(o*angle) &
+                              & *exp(-(zeta*o)**2/2)
+
+            angular(2*l) = angular(2*l) + 2*sin(o*angle) &
+                          & *exp(-(zeta*o)**2/2)
+
+         end do
+
+! The lowest of the element indices for atoms j and k
+         p = min(n, m) - 1
+! The highest of the element indices for atoms j and k
+         q = max(n, m) - 1
+! calculate the indices that the three body terms should be added to
+         s = nelements*nbasis2 + nbasis3*nabasis*(-(p*(p + 1))/2 + q + nelements*p) + 1
+
+         do l = 1, nbasis3
+! calculate the indices that the three body terms should be added to
+            z = s + (l - 1)*nabasis
+! Add the contributions from atoms i,j and k
+            rep(i, z:z + nabasis - 1) = rep(i, z:z + nabasis - 1) + angular*radial(l)*ksi3
+         end do
+      end do
+   end do
+   end do
+!$OMP END PARALLEL DO
+
+   deallocate (element_types)
+   deallocate (rdecay)
+   deallocate (distance_matrix)
+   deallocate (a)
+   deallocate (b)
+   deallocate (c)
+   deallocate (radial)
+   deallocate (angular)
 
 end subroutine fgenerate_fchl_acsf
 
 subroutine fgenerate_fchl_acsf_and_gradients(coordinates, nuclear_charges, elements, &
-    & Rs2, Rs3, Ts, eta2, eta3, zeta, rcut, acut, natoms, natoms_tot, rep_size, &
-    & two_body_decay, three_body_decay, three_body_weight, rep, grad)
-
-    use acsf_utils, only: decay, calc_angle, calc_cos_angle
-
-    implicit none
-
-    double precision, intent(in), dimension(:, :) :: coordinates
-    integer, intent(in), dimension(:) :: nuclear_charges
-    integer, intent(in), dimension(:) :: elements
-    double precision, intent(in), dimension(:) :: Rs2
-    double precision, intent(in), dimension(:) :: Rs3
-    double precision, intent(in), dimension(:) :: Ts
-    double precision, intent(in) :: eta2
-    double precision, intent(in) :: eta3
-    double precision, intent(in) :: zeta
-    double precision, intent(in) :: rcut
-    double precision, intent(in) :: acut
-
-    double precision, intent(in) :: two_body_decay
-    double precision, intent(in) :: three_body_decay
-    double precision, intent(in) :: three_body_weight
-
-    double precision :: mu, sigma, dx, exp_s2, scaling, dscal, ddecay
-    double precision :: cos_i, cos_j, cos_k
-    double precision, allocatable, dimension(:) :: exp_ln
-    double precision, allocatable, dimension(:) :: log_Rs2
-
-    integer, intent(in) :: natoms, natoms_tot ! natoms_tot includes "virtual" atoms created to satisfy periodic boundary conditions.
-    integer, intent(in) :: rep_size
-    double precision, intent(out), dimension(natoms, rep_size) :: rep
-    double precision, intent(out), dimension(natoms, rep_size, natoms, 3) :: grad
+                    & Rs2, Rs3, Ts, eta2, eta3, zeta, rcut, acut, natoms, natoms_tot, rep_size, &
+                    & two_body_decay, three_body_decay, three_body_weight, rep, grad)
+
+   use acsf_utils, only: decay, calc_angle, calc_cos_angle, true_atom_id
+
+   implicit none
+
+   double precision, intent(in), dimension(:, :) :: coordinates
+   integer, intent(in), dimension(:) :: nuclear_charges
+   integer, intent(in), dimension(:) :: elements
+   double precision, intent(in), dimension(:) :: Rs2
+   double precision, intent(in), dimension(:) :: Rs3
+   double precision, intent(in), dimension(:) :: Ts
+   double precision, intent(in) :: eta2
+   double precision, intent(in) :: eta3
+   double precision, intent(in) :: zeta
+   double precision, intent(in) :: rcut
+   double precision, intent(in) :: acut
+
+   double precision, intent(in) :: two_body_decay
+   double precision, intent(in) :: three_body_decay
+   double precision, intent(in) :: three_body_weight
+
+   double precision :: mu, sigma, dx, exp_s2, scaling, dscal, ddecay
+   double precision :: cos_i, cos_j, cos_k
+   double precision, allocatable, dimension(:) :: exp_ln
+   double precision, allocatable, dimension(:) :: log_Rs2
+
+   integer, intent(in) :: natoms, natoms_tot ! natoms_tot includes "virtual" atoms created to satisfy periodic boundary conditions.
+   integer, intent(in) :: rep_size
+   double precision, intent(out), dimension(natoms, rep_size) :: rep
+   double precision, intent(out), dimension(natoms, rep_size, natoms, 3) :: grad
 
 !   Introduced to make OpenMP parallelization easier.
-    double precision, dimension(:, :, :), allocatable:: add_rep
-    double precision, dimension(:, :, :, :), allocatable :: add_grad
-
-    integer :: i, j, k, l, m, n,  p, q, s, t, z, nelements, nbasis2, nbasis3, nabasis, twobody_size
-    integer, allocatable, dimension(:) :: element_types
-    double precision :: rij, rik, angle, dot, rij2, rik2, invrij, invrik, invrij2, invrik2, invcut
-    double precision, allocatable, dimension(:) :: radial_base, radial, angular, a, b, c, atom_rep
-    double precision, allocatable, dimension(:) :: angular_base, d_radial, d_radial_d_i
-    double precision, allocatable, dimension(:) :: d_radial_d_j, d_radial_d_k, d_ijdecay, d_ikdecay
-    double precision, allocatable, dimension(:) :: d_angular, part, radial_part
-    double precision, allocatable, dimension(:) :: d_angular_d_i, d_angular_d_j, d_angular_d_k
-    double precision, allocatable, dimension(:, :) :: distance_matrix, rdecay, sq_distance_matrix
-    double precision, allocatable, dimension(:, :) :: inv_distance_matrix, inv_sq_distance_matrix
-    double precision, allocatable, dimension(:, :, :) :: atom_grad
-
-    double precision :: atm, atm_i, atm_j, atm_k
-    double precision :: invrjk, invr_atm, vi, vj, vk
-    double precision, allocatable, dimension(:) :: d_atm_i, d_atm_j, d_atm_k
-    double precision, allocatable, dimension(:) :: d_atm_ii, d_atm_ji, d_atm_ki
-    double precision, allocatable, dimension(:) :: d_atm_ij, d_atm_jj, d_atm_kj
-    double precision, allocatable, dimension(:) :: d_atm_ik, d_atm_jk, d_atm_kk
-    double precision, allocatable, dimension(:) :: d_atm_i2, d_atm_j2, d_atm_k2
-    double precision, allocatable, dimension(:) :: d_atm_i3, d_atm_j3, d_atm_k3
-    double precision, allocatable, dimension(:) :: d_atm_extra_i, d_atm_extra_j, d_atm_extra_k
-
-    double precision, parameter :: pi = 4.0d0 * atan(1.0d0)
-
-    if (natoms /= size(nuclear_charges, dim=1)) then
-        write(*,*) "ERROR: Atom Centered Symmetry Functions creation"
-        write(*,*) natoms, "coordinates, but", &
-        & size(nuclear_charges, dim=1), "atom_types!"
-        stop
-    endif
-
-    ! Number of unique elements
-    nelements = size(elements)
-    ! Allocate temporary
-    allocate(element_types(natoms_tot))
-
-    ! Store element index of every atom
-    !$OMP PARALLEL DO SCHEDULE(dynamic)
-    do i = 1, natoms_tot
-        do j = 1, nelements
-            if (nuclear_charges(modulo(i-1, natoms)+1) .eq. elements(j)) then
-                element_types(i) = j
-                exit
-            endif
-        enddo
-    enddo
-    !$OMP END PARALLEL DO
-
-
-
-    ! Get distance matrix
-    ! Allocate temporary
-    allocate(distance_matrix(natoms_tot, natoms_tot))
-    allocate(sq_distance_matrix(natoms_tot, natoms_tot))
-    allocate(inv_distance_matrix(natoms_tot, natoms_tot))
-    allocate(inv_sq_distance_matrix(natoms_tot, natoms_tot))
-    distance_matrix = 0.0d0
-    sq_distance_matrix = 0.0d0
-    inv_distance_matrix = 0.0d0
-    inv_sq_distance_matrix = 0.0d0
-
-
-    !$OMP PARALLEL DO PRIVATE(rij,rij2,invrij,invrij2) SCHEDULE(dynamic)
-    do i = 1, natoms_tot
-        do j = i+1, natoms_tot
-            rij = norm2(coordinates(j,:) - coordinates(i,:))
-            distance_matrix(i, j) = rij
-            distance_matrix(j, i) = rij
-            rij2 = rij * rij
-            sq_distance_matrix(i, j) = rij2
-            sq_distance_matrix(j, i) = rij2
-            invrij = 1.0d0 / rij
-            inv_distance_matrix(i, j) = invrij
-            inv_distance_matrix(j, i) = invrij
-            invrij2 = invrij * invrij
-            inv_sq_distance_matrix(i, j) = invrij2
-            inv_sq_distance_matrix(j, i) = invrij2
-        enddo
-    enddo
-    !$OMP END PARALLEL DO
-
-
-    ! Number of two body basis functions
-    nbasis2 = size(Rs2)
-
-    ! Inverse of the two body cutoff distance
-    invcut = 1.0d0 / rcut
-    ! Pre-calculate the two body decay
-    rdecay = decay(distance_matrix, invcut, natoms_tot)
-
-    ! Allocate temporary
-    allocate(radial_base(nbasis2))
-    allocate(radial(nbasis2))
-    allocate(radial_part(nbasis2))
-    allocate(part(nbasis2))
-    allocate(exp_ln(nbasis2))
-    allocate(log_Rs2(nbasis2))
-
-    grad =0.0d0
-    rep = 0.0d0
-
-    allocate(add_rep(nbasis2, natoms, natoms), add_grad(nbasis2, 3, natoms, natoms))
-    add_rep=0.0d0
-    add_grad=0.0d0
-    log_Rs2(:) = log(Rs2(:))
-
-    !$OMP PARALLEL DO PRIVATE(m, n, rij, invrij, mu, sigma, exp_s2, exp_ln,&
-    !$OMP scaling, radial_base, radial, dx, part, dscal, ddecay) SCHEDULE(dynamic)
-    do i = 1, natoms
-        ! The element index of atom i
-        m = element_types(i) ! moved inside loop to enable COLLAPSE
-        do j = i + 1, natoms_tot
-            rij = distance_matrix(i,j)
-            if (rij > rcut) continue
-            ! The element index of atom j
-            n = element_types(j)
-            ! Distance between atoms i and j
-            invrij = inv_distance_matrix(i,j)
-            mu    = log(rij / sqrt(1.0d0 + eta2  * inv_sq_distance_matrix(i, j)))
-            sigma = sqrt(log(1.0d0 + eta2  * inv_sq_distance_matrix(i, j)))
+   double precision, dimension(:, :, :), allocatable:: add_rep
+   double precision, dimension(:, :, :, :), allocatable :: add_grad
+
+   integer :: i, j, k, l, m, n, p, q, s, t, z, nelements, nbasis2, nbasis3, nabasis, twobody_size, true_j, true_k
+   integer, allocatable, dimension(:) :: element_types
+   double precision :: rij, rik, angle, dot, rij2, rik2, invrij, invrik, invrij2, invrik2, invcut
+   double precision, allocatable, dimension(:) :: radial_base, radial, angular, a, b, c, atom_rep
+   double precision, allocatable, dimension(:) :: angular_base, d_radial, d_radial_d_i
+   double precision, allocatable, dimension(:) :: d_radial_d_j, d_radial_d_k, d_ijdecay, d_ikdecay
+   double precision, allocatable, dimension(:) :: d_angular, part, radial_part
+   double precision, allocatable, dimension(:) :: d_angular_d_i, d_angular_d_j, d_angular_d_k
+   double precision, allocatable, dimension(:, :) :: distance_matrix, rdecay, sq_distance_matrix
+   double precision, allocatable, dimension(:, :) :: inv_distance_matrix, inv_sq_distance_matrix
+   double precision, allocatable, dimension(:, :, :) :: atom_grad
+
+   double precision :: atm, atm_i, atm_j, atm_k
+   double precision :: invrjk, invr_atm, vi, vj, vk
+   double precision, allocatable, dimension(:) :: d_atm_i, d_atm_j, d_atm_k
+   double precision, allocatable, dimension(:) :: d_atm_ii, d_atm_ji, d_atm_ki
+   double precision, allocatable, dimension(:) :: d_atm_ij, d_atm_jj, d_atm_kj
+   double precision, allocatable, dimension(:) :: d_atm_ik, d_atm_jk, d_atm_kk
+   double precision, allocatable, dimension(:) :: d_atm_i2, d_atm_j2, d_atm_k2
+   double precision, allocatable, dimension(:) :: d_atm_i3, d_atm_j3, d_atm_k3
+   double precision, allocatable, dimension(:) :: d_atm_extra_i, d_atm_extra_j, d_atm_extra_k
+
+   double precision, parameter :: pi = 4.0d0*atan(1.0d0)
+
+   if (natoms /= size(nuclear_charges, dim=1)) then
+      write (*, *) "ERROR: Atom Centered Symmetry Functions creation"
+      write (*, *) natoms, "coordinates, but", &
+          & size(nuclear_charges, dim=1), "atom_types!"
+      stop
+   end if
+
+   ! Number of unique elements
+   nelements = size(elements)
+   ! Allocate temporary
+   allocate (element_types(natoms))
+
+! Store element index of every atom
+!$OMP PARALLEL DO SCHEDULE(dynamic)
+   do i = 1, natoms
+      do j = 1, nelements
+         if (nuclear_charges(i) .eq. elements(j)) then
+            element_types(i) = j
+            exit
+         end if
+      end do
+   end do
+!$OMP END PARALLEL DO
+
+   ! Get distance matrix
+   ! Allocate temporary
+   allocate (distance_matrix(natoms_tot, natoms_tot))
+   allocate (sq_distance_matrix(natoms_tot, natoms_tot))
+   allocate (inv_distance_matrix(natoms_tot, natoms_tot))
+   allocate (inv_sq_distance_matrix(natoms_tot, natoms_tot))
+   distance_matrix = 0.0d0
+   sq_distance_matrix = 0.0d0
+   inv_distance_matrix = 0.0d0
+   inv_sq_distance_matrix = 0.0d0
+
+!$OMP PARALLEL DO PRIVATE(rij,rij2,invrij,invrij2) SCHEDULE(dynamic)
+   do i = 1, natoms_tot
+      do j = i + 1, natoms_tot
+         rij = norm2(coordinates(j, :) - coordinates(i, :))
+         distance_matrix(i, j) = rij
+         distance_matrix(j, i) = rij
+         rij2 = rij*rij
+         sq_distance_matrix(i, j) = rij2
+         sq_distance_matrix(j, i) = rij2
+         invrij = 1.0d0/rij
+         inv_distance_matrix(i, j) = invrij
+         inv_distance_matrix(j, i) = invrij
+         invrij2 = invrij*invrij
+         inv_sq_distance_matrix(i, j) = invrij2
+         inv_sq_distance_matrix(j, i) = invrij2
+      end do
+   end do
+!$OMP END PARALLEL DO
+
+   ! Number of two body basis functions
+   nbasis2 = size(Rs2)
+
+   ! Inverse of the two body cutoff distance
+   invcut = 1.0d0/rcut
+   ! Pre-calculate the two body decay
+   rdecay = decay(distance_matrix, invcut, natoms_tot)
+
+   ! Allocate temporary
+   allocate (radial_base(nbasis2))
+   allocate (radial(nbasis2))
+   allocate (radial_part(nbasis2))
+   allocate (part(nbasis2))
+   allocate (exp_ln(nbasis2))
+   allocate (log_Rs2(nbasis2))
+
+   grad = 0.0d0
+   rep = 0.0d0
+
+   allocate (add_rep(nbasis2, natoms, natoms), add_grad(nbasis2, 3, natoms, natoms))
+   add_rep = 0.0d0
+   add_grad = 0.0d0
+   log_Rs2(:) = log(Rs2(:))
+
+!$OMP PARALLEL DO PRIVATE(m, n, rij, invrij, mu, sigma, exp_s2, exp_ln,&
+!$OMP scaling, radial_base, radial, dx, part, dscal, ddecay) SCHEDULE(dynamic)
+   do i = 1, natoms
+      ! The element index of atom i
+      m = element_types(i)
+      do j = i + 1, natoms_tot
+         ! The element index of atom j
+         true_j = true_atom_id(j, natoms)
+         n = element_types(true_j)
+         ! Distance between atoms i and j
+         rij = distance_matrix(i, j)
+         if (rij <= rcut) then
+            invrij = inv_distance_matrix(i, j)
+
+            mu = log(rij/sqrt(1.0d0 + eta2*inv_sq_distance_matrix(i, j)))
+            sigma = sqrt(log(1.0d0 + eta2*inv_sq_distance_matrix(i, j)))
             exp_s2 = exp(sigma**2)
-            exp_ln = exp(-(log_Rs2(:) - mu)**2 / sigma**2 * 0.5d0) * sqrt(2.0d0)
+            exp_ln = exp(-(log_Rs2(:) - mu)**2/sigma**2*0.5d0)*sqrt(2.0d0)
 
-            scaling = 1.0d0 / rij**two_body_decay
+            scaling = 1.0d0/rij**two_body_decay
 
+            radial_base(:) = 1.0d0/(sigma*sqrt(2.0d0*pi)*Rs2(:))*exp(-(log_Rs2(:) - mu)**2/(2.0d0*sigma**2))
 
-            radial_base(:) = 1.0d0/(sigma* sqrt(2.0d0*pi) * Rs2(:)) * exp(-(log_Rs2(:) - mu)**2 / (2.0d0 * sigma**2))
+            radial(:) = radial_base(:)*scaling*rdecay(i, j)
 
-            radial(:) = radial_base(:) * scaling * rdecay(i,j)
+            rep(i, (n - 1)*nbasis2 + 1:n*nbasis2) = rep(i, (n - 1)*nbasis2 + 1:n*nbasis2) + radial
+            if (j <= natoms) add_rep(:, i, j) = radial
 
-            rep(i, (n-1)*nbasis2 + 1:n*nbasis2) = rep(i, (n-1)*nbasis2 + 1:n*nbasis2) + radial
-            if (j<=natoms) add_rep(:, i, j)=radial
             do k = 1, 3
-                dx = -(coordinates(i,k) - coordinates(j,k))
-                part(:) = ((log_Rs2(:) - mu) * (-dx *(rij**2 * exp_s2 + eta2) / (rij * sqrt(exp_s2))**3) &
-                            &* sqrt(exp_s2) / (sigma**2 * rij) + (log_Rs2(:) - mu) ** 2 * eta2 * dx / &
-                            &(sigma**4 * rij**4 * exp_s2)) * exp_ln / (Rs2(:) * sigma  * sqrt(pi) * 2) &
-                            &- exp_ln  * eta2 * dx / (Rs2(:) * sigma**3 *sqrt(pi) * rij**4 * exp_s2 * 2.0d0)
-
-                dscal = two_body_decay * dx / rij**(two_body_decay+2.0d0)
-                ddecay = dx * 0.5d0 * pi * sin(pi*rij * invcut) * invcut * invrij
-
-                part(:) = part(:) * scaling * rdecay(i,j) + radial_base(:) * dscal * rdecay(i,j) &
-                                & + radial_base(:) * scaling * ddecay
-
-                ! The gradients wrt coordinates
-                grad(i, (n-1)*nbasis2 + 1:n*nbasis2, i, k) = grad(i, (n-1)*nbasis2 + 1:n*nbasis2, i, k) + part
-                if (j<=natoms) then
-                    grad(i, (n-1)*nbasis2 + 1:n*nbasis2, j, k) = grad(i, (n-1)*nbasis2 + 1:n*nbasis2, j, k) - part
-                    grad(j, (m-1)*nbasis2 + 1:m*nbasis2, i, k) = grad(j, (m-1)*nbasis2 + 1:m*nbasis2, i, k) + part
-                    add_grad(:, k, i, j)=part
-                endif
-            enddo
-        enddo
-    enddo
-    !$OMP END PARALLEL DO
-
-    !$OMP PARALLEL DO PRIVATE(m) SCHEDULE(dynamic)
-    do j = 2, natoms
-        do i = 1, j-1
-            if (distance_matrix(i,j)>rcut) cycle
-            m = element_types(i)
-            rep(j, (m-1)*nbasis2 + 1:m*nbasis2) = rep(j, (m-1)*nbasis2 + 1:m*nbasis2) + add_rep(:, i, j)
-            do k=1, 3
-                grad(j, (m-1)*nbasis2 + 1:m*nbasis2, j, k) = grad(j, (m-1)*nbasis2 + 1:m*nbasis2, j, k) - add_grad(:, k, i, j)
-            enddo
-        enddo
-    enddo
-    !$OMP END PARALLEL DO
-
-    deallocate(add_rep, add_grad)
-
-    deallocate(radial_base)
-    deallocate(radial)
-    deallocate(radial_part)
-    deallocate(part)
-
-
-    ! Number of radial basis functions in the three body term
-    nbasis3 = size(Rs3)
-    ! Number of angular basis functions in the three body term
-    nabasis = size(Ts)
-    ! Size of two body terms
-    twobody_size = nelements * nbasis2
-
-    ! Inverse of the three body cutoff distance
-    invcut = 1.0d0 / acut
-    ! Pre-calculate the three body decay
-    rdecay = decay(distance_matrix, invcut, natoms_tot)
-
-    ! Allocate temporary
-    allocate(atom_rep(rep_size - twobody_size))
-    allocate(atom_grad(rep_size - twobody_size, natoms, 3))
-    allocate(a(3))
-    allocate(b(3))
-    allocate(c(3))
-    allocate(radial(nbasis3))
-    allocate(radial_base(nbasis3))
-    allocate(angular_base(nabasis))
-    allocate(angular(nabasis))
-    allocate(d_angular(nabasis))
-    allocate(d_angular_d_i(3))
-    allocate(d_angular_d_j(3))
-    allocate(d_angular_d_k(3))
-    allocate(d_radial(nbasis3))
-    allocate(d_radial_d_i(3))
-    allocate(d_radial_d_j(3))
-    allocate(d_radial_d_k(3))
-    allocate(d_ijdecay(3))
-    allocate(d_ikdecay(3))
-
-    allocate(d_atm_i(3))
-    allocate(d_atm_j(3))
-    allocate(d_atm_k(3))
-    allocate(d_atm_ii(3))
-    allocate(d_atm_ij(3))
-    allocate(d_atm_ik(3))
-    allocate(d_atm_ji(3))
-    allocate(d_atm_jj(3))
-    allocate(d_atm_jk(3))
-    allocate(d_atm_ki(3))
-    allocate(d_atm_kj(3))
-    allocate(d_atm_kk(3))
-    allocate(d_atm_i2(3))
-    allocate(d_atm_j2(3))
-    allocate(d_atm_k2(3))
-    allocate(d_atm_i3(3))
-    allocate(d_atm_j3(3))
-    allocate(d_atm_k3(3))
-    allocate(d_atm_extra_i(3))
-    allocate(d_atm_extra_j(3))
-    allocate(d_atm_extra_k(3))
-
-    !$OMP PARALLEL DO PRIVATE(atom_rep, atom_grad, rij, n, rij2, invrij, invrij2,&
-    !$OMP rik, m, rik2, invrik, invrjk, invrik2, a, b, c, angle, cos_i, cos_k,&
-    !$OMP cos_j, radial_base, radial, p, q, dot, angular, d_angular, d_angular_d_j,&
-    !$OMP d_angular_d_k, d_angular_d_i, d_radial, d_radial_d_j, d_radial_d_k,&
-    !$OMP d_radial_d_i, d_ijdecay, d_ikdecay, invr_atm, atm, atm_i, atm_j, atm_k, vi,&
-    !$OMP vj, vk, d_atm_ii, d_atm_ij, d_atm_ik, d_atm_ji, d_atm_jj, d_atm_jk, d_atm_ki,&
-    !$OMP d_atm_kj, d_atm_kk, d_atm_extra_i, d_atm_extra_j, d_atm_extra_k, s, z) SCHEDULE(dynamic)
-    do i = 1, natoms
-        atom_rep = 0.0d0
-        atom_grad = 0.0d0
-        do j = 1, natoms_tot - 1
-            if (i .eq. j) cycle
-            ! distance between atom i and j
-            rij = distance_matrix(i,j)
-            if (rij > acut) cycle
-            ! index of the element of atom j
-            n = element_types(j)
-            ! squared distance between atom i and j
-            rij2 = sq_distance_matrix(i,j)
-            ! inverse distance between atom i and j
-            invrij = inv_distance_matrix(i,j)
-            ! inverse squared distance between atom i and j
-            invrij2 = inv_sq_distance_matrix(i,j)
-            do k = j + 1, natoms_tot
-                if (i .eq. k) cycle
-                ! distance between atom i and k
-                rik = distance_matrix(i,k)
-                if (rik > acut) cycle
-                ! index of the element of atom k
-                m = element_types(k)
-                ! squared distance between atom i and k
-                rik2 = sq_distance_matrix(i,k)
-                ! inverse distance between atom i and k
-                invrik = inv_distance_matrix(i,k)
-                ! inverse distance between atom j and k
-                invrjk = inv_distance_matrix(j,k)
-                ! inverse squared distance between atom i and k
-                invrik2 = inv_sq_distance_matrix(i,k)
-                ! coordinates of atoms j, i, k
-                a = coordinates(j,:)
-                b = coordinates(i,:)
-                c = coordinates(k,:)
-                ! angle between atom i, j and k, centered on i
-                angle = calc_angle(a,b,c)
-                cos_i = calc_cos_angle(a,b,c)
-                cos_k = calc_cos_angle(a,c,b)
-                cos_j = calc_cos_angle(b,a,c)
-
-                ! part of the radial part of the 3body terms
-                radial_base(:) = exp(-eta3*(0.5d0 * (rij+rik) - Rs3(:))**2)
-                radial(:) = radial_base(:) ! * scaling
-
-                p = min(n,m) - 1
-                ! the highest index of the elements of j,k
-                q = max(n,m) - 1
-                ! Dot product between the vectors connecting atom i,j and i,k
-                dot = dot_product(a-b,c-b)
-
-                angular(1)   =  exp(-(zeta**2)*0.5d0) * 2 * cos(angle)
-                angular(2)   =  exp(-(zeta**2)*0.5d0) * 2 * sin(angle)
-
-                d_angular(1) =  exp(-(zeta**2)*0.5d0) * 2 * sin(angle) / sqrt(max(1d-10, rij2 * rik2 - dot**2))
-                d_angular(2) = -exp(-(zeta**2)*0.5d0) * 2 * cos(angle) / sqrt(max(1d-10, rij2 * rik2 - dot**2))
-
-                ! Part of the derivative of the angular basis functions wrt atom j (dim(3))
-                d_angular_d_j = c - b + dot * ((b - a) * invrij2)
-                ! Part of the derivative of the angular basis functions wrt atom k (dim(3))
-                d_angular_d_k = a - b + dot * ((b - c) * invrik2)
-                ! Part of the derivative of the angular basis functions wrt atom i (dim(3))
-                d_angular_d_i = - (d_angular_d_j + d_angular_d_k)
-
-                ! Part of the derivative of the radial basis functions wrt coordinates (dim(nbasis3))
-                ! including decay
-                d_radial = radial * eta3 * (0.5d0 * (rij+rik) - Rs3) ! * rdecay(i,j) * rdecay(i,k)
-                ! Part of the derivative of the radial basis functions wrt atom j (dim(3))
-                d_radial_d_j = (b - a) * invrij
-                ! Part of the derivative of the radial basis functions wrt atom k (dim(3))
-                d_radial_d_k = (b - c) * invrik
-                ! Part of the derivative of the radial basis functions wrt atom i (dim(3))
-                d_radial_d_i = - (d_radial_d_j + d_radial_d_k)
-
-                ! Part of the derivative of the i,j decay functions wrt coordinates (dim(3))
-                d_ijdecay = - pi * (b - a) * sin(pi * rij * invcut) * 0.5d0 * invrij * invcut
-                ! Part of the derivative of the i,k decay functions wrt coordinates (dim(3))
-                d_ikdecay = - pi * (b - c) * sin(pi * rik * invcut) * 0.5d0 * invrik * invcut
-
-                invr_atm = (invrij * invrjk *invrik)**three_body_decay
-
-                ! Axilrod-Teller-Muto term
-                atm = (1.0d0 + 3.0d0 * cos_i * cos_j * cos_k) * invr_atm * three_body_weight
-
-                atm_i = (3.0d0 * cos_j * cos_k) * invr_atm * invrij * invrik
-                atm_j = (3.0d0 * cos_k * cos_i) * invr_atm * invrij * invrjk
-                atm_k = (3.0d0 * cos_i * cos_j) * invr_atm * invrjk * invrik
-
-                vi = dot_product(a-b,c-b)
-                vj = dot_product(c-a,b-a)
-                vk = dot_product(b-c,a-c)
-
-                d_atm_ii(:) = 2 * b - a - c - vi * ((b-a)*invrij**2 + (b-c)*invrik**2)
-                d_atm_ij(:) = c - a - vj * (b-a)*invrij**2
-                d_atm_ik(:) = a - c - vk * (b-c)*invrik**2
-
-                d_atm_ji(:) = c - b - vi * (a-b)*invrij**2
-                d_atm_jj(:) = 2 * a - b - c - vj * ((a-b)*invrij**2 + (a-c)*invrjk**2)
-                d_atm_jk(:) = b - c - vk * (a-c)*invrjk**2
-
-                d_atm_ki(:) = a - b - vi * (c-b)*invrik**2
-                d_atm_kj(:) = b - a - vj * (c-a)*invrjk**2
-                d_atm_kk(:) = 2 * c - a - b - vk * ((c-a)*invrjk**2 + (c-b)*invrik**2)
-
-                d_atm_extra_i(:) = ((a-b)*invrij**2 + (c-b)*invrik**2) * atm * three_body_decay / three_body_weight
-                d_atm_extra_j(:) = ((b-a)*invrij**2 + (c-a)*invrjk**2) * atm * three_body_decay / three_body_weight
-                d_atm_extra_k(:) = ((a-c)*invrjk**2 + (b-c)*invrik**2) * atm * three_body_decay / three_body_weight
-
-                ! Get index of where the contributions of atoms i,j,k should be added
-                s = nbasis3 * nabasis * (-(p * (p + 1))/2 + q + nelements * p) + 1
-
-                do l = 1, nbasis3
-
-                    ! Get index of where the contributions of atoms i,j,k should be added
-                    z = s + (l-1) * nabasis
-
-                    ! Add the contributions for atoms i,j,k
-                    atom_rep(z:z + nabasis - 1) = atom_rep(z:z + nabasis - 1) &
-                        & + angular * radial(l) * atm * rdecay(i,j) * rdecay(i,k)
-
-                    do t = 1, 3
-
-                        ! Add up all gradient contributions wrt atom i
-                        atom_grad(z:z + nabasis - 1, i, t) = atom_grad(z:z + nabasis - 1, i, t) + &
-                            & d_angular * d_angular_d_i(t) * radial(l) * atm * rdecay(i,j) * rdecay(i,k) + &
-                            & angular * d_radial(l) * d_radial_d_i(t) * atm * rdecay(i,j) * rdecay(i,k) + &
-                            & angular * radial(l) * (atm_i * d_atm_ii(t) + atm_j * d_atm_ij(t) &
-                            & + atm_k * d_atm_ik(t) + d_atm_extra_i(t)) * three_body_weight * rdecay(i,j) * rdecay(i,k) + &
-                            & angular * radial(l) * (d_ijdecay(t) * rdecay(i,k) + rdecay(i,j) * d_ikdecay(t)) * atm
-                        ! Add up all gradient contributions wrt atom j
-                        if (j<=natoms) &
-                        atom_grad(z:z + nabasis - 1, j, t) = atom_grad(z:z + nabasis - 1, j, t) + &
-                            & d_angular * d_angular_d_j(t) * radial(l) * atm * rdecay(i,j) * rdecay(i,k) + &
-                            & angular * d_radial(l) * d_radial_d_j(t) * atm * rdecay(i,j) * rdecay(i,k) + &
-                            & angular * radial(l) * (atm_i * d_atm_ji(t) + atm_j * d_atm_jj(t) &
-                            & + atm_k * d_atm_jk(t) + d_atm_extra_j(t)) * three_body_weight * rdecay(i,j) * rdecay(i,k) - &
-                            & angular * radial(l) * d_ijdecay(t) * rdecay(i,k) * atm
-
-                        ! Add up all gradient contributions wrt atom k
-                        if (k<=natoms) &
-                        atom_grad(z:z + nabasis - 1, k, t) = atom_grad(z:z + nabasis - 1, k, t) + &
-                            & d_angular * d_angular_d_k(t) * radial(l) * atm * rdecay(i,j) * rdecay(i,k) + &
-                            & angular * d_radial(l) * d_radial_d_k(t) * atm * rdecay(i,j) * rdecay(i,k) + &
-                            & angular * radial(l) * (atm_i * d_atm_ki(t) + atm_j * d_atm_kj(t) &
-                            & + atm_k * d_atm_kk(t) + d_atm_extra_k(t)) * three_body_weight * rdecay(i,j) * rdecay(i,k) - &
-                            & angular * radial(l) * rdecay(i,j) * d_ikdecay(t) * atm
-
-                    enddo
-                enddo
-            enddo
-        enddo
-        rep(i, twobody_size + 1:) = rep(i, twobody_size + 1:) + atom_rep
-        grad(i, twobody_size + 1:,:,:) = grad(i, twobody_size + 1:,:,:) + atom_grad
-enddo
+
+               dx = -(coordinates(i, k) - coordinates(j, k))
+
+               part(:) = ((log_Rs2(:) - mu)*(-dx*(rij**2*exp_s2 + eta2)/(rij*sqrt(exp_s2))**3) &
+                   &*sqrt(exp_s2)/(sigma**2*rij) + (log_Rs2(:) - mu)**2*eta2*dx/ &
+                   &(sigma**4*rij**4*exp_s2))*exp_ln/(Rs2(:)*sigma*sqrt(pi)*2) &
+                   &- exp_ln*eta2*dx/(Rs2(:)*sigma**3*sqrt(pi)*rij**4*exp_s2*2.0d0)
+
+               dscal = two_body_decay*dx/rij**(two_body_decay + 2.0d0)
+               ddecay = dx*0.5d0*pi*sin(pi*rij*invcut)*invcut*invrij
+
+               part(:) = part(:)*scaling*rdecay(i, j) + radial_base(:)*dscal*rdecay(i, j) &
+                   & + radial_base(:)*scaling*ddecay
+
+               ! The gradients wrt coordinates
+               grad(i, (n - 1)*nbasis2 + 1:n*nbasis2, i, k) = grad(i, (n - 1)*nbasis2 + 1:n*nbasis2, i, k) + part
+               grad(i, (n - 1)*nbasis2 + 1:n*nbasis2, true_j, k) = grad(i, (n - 1)*nbasis2 + 1:n*nbasis2, true_j, k) - part
+               if (j <= natoms) then
+                  grad(j, (m - 1)*nbasis2 + 1:m*nbasis2, i, k) = grad(j, (m - 1)*nbasis2 + 1:m*nbasis2, i, k) + part
+                  add_grad(:, k, i, j) = part
+               end if
+            end do
+         end if
+      end do
+   end do
 !$OMP END PARALLEL DO
 
-deallocate(rdecay)
-deallocate(element_types)
-deallocate(distance_matrix)
-deallocate(inv_distance_matrix)
-deallocate(sq_distance_matrix)
-deallocate(inv_sq_distance_matrix)
-deallocate(atom_rep)
-deallocate(atom_grad)
-deallocate(a)
-deallocate(b)
-deallocate(c)
-deallocate(radial)
-deallocate(angular_base)
-deallocate(angular)
-deallocate(d_angular)
-deallocate(d_angular_d_i)
-deallocate(d_angular_d_j)
-deallocate(d_angular_d_k)
-deallocate(d_radial)
-deallocate(d_radial_d_i)
-deallocate(d_radial_d_j)
-deallocate(d_radial_d_k)
-deallocate(d_ijdecay)
-deallocate(d_ikdecay)
+!$OMP PARALLEL DO PRIVATE(m) SCHEDULE(dynamic)
+   do j = 2, natoms
+      do i = 1, j - 1
+         m = element_types(i)
+         rep(j, (m - 1)*nbasis2 + 1:m*nbasis2) = rep(j, (m - 1)*nbasis2 + 1:m*nbasis2) + add_rep(:, i, j)
+         do k = 1, 3
+            grad(j, (m - 1)*nbasis2 + 1:m*nbasis2, j, k) = grad(j, (m - 1)*nbasis2 + 1:m*nbasis2, j, k) - add_grad(:, k, i, j)
+         end do
+      end do
+   end do
+!$OMP END PARALLEL DO
+
+   deallocate (add_rep, add_grad)
+
+   deallocate (radial_base)
+   deallocate (radial)
+   deallocate (radial_part)
+   deallocate (part)
+
+   ! Number of radial basis functions in the three body term
+   nbasis3 = size(Rs3)
+   ! Number of angular basis functions in the three body term
+   nabasis = size(Ts)
+   ! Size of two body terms
+   twobody_size = nelements*nbasis2
+
+   ! Inverse of the three body cutoff distance
+   invcut = 1.0d0/acut
+   ! Pre-calculate the three body decay
+   rdecay = decay(distance_matrix, invcut, natoms_tot)
+
+   ! Allocate temporary
+   allocate (atom_rep(rep_size - twobody_size))
+   allocate (atom_grad(rep_size - twobody_size, natoms, 3))
+   allocate (a(3))
+   allocate (b(3))
+   allocate (c(3))
+   allocate (radial(nbasis3))
+   allocate (radial_base(nbasis3))
+   allocate (angular_base(nabasis))
+   allocate (angular(nabasis))
+   allocate (d_angular(nabasis))
+   allocate (d_angular_d_i(3))
+   allocate (d_angular_d_j(3))
+   allocate (d_angular_d_k(3))
+   allocate (d_radial(nbasis3))
+   allocate (d_radial_d_i(3))
+   allocate (d_radial_d_j(3))
+   allocate (d_radial_d_k(3))
+   allocate (d_ijdecay(3))
+   allocate (d_ikdecay(3))
+
+   allocate (d_atm_i(3))
+   allocate (d_atm_j(3))
+   allocate (d_atm_k(3))
+   allocate (d_atm_ii(3))
+   allocate (d_atm_ij(3))
+   allocate (d_atm_ik(3))
+   allocate (d_atm_ji(3))
+   allocate (d_atm_jj(3))
+   allocate (d_atm_jk(3))
+   allocate (d_atm_ki(3))
+   allocate (d_atm_kj(3))
+   allocate (d_atm_kk(3))
+   allocate (d_atm_i2(3))
+   allocate (d_atm_j2(3))
+   allocate (d_atm_k2(3))
+   allocate (d_atm_i3(3))
+   allocate (d_atm_j3(3))
+   allocate (d_atm_k3(3))
+   allocate (d_atm_extra_i(3))
+   allocate (d_atm_extra_j(3))
+   allocate (d_atm_extra_k(3))
+
+!$OMP PARALLEL DO PRIVATE(atom_rep, atom_grad, rij, n, rij2, invrij, invrij2,&
+!$OMP rik, m, rik2, invrik, invrjk, invrik2, a, b, c, angle, cos_i, cos_k,&
+!$OMP cos_j, radial_base, radial, p, q, dot, angular, d_angular, d_angular_d_j,&
+!$OMP d_angular_d_k, d_angular_d_i, d_radial, d_radial_d_j, d_radial_d_k,&
+!$OMP d_radial_d_i, d_ijdecay, d_ikdecay, invr_atm, atm, atm_i, atm_j, atm_k, vi,&
+!$OMP vj, vk, d_atm_ii, d_atm_ij, d_atm_ik, d_atm_ji, d_atm_jj, d_atm_jk, d_atm_ki,&
+!$OMP d_atm_kj, d_atm_kk, d_atm_extra_i, d_atm_extra_j, d_atm_extra_k, s, z) SCHEDULE(dynamic)
+   do i = 1, natoms
+      atom_rep = 0.0d0
+      atom_grad = 0.0d0
+      do j = 1, natoms_tot - 1
+         if (i .eq. j) cycle
+         ! distance between atom i and j
+         rij = distance_matrix(i, j)
+         if (rij > acut) cycle
+         ! index of the element of atom j
+         true_j = true_atom_id(j, natoms)
+         n = element_types(true_j)
+         ! squared distance between atom i and j
+         rij2 = sq_distance_matrix(i, j)
+         ! inverse distance between atom i and j
+         invrij = inv_distance_matrix(i, j)
+         ! inverse squared distance between atom i and j
+         invrij2 = inv_sq_distance_matrix(i, j)
+         do k = j + 1, natoms_tot
+            if (i .eq. k) cycle
+            ! distance between atom i and k
+            rik = distance_matrix(i, k)
+            if (rik > acut) cycle
+            ! index of the element of atom k
+            true_k = true_atom_id(k, natoms)
+            m = element_types(true_k)
+            ! squared distance between atom i and k
+            rik2 = sq_distance_matrix(i, k)
+            ! inverse distance between atom i and k
+            invrik = inv_distance_matrix(i, k)
+            ! inverse distance between atom j and k
+            invrjk = inv_distance_matrix(j, k)
+            ! inverse squared distance between atom i and k
+            invrik2 = inv_sq_distance_matrix(i, k)
+            ! coordinates of atoms j, i, k
+            a = coordinates(j, :)
+            b = coordinates(i, :)
+            c = coordinates(k, :)
+            ! angle between atom i, j and k, centered on i
+            angle = calc_angle(a, b, c)
+            cos_i = calc_cos_angle(a, b, c)
+            cos_k = calc_cos_angle(a, c, b)
+            cos_j = calc_cos_angle(b, a, c)
+
+            ! part of the radial part of the 3body terms
+            radial_base(:) = exp(-eta3*(0.5d0*(rij + rik) - Rs3(:))**2)
+            radial(:) = radial_base(:) ! * scaling
+
+            p = min(n, m) - 1
+            ! the highest index of the elements of j,k
+            q = max(n, m) - 1
+            ! Dot product between the vectors connecting atom i,j and i,k
+            dot = dot_product(a - b, c - b)
+
+            angular(1) = exp(-(zeta**2)*0.5d0)*2*cos(angle)
+            angular(2) = exp(-(zeta**2)*0.5d0)*2*sin(angle)
+
+            d_angular(1) = exp(-(zeta**2)*0.5d0)*2*sin(angle)/sqrt(max(1d-10, rij2*rik2 - dot**2))
+            d_angular(2) = -exp(-(zeta**2)*0.5d0)*2*cos(angle)/sqrt(max(1d-10, rij2*rik2 - dot**2))
+
+            ! Part of the derivative of the angular basis functions wrt atom j (dim(3))
+            d_angular_d_j = c - b + dot*((b - a)*invrij2)
+            ! Part of the derivative of the angular basis functions wrt atom k (dim(3))
+            d_angular_d_k = a - b + dot*((b - c)*invrik2)
+            ! Part of the derivative of the angular basis functions wrt atom i (dim(3))
+            d_angular_d_i = -(d_angular_d_j + d_angular_d_k)
+
+            ! Part of the derivative of the radial basis functions wrt coordinates (dim(nbasis3))
+            ! including decay
+            d_radial = radial*eta3*(0.5d0*(rij + rik) - Rs3) ! * rdecay(i,j) * rdecay(i,k)
+            ! Part of the derivative of the radial basis functions wrt atom j (dim(3))
+            d_radial_d_j = (b - a)*invrij
+            ! Part of the derivative of the radial basis functions wrt atom k (dim(3))
+            d_radial_d_k = (b - c)*invrik
+            ! Part of the derivative of the radial basis functions wrt atom i (dim(3))
+            d_radial_d_i = -(d_radial_d_j + d_radial_d_k)
+
+            ! Part of the derivative of the i,j decay functions wrt coordinates (dim(3))
+            d_ijdecay = -pi*(b - a)*sin(pi*rij*invcut)*0.5d0*invrij*invcut
+            ! Part of the derivative of the i,k decay functions wrt coordinates (dim(3))
+            d_ikdecay = -pi*(b - c)*sin(pi*rik*invcut)*0.5d0*invrik*invcut
+
+            invr_atm = (invrij*invrjk*invrik)**three_body_decay
+
+            ! Axilrod-Teller-Muto term
+            atm = (1.0d0 + 3.0d0*cos_i*cos_j*cos_k)*invr_atm*three_body_weight
+
+            atm_i = (3.0d0*cos_j*cos_k)*invr_atm*invrij*invrik
+            atm_j = (3.0d0*cos_k*cos_i)*invr_atm*invrij*invrjk
+            atm_k = (3.0d0*cos_i*cos_j)*invr_atm*invrjk*invrik
+
+            vi = dot_product(a - b, c - b)
+            vj = dot_product(c - a, b - a)
+            vk = dot_product(b - c, a - c)
+
+            d_atm_ii(:) = 2*b - a - c - vi*((b - a)*invrij**2 + (b - c)*invrik**2)
+            d_atm_ij(:) = c - a - vj*(b - a)*invrij**2
+            d_atm_ik(:) = a - c - vk*(b - c)*invrik**2
+
+            d_atm_ji(:) = c - b - vi*(a - b)*invrij**2
+            d_atm_jj(:) = 2*a - b - c - vj*((a - b)*invrij**2 + (a - c)*invrjk**2)
+            d_atm_jk(:) = b - c - vk*(a - c)*invrjk**2
+
+            d_atm_ki(:) = a - b - vi*(c - b)*invrik**2
+            d_atm_kj(:) = b - a - vj*(c - a)*invrjk**2
+            d_atm_kk(:) = 2*c - a - b - vk*((c - a)*invrjk**2 + (c - b)*invrik**2)
+
+            d_atm_extra_i(:) = ((a - b)*invrij**2 + (c - b)*invrik**2)*atm*three_body_decay/three_body_weight
+            d_atm_extra_j(:) = ((b - a)*invrij**2 + (c - a)*invrjk**2)*atm*three_body_decay/three_body_weight
+            d_atm_extra_k(:) = ((a - c)*invrjk**2 + (b - c)*invrik**2)*atm*three_body_decay/three_body_weight
+
+            ! Get index of where the contributions of atoms i,j,k should be added
+            s = nbasis3*nabasis*(-(p*(p + 1))/2 + q + nelements*p) + 1
+
+            do l = 1, nbasis3
+
+               ! Get index of where the contributions of atoms i,j,k should be added
+               z = s + (l - 1)*nabasis
+
+               ! Add the contributions for atoms i,j,k
+               atom_rep(z:z + nabasis - 1) = atom_rep(z:z + nabasis - 1) &
+                   & + angular*radial(l)*atm*rdecay(i, j)*rdecay(i, k)
+
+               do t = 1, 3
+
+                  ! Add up all gradient contributions wrt atom i
+                  atom_grad(z:z + nabasis - 1, i, t) = atom_grad(z:z + nabasis - 1, i, t) + &
+                      & d_angular*d_angular_d_i(t)*radial(l)*atm*rdecay(i, j)*rdecay(i, k) + &
+                      & angular*d_radial(l)*d_radial_d_i(t)*atm*rdecay(i, j)*rdecay(i, k) + &
+                      & angular*radial(l)*(atm_i*d_atm_ii(t) + atm_j*d_atm_ij(t) &
+                      & + atm_k*d_atm_ik(t) + d_atm_extra_i(t))*three_body_weight*rdecay(i, j)*rdecay(i, k) + &
+                      & angular*radial(l)*(d_ijdecay(t)*rdecay(i, k) + rdecay(i, j)*d_ikdecay(t))*atm
+                  ! Add up all gradient contributions wrt atom j
+                  atom_grad(z:z + nabasis - 1, true_j, t) = atom_grad(z:z + nabasis - 1, true_j, t) + &
+                      & d_angular*d_angular_d_j(t)*radial(l)*atm*rdecay(i, j)*rdecay(i, k) + &
+                      & angular*d_radial(l)*d_radial_d_j(t)*atm*rdecay(i, j)*rdecay(i, k) + &
+                      & angular*radial(l)*(atm_i*d_atm_ji(t) + atm_j*d_atm_jj(t) &
+                      & + atm_k*d_atm_jk(t) + d_atm_extra_j(t))*three_body_weight*rdecay(i, j)*rdecay(i, k) - &
+                      & angular*radial(l)*d_ijdecay(t)*rdecay(i, k)*atm
+
+                  ! Add up all gradient contributions wrt atom k
+                  atom_grad(z:z + nabasis - 1, true_k, t) = atom_grad(z:z + nabasis - 1, true_k, t) + &
+                      & d_angular*d_angular_d_k(t)*radial(l)*atm*rdecay(i, j)*rdecay(i, k) + &
+                      & angular*d_radial(l)*d_radial_d_k(t)*atm*rdecay(i, j)*rdecay(i, k) + &
+                      & angular*radial(l)*(atm_i*d_atm_ki(t) + atm_j*d_atm_kj(t) &
+                      & + atm_k*d_atm_kk(t) + d_atm_extra_k(t))*three_body_weight*rdecay(i, j)*rdecay(i, k) - &
+                      & angular*radial(l)*rdecay(i, j)*d_ikdecay(t)*atm
+
+               end do
+            end do
+         end do
+      end do
+      rep(i, twobody_size + 1:) = rep(i, twobody_size + 1:) + atom_rep
+      grad(i, twobody_size + 1:, :, :) = grad(i, twobody_size + 1:, :, :) + atom_grad
+   end do
+!$OMP END PARALLEL DO
 
+   deallocate (rdecay)
+   deallocate (element_types)
+   deallocate (distance_matrix)
+   deallocate (inv_distance_matrix)
+   deallocate (sq_distance_matrix)
+   deallocate (inv_sq_distance_matrix)
+   deallocate (atom_rep)
+   deallocate (atom_grad)
+   deallocate (a)
+   deallocate (b)
+   deallocate (c)
+   deallocate (radial)
+   deallocate (angular_base)
+   deallocate (angular)
+   deallocate (d_angular)
+   deallocate (d_angular_d_i)
+   deallocate (d_angular_d_j)
+   deallocate (d_angular_d_k)
+   deallocate (d_radial)
+   deallocate (d_radial_d_i)
+   deallocate (d_radial_d_j)
+   deallocate (d_radial_d_k)
+   deallocate (d_ijdecay)
+   deallocate (d_ikdecay)
 
 end subroutine fgenerate_fchl_acsf_and_gradients
diff --git a/tests/verification_fchl_acsf_pbc.py b/tests/verification_fchl_acsf_pbc.py
index e9d75058..687a49ed 100644
--- a/tests/verification_fchl_acsf_pbc.py
+++ b/tests/verification_fchl_acsf_pbc.py
@@ -1,7 +1,8 @@
 """
-Runs the same calculation using the 'cell' keyword for generate_fchl_acsf and by straightforward
-'cell cloning' done inside the test script.
+This file contains tests for the atom centred symmetry function module.
 """
+
+import glob
 import os
 import random
 from copy import deepcopy
@@ -11,28 +12,26 @@
 from qmllib.representations import generate_fchl_acsf
 from qmllib.utils.xyz_format import read_xyz
 
-np.set_printoptions(linewidth=666, edgeitems=10)
+# from tests.conftest import ASSETS
 
+np.set_printoptions(linewidth=666, edgeitems=10)
 REP_PARAMS = dict()
 REP_PARAMS["elements"] = [1, 6, 7, 8, 16]
-REP_PARAMS["rcut"] = 5.0
-REP_PARAMS["acut"] = 5.0
-random.seed(1)
-cell_added_cutoff = 0.1
+REP_PARAMS["rcut"] = 8.0
+REP_PARAMS["acut"] = 8.0
 
 
-#   For given molecular coordinates generate a cell just large enough to contain the molecule.
-def suitable_cell(coords):
-    max_coords = None
-    min_coords = None
-    for atom_coords in coords:
-        if max_coords is None:
-            max_coords = deepcopy(atom_coords)
-            min_coords = deepcopy(atom_coords)
-        else:
-            max_coords = np.maximum(max_coords, atom_coords)
-            min_coords = np.minimum(min_coords, atom_coords)
-    return np.diag((max_coords - min_coords) * (1.0 + cell_added_cutoff))
+def pbc_corrected_drep(drep, num_atoms):
+    new_shape = list(drep.shape)
+    new_shape[0] = num_atoms
+    new_shape[2] = num_atoms
+    new_drep = np.zeros(new_shape)
+    num_atoms_tot = drep.shape[0]
+    for i in range(num_atoms):
+        for j in range(num_atoms_tot):
+            true_j = j % num_atoms
+            new_drep[i, :, true_j, :] += drep[i, :, j, :]
+    return new_drep
 
 
 def generate_fchl_acsf_brute_pbc(nuclear_charges, coordinates, cell, gradients=False):
@@ -42,7 +41,7 @@ def generate_fchl_acsf_brute_pbc(nuclear_charges, coordinates, cell, gradients=F
     nExtend = (
         np.floor(max(REP_PARAMS["rcut"], REP_PARAMS["acut"]) / np.linalg.norm(cell, 2, axis=0)) + 1
     ).astype(int)
-    print("Checked nExtend:", nExtend, ", gradient calculation:", gradients)
+    print("Checked nExtend:", nExtend)
     for i in range(-nExtend[0], nExtend[0] + 1):
         for j in range(-nExtend[1], nExtend[1] + 1):
             for k in range(-nExtend[2], nExtend[2] + 1):
@@ -54,75 +53,130 @@ def generate_fchl_acsf_brute_pbc(nuclear_charges, coordinates, cell, gradients=F
                     )
                     all_charges = np.append(all_charges, nuclear_charges)
     if gradients:
-        rep, drep = generate_fchl_acsf(all_charges, all_coords, gradients=True, **REP_PARAMS)
+        if len(all_charges) > 2500:
+            return None, None
+        rep, drep = generate_fchl_acsf(all_charges, all_coords, gradients=gradients, **REP_PARAMS)
     else:
-        rep = generate_fchl_acsf(all_charges, all_coords, gradients=False, **REP_PARAMS)
+        rep = generate_fchl_acsf(all_charges, all_coords, gradients=gradients, **REP_PARAMS)
+
     rep = rep[:num_atoms, :]
     if gradients:
-        drep = drep[:num_atoms, :, :num_atoms, :]
-        return rep, drep
+        return rep, pbc_corrected_drep(drep, num_atoms)
     else:
         return rep
 
 
-def ragged_array_close(arr1, arr2, error_msg):
-    for el1, el2 in zip(arr1, arr2):
-        assert np.allclose(el1, el2), error_msg
+def get_acsf_numgrad(coordinates, nuclear_charges, dx=1e-6, cell=None):
+
+    natoms = len(coordinates)
+    true_coords = deepcopy(coordinates)
+
+    true_rep = generate_fchl_acsf(
+        nuclear_charges, coordinates, gradients=False, cell=cell, **REP_PARAMS
+    )
+
+    gradient = np.zeros((3, natoms, true_rep.shape[0], true_rep.shape[1]))
+
+    for n, coord in enumerate(true_coords):
+        for xyz, x in enumerate(coord):
+
+            temp_coords = deepcopy(true_coords)
+            temp_coords[n, xyz] = x + 2.0 * dx
+
+            rep = generate_fchl_acsf(
+                nuclear_charges, temp_coords, gradients=False, cell=cell, **REP_PARAMS
+            )
+            gradient[xyz, n] -= rep
 
+            temp_coords[n, xyz] = x + dx
+            rep = generate_fchl_acsf(
+                nuclear_charges, temp_coords, gradients=False, cell=cell, **REP_PARAMS
+            )
+            gradient[xyz, n] += 8.0 * rep
 
-def test_fchl_acsf_pbc():
+            temp_coords[n, xyz] = x - dx
+            rep = generate_fchl_acsf(
+                nuclear_charges, temp_coords, gradients=False, cell=cell, **REP_PARAMS
+            )
+            gradient[xyz, n] -= 8.0 * rep
 
-    qm7_dir = os.path.dirname(os.path.realpath(__file__)) + "/assets/qm7"
-    os.chdir(qm7_dir)
-    all_xyzs = os.listdir()
-    test_xyzs = random.sample(all_xyzs, 10)
+            temp_coords[n, xyz] = x - 2.0 * dx
+            rep = generate_fchl_acsf(
+                nuclear_charges, temp_coords, gradients=False, cell=cell, **REP_PARAMS
+            )
+            gradient[xyz, n] += rep
 
-    reps_no_grad1 = []
-    reps_no_grad2 = []
+    gradient /= 12 * dx
 
-    reps_wgrad1 = []
-    reps_wgrad2 = []
-    dreps1 = []
-    dreps2 = []
+    gradient = np.swapaxes(gradient, 0, 1)
+    gradient = np.swapaxes(gradient, 2, 0)
+    gradient = np.swapaxes(gradient, 3, 1)
 
-    for xyz in test_xyzs:
-        print("Tested xyz:", xyz)
-        coords, atoms = read_xyz(xyz)
-        cell = suitable_cell(coords)
-        reps_no_grad1.append(generate_fchl_acsf_brute_pbc(atoms, coords, cell, gradients=False))
-        reps_no_grad2.append(
-            generate_fchl_acsf(atoms, coords, cell=cell, gradients=False, **REP_PARAMS)
+    return gradient
+
+
+#   For given molecular coordinates generate a cell just large enough to contain the molecule.
+def suitable_cell(coords, cell_added_cutoff=0.1):
+    max_coords = None
+    min_coords = None
+    for atom_coords in coords:
+        if max_coords is None:
+            max_coords = deepcopy(atom_coords)
+            min_coords = deepcopy(atom_coords)
+        else:
+            max_coords = np.maximum(max_coords, atom_coords)
+            min_coords = np.minimum(min_coords, atom_coords)
+    return np.diag((max_coords - min_coords) * (1.0 + cell_added_cutoff))
+
+
+def test_fchl_acsf():
+
+    all_xyzs = glob.glob(os.path.dirname(__file__) + "/assets/qm7/*.xyz")
+    random.seed(1)
+    xyzs = random.sample(all_xyzs, 16)
+    #    xyzs=["/home/konst/qmlcode/qmllib/tests/assets/qm7/0101.xyz"]
+    #    xyzs=["/home/konst/qmlcode/qmllib/tests/assets/qm7/4843.xyz"]
+
+    for xyz in xyzs:
+        print("xyz:", xyz)
+        coordinates, nuclear_charges = read_xyz(xyz)
+
+        cell = suitable_cell(coordinates)
+
+        (repa, anal_grad) = generate_fchl_acsf(
+            nuclear_charges, coordinates, gradients=True, cell=cell, **REP_PARAMS
         )
 
-        rep_wgrad1, drep1 = generate_fchl_acsf_brute_pbc(atoms, coords, cell, gradients=True)
-        rep_wgrad2, drep2 = generate_fchl_acsf(
-            atoms, coords, cell=cell, gradients=True, **REP_PARAMS
+        repb = generate_fchl_acsf(
+            nuclear_charges, coordinates, gradients=False, cell=cell, **REP_PARAMS
         )
 
-        reps_wgrad1.append(rep_wgrad1)
-        reps_wgrad2.append(rep_wgrad2)
+        assert np.allclose(repa, repb), "Error in FCHL-ACSF representation implementation"
 
-        dreps1.append(drep1)
-        dreps2.append(drep2)
+        repc = generate_fchl_acsf_brute_pbc(nuclear_charges, coordinates, cell)
+
+        assert np.allclose(repa, repc), "Error in PBC implementation"
+
+        repd, brute_pbc_grad = generate_fchl_acsf_brute_pbc(
+            nuclear_charges, coordinates, cell, gradients=True
+        )
+        if repd is None:
+            print("too large gradient matrix for brute PBC check")
+        else:
+            assert np.allclose(repd, repa)
+            assert np.allclose(
+                anal_grad, brute_pbc_grad
+            ), "Error in FCHL-ACSF gradient implementation"
+
+        num_grad = get_acsf_numgrad(coordinates, nuclear_charges, cell=cell)
+        print(
+            "analytic-numerical gradient difference vs. average magnitude:",
+            np.max(np.abs(num_grad - anal_grad)),
+            np.mean(np.abs(num_grad)),
+        )
 
-    ragged_array_close(
-        reps_no_grad1,
-        reps_no_grad2,
-        "Error in PBC implementation for generate_fchl_acsf without gradients.",
-    )
-    ragged_array_close(
-        reps_wgrad1,
-        reps_wgrad2,
-        "Error in PBC implementation for generate_fchl_acsf with gradients (representation).",
-    )
-    ragged_array_close(
-        dreps1,
-        dreps2,
-        "Error in PBC implementation for generate_fchl_acsf with gradients (gradient of representation).",
-    )
-    print("Passed")
 
+#        assert np.allclose(anal_grad, brute_pbc_grad), "Error in FCHL-ACSF gradient implementation"
 
-if __name__ == "__main__":
 
-    test_fchl_acsf_pbc()
+test_fchl_acsf()

From 68a6c1a940acbca078828b871f0356fadcdb5a95 Mon Sep 17 00:00:00 2001
From: kvkarandashev <kvkarandashev@gmail.com>
Date: Sun, 30 Jun 2024 16:19:32 +0200
Subject: [PATCH 4/7] excessive `print` statement deleted in SLATM.

---
 src/qmllib/representations/representations.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/qmllib/representations/representations.py b/src/qmllib/representations/representations.py
index c727f2aa..c38e4b14 100644
--- a/src/qmllib/representations/representations.py
+++ b/src/qmllib/representations/representations.py
@@ -565,7 +565,6 @@ def generate_slatm(
         n3 = 0
         mbs = np.zeros(0)
         for mbtype in mbtypes:
-            print(mbtype)
             if len(mbtype) == 1:
                 mbsi = get_boa(mbtype[0], zs)
                 if alchemy:

From 289b83d1e487031d91b72cd6bed985ac3c74b685 Mon Sep 17 00:00:00 2001
From: kvkarandashev <kvkarandashev@gmail.com>
Date: Tue, 4 Feb 2025 22:00:08 +0100
Subject: [PATCH 5/7] verification script revision

---
 tests/test_kernel_derivatives.py              |  2 +-
 ...acsf_pbc.py => verification_fchl19_pbc.py} | 37 +++++++++----------
 2 files changed, 19 insertions(+), 20 deletions(-)
 rename tests/{verification_fchl_acsf_pbc.py => verification_fchl19_pbc.py} (83%)

diff --git a/tests/test_kernel_derivatives.py b/tests/test_kernel_derivatives.py
index 005eb88a..0e14f871 100644
--- a/tests/test_kernel_derivatives.py
+++ b/tests/test_kernel_derivatives.py
@@ -78,7 +78,7 @@ def csv_to_molecular_reps(csv_filename):
 
                         disp_coords[j, xyz] += disp
 
-                        # dx1 = generate_fchl_acsf(nuclear_charges, disp_coords, **REP_PARAMS)
+                        # dx1 = generate_fchl19(nuclear_charges, disp_coords, **REP_PARAMS)
                         # print(dx1.shape)
                         dx1 = generate_acsf(nuclear_charges, disp_coords, pad=MAX_ATOMS)
 
diff --git a/tests/verification_fchl_acsf_pbc.py b/tests/verification_fchl19_pbc.py
similarity index 83%
rename from tests/verification_fchl_acsf_pbc.py
rename to tests/verification_fchl19_pbc.py
index 687a49ed..5358c2d6 100644
--- a/tests/verification_fchl_acsf_pbc.py
+++ b/tests/verification_fchl19_pbc.py
@@ -2,14 +2,13 @@
 This file contains tests for the atom centred symmetry function module.
 """
 
-import glob
-import os
+import pathlib
 import random
 from copy import deepcopy
 
 import numpy as np
 
-from qmllib.representations import generate_fchl_acsf
+from qmllib.representations import generate_fchl19
 from qmllib.utils.xyz_format import read_xyz
 
 # from tests.conftest import ASSETS
@@ -34,7 +33,7 @@ def pbc_corrected_drep(drep, num_atoms):
     return new_drep
 
 
-def generate_fchl_acsf_brute_pbc(nuclear_charges, coordinates, cell, gradients=False):
+def generate_fchl19_brute_pbc(nuclear_charges, coordinates, cell, gradients=False):
     num_atoms = len(nuclear_charges)
     all_coords = deepcopy(coordinates)
     all_charges = deepcopy(nuclear_charges)
@@ -55,9 +54,9 @@ def generate_fchl_acsf_brute_pbc(nuclear_charges, coordinates, cell, gradients=F
     if gradients:
         if len(all_charges) > 2500:
             return None, None
-        rep, drep = generate_fchl_acsf(all_charges, all_coords, gradients=gradients, **REP_PARAMS)
+        rep, drep = generate_fchl19(all_charges, all_coords, gradients=gradients, **REP_PARAMS)
     else:
-        rep = generate_fchl_acsf(all_charges, all_coords, gradients=gradients, **REP_PARAMS)
+        rep = generate_fchl19(all_charges, all_coords, gradients=gradients, **REP_PARAMS)
 
     rep = rep[:num_atoms, :]
     if gradients:
@@ -71,7 +70,7 @@ def get_acsf_numgrad(coordinates, nuclear_charges, dx=1e-6, cell=None):
     natoms = len(coordinates)
     true_coords = deepcopy(coordinates)
 
-    true_rep = generate_fchl_acsf(
+    true_rep = generate_fchl19(
         nuclear_charges, coordinates, gradients=False, cell=cell, **REP_PARAMS
     )
 
@@ -83,25 +82,25 @@ def get_acsf_numgrad(coordinates, nuclear_charges, dx=1e-6, cell=None):
             temp_coords = deepcopy(true_coords)
             temp_coords[n, xyz] = x + 2.0 * dx
 
-            rep = generate_fchl_acsf(
+            rep = generate_fchl19(
                 nuclear_charges, temp_coords, gradients=False, cell=cell, **REP_PARAMS
             )
             gradient[xyz, n] -= rep
 
             temp_coords[n, xyz] = x + dx
-            rep = generate_fchl_acsf(
+            rep = generate_fchl19(
                 nuclear_charges, temp_coords, gradients=False, cell=cell, **REP_PARAMS
             )
             gradient[xyz, n] += 8.0 * rep
 
             temp_coords[n, xyz] = x - dx
-            rep = generate_fchl_acsf(
+            rep = generate_fchl19(
                 nuclear_charges, temp_coords, gradients=False, cell=cell, **REP_PARAMS
             )
             gradient[xyz, n] -= 8.0 * rep
 
             temp_coords[n, xyz] = x - 2.0 * dx
-            rep = generate_fchl_acsf(
+            rep = generate_fchl19(
                 nuclear_charges, temp_coords, gradients=False, cell=cell, **REP_PARAMS
             )
             gradient[xyz, n] += rep
@@ -129,9 +128,9 @@ def suitable_cell(coords, cell_added_cutoff=0.1):
     return np.diag((max_coords - min_coords) * (1.0 + cell_added_cutoff))
 
 
-def test_fchl_acsf():
+def test_fchl19():
 
-    all_xyzs = glob.glob(os.path.dirname(__file__) + "/assets/qm7/*.xyz")
+    all_xyzs = list(pathlib.Path("./assets/qm7").glob("*.xyz"))
     random.seed(1)
     xyzs = random.sample(all_xyzs, 16)
     #    xyzs=["/home/konst/qmlcode/qmllib/tests/assets/qm7/0101.xyz"]
@@ -143,21 +142,21 @@ def test_fchl_acsf():
 
         cell = suitable_cell(coordinates)
 
-        (repa, anal_grad) = generate_fchl_acsf(
+        (repa, anal_grad) = generate_fchl19(
             nuclear_charges, coordinates, gradients=True, cell=cell, **REP_PARAMS
         )
 
-        repb = generate_fchl_acsf(
+        repb = generate_fchl19(
             nuclear_charges, coordinates, gradients=False, cell=cell, **REP_PARAMS
         )
 
-        assert np.allclose(repa, repb), "Error in FCHL-ACSF representation implementation"
+        assert np.allclose(repa, repb), "Error in FCHL19 representation implementation"
 
-        repc = generate_fchl_acsf_brute_pbc(nuclear_charges, coordinates, cell)
+        repc = generate_fchl19_brute_pbc(nuclear_charges, coordinates, cell)
 
         assert np.allclose(repa, repc), "Error in PBC implementation"
 
-        repd, brute_pbc_grad = generate_fchl_acsf_brute_pbc(
+        repd, brute_pbc_grad = generate_fchl19_brute_pbc(
             nuclear_charges, coordinates, cell, gradients=True
         )
         if repd is None:
@@ -179,4 +178,4 @@ def test_fchl_acsf():
 #        assert np.allclose(anal_grad, brute_pbc_grad), "Error in FCHL-ACSF gradient implementation"
 
 
-test_fchl_acsf()
+test_fchl19()

From 8ccc30383bd7cfde0df10f42a79db9a519933ff8 Mon Sep 17 00:00:00 2001
From: kvkarandashev <kvkarandashev@gmail.com>
Date: Wed, 5 Feb 2025 11:03:57 +0100
Subject: [PATCH 6/7] OpenMP bugfix

---
 src/qmllib/representations/facsf.f90 | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/qmllib/representations/facsf.f90 b/src/qmllib/representations/facsf.f90
index e42a6d4e..5c3059d6 100644
--- a/src/qmllib/representations/facsf.f90
+++ b/src/qmllib/representations/facsf.f90
@@ -700,7 +700,7 @@ subroutine fgenerate_fchl_acsf(coordinates, nuclear_charges, elements, &
    allocate (radial(nbasis2), saved_radial(nbasis2, natoms_tot), saved_j(natoms_tot))
 
    radial = 0.0d0
-!$OMP PARALLEL DO PRIVATE(n,m,rij,radial,mu,sigma, saved_radial, saved_j) SCHEDULE(dynamic)
+!$OMP PARALLEL DO PRIVATE(n,m,rij,radial,mu,sigma, saved_radial, saved_j, max_j_id) SCHEDULE(dynamic)
    do i = 1, natoms
 ! index of the element of atom i
       max_j_id = 0
@@ -723,7 +723,7 @@ subroutine fgenerate_fchl_acsf(coordinates, nuclear_charges, elements, &
          end if
       end do
       m = element_types(i)
-      !$OMP CRITICAL
+!$OMP CRITICAL
       do j_id = 1, max_j_id
          j = saved_j(j_id)
          ! index of the element of atom j
@@ -733,7 +733,7 @@ subroutine fgenerate_fchl_acsf(coordinates, nuclear_charges, elements, &
             rep(j, (m - 1)*nbasis2 + 1:m*nbasis2) = rep(j, (m - 1)*nbasis2 + 1:m*nbasis2) + saved_radial(:, j_id)
          end if
       end do
-      !$OMP END CRITICAL
+!$OMP END CRITICAL
    end do
 !$OMP END PARALLEL DO
    deallocate (radial, saved_radial, saved_j)

From 4773c9bd51c66db9fb0fe39e868eaeca3d5d5b59 Mon Sep 17 00:00:00 2001
From: kvkarandashev <kvkarandashev@gmail.com>
Date: Tue, 18 Feb 2025 14:13:48 +0100
Subject: [PATCH 7/7] verification script turned into test script

---
 ...ation_fchl19_pbc.py => test_fchl19_pbc.py} | 49 +++++++------------
 1 file changed, 19 insertions(+), 30 deletions(-)
 rename tests/{verification_fchl19_pbc.py => test_fchl19_pbc.py} (81%)

diff --git a/tests/verification_fchl19_pbc.py b/tests/test_fchl19_pbc.py
similarity index 81%
rename from tests/verification_fchl19_pbc.py
rename to tests/test_fchl19_pbc.py
index 5358c2d6..ed387aff 100644
--- a/tests/verification_fchl19_pbc.py
+++ b/tests/test_fchl19_pbc.py
@@ -2,17 +2,15 @@
 This file contains tests for the atom centred symmetry function module.
 """
 
-import pathlib
-import random
+# import pathlib
 from copy import deepcopy
 
 import numpy as np
+from conftest import ASSETS
 
 from qmllib.representations import generate_fchl19
 from qmllib.utils.xyz_format import read_xyz
 
-# from tests.conftest import ASSETS
-
 np.set_printoptions(linewidth=666, edgeitems=10)
 REP_PARAMS = dict()
 REP_PARAMS["elements"] = [1, 6, 7, 8, 16]
@@ -128,16 +126,16 @@ def suitable_cell(coords, cell_added_cutoff=0.1):
     return np.diag((max_coords - min_coords) * (1.0 + cell_added_cutoff))
 
 
-def test_fchl19():
+def test_fchl19_pbc():
 
-    all_xyzs = list(pathlib.Path("./assets/qm7").glob("*.xyz"))
-    random.seed(1)
-    xyzs = random.sample(all_xyzs, 16)
-    #    xyzs=["/home/konst/qmlcode/qmllib/tests/assets/qm7/0101.xyz"]
-    #    xyzs=["/home/konst/qmlcode/qmllib/tests/assets/qm7/4843.xyz"]
+    #    all_xyzs = list(pathlib.Path("./assets/qm7").glob("*.xyz"))
+    #    random.seed(3)
+    #    xyzs = random.sample(all_xyzs, 1)
+    xyzs = [ASSETS / "qm7/0101.xyz"]
+    #    xyzs=[ASSETS / "qm7/0101.xyz"]
 
     for xyz in xyzs:
-        print("xyz:", xyz)
+        #        print("xyz:", xyz)
         coordinates, nuclear_charges = read_xyz(xyz)
 
         cell = suitable_cell(coordinates)
@@ -156,26 +154,17 @@ def test_fchl19():
 
         assert np.allclose(repa, repc), "Error in PBC implementation"
 
-        repd, brute_pbc_grad = generate_fchl19_brute_pbc(
-            nuclear_charges, coordinates, cell, gradients=True
-        )
-        if repd is None:
-            print("too large gradient matrix for brute PBC check")
-        else:
-            assert np.allclose(repd, repa)
-            assert np.allclose(
-                anal_grad, brute_pbc_grad
-            ), "Error in FCHL-ACSF gradient implementation"
-
         num_grad = get_acsf_numgrad(coordinates, nuclear_charges, cell=cell)
-        print(
-            "analytic-numerical gradient difference vs. average magnitude:",
-            np.max(np.abs(num_grad - anal_grad)),
-            np.mean(np.abs(num_grad)),
-        )
-
+        #        print(
+        #            "analytic-numerical gradient difference vs. average magnitude:",
+        #            np.max(np.abs(num_grad - anal_grad)),
+        #            np.mean(np.abs(num_grad)),
+        #        )
 
-#        assert np.allclose(anal_grad, brute_pbc_grad), "Error in FCHL-ACSF gradient implementation"
+        assert np.allclose(
+            anal_grad, num_grad, atol=5.0e-6
+        ), "Error in FCHL-ACSF gradient implementation"
 
 
-test_fchl19()
+if __name__ == "__main__":
+    test_fchl19_pbc()