From d776425668abae727e852ee2bd7a8cddc71907f4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=89tienne=20Polack?= <polack@hollved.net>
Date: Thu, 8 Feb 2024 15:07:06 +0100
Subject: [PATCH 1/3] big squash

---
 docs/src/developer/data_structures.md         |   5 +-
 docs/src/guide/discretisation.jl              |   3 +-
 examples/custom_potential.jl                  |   5 +-
 examples/error_estimates_forces.jl            |  68 +++++-----
 examples/gross_pitaevskii.jl                  |   9 +-
 .../DFTKGenericLinearAlgebraExt.jl            |  24 +++-
 ext/DFTKIntervalArithmeticExt.jl              |   4 +-
 ext/DFTKJLD2Ext.jl                            |   6 +
 ext/DFTKWriteVTKExt.jl                        |  14 ++-
 src/DFTK.jl                                   |   3 +
 src/Model.jl                                  |  16 ++-
 src/PlaneWaveBasis.jl                         |  11 +-
 src/Psi.jl                                    |  49 ++++++++
 src/common/ortho.jl                           |  12 +-
 src/common/types.jl                           |   1 +
 src/densities.jl                              |  53 +++++---
 src/eigen/diag.jl                             |  42 ++++---
 src/eigen/lobpcg_hyper_impl.jl                |   2 +-
 src/eigen/preconditioners.jl                  |  26 +++-
 src/external/wannier_shared.jl                |  10 +-
 src/fft.jl                                    |  89 ++++++++++---
 src/input_output.jl                           |   3 +
 src/interpolation.jl                          |  16 +--
 src/orbitals.jl                               |  22 ++--
 src/postprocess/current.jl                    |  20 +--
 src/response/chi0.jl                          |  43 ++++---
 src/response/hessian.jl                       |  54 ++++----
 src/scf/direct_minimization.jl                |  21 ++--
 src/scf/nbands_algorithm.jl                   |   4 +-
 src/scf/newton.jl                             |  39 +++---
 src/scf/self_consistent_field.jl              |   9 +-
 src/supercell.jl                              |  13 +-
 src/symmetry.jl                               |  14 ++-
 src/terms/Hamiltonian.jl                      |  65 ++++++----
 src/terms/anyonic.jl                          |   8 +-
 src/terms/entropy.jl                          |   4 +-
 src/terms/kinetic.jl                          |   6 +-
 src/terms/magnetic.jl                         |   6 +-
 src/terms/nonlocal.jl                         |  52 ++++----
 src/terms/operators.jl                        | 110 +++++++++++-----
 src/transfer.jl                               |  20 +--
 src/workarounds/forwarddiff_rules.jl          |  12 +-
 test/anyons.jl                                |  10 +-
 test/chi0.jl                                  |   2 +-
 test/compute_density.jl                       |  31 ++---
 test/compute_jacobian_eigen.jl                |  25 ++--
 test/fourier_transforms.jl                    |  16 +--
 test/hamiltonian_consistency.jl               |  26 ++--
 test/hessian.jl                               |  16 ++-
 test/multicomponents.jl                       | 119 ++++++++++++++++++
 test/scf_compare.jl                           |   4 +-
 test/serialisation.jl                         |   1 +
 test/todict.jl                                |  14 ++-
 test/transfer.jl                              |  13 +-
 54 files changed, 860 insertions(+), 410 deletions(-)
 create mode 100644 src/Psi.jl
 create mode 100644 test/multicomponents.jl

diff --git a/docs/src/developer/data_structures.md b/docs/src/developer/data_structures.md
index 74a18bdb43..bf35cf1238 100644
--- a/docs/src/developer/data_structures.md
+++ b/docs/src/developer/data_structures.md
@@ -140,10 +140,13 @@ For example let us check the normalization of the first eigenfunction
 at the first ``k``-point in reciprocal space:
 
 ```@example data_structures
-ψtest = scfres.ψ[1][:, 1]
+ψtest = scfres.ψ[1][:, :, 1]
 sum(abs2.(ψtest))
 ```
 
+The first index of `ψtest` is the component (e.g., the spin) and the second is the
+``G``-component.
+
 We now perform an IFFT to get ψ in real space. The ``k``-point has to be
 passed because ψ is expressed on the ``k``-dependent basis.
 Again the function is normalised:
diff --git a/docs/src/guide/discretisation.jl b/docs/src/guide/discretisation.jl
index b3737fb896..f5c0d67546 100644
--- a/docs/src/guide/discretisation.jl
+++ b/docs/src/guide/discretisation.jl
@@ -109,7 +109,8 @@ scfres.energies
 # We can also get the first eigenvector (in the plane wave basis) and plot it
 using Plots
 
-ψ_fourier = scfres.ψ[1][:, 1];    # first k-point, all G components, first eigenvector
+ψ_fourier = scfres.ψ[1][1, :, 1];  # first k-point, first (and only) component,
+                                   #   all G components, first eigenvector
 ## Transform the wave function to real space
 ψ = ifft(basis, basis.kpoints[1], ψ_fourier)[:, 1, 1]
 ## Eigenvectors are only defined up to a phase. We fix it by imposing that psi(0) is real
diff --git a/examples/custom_potential.jl b/examples/custom_potential.jl
index 43f166df6f..86235ce750 100644
--- a/examples/custom_potential.jl
+++ b/examples/custom_potential.jl
@@ -72,8 +72,9 @@ compute_forces(scfres)
 tot_local_pot = DFTK.total_local_potential(scfres.ham)[:, 1, 1]; # use only dimension 1
 
 # Extract other quantities before plotting them
-ρ = scfres.ρ[:, 1, 1, 1]        # converged density, first spin component
-ψ_fourier = scfres.ψ[1][:, 1]   # first k-point, all G components, first eigenvector
+ρ = scfres.ρ[:, 1, 1, 1]           # converged density, first spin component
+ψ_fourier = scfres.ψ[1][1, :, 1]   # first k-point, first (and only) component,
+                                   #   all G components, first eigenvector
 
 # Transform the wave function to real space and fix the phase:
 ψ = ifft(basis, basis.kpoints[1], ψ_fourier)[:, 1, 1]
diff --git a/examples/error_estimates_forces.jl b/examples/error_estimates_forces.jl
index 7e7000ca2b..db79d1ad27 100644
--- a/examples/error_estimates_forces.jl
+++ b/examples/error_estimates_forces.jl
@@ -85,50 +85,60 @@ res, occ = DFTK.select_occupied_orbitals(basis_ref, res, scfres.occupation)
 #   size ``N×N`` (``N`` being the number of electrons) whose solution is
 #   ``U = S(S^*S)^{-1/2}`` where ``S`` is the overlap matrix ``ψ^*ϕ``.
 function compute_error(basis, ϕ, ψ)
-    map(zip(ϕ, ψ)) do (ϕk, ψk)
+    ϕ = to_composite_σG(basis, ϕ)
+    ψ = to_composite_σG(basis, ψ)
+    map(zip(basis.kpoints, ϕ, ψ)) do (kpt, ϕk, ψk)
         S = ψk'ϕk
         U = S*(S'S)^(-1/2)
-        ϕk - ψk*U
+        from_composite_σG(basis, kpt, ϕk - ψk*U)
     end
 end
 err = compute_error(basis_ref, ψr, ψ_ref);
 
 # - Compute ``{\boldsymbol M}^{-1}R(P)`` with ``{\boldsymbol M}^{-1}`` defined in [^CDKL2021]:
 P = [PreconditionerTPA(basis_ref, kpt) for kpt in basis_ref.kpoints]
-map(zip(P, ψr)) do (Pk, ψk)
-    DFTK.precondprep!(Pk, ψk)
+map(zip(P, ψr, basis_ref.kpoints)) do (Pk, ψk, kpt)
+    DFTK.precondprep!(Pk, to_composite_σG(basis_ref, kpt, ψk))
 end
-function apply_M(φk, Pk, δφnk, n)
-    DFTK.proj_tangent_kpt!(δφnk, φk)
-    δφnk = sqrt.(Pk.mean_kin[n] .+ Pk.kin) .* δφnk
-    DFTK.proj_tangent_kpt!(δφnk, φk)
-    δφnk = sqrt.(Pk.mean_kin[n] .+ Pk.kin) .* δφnk
-    DFTK.proj_tangent_kpt!(δφnk, φk)
+function apply_M(basis, kpt, φk, Pk, δφnk, n)
+    fact = reshape(sqrt.(Pk.mean_kin[n] .+ Pk.kin), size(δφnk)...)
+    DFTK.proj_tangent_kpt!(δφnk, basis, kpt, φk)
+    δφnk = fact .* δφnk
+    DFTK.proj_tangent_kpt!(δφnk, basis, kpt, φk)
+    δφnk = fact .* δφnk
+    DFTK.proj_tangent_kpt!(δφnk, basis, kpt, φk)
 end
-function apply_inv_M(φk, Pk, δφnk, n)
-    DFTK.proj_tangent_kpt!(δφnk, φk)
-    op(x) = apply_M(φk, Pk, x, n)
+function apply_inv_M(basis, kpt, φk, Pk, δφnk, n)
+    DFTK.proj_tangent_kpt!(δφnk, basis, kpt, φk)
+    op(x) = let
+        x = from_composite_σG(basis, kpt, x)
+        to_composite_σG(basis, kpt, apply_M(basis, kpt, φk, Pk, x, n))
+    end
     function f_ldiv!(x, y)
-        x .= DFTK.proj_tangent_kpt(y, φk)
+        x_σG = from_composite_σG(basis, kpt, x)
+        y_σG = from_composite_σG(basis, kpt, y)
+        x_σG .= DFTK.proj_tangent_kpt(basis, kpt, y_σG, φk)
         x ./= (Pk.mean_kin[n] .+ Pk.kin)
-        DFTK.proj_tangent_kpt!(x, φk)
+        DFTK.proj_tangent_kpt!(x_σG, basis, kpt, φk)
+        x
     end
-    J = LinearMap{eltype(φk)}(op, size(δφnk, 1))
-    δφnk = cg(J, δφnk; Pl=DFTK.FunctionPreconditioner(f_ldiv!),
-              verbose=false, reltol=0, abstol=1e-15)
-    DFTK.proj_tangent_kpt!(δφnk, φk)
+    J = LinearMap{eltype(φk)}(op, prod(size(δφnk)[1:2]))
+    δφnk_vec = to_composite_σG(basis, kpt, δφnk)
+    δφnk_vec = cg(J, δφnk_vec; Pl=DFTK.FunctionPreconditioner(f_ldiv!),
+                  verbose=false, reltol=0, abstol=1e-15)
+    DFTK.proj_tangent_kpt!(δφnk, basis, kpt, φk)
 end
-function apply_metric(φ, P, δφ, A::Function)
+function apply_metric(basis, φ, P, δφ, A::Function)
     map(enumerate(δφ)) do (ik, δφk)
         Aδφk = similar(δφk)
         φk = φ[ik]
-        for n = 1:size(δφk,2)
-            Aδφk[:,n] = A(φk, P[ik], δφk[:,n], n)
+        for n = 1:size(δφk, 3)
+            Aδφk[:, :, n] = A(basis, basis.kpoints[ik], φk, P[ik], δφk[:, :, n], n)
         end
         Aδφk
     end
 end
-Mres = apply_metric(ψr, P, res, apply_inv_M);
+Mres = apply_metric(basis, ψr, P, res, apply_inv_M);
 
 # We can now compute the modified residual ``R_{\rm Schur}(P)`` using a Schur
 # complement to approximate the error on low-frequencies[^CDKL2021]:
@@ -151,14 +161,14 @@ resLF = DFTK.transfer_blochwave(res, basis_ref, basis)
 resHF = res - DFTK.transfer_blochwave(resLF, basis, basis_ref);
 
 # - Compute ``{\boldsymbol M}^{-1}_{22}R_2(P)``:
-e2 = apply_metric(ψr, P, resHF, apply_inv_M);
+e2 = apply_metric(basis, ψr, P, resHF, apply_inv_M);
 
 # - Compute the right hand side of the Schur system:
 ## Rayleigh coefficients needed for `apply_Ω`
-Λ = map(enumerate(ψr)) do (ik, ψk)
-    Hk = hamr.blocks[ik]
-    Hψk = Hk * ψk
-    ψk'Hψk
+Λ = map(enumerate(zip(ψr, hamr * ψr))) do (ik, (ψk, Hψk))
+    ψk = to_composite_σG(basis, basis.kpoints[ik], ψk)
+    Hψk = to_composite_σG(basis, basis.kpoints[ik], Hψk)
+    from_composite_σG(basis, basis.kpoints[ik], ψk'Hψk)
 end
 ΩpKe2 = DFTK.apply_Ω(e2, ψr, hamr, Λ) .+ DFTK.apply_K(basis_ref, e2, ψr, ρr, occ)
 ΩpKe2 = DFTK.transfer_blochwave(ΩpKe2, basis_ref, basis)
@@ -204,7 +214,7 @@ end;
 #   the actual error ``P-P_*``. Usually this is of course not the case, but this
 #   is the "best" improvement we can hope for with a linearisation, so we are
 #   aiming for this precision.
-df_err = df(basis_ref, occ, ψr, DFTK.proj_tangent(err, ψr), ρr)
+df_err = df(basis_ref, occ, ψr, DFTK.proj_tangent(basis, err, ψr), ρr)
 forces["F(P) - df(P)⋅(P-P_*)"]   = f - df_err
 relerror["F(P) - df(P)⋅(P-P_*)"] = compute_relerror(f - df_err);
 
diff --git a/examples/gross_pitaevskii.jl b/examples/gross_pitaevskii.jl
index 63f386a33d..1c13a87d34 100644
--- a/examples/gross_pitaevskii.jl
+++ b/examples/gross_pitaevskii.jl
@@ -57,8 +57,9 @@ scfres.energies
 # We use the opportunity to explore some of DFTK internals.
 #
 # Extract the converged density and the obtained wave function:
-ρ = real(scfres.ρ)[:, 1, 1, 1]  # converged density, first spin component
-ψ_fourier = scfres.ψ[1][:, 1];    # first k-point, all G components, first eigenvector
+ρ = real(scfres.ρ)[:, 1, 1, 1]     # converged density, first spin component
+ψ_fourier = scfres.ψ[1][1, :, 1]   # first k-point, first (and only) component,
+                                   #   all G components, first eigenvector
 
 # Transform the wave function to real space and fix the phase:
 ψ = ifft(basis, basis.kpoints[1], ψ_fourier)[:, 1, 1]
@@ -93,7 +94,7 @@ E, ham = energy_hamiltonian(basis, scfres.ψ, scfres.occupation; ρ=scfres.ρ)
 H = ham.blocks[1];
 
 # `H` can be used as a linear operator (efficiently using FFTs), or converted to a dense matrix:
-ψ11 = scfres.ψ[1][:, 1] # first k-point, first eigenvector
+ψ11 = scfres.ψ[1][1, :, 1]  # first k-point, first component, first eigenvector
 Hmat = Array(H)  # This is now just a plain Julia matrix,
 ##                  which we can compute and store in this simple 1D example
 @assert norm(Hmat * ψ11 - H * ψ11) < 1e-10
@@ -107,4 +108,4 @@ A[1, end] = A[end, 1] = -1
 K = A / dx^2 / 2
 V = Diagonal(pot.(x) + C .* α .* (ρ.^(α-1)))
 H_findiff = K + V;
-maximum(abs.(H_findiff*ψ - (dot(ψ, H_findiff*ψ) / dot(ψ, ψ)) * ψ))
+maximum(abs, H_findiff*ψ - (dot(ψ, H_findiff*ψ) / dot(ψ, ψ)) * ψ)
diff --git a/ext/DFTKGenericLinearAlgebraExt/DFTKGenericLinearAlgebraExt.jl b/ext/DFTKGenericLinearAlgebraExt/DFTKGenericLinearAlgebraExt.jl
index 1c16df7438..451afe3168 100644
--- a/ext/DFTKGenericLinearAlgebraExt/DFTKGenericLinearAlgebraExt.jl
+++ b/ext/DFTKGenericLinearAlgebraExt/DFTKGenericLinearAlgebraExt.jl
@@ -29,21 +29,21 @@ end
 DFTK.default_primes(::Any) = (2, )
 
 # Generic fallback function, Float32 and Float64 specialization in fft.jl
-function DFTK.build_fft_plans!(tmp::AbstractArray{<:Complex})
+function DFTK.build_fft_plans!(tmp::AbstractArray{<:Complex}; region=1:ndims(tmp))
     # Note: FourierTransforms has no support for in-place FFTs at the moment
     # ... also it's extension to multi-dimensional arrays is broken and
     #     the algo only works for some cases
     @assert all(ispow2, size(tmp))
 
-    # opFFT = AbstractFFTs.plan_fft(tmp)   # TODO When multidim works
+    # opFFT = AbstractFFTs.plan_fft(tmp)     # TODO When multidim works
     # opBFFT = inv(opFFT).p
-    opFFT  = generic_plan_fft(tmp)               # Fallback for now
+    opFFT  = generic_plan_fft(tmp)           # Fallback for now
     opBFFT = generic_plan_bfft(tmp)
     # TODO Can be cut once FourierTransforms supports AbstractFFTs properly
     ipFFT  = DummyInplace{typeof(opFFT)}(opFFT)
     ipBFFT = DummyInplace{typeof(opBFFT)}(opBFFT)
 
-    ipFFT, opFFT, ipBFFT, opBFFT
+    (; ipFFT, opFFT, ipBFFT, opBFFT)
 end
 
 struct GenericPlan{T}
@@ -51,7 +51,7 @@ struct GenericPlan{T}
     factor::T
 end
 
-function Base.:*(p::GenericPlan, X::AbstractArray)
+function Base.:*(p::GenericPlan, X::AbstractArray{T, 3}) where {T}
     pl1, pl2, pl3 = p.subplans
     ret = similar(X)
     for i = 1:size(X, 1), j = 1:size(X, 2)
@@ -86,4 +86,18 @@ function generic_plan_bfft(data::AbstractArray{T, 3}) where {T}
                     plan_bfft(data[1, 1, :])], T(1))
 end
 
+# TODO: Let's not bother with multicomponents yet.
+function generic_plan_fft(data::AbstractArray{T, 4}) where {T}
+    @assert size(data, 1) == 1
+    generic_plan_fft(data[1, :, :, :])
+end
+function generic_plan_bfft(data::AbstractArray{T, 4}) where {T}
+    @assert size(data, 1) == 1
+    generic_plan_bfft(data[1, :, :, :])
+end
+function Base.:*(p::GenericPlan, X::AbstractArray{T, 4}) where {T}
+    @assert size(X, 1) == 1
+    reshape(p * X[1, :, :, :], size(X)...)
+end
+
 end
diff --git a/ext/DFTKIntervalArithmeticExt.jl b/ext/DFTKIntervalArithmeticExt.jl
index 8de5309115..53dcdd8ce3 100644
--- a/ext/DFTKIntervalArithmeticExt.jl
+++ b/ext/DFTKIntervalArithmeticExt.jl
@@ -42,8 +42,8 @@ function _is_well_conditioned(A::AbstractArray{<:Interval}; kwargs...)
 end
 
 function symmetry_operations(lattice::AbstractMatrix{<:Interval}, atoms, positions,
-                                  magnetic_moments=[];
-                                  tol_symmetry=max(SYMMETRY_TOLERANCE, maximum(radius, lattice)))
+                             magnetic_moments=[];
+                             tol_symmetry=max(SYMMETRY_TOLERANCE, maximum(radius, lattice)))
     @assert tol_symmetry < 1e-2
     symmetry_operations(IntervalArithmetic.mid.(lattice), atoms, positions, magnetic_moments;
                         tol_symmetry)
diff --git a/ext/DFTKJLD2Ext.jl b/ext/DFTKJLD2Ext.jl
index 57ab1904d3..ac0d93900a 100644
--- a/ext/DFTKJLD2Ext.jl
+++ b/ext/DFTKJLD2Ext.jl
@@ -89,6 +89,12 @@ function load_scfres_jld2(jld, basis; skip_hamiltonian, strict)
                             "($(jld["n_spin_components"])) and supplied basis " *
                             "($(basis.model.n_spin_components))")
             end
+            if jld["n_components"] != basis.model.n_components
+                consistent_kpts = false
+                errormsg = ("Mismatch in number of spin components between file " *
+                            "($(jld["n_components"])) and supplied basis " *
+                            "($(basis.model.n_components))")
+            end
         end
         errormsg = MPI.bcast(errormsg, 0, MPI.COMM_WORLD)
         isempty(errormsg) || error(errormsg)
diff --git a/ext/DFTKWriteVTKExt.jl b/ext/DFTKWriteVTKExt.jl
index 71ee529d1a..7c046d5508 100644
--- a/ext/DFTKWriteVTKExt.jl
+++ b/ext/DFTKWriteVTKExt.jl
@@ -24,12 +24,14 @@ function DFTK.save_scfres(::Val{:vts}, filename::AbstractString, scfres::NamedTu
         ψblock = gather_kpts_block(scfres.basis, ψblock_dist)
 
         if mpi_master()
-            for ik = 1:length(basis.kpoints), n = 1:size(ψblock, 2)
-                kpt_n_G = length(G_vectors(basis, basis.kpoints[ik]))
-                ψnk_real = ifft(basis, basis.kpoints[ik], ψblock[1:kpt_n_G, n, ik])
-                prefix = @sprintf "ψ_k%03i_n%03i" ik n
-                vtkfile["$(prefix)_real"] = real.(ψnk_real)
-                vtkfile["$(prefix)_imag"] = imag.(ψnk_real)
+            for ik = 1:length(basis.kpoints)
+                for σ = 1:basis.model.n_components, n = 1:size(ψblock, 3)
+                    kpt_n_G = length(G_vectors(basis, basis.kpoints[ik]))
+                    ψσnk_real = ifft(basis, basis.kpoints[ik], ψblock[:, 1:kpt_n_G, n, ik])
+                    prefix = @sprintf "ψ_k%03i_n%03i_σ%03i" ik n σ
+                    vtkfile["$(prefix)_real"] = real.(ψσnk_real)
+                    vtkfile["$(prefix)_imag"] = imag.(ψσnk_real)
+                end
             end
         end
     end
diff --git a/src/DFTK.jl b/src/DFTK.jl
index b95f3bf2ee..202fd4f678 100644
--- a/src/DFTK.jl
+++ b/src/DFTK.jl
@@ -75,6 +75,8 @@ export compute_fft_size
 export G_vectors, G_vectors_cart, r_vectors, r_vectors_cart
 export Gplusk_vectors, Gplusk_vectors_cart
 export Kpoint
+export to_composite_σG
+export from_composite_σG
 export ifft
 export irfft
 export ifft!
@@ -86,6 +88,7 @@ include("Model.jl")
 include("structure.jl")
 include("bzmesh.jl")
 include("PlaneWaveBasis.jl")
+include("Psi.jl")
 include("fft.jl")
 include("orbitals.jl")
 include("input_output.jl")
diff --git a/src/Model.jl b/src/Model.jl
index 04e0128e9f..9f0979de5c 100644
--- a/src/Model.jl
+++ b/src/Model.jl
@@ -26,6 +26,10 @@ struct Model{T <: Real, VT <: Real}
     n_electrons::Union{Int, Nothing}
     εF::Union{T, Nothing}
 
+    # Option for multicomponents computations.
+    # TODO: Planed to replace current handling of spins, as it will allow full spin computations.
+    n_components::Int
+
     # spin_polarization values:
     #     :none       No spin polarization, αα and ββ density identical,
     #                 αβ and βα blocks zero, 1 spin component treated explicitly
@@ -106,7 +110,8 @@ function Model(lattice::AbstractMatrix{T},
                terms=[Kinetic()],
                temperature=zero(T),
                smearing=temperature > 0 ? Smearing.FermiDirac() : Smearing.None(),
-               spin_polarization=determine_spin_polarization(magnetic_moments),
+               n_components=1,
+               spin_polarization=default_spin_polarization(magnetic_moments, n_components),
                symmetries=default_symmetries(lattice, atoms, positions, magnetic_moments,
                                              spin_polarization, terms),
                ) where {T <: Real}
@@ -178,7 +183,8 @@ function Model(lattice::AbstractMatrix{T},
     Model{T,value_type(T)}(model_name,
                            lattice, recip_lattice, n_dim, inv_lattice, inv_recip_lattice,
                            unit_cell_volume, recip_cell_volume,
-                           n_electrons, εF, spin_polarization, n_spin, temperature, smearing,
+                           n_electrons, εF, n_components, spin_polarization, n_spin,
+                           temperature, smearing,
                            atoms, positions, atom_groups, terms, symmetries)
 end
 function Model(lattice::AbstractMatrix{<:Integer}, atoms::Vector{<:Element},
@@ -224,6 +230,7 @@ function Model{T}(model::Model;
           model.temperature,
           model.smearing,
           model.εF,
+          model.n_components,
           model.spin_polarization,
           model.symmetries,
           # Can be safely disabled: this has been checked for model
@@ -249,6 +256,11 @@ normalize_magnetic_moment(mm::AbstractVector)::Vec3{Float64} = mm
 """Number of valence electrons."""
 n_electrons_from_atoms(atoms) = sum(n_elec_valence, atoms; init=0)
 
+function default_spin_polarization(magnetic_moments, n_components)
+    n_components > 1 && return :spinless
+    determine_spin_polarization(magnetic_moments)
+end
+
 """
 Default logic to determine the symmetry operations to be used in the model.
 """
diff --git a/src/PlaneWaveBasis.jl b/src/PlaneWaveBasis.jl
index 52e0945b69..3d1f9f658e 100644
--- a/src/PlaneWaveBasis.jl
+++ b/src/PlaneWaveBasis.jl
@@ -73,6 +73,11 @@ struct PlaneWaveBasis{T,
     ipBFFT
     fft_normalization::T   # fft = fft_normalization * FFT
     ifft_normalization::T  # ifft = ifft_normalization * BFFT
+    # Multicomponents plans
+    opFFT_mc
+    ipFFT_mc
+    opBFFT_mc
+    ipBFFT_mc
 
     # "cubic" basis in reciprocal and real space, on which potentials and densities are stored
     G_vectors::T_G_vectors
@@ -250,7 +255,10 @@ function PlaneWaveBasis(model::Model{T}, Ecut::Real, fft_size::Tuple{Int, Int, I
 
     # Setup FFT plans
     Gs = to_device(architecture, G_vectors(fft_size))
-    (ipFFT, opFFT, ipBFFT, opBFFT) = build_fft_plans!(similar(Gs, Complex{T}, fft_size))
+    (; ipFFT, opFFT, ipBFFT, opBFFT) = build_fft_plans!(similar(Gs, Complex{T}, fft_size))
+    # strided FFT plans for multicomponents
+    ipFFT_mc, opFFT_mc, ipBFFT_mc, opBFFT_mc =
+        build_fft_plans!(similar(Gs, Complex{T}, model.n_components, fft_size...); region=2:4)
 
     # Normalization constants
     # fft = fft_normalization * FFT
@@ -338,6 +346,7 @@ function PlaneWaveBasis(model::Model{T}, Ecut::Real, fft_size::Tuple{Int, Int, I
         Ecut, variational,
         opFFT, ipFFT, opBFFT, ipBFFT,
         fft_normalization, ifft_normalization,
+        opFFT_mc, ipFFT_mc, opBFFT_mc, ipBFFT_mc,
         Gs, r_vectors,
         kpoints, kweights, kgrid,
         kcoords_global, kweights_global,
diff --git a/src/Psi.jl b/src/Psi.jl
new file mode 100644
index 0000000000..fcb58b2acf
--- /dev/null
+++ b/src/Psi.jl
@@ -0,0 +1,49 @@
+# Helpers functions converting to and fro implicit composite definition in [σG] and explicit
+# definition in [σ,G].
+# The `basis` and `kpoint` arguments are not-necessary, but we keep them to match the
+# signatures of most DFTK functions.
+
+# ψk[σ, G, n] → ψk[σG, n]
+# `prod()` because of Sternheimer solver where some quantities might have 0-ending sizes.
+to_composite_σG(::PlaneWaveBasis, ::Kpoint, ψk::AbstractArray3) =
+    reshape(ψk, prod(size(ψk)[1:2]), :)
+# ψk[σ, G] → ψk[σG]
+to_composite_σG(::PlaneWaveBasis, ::Kpoint, ψk::AbstractMatrix) = reshape(ψk, :)
+"""
+Converts each wave-function from a tensor `ψ[ik][σ, G, n]` to a matrix `ψ[ik][σG, n]`.
+Useful for inner-working of DFTK that relies on simple matrix-vector operations.
+"""
+to_composite_σG(basis, ψ::AbstractVector) = to_composite_σG.(basis, basis.kpoints, ψ)
+
+# ψk[σG, n] → ψk[σ, G, n]
+from_composite_σG(basis::PlaneWaveBasis, ::Kpoint, ψk::AbstractMatrix) =
+    reshape(ψk, basis.model.n_components, :, size(ψk, 2))
+# ψk[σG] → ψk[σ, G]
+from_composite_σG(basis::PlaneWaveBasis, ::Kpoint, ψk::AbstractVector) =
+    reshape(ψk, basis.model.n_components, :)
+
+# For LOBPCG compability.
+# ψk[σG] → ψk[σ, G, 1:1]
+unfold_from_composite_σG(basis::PlaneWaveBasis, ::Kpoint, ψk::AbstractVector) =
+    reshape(ψk, basis.model.n_components, :, 1)
+# ψk[1:1G, n] → ψk[1:1, G, n]
+unfold_from_composite_σG(basis::PlaneWaveBasis, ::Kpoint, ψk::AbstractMatrix) =
+    reshape(ψk, basis.model.n_components, :, size(ψk, 2))
+"""
+Reciprocal to [`to_composite_σG`](@ref).
+"""
+from_composite_σG(basis::PlaneWaveBasis, ψ::AbstractVector) =
+    from_composite_σG.(basis, basis.kpoints, ψ)
+
+@timing function enforce_phase!(basis::PlaneWaveBasis, kpt::Kpoint, ψk)
+    map(eachcol(to_composite_σG(basis, kpt, ψk))) do ψkn
+        ϕ = angle(ψkn[end])
+        if abs(ϕ) > sqrt(eps(ϕ))
+            ε = sign(real(ψkn[end] / cis(ϕ)))
+            ψkn ./= ε*cis(ϕ)
+        end
+    end
+    ψk
+end
+"""Ensure that the wave function is real."""
+enforce_phase!(basis, ψ::AbstractVector) = enforce_phase!.(basis, basis.kpoints, ψ)
diff --git a/src/common/ortho.jl b/src/common/ortho.jl
index fc5473ca3b..ef6aca4263 100644
--- a/src/common/ortho.jl
+++ b/src/common/ortho.jl
@@ -1,9 +1,5 @@
-@timing function ortho_qr(φk::ArrayType) where {ArrayType <: AbstractArray}
-    x = convert(ArrayType, qr(φk).Q)
-    if size(x) == size(φk)
-        return x
-    else
-        # Sometimes QR (but funnily not always) CUDA messes up the size here
-        return x[:, 1:size(φk, 2)]
-    end
+@views @timing function ortho_qr(basis, kpt, φk::AbstractArray{T}) where {T}
+    x = convert(Matrix{T}, qr(to_composite_σG(basis, kpt, φk)).Q)
+    # Sometimes QR (but funnily not always) CUDA messes up the size here.
+    from_composite_σG(basis, kpt, x)[:, :, 1:size(φk, 3)]
 end
diff --git a/src/common/types.jl b/src/common/types.jl
index c70b8f75fc..ddd20533fb 100644
--- a/src/common/types.jl
+++ b/src/common/types.jl
@@ -9,3 +9,4 @@ value_type(T) = T
 const Mat3{T} = SMatrix{3, 3, T, 9} where {T}
 const Vec3{T} = SVector{3, T} where {T}
 const AbstractArray3{T} = AbstractArray{T, 3}
+const AbstractArray4{T} = AbstractArray{T, 4}
diff --git a/src/densities.jl b/src/densities.jl
index d07697607f..12e6750599 100644
--- a/src/densities.jl
+++ b/src/densities.jl
@@ -1,6 +1,8 @@
 # Densities (and potentials) are represented by arrays
 # ρ[ix,iy,iz,iσ] in real space, where iσ ∈ [1:n_spin_components]
 
+# TODO: We reduce all components for the density. Will need to be though again when we merge
+# the components and the spins.
 """
     compute_density(basis::PlaneWaveBasis, ψ::AbstractVector, occupation::AbstractVector)
 
@@ -17,6 +19,8 @@ using an optional `occupation_threshold`. By default all occupation numbers are
     # Note, that we special-case Tψ, since when T is Dual and eltype(ψ[1]) is not
     # (e.g. stress calculation), then only the normalisation factor introduces
     # dual numbers, but not yet the FFT
+    Tρ = promote_type(T, real(eltype(ψ[1])))
+    n_components = basis.model.n_components
 
     # Occupation should be on the CPU as we are going to be doing scalar indexing.
     occupation = [to_cpu(oc) for oc in occupation]
@@ -25,7 +29,7 @@ using an optional `occupation_threshold`. By default all occupation numbers are
 
     function allocate_local_storage()
         (; ρ=zeros_like(basis.G_vectors, Tρ, basis.fft_size..., basis.model.n_spin_components),
-         ψnk_real=zeros_like(basis.G_vectors, complex(Tψ), basis.fft_size...))
+         ψnk_real=zeros_like(basis.G_vectors, complex(Tρ), n_components, basis.fft_size...))
     end
     # We split the total iteration range (ik, n) in chunks, and parallelize over them.
     range = [(ik, n) for ik = 1:length(basis.kpoints) for n = mask_occ[ik]]
@@ -33,10 +37,12 @@ using an optional `occupation_threshold`. By default all occupation numbers are
     storages = parallel_loop_over_range(range; allocate_local_storage) do kn, storage
         (ik, n) = kn
         kpt = basis.kpoints[ik]
-        ifft!(storage.ψnk_real, basis, kpt, ψ[ik][:, n]; normalize=false)
-        storage.ρ[:, :, :, kpt.spin] .+= (occupation[ik][n] .* basis.kweights[ik]
-                                          .* (basis.ifft_normalization)^2
-                                          .* abs2.(storage.ψnk_real))
+        ifft!(storage.ψnk_real, basis, kpt, ψ[ik][:, :, n]; normalize=false)
+        for σ = 1:n_components
+            storage.ρ[:, :, :, kpt.spin] .+= (occupation[ik][n] .* basis.kweights[ik]
+                                                  .* (basis.ifft_normalization)^2
+                                                  .* abs2.(storage.ψnk_real[σ, :, :, :]))
+        end
 
         synchronize_device(basis.architecture)
     end
@@ -61,6 +67,7 @@ end
     # δρ is expected to be real when computations are not phonon-related.
     Tδρ = iszero(q) ? real(Tψ) : Tψ
     real_qzero = iszero(q) ? real : identity
+    n_components = basis.model.n_components
 
     # occupation should be on the CPU as we are going to be doing scalar indexing.
     occupation = [to_cpu(oc) for oc in occupation]
@@ -69,8 +76,8 @@ end
 
     function allocate_local_storage()
         (; δρ=zeros_like(basis.G_vectors, Tδρ, basis.fft_size..., basis.model.n_spin_components),
-          ψnk_real=zeros_like(basis.G_vectors, Tψ, basis.fft_size...),
-         δψnk_real=zeros_like(basis.G_vectors, Tψ, basis.fft_size...))
+          ψnk_real=zeros_like(basis.G_vectors, Tψ, n_components, basis.fft_size...),
+         δψnk_real=zeros_like(basis.G_vectors, Tψ, n_components, basis.fft_size...))
     end
     range = [(ik, n) for ik = 1:length(basis.kpoints) for n = mask_occ[ik]]
 
@@ -84,14 +91,18 @@ end
         (ik, n) = kn
 
         kpt = basis.kpoints[ik]
-        ifft!(storage.ψnk_real, basis, kpt, ψ[ik][:, n])
+        ifft!(storage.ψnk_real, basis, kpt, ψ[ik][:, :, n])
         # … and then we compute the real Fourier transform in the adequate basis.
-        ifft!(storage.δψnk_real, basis, δψ_plus_k[ik].kpt, δψ_plus_k[ik].ψk[:, n])
-
-        storage.δρ[:, :, :, kpt.spin] .+= real_qzero(
-            2 .* occupation[ik][n] .* basis.kweights[ik] .* conj(storage.ψnk_real)
-                                                         .* storage.δψnk_real
-                .+ δoccupation[ik][n] .* basis.kweights[ik] .* abs2.(storage.ψnk_real))
+        ifft!(storage.δψnk_real, basis, δψ_plus_k[ik].kpt, δψ_plus_k[ik].ψk[:, :, n])
+
+        for σ = 1:basis.model.n_components
+            storage.δρ[:, :, :, kpt.spin] .+= real_qzero(
+                2 .* occupation[ik][n] .* basis.kweights[ik]
+                                       .* conj(storage.ψnk_real[σ, :, :, :])
+                                       .* storage.δψnk_real[σ, :, :, :]
+                 .+ δoccupation[ik][n] .* basis.kweights[ik]
+                                       .* abs2.(storage.ψnk_real[σ, :, :, :]))
+        end
 
         synchronize_device(basis.architecture)
     end
@@ -101,16 +112,18 @@ end
     symmetrize_ρ(basis, δρ; do_lowpass=false)
 end
 
-@views @timing function compute_kinetic_energy_density(basis::PlaneWaveBasis, ψ, occupation)
-    T = promote_type(eltype(basis), real(eltype(ψ[1])))
+@views @timing function compute_kinetic_energy_density(basis::PlaneWaveBasis{TT}, ψ,
+                                                       occupation) where {TT}
+    @assert basis.model.n_components == 1
+    T = promote_type(TT, real(eltype(ψ[1])))
     τ = similar(ψ[1], T, (basis.fft_size..., basis.model.n_spin_components))
     τ .= 0
-    dαψnk_real = zeros(complex(eltype(basis)), basis.fft_size)
+    dαψσnk_real = zeros(complex(T), basis.fft_size)
     for (ik, kpt) in enumerate(basis.kpoints)
         G_plus_k = [[p[α] for p in Gplusk_vectors_cart(basis, kpt)] for α = 1:3]
-        for n = 1:size(ψ[ik], 2), α = 1:3
-            ifft!(dαψnk_real, basis, kpt, im .* G_plus_k[α] .* ψ[ik][:, n])
-            @. τ[:, :, :, kpt.spin] += occupation[ik][n] * basis.kweights[ik] / 2 * abs2(dαψnk_real)
+        for n = 1:size(ψ[ik], 3), σ = 1:basis.model.n_components, α = 1:3
+            ifft!(dαψσnk_real, basis, kpt, im .* G_plus_k[α] .* ψ[ik][σ, :, n])
+            @. τ[:, :, :, kpt.spin] += occupation[ik][n] * basis.kweights[ik] / 2 * abs2(dαψσnk_real)
         end
     end
     mpi_sum!(τ, basis.comm_kpts)
diff --git a/src/eigen/diag.jl b/src/eigen/diag.jl
index e39c9fbbb9..b2500af0b3 100644
--- a/src/eigen/diag.jl
+++ b/src/eigen/diag.jl
@@ -7,48 +7,52 @@ that really does the work, operating on a single ``k``-Block.
 `prec_type` should be a function that returns a preconditioner when called as `prec(ham, kpt)`
 """
 function diagonalize_all_kblocks(eigensolver, ham::Hamiltonian, nev_per_kpoint::Int;
-                                 ψguess=nothing,
-                                 prec_type=PreconditionerTPA, interpolate_kpoints=true,
-                                 tol=1e-6, miniter=1, maxiter=100, n_conv_check=nothing)
-    kpoints = ham.basis.kpoints
+                                 ψguess=nothing, prec_type=PreconditionerTPA,
+                                 interpolate_kpoints=true, tol=1e-6, miniter=1, maxiter=100,
+                                 n_conv_check=nothing)
+    basis   = ham.basis
+    kpoints = basis.kpoints
+    n_components = basis.model.n_components
     results = Vector{Any}(undef, length(kpoints))
 
     for (ik, kpt) in enumerate(kpoints)
-        n_Gk = length(G_vectors(ham.basis, kpt))
+        n_Gk = length(G_vectors(basis, kpt))
         if n_Gk < nev_per_kpoint
             error("The size of the plane wave basis is $n_Gk, and you are asking for " *
                   "$nev_per_kpoint eigenvalues. Increase Ecut.")
         end
         # Get ψguessk
         if !isnothing(ψguess)
-            if n_Gk != size(ψguess[ik], 1)
-                error("Mismatch in dimension between guess ($(size(ψguess[ik], 1)) and " *
+            if n_Gk != size(ψguess[ik], 2)
+                error("Mismatch in dimension between guess ($(size(ψguess[ik], 2))) and " *
                       "Hamiltonian ($n_Gk)")
             end
-            nev_guess = size(ψguess[ik], 2)
+            nev_guess = size(ψguess[ik], 3)
             if nev_guess > nev_per_kpoint
                 ψguessk = ψguess[ik][:, 1:nev_per_kpoint]
             elseif nev_guess == nev_per_kpoint
                 ψguessk = ψguess[ik]
             else
-                X0 = similar(ψguess[ik], n_Gk, nev_per_kpoint)
-                X0[:, 1:nev_guess] = ψguess[ik]
-                X0[:, nev_guess+1:end] = randn(eltype(X0), n_Gk, nev_per_kpoint - nev_guess)
-                ψguessk = ortho_qr(X0)
+                X0 = similar(ψguess[ik], n_components, n_Gk, nev_per_kpoint)
+                X0[:, :, 1:nev_guess] = ψguess[ik]
+                X0[:, :, nev_guess+1:end] = randn(eltype(X0), n_components, n_Gk,
+                                                              nev_per_kpoint - nev_guess)
+                ψguessk = ortho_qr(basis, kpt, X0)
             end
         elseif interpolate_kpoints && ik > 1
             # use information from previous k-point
-            ψguessk = interpolate_kpoint(results[ik - 1].X, ham.basis, kpoints[ik - 1],
-                                         ham.basis, kpoints[ik])
+            ψguessk = interpolate_kpoint(results[ik - 1].X, basis, kpoints[ik - 1],
+                                         basis, kpoints[ik])
         else
-            ψguessk = random_orbitals(ham.basis, kpt, nev_per_kpoint)
+            ψguessk = random_orbitals(basis, kpt, nev_per_kpoint)
         end
-        @assert size(ψguessk) == (n_Gk, nev_per_kpoint)
+        @assert size(ψguessk) == (n_components, n_Gk, nev_per_kpoint)
 
         prec = nothing
         !isnothing(prec_type) && (prec = prec_type(ham[ik]))
-        results[ik] = eigensolver(ham[ik], ψguessk;
-                                  prec, tol, miniter, maxiter, n_conv_check)
+        res = eigensolver(ham[ik], to_composite_σG(basis, kpt, ψguessk);
+                          prec, tol, miniter, maxiter, n_conv_check)
+        results[ik] = merge(res, (; X=from_composite_σG(basis, kpt, res.X)))
     end
 
     # Transform results into a nicer datastructure
@@ -68,7 +72,7 @@ Tuple returned by `diagonalize_all_kblocks`.
 """
 function select_eigenpairs_all_kblocks(eigres, range)
     merge(eigres, (; λ=[λk[range] for λk in eigres.λ],
-                   X=[Xk[:, range] for Xk in eigres.X],
+                   X=[Xk[:, :, range] for Xk in eigres.X],
                    residual_norms=[resk[range] for resk in eigres.residual_norms]))
 end
 
diff --git a/src/eigen/lobpcg_hyper_impl.jl b/src/eigen/lobpcg_hyper_impl.jl
index 88a792ac12..26326269b9 100644
--- a/src/eigen/lobpcg_hyper_impl.jl
+++ b/src/eigen/lobpcg_hyper_impl.jl
@@ -156,7 +156,7 @@ function B_ortho!(X, BX)
     rdiv!(BX, U)
 end
 
-normest(M) = maximum(abs.(diag(M))) + norm(M - Diagonal(diag(M)))
+normest(M) = maximum(abs, diag(M)) + norm(M - Diagonal(diag(M)))
 # Orthogonalizes X to tol
 # Returns the new X, the number of Cholesky factorizations algorithm, and the
 # growth factor by which small perturbations of X can have been
diff --git a/src/eigen/preconditioners.jl b/src/eigen/preconditioners.jl
index a9c213d9a8..182babf992 100644
--- a/src/eigen/preconditioners.jl
+++ b/src/eigen/preconditioners.jl
@@ -26,7 +26,7 @@ PreconditionerNone(::HamiltonianBlock) = I
 """
 mutable struct PreconditionerTPA{T <: Real}
     basis::PlaneWaveBasis
-    kpt::Kpoint
+    kpoint::Kpoint
     kin::AbstractVector{T}  # kinetic energy of every G
     mean_kin::Union{Nothing, Vector{T}}  # mean kinetic energy of every band
     default_shift::T  # if mean_kin is not set by `precondprep!`, this will be used for the shift
@@ -46,12 +46,23 @@ function PreconditionerTPA(ham::HamiltonianBlock; kwargs...)
     PreconditionerTPA(ham.basis, ham.kpoint; kwargs...)
 end
 
-@views function ldiv!(Y, P::PreconditionerTPA, R)
+@views function ldiv!(Y, P::PreconditionerTPA, R::AbstractArray)
     if P.mean_kin === nothing
         ldiv!(Y, Diagonal(P.kin .+ P.default_shift), R)
+    elseif ndims(R) == 3
+        Threads.@threads for n = 1:size(Y, 3)
+            for σ = 1:size(Y, 1)
+                Y[σ, :, n] .= P.mean_kin[n] ./ (P.mean_kin[n] .+ P.kin) .* R[σ, :, n]
+            end
+        end
     else
-    parallel_loop_over_range(1:size(Y, 2)) do n
-            Y[:, n] .= P.mean_kin[n] ./ (P.mean_kin[n] .+ P.kin) .* R[:, n]
+        # Fallback for LOBPCG as it works on matrices.
+        R_σG = from_composite_σG(P.basis, P.kpoint, R)
+        Y_σG = from_composite_σG(P.basis, P.kpoint, Y)
+        parallel_loop_over_range(1:size(Y, 2)) do n
+            for σ = 1:P.basis.model.n_components
+                Y_σG[σ, :, n] .= P.mean_kin[n] ./ (P.mean_kin[n] .+ P.kin) .* R_σG[σ, :, n]
+            end
         end
     end
     Y
@@ -73,7 +84,10 @@ end
 (Base.:*)(P::PreconditionerTPA, R) = mul!(copy(R), P, R)
 
 function precondprep!(P::PreconditionerTPA, X::AbstractArray)
-    P.mean_kin = [real(dot(x, Diagonal(P.kin), x)) for x in eachcol(X)]
+    P.mean_kin = map(eachcol(X)) do x
+        x = from_composite_σG(P.basis, P.kpoint, x)
+        sum(real(dot(x[σ, :, :], Diagonal(P.kin), x[σ, :, :]))
+            for σ = 1:P.basis.model.n_components)
+    end
 end
 precondprep!(P::PreconditionerTPA, ::Nothing) = 1  # fallback for edge cases
-
diff --git a/src/external/wannier_shared.jl b/src/external/wannier_shared.jl
index 37e9a9ebfa..f8ce686071 100644
--- a/src/external/wannier_shared.jl
+++ b/src/external/wannier_shared.jl
@@ -190,13 +190,14 @@ end
 
 
 @timing function write_w90_unk(fileprefix::String, basis, ψ; n_bands, spin=1)
+    @assert basis.model.n_components == 1
     fft_size = basis.fft_size
 
     for ik in krange_spin(basis, spin)
         open(dirname(fileprefix) * (@sprintf "/UNK%05i.%i" ik spin), "w") do fp
             println(fp, "$(fft_size[1]) $(fft_size[2]) $(fft_size[3]) $ik $n_bands")
             for iband = 1:n_bands
-                ψnk_real = ifft(basis, basis.kpoints[ik], @view ψ[ik][:, iband])
+                ψnk_real = ifft(basis, basis.kpoints[ik], @view ψ[ik][1, :, iband])
                 for iz = 1:fft_size[3], iy = 1:fft_size[2], ix = 1:fft_size[1]
                     @printf fp "%25.18f %25.18f\n" real(ψnk_real[ix, iy, iz]) imag(ψnk_real[ix, iy, iz])
                 end
@@ -218,6 +219,7 @@ We use here that:
 ``u_{n(k + G_{\rm shift})}(r) = e^{-i*\langle G_{\rm shift},r \rangle} u_{nk}``.
 """
 @views function overlap_Mmn_k_kpb(basis::PlaneWaveBasis, ψ, ik, ik_plus_b, G_shift, n_bands)
+    @assert basis.model.n_components == 1
     # Search for common Fourier modes and their resp. indices in Bloch states k and k+b
     # TODO Check if this can be improved using the G vector mapping in the kpoints
     k        = basis.kpoints[ik]
@@ -233,7 +235,7 @@ We use here that:
     for n = 1:n_bands
         for m = 1:n_bands
             # Select the coefficient in right order
-            Mkb[m, n] = dot(ψ[ik][ip, m], ψ[ik_plus_b][ip_plus_b, n])
+            Mkb[m, n] = dot(ψ[ik][1, ip, m], ψ[ik_plus_b][1, ip_plus_b, n])
         end
     end
     iszero(Mkb) && return Matrix(I, n_bands, n_bands)
@@ -297,19 +299,19 @@ function compute_amn_kpoint(basis::PlaneWaveBasis, kpt, ψk, projections, n_band
     Ak
 end
 
-
 @timing function write_w90_amn(
         fileprefix::String,
         basis::PlaneWaveBasis,
         projections,
         ψ;
         n_bands)
+    @assert basis.model.n_components == 1
     open(fileprefix * ".amn", "w") do fp
         println(fp, "Generated by DFTK at $(now())")
         println(fp, "$n_bands   $(length(basis.kpoints))  $(length(projections))")
 
         for (ik, (ψk, kpt)) in enumerate(zip(ψ, basis.kpoints))
-            Ak = compute_amn_kpoint(basis, kpt, ψk, projections, n_bands)
+            Ak = compute_amn_kpoint(basis, kpt, ψk[1, :, :], projections, n_bands)
             for n = 1:size(Ak, 2)
                 for m = 1:size(Ak, 1)
                     @printf(fp, "%3i %3i %3i  %22.18f %22.18f \n",
diff --git a/src/fft.jl b/src/fft.jl
index 163f4c5848..7bb9b795b6 100644
--- a/src/fft.jl
+++ b/src/fft.jl
@@ -24,8 +24,8 @@ function ifft!(f_real::AbstractArray3, basis::PlaneWaveBasis, f_fourier::Abstrac
     mul!(f_real, basis.opBFFT, f_fourier)
     f_real .*= basis.ifft_normalization
 end
-function ifft!(f_real::AbstractArray3, basis::PlaneWaveBasis,
-               kpt::Kpoint, f_fourier::AbstractVector; normalize=true)
+function ifft!(f_real::AbstractArray3, basis::PlaneWaveBasis, kpt::Kpoint,
+               f_fourier::AbstractVector; normalize=true)
     @assert length(f_fourier) == length(kpt.mapping)
     @assert size(f_real) == basis.fft_size
 
@@ -37,6 +37,25 @@ function ifft!(f_real::AbstractArray3, basis::PlaneWaveBasis,
     normalize && (f_real .*= basis.ifft_normalization)
     f_real
 end
+function ifft!(f_real::AbstractArray4, basis::PlaneWaveBasis, f_fourier::AbstractMatrix)
+    mul!(f_real, basis.opBFFT_mc, f_fourier)
+    f_real .*= basis.ifft_normalization
+end
+function ifft!(f_real::AbstractArray4, basis::PlaneWaveBasis, kpt::Kpoint,
+               f_fourier::AbstractMatrix; normalize=true)
+    n_components = basis.model.n_components
+    @assert length(f_fourier) == n_components*length(kpt.mapping)
+    @assert size(f_real)[2:end] == basis.fft_size
+
+    # Pad the input data
+    fill!(f_real, 0)
+    reshape(f_real, n_components, :)[:, kpt.mapping] = f_fourier
+
+    # Perform an IFFT
+    mul!(f_real, basis.ipBFFT_mc, f_real)
+    normalize && (f_real .*= basis.ifft_normalization)
+    f_real
+end
 
 """
     ifft(basis::PlaneWaveBasis, [kpt::Kpoint, ] f_fourier)
@@ -57,6 +76,11 @@ end
 function ifft(basis::PlaneWaveBasis, kpt::Kpoint, f_fourier::AbstractVector; kwargs...)
     ifft!(similar(f_fourier, basis.fft_size...), basis, kpt, f_fourier; kwargs...)
 end
+function ifft(basis::PlaneWaveBasis, kpt::Kpoint, f_fourier::AbstractMatrix; kwargs...)
+    ifft!(similar(f_fourier, basis.model.n_components, basis.fft_size...),
+          basis, kpt, f_fourier; kwargs...)
+end
+
 """
 Perform a real valued iFFT; see [`ifft`](@ref). Note that this function
 silently drops the imaginary part.
@@ -77,8 +101,8 @@ function fft!(f_fourier::AbstractArray3, basis::PlaneWaveBasis, f_real::Abstract
     mul!(f_fourier, basis.opFFT, f_real)
     f_fourier .*= basis.fft_normalization
 end
-function fft!(f_fourier::AbstractVector, basis::PlaneWaveBasis,
-              kpt::Kpoint, f_real::AbstractArray3; normalize=true)
+function fft!(f_fourier::AbstractVector, basis::PlaneWaveBasis, kpt::Kpoint,
+              f_real::AbstractArray3; normalize=true)
     @assert size(f_real) == basis.fft_size
     @assert length(f_fourier) == length(kpt.mapping)
 
@@ -90,6 +114,28 @@ function fft!(f_fourier::AbstractVector, basis::PlaneWaveBasis,
     normalize && (f_fourier .*= basis.fft_normalization)
     f_fourier
 end
+function fft!(f_fourier::AbstractArray4, basis::PlaneWaveBasis, f_real::AbstractArray4)
+    if eltype(f_real) <: Real
+        f_real = complex.(f_real)
+    end
+    mul!(f_fourier, basis.opFFT_mc, f_real)
+    f_fourier .*= basis.fft_normalization
+end
+function fft!(f_fourier::AbstractMatrix, basis::PlaneWaveBasis, kpt::Kpoint,
+              f_real::AbstractArray4; normalize=true)
+    @assert size(f_real)[2:end] == basis.fft_size
+    @assert length(f_fourier) == basis.model.n_components * length(kpt.mapping)
+
+    # FFT
+    mul!(f_real, basis.ipFFT_mc, f_real)
+
+    # Truncate
+    for σ = 1:basis.model.n_components
+        f_fourier[σ, :] .= @view(f_real[σ, :, :, :][kpt.mapping])
+    end
+    normalize && (f_fourier .*= basis.fft_normalization)
+    f_fourier
+end
 
 """
     fft(basis::PlaneWaveBasis, [kpt::Kpoint, ] f_real)
@@ -112,6 +158,10 @@ end
 function fft(basis::PlaneWaveBasis, kpt::Kpoint, f_real::AbstractArray3; kwargs...)
     fft!(similar(f_real, length(kpt.mapping)), basis, kpt, copy(f_real); kwargs...)
 end
+function fft(basis::PlaneWaveBasis, kpt::Kpoint, f_real::AbstractArray4; kwargs...)
+    fft!(similar(f_real, basis.model.n_components, length(kpt.mapping)), basis, kpt,
+         copy(f_real); kwargs...)
+end
 
 # returns matrix representations of the ifft and fft matrices. For debug purposes.
 function ifft_matrix(basis::PlaneWaveBasis{T}) where {T}
@@ -261,25 +311,32 @@ end
 Plan a FFT of type `T` and size `fft_size`, spending some time on finding an
 optimal algorithm. (Inplace, out-of-place) x (forward, backward) FFT plans are returned.
 """
-function build_fft_plans!(tmp::Array{Complex{Float64}})
-    ipFFT = FFTW.plan_fft!(tmp; flags=FFTW.MEASURE)
-    opFFT = FFTW.plan_fft(tmp;  flags=FFTW.MEASURE)
+function build_fft_plans!(tmp::Array{Complex{Float64}}; region=1:ndims(tmp))
+    ipFFT = FFTW.plan_fft!(tmp, region; flags=FFTW.MEASURE)
+    opFFT = FFTW.plan_fft(tmp, region;  flags=FFTW.MEASURE)
     # backwards-FFT by inverting and stripping off normalizations
-    ipFFT, opFFT, inv(ipFFT).p, inv(opFFT).p
+    ipBFFT = inv(ipFFT).p
+    opBFFT = inv(opFFT).p
+    (; ipFFT, opFFT, ipBFFT, opBFFT)
 end
-function build_fft_plans!(tmp::Array{Complex{Float32}})
+function build_fft_plans!(tmp::Array{Complex{Float32}}; region=1:ndims(tmp))
     # For Float32 there are issues with aligned FFTW plans, so we
     # fall back to unaligned FFTW plans (which are generally discouraged).
-    ipFFT = FFTW.plan_fft!(tmp; flags=FFTW.MEASURE | FFTW.UNALIGNED)
-    opFFT = FFTW.plan_fft(tmp;  flags=FFTW.MEASURE | FFTW.UNALIGNED)
+    ipFFT = FFTW.plan_fft!(tmp, region; flags=FFTW.MEASURE | FFTW.UNALIGNED)
+    opFFT = FFTW.plan_fft(tmp, region;  flags=FFTW.MEASURE | FFTW.UNALIGNED)
     # backwards-FFT by inverting and stripping off normalizations
-    ipFFT, opFFT, inv(ipFFT).p, inv(opFFT).p
+    ipBFFT = inv(ipFFT).p
+    opBFFT = inv(opFFT).p
+    (; ipFFT, opFFT, ipBFFT, opBFFT)
 end
-function build_fft_plans!(tmp::AbstractArray{Complex{T}}) where {T<:Union{Float32,Float64}}
-    ipFFT = AbstractFFTs.plan_fft!(tmp)
-    opFFT = AbstractFFTs.plan_fft(tmp)
+function build_fft_plans!(tmp::AbstractArray{Complex{T}};
+                          region=1:ndims(tmp)) where {T<:Union{Float32,Float64}}
+    ipFFT = AbstractFFTs.plan_fft!(tmp, region)
+    opFFT = AbstractFFTs.plan_fft(tmp, region)
     # backwards-FFT by inverting and stripping off normalizations
-    ipFFT, opFFT, inv(ipFFT).p, inv(opFFT).p
+    ipBFFT = inv(ipFFT).p
+    opBFFT = inv(opFFT).p
+    (; ipFFT, opFFT, ipBFFT, opBFFT)
 end
 
 # TODO Some grid sizes are broken in the generic FFT implementation
diff --git a/src/input_output.jl b/src/input_output.jl
index f7932836f6..62775973bc 100644
--- a/src/input_output.jl
+++ b/src/input_output.jl
@@ -8,6 +8,7 @@
 function Base.show(io::IO, model::Model)
     nD = model.n_dim == 3 ? "" : "$(model.n_dim)D, "
     print(io, "Model(", model.model_name, ", ", nD,
+          "n_components = ", model.n_components, ", ",
           "spin_polarization = :", model.spin_polarization, ")")
 end
 
@@ -33,6 +34,7 @@ function Base.show(io::IO, ::MIME"text/plain", model::Model)
     if !isnothing(model.n_electrons)
         showfieldln(io, "num. electrons", model.n_electrons)
     end
+    showfieldln(io, "num. components",    model.n_components)
     showfieldln(io, "spin polarization",  model.spin_polarization)
     showfieldln(io, "temperature",        @sprintf "%.5g Ha" model.temperature)
     if model.temperature > 0
@@ -74,6 +76,7 @@ function todict!(dict, model::Model)
     dict["recip_lattice"]     = model.recip_lattice
     dict["n_dim"]             = model.n_dim
     dict["spin_polarization"] = model.spin_polarization
+    dict["n_components"]      = model.n_components
     dict["n_spin_components"] = model.n_spin_components
     dict["temperature"]       = model.temperature
     dict["smearing"]          = string(model.smearing)
diff --git a/src/interpolation.jl b/src/interpolation.jl
index d18bf4959f..71850e54c5 100644
--- a/src/interpolation.jl
+++ b/src/interpolation.jl
@@ -93,24 +93,26 @@ end
 Interpolate some data from one ``k``-point to another. The interpolation is fast, but not
 necessarily exact. Intended only to construct guesses for iterative solvers.
 """
-function interpolate_kpoint(data_in::AbstractVecOrMat,
+function interpolate_kpoint(data_in::AbstractArray3,
                             basis_in::PlaneWaveBasis,  kpoint_in::Kpoint,
                             basis_out::PlaneWaveBasis, kpoint_out::Kpoint)
     # TODO merge with transfer_blochwave_kpt
     if kpoint_in == kpoint_out
         return copy(data_in)
     end
-    @assert length(G_vectors(basis_in, kpoint_in)) == size(data_in, 1)
+    n_components = basis_in.model.n_components
+    @assert n_components == basis_out.model.n_components == size(data_in, 1)
+    @assert length(G_vectors(basis_in, kpoint_in)) == size(data_in, 2)
 
-    n_bands  = size(data_in, 2)
+    n_bands  = size(data_in, 3)
     n_Gk_out = length(G_vectors(basis_out, kpoint_out))
-    data_out = similar(data_in, n_Gk_out, n_bands) .= 0
+    data_out = similar(data_in, n_components, n_Gk_out, n_bands) .= 0
     # TODO: use a map, or this will not be GPU compatible (scalar indexing)
-    for iin = 1:size(data_in, 1)
+    for iin = 1:size(data_in, 2)
         idx_fft = kpoint_in.mapping[iin]
         idx_fft in keys(kpoint_out.mapping_inv) || continue
         iout = kpoint_out.mapping_inv[idx_fft]
-        data_out[iout, :] = data_in[iin, :]
+        data_out[:, iout, :] = data_in[:, iin, :]
     end
-    ortho_qr(data_out)  # Re-orthogonalize and renormalize
+    ortho_qr(basis_out, kpoint_out, data_out)  # Re-orthogonalize and renormalize
 end
diff --git a/src/orbitals.jl b/src/orbitals.jl
index ace1adb8fb..905feab178 100644
--- a/src/orbitals.jl
+++ b/src/orbitals.jl
@@ -6,16 +6,15 @@ using Random  # Used to have a generic API for CPU and GPU computations alike: s
 # others when using temperature. It is set to 0.0 by default, to treat with insulators.
 function select_occupied_orbitals(basis, ψ, occupation; threshold=0.0)
     N = [something(findlast(x -> x > threshold, occk), 0) for occk in occupation]
-    selected_ψ   = [@view ψk[:, 1:N[ik]] for (ik, ψk)   in enumerate(ψ)]
-    selected_occ = [      occk[1:N[ik]]  for (ik, occk) in enumerate(occupation)]
+    selected_ψ   = [@view ψk[:, :, 1:N[ik]] for (ik, ψk)   in enumerate(ψ)]
+    selected_occ = [      occk[1:N[ik]]     for (ik, occk) in enumerate(occupation)]
 
-    # if we have an insulator, sanity check that the orbitals we kept are the
-    # occupied ones
+    # If we have an insulator, sanity check that the orbitals we kept are the occupied ones.
     if iszero(threshold)
         model   = basis.model
         n_spin  = model.n_spin_components
         n_bands = div(model.n_electrons, n_spin * filled_occupation(model), RoundUp)
-        @assert n_bands == size(selected_ψ[1], 2)
+        @assert all([n_bands == size(ψk, 3) for ψk in selected_ψ])
     end
     (; ψ=selected_ψ, occupation=selected_occ)
 end
@@ -29,9 +28,9 @@ function blockify_ψ(basis::PlaneWaveBasis, ψ::AbstractVector)
 
     # Zero-pad each ψk to the full stored rectangle
     ψ = map(ψ) do ψk
-        n_G, n_bands = size(ψk)
-        ψk_full = similar(ψk, max_n_G, n_bands)
-        ψk_full[1:n_G, :] = ψk
+        n_components, n_G, n_bands = size(ψk)
+        ψk_full = similar(ψk, n_components, max_n_G, n_bands)
+        ψk_full[:, 1:n_G, :] = ψk
         ψk_full
     end
     (; ψ, n_G_vectors)
@@ -41,7 +40,7 @@ end
 # given in the second argument.
 function unblockify_ψ(ψ_padded::AbstractVector, n_G_vectors::AbstractVector)
     map(n_G_vectors, ψ_padded) do n_Gk, ψk_padded
-        ψk_padded[1:n_Gk, :]
+        ψk_padded[:, 1:n_Gk, :]
     end
 end
 
@@ -80,7 +79,8 @@ end
 unpack_ψ(x, sizes_ψ) = deepcopy(unsafe_unpack_ψ(x, sizes_ψ))
 
 function random_orbitals(basis::PlaneWaveBasis{T}, kpt::Kpoint, howmany::Integer) where {T}
-    orbitals = similar(basis.G_vectors, Complex{T}, length(G_vectors(basis, kpt)), howmany)
+    orbitals = similar(basis.G_vectors, Complex{T},
+                       basis.model.n_components, length(G_vectors(basis, kpt)), howmany)
     randn!(TaskLocalRNG(), orbitals)  # use the RNG on the device if we're using a GPU
-    ortho_qr(orbitals)
+    ortho_qr(basis, kpt, orbitals)
 end
diff --git a/src/postprocess/current.jl b/src/postprocess/current.jl
index 5f9ccaf46b..4f4613d236 100644
--- a/src/postprocess/current.jl
+++ b/src/postprocess/current.jl
@@ -4,16 +4,20 @@ Computes the *probability* (not charge) current, ``∑_n f_n \Im(ψ_n · ∇ψ_n
 """
 function compute_current(basis::PlaneWaveBasis, ψ, occupation)
     @assert length(basis.symmetries) == 1  # TODO lift this
-    current = [zeros(eltype(basis), basis.fft_size) for α = 1:3]
+    current = [zeros(eltype(basis), basis.fft_size) for _ = 1:3]
     for (ik, kpt) in enumerate(basis.kpoints)
-        for (n, ψnk) in enumerate(eachcol(ψ[ik]))
+        for (n, ψnk) in enumerate(eachslice(ψ[ik]; dims=3))
             ψnk_real = ifft(basis, kpt, ψnk)
-            for α = 1:3
-                dαψnk = [im * q[α] for q in Gplusk_vectors_cart(basis, kpt)] .* ψnk
-                dαψnk_real = ifft(basis, kpt, dαψnk)
-                current[α] .+= @. basis.kweights[ik] *
-                                  occupation[ik][n] *
-                                  imag(conj(ψnk_real) * dαψnk_real)
+            for σ = 1:basis.model.n_components
+                ψσnk = ψnk[σ, :]
+                ψσnk_real = ψnk_real[σ, :, :, :]
+                for α = 1:3
+                    dαψσnk = [im * q[α] for q in Gplusk_vectors_cart(basis, kpt)] .* ψσnk
+                    dαψσnk_real = ifft(basis, kpt, dαψσnk)
+                    current[α] .+= @. basis.kweights[ik] *
+                                      occupation[ik][n] *
+                                      imag(conj(ψσnk_real) * dαψσnk_real)
+                end
             end
         end
     end
diff --git a/src/response/chi0.jl b/src/response/chi0.jl
index 3c7ce90f81..f88db6fbad 100644
--- a/src/response/chi0.jl
+++ b/src/response/chi0.jl
@@ -175,7 +175,7 @@ function sternheimer_solver(Hk, ψk, ε, rhs;
     function R_ldiv!(x, y)
         x .= R(precon \ R(y))
     end
-    J = LinearMap{eltype(ψk)}(RAR, size(Hk, 1))
+    J = LinearMap{eltype(ψk)}(RAR, size(ψk, 1))
     res = cg(J, bb; precon=FunctionPreconditioner(R_ldiv!), tol, proj=R,
              callback=cg_callback)
     !res.converged && @warn("Sternheimer CG not converged", res.n_iter,
@@ -188,7 +188,7 @@ function sternheimer_solver(Hk, ψk, ε, rhs;
     # Note that αkn is an empty array if there is no extra bands.
     αkn = ψk_exHψk_ex \ ψk_extra' * (b - H(δψknᴿ))
 
-    δψkn = ψk_extra * αkn + δψknᴿ
+    ψk_extra * αkn + δψknᴿ  # δψkn
 end
 
 # Apply the four-point polarizability operator χ0_4P = -Ω^-1
@@ -252,7 +252,7 @@ The derivatives of the occupations are in-place stored in δocc.
 The tuple (; δocc, δεF) is returned. It is assumed the passed `δocc`
 are initialised to zero.
 """
-function compute_δocc!(δocc, basis::PlaneWaveBasis{T}, ψ, εF, ε, δHψ) where {T}
+@views function compute_δocc!(δocc, basis::PlaneWaveBasis{T}, ψ, εF, ε, δHψ) where {T}
     model = basis.model
     temperature = model.temperature
     smearing = model.smearing
@@ -266,7 +266,7 @@ function compute_δocc!(δocc, basis::PlaneWaveBasis{T}, ψ, εF, ε, δHψ) whe
         D = zero(T)
         for ik = 1:Nk, (n, εnk) in enumerate(ε[ik])
             enred = (εnk - εF) / temperature
-            δεnk = real(dot(ψ[ik][:, n], δHψ[ik][:, n]))
+            δεnk = real(dot(ψ[ik][:, :, n], δHψ[ik][:, :, n]))
             fpnk = filled_occ * Smearing.occupation_derivative(smearing, enred) / temperature
             δocc[ik][n] = δεnk * fpnk
             D += fpnk * basis.kweights[ik]
@@ -292,9 +292,9 @@ a Sternheimer equation for each `k`-points. It is assumed the passed `δψ` are
 to zero.
 For phonon, `δHψ[ik]` is ``δH·ψ_{k-q}``, expressed in `basis.kpoints[ik]`.
 """
-function compute_δψ!(δψ, basis::PlaneWaveBasis{T}, H, ψ, εF, ε, δHψ, ε_minus_q=ε;
-                     ψ_extra=[zeros_like(ψk, size(ψk,1), 0) for ψk in ψ],
-                     q=zero(Vec3{T}), kwargs_sternheimer...) where {T}
+@views function compute_δψ!(δψ, basis::PlaneWaveBasis{T}, H, ψ, εF, ε, δHψ, ε_minus_q=ε;
+                            ψ_extra=[zeros_like(ψk, size(ψk)[1:2]..., 0) for ψk in ψ],
+                            q=zero(Vec3{T}), kwargs_sternheimer...) where {T}
     # We solve the Sternheimer equation
     #   (H_k - ε_{n,k-q}) δψ_{n,k} = - (1 - P_{k}) δHψ_{n, k-q},
     # where P_{k} is the projector on ψ_{k} and with the conventions:
@@ -315,9 +315,6 @@ function compute_δψ!(δψ, basis::PlaneWaveBasis{T}, H, ψ, εF, ε, δHψ, ε
         δψk = δψ[ik]
         εk_minus_q = ε_minus_q[ik]
 
-        ψk_extra = ψ_extra[ik]
-        Hψk_extra = Hk * ψk_extra
-        εk_extra  = diag(real.(ψk_extra' * Hψk_extra))
         for n = 1:length(εk_minus_q)
             fnk_minus_q = filled_occ * Smearing.occupation(smearing, (εk_minus_q[n]-εF) / temperature)
 
@@ -330,12 +327,22 @@ function compute_δψ!(δψ, basis::PlaneWaveBasis{T}, H, ψ, εF, ε, δHψ, ε
                 ddiff = Smearing.occupation_divided_difference
                 ratio = filled_occ * ddiff(smearing, εk[m], εk_minus_q[n], εF, temperature)
                 αmn = compute_αmn(fmk, fnk_minus_q, ratio)  # fnk_minus_q * αmn + fmk * αnm = ratio
-                δψk[:, n] .+= ψk[:, m] .* αmn .* dot(ψk[:, m], δHψ[ik][:, n])
+                δψk[:, :, n] .+= ψk[:, :, m] .* αmn .* dot(ψk[:, :, m], δHψ[ik][:, :, n])
             end
 
-            # Sternheimer contribution
-            δψk[:, n] .+= sternheimer_solver(Hk, ψk, εk_minus_q[n], δHψ[ik][:, n]; ψk_extra,
-                                             εk_extra, Hψk_extra, kwargs_sternheimer...)
+            # Sternheimer contribution:
+            #   First, reinterpret the quantities to solve the equation as a generic
+            #   matrix/vector problem…
+            kpt = basis.kpoints[ik]
+            ψk_σG = to_composite_σG(basis, kpt, ψk)
+            δHψkn = to_composite_σG(basis, kpt, δHψ[ik][:, :, n])
+            ψk_extra = to_composite_σG(basis, kpt, ψ_extra[ik])
+            Hψk_extra = to_composite_σG(basis, kpt, Hk * ψ_extra[ik])
+            εk_extra = diag(real.(ψk_extra'Hψk_extra))
+            δψkn = to_composite_σG(basis, kpt, δψk[:, :, n])
+            #   … then, solve the problem.
+            δψkn .+= sternheimer_solver(Hk, ψk_σG, εk_minus_q[n], δHψkn; ψk_extra, εk_extra,
+                                        Hψk_extra, kwargs_sternheimer...)
         end
     end
 end
@@ -356,10 +363,10 @@ end
     mask_occ   = map(occk -> findall(occnk -> abs(occnk) ≥ occ_thresh, occk), occupation)
     mask_extra = map(occk -> findall(occnk -> abs(occnk) < occ_thresh, occk), occupation)
 
-    ψ_occ   = [ψ[ik][:, maskk] for (ik, maskk) in enumerate(mask_occ)]
-    ψ_extra = [ψ[ik][:, maskk] for (ik, maskk) in enumerate(mask_extra)]
+    ψ_occ   = [ψ[ik][:, :, maskk] for (ik, maskk) in enumerate(mask_occ)]
+    ψ_extra = [ψ[ik][:, :, maskk] for (ik, maskk) in enumerate(mask_extra)]
     ε_occ   = [eigenvalues[ik][maskk] for (ik, maskk) in enumerate(mask_occ)]
-    δHψ_minus_q_occ = [δHψ[ik][:, mask_occ[k_to_k_minus_q[ik]]] for ik = 1:length(basis.kpoints)]
+    δHψ_minus_q_occ = [δHψ[ik][:, :, mask_occ[k_to_k_minus_q[ik]]] for ik = 1:length(basis.kpoints)]
     # Only needed for phonon calculations.
     ε_minus_q_occ  = [eigenvalues[k_to_k_minus_q[ik]][mask_occ[k_to_k_minus_q[ik]]]
                       for ik = 1:length(basis.kpoints)]
@@ -383,7 +390,7 @@ end
     # Then we compute δψ (again in-place into a zero-padded array) with elements of
     # `basis.kpoints` that are equivalent to `k+q`.
     δψ = zero.(δHψ)
-    δψ_occ = [δψ[ik][:, maskk] for (ik, maskk) in enumerate(mask_occ[k_to_k_minus_q])]
+    δψ_occ = [δψ[ik][:, :, maskk] for (ik, maskk) in enumerate(mask_occ[k_to_k_minus_q])]
     compute_δψ!(δψ_occ, basis, ham.blocks, ψ_occ, εF, ε_occ, δHψ_minus_q_occ, ε_minus_q_occ;
                 ψ_extra, q, kwargs_sternheimer...)
 
diff --git a/src/response/hessian.jl b/src/response/hessian.jl
index 3350f114ed..dca1e577f2 100644
--- a/src/response/hessian.jl
+++ b/src/response/hessian.jl
@@ -26,9 +26,14 @@ Compute the application of Ω defined at ψ to δψ. H is the Hamiltonian comput
 from ψ and Λ is the set of Rayleigh coefficients ψk' * Hk * ψk at each k-point.
 """
 @timing function apply_Ω(δψ, ψ, H::Hamiltonian, Λ)
-    δψ = proj_tangent(δψ, ψ)
-    Ωδψ = [H.blocks[ik] * δψk - δψk * Λ[ik] for (ik, δψk) in enumerate(δψ)]
-    proj_tangent!(Ωδψ, ψ)
+    δψ = proj_tangent(H.basis, δψ, ψ)
+    Ωδψ = map(enumerate(zip(δψ, H * δψ))) do (ik, (δψk, Hδψk))
+        δψk = to_composite_σG(H.basis, H.basis.kpoints[ik], δψ[ik])
+        Λk = to_composite_σG(H.basis, H.basis.kpoints[ik], Λ[ik])
+        δψΛk = from_composite_σG(H.basis, H.basis.kpoints[ik], δψk * Λk)
+        Hδψk - δψΛk
+    end
+    proj_tangent!(Ωδψ, H.basis, ψ)
 end
 
 """
@@ -38,7 +43,7 @@ Compute the application of K defined at ψ to δψ. ρ is the density issued fro
 δψ also generates a δρ, computed with `compute_δρ`.
 """
 @views @timing function apply_K(basis::PlaneWaveBasis, δψ, ψ, ρ, occupation)
-    δψ = proj_tangent(δψ, ψ)
+    δψ = proj_tangent(basis, δψ, ψ)
     δρ = compute_δρ(basis, ψ, δψ, occupation)
     δV = apply_kernel(basis, δρ; ρ)
 
@@ -46,15 +51,18 @@ Compute the application of K defined at ψ to δψ. ρ is the density issued fro
         kpt = basis.kpoints[ik]
         δVψk = similar(ψk)
 
-        for n = 1:size(ψk, 2)
-            ψnk_real = ifft(basis, kpt, ψk[:, n])
-            δVψnk_real = δV[:, :, :, kpt.spin] .* ψnk_real
-            δVψk[:, n] = fft(basis, kpt, δVψnk_real)
+        for n = 1:size(ψk, 3)
+            ψnk_real = ifft(basis, kpt, ψk[:, :, n])
+            δVψnk_real = similar(ψnk_real)
+            for σ = 1:basis.model.n_components
+                δVψnk_real[σ, :, :, :] = δV[:, :, :, kpt.spin] .* ψnk_real[σ, :, :, :]
+            end
+            δVψk[:, :, n] = fft(basis, kpt, δVψnk_real)
         end
         δVψk
     end
     # ensure projection onto the tangent space
-    proj_tangent!(Kδψ, ψ)
+    proj_tangent!(Kδψ, basis, ψ)
 end
 
 """
@@ -83,24 +91,28 @@ Return δψ where (Ω+K) δψ = rhs
     unsafe_unpack(x) = unsafe_unpack_ψ(reinterpret_complex(x), size.(ψ))
 
     # project rhs on the tangent space before starting
-    proj_tangent!(rhs, ψ)
+    proj_tangent!(rhs, basis, ψ)
     rhs_pack = pack(rhs)
 
     # preconditioner
     Pks = [PreconditionerTPA(basis, kpt) for kpt in basis.kpoints]
     for ik = 1:length(Pks)
-        precondprep!(Pks[ik], ψ[ik])
+        precondprep!(Pks[ik], to_composite_σG(basis, basis.kpoints[ik], ψ[ik]))
     end
     function f_ldiv!(x, y)
         δψ = unpack(y)
-        proj_tangent!(δψ, ψ)
-        Pδψ = [ Pks[ik] \ δψk for (ik, δψk) in enumerate(δψ)]
-        proj_tangent!(Pδψ, ψ)
+        proj_tangent!(δψ, basis, ψ)
+        Pδψ = [Pks[ik] \ δψk for (ik, δψk) in enumerate(δψ)]
+        proj_tangent!(Pδψ, basis, ψ)
         x .= pack(Pδψ)
     end
 
     # Rayleigh-coefficients
-    Λ = [ψk'Hψk for (ψk, Hψk) in zip(ψ, H * ψ)]
+    Λ = map(enumerate(zip(ψ, H * ψ))) do (ik, (ψk, Hψk))
+        ψk = to_composite_σG(basis, basis.kpoints[ik], ψk)
+        Hψk = to_composite_σG(basis, basis.kpoints[ik], Hψk)
+        from_composite_σG(basis, basis.kpoints[ik], ψk'Hψk)
+    end
 
     # mapping of the linear system on the tangent space
     function ΩpK(x)
@@ -114,7 +126,7 @@ Return δψ where (Ω+K) δψ = rhs
     # solve (Ω+K) δψ = rhs on the tangent space with CG
     function proj(x)
         δψ = unpack(x)
-        proj_tangent!(δψ, ψ)
+        proj_tangent!(δψ, basis, ψ)
         pack(δψ)
     end
     res = cg(J, rhs_pack; precon=FunctionPreconditioner(f_ldiv!), proj, tol,
@@ -134,10 +146,10 @@ Solve the problem `(Ω+K) δψ = rhs` using a split algorithm, where `rhs` is ty
     - `δVind`: Change in potential induced by `δρ` (the term needed on top of `δHextψ`
       to get `δHψ`).
 """
-@timing function solve_ΩplusK_split(ham::Hamiltonian, ρ::AbstractArray{T}, ψ, occupation, εF,
-                                    eigenvalues, rhs; tol=1e-8, tol_sternheimer=tol/10,
-                                    verbose=false, occupation_threshold, q=zero(Vec3{real(T)}),
-                                    kwargs...) where {T}
+@timing function solve_ΩplusK_split(ham::Hamiltonian, ρ::AbstractArray{T}, ψ, occupation, εF, 
+                                     eigenvalues, rhs; tol=1e-8, tol_sternheimer=tol/10, 
+                                     verbose=false, occupation_threshold, q=zero(Vec3{real(T)}), 
+                                     kwargs...) where {T}
     # Using χ04P = -Ω^-1, E extension operator (2P->4P) and R restriction operator:
     # (Ω+K)^-1 =  Ω^-1 ( 1 -   K   (1 + Ω^-1 K  )^-1    Ω^-1  )
     #          = -χ04P ( 1 -   K   (1 - χ04P K  )^-1   (-χ04P))
@@ -177,7 +189,7 @@ Solve the problem `(Ω+K) δψ = rhs` using a split algorithm, where `rhs` is ty
 
     # Compute total change in eigenvalues
     δeigenvalues = map(ψ, δHψ) do ψk, δHψk
-        map(eachcol(ψk), eachcol(δHψk)) do ψnk, δHψnk
+        map(eachslice(ψk; dims=3), eachslice(δHψk; dims=3)) do ψnk, δHψnk
             real(dot(ψnk, δHψnk))  # δε_{nk} = <ψnk | δH | ψnk>
         end
     end
diff --git a/src/scf/direct_minimization.jl b/src/scf/direct_minimization.jl
index ef0e142987..232ff0874d 100644
--- a/src/scf/direct_minimization.jl
+++ b/src/scf/direct_minimization.jl
@@ -95,6 +95,8 @@ function direct_minimization(basis::PlaneWaveBasis{T};
     pack(ψ) = copy(reinterpret_real(pack_ψ(ψ)))
     unpack(x) = unpack_ψ(reinterpret_complex(x), size.(ψ))
     unsafe_unpack(x) = unsafe_unpack_ψ(reinterpret_complex(x), size.(ψ))
+    ψ_σG = to_composite_σG(basis, ψ)
+    unsafe_unpack_σG(x) = unsafe_unpack_ψ(reinterpret_complex(x), size.(ψ_σG))
 
     # This will get updated along the iterations
     ρ    = nothing
@@ -115,7 +117,8 @@ function direct_minimization(basis::PlaneWaveBasis{T};
         # the next step would be ρout - ρ. We thus record convergence, but let Optim do
         # one more step.
         δψ = unsafe_unpack(optim_state.s)
-        ψ_next = [ortho_qr(ψ[ik] - δψ[ik]) for ik in 1:Nk]
+        ψ_next = [ortho_qr(basis, kpt, ψ[ik] - δψ[ik])
+                  for (ik, kpt) in enumerate(basis.kpoints)]
         compute_density(basis, ψ_next, occupation)
     end
 
@@ -154,9 +157,9 @@ function direct_minimization(basis::PlaneWaveBasis{T};
         energies.total
     end
 
-    manifold = DMManifold(Nk, unsafe_unpack)
+    manifold = DMManifold(Nk, unsafe_unpack_σG)
     Pks = [prec_type(basis, kpt) for kpt in basis.kpoints]
-    P = DMPreconditioner(Nk, Pks, unsafe_unpack)
+    P = DMPreconditioner(Nk, Pks, unsafe_unpack_σG)
 
     optim_options = Optim.Options(; allow_f_increases=true,
                                   callback=optim_callback,
@@ -171,12 +174,12 @@ function direct_minimization(basis::PlaneWaveBasis{T};
     ψ = unpack(Optim.minimizer(res))
 
     # Final Rayleigh-Ritz (not strictly necessary, but sometimes useful)
-    eigenvalues = Vector{T}[]
-    for ik = 1:Nk
-        Hψk = ham[ik] * ψ[ik]
-        F = eigen(Hermitian(ψ[ik]'Hψk))
-        push!(eigenvalues, F.values)
-        ψ[ik] .= ψ[ik] * F.vectors
+    eigenvalues::Vector{Vector{T}} = map(enumerate(zip(ψ, ham * ψ))) do (ik, (ψk, Hψk))
+        ψk = to_composite_σG(basis, basis.kpoints[ik], ψk)
+        Hψk = to_composite_σG(basis, basis.kpoints[ik], Hψk)
+        F = eigen(Hermitian(ψk'Hψk))
+        ψk .= ψk * F.vectors
+        F.values
     end
 
     εF = nothing  # does not necessarily make sense here, as the
diff --git a/src/scf/nbands_algorithm.jl b/src/scf/nbands_algorithm.jl
index 77139c07da..79a3a13968 100644
--- a/src/scf/nbands_algorithm.jl
+++ b/src/scf/nbands_algorithm.jl
@@ -59,7 +59,7 @@ function determine_n_bands(bands::AdaptiveBands, occupation::Nothing, eigenvalue
     if isnothing(ψ)
         n_bands_compute = bands.n_bands_compute
     else
-        n_bands_compute = max(bands.n_bands_compute, maximum(ψk -> size(ψk, 2), ψ))
+        n_bands_compute = max(bands.n_bands_compute, maximum(ψk -> size(ψk, 3), ψ))
     end
     # Boost number of bands to converge to have more information around in the next step
     # and to thus make a better decision on the number of bands we actually care about.
@@ -87,7 +87,7 @@ function determine_n_bands(bands::AdaptiveBands, occupation::AbstractVector,
     end
     n_bands_compute = max(bands.n_bands_compute, n_bands_compute_ε, n_bands_converge + 3)
     if !isnothing(ψ)
-        n_bands_compute = max(n_bands_compute, maximum(ψk -> size(ψk, 2), ψ))
+        n_bands_compute = max(n_bands_compute, maximum(ψk -> size(ψk, 3), ψ))
     end
     (; n_bands_converge, n_bands_compute)
 end
diff --git a/src/scf/newton.jl b/src/scf/newton.jl
index 144599999d..ccbba0ce52 100644
--- a/src/scf/newton.jl
+++ b/src/scf/newton.jl
@@ -53,21 +53,25 @@ function compute_projected_gradient(basis::PlaneWaveBasis, ψ, occupation)
     ρ = compute_density(basis, ψ, occupation)
     H = energy_hamiltonian(basis, ψ, occupation; ρ).ham
 
-    [proj_tangent_kpt(H.blocks[ik] * ψk, ψk) for (ik, ψk) in enumerate(ψ)]
+    [proj_tangent_kpt(H.basis, H.basis.kpoints[ik], H.blocks[ik] * ψk, ψk)
+     for (ik, ψk) in enumerate(ψ)]
 end
 
 # Projections on the space tangent to ψ
-function proj_tangent_kpt!(δψk, ψk)
+function proj_tangent_kpt!(δψk, basis::PlaneWaveBasis, kpt::Kpoint, ψk)
     # δψk = δψk - ψk * (ψk'δψk)
-    mul!(δψk, ψk, ψk'δψk, -1, 1)
+    ψk_σG = to_composite_σG(basis, kpt, ψk)
+    δψk_σG = to_composite_σG(basis, kpt, δψk)
+    mul!(δψk_σG, ψk_σG, ψk_σG'δψk_σG, -1, 1)
+    δψk
 end
-proj_tangent_kpt(δψk, ψk) = proj_tangent_kpt!(deepcopy(δψk), ψk)
+proj_tangent_kpt(basis, kpt, δψk, ψk) = proj_tangent_kpt!(deepcopy(δψk), basis, kpt, ψk)
 
-function proj_tangent(δψ, ψ)
-    [proj_tangent_kpt(δψ[ik], ψk) for (ik, ψk) in enumerate(ψ)]
+function proj_tangent(basis::PlaneWaveBasis, δψ, ψ)
+    [proj_tangent_kpt(basis, basis.kpoints[ik], δψ[ik], ψk) for (ik, ψk) in enumerate(ψ)]
 end
-function proj_tangent!(δψ, ψ)
-    [proj_tangent_kpt!(δψ[ik], ψk) for (ik, ψk) in enumerate(ψ)]
+function proj_tangent!(δψ, basis::PlaneWaveBasis, ψ)
+    [proj_tangent_kpt!(δψ[ik], basis, basis.kpoints[ik], ψk) for (ik, ψk) in enumerate(ψ)]
 end
 
 
@@ -79,8 +83,7 @@ end
 Newton algorithm. Be careful that the starting point needs to be not too far
 from the solution.
 """
-function newton(basis::PlaneWaveBasis{T}, ψ0;
-                tol=1e-6, tol_cg=tol / 100, maxiter=20,
+function newton(basis::PlaneWaveBasis{T}, ψ0; tol=1e-6, tol_cg=tol / 100, maxiter=20,
                 callback=ScfDefaultCallback(),
                 is_converged=ScfConvergenceDensity(tol)) where {T}
 
@@ -93,7 +96,7 @@ function newton(basis::PlaneWaveBasis{T}, ψ0;
     filled_occ = filled_occupation(model)
     n_spin = model.n_spin_components
     n_bands = div(model.n_electrons, n_spin * filled_occ, RoundUp)
-    @assert n_bands == size(ψ0[1], 2)
+    @assert all([n_bands == size(ψk, 3) for ψk in ψ0])
 
     # number of kpoints and occupation
     Nk = length(basis.kpoints)
@@ -119,7 +122,7 @@ function newton(basis::PlaneWaveBasis{T}, ψ0;
         res = compute_projected_gradient(basis, ψ, occupation)
         # solve (Ω+K) δψ = -res so that the Newton step is ψ <- ψ + δψ
         δψ = solve_ΩplusK(basis, ψ, -res, occupation; tol=tol_cg).δψ
-        ψ  = [ortho_qr(ψ[ik] + δψ[ik]) for ik = 1:Nk]
+        ψ  = [ortho_qr(basis, basis.kpoints[ik], ψ[ik] + δψ[ik]) for ik = 1:Nk]
 
         ρout        = compute_density(basis, ψ, occupation)
         energies, H = energy_hamiltonian(basis, ψ, occupation; ρ=ρout)
@@ -136,12 +139,12 @@ function newton(basis::PlaneWaveBasis{T}, ψ0;
     end
 
     # Rayleigh-Ritz
-    eigenvalues = []
-    for ik = 1:Nk
-        Hψk = H.blocks[ik] * ψ[ik]
-        F = eigen(Hermitian(ψ[ik]'Hψk))
-        push!(eigenvalues, F.values)
-        ψ[ik] .= ψ[ik] * F.vectors
+    eigenvalues = map(enumerate(zip(ψ, H * ψ))) do (ik, (ψk, Hψk))
+        ψk = to_composite_σG(basis, basis.kpoints[ik], ψk)
+        Hψk = to_composite_σG(basis, basis.kpoints[ik], Hψk)
+        F = eigen(Hermitian(ψk'Hψk))
+        ψk .= ψk * F.vectors
+        F.values
     end
 
     εF = nothing  # does not necessarily make sense here, as the
diff --git a/src/scf/self_consistent_field.jl b/src/scf/self_consistent_field.jl
index 37117e0839..c598e88ab6 100644
--- a/src/scf/self_consistent_field.jl
+++ b/src/scf/self_consistent_field.jl
@@ -63,8 +63,8 @@ function next_density(ham::Hamiltonian,
         increased_n_bands = true
     else
         @assert length(ψ) == length(ham.basis.kpoints)
-        n_bands_compute = max(n_bands_compute, maximum(ψk -> size(ψk, 2), ψ))
-        increased_n_bands = n_bands_compute > size(ψ[1], 2)
+        n_bands_compute = max(n_bands_compute, maximum(ψk -> size(ψk, 3), ψ))
+        increased_n_bands = any([n_bands_compute > size(ψk, 3) for ψk in ψ])
     end
 
     # TODO Synchronize since right now it is assumed that the same number of bands are
@@ -90,8 +90,9 @@ function next_density(ham::Hamiltonian,
               "`nbandsalg=AdaptiveBands(model; n_bands_converge=$(n_bands_converge + 3)`)")
     end
 
-    ρout = compute_density(ham.basis, eigres.X, occupation; nbandsalg.occupation_threshold)
-    (; ψ=eigres.X, eigenvalues=eigres.λ, occupation, εF, ρout, diagonalization=eigres,
+    ψ = eigres.X
+    ρout = compute_density(ham.basis, ψ, occupation; nbandsalg.occupation_threshold)
+    (; ψ, eigenvalues=eigres.λ, occupation, εF, ρout, diagonalization=eigres,
      n_bands_converge, nbandsalg.occupation_threshold)
 end
 
diff --git a/src/supercell.jl b/src/supercell.jl
index fbc3d82d30..4cbb3e3e47 100644
--- a/src/supercell.jl
+++ b/src/supercell.jl
@@ -72,8 +72,7 @@ function cell_to_supercell(ψ, basis::PlaneWaveBasis{T},
     # Ensure that the basis is unfolded.
     length(basis.kgrid) != length(basis.kpoints) && error("basis must be unfolded")
 
-    num_kpG   = sum(size.(ψ, 1))
-    num_bands = size(ψ[1], 2)
+    num_kpG   = sum([size(ψk, 2) for ψk in ψ])
 
     # Maps k+G vector of initial basis to a G vector of basis_supercell
     function cell_supercell_mapping(kpt)
@@ -84,12 +83,12 @@ function cell_to_supercell(ψ, basis::PlaneWaveBasis{T},
     # Transfer all ψ[k] independently and return the hcat of all blocs
     ψ_out_blocs = []
     for (ik, kpt) in enumerate(basis.kpoints)
-        ψk_supercell = zeros(complex(T), num_kpG, num_bands)
-        ψk_supercell[cell_supercell_mapping(kpt), :] .= ψ[ik]
+        ψk_supercell = zeros(complex(T), basis.model.n_components, num_kpG, size(ψ[ik], 3))
+        ψk_supercell[:, cell_supercell_mapping(kpt), :] .= ψ[ik]
         push!(ψ_out_blocs, ψk_supercell)
     end
-    # Note that each column is normalize since each ψ[ik][:,n] is.
-    reduce(hcat, ψ_out_blocs)
+    # Note that each band is normalized since each ψ[ik][:,:,n] is.
+    [reshape(stack(ψ_out_blocs), basis.model.n_components, num_kpG, :)]
 end
 
 @doc raw"""
@@ -110,7 +109,7 @@ function cell_to_supercell(scfres::NamedTuple)
 
     # Compute supercell basis, ψ, occupations and ρ
     basis_supercell = cell_to_supercell(basis)
-    ψ_supercell     = [cell_to_supercell(ψ, basis, basis_supercell)]
+    ψ_supercell     = cell_to_supercell(ψ, basis, basis_supercell)
     eigs_supercell  = [reduce(vcat, scfres_unfold.eigenvalues)]
     occ_supercell   = compute_occupation(basis_supercell, eigs_supercell, scfres.εF).occupation
     ρ_supercell     = compute_density(basis_supercell, ψ_supercell, occ_supercell;
diff --git a/src/symmetry.jl b/src/symmetry.jl
index a25f0556d3..275736b8f5 100644
--- a/src/symmetry.jl
+++ b/src/symmetry.jl
@@ -216,7 +216,7 @@ Apply a symmetry operation to eigenvectors `ψk` at a given `kpoint` to obtain a
 equivalent point in [-0.5, 0.5)^3 and associated eigenvectors (expressed in the
 basis of the new ``k``-point).
 """
-function apply_symop(symop::SymOp, basis, kpoint, ψk::AbstractVecOrMat)
+function apply_symop(symop::SymOp, basis, kpoint, ψk::AbstractArray)
     S, τ = symop.S, symop.τ
     isone(symop) && return kpoint, ψk
 
@@ -248,11 +248,13 @@ function apply_symop(symop::SymOp, basis, kpoint, ψk::AbstractVecOrMat)
     invS = Mat3{Int}(inv(S))
     Gs_full = [G + kshift for G in G_vectors(basis, Skpoint)]
     ψSk = zero(ψk)
-    for iband = 1:size(ψk, 2)
+    for iband = 1:size(ψk, 3)
         for (ig, G_full) in enumerate(Gs_full)
             igired = index_G_vectors(basis, kpoint, invS * G_full)
             @assert igired !== nothing
-            ψSk[ig, iband] = cis2pi(-dot(G_full, τ)) * ψk[igired, iband]
+            for σ = 1:basis.model.n_components
+                ψSk[σ, ig, iband] = cis2pi(-dot(G_full, τ)) * ψk[σ, igired, iband]
+            end
         end
     end
 
@@ -415,6 +417,8 @@ function unfold_array(basis_irred, basis_unfolded, data, is_ψ)
         error("Brillouin zone symmetry unfolding not supported with MPI yet")
     end
     data_unfolded = similar(data, length(basis_unfolded.kpoints))
+    n_components = basis_unfolded.model.n_components
+    @assert n_components == basis_irred.model.n_components
     for ik_unfolded = 1:length(basis_unfolded.kpoints)
         kpt_unfolded = basis_unfolded.kpoints[ik_unfolded]
         ik_irred, symop = unfold_mapping(basis_irred, kpt_unfolded)
@@ -422,8 +426,8 @@ function unfold_array(basis_irred, basis_unfolded, data, is_ψ)
             # transform ψ_k from data into ψ_Sk in data_unfolded
             kunfold_coord = kpt_unfolded.coordinate
             @assert normalize_kpoint_coordinate(kunfold_coord) ≈ kunfold_coord
-            _, ψSk = apply_symop(symop, basis_irred,
-                                  basis_irred.kpoints[ik_irred], data[ik_irred])
+            _, ψSk = apply_symop(symop, basis_irred, basis_irred.kpoints[ik_irred],
+                                 data[ik_irred])
             data_unfolded[ik_unfolded] = ψSk
         else
             # simple copy
diff --git a/src/terms/Hamiltonian.jl b/src/terms/Hamiltonian.jl
index 94e4adc9bc..9208f69d6a 100644
--- a/src/terms/Hamiltonian.jl
+++ b/src/terms/Hamiltonian.jl
@@ -56,7 +56,8 @@ function HamiltonianBlock(basis, kpoint, operators; scratch=nothing)
     end
 end
 function _ham_allocate_scratch(basis::PlaneWaveBasis{T}) where {T}
-    [(; ψ_reals=zeros_like(basis.G_vectors, complex(T), basis.fft_size...))
+    [(; ψ_reals=zeros_like(basis.G_vectors, complex(T),
+                           basis.model.n_components, basis.fft_size...))
      for _ = 1:Threads.nthreads()]
 end
 
@@ -90,37 +91,48 @@ function LinearAlgebra.mul!(Hψ, H::Hamiltonian, ψ)
     Hψ
 end
 # need `deepcopy` here to copy the elements of the array of arrays ψ (not just pointers)
-Base.:*(H::Hamiltonian, ψ) = mul!(deepcopy(ψ), H, ψ)
+Base.:*(H::Hamiltonian, ψ::AbstractArray) = mul!(deepcopy(ψ), H, ψ)
+
+# Serves as a wrapper around the specialized multiplication functions for LOBPCG as it
+# expects to work on vectors or matrices. Explicitely create a 3-tensor to prevent ambiguity.
+@timing function LinearAlgebra.mul!(Hψ_σG::AbstractVecOrMat, H::HamiltonianBlock,
+                                    ψ_σG::AbstractVecOrMat)
+    Hψ = unfold_from_composite_σG(H.basis, H.kpoint, Hψ_σG)
+    ψ = unfold_from_composite_σG(H.basis, H.kpoint, ψ_σG)
+    mul!(Hψ, H, ψ)
+    Hψ_σG
+end
 
 # Loop through bands, IFFT to get ψ in real space, loop through terms, FFT and accumulate into Hψ
 # For the common DftHamiltonianBlock there is an optimized version below
-@views @timing "Hamiltonian multiplication" function LinearAlgebra.mul!(Hψ::AbstractArray,
+@views @timing "Hamiltonian multiplication" function LinearAlgebra.mul!(Hψ::AbstractArray3,
                                                                         H::GenericHamiltonianBlock,
-                                                                        ψ::AbstractArray)
+                                                                        ψ::AbstractArray3)
+    basis = H.basis
     function allocate_local_storage()
-        T = eltype(H.basis)
-        (; Hψ_fourier = similar(Hψ[:, 1]),
-           ψ_real  = similar(ψ, complex(T), H.basis.fft_size...),
-           Hψ_real = similar(Hψ, complex(T), H.basis.fft_size...))
+        T = eltype(basis)
+        (; Hψ_fourier = similar(Hψ[:, :, 1]),
+           ψ_real  = similar(ψ, complex(T), basis.model.n_components, basis.fft_size...),
+           Hψ_real = similar(Hψ, complex(T), basis.model.n_components, basis.fft_size...))
     end
-    parallel_loop_over_range(1:size(ψ, 2); allocate_local_storage) do iband, storage
+    parallel_loop_over_range(1:size(ψ, 3); allocate_local_storage) do iband, storage
         to = TimerOutput()  # Thread-local timer output
 
         # Take ψi, IFFT it to ψ_real, apply each term to Hψ_fourier and Hψ_real, and add it
         # to Hψ.
         storage.Hψ_real .= 0
         storage.Hψ_fourier .= 0
-        ifft!(storage.ψ_real, H.basis, H.kpoint, ψ[:, iband])
+        ifft!(storage.ψ_real, basis, H.kpoint, ψ[:, :, iband])
         for op in H.optimized_operators
             @timeit to "$(nameof(typeof(op)))" begin
                 apply!((; fourier=storage.Hψ_fourier, real=storage.Hψ_real),
                        op,
-                       (; fourier=ψ[:, iband], real=storage.ψ_real))
+                       (; fourier=ψ[:, :, iband], real=storage.ψ_real))
             end
         end
-        Hψ[:, iband] .= storage.Hψ_fourier
-        fft!(storage.Hψ_fourier, H.basis, H.kpoint, storage.Hψ_real)
-        Hψ[:, iband] .+= storage.Hψ_fourier
+        Hψ[:, :, iband] .= storage.Hψ_fourier
+        fft!(storage.Hψ_fourier, basis, H.kpoint, storage.Hψ_real)
+        Hψ[:, :, iband] .+= storage.Hψ_fourier
 
         if Threads.threadid() == 1
             merge!(DFTK.timer, to; tree_point=[t.name for t in DFTK.timer.timer_stack])
@@ -131,10 +143,10 @@ Base.:*(H::Hamiltonian, ψ) = mul!(deepcopy(ψ), H, ψ)
 end
 
 # Fast version, specialized on DFT models. Minimizes the number of FFTs and allocations
-@views @timing "DftHamiltonian multiplication" function LinearAlgebra.mul!(Hψ::AbstractArray,
+@views @timing "DftHamiltonian multiplication" function LinearAlgebra.mul!(Hψ::AbstractArray3,
                                                                            H::DftHamiltonianBlock,
-                                                                           ψ::AbstractArray)
-    n_bands = size(ψ, 2)
+                                                                           ψ::AbstractArray3)
+    n_bands = size(ψ, 3)
     iszero(n_bands) && return Hψ  # Nothing to do if ψ empty
     have_divAgrad = !isnothing(H.divAgrad_op)
 
@@ -146,17 +158,21 @@ end
         ψ_real = storage.ψ_reals
 
         @timeit to "local+kinetic" begin
-            ifft!(ψ_real, H.basis, H.kpoint, ψ[:, iband]; normalize=false)
-            ψ_real .*= potential
-            fft!(Hψ[:, iband], H.basis, H.kpoint, ψ_real; normalize=false)  # overwrites ψ_real
-            Hψ[:, iband] .+= H.fourier_op.multiplier .* ψ[:, iband]
+            ifft!(ψ_real, H.basis, H.kpoint, ψ[:, :, iband]; normalize=false)
+            for σ = 1:H.basis.model.n_components
+                ψ_real[σ, :, :, :] .*= potential
+            end
+            fft!(Hψ[:, :, iband], H.basis, H.kpoint, ψ_real; normalize=false)  # overwrites ψ_real
+            for σ = 1:H.basis.model.n_components
+                Hψ[σ, :, iband] .+= H.fourier_op.multiplier .* ψ[σ, :, iband]
+            end
         end
 
         if have_divAgrad
             @timeit to "divAgrad" begin
-                apply!((; fourier=Hψ[:, iband], real=nothing),
+                apply!((; fourier=Hψ[:, :, iband], real=nothing),
                        H.divAgrad_op,
-                       (; fourier=ψ[:, iband], real=nothing);
+                       (; fourier=ψ[:, :, iband], real=nothing);
                        ψ_scratch=ψ_real)
             end
         end
@@ -180,11 +196,10 @@ end
     Hψ
 end
 
-
 """
 Get energies and Hamiltonian
 kwargs is additional info that might be useful for the energy terms to precompute
-(eg the density ρ)
+(e.g., the density ρ)
 """
 @timing function energy_hamiltonian(basis::PlaneWaveBasis, ψ, occupation; kwargs...)
     # it: index into terms, ik: index into kpoints
diff --git a/src/terms/anyonic.jl b/src/terms/anyonic.jl
index 8a4000936a..c0cfa20ae8 100644
--- a/src/terms/anyonic.jl
+++ b/src/terms/anyonic.jl
@@ -72,6 +72,7 @@ function (A::Anyonic)(basis)
     @assert basis.model.lattice[2, 1] == basis.model.lattice[1, 2] == 0
     @assert basis.model.lattice[1, 1] == basis.model.lattice[2, 2]
     @assert basis.model.n_spin_components == 1
+    @assert basis.model.n_components == 1
 
     TermAnyonic(basis, eltype(basis)(A.hbar), eltype(basis)(A.β))
 end
@@ -103,7 +104,7 @@ function ene_ops(term::TermAnyonic, basis::PlaneWaveBasis{T}, ψ, occupation;
                  ρ, kwargs...) where {T}
     hbar = term.hbar
     β = term.β
-    @assert ψ !== nothing # the hamiltonian depends explicitly on ψ
+    @assert !isnothing(ψ)  # the hamiltonian depends explicitly on ψ
 
     # Compute A in Fourier domain
     # curl A = 2π ρ, ∇⋅A = 0
@@ -153,8 +154,9 @@ function ene_ops(term::TermAnyonic, basis::PlaneWaveBasis{T}, ψ, occupation;
     ops_ham = [ops_energy..., RealSpaceMultiplication(basis, basis.kpoints[1], eff_pot_real)]
 
     E = zero(T)
-    for iband = 1:size(ψ[1], 2)
-        ψnk = @views ψ[1][:, iband]
+    @assert length(ψ) == 1
+    for iband = 1:size(ψ[1], 3)
+        ψnk = @views ψ[1][:, :, iband]
         # TODO optimize this
         for op in ops_energy
             E += occupation[1][iband] * real(dot(ψnk, op * ψnk))
diff --git a/src/terms/entropy.jl b/src/terms/entropy.jl
index 006ed95f4f..e1155a594d 100644
--- a/src/terms/entropy.jl
+++ b/src/terms/entropy.jl
@@ -25,8 +25,8 @@ function ene_ops(term::TermEntropy, basis::PlaneWaveBasis{T}, ψ, occupation;
     eigenvalues = kwargs[:eigenvalues]
 
     E = zero(T)
-    for (ik, k) in enumerate(basis.kpoints)
-        for iband = 1:size(ψ[ik], 2)
+    for (ik, ψk) in enumerate(ψ)
+        for iband = 1:size(ψk, 3)
             E -= (temperature
                   * basis.kweights[ik]
                   * filled_occupation(basis.model)
diff --git a/src/terms/kinetic.jl b/src/terms/kinetic.jl
index 311f83d4a5..93d8d739d6 100644
--- a/src/terms/kinetic.jl
+++ b/src/terms/kinetic.jl
@@ -48,10 +48,10 @@ end
 
     E = zero(T)
     for (ik, ψk) in enumerate(ψ)
-        for iband = 1:size(ψk, 2)
-            ψnk = @views ψk[:, iband]
+        for iband = 1:size(ψk, 3), σ = 1:size(ψk, 1)
+            ψσkn = @views ψk[σ, :, iband]
             E += (basis.kweights[ik] * occupation[ik][iband]
-                  * real(dot(ψnk, Diagonal(term.kinetic_energies[ik]), ψnk)))
+                  * real(dot(ψσkn, Diagonal(term.kinetic_energies[ik]), ψσkn)))
         end
     end
     E = mpi_sum(E, basis.comm_kpts)
diff --git a/src/terms/magnetic.jl b/src/terms/magnetic.jl
index 4fc26384b1..b21d9568bf 100644
--- a/src/terms/magnetic.jl
+++ b/src/terms/magnetic.jl
@@ -39,9 +39,9 @@ function ene_ops(term::TermMagnetic, basis::PlaneWaveBasis{T}, ψ, occupation;
     end
 
     E = zero(T)
-    for (ik, k) in enumerate(basis.kpoints)
-        for iband = 1:size(ψ[1], 2)
-            ψnk = @views ψ[ik][:, iband]
+    for (ik, ψk) in enumerate(ψ)
+        for iband = 1:size(ψk, 3)
+            ψnk = @views ψ[ik][:, :, iband]
             # TODO optimize this
             E += basis.kweights[ik] * occupation[ik][iband] * real(dot(ψnk, ops[ik] * ψnk))
         end
diff --git a/src/terms/nonlocal.jl b/src/terms/nonlocal.jl
index a1fed882b7..a00476ea20 100644
--- a/src/terms/nonlocal.jl
+++ b/src/terms/nonlocal.jl
@@ -37,9 +37,11 @@ end
 
     E = zero(T)
     for (ik, ψk) in enumerate(ψ)
-        Pψk = term.ops[ik].P' * ψk  # nproj x nband
-        band_enes = dropdims(sum(real.(conj.(Pψk) .* (term.ops[ik].D * Pψk)), dims=1), dims=1)
-        E += basis.kweights[ik] * sum(band_enes .* occupation[ik])
+        for σ = 1:basis.model.n_components
+            Pψk = term.ops[ik].P' * ψk[σ, :, :]  # nproj x nband
+            band_enes = dropdims(sum(real.(conj.(Pψk) .* (term.ops[ik].D * Pψk)), dims=1), dims=1)
+            E += basis.kweights[ik] * sum(band_enes .* occupation[ik])
+        end
     end
     E = mpi_sum(E, basis.comm_kpts)
 
@@ -76,13 +78,15 @@ end
                 structure_factors = [cis2pi(-dot(p, r)) for p in G_plus_k]
                 P = structure_factors .* form_factors ./ sqrt(unit_cell_volume)
 
-                forces[idx] += map(1:3) do α
+                @views forces[idx] += map(1:3) do α
                     dPdR = [-2T(π)*im*p[α] for p in G_plus_k] .* P
-                    ψk = ψ[ik]
-                    δHψk = P * (C * (dPdR' * ψk))
-                    -sum(occupation[ik][iband] * basis.kweights[ik] *
-                             2real(dot(ψk[:, iband], δHψk[:, iband]))
-                         for iband=1:size(ψk, 2))
+                    mapreduce(+, 1:model.n_components) do σ
+                        ψkσ = ψ[ik][σ, :, :]
+                        δHψkσ = P * (C * (dPdR' * ψkσ))
+                        -sum(occupation[ik][iband] * basis.kweights[ik] *
+                                 2real(dot(ψkσ[:, iband], δHψkσ[:, iband]))
+                             for iband=1:size(ψkσ, 2))
+                    end
                 end  # α
             end  # r
         end  # kpt
@@ -254,11 +258,11 @@ function PDPψk(basis, positions, psp_groups, kpt, kpt_minus_q, ψk)
     D = build_projection_coefficients(basis, psp_groups)
     P = build_projection_vectors(basis, kpt, psp_groups, positions)
     P_minus_q = build_projection_vectors(basis, kpt_minus_q, psp_groups, positions)
-    P * (D * P_minus_q' * ψk)
+    stack([P * (D * P_minus_q' * ψkσ) for ψkσ in eachslice(ψk; dims=1)]; dims=1)
 end
 
-function compute_dynmat_δH(::TermAtomicNonlocal, basis::PlaneWaveBasis{T}, ψ, occupation,
-                           δψ, δoccupation, q) where {T}
+@views function compute_dynmat_δH(::TermAtomicNonlocal, basis::PlaneWaveBasis{T}, ψ,
+                                  occupation, δψ, δoccupation, q) where {T}
     S = complex(T)
     model = basis.model
     psp_groups = [group for group in model.atom_groups
@@ -278,16 +282,18 @@ function compute_dynmat_δH(::TermAtomicNonlocal, basis::PlaneWaveBasis{T}, ψ,
             for idx in group
                 δforces[idx] += map(1:3) do α
                     δHψk = derivative_wrt_αs(model.positions, α, idx) do positions_αs
-                        PDPψk(basis, positions_αs, psp_groups, kpt_plus_q, kpt, ψ[ik])
+                        PDPψk(basis, positions_αs, psp_groups, kpt_plus_q, kpt, ψk)
                     end
                     δHψk_plus_q = derivative_wrt_αs(model.positions, α, idx) do positions_αs
-                        PDPψk(basis, positions_αs, psp_groups, kpt, kpt, ψ[ik])
+                        PDPψk(basis, positions_αs, psp_groups, kpt, kpt, ψk)
+                    end
+                    mapreduce(+, 1:basis.model.n_components) do σ
+                        -sum(  2occupation[ik][iband] * basis.kweights[ik]
+                                   * dot(δψk_plus_q[σ, :, iband], δHψk[σ, :, iband])
+                             + δoccupation[ik][iband]  * basis.kweights[ik]
+                                   * 2real(dot(ψk[σ, :, iband], δHψk_plus_q[σ, :, iband]))
+                             for iband=1:size(ψk, 3))
                     end
-                    -sum(  2occupation[ik][iband] * basis.kweights[ik]
-                               * dot(δψk_plus_q[:, iband], δHψk[:, iband])
-                         + δoccupation[ik][iband]  * basis.kweights[ik]
-                               * 2real(dot(ψk[:, iband], δHψk_plus_q[:, iband]))
-                         for iband=1:size(ψk, 2))
                 end
             end
         end
@@ -330,9 +336,10 @@ end
                     PDPψk(basis, positions_βsαs, psp_groups, kpt, kpt, ψ[ik])
                 end
             end
-            dynmat_δ²H[β, s, α, s] += sum(occupation[ik][n] * basis.kweights[ik] *
-                                              dot(ψ[ik][:, n], δ²Hψ[ik][:, n])
-                                          for n=1:size(ψ[ik], 2))
+            dynmat_δ²H[β, s, α, s] += mapreduce(+, 1:basis.model.n_components) do σ
+                sum(occupation[ik][n] * basis.kweights[ik] * dot(ψ[ik][σ, :, n], δ²Hψ[ik][σ, :, n])
+                    for n=1:size(ψ[ik], 3))
+            end
         end
     end
 
@@ -345,6 +352,7 @@ function compute_δHψ_αs(::TermAtomicNonlocal, basis::PlaneWaveBasis{T}, ψ, 
     model = basis.model
     psp_groups = [group for group in model.atom_groups
                   if model.atoms[first(group)] isa ElementPsp]
+    isempty(psp_groups) && return nothing
 
     ψ_minus_q = transfer_blochwave_equivalent_to_actual(basis, ψ, -q)
     map(enumerate(basis.kpoints)) do (ik, kpt)
diff --git a/src/terms/operators.jl b/src/terms/operators.jl
index 9d483e18f3..10ea390eb6 100644
--- a/src/terms/operators.jl
+++ b/src/terms/operators.jl
@@ -12,7 +12,7 @@ abstract type RealFourierOperator end
 
 # Unoptimized fallback, intended for exploratory use only.
 # For performance, call through Hamiltonian which saves FFTs.
-function LinearAlgebra.mul!(Hψ::AbstractVector, op::RealFourierOperator, ψ::AbstractVector)
+function LinearAlgebra.mul!(Hψ::AbstractMatrix, op::RealFourierOperator, ψ::AbstractMatrix)
     ψ_real = ifft(op.basis, op.kpoint, ψ)
     Hψ_fourier = similar(ψ)
     Hψ_real = similar(ψ_real)
@@ -24,13 +24,20 @@ function LinearAlgebra.mul!(Hψ::AbstractVector, op::RealFourierOperator, ψ::Ab
     Hψ .= Hψ_fourier .+ fft(op.basis, op.kpoint, Hψ_real)
     Hψ
 end
-function LinearAlgebra.mul!(Hψ::AbstractMatrix, op::RealFourierOperator, ψ::AbstractMatrix)
-    @views for i = 1:size(ψ, 2)
-        mul!(Hψ[:, i], op, ψ[:, i])
+function LinearAlgebra.mul!(Hψ::AbstractArray3, op::RealFourierOperator, ψ::AbstractArray3)
+    @views for n = 1:size(ψ, 3)
+        mul!(Hψ[:, :, n], op, ψ[:, :, n])
     end
     Hψ
 end
 Base.:*(op::RealFourierOperator, ψ) = mul!(similar(ψ), op, ψ)
+# Default transformation: from two components tensors to diagonal matrices for each
+# reciprocal vector.
+function Matrix(op::RealFourierOperator)
+    n_Gk = length(G_vectors(op.basis, op.kpoint))
+    n_components = op.basis.model.n_components
+    reshape(Array(op), n_components*n_Gk, n_components*n_Gk)
+end
 
 """
 Noop operation: don't do anything.
@@ -41,9 +48,10 @@ struct NoopOperator{T <: Real} <: RealFourierOperator
     kpoint::Kpoint{T}
 end
 apply!(Hψ, op::NoopOperator, ψ) = nothing
-function Matrix(op::NoopOperator)
+function Array(op::NoopOperator)
     n_Gk = length(G_vectors(op.basis, op.kpoint))
-    zeros_like(op.basis.G_vectors, eltype(op.basis), n_Gk, n_Gk)
+    n_components = op.basis.model.n_components
+    zeros_like(op.basis.G_vectors, eltype(op.basis), n_components, n_Gk, n_components, n_Gk)
 end
 
 """
@@ -58,22 +66,28 @@ struct RealSpaceMultiplication{T <: Real, AT <: AbstractArray} <: RealFourierOpe
     potential::AT
 end
 function apply!(Hψ, op::RealSpaceMultiplication, ψ)
-    Hψ.real .+= op.potential .* ψ.real
+    @views for σ = 1:op.basis.model.n_components
+        Hψ.real[σ, :, :, :] .+= op.potential .* ψ.real[σ, :, :, :]
+    end
+    Hψ.real
 end
-function Matrix(op::RealSpaceMultiplication)
+function Array(op::RealSpaceMultiplication)
     # V(G, G') = <eG|V|eG'> = 1/sqrt(Ω) <e_{G-G'}|V>
     pot_fourier = fft(op.basis, op.potential)
     n_G = length(G_vectors(op.basis, op.kpoint))
-    H = zeros(complex(eltype(op.basis)), n_G, n_G)
-    for (j, G′) in enumerate(G_vectors(op.basis, op.kpoint))
-        for (i, G) in enumerate(G_vectors(op.basis, op.kpoint))
-            # G_vectors(basis)[ind_ΔG] = G - G'
-            ind_ΔG = index_G_vectors(op.basis, G - G′)
-            if isnothing(ind_ΔG)
-                error("For full matrix construction, the FFT size must be " *
-                      "large enough so that Hamiltonian applications are exact")
+    n_components = op.basis.model.n_components
+    H = zeros(complex(eltype(op.basis)), n_components, n_G, n_components, n_G)
+    for σ = 1:n_components
+        for (j, G′) in enumerate(G_vectors(op.basis, op.kpoint))
+            for (i, G) in enumerate(G_vectors(op.basis, op.kpoint))
+                # G_vectors(basis)[ind_ΔG] = G - G'
+                ind_ΔG = index_G_vectors(op.basis, G - G′)
+                if isnothing(ind_ΔG)
+                    error("For full matrix construction, the FFT size must be " *
+                          "large enough so that Hamiltonian applications are exact")
+                end
+                H[σ, i, σ, j] = pot_fourier[ind_ΔG] / sqrt(op.basis.model.unit_cell_volume)
             end
-            H[i, j] = pot_fourier[ind_ΔG] / sqrt(op.basis.model.unit_cell_volume)
         end
     end
     H
@@ -91,9 +105,17 @@ struct FourierMultiplication{T <: Real, AT <: AbstractArray} <: RealFourierOpera
     multiplier::AT
 end
 function apply!(Hψ, op::FourierMultiplication, ψ)
-    Hψ.fourier .+= op.multiplier .* ψ.fourier
+    @views for σ = 1:op.basis.model.n_components
+        Hψ.fourier[σ, :] .+= op.multiplier .* ψ.fourier[σ, :]
+    end
+    Hψ.fourier
+end
+function Array(op::FourierMultiplication)
+    n_Gk = length(G_vectors(op.basis, op.kpoint))
+    n_components = op.basis.model.n_components
+    D = Diagonal(op.multiplier)
+    reshape(kron(D, I(n_components)), n_components, n_Gk, n_components, n_Gk)
 end
-Matrix(op::FourierMultiplication) = Array(Diagonal(op.multiplier))
 
 """
 Nonlocal operator in Fourier space in Kleinman-Bylander format,
@@ -108,9 +130,20 @@ struct NonlocalOperator{T <: Real, PT, DT} <: RealFourierOperator
     D::DT
 end
 function apply!(Hψ, op::NonlocalOperator, ψ)
-    Hψ.fourier .+= op.P * (op.D * (op.P' * ψ.fourier))
+    @views for σ = 1:op.basis.model.n_components
+        Hψ.fourier[σ, :, :] .+= op.P * (op.D * (op.P' * ψ.fourier[σ, :, :]))
+    end
+    Hψ.fourier
+end
+function Array(op::NonlocalOperator)
+    n_Gk = length(G_vectors(op.basis, op.kpoint))
+    n_components = op.basis.model.n_components
+    H = zeros(complex(eltype(op.basis)), n_components, n_Gk, n_components, n_Gk)
+    for σ = 1:op.basis.model.n_components
+        H[σ, :, σ, :] = op.P * op.D * op.P'
+    end
+    H
 end
-Matrix(op::NonlocalOperator) = op.P * op.D * op.P'
 
 """
 Magnetic field operator A⋅(-i∇).
@@ -122,15 +155,20 @@ struct MagneticFieldOperator{T <: Real, AT} <: RealFourierOperator
 end
 function apply!(Hψ, op::MagneticFieldOperator, ψ)
     # TODO this could probably be better optimized
-    for α = 1:3
+    @views for α = 1:3
         iszero(op.Apot[α]) && continue
         pα = [p[α] for p in Gplusk_vectors_cart(op.basis, op.kpoint)]
-        ∂αψ_fourier = pα .* ψ.fourier
+        ∂αψ_fourier = similar(ψ.fourier)
+        for σ = 1:op.basis.model.n_components
+            ∂αψ_fourier[σ, :] = pα .* ψ.fourier[σ, :]
+        end
         ∂αψ_real = ifft(op.basis, op.kpoint, ∂αψ_fourier)
-        Hψ.real .+= op.Apot[α] .* ∂αψ_real
+        for σ = 1:op.basis.model.n_components
+            Hψ.real[σ, :, :, :] .+= op.Apot[α] .* ∂αψ_real[σ, :, :, :]
+        end
     end
 end
-# TODO Implement  Matrix(op::MagneticFieldOperator)
+# TODO Implement  Array(op::MagneticFieldOperator)
 
 @doc raw"""
 Nonlocal "divAgrad" operator ``-½ ∇ ⋅ (A ∇)`` where ``A`` is a scalar field on the
@@ -143,17 +181,27 @@ struct DivAgradOperator{T <: Real, AT} <: RealFourierOperator
     A::AT
 end
 function apply!(Hψ, op::DivAgradOperator, ψ;
-                ψ_scratch=zeros(complex(eltype(op.basis)), op.basis.fft_size...))
+                ψ_scratch=zeros(complex(eltype(op.basis)),
+                                op.basis.model.n_components, op.basis.fft_size...))
     # TODO: Performance improvements: Unscaled plans, avoid remaining allocations
     #       (which are only on the small k-point-specific Fourier grid
     G_plus_k = [[p[α] for p in Gplusk_vectors_cart(op.basis, op.kpoint)] for α = 1:3]
-    for α = 1:3
-        ∂αψ_real = ifft!(ψ_scratch, op.basis, op.kpoint, im .* G_plus_k[α] .* ψ.fourier)
-        A∇ψ      = fft(op.basis, op.kpoint, ∂αψ_real .* op.A)
-        Hψ.fourier .-= im .* G_plus_k[α] .* A∇ψ ./ 2
+    @views for α = 1:3
+        ∂αψ_fourier = similar(ψ.fourier)
+        for σ = 1:op.basis.model.n_components
+            ∂αψ_fourier[σ, :] = im .* G_plus_k[α] .* ψ.fourier[σ, :]
+        end
+        ∂αψ_real = ifft!(ψ_scratch, op.basis, op.kpoint, ∂αψ_fourier)
+        for σ = 1:op.basis.model.n_components
+            ∂αψ_real[σ, :, :, :] .*= op.A
+        end
+        A∇ψ = fft(op.basis, op.kpoint, ∂αψ_real)
+        for σ = 1:op.basis.model.n_components
+            Hψ.fourier[σ, :] .-= im .* G_plus_k[α] .* A∇ψ[σ, :] ./ 2
+        end
     end
 end
-# TODO Implement  Matrix(op::DivAgrad)
+# TODO Implement  Array(op::DivAgrad)
 
 
 # Optimize RFOs by combining terms that can be combined
diff --git a/src/transfer.jl b/src/transfer.jl
index 88956c22fe..941341b534 100644
--- a/src/transfer.jl
+++ b/src/transfer.jl
@@ -108,13 +108,14 @@ Transfer an array `ψk` defined on basis_in ``k``-point kpt_in to basis_out ``k`
 function transfer_blochwave_kpt(ψk_in, basis_in::PlaneWaveBasis, kpt_in::Kpoint,
                                 basis_out::PlaneWaveBasis, kpt_out::Kpoint)
     kpt_in == kpt_out && return copy(ψk_in)
-    @assert length(G_vectors(basis_in, kpt_in)) == size(ψk_in, 1)
+    @assert length(G_vectors(basis_in, kpt_in)) == size(ψk_in, 2)
     idcsk_in, idcsk_out = transfer_mapping(basis_in, kpt_in, basis_out, kpt_out)
 
-    n_bands = size(ψk_in, 2)
-    ψk_out  = similar(ψk_in, length(G_vectors(basis_out, kpt_out)), n_bands)
+    n_bands      = size(ψk_in, 3)
+    n_components = basis_in.model.n_components
+    ψk_out  = similar(ψk_in, n_components, length(G_vectors(basis_out, kpt_out)), n_bands)
     ψk_out .= 0
-    ψk_out[idcsk_out, :] .= ψk_in[idcsk_in, :]
+    ψk_out[:, idcsk_out, :] .= ψk_in[:, idcsk_in, :]
 
     ψk_out
 end
@@ -124,6 +125,7 @@ Transfer Bloch wave between two basis sets. Limited feature set.
 """
 function transfer_blochwave(ψ_in, basis_in::PlaneWaveBasis, basis_out::PlaneWaveBasis)
     @assert basis_in.model.lattice == basis_out.model.lattice
+    @assert basis_in.model.n_components == basis_out.model.n_components
     @assert length(basis_in.kpoints) == length(basis_out.kpoints)
     @assert all(basis_in.kpoints[ik].coordinate == basis_out.kpoints[ik].coordinate
                 for ik = 1:length(basis_in.kpoints))
@@ -215,10 +217,12 @@ element of `basis.kpoints` equivalent to ``k-q``.
     for (ik, kpt) in enumerate(basis.kpoints)
         # … then perform the multiplication with f in real space and get the Fourier
         # coefficients.
-        for n = 1:size(ψ[ik], 2)
-            fψ[ik][:, n] = fft(basis, kpt,
-                               ifft(basis, ψ_minus_q[ik].kpt, ψ_minus_q[ik].ψk[:, n])
-                                 .* f_real[:, :, :, kpt.spin])
+        for n = 1:size(ψ[ik], 3)
+            ψkn_minus_q_real = ifft(basis, ψ_minus_q[ik].kpt, ψ_minus_q[ik].ψk[:, :, n])
+            for σ = 1:basis.model.n_components
+                fψ[ik][σ, :, n] = fft(basis, kpt, ψkn_minus_q_real[σ, :, :, :]
+                                                      .* f_real[:, :, :, kpt.spin])
+            end
         end
     end
     fψ
diff --git a/src/workarounds/forwarddiff_rules.jl b/src/workarounds/forwarddiff_rules.jl
index 998d4ca8b9..720085a367 100644
--- a/src/workarounds/forwarddiff_rules.jl
+++ b/src/workarounds/forwarddiff_rules.jl
@@ -47,12 +47,13 @@ function Base.:*(p::AbstractFFTs.Plan, x::AbstractArray{<:Complex{<:Dual{Tg}}})
     end
 end
 
-function build_fft_plans!(tmp::AbstractArray{Complex{T}}) where {T<:Dual}
-    opFFT  = AbstractFFTs.plan_fft(tmp)
-    opBFFT = AbstractFFTs.plan_bfft(tmp)
+function build_fft_plans!(tmp::AbstractArray{Complex{T}};
+                          region=1:ndims(tmp)) where {T<:ForwardDiff.Dual}
+    opFFT  = AbstractFFTs.plan_fft(tmp, region)
+    opBFFT = AbstractFFTs.plan_bfft(tmp, region)
     ipFFT  = DummyInplace{typeof(opFFT)}(opFFT)
     ipBFFT = DummyInplace{typeof(opBFFT)}(opBFFT)
-    ipFFT, opFFT, ipBFFT, opBFFT
+    (; ipFFT, opFFT, ipBFFT, opBFFT)
 end
 
 # Convert and strip off duals if that's the only way
@@ -107,6 +108,7 @@ function construct_value(model::Model{T}) where {T <: Dual}
           temperature=ForwardDiff.value(model.temperature),
           model.smearing,
           εF=ForwardDiff.value(model.εF),
+          model.n_components,
           model.spin_polarization,
           model.symmetries,
           # Can be safely disabled: this has been checked for basis.model
@@ -193,7 +195,7 @@ function self_consistent_field(basis_dual::PlaneWaveBasis{T};
     # TODO Could add δresults[α].δVind the dual part of the total local potential in ham_dual
     # and in this way return a ham that represents also the total change in Hamiltonian
 
-    energies, ham = energy_hamiltonian(basis_dual, ψ, occupation; ρ, eigenvalues, εF)
+    (; energies, ham) = energy_hamiltonian(basis_dual, ψ, occupation; ρ, eigenvalues, εF)
 
     # This has to be changed whenever the scfres structure changes
     (; ham, basis=basis_dual, energies, ρ, eigenvalues, occupation, εF, ψ,
diff --git a/test/anyons.jl b/test/anyons.jl
index 771cecf635..545b71d966 100644
--- a/test/anyons.jl
+++ b/test/anyons.jl
@@ -26,19 +26,19 @@ end
     # See https://arxiv.org/pdf/1901.10739.pdf
     # We test E11, which is a quantity defined in the above paper
 
-    ## Unit cell. Having one of the lattice vectors as zero means a 2D system
+    # Unit cell. Having one of the lattice vectors as zero means a 2D system
     a = 14
     lattice = a .* [[1 0 0.]; [0 1 0]; [0 0 0]];
 
-    ## Confining scalar potential
-    pot(x, y, z) = ((x - a/2)^2 + (y - a/2)^2);
+    # Confining scalar potential
+    pot(x, y, _) = ((x - a/2)^2 + (y - a/2)^2);
 
-    ## Parameters
+    # Parameters
     Ecut = 30
     n_electrons = 1
     β = 5;
 
-    ## Collect all the terms, build and run the model
+    # Collect all the terms, build and run the model
     terms = [Kinetic(; scaling_factor=2),
              ExternalFromReal(X -> pot(X...)),
              Anyonic(1, β)
diff --git a/test/chi0.jl b/test/chi0.jl
index b558696757..1bd4687b24 100644
--- a/test/chi0.jl
+++ b/test/chi0.jl
@@ -73,7 +73,7 @@ function test_chi0(testcase; symmetries=false, temperature=0, spin_polarization=
         # Test apply_χ0 without extra bands
         ψ_occ, occ_occ = DFTK.select_occupied_orbitals(basis, scfres.ψ, scfres.occupation;
                                                        threshold=scfres.occupation_threshold)
-        ε_occ = [scfres.eigenvalues[ik][1:size(ψk, 2)] for (ik, ψk) in enumerate(ψ_occ)]
+        ε_occ = [scfres.eigenvalues[ik][1:size(ψk, 3)] for (ik, ψk) in enumerate(ψ_occ)]
 
         diff_applied_χ0_noextra = apply_χ0(scfres.ham, ψ_occ, occ_occ, scfres.εF, ε_occ, δV;
                                            scfres.occupation_threshold)
diff --git a/test/compute_density.jl b/test/compute_density.jl
index 0b01dcb26c..65b3dddcd3 100644
--- a/test/compute_density.jl
+++ b/test/compute_density.jl
@@ -39,21 +39,21 @@
     end
 
     function test_orthonormality(basis, ψ; tol=1e-8)
-        n_k = length(ψ)
-        n_states = size(ψ[1], 2)
+        n_states = size(ψ[1], 3)
         n_fft = prod(basis.fft_size)
 
         for (ik, kpt) in enumerate(basis.kpoints)
-            # Fourier-transform the wave functions to real space
-            ψk = ψ[ik]
-            ψk_real = cat((DFTK.ifft(basis, kpt, ψik) for ψik in eachcol(ψk))..., dims=4)
+            for σ = 1:basis.model.n_components
+                # Fourier-transform the wave functions to real space
+                ψk = ψ[ik][σ, :, :]
+                ψk_real = cat((DFTK.ifft(basis, kpt, ψik) for ψik in eachcol(ψk))...; dims=4)
 
-            T = real(eltype(ψk_real))
-            ψk_real_mat = reshape(ψk_real, n_fft, n_states)
-            ψk_real_overlap = adjoint(ψk_real_mat) * ψk_real_mat
-            nondiag = ψk_real_overlap - I * (n_fft / basis.model.unit_cell_volume)
+                ψk_real_mat = reshape(ψk_real, n_fft, n_states)
+                ψk_real_overlap = adjoint(ψk_real_mat) * ψk_real_mat
+                nondiag = ψk_real_overlap - I * (n_fft / basis.model.unit_cell_volume)
 
-            @test maximum(abs.(nondiag)) < tol
+                @test maximum(abs, nondiag) < tol
+            end
         end
     end
 
@@ -72,7 +72,7 @@
         @test ham_full.basis.fft_size == ham_ir.basis.fft_size
 
         # Test density is the same in both schemes, and symmetric wrt the basis symmetries
-        @test maximum(abs.(ρ_ir - ρ_full)) < 10tol
+        @test maximum(abs, ρ_ir - ρ_full) < 10tol
         @test maximum(abs, DFTK.symmetrize_ρ(ham_ir.basis, ρ_ir) - ρ_ir) < tol
 
         # Test local potential is the same in both schemes
@@ -100,7 +100,8 @@
             # yields an eigenfunction of the Hamiltonian
             # Also check that the accumulated partial densities are equal
             # to the returned density.
-            for (ik, kpt) in enumerate(ham_ir.basis.kpoints)
+            @assert ham_ir.basis.model.n_components == 1
+            for ik in eachindex(ham_ir.basis.kpoints)
                 Hk_ir = ham_ir.blocks[ik]
                 for symop in ham_ir.basis.symmetries
                     Skpoint, ψSk = DFTK.apply_symop(symop, ham_ir.basis, Hk_ir.kpoint, ψ_ir[ik])
@@ -113,14 +114,14 @@
 
                     range = 1:length(eigenvalues_ir[ik]) - n_ignore
                     for iband in range
-                        ψnSk = ψSk[:, iband]
+                        ψnSk = ψSk[1, :, iband]
                         residual = norm(Hk_full * ψnSk - eigenvalues_ir[ik][iband] * ψnSk)
                         @test residual < 10tol
                     end  # iband
                 end  # symop
             end  # k
-        end # eigenvectors
-        end # testset
+        end  # eigenvectors
+        end  # testset
     end
 
     test_full_vs_irreducible("silicon (3,2,3)", silicon, [3, 2, 3], Ecut=5, tol=1e-10)
diff --git a/test/compute_jacobian_eigen.jl b/test/compute_jacobian_eigen.jl
index 8982ca2df6..6c75403ee7 100644
--- a/test/compute_jacobian_eigen.jl
+++ b/test/compute_jacobian_eigen.jl
@@ -8,6 +8,8 @@ using DFTK: precondprep!, FunctionPreconditioner
 using LinearMaps
 
 function eigen_ΩplusK(basis::PlaneWaveBasis{T}, ψ, occupation, numval) where {T}
+    n_components = basis.model.n_components
+    @assert n_components == 1
 
     pack(ψ) = reinterpret_real(pack_ψ(ψ))
     unpack(x) = unpack_ψ(reinterpret_complex(x), size.(ψ))
@@ -18,33 +20,36 @@ function eigen_ΩplusK(basis::PlaneWaveBasis{T}, ψ, occupation, numval) where {
 
     # preconditioner
     Pks = [PreconditionerTPA(basis, kpt) for kpt in basis.kpoints]
-    for (ik, ψk) in enumerate(ψ)
-        precondprep!(Pks[ik], ψk)
+    for ik = 1:length(Pks)
+        precondprep!(Pks[ik], to_composite_σG(basis, basis.kpoints[ik], ψ[ik]))
     end
     function f_ldiv!(x, y)
         for n = 1:size(y, 2)
             δψ = unpack(y[:, n])
-            proj_tangent!(δψ, ψ)
-            Pδψ = [ Pks[ik] \ δψk for (ik, δψk) in enumerate(δψ)]
-            proj_tangent!(Pδψ, ψ)
+            proj_tangent!(δψ, basis, ψ)
+            Pδψ = [Pks[ik] \ δψk for (ik, δψk) in enumerate(δψ)]
+            proj_tangent!(Pδψ, basis, ψ)
             x[:, n] .= pack(Pδψ)
         end
         x
     end
 
     # random starting point on the tangent space to avoid eigenvalue 0
-    n_bands = size(ψ[1], 2)
     x0 = map(1:numval) do _
-        initial = map(basis.kpoints) do kpt
+        initial = map(enumerate(basis.kpoints)) do (ik, kpt)
             n_Gk = length(G_vectors(basis, kpt))
-            randn(Complex{eltype(basis)}, n_Gk, n_bands)
+            randn(Complex{T}, n_components, n_Gk, size(ψ[ik], 3))
         end
-        pack(proj_tangent(initial, ψ))
+        pack(proj_tangent(basis, initial, ψ))
     end
     x0 = stack(x0)
 
     # Rayleigh-coefficients
-    Λ = [ψk'Hψk for (ψk, Hψk) in zip(ψ, H * ψ)]
+    Λ = map(enumerate(zip(ψ, H * ψ))) do (ik, (ψk, Hψk))
+        ψk = to_composite_σG(basis, basis.kpoints[ik], ψk)
+        Hψk = to_composite_σG(basis, basis.kpoints[ik], Hψk)
+        from_composite_σG(basis, basis.kpoints[ik], ψk'Hψk)
+    end
 
     # mapping of the linear system on the tangent space
     function ΩpK(x)
diff --git a/test/fourier_transforms.jl b/test/fourier_transforms.jl
index 7bbed3c091..af9ab6ce45 100644
--- a/test/fourier_transforms.jl
+++ b/test/fourier_transforms.jl
@@ -17,15 +17,15 @@
         f2_R = ifft(pw, f2_G)
         f3_G = fft!(similar(f_R), pw, f_R)
 
-        @test maximum(abs.(f2_G - f_G)) < 1e-12
-        @test maximum(abs.(f2_R - f_R)) < 1e-12
-        @test maximum(abs.(f3_G - f_G)) < 1e-12
+        @test maximum(abs, f2_G - f_G) < 1e-12
+        @test maximum(abs, f2_R - f_R) < 1e-12
+        @test maximum(abs, f3_G - f_G) < 1e-12
 
         ifft_mat = DFTK.ifft_matrix(pw)
         fft_mat = DFTK.fft_matrix(pw)
-        @test maximum(abs.(ifft_mat * fft_mat - I)) < 1e-12
-        @test maximum(abs.(ifft_mat * vec(f_G) - vec(f_R))) < 1e-12
-        @test maximum(abs.(fft_mat * vec(f_R) - vec(f_G))) < 1e-12
+        @test maximum(abs, ifft_mat * fft_mat - I) < 1e-12
+        @test maximum(abs, ifft_mat * vec(f_G) - vec(f_R)) < 1e-12
+        @test maximum(abs, fft_mat * vec(f_R) - vec(f_G)) < 1e-12
     end
 
     @testset "Transformation on the spherical basis set" begin
@@ -41,7 +41,7 @@
         f2_R = similar(f_R)
         ifft!(f2_R, pw, kpt, f2_G)
 
-        @test maximum(abs.(f2_G - f_G)) < 1e-12
-        @test maximum(abs.(f2_R - f_R)) < 1e-12
+        @test maximum(abs, f2_G - f_G) < 1e-12
+        @test maximum(abs, f2_R - f_R) < 1e-12
     end
 end
diff --git a/test/hamiltonian_consistency.jl b/test/hamiltonian_consistency.jl
index 11128503d8..d86102c3a8 100644
--- a/test/hamiltonian_consistency.jl
+++ b/test/hamiltonian_consistency.jl
@@ -1,9 +1,9 @@
 @testsetup module HamConsistency
 using Test
 using DFTK
-using Logging
-using DFTK: mpi_sum
+using DFTK: mpi_sum, filled_occupation, DivAgradOperator, MagneticFieldOperator
 using LinearAlgebra
+using Logging
 using ..TestCases: silicon
 testcase = silicon
 
@@ -11,10 +11,9 @@ function test_matrix_repr_operator(hamk, ψk; atol=1e-8)
     for operator in hamk.operators
         try
             operator_matrix = Matrix(operator)
-            @test norm(operator_matrix*ψk - operator*ψk) < atol
-        catch e
-            allowed_missing_operators = Union{DFTK.DivAgradOperator,
-                                              DFTK.MagneticFieldOperator}
+            @test norm(operator_matrix*ψk[1, :, :] - (operator*ψk)[1, :, :]) < atol
+        catch
+            allowed_missing_operators = Union{DivAgradOperator, MagneticFieldOperator}
             @assert operator isa allowed_missing_operators
             @info "Matrix of operator $(nameof(typeof(operator))) not yet supported" maxlog=1
         end
@@ -36,10 +35,12 @@ function test_consistency_term(term; rtol=1e-4, atol=1e-8, ε=1e-6, kgrid=[1, 2,
 
         n_electrons = testcase.n_electrons
         n_bands = div(n_electrons, 2, RoundUp)
-        filled_occ = DFTK.filled_occupation(model)
+        filled_occ = filled_occupation(model)
 
-        ψ = [Matrix(qr(randn(ComplexF64, length(G_vectors(basis, kpt)), n_bands)).Q)
-             for kpt in basis.kpoints]
+        ψ = map(basis.kpoints) do kpt
+            Q = Matrix(qr(randn(ComplexF64, length(G_vectors(basis, kpt)), n_bands)).Q)
+            from_composite_σG(basis, kpt, Q)
+        end
         occupation = [filled_occ * rand(n_bands) for _ = 1:length(basis.kpoints)]
         occ_scaling = n_electrons / sum(sum(occupation))
         occupation = [occ * occ_scaling for occ in occupation]
@@ -63,10 +64,11 @@ function test_consistency_term(term; rtol=1e-4, atol=1e-8, ε=1e-6, kgrid=[1, 2,
         diff = (compute_E(ε) - compute_E(-ε)) / (2ε)
 
         diff_predicted = 0.0
-        for ik = 1:length(basis.kpoints)
-            Hψk = ham.blocks[ik]*ψ[ik]
+        for (ik, kpt) in enumerate(basis.kpoints)
             test_matrix_repr_operator(ham.blocks[ik], ψ[ik]; atol)
-            δψkHψk = sum(occupation[ik][iband] * real(dot(δψ[ik][:, iband], Hψk[:, iband]))
+            δψk = to_composite_σG(basis, kpt, δψ[ik])
+            Hψk = to_composite_σG(basis, kpt, ham.blocks[ik]*ψ[ik])
+            δψkHψk = sum(occupation[ik][iband] * real(dot(δψk[:, iband], Hψk[:, iband]))
                          for iband=1:n_bands)
             diff_predicted += 2 * basis.kweights[ik] * δψkHψk
         end
diff --git a/test/hessian.jl b/test/hessian.jl
index 08577c6547..35e4638630 100644
--- a/test/hessian.jl
+++ b/test/hessian.jl
@@ -26,7 +26,7 @@ end
 
     @test isapprox(
         real(dot(ϕ, solve_ΩplusK(basis, ψ, rhs, occupation).δψ)),
-        real(dot(solve_ΩplusK(basis, ψ, ϕ, occupation).δψ, rhs)),
+        real(dot(solve_ΩplusK(basis, ψ, ϕ, occupation).δψ, rhs));
         atol=1e-7
     )
 end
@@ -40,12 +40,16 @@ end
 
     H = energy_hamiltonian(basis, ψ, occupation; ρ).ham
     # Rayleigh-coefficients
-    Λ = [ψk'Hψk for (ψk, Hψk) in zip(ψ, H * ψ)]
+    Λ = map(enumerate(zip(ψ, H * ψ))) do (ik, (ψk, Hψk))
+        ψk = to_composite_σG(basis, basis.kpoints[ik], ψk)
+        Hψk = to_composite_σG(basis, basis.kpoints[ik], Hψk)
+        from_composite_σG(basis, basis.kpoints[ik], ψk'Hψk)
+    end
 
     # Ω is complex-linear and so self-adjoint as a complex operator.
     @test isapprox(
         dot(ϕ, apply_Ω(rhs, ψ, H, Λ)),
-        dot(apply_Ω(ϕ, ψ, H, Λ), rhs),
+        dot(apply_Ω(ϕ, ψ, H, Λ), rhs);
         atol=1e-7
     )
 end
@@ -60,7 +64,7 @@ end
     # hence we test using the real dot product.
     @test isapprox(
         real(dot(ϕ, apply_K(basis, rhs, ψ, ρ, occupation))),
-        real(dot(apply_K(basis, ϕ, ψ, ρ, occupation), rhs)),
+        real(dot(apply_K(basis, ϕ, ψ, ρ, occupation), rhs));
         atol=1e-7
     )
 end
@@ -81,7 +85,7 @@ end
 
     @testset "self-adjointness of solve_ΩplusK_split" begin
         @test isapprox(real(dot(ϕ, solve_ΩplusK_split(scfres, rhs).δψ)),
-                        real(dot(solve_ΩplusK_split(scfres, ϕ).δψ, rhs)),
+                        real(dot(solve_ΩplusK_split(scfres, ϕ).δψ, rhs));
                         atol=1e-7)
     end
 
@@ -114,7 +118,7 @@ end
 
     @testset "self-adjointness of solve_ΩplusK_split" begin
         @test isapprox(real(dot(ϕ, solve_ΩplusK_split(scfres, rhs).δψ)),
-                        real(dot(solve_ΩplusK_split(scfres, ϕ).δψ, rhs)),
+                        real(dot(solve_ΩplusK_split(scfres, ϕ).δψ, rhs));
                         atol=1e-7)
     end
 end
diff --git a/test/multicomponents.jl b/test/multicomponents.jl
new file mode 100644
index 0000000000..dd4293d9ea
--- /dev/null
+++ b/test/multicomponents.jl
@@ -0,0 +1,119 @@
+# Note: The consistency tests at temperature may have an error of the order of magnitude
+# of `default_occupation_threshold`; use `nbandsalg` if small tolerance.
+@testsetup module Multicomponents
+using Test
+using DFTK
+using LinearAlgebra
+
+function test_consistency(setup; Ecut=5, kgrid=(4,4,4), tol=1e-5)
+    res1 = setup(Ecut, kgrid, tol, 1)
+    res2 = setup(Ecut, kgrid, tol, 2)
+    @test norm(res1 - res2) < tol
+end
+end
+
+@testitem "Energy & Density: LOBPCG" setup=[TestCases, Multicomponents] begin
+    using DFTK
+    test_consistency = Multicomponents.test_consistency
+    testcase = TestCases.silicon
+    testcase = merge(testcase, (; temperature=0.03))
+    eigensolver = lobpcg_hyper
+
+    test_consistency() do Ecut, kgrid, tol, n_components
+        model = model_PBE(testcase.lattice, testcase.atoms, testcase.positions;
+                          n_components, testcase.temperature)
+        basis = PlaneWaveBasis(model; Ecut, kgrid)
+        nbandsalg = AdaptiveBands(model; occupation_threshold=tol/10)
+        scfres = self_consistent_field(basis; eigensolver, tol, nbandsalg)
+        [scfres.energies.total, scfres.ρ]
+    end
+end
+
+@testitem "Energy & Density: diag_full" setup=[TestCases, Multicomponents] begin
+    using DFTK
+    test_consistency = Multicomponents.test_consistency
+    testcase = TestCases.silicon
+    testcase = merge(testcase, (; temperature=0.03))
+    eigensolver = diag_full
+
+    test_consistency() do Ecut, kgrid, tol, n_components
+        model = model_PBE(testcase.lattice, testcase.atoms, testcase.positions;
+                          n_components, testcase.temperature)
+        basis = PlaneWaveBasis(model; Ecut, kgrid)
+        nbandsalg = AdaptiveBands(model; occupation_threshold=tol/10)
+        scfres = self_consistent_field(basis; eigensolver, tol, nbandsalg)
+        [scfres.energies.total, scfres.ρ]
+    end
+end
+
+@testitem "Forces" setup=[TestCases, Multicomponents] begin
+    using DFTK
+    test_consistency = Multicomponents.test_consistency
+    testcase = TestCases.silicon
+    testcase = merge(testcase, (; temperature=0.03))
+
+    test_consistency() do Ecut, kgrid, tol, n_components
+        positions = [(ones(3)) / 4, -ones(3) / 8]
+        model = model_PBE(testcase.lattice, testcase.atoms, positions; n_components,
+                          testcase.temperature)
+        basis = PlaneWaveBasis(model; Ecut, kgrid)
+        nbandsalg = AdaptiveBands(model; occupation_threshold=tol/100)
+        scfres = self_consistent_field(basis; tol=tol/10, nbandsalg)
+        compute_forces(scfres)
+    end
+end
+
+@testitem "δρ, temperature" setup=[TestCases, Multicomponents] begin
+    using DFTK
+    using LinearAlgebra
+    test_consistency = Multicomponents.test_consistency
+    testcase = TestCases.aluminium_primitive
+    testcase = merge(testcase, (; temperature=0.03))
+
+    Ecut = 10
+    kgrid = (1, 1, 1)
+
+    test_consistency(; Ecut, kgrid) do Ecut, kgrid, tol, n_components
+        model = model_PBE(testcase.lattice, testcase.atoms, testcase.positions;
+                          n_components, testcase.temperature)
+        basis = PlaneWaveBasis(model; Ecut, kgrid)
+        nbandsalg = AdaptiveBands(model; occupation_threshold=tol/10)
+        scfres = self_consistent_field(basis; tol, nbandsalg)
+        δV = guess_density(basis)
+        apply_χ0(scfres, δV)
+    end
+end
+
+@testitem "Energy & Density: Newton" #=
+    =#    tags=[:dont_test_mpi] setup=[TestCases, Multicomponents] begin
+    using DFTK
+    test_consistency = Multicomponents.test_consistency
+    testcase = TestCases.silicon
+
+    test_consistency() do Ecut, kgrid, tol, n_components
+        model = model_PBE(testcase.lattice, testcase.atoms, testcase.positions;
+                          n_components, testcase.temperature)
+        basis = PlaneWaveBasis(model; Ecut, kgrid)
+        scfres_start = self_consistent_field(basis; maxiter=1)
+        # remove virtual orbitals
+        (; ψ) = DFTK.select_occupied_orbitals(basis, scfres_start.ψ, scfres_start.occupation)
+        newton(basis, ψ; tol=tol/10).ρ
+    end
+end
+
+@testitem "Energy & Density: Direct minimization" #=
+    =#    tags=[:dont_test_mpi] setup=[TestCases, Multicomponents] begin
+    using DFTK
+    test_consistency = Multicomponents.test_consistency
+    testcase = TestCases.silicon
+
+    test_consistency() do Ecut, kgrid, tol, n_components
+        model = model_PBE(testcase.lattice, testcase.atoms, testcase.positions;
+                          n_components, testcase.temperature)
+        basis = PlaneWaveBasis(model; Ecut, kgrid)
+        scfres_start = self_consistent_field(basis; maxiter=1)
+        # remove virtual orbitals
+        (; ψ) = DFTK.select_occupied_orbitals(basis, scfres_start.ψ, scfres_start.occupation)
+        direct_minimization(basis; ψ, tol=tol/10).ρ
+    end
+end
diff --git a/test/scf_compare.jl b/test/scf_compare.jl
index 689630c765..7bf48a7a2e 100644
--- a/test/scf_compare.jl
+++ b/test/scf_compare.jl
@@ -80,7 +80,7 @@ end
     if mpi_nprocs() == 1  # Distributed implementation not yet available
         @testset "Direct minimization" begin
             ρ_dm = direct_minimization(basis; ψ, tol).ρ
-            @test maximum(abs.(ρ_dm - ρ_def)) < 10tol
+            @test maximum(abs, ρ_dm - ρ_def) < 10tol
         end
     end
 
@@ -88,7 +88,7 @@ end
     if mpi_nprocs() == 1  # Distributed implementation not yet available
         @testset "Newton" begin
             ρ_newton = newton(basis, ψ; tol).ρ
-            @test maximum(abs.(ρ_newton - ρ_def)) < 10tol
+            @test maximum(abs, ρ_newton - ρ_def) < 10tol
         end
     end
 end
diff --git a/test/serialisation.jl b/test/serialisation.jl
index 2e282c410a..ce37f7d0b0 100644
--- a/test/serialisation.jl
+++ b/test/serialisation.jl
@@ -9,6 +9,7 @@ function test_scfres_agreement(tested, ref; test_ψ=true)
     @test tested.basis.model.εF                == ref.basis.model.εF
     @test tested.basis.model.symmetries        == ref.basis.model.symmetries
     @test tested.basis.model.spin_polarization == ref.basis.model.spin_polarization
+    @test tested.basis.model.n_components      == ref.basis.model.n_components
 
     @test tested.basis.model.positions == ref.basis.model.positions
     @test atomic_symbol.(tested.basis.model.atoms) == atomic_symbol.(ref.basis.model.atoms)
diff --git a/test/todict.jl b/test/todict.jl
index 8ca5808b13..cb7ecb85c7 100644
--- a/test/todict.jl
+++ b/test/todict.jl
@@ -8,10 +8,11 @@ function test_agreement_bands(band_data, dict; explicit_reshape=false, test_ψ=t
 
     basis = band_data.basis
     model = basis.model
-    n_kpoints = length(basis.kcoords_global)
-    n_spin    = model.n_spin_components
-    n_bands   = length(band_data.eigenvalues[1])
-    max_n_G   = DFTK.mpi_max(maximum(kpt -> length(G_vectors(basis, kpt)), basis.kpoints),
+    n_kpoints    = length(basis.kcoords_global)
+    n_spin       = model.n_spin_components
+    n_bands      = length(band_data.eigenvalues[1])
+    n_components = model.n_components
+    max_n_G      = DFTK.mpi_max(maximum(kpt -> length(G_vectors(basis, kpt)), basis.kpoints),
                              basis.comm_kpts)
     rotations       = [symop.W for symop in basis.symmetries]
     translations    = [symop.w for symop in basis.symmetries]
@@ -38,6 +39,7 @@ function test_agreement_bands(band_data, dict; explicit_reshape=false, test_ψ=t
         @test dict["n_bands"]           == n_bands
         @test dict["n_kpoints"]         == n_kpoints
         @test dict["n_atoms"]           == length(model.atoms)
+        @test dict["n_components"]      == n_components
         @test dict["n_spin_components"] == n_spin
         @test dict["model_name"]        == model.model_name
         @test dict["temperature"]       ≈  model.temperature  atol=1e-12
@@ -68,7 +70,7 @@ function test_agreement_bands(band_data, dict; explicit_reshape=false, test_ψ=t
         if test_ψ
             n_G_resh    = condreshape(dict["kpt_n_G_vectors"],  (n_kpoints, n_spin))
             G_vecs_resh = condreshape(dict["kpt_G_vectors"],    (3, max_n_G, n_kpoints, n_spin))
-            ψ_resh      = condreshape(dict["ψ"], (max_n_G, n_bands, n_kpoints, n_spin))
+            ψ_resh      = condreshape(dict["ψ"], (n_components, max_n_G, n_bands, n_kpoints, n_spin))
         end
     end
     n_iter_resh      = MPI.bcast(n_iter_resh,      0, MPI.COMM_WORLD)
@@ -92,7 +94,7 @@ function test_agreement_bands(band_data, dict; explicit_reshape=false, test_ψ=t
             @test n_G_resh[ikgl, σ] == length(basis.kpoints[ik].G_vectors)
             @test all(G_vecs_resh[:, iG, ikgl, σ] == G
                       for (iG, G) in enumerate(basis.kpoints[ik].G_vectors))
-            @test ψ_resh[1:n_G_resh[ikgl, σ], :, ikgl, σ] ≈ band_data.ψ[ik] atol=1e-12
+            @test ψ_resh[:, 1:n_G_resh[ikgl, σ], :, ikgl, σ] ≈ band_data.ψ[ik] atol=1e-12
         end
     end
 end  # function
diff --git a/test/transfer.jl b/test/transfer.jl
index e21ea073b3..4acf53c14f 100644
--- a/test/transfer.jl
+++ b/test/transfer.jl
@@ -23,11 +23,12 @@
     ψ_bb = transfer_blochwave(ψ_b, bigger_basis, basis)
     @test norm(ψ - ψ_bb) < eps(eltype(basis))
 
-    T    = compute_transfer_matrix(basis, bigger_basis)  # transfer
-    Tᵇ   = compute_transfer_matrix(bigger_basis, basis)  # back-transfer
-    Tψ   = [Tk * ψk   for (Tk, ψk)   in zip(T, ψ)]
-    TᵇTψ = [Tᵇk * Tψk for (Tᵇk, Tψk) in zip(Tᵇ, Tψ)]
-    @test norm(ψ - TᵇTψ) < eps(eltype(basis))
+    T     = compute_transfer_matrix(basis, bigger_basis)  # transfer
+    Tᵇ    = compute_transfer_matrix(bigger_basis, basis)  # back-transfer
+    ψ_σG  = to_composite_σG(basis, ψ)
+    Tψ    = [Tk * ψk   for (Tk, ψk)   in zip(T, ψ_σG)]
+    TᵇTψ  = [Tᵇk * Tψk for (Tᵇk, Tψk) in zip(Tᵇ, Tψ)]
+    @test norm(ψ_σG - TᵇTψ) < eps(eltype(basis))
 
     # TᵇT should be the identity and TTᵇ should be a projection
     TᵇT = [Tᵇk * Tk  for (Tk, Tᵇk) in zip(T, Tᵇ)]
@@ -40,7 +41,7 @@
     bigger_basis = PlaneWaveBasis(model; Ecut, kgrid, kshift)
     ψ_b = transfer_blochwave(ψ, basis, bigger_basis)
     ψ_bb = transfer_blochwave(ψ_b, bigger_basis, basis)
-    @test norm(ψ-ψ_bb) < eps(eltype(basis))
+    @test norm(ψ - ψ_bb) < eps(eltype(basis))
 end
 
 @testitem "Transfer of density" begin

From 097fb2b4da7a29b7f2284cf4d157e380f7d271ec Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=89tienne=20Polack?= <polack@hollved.net>
Date: Mon, 11 Dec 2023 23:20:43 +0100
Subject: [PATCH 2/3] regression tests & benchmark

---
 .github/workflows/ci.yaml         |  10 +-
 .github/workflows/regression.yaml |  98 ++++++++++++++++++
 .gitignore                        |   2 +
 Project.toml                      |   3 +-
 benchmark/Project.toml            |  10 ++
 benchmark/benchmarks.jl           |   6 ++
 benchmark/humongous/Project.toml  |  10 ++
 benchmark/humongous/benchmarks.jl |  21 ++++
 benchmark/humongous/run.jl        |  43 ++++++++
 benchmark/load.jl                 |  14 +++
 benchmark/regression/testcases.jl | 158 ++++++++++++++++++++++++++++++
 benchmark/run.jl                  |  14 +++
 test/runtests_runner.jl           |  30 ++++++
 13 files changed, 413 insertions(+), 6 deletions(-)
 create mode 100644 .github/workflows/regression.yaml
 create mode 100644 benchmark/Project.toml
 create mode 100644 benchmark/benchmarks.jl
 create mode 100644 benchmark/humongous/Project.toml
 create mode 100644 benchmark/humongous/benchmarks.jl
 create mode 100644 benchmark/humongous/run.jl
 create mode 100644 benchmark/load.jl
 create mode 100644 benchmark/regression/testcases.jl
 create mode 100644 benchmark/run.jl

diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index f56c5cd9b4..896dec11b3 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -21,11 +21,11 @@ jobs:
       fail-fast: false
       matrix:
         include:
-          - {mode: stable,  os: ubuntu-latest,  payload: noslow-example }
-          - {mode: stable,  os: macOS-latest,   payload: noslow         }
-          - {mode: stable,  os: windows-latest, payload: noslow         }
-          - {mode: stable,  os: ubuntu-latest,  payload: noslow-mpi     }
-          - {mode: nightly, os: ubuntu-latest,  payload: noslow         }
+          - {mode: stable,  os: ubuntu-latest,  payload: example-noslow-noregression }
+          - {mode: stable,  os: macOS-latest,   payload: noslow-noregression         }
+          - {mode: stable,  os: windows-latest, payload: noslow-noregression         }
+          - {mode: stable,  os: ubuntu-latest,  payload: mpi-noslow-noregression     }
+          - {mode: nightly, os: ubuntu-latest,  payload: noslow-noregression         }
     env:
       GKS_ENCODING: utf8
       GKSwstype: 100       # Needed for Plots-related tests
diff --git a/.github/workflows/regression.yaml b/.github/workflows/regression.yaml
new file mode 100644
index 0000000000..ae2e25eda2
--- /dev/null
+++ b/.github/workflows/regression.yaml
@@ -0,0 +1,98 @@
+name: Regression
+on:
+  push:
+    branches:
+      - master
+    tags: ['*']
+  pull_request:
+  schedule:
+    - cron:  '0 4 * * 6'  # Run every Saturday
+concurrency:
+  # Skip intermediate builds: always.
+  # Cancel intermediate builds: only if it is a pull request build.
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }}
+
+jobs:
+  test:
+    name: Benchmarking ${{ matrix.description }}
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - {description: run,  payload: benchmarks.jl  }
+          - {description: load, payload: load.jl }
+    steps:
+      # Remove older benchmark comment
+      - name: pr-deleter
+        uses: maheshrayas/action-pr-comment-delete@v3.0
+        with:
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          org: <orgname>
+          repo: <repo>
+          user: github-actions[bot]
+          issue: ${{github.event.number}}
+
+      - uses: actions/checkout@v4
+
+      - name: Setup Julia stable
+        uses: julia-actions/setup-julia@v1
+        with:
+          version: '1.9'
+          arch: x64
+
+      - uses: julia-actions/cache@v1
+        with:
+          include-matrix: false
+      - uses: julia-actions/julia-buildpkg@v1
+
+      - name: Install dependencies
+        run: |
+          julia --project=benchmark -e '
+          using Pkg
+          Pkg.develop(PackageSpec(; path=pwd()))
+          Pkg.instantiate()'
+
+      - name: Run benchmarks against master
+        # Remove baseline once merged. Regression tests will only work after this is merged
+        # in master.
+        run: |
+          julia --project=benchmark -e "
+            using BenchmarkCI
+            baseline = \"HEAD\"
+            script = \"\$(pwd())/benchmark/${{ matrix.payload }}\"
+            BenchmarkCI.judge(; baseline, script, retune=true)"
+        if: ${{ github.event_name == 'pull_request' }}
+
+      - name: Run benchmarks against last release
+        run: |
+          julia --project=benchmark -e "
+            import Pkg
+            baseline = \"v\" * Pkg.TOML.parsefile(\"Project.toml\")[\"version\"]
+            script = \"\$(pwd())/benchmark/${{ matrix.payload }}\"
+            using BenchmarkCI
+            BenchmarkCI.judge(; baseline, script, retune=true)"
+        if: ${{ github.event_name == 'schedule' ||
+                github.event.push.ref == 'refs/heads/master' }}
+
+      - name: Print judgement
+        run: |
+          julia --project=benchmark -e '
+            using BenchmarkCI
+            BenchmarkCI.displayjudgement()'
+
+      - name: Post results
+        run: |
+          julia --project=benchmark -e '
+            using BenchmarkCI
+            BenchmarkCI.postjudge()'
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Is report successful
+        run: |
+          res=$(julia --project=benchmark -e '
+                  using BenchmarkCI
+                  BenchmarkCI.displayjudgement()' | grep --count ':x:')
+          if [[ $res -gt 1 ]]; then exit 1; fi
diff --git a/.gitignore b/.gitignore
index f3d553e487..d82b137880 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,3 +10,5 @@ Manifest.toml
 /LocalPreferences.toml
 .vscode
 .CondaPkg
+/.benchmarkci
+/benchmark/**/*.json
diff --git a/Project.toml b/Project.toml
index 415a2a168b..2957010e46 100644
--- a/Project.toml
+++ b/Project.toml
@@ -126,6 +126,7 @@ ASEconvert = "3da9722f-58c2-4165-81be-b4d7253e8fd2"
 Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
 AtomsIO = "1692102d-eeb4-4df9-807b-c9517f998d44"
 AtomsIOPython = "9e4c859b-2281-48ef-8059-f50fe53c37b0"
+BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
 CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 CUDA_Runtime_jll = "76a88914-d11a-5bdc-97e0-2f5a05c973a2"
 ComponentArrays = "b0b7db55-cfe3-40fc-9ded-d10e2dbeff66"
@@ -148,4 +149,4 @@ WriteVTK = "64499a7a-5c06-52f2-abe2-ccb03c286192"
 wannier90_jll = "c5400fa0-8d08-52c2-913f-1e3f656c1ce9"
 
 [targets]
-test = ["Test", "TestItemRunner", "ASEconvert", "Aqua", "AtomsIO", "AtomsIOPython", "CUDA", "CUDA_Runtime_jll", "ComponentArrays", "DoubleFloats", "FiniteDiff", "FiniteDifferences", "GenericLinearAlgebra", "IntervalArithmetic", "JLD2", "JSON3", "Logging", "Plots", "QuadGK", "Random", "KrylovKit", "Wannier", "WriteVTK", "wannier90_jll"]
+test = ["Test", "TestItemRunner", "ASEconvert", "Aqua", "AtomsIO", "AtomsIOPython", "BenchmarkTools", "CUDA", "CUDA_Runtime_jll", "ComponentArrays", "DoubleFloats", "FiniteDiff", "FiniteDifferences", "GenericLinearAlgebra", "IntervalArithmetic", "JLD2", "JSON3", "Logging", "Plots", "QuadGK", "Random", "KrylovKit", "Wannier", "WriteVTK", "wannier90_jll"]
diff --git a/benchmark/Project.toml b/benchmark/Project.toml
new file mode 100644
index 0000000000..6b2665e976
--- /dev/null
+++ b/benchmark/Project.toml
@@ -0,0 +1,10 @@
+[deps]
+AtomsBase = "a963bdd2-2df7-4f54-a1ee-49d51e6be12a"
+BenchmarkCI = "20533458-34a3-403d-a444-e18f38190b5b"
+BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
+DFTK = "acf6eb54-70d9-11e9-0013-234b7a5f5337"
+ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
+PkgBenchmark = "32113eaa-f34f-5b0d-bd6c-c81e245fc73d"
+TestItemRunner = "f8b46487-2199-4994-9208-9a1283c18c0a"
+Unitful = "1986cc42-f94f-5a68-af5c-568840ba703d"
+UnitfulAtomic = "a7773ee8-282e-5fa2-be4e-bd808c38a91a"
diff --git a/benchmark/benchmarks.jl b/benchmark/benchmarks.jl
new file mode 100644
index 0000000000..fcda531cab
--- /dev/null
+++ b/benchmark/benchmarks.jl
@@ -0,0 +1,6 @@
+using BenchmarkTools
+using TestItemRunner
+
+const SUITE = BenchmarkGroup()
+
+@run_package_tests filter=ti->(:regression ∈ ti.tags)
diff --git a/benchmark/humongous/Project.toml b/benchmark/humongous/Project.toml
new file mode 100644
index 0000000000..3546e8e693
--- /dev/null
+++ b/benchmark/humongous/Project.toml
@@ -0,0 +1,10 @@
+[deps]
+AtomsIO = "1692102d-eeb4-4df9-807b-c9517f998d44"
+BenchmarkCI = "20533458-34a3-403d-a444-e18f38190b5b"
+BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
+DFTK = "acf6eb54-70d9-11e9-0013-234b7a5f5337"
+LibGit2 = "76f85450-5226-5b5a-8eaa-529ad045b433"
+LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+MKL = "33e6dc65-8f57-5167-99aa-e5a354878fb2"
+PkgBenchmark = "32113eaa-f34f-5b0d-bd6c-c81e245fc73d"
+TestItemRunner = "f8b46487-2199-4994-9208-9a1283c18c0a"
diff --git a/benchmark/humongous/benchmarks.jl b/benchmark/humongous/benchmarks.jl
new file mode 100644
index 0000000000..6d04955361
--- /dev/null
+++ b/benchmark/humongous/benchmarks.jl
@@ -0,0 +1,21 @@
+using BenchmarkTools
+using TestItemRunner
+
+function run_scenario(scenario, complexity)
+    scenario_filter(i) = occursin(string(scenario), i.filename) && complexity ∈ i.tags
+    @run_package_tests filter=scenario_filter
+end
+
+all_scenarios() = [:AlSiO2H, :Cr19, :Fe2MnAl, :Mn2RuGa, :WFe]
+function make_suite(; scenarios=all_scenarios(), complexity=:debug)
+    @assert complexity ∈ [:debug, :small, :full]
+    @assert all(scenarios .∈ Ref(all_scenarios()))
+
+    suite = BenchmarkGroup()
+    for scenario in scenarios
+        suite[scenario] = @benchmarkable run_scenario($scenario, $complexity)
+    end
+    suite
+end
+
+const SUITE = make_suite(; scenarios=[:AlSiO2H])
diff --git a/benchmark/humongous/run.jl b/benchmark/humongous/run.jl
new file mode 100644
index 0000000000..537755d0b6
--- /dev/null
+++ b/benchmark/humongous/run.jl
@@ -0,0 +1,43 @@
+ROOTPATH = abspath(joinpath(@__DIR__, "../.."))
+import Pkg
+Pkg.activate(@__DIR__)
+if !isfile(joinpath(@__DIR__, "Manifest.toml"))
+    Pkg.develop(Pkg.PackageSpec(; path=ROOTPATH))
+    Pkg.instantiate()
+end
+
+import BenchmarkCI
+import LibGit2
+
+"""
+Launch with
+```julia
+julia --project=benchmark/humongous -e '
+   include("benchmark/humongous/run.jl")
+   run_benchmark()'
+```
+"""
+function run_benchmark(; retune=false, baseline="origin/master", target="HEAD",
+                       script=nothing)
+    mktempdir(mktempdir()) do repo_dir  # TestItemRunner needs access to parent directory as well.
+        project = joinpath(ROOTPATH, "benchmark", "humongous")
+        # Workaround to be able to benchmark releases before the use of PkgBenchmark.
+        # WARN: In this case, we need PkgBenchmark to be installed globally.
+        if isnothing(script)
+            # We run the default benchmark.
+            script = joinpath(project, "benchmarks.jl")
+        else
+            occursin(ROOTPATH, abspath(script)) &&
+                error("Script should be outside the repository.")
+        end
+        script_copy = joinpath(repo_dir, "benchmarks.jl")
+        cp(script, script_copy)
+
+        LibGit2.clone("https://github.com/epolack/DFTK-testproblems",
+                      joinpath(repo_dir, "test"))
+
+        BenchmarkCI.judge(; baseline, target, retune, script=script_copy, project)
+
+        BenchmarkCI.displayjudgement()
+    end
+end
diff --git a/benchmark/load.jl b/benchmark/load.jl
new file mode 100644
index 0000000000..3d0e20fc21
--- /dev/null
+++ b/benchmark/load.jl
@@ -0,0 +1,14 @@
+using BenchmarkTools
+
+const SUITE = BenchmarkGroup()
+
+julia_cmd = unsafe_string(Base.JLOptions().julia_bin)
+SUITE["load"] = @benchmarkable run(`$julia_cmd \
+                                        --startup-file=no \
+                                        --project=$(Base.active_project()) \
+                                        -e 'using DFTK'`)
+SUITE["pecompilation"] =
+    @benchmarkable run(`$julia_cmd \
+                           --startup-file=no \
+                           --project=$(Base.active_project()) \
+                           -e 'Base.compilecache(Base.identify_package("DFTK"))'`)
diff --git a/benchmark/regression/testcases.jl b/benchmark/regression/testcases.jl
new file mode 100644
index 0000000000..62f07642b4
--- /dev/null
+++ b/benchmark/regression/testcases.jl
@@ -0,0 +1,158 @@
+@testsetup module Regression
+using DFTK
+using Unitful
+using UnitfulAtomic
+using AtomsBase
+using ..TestCases: magnesium
+
+high_symmetry = let
+    a = 4.474
+    lattice = [[0, a, a], [a, 0, a], [a, a, 0]]u"bohr"
+    x = 6.711
+    y = 2.237
+    atoms = [
+        Atom(:Cu, [0, 0, 0]u"bohr", magnetic_moment=0),
+        Atom(:O,  [x, y, x]u"bohr", magnetic_moment=0),
+        Atom(:O,  [x, y, y]u"bohr", magnetic_moment=0),
+    ]
+    system = periodic_system(atoms, lattice)
+    merge(DFTK.parse_system(system), (; temperature=0.03, Ecut=20, kgrid=[4,4,4],
+          n_electrons=45, description="high_sym"))
+end
+high_kpoints = merge(magnesium, (; kgrid=[13,13,13], Ecut=20, description="high_kpoint"))
+high_Ecut = merge(magnesium, (; kgrid=[4,4,4], Ecut=60, description="high_Ecut"))
+
+testcases = (; high_symmetry, high_kpoints, high_Ecut)
+end
+
+
+@testitem "Hamiltonian application" tags=[:regression] setup=[TestCases, Regression] begin
+    using DFTK
+    using LinearAlgebra
+    using BenchmarkTools
+    using .Main: SUITE
+
+    for testcase in Regression.testcases
+        model = Model(testcase.lattice, testcase.atoms, testcase.positions;
+                      testcase.temperature, terms=[Kinetic()])
+        basis = PlaneWaveBasis(model; testcase.Ecut, testcase.kgrid)
+
+        n_electrons = testcase.n_electrons
+        n_bands = div(n_electrons, 2, RoundUp)
+        ψ = [Matrix(qr(randn(ComplexF64, length(G_vectors(basis, kpt)), n_bands)).Q)
+             for kpt in basis.kpoints]
+        filled_occ = DFTK.filled_occupation(model)
+        occupation = [filled_occ * rand(n_bands) for _ = 1:length(basis.kpoints)]
+        occ_scaling = n_electrons / sum(sum(occupation))
+        occupation = [occ * occ_scaling for occ in occupation]
+
+        (; ham) = energy_hamiltonian(basis, ψ, occupation)
+
+        SUITE["ham"][testcase.description] =
+            @benchmarkable for ik = 1:length($(basis.kpoints))
+                $(ham.blocks)[ik]*$ψ[ik]
+            end
+    end
+end
+
+@testitem "Single SCF step" tags=[:regression] setup=[TestCases, Regression] begin
+    using DFTK
+    using BenchmarkTools
+    using .Main: SUITE
+
+    for testcase in Regression.testcases
+        model = model_LDA(testcase.lattice, testcase.atoms, testcase.positions;
+                          testcase.temperature)
+        basis = PlaneWaveBasis(model; testcase.Ecut, testcase.kgrid)
+        SUITE["scf"][testcase.description] =
+            @benchmarkable self_consistent_field($basis; tol=1e5)
+    end
+end
+
+@testitem "Density + symmetrization" tags=[:regression] setup=[TestCases, Regression] begin
+    using DFTK
+    using BenchmarkTools
+    using .Main: SUITE
+
+    for testcase in Regression.testcases
+        model = model_LDA(testcase.lattice, testcase.atoms, testcase.positions;
+                          testcase.temperature)
+        basis = PlaneWaveBasis(model; testcase.Ecut, testcase.kgrid)
+        scfres = self_consistent_field(basis; tol=10)
+
+        ψ, occupation = DFTK.select_occupied_orbitals(basis, scfres.ψ, scfres.occupation;
+                                                      threshold=1e-6)
+
+        SUITE["density"]["ρ"][testcase.description] =
+            @benchmarkable compute_density($basis, $ψ, $occupation)
+        SUITE["density"]["sym"][testcase.description] =
+            @benchmarkable DFTK.symmetrize_ρ($basis, $(scfres.ρ))
+    end
+end
+
+@testitem "Basis construction" tags=[:regression] setup=[TestCases, Regression] begin
+    using DFTK
+    using BenchmarkTools
+    using .Main: SUITE
+
+    for testcase in Regression.testcases
+        model = model_LDA(testcase.lattice, testcase.atoms, testcase.positions;
+                          testcase.temperature)
+        SUITE["basis"][testcase.description] =
+            @benchmarkable PlaneWaveBasis($model;
+                                          Ecut=$(testcase.Ecut), kgrid=$(testcase.kgrid))
+    end
+end
+
+@testitem "Sternheimer" tags=[:regression] setup=[TestCases, Regression] begin
+    using DFTK
+    using BenchmarkTools
+    using .Main: SUITE
+
+    for testcase in Regression.testcases
+        model = model_LDA(testcase.lattice, testcase.atoms, testcase.positions;
+                          testcase.temperature)
+        basis = PlaneWaveBasis(model; testcase.Ecut, testcase.kgrid)
+        scfres = self_consistent_field(basis; tol=10)
+
+        rhs = DFTK.compute_projected_gradient(basis, scfres.ψ, scfres.occupation)
+        SUITE["response"]["sternheimer"][testcase.description] =
+            @benchmarkable DFTK.solve_ΩplusK_split($scfres, $rhs; tol=1e-1)
+    end
+end
+
+@testitem "Response with AD" tags=[:regression] setup=[TestCases, Regression] begin
+    using DFTK
+    using BenchmarkTools
+    using LinearAlgebra
+    using ForwardDiff
+    using .Main: SUITE
+
+    function make_basis(ε::T; a=10., Ecut=30) where {T}
+        lattice=T(a) * I(3)  # lattice is a cube of ``a`` Bohrs
+        # Helium at the center of the box
+        atoms     = [ElementPsp(:He; psp=load_psp("hgh/lda/He-q2"))]
+        positions = [[1/2, 1/2, 1/2]]
+
+        model = model_DFT(lattice, atoms, positions, [:lda_x, :lda_c_vwn];
+                          extra_terms=[ExternalFromReal(r -> -ε * (r[1] - a/2))],
+                          symmetries=false)
+        PlaneWaveBasis(model; Ecut, kgrid=[1, 1, 1])  # No k-point sampling on isolated system
+    end
+
+    # dipole moment of a given density (assuming the current geometry)
+    function dipole(basis, ρ)
+        @assert isdiag(basis.model.lattice)
+        a  = basis.model.lattice[1, 1]
+        rr = [a * (r[1] - 1/2) for r in r_vectors(basis)]
+        sum(rr .* ρ) * basis.dvol
+    end
+
+    # Function to compute the dipole for a given field strength
+    function compute_dipole(ε; tol=1e-2, kwargs...)
+        scfres = self_consistent_field(make_basis(ε; kwargs...); tol)
+        dipole(scfres.basis, scfres.ρ)
+    end
+
+    SUITE["response"]["ad"] = @benchmarkable ForwardDiff.derivative($compute_dipole, 0.0)
+end
diff --git a/benchmark/run.jl b/benchmark/run.jl
new file mode 100644
index 0000000000..8ab416ed9b
--- /dev/null
+++ b/benchmark/run.jl
@@ -0,0 +1,14 @@
+ROOTPATH  = joinpath(@__DIR__, "..")
+import Pkg
+Pkg.activate(@__DIR__)
+if !isfile(joinpath(@__DIR__, "Manifest.toml"))
+    Pkg.develop(Pkg.PackageSpec(; path=ROOTPATH))
+    Pkg.instantiate()
+end
+
+using BenchmarkCI
+
+# Remove target once merged. Regression tests will only work after this is merged in master.
+BenchmarkCI.judge(; baseline="HEAD")
+
+BenchmarkCI.displayjudgement()
diff --git a/test/runtests_runner.jl b/test/runtests_runner.jl
index 7616bd8741..9e9e4b1976 100644
--- a/test/runtests_runner.jl
+++ b/test/runtests_runner.jl
@@ -29,4 +29,34 @@ function dftk_testfilter(ti)
         return false
     end
 end
+
+using Test
+function TestItemRunner.run_testitem(filepath, use_default_usings, setups, package_name,
+                                     original_code, line, column, test_setup_module_set)
+    mod = Core.eval(Main, :(module $(gensym()) end))
+
+    if use_default_usings
+        Core.eval(mod, :(using Test))
+
+        if package_name!=""
+            Core.eval(mod, :(using $(Symbol(package_name))))
+        end
+    end
+
+    for m in setups
+        Core.eval(mod, Expr(:using, Expr(:., :., :., nameof(test_setup_module_set.setupmodule), m)))
+    end
+
+    code = string('\n'^line, ' '^column, original_code)
+
+    TestItemRunner.withpath(filepath) do
+        # Replace the test by the current testset.
+        description = Test.pop_testset().description
+        @testset "$(description)" begin
+            Base.invokelatest(include_string, mod, code, filepath)
+        end
+        Test.push_testset(Test.FallbackTestSet())  # so the parent pops nothing
+    end
+end
+
 @run_package_tests filter=dftk_testfilter verbose=true

From 35f72a4ee223e004c5d2328892c9a4c1ca908102 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=89tienne=20Polack?= <polack@hollved.net>
Date: Tue, 20 Feb 2024 14:57:57 +0100
Subject: [PATCH 3/3] fix for components

---
 benchmark/regression/testcases.jl | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/benchmark/regression/testcases.jl b/benchmark/regression/testcases.jl
index 62f07642b4..cc7c7ea8f9 100644
--- a/benchmark/regression/testcases.jl
+++ b/benchmark/regression/testcases.jl
@@ -39,8 +39,7 @@ end
 
         n_electrons = testcase.n_electrons
         n_bands = div(n_electrons, 2, RoundUp)
-        ψ = [Matrix(qr(randn(ComplexF64, length(G_vectors(basis, kpt)), n_bands)).Q)
-             for kpt in basis.kpoints]
+        ψ = [DFTK.random_orbitals(basis, kpt, n_bands) for kpt in basis.kpoints]
         filled_occ = DFTK.filled_occupation(model)
         occupation = [filled_occ * rand(n_bands) for _ = 1:length(basis.kpoints)]
         occ_scaling = n_electrons / sum(sum(occupation))