diff --git a/.github/workflows/debug_checks.yml b/.github/workflows/debug_checks.yml
index 9f21bc82f..cbbc28ed5 100644
--- a/.github/workflows/debug_checks.yml
+++ b/.github/workflows/debug_checks.yml
@@ -11,7 +11,7 @@ jobs:
         # Only run on linux to save CI server cpu-hours
         os: [ubuntu-latest]
       fail-fast: false
-    timeout-minutes: 240
+    timeout-minutes: 360
 
     steps:
       - uses: actions/checkout@v4
diff --git a/Project.toml b/Project.toml
index ed635ae29..97abfed83 100644
--- a/Project.toml
+++ b/Project.toml
@@ -10,6 +10,8 @@ Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa"
 Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
 DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab"
 FFTW = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341"
+FastGaussQuadrature = "442a2c76-b920-505d-bb47-c5924d526838"
+GR_jll = "d2c73de3-f751-5644-a686-071e5b155ba9"
 Glob = "c27321d9-0574-5035-807b-f59d2c89b15c"
 HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f"
 HDF5_jll = "0234f1f7-429e-5d53-9886-15a909be8d59"
@@ -17,6 +19,7 @@ IJulia = "7073ff75-c697-5162-941a-fcdaad2a7d2a"
 IfElse = "615f187c-cbe4-4ef1-ba3b-2fcf58d6d173"
 Interpolations = "a98d9a8b-a2ab-59e6-89dd-64a1c18fca59"
 LaTeXStrings = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f"
+LegendrePolynomials = "3db4a2ba-fc88-11e8-3e01-49c72059a882"
 LibGit2 = "76f85450-5226-5b5a-8eaa-529ad045b433"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 LsqFit = "2fda8390-95c7-5789-9bda-21331edee243"
@@ -32,14 +35,17 @@ Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
 Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
 Preferences = "21216c6a-2e73-6563-6e65-726566657250"
 Primes = "27ebfcd6-29c5-5fa9-bf4b-fb8fc14df3ae"
+Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
 PyCall = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0"
 PyPlot = "d330b81b-6aea-500a-939a-2ce795aea3ee"
 Revise = "295af30f-e4ad-537b-8983-00126c2a3abe"
 Roots = "f2b01f46-fcfa-551c-844a-d8ac1e96c665"
 SHA = "ea8e919c-243c-51af-8825-aaa63cd721ce"
+SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
 SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
+SuiteSparse = "4607b0f0-06f3-5cda-b6b1-a6196a1729e9"
 Symbolics = "0c5d862f-8b57-4792-8d23-62f2024744c7"
 TOML = "fa267f1f-6049-4f14-aa54-33bafae1ed76"
 TimerOutputs = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f"
diff --git a/cheb_matrix_test.jl b/cheb_matrix_test.jl
deleted file mode 100644
index 67cac6d10..000000000
--- a/cheb_matrix_test.jl
+++ /dev/null
@@ -1,531 +0,0 @@
-using Printf
-using Plots
-using LaTeXStrings
-
-if abspath(PROGRAM_FILE) == @__FILE__
-    using Pkg
-    Pkg.activate(".")
-
-    import moment_kinetics
-	using moment_kinetics.input_structs: grid_input, advection_input
-	using moment_kinetics.coordinates: define_coordinate
-	using moment_kinetics.chebyshev: setup_chebyshev_pseudospectral
-	using moment_kinetics.calculus: derivative!, integral
-    #import LinearAlgebra
-    using LinearAlgebra: mul!, lu
-    zero = 1.0e-10
-    function Djj(x::Array{Float64,1},j::Int64)
-        return -0.5*x[j]/( 1.0 - x[j]^2)
-    end
-    function Djk(x::Array{Float64,1},j::Int64,k::Int64,c_j::Float64,c_k::Float64)
-        return  (c_j/c_k)*((-1)^(k+j))/(x[j] - x[k])
-    end
-    
-    """
-    The function below is based on the numerical method outlined in 
-    Chapter 8.2 from Trefethen 1994 
-    https://people.maths.ox.ac.uk/trefethen/8all.pdf
-    full list of Chapters may be obtained here 
-    https://people.maths.ox.ac.uk/trefethen/pdetext.html
-    """
-    
-    function cheb_derivative_matrix!(D::Array{Float64,2},x::Array{Float64,1},n) 
-        D[:,:] .= 0.0
-        
-        # top left, bottom right
-        D[1,1] = (2.0*(n - 1.0)^2 + 1.0)/6.0
-        D[n,n] = -(2.0*(n - 1.0)^2 + 1.0)/6.0
-        
-        # top row 
-        j = 1
-        c_j = 2.0 
-        c_k = 1.0
-        for k in 2:n-1
-            D[j,k] = Djk(x,j,k,c_j,c_k)
-        end
-        k = n 
-        c_k = 2.0
-        D[j,k] = Djk(x,j,k,c_j,c_k)
-        
-        # bottom row 
-        j = n
-        c_j = 2.0 
-        c_k = 1.0
-        for k in 2:n-1
-            D[j,k] = Djk(x,j,k,c_j,c_k)
-        end
-        k = 1
-        c_k = 2.0
-        D[j,k] = Djk(x,j,k,c_j,c_k)
-        
-        #left column
-        k = 1
-        c_j = 1.0 
-        c_k = 2.0
-        for j in 2:n-1
-            D[j,k] = Djk(x,j,k,c_j,c_k)
-        end
-        
-        #right column
-        k = n
-        c_j = 1.0 
-        c_k = 2.0
-        for j in 2:n-1
-            D[j,k] = Djk(x,j,k,c_j,c_k)
-        end
-        
-        # interior rows and columns
-        for j in 2:n-1
-            D[j,j] = Djj(x,j)
-            #D[j,j] = -0.5*x[j]/( 1.0 - x[j]^2)
-            for k in 2:n-1
-                if j == k 
-                    continue
-                end
-                c_k = 1.0
-                c_j = 1.0
-                #D[j,k] = (c_j/c_k)*((-1)^(k+j))/(x[j] - x[k])
-                D[j,k] = Djk(x,j,k,c_j,c_k)
-            end
-        end
-    end 
-    
-    function cheb_derivative_matrix_reversed!(D::Array{Float64,2},x) 
-        D_elementwise = Array{Float64,2}(undef,x.ngrid,x.ngrid)
-        cheb_derivative_matrix_elementwise_reversed!(D_elementwise,x.ngrid,x.L,x.nelement_global)    
-        if x.ngrid < 8
-            println("\n D_elementwise \n")
-            for i in 1:x.ngrid
-                for j in 1:x.ngrid
-                    @printf("%.1f ", D_elementwise[i,j])
-                end
-                println("")
-            end
-        end 
-        assign_cheb_derivative_matrix!(D,D_elementwise,x)
-    end
-    
-    function cheb_second_derivative_matrix_reversed!(D::Array{Float64,2},x) 
-        D_elementwise = Array{Float64,2}(undef,x.ngrid,x.ngrid)
-        cheb_derivative_matrix_elementwise_reversed!(D_elementwise,x.ngrid,x.L,x.nelement_global)    
-        D2_elementwise = Array{Float64,2}(undef,x.ngrid,x.ngrid)
-        mul!(D2_elementwise,D_elementwise,D_elementwise)
-        if x.ngrid < 8
-            println("\n D2_elementwise \n")
-            for i in 1:x.ngrid
-                for j in 1:x.ngrid
-                    @printf("%.1f ", D2_elementwise[i,j])
-                end
-                println("")
-            end
-        end
-        assign_cheb_derivative_matrix!(D,D2_elementwise,x)
-    end
-    
-    function assign_cheb_derivative_matrix!(D::Array{Float64,2},D_elementwise::Array{Float64,2},x) 
-        
-        # zero output matrix before assignment 
-        D[:,:] .= 0.0
-        imin = x.imin
-        imax = x.imax
-        
-        # fill in first element 
-        j = 1
-        if x.bc == "zero"
-            D[imin[j],imin[j]:imax[j]] .+= D_elementwise[1,:]./2.0
-            D[imin[j],imin[j]] += D_elementwise[x.ngrid,x.ngrid]/2.0
-        else 
-            D[imin[j],imin[j]:imax[j]] .+= D_elementwise[1,:]
-        end
-        for k in 2:imax[j]-imin[j] 
-            D[k,imin[j]:imax[j]] .+= D_elementwise[k,:]
-        end
-        if x.nelement_local > 1 || x.bc == "zero"
-            D[imax[j],imin[j]:imax[j]] .+= D_elementwise[x.ngrid,:]./2.0
-        else
-            D[imax[j],imin[j]:imax[j]] .+= D_elementwise[x.ngrid,:]
-        end 
-        # remaining elements recalling definitions of imax and imin
-        for j in 2:x.nelement_local
-            #lower boundary condition on element
-            D[imin[j]-1,imin[j]-1:imax[j]] .+= D_elementwise[1,:]./2.0
-            for k in 2:imax[j]-imin[j]+1 
-                D[k+imin[j]-2,imin[j]-1:imax[j]] .+= D_elementwise[k,:]
-            end
-            # upper boundary condition on element 
-            if j == x.nelement_local && !(x.bc == "zero")
-                D[imax[j],imin[j]-1:imax[j]] .+= D_elementwise[x.ngrid,:]
-            elseif j == x.nelement_local && x.bc == "zero"
-                D[imax[j],imin[j]-1:imax[j]] .+= D_elementwise[x.ngrid,:]./2.0
-                D[imax[j],imax[j]] += D_elementwise[1,1]/2.0
-            else 
-                D[imax[j],imin[j]-1:imax[j]] .+= D_elementwise[x.ngrid,:]./2.0
-            end
-        end
-        
-    end
-    
-    function cheb_derivative_matrix_elementwise_reversed!(D::Array{Float64,2},n::Int64,L::Float64,nelement::Int64) 
-        
-        #define Chebyshev points in reversed order x_j = { -1, ... , 1}
-        x = Array{Float64,1}(undef,n)
-        for j in 1:n
-            x[j] = cospi((n-j)/(n-1))
-        end
-        
-        # zero matrix before allocating values
-        D[:,:] .= 0.0
-        
-        # top row 
-        j = 1
-        c_j = 2.0 
-        c_k = 1.0
-        for k in 2:n-1
-            D[j,k] = Djk(x,j,k,c_j,c_k)
-        end
-        k = n 
-        c_k = 2.0
-        D[j,k] = Djk(x,j,k,c_j,c_k)
-        
-        # bottom row 
-        j = n
-        c_j = 2.0 
-        c_k = 1.0
-        for k in 2:n-1
-            D[j,k] = Djk(x,j,k,c_j,c_k)
-        end
-        k = 1
-        c_k = 2.0
-        D[j,k] = Djk(x,j,k,c_j,c_k)
-        
-        #left column
-        k = 1
-        c_j = 1.0 
-        c_k = 2.0
-        for j in 2:n-1
-            D[j,k] = Djk(x,j,k,c_j,c_k)
-        end
-        
-        #right column
-        k = n
-        c_j = 1.0 
-        c_k = 2.0
-        for j in 2:n-1
-            D[j,k] = Djk(x,j,k,c_j,c_k)
-        end
-        
-        
-        # top left, bottom right
-        #D[n,n] = (2.0*(n - 1.0)^2 + 1.0)/6.0
-        #D[1,1] = -(2.0*(n - 1.0)^2 + 1.0)/6.0        
-        # interior rows and columns
-        for j in 2:n-1
-            #D[j,j] = Djj(x,j)
-            for k in 2:n-1
-                if j == k 
-                    continue
-                end
-                c_k = 1.0
-                c_j = 1.0
-                D[j,k] = Djk(x,j,k,c_j,c_k)
-            end
-        end
-        
-        # calculate diagonal entries to guarantee that
-        # D * (1, 1, ..., 1, 1) = (0, 0, ..., 0, 0)
-        for j in 1:n
-            D[j,j] = -sum(D[j,:])
-        end
-        
-        #multiply by scale factor for element length
-        D .= (2.0*float(nelement)/L).*D
-    end 
-    
-    """
-    function integrating d y / d t = f(t)
-    """
-    function forward_euler_step!(ynew,yold,f,dt,n)
-        for i in 1:n
-            ynew[i] = yold[i] + dt*f[i]
-        end
-    end
-    """
-    function creating lu object for A = I - dt*nu*D2
-    """
-    function diffusion_matrix(D2,n,dt,nu;return_A=false)
-        A = Array{Float64,2}(undef,n,n)
-        for i in 1:n
-            for j in 1:n
-                A[i,j] = - dt*nu*D2[i,j]
-            end
-            A[i,i] += 1.0
-        end
-        lu_obj = lu(A)
-        if return_A
-            return lu_obj, A
-        else
-            return lu_obj
-        end
-    end
-    
-    #using LinearAlgebra.mul
-    discretization = "chebyshev_pseudospectral"
-    #discretization = "finite_difference"
-	etol = 1.0e-15
-    outprefix = "derivative_test"
-	###################
-	## df/dx Nonperiodic (No) BC test
-	###################
-	
-	# define inputs needed for the test
-	ngrid = 17 #number of points per element 
-	nelement_local = 10 # number of elements per rank
-	nelement_global = nelement_local # total number of elements 
-	L = 1.0 #physical box size in reference units 
-	bc = "" #not required to take a particular value, not used 
-	# fd_option and adv_input not actually used so given values unimportant
-	fd_option = "fourth_order_centered"
-	adv_input = advection_input("default", 1.0, 0.0, 0.0)
-	nrank = 1
-    irank = 0
-    comm = false
-	# create the 'input' struct containing input info needed to create a
-	# coordinate
-    input = grid_input("coord", ngrid, nelement_global, nelement_local, 
-		nrank, irank, L, discretization, fd_option, bc, adv_input,comm)
-	# create the coordinate struct 'x'
-	println("made inputs")
-	x = define_coordinate(input)
-	println("made x")
-    Dx = Array{Float64,2}(undef, x.n, x.n)
-    xchebgrid = Array{Float64,1}(undef, x.n)
-    for i in 1:x.n
-        xchebgrid[i] = cos(pi*(i - 1)/(x.n - 1))
-    end
-    #println("x",xchebgrid[:])
-    cheb_derivative_matrix!(Dx,xchebgrid,x.n)
-    #println("")
-    #println("Dx \n")
-    #for i in 1:x.n
-    #    println(Dx[i,:])
-    #end
-    
-     # create array for the function f(x) to be differentiated/integrated
-	f = Array{Float64,1}(undef, x.n)
-	# create array for the derivative df/dx
-	df = Array{Float64,1}(undef, x.n)
-	df2 = Array{Float64,1}(undef, x.n)
-	df2cheb = Array{Float64,1}(undef, x.n)
-    df_exact = Array{Float64,1}(undef, x.n)
-    df2_exact = Array{Float64,1}(undef, x.n)
-    df_err = Array{Float64,1}(undef, x.n)
-    df2_err = Array{Float64,1}(undef, x.n)
-    df2cheb_err = Array{Float64,1}(undef, x.n)
-
-    for ix in 1:x.n
-        f[ix] = sin(pi*xchebgrid[ix])
-        df_exact[ix] = (pi)*cos(pi*xchebgrid[ix])
-    end
-    mul!(df,Dx,f)
-    for ix in 1:x.n
-        df_err[ix] = df[ix]-df_exact[ix]
-    end
-    # test standard cheb D f = df 
-    #println("df \n",df)
-    #println("df_exact \n",df_exact)
-    #println("df_err \n",df_err)
-    input = grid_input("coord", ngrid, nelement_global, nelement_local, 
-		nrank, irank, L, discretization, fd_option, "zero", adv_input,comm)
-	# create the coordinate struct 'x'
-	x = define_coordinate(input)
-   
-    Dxreverse = Array{Float64,2}(undef, x.n, x.n)
-    cheb_derivative_matrix_reversed!(Dxreverse,x)
-    Dxreverse2 = Array{Float64,2}(undef, x.n, x.n)
-    mul!(Dxreverse2,Dxreverse,Dxreverse)
-    D2xreverse = Array{Float64,2}(undef, x.n, x.n)
-    cheb_second_derivative_matrix_reversed!(D2xreverse,x)
-    
-    Dxreverse2[1,1] = 2.0*Dxreverse2[1,1]
-    Dxreverse2[end,end] = 2.0*Dxreverse2[end,end]
-    #println("x.grid \n",x.grid)
-    if x.n < 20
-        println("\n Dxreverse \n")
-        for i in 1:x.n
-            for j in 1:x.n
-                @printf("%.1f ", Dxreverse[i,j])
-            end
-            println("")
-        end
-        println("\n Dxreverse*Dxreverse \n")
-        for i in 1:x.n
-            for j in 1:x.n
-                @printf("%.1f ", Dxreverse2[i,j])
-            end
-            println("")
-        end
-        
-        println("\n D2xreverse \n")
-        for i in 1:x.n
-            for j in 1:x.n
-                @printf("%.1f ", D2xreverse[i,j])
-            end
-            println("")
-        end
-        println("\n")
-    end
-
-    alpha = 512.0    
-    for ix in 1:x.n
-#        f[ix] = sin(2.0*pi*x.grid[ix]/x.L)
-#        df_exact[ix] = (2.0*pi/x.L)*cos(2.0*pi*x.grid[ix]/x.L)
-#        df2_exact[ix] = -(2.0*pi/x.L)*(2.0*pi/x.L)*sin(2.0*pi*x.grid[ix]/x.L)
- 
-        f[ix] = exp(-alpha*(x.grid[ix])^2)
-        df_exact[ix] = -2.0*alpha*x.grid[ix]*exp(-alpha*(x.grid[ix])^2)
-        df2_exact[ix] = ((2.0*alpha*x.grid[ix])^2 - 2.0*alpha)*exp(-alpha*(x.grid[ix])^2)
-    end
-    println("test f: \n",f)
-    # calculate d f / d x from matrix 
-    mul!(df,Dxreverse,f)
-    # calculate d^2 f / d x from second application of Dx matrix 
-    mul!(df2,Dxreverse2,f)
-    # calculate d^2 f / d x from applition of D2x matrix 
-    mul!(df2cheb,D2xreverse,f)
-    for ix in 1:x.n
-        df_err[ix] = df[ix]-df_exact[ix]
-        df2_err[ix] = df2[ix]-df2_exact[ix]
-        df2cheb_err[ix] = df2cheb[ix]-df2_exact[ix]
-    end
-    println("Reversed - multiple elements")
-    #println("df \n",df)
-    #println("df_exact \n",df_exact)
-    println("df_err \n",df_err)
-    #println("df2 \n",df2)
-    #println("df2_exact \n",df2_exact)
-    println("df2_err \n",df2_err)
-    println("df2cheb_err \n",df2cheb_err)
-    
-    println("max(df_err) \n",maximum(abs.(df_err)))
-    println("max(df2_err) \n",maximum(abs.(df2_err)))
-    println("max(df2cheb_err) \n",maximum(abs.(df2cheb_err)))
-    
-    ### attempt at matrix inversion via LU decomposition
-    Dt = 0.1
-    Nu = 1.0
-    lu_obj, AA = diffusion_matrix(Dxreverse2,x.n,Dt,Nu,return_A=true)
-    #AA = Array{Float64,2}(undef,x.n,x.n)
-    #for i in 1:x.n
-    #    for j in 1:x.n
-    #        AA[i,j] = - Dt*Nu*Dxreverse2[i,j]
-    #    end
-    #    AA[i,i] += 1.0
-    #end
-    #lu_obj = lu(AA)
-    if x.n < 20
-        println("L : \n",lu_obj.L)
-        println("U : \n",lu_obj.U)
-        println("p vector : \n",lu_obj.p)
-    end
-    LUtest = true
-    AA_test_lhs = lu_obj.L*lu_obj.U 
-    AA_test_rhs = AA[lu_obj.p,:]
-    for i in 1:x.n
-        for j in 1:x.n
-            if abs.(AA_test_lhs[i,j]-AA_test_rhs[i,j]) > zero
-                global LUtest = false
-            end
-        end
-    end
-    println("LU == AA : \n",LUtest)
-    
-    #bb = ones(x.n) try this for bc = "" rather than bc = "zero"
-    bb = Array{Float64,1}(undef,x.n)
-    yy = Array{Float64,1}(undef,x.n)
-    #for i in 1:x.n
-    #    bb[i] = f[i]#exp(-(4.0*x.grid[i]/x.L)^2)
-    #end
-    #yy = lu_obj \ bb # solution to AA yy = bb 
-    #println("result", yy)
-    #println("check result", AA*yy, bb)
-    MMS_test = false 
-    evolution_test = true 
-    
-    if MMS_test
-        ntest = 5
-        MMS_errors = Array{Float64,1}(undef,ntest)
-        Dt_list = Array{Float64,1}(undef,ntest)
-        fac_list = Array{Int64,1}(undef,ntest)
-        fac_list .= [1, 10, 100, 1000, 10000]
-        #for itest in [1, 10, 100, 1000, 10000]
-        for itest in 1:ntest
-            fac = fac_list[itest]
-            #println(fac)
-            ntime = 1000*fac
-            nwrite = 100*fac
-            dt = 0.001/fac
-            #println(ntime," ",dt)
-            nu = 1.0
-            LU_obj = diffusion_matrix(Dxreverse2,x.n,dt,nu)
-            
-            time = Array{Float64,1}(undef,ntime)
-            ff = Array{Float64,2}(undef,x.n,ntime)
-            ss = Array{Float64,1}(undef,x.n) #source
-
-            time[1] = 0.0
-            ff[:,1] .= f[:] #initial condition
-            for i in 1:ntime-1
-                time[i+1] = (i+1)*dt
-                bb .= ff[:,i]
-                yy .= LU_obj\bb # implicit backward euler diffusion step
-                @. ss = -nu*df2_exact # source term
-                 # explicit forward_euler_step with source
-                @views forward_euler_step!(ff[:,i+1],yy,ss,dt,x.n)
-            end
-
-            ff_error = Array{Float64,1}(undef,x.n)
-            ff_error[:] .= abs.(ff[:,end] - ff[:,1])
-            maxfferr = maximum(ff_error)
-            #println("ff_error \n",ff_error)
-            println("max(ff_error) \n",maxfferr)
-            #println("t[end]: ",time[end])
-            MMS_errors[itest] = maxfferr
-            Dt_list[itest] = dt
-        end 
-        @views plot(Dt_list, [MMS_errors, 100.0*Dt_list], label=[L"max(\epsilon(f))" L"100\Delta t"], 
-                     xlabel=L"\Delta t", ylabel="", xscale=:log10, yscale=:log10, shape =:circle)
-        outfile = string("ff_err_vs_dt.pdf")
-        savefig(outfile)
-    end
-    
-    if evolution_test
-        ntime = 100
-        nwrite = 1
-        dt = 0.001
-        nu = 1.0
-        LU_obj = diffusion_matrix(Dxreverse2,x.n,dt,nu)
-        
-        time = Array{Float64,1}(undef,ntime)
-        ff = Array{Float64,2}(undef,x.n,ntime)
-        ss = Array{Float64,1}(undef,x.n) #source
-
-        time[1] = 0.0
-        ff[:,1] .= f[:] #initial condition
-        for i in 1:ntime-1
-            time[i+1] = (i+1)*dt
-            bb .= ff[:,i]
-            yy .= LU_obj\bb # implicit backward euler diffusion step
-            @. ss = 0.0 # source term
-             # explicit forward_euler_step with source
-            @views forward_euler_step!(ff[:,i+1],yy,ss,dt,x.n)
-        end
-
-        ffmin = minimum(ff)
-        ffmax = maximum(ff)
-        anim = @animate for i in 1:nwrite:ntime
-                @views plot(x.grid, ff[:,i], xlabel="x", ylabel="f", ylims = (ffmin,ffmax))
-            end
-        outfile = string("ff_vs_x.gif")
-        gif(anim, outfile, fps=5)
-    end
-end
diff --git a/debug_test/fokker_planck_collisions_inputs.jl b/debug_test/fokker_planck_collisions_inputs.jl
new file mode 100644
index 000000000..5f6a8d559
--- /dev/null
+++ b/debug_test/fokker_planck_collisions_inputs.jl
@@ -0,0 +1,67 @@
+test_type = "Fokker-Planck collisions"
+
+# Default input for the Fokker-Planck collisions debug test: full-f run, no evolved moments
+test_input_full_f = Dict(
+     "run_name" => "full_f",
+     "dt" => 0.0,  # NOTE(review): zero timestep, presumably so the short run only exercises the code path - confirm
+     "nstep" => 3,
+     "nwrite" => 2,
+     "nwrite_dfns" => 2,
+     "Bmag" => 1.0,
+     "Bzed" => 1.0,
+     "T_e" => 1.0,
+     "T_wall" => 1.0,
+     "electron_physics" => "boltzmann_electron_response",
+     "evolve_moments_conservation" => false,
+     "evolve_moments_density" => false,
+     "evolve_moments_parallel_flow" => false,
+     "evolve_moments_parallel_pressure" => false,
+     "initial_density1" => 0.5,
+     "initial_density2" => 0.5,
+     "initial_temperature1" => 1.0,
+     "initial_temperature2" => 1.0,
+     "charge_exchange_frequency" => 0.0,
+     "ionization_frequency" => 0.0,
+     "constant_ionization_rate" => false,
+     "n_ion_species" => 1,
+     "n_neutral_species" => 0,  # zero neutrals: runtest_template.jl skips loop types containing :sn for this input
+     "n_rk_stages" => 4,
+     "nuii" => 1.0,  # normalised C[F,F] Fokker-Planck collision frequency (nonzero, so the operator is exercised - TODO confirm)
+     "r_bc" => "periodic",
+     "r_discretization" => "chebyshev_pseudospectral",
+     "r_nelement" => 1,
+     "r_ngrid" => 3,
+     "rhostar" => 1.0,
+     "split_operators" => false,
+     "vpa_L" => 6.0,
+     "vpa_bc" => "zero",
+     "vpa_discretization" => "gausslegendre_pseudospectral",  # same vpa/vperp discretization as examples/fokker-planck
+     "vpa_nelement" => 2,
+     "vpa_ngrid" => 3,
+     "vperp_L" => 3.0,
+     "vperp_discretization" => "gausslegendre_pseudospectral",
+     "vperp_nelement" => 2,
+     "vperp_ngrid" => 3,
+     "z_IC_density_amplitude1" => 0.001,
+     "z_IC_density_amplitude2" => 0.001,
+     "z_IC_density_phase1" => 0.0,
+     "z_IC_density_phase2" => 0.0,
+     "z_IC_option1" => "sinusoid",
+     "z_IC_option2" => "sinusoid",
+     "z_IC_temperature_amplitude1" => 0.0,
+     "z_IC_temperature_amplitude2" => 0.0,
+     "z_IC_temperature_phase1" => 0.0,
+     "z_IC_temperature_phase2" => 0.0,
+     "z_IC_upar_amplitude1" => 0.0,
+     "z_IC_upar_amplitude2" => 0.0,
+     "z_IC_upar_phase1" => 0.0,
+     "z_IC_upar_phase2" => 0.0,
+     "z_bc" => "wall",
+     "z_discretization" => "chebyshev_pseudospectral",
+     "z_nelement" => 1,
+     "z_ngrid" => 3,
+    )
+
+test_input_list = [
+     test_input_full_f ,
+    ]
diff --git a/debug_test/fokker_planck_collisions_tests.jl b/debug_test/fokker_planck_collisions_tests.jl
new file mode 100644
index 000000000..5fd838da8
--- /dev/null
+++ b/debug_test/fokker_planck_collisions_tests.jl
@@ -0,0 +1,23 @@
+module FokkerPlanckCollisionsDebug
+
+# Debug test that feeds the Fokker-Planck collision operator inputs through the shared debug-test template
+
+include("setup.jl")
+
+# Create a temporary directory for test output (get_MPI_tempdir() is presumably provided by setup.jl - TODO confirm)
+test_output_directory = get_MPI_tempdir()
+mkpath(test_output_directory)
+
+
+# Input parameters for the test: defines `test_type` and `test_input_list`
+include("fokker_planck_collisions_inputs.jl")
+
+# Defines the test functions (including `runtests`), using variables defined in the *_inputs.jl file
+include("runtest_template.jl")
+
+end # FokkerPlanckCollisionsDebug
+
+
+using .FokkerPlanckCollisionsDebug
+
+FokkerPlanckCollisionsDebug.runtests()
diff --git a/debug_test/harrisonthompson_inputs.jl b/debug_test/harrisonthompson_inputs.jl
index 1ead157cc..bf388c61e 100644
--- a/debug_test/harrisonthompson_inputs.jl
+++ b/debug_test/harrisonthompson_inputs.jl
@@ -72,6 +72,6 @@ test_input_chebyshev = merge(test_input_finite_difference,
                                   "vz_nelement" => 2))
 
 test_input_list = [
-     test_input_finite_difference,
+     #test_input_finite_difference,
      test_input_chebyshev,
     ]
diff --git a/debug_test/mms_inputs.jl b/debug_test/mms_inputs.jl
index c9d9bf5f3..328e7aa3c 100644
--- a/debug_test/mms_inputs.jl
+++ b/debug_test/mms_inputs.jl
@@ -58,7 +58,6 @@ test_input = Dict(
     "vperp_ngrid" => 3,
     "vperp_nelement" => 2,
     "vperp_L" => 6.0,
-    "vperp_bc" => "periodic",
     "vperp_discretization" => "chebyshev_pseudospectral",
     "vz_ngrid" => 3,
     "vz_nelement" => 2,
diff --git a/debug_test/runtest_template.jl b/debug_test/runtest_template.jl
index c5245eaa7..7d7928e3a 100644
--- a/debug_test/runtest_template.jl
+++ b/debug_test/runtest_template.jl
@@ -58,6 +58,12 @@ function runtests(; restart=false)
         n_factors = length(factor(Vector, global_size[]))
 
         for input ∈ test_input_list, debug_loop_type ∈ dimension_combinations_to_test
+            if :sn ∈ debug_loop_type && "n_neutral_species" ∈ keys(input) &&
+                    input["n_neutral_species"] <= 0
+                # Skip neutral dimension parallelisation options if the number of neutral
+                # species is zero, as these would just be equivalent to running in serial
+                continue
+            end
             ndims = length(debug_loop_type)
             for i ∈ 1:(ndims+n_factors-1)÷n_factors
                 debug_loop_parallel_dims =
diff --git a/debug_test/runtests.jl b/debug_test/runtests.jl
index 85b7bb380..55550595e 100644
--- a/debug_test/runtests.jl
+++ b/debug_test/runtests.jl
@@ -5,6 +5,7 @@ include("setup.jl")
 function runtests()
     @testset "moment_kinetics tests" begin
         include(joinpath(@__DIR__, "sound_wave_tests.jl"))
+        include(joinpath(@__DIR__, "fokker_planck_collisions_tests.jl"))
         include(joinpath(@__DIR__, "wall_bc_tests.jl"))
         include(joinpath(@__DIR__, "harrisonthompson.jl"))
         include(joinpath(@__DIR__, "mms_tests.jl"))
diff --git a/debug_test/sound_wave_inputs.jl b/debug_test/sound_wave_inputs.jl
index 83721b630..dee7158cf 100644
--- a/debug_test/sound_wave_inputs.jl
+++ b/debug_test/sound_wave_inputs.jl
@@ -258,22 +258,22 @@ test_input_chebyshev_cx0_1D1V_split_3_moments =
                "evolve_moments_parallel_pressure" => true))
 
 test_input_list = [
-     test_input_finite_difference,
+     #test_input_finite_difference,
      #test_input_finite_difference_split_1_moment,
      #test_input_finite_difference_split_2_moments,
      #test_input_finite_difference_split_3_moments,
-     test_input_finite_difference_cx0,
+     #test_input_finite_difference_cx0,
      #test_input_finite_difference_cx0_split_1_moment,
      #test_input_finite_difference_cx0_split_2_moments,
      #test_input_finite_difference_cx0_split_3_moments,
-     test_input_finite_difference_1D1V,
-     test_input_finite_difference_1D1V_split_1_moment,
-     test_input_finite_difference_1D1V_split_2_moments,
-     test_input_finite_difference_1D1V_split_3_moments,
-     test_input_finite_difference_cx0_1D1V,
-     test_input_finite_difference_cx0_1D1V_split_1_moment,
-     test_input_finite_difference_cx0_1D1V_split_2_moments,
-     test_input_finite_difference_cx0_1D1V_split_3_moments,
+     #test_input_finite_difference_1D1V,
+     #test_input_finite_difference_1D1V_split_1_moment,
+     #test_input_finite_difference_1D1V_split_2_moments,
+     #test_input_finite_difference_1D1V_split_3_moments,
+     #test_input_finite_difference_cx0_1D1V,
+     #test_input_finite_difference_cx0_1D1V_split_1_moment,
+     #test_input_finite_difference_cx0_1D1V_split_2_moments,
+     #test_input_finite_difference_cx0_1D1V_split_3_moments,
      test_input_chebyshev,
      #test_input_chebyshev_split_1_moment,
      #test_input_chebyshev_split_2_moments,
diff --git a/docs/src/zz_fokker_planck.md b/docs/src/zz_fokker_planck.md
new file mode 100644
index 000000000..e52ff4a07
--- /dev/null
+++ b/docs/src/zz_fokker_planck.md
@@ -0,0 +1,6 @@
+`fokker_planck`
+===============
+
+```@autodocs
+Modules = [moment_kinetics.fokker_planck]
+```
diff --git a/docs/src/zz_fokker_planck_calculus.md b/docs/src/zz_fokker_planck_calculus.md
new file mode 100644
index 000000000..5b7f2121f
--- /dev/null
+++ b/docs/src/zz_fokker_planck_calculus.md
@@ -0,0 +1,6 @@
+`fokker_planck_calculus`
+========================
+
+```@autodocs
+Modules = [moment_kinetics.fokker_planck_calculus]
+```
diff --git a/docs/src/zz_fokker_planck_test.md b/docs/src/zz_fokker_planck_test.md
new file mode 100644
index 000000000..1c2308d76
--- /dev/null
+++ b/docs/src/zz_fokker_planck_test.md
@@ -0,0 +1,6 @@
+`fokker_planck_test`
+====================
+
+```@autodocs
+Modules = [moment_kinetics.fokker_planck_test]
+```
diff --git a/docs/src/zz_gauss_legendre.md b/docs/src/zz_gauss_legendre.md
new file mode 100644
index 000000000..0194f8183
--- /dev/null
+++ b/docs/src/zz_gauss_legendre.md
@@ -0,0 +1,6 @@
+`gauss_legendre`
+================
+
+```@autodocs
+Modules = [moment_kinetics.gauss_legendre]
+```
diff --git a/examples/fokker-planck/fokker-planck-relaxation.toml b/examples/fokker-planck/fokker-planck-relaxation.toml
new file mode 100644
index 000000000..b12454e6f
--- /dev/null
+++ b/examples/fokker-planck/fokker-planck-relaxation.toml
@@ -0,0 +1,65 @@
+# cheap input file for a 0D2V relaxation to a collisional Maxwellian distribution with self-ion collisions.
+n_ion_species = 1
+n_neutral_species = 0
+electron_physics = "boltzmann_electron_response"
+evolve_moments_density = false
+evolve_moments_parallel_flow = false
+evolve_moments_parallel_pressure = false
+evolve_moments_conservation = false
+T_e = 1.0
+T_wall = 1.0
+rhostar = 1.0
+Bzed = 1.0
+Bmag = 1.0
+initial_density1 = 0.5
+initial_temperature1 = 1.0
+initial_density2 = 0.5
+initial_temperature2 = 1.0
+z_IC_option1 = "sinusoid"
+z_IC_density_amplitude1 = 0.001
+z_IC_density_phase1 = 0.0
+z_IC_upar_amplitude1 = 0.0
+z_IC_upar_phase1 = 0.0
+z_IC_temperature_amplitude1 = 0.0
+z_IC_temperature_phase1 = 0.0
+z_IC_option2 = "sinusoid"
+z_IC_density_amplitude2 = 0.001
+z_IC_density_phase2 = 0.0
+z_IC_upar_amplitude2 = 0.0
+z_IC_upar_phase2 = 0.0
+z_IC_temperature_amplitude2 = 0.0
+z_IC_temperature_phase2 = 0.0
+charge_exchange_frequency = 0.0
+ionization_frequency = 0.0
+constant_ionization_rate = false
+# nuii sets the normalised input C[F,F] Fokker-Planck collision frequency
+nuii = 1.0
+nstep = 5000
+dt = 1.0e-2
+nwrite = 5000
+nwrite_dfns = 5000
+use_semi_lagrange = false
+n_rk_stages = 4
+split_operators = false
+z_ngrid = 1
+z_nelement = 1
+z_nelement_local = 1
+z_bc = "wall"
+z_discretization = "chebyshev_pseudospectral"
+r_ngrid = 1
+r_nelement = 1
+r_nelement_local = 1
+r_bc = "periodic"
+r_discretization = "chebyshev_pseudospectral"
+vpa_ngrid = 3
+vpa_nelement = 6
+vpa_L = 6.0
+vpa_bc = "zero"
+vpa_discretization = "gausslegendre_pseudospectral"
+vperp_ngrid = 3
+vperp_nelement = 3
+vperp_L = 3.0
+vperp_discretization = "gausslegendre_pseudospectral"
+# The Fokker-Planck operator requires the "gausslegendre_pseudospectral"
+# discretization option for both the vpa and vperp grids
+
diff --git a/examples/numerical-dissipation/num-diss-relaxation.toml b/examples/numerical-dissipation/num-diss-relaxation.toml
new file mode 100644
index 000000000..29965e248
--- /dev/null
+++ b/examples/numerical-dissipation/num-diss-relaxation.toml
@@ -0,0 +1,68 @@
+# cheap input file for a 0D2V relaxation with numerical diffusion terms d^2 F / dvpa^2 and d^2 F / dvperp^2.
+n_ion_species = 1
+n_neutral_species = 0
+electron_physics = "boltzmann_electron_response"
+evolve_moments_density = false
+evolve_moments_parallel_flow = false
+evolve_moments_parallel_pressure = false
+evolve_moments_conservation = false
+T_e = 1.0
+T_wall = 1.0
+rhostar = 1.0
+Bzed = 1.0
+Bmag = 1.0
+initial_density1 = 0.5
+initial_temperature1 = 1.0
+initial_density2 = 0.5
+initial_temperature2 = 1.0
+z_IC_option1 = "sinusoid"
+z_IC_density_amplitude1 = 0.001
+z_IC_density_phase1 = 0.0
+z_IC_upar_amplitude1 = 0.0
+z_IC_upar_phase1 = 0.0
+z_IC_temperature_amplitude1 = 0.0
+z_IC_temperature_phase1 = 0.0
+z_IC_option2 = "sinusoid"
+z_IC_density_amplitude2 = 0.001
+z_IC_density_phase2 = 0.0
+z_IC_upar_amplitude2 = 0.0
+z_IC_upar_phase2 = 0.0
+z_IC_temperature_amplitude2 = 0.0
+z_IC_temperature_phase2 = 0.0
+charge_exchange_frequency = 0.0
+ionization_frequency = 0.0
+constant_ionization_rate = false
+nuii = 0.0
+nstep = 2000
+dt = 1.0e-3
+nwrite = 2000
+nwrite_dfns = 2000
+use_semi_lagrange = false
+n_rk_stages = 4
+split_operators = false
+z_ngrid = 1
+z_nelement = 1
+z_nelement_local = 1
+z_bc = "wall"
+z_discretization = "chebyshev_pseudospectral"
+r_ngrid = 1
+r_nelement = 1
+r_nelement_local = 1
+r_bc = "periodic"
+r_discretization = "chebyshev_pseudospectral"
+vpa_ngrid = 5
+vpa_nelement = 16
+vpa_L = 6.0
+vpa_bc = "zero"
+vpa_discretization = "gausslegendre_pseudospectral"
+vperp_ngrid = 5
+vperp_nelement = 8
+vperp_L = 3.0
+vperp_bc = "zero"
+vperp_discretization = "gausslegendre_pseudospectral"
+
+[numerical_dissipation]
+vpa_dissipation_coefficient = 0.1
+vperp_dissipation_coefficient = 0.1
+z_dissipation_coefficient = -1.0
+r_dissipation_coefficient = -1.0
diff --git a/moment_kinetics_input.jl b/moment_kinetics_input.jl
deleted file mode 120000
index 43212a28d..000000000
--- a/moment_kinetics_input.jl
+++ /dev/null
@@ -1 +0,0 @@
-src/moment_kinetics_input.jl
\ No newline at end of file
diff --git a/post_processing_input.jl b/post_processing_input.jl
deleted file mode 120000
index 77acb92c8..000000000
--- a/post_processing_input.jl
+++ /dev/null
@@ -1 +0,0 @@
-src/post_processing_input.jl
\ No newline at end of file
diff --git a/run_MMS_test.jl b/run_MMS_test.jl
index 6f3abed4c..50f8f9a14 100644
--- a/run_MMS_test.jl
+++ b/run_MMS_test.jl
@@ -14,7 +14,8 @@ if abspath(PROGRAM_FILE) == @__FILE__
    #test_option = "collisionless_wall-1D-1V-constant-Er"
    #test_option = "collisionless_wall-1D-1V-constant-Er-zngrid-5"
    #test_option = "collisionless_wall-1D-1V-constant-Er-ngrid-5"
-   test_option = "collisionless_wall-1D-1V-constant-Er-ngrid-5-opt"
+   #test_option = "collisionless_wall-1D-1V-constant-Er-ngrid-5-opt"
+   test_option = "krook_wall-1D-2V"
    #test_option = "collisionless_wall-1D-3V"
    #test_option = "collisionless_wall-2D-3V"
    #test_option = "collisionless_wall-2D-3V-Er-zero-at-plate"
@@ -171,6 +172,12 @@ if abspath(PROGRAM_FILE) == @__FILE__
                     ]
         scan_type = "vpaz_nelement"
         scan_name = "1D-1V-wall_cheb"
+    elseif test_option == "krook_wall-1D-2V"
+        # Krook wall test, no sheath for electrons, no radial coordinate
+        path_list = ["runs/1D-wall_MMS_nel_r_1_z_2_vpa_2_vperp_2_krook","runs/1D-wall_MMS_nel_r_1_z_4_vpa_4_vperp_4_krook",
+                        "runs/1D-wall_MMS_nel_r_1_z_8_vpa_8_vperp_8_krook"        ]
+        scan_type = "vpavperpz_nelement"
+        scan_name = "1D-2V-wall_cheb_krook"
     end
     mk.plot_MMS_sequence.get_MMS_error_data(path_list,scan_type,scan_name)
 end
diff --git a/run_MPI_test.jl b/run_MPI_test.jl
deleted file mode 100644
index 818796312..000000000
--- a/run_MPI_test.jl
+++ /dev/null
@@ -1,203 +0,0 @@
-if abspath(PROGRAM_FILE) == @__FILE__
-    using Pkg
-    Pkg.activate(".")
-
-    import moment_kinetics
-	using moment_kinetics.input_structs: grid_input, advection_input
-	using moment_kinetics.coordinates: define_coordinate
-	using moment_kinetics.chebyshev: setup_chebyshev_pseudospectral
-	using moment_kinetics.calculus: derivative!, integral
-	import MPI 
-	using Plots
-	MPI.Init()
-	comm = MPI.COMM_WORLD
-	nrank = MPI.Comm_size(comm) # number of ranks 
-	irank = MPI.Comm_rank(comm) # rank of this process
-	#println("Hello world, I am $(irank) of $(nrank)")
-	MPI.Barrier(comm)
-	#println("comm: ",comm)
-	discretization = "chebyshev_pseudospectral"
-
-	etol = 1.0e-15
-	
-	
-	###################
-	## df/dx Nonperiodic (No) BC test
-	###################
-	
-	# define inputs needed for the test
-	ngrid = 20 #number of points per element 
-	nelement_local = 2 # number of elements per rank
-	nelement_global = nelement_local*nrank # total number of elements 
-	if irank == 0
-		println("ngrid = ",ngrid," nelement_local = ",nelement_local,
-			" nelement_global = ",nelement_global," nrank = ",nrank)
-	end
-	L = 6.0 #physical box size in reference units 
-	bc = "" #not required to take a particular value, not used 
-	# fd_option and adv_input not actually used so given values unimportant
-	fd_option = ""
-	adv_input = advection_input("default", 1.0, 0.0, 0.0)
-	# create the 'input' struct containing input info needed to create a
-	# coordinate
-	input = grid_input("coord", ngrid, nelement_global, nelement_local, 
-		nrank, irank, L, discretization, fd_option, bc, adv_input,comm)
-	# create the coordinate struct 'x'
-	#println("made inputs")
-	x = define_coordinate(input)
-	#println("made x")
-	# create arrays needed for Chebyshev pseudospectral treatment in x
-	# and create the plans for the forward and backward fast Chebyshev
-	# transforms
-	spectral = setup_chebyshev_pseudospectral(x)
-	#println("made spectral")
-	# create array for the function f(x) to be differentiated/integrated
-	f = Array{Float64,1}(undef, x.n)
-	g = Array{Float64,1}(undef, x.n)
-	x_for_plot = Array{Float64,2}(undef, x.n, nrank)
-	g_for_plot = Array{Float64,2}(undef, x.n, nrank)
-	df_for_plot = Array{Float64,2}(undef, x.n, nrank)
-	# create array for the derivative df/dx
-	df = Array{Float64,1}(undef, x.n)
-	# initialize f
-	for ix ∈ 1:x.n
-		f[ix] = ( (cospi(2.0*x.grid[ix]/x.L)+sinpi(2.0*x.grid[ix]/x.L))
-				  * exp(-x.grid[ix]^2) )
-	    g[ix] = (2.0*pi/x.L)*( (cospi(2.0*x.grid[ix]/x.L)-sinpi(2.0*x.grid[ix]/x.L))
-				  * exp(-x.grid[ix]^2) )  - 2.0*x.grid[ix]*f[ix]
-	end
-	# differentiate f
-	derivative!(df, f, x, spectral)
-	# plot df and g per process
-	outprefix = "run_MPI_test.plot."
-	plot([x.grid,x.grid], [g,df], xlabel="x", ylabel="", label=["g" "df"],
-         shape =:circle, markersize = 5, linewidth=2)
-	outfile = outprefix*string(irank)*".pdf"
-	savefig(outfile)
-	# plot df and g on rank 0
-	x_for_plot .= 0.0
-	g_for_plot .= 0.0
-	df_for_plot .= 0.0
-	for ix ∈ 1:x.n
-		x_for_plot[ix,irank+1] = x.grid[ix]
-		g_for_plot[ix,irank+1] = g[ix]
-		df_for_plot[ix,irank+1] = df[ix]
-	end
-	MPI.Reduce!(x_for_plot,.+,comm)
-	MPI.Reduce!(g_for_plot,.+,comm)
-	MPI.Reduce!(df_for_plot,.+,comm)
-	if irank == 0
-		outprefix = "run_MPI_test.plot."
-		xlist = [x_for_plot[:,1]]
-		ylist = [g_for_plot[:,1]]
-		labels = Matrix{String}(undef, 1, 2*nrank)
-		labels[1] = "g"
-		for iproc in 2:nrank
-			push!(xlist,x_for_plot[:,iproc])
-			push!(ylist,g_for_plot[:,iproc])
-			labels[iproc] ="g"
-		end
-		push!(xlist,x_for_plot[:,1])
-		push!(ylist,df_for_plot[:,1])
-		labels[1+nrank]="df"
-		for iproc in 2:nrank
-			push!(xlist,x_for_plot[:,iproc])
-			push!(ylist,df_for_plot[:,iproc])
-			labels[iproc+nrank]="df"
-		end
-		#println(labels)
-		plot(xlist, ylist, xlabel="x", ylabel="", label=labels, markersize = 1, linewidth=1)
-		outfile = outprefix*"global.pdf"
-		savefig(outfile)
-		println(outfile)	
-	end
-	# integrate df/dx
-	#println("x.grid",x.grid)
-	#println("x.wgts",x.wgts)
-	#println("df",df)
-	intdf = integral(df, x.wgts)
-	#println(intdf)
-	intdf_out = MPI.Reduce(intdf,+,comm)
-	# Test that error intdf is less than the specified error tolerance etol
-	#@test abs(intdf) < etol
-	if(irank == 0)
-		println( "abs(intdf_out) = ", abs(intdf_out), ": etol = ",etol)
-	end
-	
-	###################
-	##  df/dx Periodic BC test
-	###################
-	MPI.Barrier(comm)
-	
-	
-	bc = "periodic" 
-	# create the 'input' struct containing input info needed to create a
-	# coordinate, other values taken from above
-	input = grid_input("coord", ngrid, nelement_global, nelement_local, 
-		nrank, irank, L, discretization, fd_option, bc, adv_input,comm)
-	# create the coordinate struct 'x'
-	x = define_coordinate(input)
-	# create arrays needed for Chebyshev pseudospectral treatment in x
-	# and create the plans for the forward and backward fast Chebyshev
-	# transforms
-	spectral = setup_chebyshev_pseudospectral(x)
-	# initialize f
-	for ix ∈ 1:x.n
-	    # sine wave test
-		f[ix] =  - cospi(2.0*x.grid[ix]/x.L) +2.0*x.grid[ix]/x.L
-	    g[ix] =  (2.0*pi/x.L)*sinpi(2.0*x.grid[ix]/x.L) + 2.0/x.L  
-	end
-	# differentiate f
-	derivative!(df, f, x, spectral)
-	# plot df and g per process
-	outprefix = "run_MPI_test.dfperiodic.plot."
-	plot([x.grid,x.grid], [g,df], xlabel="x", ylabel="", label=["g" "df"],
-         shape =:circle, markersize = 5, linewidth=2)
-	outfile = outprefix*string(irank)*".pdf"
-	savefig(outfile)
-	# plot df and g on rank 0
-	x_for_plot .= 0.0
-	g_for_plot .= 0.0
-	df_for_plot .= 0.0
-	for ix ∈ 1:x.n
-		x_for_plot[ix,irank+1] = x.grid[ix]
-		g_for_plot[ix,irank+1] = g[ix]
-		df_for_plot[ix,irank+1] = df[ix]
-	end
-	MPI.Reduce!(x_for_plot,.+,comm)
-	MPI.Reduce!(g_for_plot,.+,comm)
-	MPI.Reduce!(df_for_plot,.+,comm)
-	if irank == 0
-		outprefix = "run_MPI_test.dfperiodic.plot."
-		xlist = [x_for_plot[:,1]]
-		ylist = [g_for_plot[:,1]]
-		labels = Matrix{String}(undef, 1, 2*nrank)
-		labels[1] = "g"
-		for iproc in 2:nrank
-			push!(xlist,x_for_plot[:,iproc])
-			push!(ylist,g_for_plot[:,iproc])
-			labels[iproc] ="g"
-		end
-		push!(xlist,x_for_plot[:,1])
-		push!(ylist,df_for_plot[:,1])
-		labels[1+nrank]="df"
-		for iproc in 2:nrank
-			push!(xlist,x_for_plot[:,iproc])
-			push!(ylist,df_for_plot[:,iproc])
-			labels[iproc+nrank]="df"
-		end
-		plot(xlist, ylist, xlabel="x", ylabel="", label=labels, markersize = 1, linewidth=1)
-		outfile = outprefix*"global.pdf"
-		savefig(outfile)
-		println(outfile)	
-	end
-	# integrate df/dx
-	intdf = integral(df, x.wgts)
-	intdf_out = MPI.Reduce(intdf,+,comm)
-	# Test that error intdf -2.0 is less than the specified error tolerance etol
-	#@test abs(intdf) < etol
-	if(irank == 0)
-		println( "abs(intdf_out-2.0) = ", abs(intdf_out-2.0), ": etol = ",etol)
-	end
-	MPI.Finalize()
-end 
\ No newline at end of file
diff --git a/run_MPI_test2D.jl b/run_MPI_test2D.jl
deleted file mode 100644
index 50816ec00..000000000
--- a/run_MPI_test2D.jl
+++ /dev/null
@@ -1,731 +0,0 @@
-if abspath(PROGRAM_FILE) == @__FILE__
-    using Pkg
-    Pkg.activate(".")
-
-    import moment_kinetics
-	using moment_kinetics.input_structs: grid_input, advection_input
-	using moment_kinetics.coordinates: define_coordinate
-	using moment_kinetics.chebyshev: setup_chebyshev_pseudospectral
-	using moment_kinetics.calculus: derivative!, integral
-	#using coordinates: coordinate_info
-	import MPI 
-	using Plots
-	
-	function reconcile_element_boundaries_MPI!(df1d::Array{Float64,Ndims},
-	dfdx_lower_endpoints::Array{Float64,N}, dfdx_upper_endpoints::Array{Float64,N},
-	send_buffer::Array{Float64,N}, receive_buffer::Array{Float64,N}, coord) where {Ndims,N}
-		
-		#counter to test if endpoint data assigned
-		assignment_counter = 0
-		
-		# now deal with endpoints that are stored across ranks
-		comm = coord.comm
-		nrank = coord.nrank 
-		irank = coord.irank 
-		#send_buffer = coord.send_buffer
-		#receive_buffer = coord.receive_buffer
-		# sending pattern is cyclic. First we send data form irank -> irank + 1
-		# to fix the lower endpoints, then we send data from irank -> irank - 1
-		# to fix upper endpoints. Special exception for the periodic points.
-		# receive_buffer[1] is for data received, send_buffer[1] is data to be sent
-		
-		send_buffer .= dfdx_upper_endpoints #highest end point on THIS rank
-		# pass data from irank -> irank + 1, receive data from irank - 1
-		idst = mod(irank+1,nrank) # destination rank for sent data
-		isrc = mod(irank-1,nrank) # source rank for received data
-		#MRH what value should tag take here and below? Esp if nrank >= 32
-		rreq = MPI.Irecv!(receive_buffer, comm; source=isrc, tag=isrc+32)
-		sreq = MPI.Isend(send_buffer, comm; dest=idst, tag=irank+32)
-		#print("$irank: Sending   $irank -> $idst = $send_buffer\n")
-		stats = MPI.Waitall([rreq, sreq])
-		#print("$irank: Received $isrc -> $irank = $receive_buffer\n")
-		MPI.Barrier(comm)
-		
-		# no update receive buffer, taking into account the reconciliation
-		if irank == 0
-			if coord.bc == "periodic"
-				#update the extreme lower endpoint with data from irank = nrank -1	
-				receive_buffer .= 0.5*(receive_buffer .+ dfdx_lower_endpoints)
-			else #directly use value from Cheb
-				receive_buffer .= dfdx_lower_endpoints
-			end
-		else # enforce continuity at lower endpoint
-			receive_buffer .= 0.5*(receive_buffer .+ dfdx_lower_endpoints)
-		end
-		
-		#now update the df1d array -- using a slice appropriate to the dimension reconciled
-		# test against coord name -- make sure to use exact string delimiters e.g. "x" not 'x'
-		# test against Ndims (autodetermined) to choose which array slices to use in assigning endpoints
-		#println("coord.name: ",coord.name," Ndims: ",Ndims)
-		if coord.name == "x" && Ndims==2
-			df1d[1,:] .= receive_buffer[:]
-			assignment_counter += 1
-		elseif coord.name == "x" && Ndims==3
-			df1d[1,:,:] .= receive_buffer[:,:]
-			assignment_counter += 1
-		elseif coord.name == "y" && Ndims==2
-			df1d[:,1] .= receive_buffer[:]
-			assignment_counter += 1
-		elseif coord.name == "y" && Ndims==3
-			df1d[:,1,:] .= receive_buffer[:,:]
-			assignment_counter += 1
-		end
-		
-		send_buffer .= dfdx_lower_endpoints #lowest end point on THIS rank
-		# pass data from irank -> irank - 1, receive data from irank + 1
-		idst = mod(irank-1,nrank) # destination rank for sent data
-		isrc = mod(irank+1,nrank) # source rank for received data
-		#MRH what value should tag take here and below? Esp if nrank >= 32
-		rreq = MPI.Irecv!(receive_buffer, comm; source=isrc, tag=isrc+32)
-		sreq = MPI.Isend(send_buffer, comm; dest=idst, tag=irank+32)
-		#print("$irank: Sending   $irank -> $idst = $send_buffer\n")
-		stats = MPI.Waitall([rreq, sreq])
-		#print("$irank: Received $isrc -> $irank = $receive_buffer\n")
-		MPI.Barrier(comm)
-		
-		if irank == nrank-1
-			if coord.bc == "periodic"
-				#update the extreme upper endpoint with data from irank = 0
-				receive_buffer .= 0.5*(receive_buffer .+ dfdx_upper_endpoints)
-			else #directly use value from Cheb
-				receive_buffer .= dfdx_upper_endpoints
-			end
-		else # enforce continuity at upper endpoint
-			receive_buffer .= 0.5*(receive_buffer .+ dfdx_upper_endpoints)
-		end
-	
-		#now update the df1d array -- using a slice appropriate to the dimension reconciled
-		# test against coord name -- make sure to use exact string delimiters e.g. "x" not 'x'
-		# test against Ndims (autodetermined) to choose which array slices to use in assigning endpoints
-		#println("coord.name: ",coord.name," Ndims: ",Ndims)
-		if coord.name=="x" && Ndims ==2
-			df1d[end,:] .= receive_buffer[:]
-			assignment_counter += 1
-		elseif coord.name=="x" && Ndims ==3
-			df1d[end,:,:] .= receive_buffer[:,:]
-			assignment_counter += 1
-		elseif coord.name=="y" && Ndims ==2
-			df1d[:,end] .= receive_buffer[:]
-			assignment_counter += 1
-		elseif coord.name=="y" && Ndims ==3
-			df1d[:,end,:] .= receive_buffer[:,:]
-			assignment_counter += 1
-		end
-	
-		if  !(assignment_counter == 2)
-			println("ERROR: failure to assign endpoints in reconcile_element_boundaries_MPI! (centered): coord.name: ",coord.name," Ndims: ",Ndims)
-		end
-	end
-	
-	function apply_adv_fac!(buffer::Array{Float64,Ndims},adv_fac::Array{Float64,Ndims},endpoints::Array{Float64,Ndims},sgn::Int64) where Ndims
-		#buffer contains off-process endpoint
-		#adv_fac < 0 is positive advection speed
-		#adv_fac > 0 is negative advection speed
-		#endpoint is local on-process endpoint
-		#sgn = 1 for send irank -> irank + 1
-		#sgn = -1 for send irank + 1 -> irank
-		#loop over all indices in array
-		for i in eachindex(buffer,adv_fac,endpoints)
-			if sgn*adv_fac[i] > 0.0 
-			# replace buffer value (c with endpoint value 
-				buffer[i] = endpoints[i]
-			elseif sgn*adv_fac[i] < 0.0
-				break #do nothing
-			else #average values 
-				buffer[i] = 0.5*(buffer[i] + endpoints[i])
-			end
-		end
-		
-	end
-	
-	function reconcile_element_boundaries_MPI!(df1d::Array{Float64,Ndims}, 
-	adv_fac_lower_endpoints::Array{Float64,N}, adv_fac_upper_endpoints::Array{Float64,N},
-	dfdx_lower_endpoints::Array{Float64,N}, dfdx_upper_endpoints::Array{Float64,N},
-	send_buffer::Array{Float64,N}, receive_buffer::Array{Float64,N}, coord) where {Ndims,N}
-		
-		#counter to test if endpoint data assigned
-		assignment_counter = 0
-		
-		# now deal with endpoints that are stored across ranks
-		comm = coord.comm
-		nrank = coord.nrank 
-		irank = coord.irank 
-		#send_buffer = coord.send_buffer
-		#receive_buffer = coord.receive_buffer
-		# sending pattern is cyclic. First we send data form irank -> irank + 1
-		# to fix the lower endpoints, then we send data from irank -> irank - 1
-		# to fix upper endpoints. Special exception for the periodic points.
-		# receive_buffer[1] is for data received, send_buffer[1] is data to be sent
-		
-		send_buffer .= dfdx_upper_endpoints #highest end point on THIS rank
-		# pass data from irank -> irank + 1, receive data from irank - 1
-		idst = mod(irank+1,nrank) # destination rank for sent data
-		isrc = mod(irank-1,nrank) # source rank for received data
-		#MRH what value should tag take here and below? Esp if nrank >= 32
-		rreq = MPI.Irecv!(receive_buffer, comm; source=isrc, tag=isrc+32)
-		sreq = MPI.Isend(send_buffer, comm; dest=idst, tag=irank+32)
-		#print("$irank: Sending   $irank -> $idst = $send_buffer\n")
-		stats = MPI.Waitall([rreq, sreq])
-		#print("$irank: Received $isrc -> $irank = $receive_buffer\n")
-		MPI.Barrier(comm)
-		
-		# no update receive buffer, taking into account the reconciliation
-		if irank == 0
-			if coord.bc == "periodic"
-				# depending on adv_fac, update the extreme lower endpoint with data from irank = nrank -1	
-				apply_adv_fac!(receive_buffer,adv_fac_lower_endpoints,1,dfdx_lower_endpoints)
-			else # directly use value from Cheb at extreme lower point 
-				receive_buffer .= dfdx_lower_endpoints
-			end
-		else # depending on adv_fac, update the lower endpoint with data from irank = nrank -1	
-			apply_adv_fac!(receive_buffer,adv_fac_lower_endpoints,dfdx_lower_endpoints,1)
-		end
-		
-		#now update the df1d array -- using a slice appropriate to the dimension reconciled
-		# test against coord name -- make sure to use exact string delimiters e.g. "x" not 'x'
-		# test against Ndims (autodetermined) to choose which array slices to use in assigning endpoints
-		#println("coord.name: ",coord.name," Ndims: ",Ndims)
-		if coord.name == "x" && Ndims==2
-			df1d[1,:] .= receive_buffer[:]
-			assignment_counter += 1
-		elseif coord.name == "x" && Ndims==3
-			df1d[1,:,:] .= receive_buffer[:,:]
-			assignment_counter += 1
-		elseif coord.name == "y" && Ndims==2
-			df1d[:,1] .= receive_buffer[:]
-			assignment_counter += 1
-		elseif coord.name == "y" && Ndims==3
-			df1d[:,1,:] .= receive_buffer[:,:]
-			assignment_counter += 1
-		end
-		
-		send_buffer .= dfdx_lower_endpoints #lowest end point on THIS rank
-		# pass data from irank -> irank - 1, receive data from irank + 1
-		idst = mod(irank-1,nrank) # destination rank for sent data
-		isrc = mod(irank+1,nrank) # source rank for received data
-		#MRH what value should tag take here and below? Esp if nrank >= 32
-		rreq = MPI.Irecv!(receive_buffer, comm; source=isrc, tag=isrc+32)
-		sreq = MPI.Isend(send_buffer, comm; dest=idst, tag=irank+32)
-		#print("$irank: Sending   $irank -> $idst = $send_buffer\n")
-		stats = MPI.Waitall([rreq, sreq])
-		#print("$irank: Received $isrc -> $irank = $receive_buffer\n")
-		MPI.Barrier(comm)
-		
-		if irank == nrank-1
-			if coord.bc == "periodic"
-				# depending on adv_fac, update the extreme upper endpoint with data from irank = 0
-				apply_adv_fac!(receive_buffer,adv_fac_lower_endpoints,-1,dfdx_upper_endpoints)
-			else #directly use value from Cheb
-				receive_buffer .= dfdx_upper_endpoints
-			end
-		else # enforce continuity at upper endpoint
-			apply_adv_fac!(receive_buffer,adv_fac_lower_endpoints,dfdx_upper_endpoints,-1)
-		end
-	
-		#now update the df1d array -- using a slice appropriate to the dimension reconciled
-		# test against coord name -- make sure to use exact string delimiters e.g. "x" not 'x'
-		# test against Ndims (autodetermined) to choose which array slices to use in assigning endpoints
-		#println("coord.name: ",coord.name," Ndims: ",Ndims)
-		if coord.name=="x" && Ndims ==2
-			df1d[end,:] .= receive_buffer[:]
-			assignment_counter += 1
-		elseif coord.name=="x" && Ndims ==3
-			df1d[end,:,:] .= receive_buffer[:,:]
-			assignment_counter += 1
-		elseif coord.name=="y" && Ndims ==2
-			df1d[:,end] .= receive_buffer[:]
-			assignment_counter += 1
-		elseif coord.name=="y" && Ndims ==3
-			df1d[:,end,:] .= receive_buffer[:,:]
-			assignment_counter += 1
-		end
-	
-		if  !(assignment_counter == 2)
-			println("ERROR: failure to assign endpoints in reconcile_element_boundaries_MPI! (upwind): coord.name: ",coord.name," Ndims: ",Ndims)
-		end
-	end
-	
-	#3D version
-	function derivative_x!(dfdx::Array{Float64,3},f::Array{Float64,3},
-		dfdx_lower_endpoints::Array{Float64,2}, dfdx_upper_endpoints::Array{Float64,2},
-		x_send_buffer::Array{Float64,2},x_receive_buffer::Array{Float64,2},
-		x_spectral,x,y,z)
-	
-		# differentiate f w.r.t x
-		for iz in 1:z.n
-			for iy in 1:y.n
-				@views derivative!(dfdx[:,iy,iz], f[:,iy,iz], x, adv_fac[:,iy,iz], x_spectral)
-				# get external endpoints to reconcile via MPI
-				dfdx_lower_endpoints[iy,iz] = x.scratch_2d[1,1]
-				dfdx_upper_endpoints[iy,iz] = x.scratch_2d[end,end] 
-			end
-		end
-		# now reconcile element boundaries across
-		# processes with large message involving all y 
-		if x.nelement_local < x.nelement_global
-			reconcile_element_boundaries_MPI!(dfdx,
-			 adv_fac[1,:,:], adv_fac[end,:,:],
-			 dfdx_lower_endpoints,dfdx_upper_endpoints,
-			 x_send_buffer, x_receive_buffer, x)
-		end
-		
-	end
-	
-	#3D version with upwind
-	function derivative_x!(dfdx::Array{Float64,3},f::Array{Float64,3},
-		dfdx_lower_endpoints::Array{Float64,2}, dfdx_upper_endpoints::Array{Float64,2},
-		x_send_buffer::Array{Float64,2},x_receive_buffer::Array{Float64,2},
-		x_spectral,x,y,z,adv_fac::Array{Float64,3},
-		adv_fac_lower_buffer::Array{Float64,2},adv_fac_upper_buffer::Array{Float64,2})
-	
-		# differentiate f w.r.t x
-		for iz in 1:z.n
-			for iy in 1:y.n
-				@views derivative!(dfdx[:,iy,iz], f[:,iy,iz], x, x_spectral)
-				# get external endpoints to reconcile via MPI
-				dfdx_lower_endpoints[iy,iz] = x.scratch_2d[1,1]
-				dfdx_upper_endpoints[iy,iz] = x.scratch_2d[end,end] 
-				adv_fac_lower_buffer[iy,iz] = adv_fac[1,iy,iz]
-				adv_fac_upper_buffer[iy,iz] = adv_fac[end,iy,iz] 
-			end
-		end
-		# now reconcile element boundaries across
-		# processes with large message involving all y 
-		if x.nelement_local < x.nelement_global
-			reconcile_element_boundaries_MPI!(dfdx,
-			 adv_fac_lower_buffer, adv_fac_upper_buffer,
-			 dfdx_lower_endpoints,dfdx_upper_endpoints,
-			 x_send_buffer, x_receive_buffer, x)
-		end
-		
-	end
-	
-	#2D version
-	function derivative_x!(dfdx::Array{Float64,2},f::Array{Float64,2},
-		dfdx_lower_endpoints::Array{Float64,1}, dfdx_upper_endpoints::Array{Float64,1},
-		x_send_buffer::Array{Float64,1},x_receive_buffer::Array{Float64,1},
-		x_spectral,x,y)
-	
-		# differentiate f w.r.t x
-		for iy in 1:y.n
-			@views derivative!(dfdx[:,iy], f[:,iy], x, x_spectral)
-			# get external endpoints to reconcile via MPI
-			dfdx_lower_endpoints[iy] = x.scratch_2d[1,1]
-			dfdx_upper_endpoints[iy] = x.scratch_2d[end,end] 
-		end
-		# now reconcile element boundaries across
-		# processes with large message involving all y 
-		if x.nelement_local < x.nelement_global
-			reconcile_element_boundaries_MPI!(dfdx,
-			 dfdx_lower_endpoints,dfdx_upper_endpoints,
-			 x_send_buffer, x_receive_buffer, x)
-		end
-		
-	end
-	
-	#2D version with upwind
-	function derivative_x!(dfdx::Array{Float64,2},f::Array{Float64,2},
-		dfdx_lower_endpoints::Array{Float64,1}, dfdx_upper_endpoints::Array{Float64,1},
-		x_send_buffer::Array{Float64,1},x_receive_buffer::Array{Float64,1},
-		x_spectral,x,y,adv_fac::Array{Float64,2},
-		adv_fac_lower_buffer::Array{Float64,1},adv_fac_upper_buffer::Array{Float64,1})
-	
-		# differentiate f w.r.t x
-		for iy in 1:y.n
-			@views derivative!(dfdx[:,iy], f[:,iy], x, adv_fac[:,iy], x_spectral)
-			# get external endpoints to reconcile via MPI
-			dfdx_lower_endpoints[iy] = x.scratch_2d[1,1]
-			dfdx_upper_endpoints[iy] = x.scratch_2d[end,end] 
-			adv_fac_lower_buffer[iy] = adv_fac[1,iy]
-			adv_fac_upper_buffer[iy] = adv_fac[end,iy]
-		end
-		# now reconcile element boundaries across
-		# processes with large message involving all y 
-		# pass adv_fac[1,:] -- lower x endpoints for all y
-		# and adv_fac[end,:] -- upper x endpoints for all y
-		if x.nelement_local < x.nelement_global
-			@views reconcile_element_boundaries_MPI!(dfdx, 
-			adv_fac_lower_buffer, adv_fac_upper_buffer,
-			 dfdx_lower_endpoints,dfdx_upper_endpoints,
-			 x_send_buffer, x_receive_buffer, x)
-		end
-		
-	end
-	
-	#3D version
-	function derivative_y!(dfdy::Array{Float64,3},f::Array{Float64,3},
-		dfdy_lower_endpoints::Array{Float64,2}, dfdy_upper_endpoints::Array{Float64,2},
-		y_send_buffer::Array{Float64,2},y_receive_buffer::Array{Float64,2},
-		y_spectral,x,y,z)
-	
-		# differentiate f w.r.t y
-		for iz in 1:z.n
-			for ix in 1:x.n
-				@views derivative!(dfdy[ix,:,iz], f[ix,:,iz], y, y_spectral)
-				# get external endpoints to reconcile via MPI
-				dfdy_lower_endpoints[ix,iz] = y.scratch_2d[1,1]
-				dfdy_upper_endpoints[ix,iz] = y.scratch_2d[end,end] 
-			end
-		end
-		# now reconcile element boundaries across
-		# processes with large message involving all y 
-		if y.nelement_local < y.nelement_global
-			reconcile_element_boundaries_MPI!(dfdy,
-			 dfdy_lower_endpoints,dfdy_upper_endpoints,
-			 y_send_buffer, y_receive_buffer, y)
-		end
-	end
-	
-	#2D version
-	function derivative_y!(dfdy::Array{Float64,2},f::Array{Float64,2},
-		dfdy_lower_endpoints::Array{Float64,1}, dfdy_upper_endpoints::Array{Float64,1},
-		y_send_buffer::Array{Float64,1},y_receive_buffer::Array{Float64,1},
-		y_spectral,x,y)
-	
-		# differentiate f w.r.t y
-		for ix in 1:x.n
-			@views derivative!(dfdy[ix,:], f[ix,:], y, y_spectral)
-			# get external endpoints to reconcile via MPI
-			dfdy_lower_endpoints[ix] = y.scratch_2d[1,1]
-			dfdy_upper_endpoints[ix] = y.scratch_2d[end,end] 
-		end
-		# now reconcile element boundaries across
-		# processes with large message involving all y 
-		if y.nelement_local < y.nelement_global
-			reconcile_element_boundaries_MPI!(dfdy,
-			 dfdy_lower_endpoints,dfdy_upper_endpoints,
-			 y_send_buffer, y_receive_buffer, y)
-		end
-	end
-	
-	# define inputs needed for the xy calculus test
-	# nrank must be nrank = y_nblocks*x_nblocks, i.e.,
-	# mpirun -n nrank run_MPI_test2D.jl
-	x_ngrid = 10 #number of points per element 
-	x_nelement_local  = 1 
-	x_nelement_global = 1 # number of elements 
-	y_ngrid = 12
-	y_nelement_local  = 1
-	y_nelement_global = 4
-	
-	y_nblocks = floor(Int,x_nelement_global/x_nelement_local)
-	x_nblocks = floor(Int,y_nelement_global/y_nelement_local)
-	
-	MPI.Init()
-	comm = MPI.COMM_WORLD
-	nrank = MPI.Comm_size(comm) # number of ranks 
-	irank = MPI.Comm_rank(comm) # rank of this process
-	MPI.Barrier(comm)
-	
-	
-	if irank == 0
-		println("x_ngrid = ",x_ngrid," x_nelement_local = ",x_nelement_local,
-			" x_nelement_global = ",x_nelement_global)
-		println("y_ngrid = ",y_ngrid," y_nelement_local = ",
-			y_nelement_local," y_nelement_global = ",y_nelement_global)
-		println("nrank: ",nrank)
-		println("y_nblocks: ",y_nblocks)
-		println("x_nblocks: ",x_nblocks)
-	
-		
-	end
-	
-	discretization = "chebyshev_pseudospectral"
-	etol = 1.0e-15
-	
-	y_nrank_per_block = floor(Int,nrank/y_nblocks)
-	y_iblock = mod(irank,y_nblocks) # irank - > y_iblock 
-	y_irank_sub = floor(Int,irank/y_nblocks) # irank -> y_irank_sub
-	# irank = y_iblock + x_nrank_per_block * y_irank_sub
-	# useful information for debugging
-	#println("y_nrank_per_block: ",y_nrank_per_block)
-	#println("y_iblock: ",y_iblock)
-	#println("y_irank_sub: ",y_irank_sub)
-	
-	x_nrank_per_block = floor(Int,nrank/x_nblocks)
-	x_iblock = y_irank_sub # irank - > x_iblock 
-	x_irank_sub = y_iblock # irank -> x_irank_sub
-	# irank = x_iblock * x_nrank_per_block + x_irank_sub 
-	# useful information for debugging
-	#println("x_nrank_per_block: ",x_nrank_per_block)
-	#println("x_iblock: ",x_iblock)
-	#println("x_irank_sub: ",x_irank_sub)	
-	
-	# MPI.Comm_split(comm,color,key)
-	# comm -> communicator to be split
-	# color -> label of group of processes
-	# key -> label of process in group
-	y_comm_sub = MPI.Comm_split(comm,y_iblock,y_irank_sub)
-	x_comm_sub = MPI.Comm_split(comm,x_iblock,x_irank_sub)
-	
-	L = 6.0 #physical box size in reference units 
-	bc = "" #not required to take a particular value, not used 
-	# fd_option and adv_input not actually used so given values unimportant
-	fd_option = ""
-	adv_input = advection_input("default", 1.0, 0.0, 0.0)
-	# create the 'input' struct containing input info needed to create a coordinate
-	y_input = grid_input("y", y_ngrid, y_nelement_global, y_nelement_local, 
-		y_nrank_per_block, y_irank_sub, L, discretization, fd_option, bc, adv_input,y_comm_sub)
-	x_input = grid_input("x", x_ngrid, x_nelement_global, x_nelement_local, 
-		x_nrank_per_block, x_irank_sub, L, discretization, fd_option, bc, adv_input,x_comm_sub)
-	# z dimension kept entirely local
-	z_ngrid = 7
-	z_nelement_local = 1
-	z_nelement_global = 1
-	z_nrank_per_block = 0 # dummy value
-	z_irank_sub = 0 #dummy value
-	z_comm_sub = false #dummy value
-	z_input = grid_input("z", z_ngrid, x_nelement_global, x_nelement_local, 
-		x_nrank_per_block, x_irank_sub, L, discretization, fd_option, bc, adv_input,z_comm_sub)
-	z = define_coordinate(z_input)
-	
-	# create the coordinate struct 'x'
-	y = define_coordinate(y_input)
-	x = define_coordinate(x_input)
-	# create arrays needed for Chebyshev pseudospectral treatment in x
-	y_spectral = setup_chebyshev_pseudospectral(y)
-	x_spectral = setup_chebyshev_pseudospectral(x)
-	
-	
-	# create a 3D array for the function f(x,y) to be differentiated/integrated
-	df3Ddx = Array{Float64,3}(undef, x.n, y.n, z.n)
-	f3D = Array{Float64,3}(undef, x.n, y.n, z.n)
-	adv_fac3D = Array{Float64,3}(undef, x.n, y.n, z.n)
-	g3D = Array{Float64,3}(undef, x.n, y.n, z.n)
-	dk3Ddy = Array{Float64,3}(undef, x.n, y.n, z.n)
-	k3D = Array{Float64,3}(undef, x.n, y.n, z.n)
-	h3D = Array{Float64,3}(undef, x.n, y.n, z.n)
-	# create a 2D array for the function f(x,y) to be differentiated/integrated
-	k = Array{Float64,2}(undef, x.n, y.n)
-	f = Array{Float64,2}(undef, x.n, y.n)
-	adv_fac = Array{Float64,2}(undef, x.n, y.n)
-	g = Array{Float64,2}(undef, x.n, y.n)
-	h = Array{Float64,2}(undef, x.n, y.n)
-	# create array for the derivative df/dx
-	dfdx = Array{Float64,2}(undef, x.n, y.n)
-	dkdy = Array{Float64,2}(undef, x.n, y.n)
-	
-	y_for_plot = Array{Float64,2}(undef, y.n, nrank)
-	x_for_plot = Array{Float64,2}(undef, x.n, nrank)
-	h_for_plot = Array{Float64,3}(undef, x.n, y.n, nrank)
-	g_for_plot = Array{Float64,3}(undef, x.n, y.n, nrank)
-	dfdx_for_plot = Array{Float64,3}(undef, x.n, y.n, nrank)
-	dkdy_for_plot = Array{Float64,3}(undef, x.n, y.n, nrank)
-	
-	# initialize f
-	
-	#println("x",x.grid)
-	#println("y",y.grid)
-	for iz ∈ 1:z.n
-		for iy ∈ 1:y.n
-			for ix ∈ 1:x.n
-				#adv_fac3D[ix,iy,iz] = 1.0 # so always take upper endpoint
-				adv_fac3D[ix,iy,iz] = -1.0 # so always take lower endpoint
-				f3D[ix,iy,iz] =  sinpi(2.0*x.grid[ix]/x.L) #*sinpi(2.0*y.grid[iy]/y.L) 
-				g3D[ix,iy,iz] =  (2.0*pi/x.L)*cospi(2.0*x.grid[ix]/x.L) #*sinpi(2.0*y.grid[iy]/y.L)
-				k3D[ix,iy,iz] =  sinpi(2.0*y.grid[iy]/y.L) #*sinpi(2.0*y.grid[iy]/y.L) 
-				h3D[ix,iy,iz] =  (2.0*pi/y.L)*cospi(2.0*y.grid[iy]/y.L) #*cospi(2.0*x.grid[ix]/x.L) 
-			end
-		end
-	end
-	
-	for iy ∈ 1:y.n
-		for ix ∈ 1:x.n
-			#adv_fac[ix,iy] = 1.0 # so always take upper endpoint
-			adv_fac[ix,iy] = -1.0 # so always take lower endpoint
-			k[ix,iy] =  sinpi(2.0*y.grid[iy]/y.L) #*sinpi(2.0*y.grid[iy]/y.L) 
-			f[ix,iy] =  sinpi(2.0*x.grid[ix]/x.L) #*sinpi(2.0*y.grid[iy]/y.L) 
-			g[ix,iy] =  (2.0*pi/x.L)*cospi(2.0*x.grid[ix]/x.L) #*sinpi(2.0*y.grid[iy]/y.L)
-			h[ix,iy] =  (2.0*pi/y.L)*cospi(2.0*y.grid[iy]/y.L) #*cospi(2.0*x.grid[ix]/x.L) 
-		end
-	end
-	
-	#required buffer arrays
-	x_send_buffer = Array{Float64,1}(undef,y.n)
-	x_receive_buffer = Array{Float64,1}(undef,y.n)
-	dfdx_lower_endpoints = Array{Float64,1}(undef,y.n)
-	dfdx_upper_endpoints = Array{Float64,1}(undef,y.n)
-	adv_lw = Array{Float64,1}(undef, y.n)
-	adv_up = Array{Float64,1}(undef, y.n)
-	# differentiate f w.r.t x
-	#derivative_x!(dfdx,f,dfdx_lower_endpoints,dfdx_upper_endpoints,x_send_buffer,x_receive_buffer,x_spectral,x,y)
-	derivative_x!(dfdx,f,dfdx_lower_endpoints,dfdx_upper_endpoints,x_send_buffer,x_receive_buffer,x_spectral,x,y,adv_fac,adv_lw,adv_up)
-
-	#required buffer arrays
-	x3D_send_buffer = Array{Float64,2}(undef,y.n,z.n)
-	x3D_receive_buffer = Array{Float64,2}(undef,y.n,z.n)
-	df3Ddx_lower_endpoints = Array{Float64,2}(undef,y.n,z.n)
-	df3Ddx_upper_endpoints = Array{Float64,2}(undef,y.n,z.n)
-	adv_lw3D = Array{Float64,2}(undef, y.n, z.n)
-	adv_up3D = Array{Float64,2}(undef, y.n, z.n)
-	# differentiate f w.r.t x
-	#derivative_x!(df3Ddx,f3D,df3Ddx_lower_endpoints,df3Ddx_upper_endpoints,x3D_send_buffer,x3D_receive_buffer,x_spectral,x,y,z)
-	derivative_x!(df3Ddx,f3D,df3Ddx_lower_endpoints,df3Ddx_upper_endpoints,x3D_send_buffer,x3D_receive_buffer,x_spectral,x,y,z,adv_fac3D,adv_lw3D,adv_up3D)
-		
-	#required buffer arrays
-	y_send_buffer = Array{Float64,1}(undef,x.n)
-	y_receive_buffer = Array{Float64,1}(undef,x.n)
-	dkdy_lower_endpoints = Array{Float64,1}(undef,x.n)
-	dkdy_upper_endpoints = Array{Float64,1}(undef,x.n)
-	# differentiate k w.r.t y
-	derivative_y!(dkdy,k,dkdy_lower_endpoints,dkdy_upper_endpoints,y_send_buffer,y_receive_buffer,y_spectral,x,y)
-	
-	y3D_send_buffer = Array{Float64,2}(undef,x.n,z.n)
-	y3D_receive_buffer = Array{Float64,2}(undef,x.n,z.n)
-	dk3Ddy_lower_endpoints = Array{Float64,2}(undef,x.n,z.n)
-	dk3Ddy_upper_endpoints = Array{Float64,2}(undef,x.n,z.n)
-	# differentiate f w.r.t x
-	derivative_y!(dk3Ddy,k3D,dk3Ddy_lower_endpoints,dk3Ddy_upper_endpoints,y3D_send_buffer,y3D_receive_buffer,y_spectral,x,y,z)
-	
-	# Test that error intdf is less than the specified error tolerance etol
-	#@test abs(intdf) < etol
-	# here we do a 1D integral in the x and y dimensions separately
-	
-	for iz in 1:1
-		for iy in 1:1
-			intdf3D = integral(df3Ddx[:,iy,iz], x.wgts)
-			intdf3D_out = MPI.Reduce(intdf3D,+,x.comm)
-			if(x_irank_sub == 0 && x_iblock == 0)
-				println( "abs(intdf3D_out) = ", abs(intdf3D_out), ": etol = ",etol)
-			end
-		end
-	end
-	
-	for iy in 1:1
-		intdf = integral(dfdx[:,iy], x.wgts)
-		intdf_out = MPI.Reduce(intdf,+,x.comm)
-		if(x_irank_sub == 0 && x_iblock == 0)
-			println( "abs(intdf_out) = ", abs(intdf_out), ": etol = ",etol)
-		end
-	end
-
-	for iz in 1:1
-		for ix in 1:1
-			intdk3D = integral(dkdy[ix,:,iz], y.wgts)
-			intdk3D_out = MPI.Reduce(intdk3D,+,y.comm)
-			if(y_irank_sub == 0 && y_iblock == 0)
-				println( "abs(intdk3D_out) = ", abs(intdk3D_out), ": etol = ",etol)
-			end
-		end
-	end
-	
-	for ix in 1:1
-		intdk = integral(dkdy[ix,:], y.wgts)
-		intdk_out = MPI.Reduce(intdk,+,y.comm)
-		if(y_irank_sub == 0 && y_iblock == 0)
-			println( "abs(intdk_out) = ", abs(intdk_out), ": etol = ",etol)
-		end
-	end
-	#println(intdf)
-	
-
-	
-	#if(irank == 0)
-		#println( "abs(intdf_out) = ", abs(intdf_out), ": etol = ",etol)
-	#end
-	
-	
-	# plot df g h per process
-	outprefix = "run_MPI_test2D.plot."
-	for iy in 1:1
-		plot([x.grid,x.grid], [g[:,iy],dfdx[:,iy]], xlabel="x", ylabel="", label=["g" "df/dx"],
-			 line = (2, [:solid :dash]), markersize = 2, linewidth=1)
-		outfile = outprefix*"iy."*string(iy)*"."*string(irank)*".pdf"
-		savefig(outfile)
-	end
-	for ix in 1:1
-	plot([y.grid,y.grid], [h[ix,:],dkdy[ix,:]], xlabel="y", ylabel="", label=["h" "dk/dy"],
-         line = (2, [:solid :dash]), markersize = 2, linewidth=1)
-	outfile = outprefix*"ix."*string(ix)*"."*string(irank)*".pdf"
-		savefig(outfile)
-	end
-	
-	# get data onto irank = 0 for plotting
-	
-	y_for_plot .= 0.0
-	x_for_plot .= 0.0
-	h_for_plot .= 0.0
-	g_for_plot .= 0.0
-	dfdx_for_plot .= 0.0
-	dkdy_for_plot .= 0.0
-	
-	y_for_plot[:,irank+1] .= y.grid[:]
-	x_for_plot[:,irank+1] .= x.grid[:]
-	h_for_plot[:,:,irank+1] .= h[:,:]
-	g_for_plot[:,:,irank+1] .= g[:,:]
-	dfdx_for_plot[:,:,irank+1] .= dfdx[:,:]
-	dkdy_for_plot[:,:,irank+1] .= dkdy[:,:]
-	
-	MPI.Reduce!(y_for_plot,.+,comm)
-	MPI.Reduce!(x_for_plot,.+,comm)
-	MPI.Reduce!(g_for_plot,.+,comm)
-	MPI.Reduce!(h_for_plot,.+,comm)
-	MPI.Reduce!(dfdx_for_plot,.+,comm)
-	MPI.Reduce!(dkdy_for_plot,.+,comm)
-	
-	#plot the data after the reduction operation	
-	if irank == 0
-		for iy in 1:1
-			# plot all x blocks with iy = 1
-			for x_iblockprim = 0:x_nblocks-1  
-				xlist = []
-				func_list = []
-				labels = Matrix{String}(undef, 1, 2*x_nrank_per_block)
-				
-				for iproc in 0:x_nrank_per_block-1
-					irankprim = x_iblockprim * x_nrank_per_block + iproc
-					push!(xlist,x_for_plot[:,irankprim+1])
-					push!(func_list,g_for_plot[:,iy,irankprim+1])
-					labels[iproc+1] ="g"
-				end
-				
-				for iproc in 0:x_nrank_per_block-1
-					irankprim = x_iblockprim * x_nrank_per_block + iproc
-					push!(xlist,x_for_plot[:,irankprim+1])
-					push!(func_list,dfdx_for_plot[:,iy,irankprim+1])
-					labels[iproc+1+x_nrank_per_block] ="df/dx"
-				end
-				#println(xlist)
-				#println(func_list)
-				#println(labels)
-				plot(xlist, func_list, xlabel="x", ylabel="", label=labels, markersize = 1, linewidth=1)
-				outfile = "run_MPI_test2D.plot.iy."*string(iy)*".x_iblock."*string(x_iblockprim)*".global.pdf"
-				savefig(outfile)
-				println(outfile)	
-			end
-		end
-		
-		for ix in 1:1
-			# plot all y blocks with ix = 1 
-			for y_iblockprim = 0:y_nblocks-1  
-				xlist = []
-				func_list = []
-				labels = Matrix{String}(undef, 1, 2*y_nrank_per_block)
-				
-				for iproc in 0:y_nrank_per_block-1
-					irankprim = y_iblockprim + x_nrank_per_block * iproc
-					push!(xlist,y_for_plot[:,irankprim+1])
-					push!(func_list,h_for_plot[ix,:,irankprim+1])
-					labels[iproc+1] ="h"
-				end
-				
-				for iproc in 0:y_nrank_per_block-1
-					irankprim = y_iblockprim + x_nrank_per_block * iproc
-					push!(xlist,y_for_plot[:,irankprim+1])
-					push!(func_list,dkdy_for_plot[ix,:,irankprim+1])
-					labels[iproc+1+y_nrank_per_block] ="dk/dy"
-				end
-				#println(xlist)
-				#println(func_list)
-				#println(labels)
-				plot(xlist, func_list, xlabel="y", ylabel="", label=labels, markersize = 1, linewidth=1)
-				outfile = "run_MPI_test2D.plot.ix."*string(ix)*".y_iblock."*string(y_iblockprim)*".global.pdf"
-				savefig(outfile)
-				println(outfile)	
-			end
-		end
-	end	
-	
-	MPI.Finalize()
-end
-	
\ No newline at end of file
diff --git a/run_MPI_test_startingscript.jl b/run_MPI_test_startingscript.jl
deleted file mode 100644
index 696a89293..000000000
--- a/run_MPI_test_startingscript.jl
+++ /dev/null
@@ -1,49 +0,0 @@
-if abspath(PROGRAM_FILE) == @__FILE__
-    using Pkg
-    Pkg.activate(".")
-
-    import moment_kinetics
-	using moment_kinetics.input_structs: grid_input, advection_input
-	using moment_kinetics.coordinates: define_coordinate
-	using moment_kinetics.chebyshev: setup_chebyshev_pseudospectral
-	using moment_kinetics.calculus: derivative!, integral
-
-	discretization = "chebyshev_pseudospectral"
-
-	etol = 1.0e-15
-	# define inputs needed for the test
-	ngrid = 8 
-	nelement = 5
-	L = 6.0
-	bc = "periodic"
-	# fd_option and adv_input not actually used so given values unimportant
-	fd_option = ""
-	adv_input = advection_input("default", 1.0, 0.0, 0.0)
-	# create the 'input' struct containing input info needed to create a
-	# coordinate
-	input = grid_input("coord", ngrid, nelement, L,
-		discretization, fd_option, bc, adv_input)
-	# create the coordinate struct 'x'
-	x = define_coordinate(input)
-	# create arrays needed for Chebyshev pseudospectral treatment in x
-	# and create the plans for the forward and backward fast Chebyshev
-	# transforms
-	spectral = setup_chebyshev_pseudospectral(x)
-	# create array for the function f(x) to be differentiated/integrated
-	f = Array{Float64,1}(undef, x.n)
-	# create array for the derivative df/dx
-	df = Array{Float64,1}(undef, x.n)
-	# initialize f
-	for ix ∈ 1:x.n
-		f[ix] = ( (cospi(2.0*x.grid[ix]/x.L)+sinpi(2.0*x.grid[ix]/x.L))
-				  * exp(-x.grid[ix]^2) )
-	end
-	# differentiate f
-	derivative!(df, f, x, spectral)
-	# integrate df/dx
-	intdf = integral(df, x.wgts)
-
-	# Test that error intdf is less than the specified error tolerance etol
-	#@test abs(intdf) < etol
-	println( "abs(intdf) = ", abs(intdf), ": etol = ",etol)
-end 
\ No newline at end of file
diff --git a/runs/1D-wall_MMS_new_nel_r_1_z_16_vpa_16_vperp_1_diss.toml b/runs/1D-wall_MMS_new_nel_r_1_z_16_vpa_16_vperp_1_diss.toml
index 0fb7e9edd..60f09b084 100644
--- a/runs/1D-wall_MMS_new_nel_r_1_z_16_vpa_16_vperp_1_diss.toml
+++ b/runs/1D-wall_MMS_new_nel_r_1_z_16_vpa_16_vperp_1_diss.toml
@@ -56,7 +56,6 @@ vpa_discretization = "chebyshev_pseudospectral"
 vperp_ngrid = 1
 vperp_nelement = 1
 vperp_L = 6.0
-vperp_bc = "periodic"
 #vperp_discretization = "finite_difference"
 vperp_discretization = "chebyshev_pseudospectral"
 
diff --git a/runs/1D-wall_MMS_new_nel_r_1_z_16_vpa_16_vperp_1_krook.toml b/runs/1D-wall_MMS_new_nel_r_1_z_16_vpa_16_vperp_1_krook.toml
index e9d44cc5b..98efd587f 100644
--- a/runs/1D-wall_MMS_new_nel_r_1_z_16_vpa_16_vperp_1_krook.toml
+++ b/runs/1D-wall_MMS_new_nel_r_1_z_16_vpa_16_vperp_1_krook.toml
@@ -58,7 +58,6 @@ vpa_discretization = "chebyshev_pseudospectral"
 vperp_ngrid = 1
 vperp_nelement = 1
 vperp_L = 6.0
-vperp_bc = "periodic"
 vperp_discretization = "chebyshev_pseudospectral"
 
 vz_ngrid = 17
diff --git a/runs/1D-wall_MMS_new_nel_r_1_z_16_vpa_8_vperp_8_krook.toml b/runs/1D-wall_MMS_new_nel_r_1_z_16_vpa_8_vperp_8_krook.toml
index ae02bfd9c..135f5817e 100644
--- a/runs/1D-wall_MMS_new_nel_r_1_z_16_vpa_8_vperp_8_krook.toml
+++ b/runs/1D-wall_MMS_new_nel_r_1_z_16_vpa_8_vperp_8_krook.toml
@@ -58,7 +58,6 @@ vpa_discretization = "chebyshev_pseudospectral"
 vperp_ngrid = 17
 vperp_nelement = 8
 vperp_L = 6.0
-vperp_bc = "periodic"
 vperp_discretization = "chebyshev_pseudospectral"
 
 vz_ngrid = 17
diff --git a/runs/2D-sound-wave_cheb-manf-Dirichlet.toml b/runs/2D-sound-wave_cheb-manf-Dirichlet.toml
index 9f19ed6c4..8efc5dfd0 100644
--- a/runs/2D-sound-wave_cheb-manf-Dirichlet.toml
+++ b/runs/2D-sound-wave_cheb-manf-Dirichlet.toml
@@ -49,7 +49,6 @@ vpa_discretization = "chebyshev_pseudospectral"
 vperp_ngrid = 5
 vperp_nelement = 8
 vperp_L = 8.0
-vperp_bc = "periodic"
 vperp_discretization = "finite_difference"
 
 [manufactured_solns]
diff --git a/runs/2D-sound-wave_cheb-vperp-manf.toml b/runs/2D-sound-wave_cheb-vperp-manf.toml
index 8bfce7cdc..a990f3455 100644
--- a/runs/2D-sound-wave_cheb-vperp-manf.toml
+++ b/runs/2D-sound-wave_cheb-vperp-manf.toml
@@ -49,7 +49,6 @@ vpa_discretization = "chebyshev_pseudospectral"
 vperp_ngrid = 5
 vperp_nelement = 8
 vperp_L = 8.0
-vperp_bc = "periodic"
 #vperp_discretization = "finite_difference"
 vperp_discretization = "chebyshev_pseudospectral"
 
diff --git a/runs/2D-sound-wave_cheb-vperp.toml b/runs/2D-sound-wave_cheb-vperp.toml
index 5ff7a9c11..2c4aad8f2 100644
--- a/runs/2D-sound-wave_cheb-vperp.toml
+++ b/runs/2D-sound-wave_cheb-vperp.toml
@@ -49,5 +49,4 @@ vpa_discretization = "chebyshev_pseudospectral"
 vperp_ngrid = 5
 vperp_nelement = 8
 vperp_L = 8.0
-vperp_bc = "periodic"
 vperp_discretization = "finite_difference_vperp"
diff --git a/runs/2D-sound-wave_cheb-with-neutrals-manf.toml b/runs/2D-sound-wave_cheb-with-neutrals-manf.toml
index 689af6f5d..0db504551 100644
--- a/runs/2D-sound-wave_cheb-with-neutrals-manf.toml
+++ b/runs/2D-sound-wave_cheb-with-neutrals-manf.toml
@@ -53,7 +53,6 @@ vpa_discretization = "chebyshev_pseudospectral"
 vperp_ngrid = 5
 vperp_nelement = 8
 vperp_L = 8.0
-vperp_bc = "periodic"
 #vperp_discretization = "finite_difference"
 vperp_discretization = "chebyshev_pseudospectral"
 
diff --git a/runs/2D-sound-wave_cheb-with-neutrals-small.toml b/runs/2D-sound-wave_cheb-with-neutrals-small.toml
index 60134f5d1..cf6c210d7 100644
--- a/runs/2D-sound-wave_cheb-with-neutrals-small.toml
+++ b/runs/2D-sound-wave_cheb-with-neutrals-small.toml
@@ -49,7 +49,6 @@ vpa_discretization = "chebyshev_pseudospectral"
 vperp_ngrid = 5
 vperp_nelement = 2
 vperp_L = 8.0
-vperp_bc = "periodic"
 #vperp_discretization = "finite_difference"
 vperp_discretization = "chebyshev_pseudospectral"
 
diff --git a/runs/2D-sound-wave_cheb_cxiz_nel_r_2_z_2_vpa_16_vperp_16.toml b/runs/2D-sound-wave_cheb_cxiz_nel_r_2_z_2_vpa_16_vperp_16.toml
index 24c9a8d48..2f8a0ed55 100644
--- a/runs/2D-sound-wave_cheb_cxiz_nel_r_2_z_2_vpa_16_vperp_16.toml
+++ b/runs/2D-sound-wave_cheb_cxiz_nel_r_2_z_2_vpa_16_vperp_16.toml
@@ -52,7 +52,6 @@ vpa_discretization = "chebyshev_pseudospectral"
 vperp_ngrid = 5
 vperp_nelement = 16
 vperp_L = 6.0
-vperp_bc = "periodic"
 #vperp_discretization = "finite_difference"
 vperp_discretization = "chebyshev_pseudospectral"
 
diff --git a/runs/2D-sound-wave_cheb_cxiz_nel_r_2_z_2_vpa_2_vperp_2.toml b/runs/2D-sound-wave_cheb_cxiz_nel_r_2_z_2_vpa_2_vperp_2.toml
index 37f1c2022..53d15bda5 100644
--- a/runs/2D-sound-wave_cheb_cxiz_nel_r_2_z_2_vpa_2_vperp_2.toml
+++ b/runs/2D-sound-wave_cheb_cxiz_nel_r_2_z_2_vpa_2_vperp_2.toml
@@ -52,7 +52,6 @@ vpa_discretization = "chebyshev_pseudospectral"
 vperp_ngrid = 5
 vperp_nelement = 2
 vperp_L = 6.0
-vperp_bc = "periodic"
 #vperp_discretization = "finite_difference"
 vperp_discretization = "chebyshev_pseudospectral"
 
diff --git a/runs/2D-sound-wave_cheb_cxiz_nel_r_2_z_2_vpa_4_vperp_4.toml b/runs/2D-sound-wave_cheb_cxiz_nel_r_2_z_2_vpa_4_vperp_4.toml
index 8183821d4..27dcd15dc 100644
--- a/runs/2D-sound-wave_cheb_cxiz_nel_r_2_z_2_vpa_4_vperp_4.toml
+++ b/runs/2D-sound-wave_cheb_cxiz_nel_r_2_z_2_vpa_4_vperp_4.toml
@@ -52,7 +52,6 @@ vpa_discretization = "chebyshev_pseudospectral"
 vperp_ngrid = 5
 vperp_nelement = 4
 vperp_L = 6.0
-vperp_bc = "periodic"
 #vperp_discretization = "finite_difference"
 vperp_discretization = "chebyshev_pseudospectral"
 
diff --git a/runs/2D-sound-wave_cheb_cxiz_nel_r_2_z_2_vpa_8_vperp_8.toml b/runs/2D-sound-wave_cheb_cxiz_nel_r_2_z_2_vpa_8_vperp_8.toml
index 417d58147..73805d403 100644
--- a/runs/2D-sound-wave_cheb_cxiz_nel_r_2_z_2_vpa_8_vperp_8.toml
+++ b/runs/2D-sound-wave_cheb_cxiz_nel_r_2_z_2_vpa_8_vperp_8.toml
@@ -52,7 +52,6 @@ vpa_discretization = "chebyshev_pseudospectral"
 vperp_ngrid = 5
 vperp_nelement = 8
 vperp_L = 6.0
-vperp_bc = "periodic"
 #vperp_discretization = "finite_difference"
 vperp_discretization = "chebyshev_pseudospectral"
 
diff --git a/runs/2D-sound-wave_cheb_ion_only_nel_r_12_z_12_vpa_12_vperp_12.toml b/runs/2D-sound-wave_cheb_ion_only_nel_r_12_z_12_vpa_12_vperp_12.toml
index 4099fe115..8d5dea85c 100644
--- a/runs/2D-sound-wave_cheb_ion_only_nel_r_12_z_12_vpa_12_vperp_12.toml
+++ b/runs/2D-sound-wave_cheb_ion_only_nel_r_12_z_12_vpa_12_vperp_12.toml
@@ -52,7 +52,6 @@ vpa_discretization = "chebyshev_pseudospectral"
 vperp_ngrid = 9
 vperp_nelement = 12
 vperp_L = 6.0
-vperp_bc = "periodic"
 #vperp_discretization = "finite_difference"
 vperp_discretization = "chebyshev_pseudospectral"
 
diff --git a/runs/2D-sound-wave_cheb_ion_only_nel_r_16_z_16_vpa_16_vperp_16.toml b/runs/2D-sound-wave_cheb_ion_only_nel_r_16_z_16_vpa_16_vperp_16.toml
index 94f315ddc..bd58a2fb6 100644
--- a/runs/2D-sound-wave_cheb_ion_only_nel_r_16_z_16_vpa_16_vperp_16.toml
+++ b/runs/2D-sound-wave_cheb_ion_only_nel_r_16_z_16_vpa_16_vperp_16.toml
@@ -52,7 +52,6 @@ vpa_discretization = "chebyshev_pseudospectral"
 vperp_ngrid = 9
 vperp_nelement = 16
 vperp_L = 6.0
-vperp_bc = "periodic"
 #vperp_discretization = "finite_difference"
 vperp_discretization = "chebyshev_pseudospectral"
 
diff --git a/runs/2D-sound-wave_cheb_ion_only_nel_r_2_z_2_vpa_2_vperp_2.toml b/runs/2D-sound-wave_cheb_ion_only_nel_r_2_z_2_vpa_2_vperp_2.toml
index a7b0ae9f3..9535c9525 100644
--- a/runs/2D-sound-wave_cheb_ion_only_nel_r_2_z_2_vpa_2_vperp_2.toml
+++ b/runs/2D-sound-wave_cheb_ion_only_nel_r_2_z_2_vpa_2_vperp_2.toml
@@ -52,7 +52,6 @@ vpa_discretization = "chebyshev_pseudospectral"
 vperp_ngrid = 9
 vperp_nelement = 2
 vperp_L = 6.0
-vperp_bc = "periodic"
 #vperp_discretization = "finite_difference"
 vperp_discretization = "chebyshev_pseudospectral"
 
diff --git a/runs/2D-sound-wave_cheb_ion_only_nel_r_4_z_4_vpa_4_vperp_4.toml b/runs/2D-sound-wave_cheb_ion_only_nel_r_4_z_4_vpa_4_vperp_4.toml
index 5da1bc0f4..4df97d26b 100644
--- a/runs/2D-sound-wave_cheb_ion_only_nel_r_4_z_4_vpa_4_vperp_4.toml
+++ b/runs/2D-sound-wave_cheb_ion_only_nel_r_4_z_4_vpa_4_vperp_4.toml
@@ -52,7 +52,6 @@ vpa_discretization = "chebyshev_pseudospectral"
 vperp_ngrid = 9
 vperp_nelement = 4
 vperp_L = 6.0
-vperp_bc = "periodic"
 #vperp_discretization = "finite_difference"
 vperp_discretization = "chebyshev_pseudospectral"
 
diff --git a/runs/2D-sound-wave_cheb_ion_only_nel_r_8_z_8_vpa_8_vperp_8.toml b/runs/2D-sound-wave_cheb_ion_only_nel_r_8_z_8_vpa_8_vperp_8.toml
index bfc3b50e2..d610acab7 100644
--- a/runs/2D-sound-wave_cheb_ion_only_nel_r_8_z_8_vpa_8_vperp_8.toml
+++ b/runs/2D-sound-wave_cheb_ion_only_nel_r_8_z_8_vpa_8_vperp_8.toml
@@ -52,7 +52,6 @@ vpa_discretization = "chebyshev_pseudospectral"
 vperp_ngrid = 9
 vperp_nelement = 8
 vperp_L = 6.0
-vperp_bc = "periodic"
 #vperp_discretization = "finite_difference"
 vperp_discretization = "chebyshev_pseudospectral"
 
diff --git a/runs/2D-sound-wave_cheb_nel_r_2_z_2_vpa_16_vperp_16.toml b/runs/2D-sound-wave_cheb_nel_r_2_z_2_vpa_16_vperp_16.toml
index 120213b7d..103e0949a 100644
--- a/runs/2D-sound-wave_cheb_nel_r_2_z_2_vpa_16_vperp_16.toml
+++ b/runs/2D-sound-wave_cheb_nel_r_2_z_2_vpa_16_vperp_16.toml
@@ -52,7 +52,6 @@ vpa_discretization = "chebyshev_pseudospectral"
 vperp_ngrid = 5
 vperp_nelement = 16
 vperp_L = 6.0
-vperp_bc = "periodic"
 #vperp_discretization = "finite_difference"
 vperp_discretization = "chebyshev_pseudospectral"
 
diff --git a/runs/2D-sound-wave_cheb_nel_r_2_z_2_vpa_2_vperp_2.toml b/runs/2D-sound-wave_cheb_nel_r_2_z_2_vpa_2_vperp_2.toml
index e2ded34e1..4903417f0 100644
--- a/runs/2D-sound-wave_cheb_nel_r_2_z_2_vpa_2_vperp_2.toml
+++ b/runs/2D-sound-wave_cheb_nel_r_2_z_2_vpa_2_vperp_2.toml
@@ -52,7 +52,6 @@ vpa_discretization = "chebyshev_pseudospectral"
 vperp_ngrid = 9
 vperp_nelement = 2
 vperp_L = 6.0
-vperp_bc = "periodic"
 #vperp_discretization = "finite_difference"
 vperp_discretization = "chebyshev_pseudospectral"
 
diff --git a/runs/2D-sound-wave_cheb_nel_r_2_z_2_vpa_4_vperp_4.toml b/runs/2D-sound-wave_cheb_nel_r_2_z_2_vpa_4_vperp_4.toml
index f7f2ab310..f1b160980 100644
--- a/runs/2D-sound-wave_cheb_nel_r_2_z_2_vpa_4_vperp_4.toml
+++ b/runs/2D-sound-wave_cheb_nel_r_2_z_2_vpa_4_vperp_4.toml
@@ -52,7 +52,6 @@ vpa_discretization = "chebyshev_pseudospectral"
 vperp_ngrid = 5
 vperp_nelement = 4
 vperp_L = 6.0
-vperp_bc = "periodic"
 #vperp_discretization = "finite_difference"
 vperp_discretization = "chebyshev_pseudospectral"
 
diff --git a/runs/2D-sound-wave_cheb_nel_r_2_z_2_vpa_8_vperp_8.toml b/runs/2D-sound-wave_cheb_nel_r_2_z_2_vpa_8_vperp_8.toml
index e6af36280..59424f32a 100644
--- a/runs/2D-sound-wave_cheb_nel_r_2_z_2_vpa_8_vperp_8.toml
+++ b/runs/2D-sound-wave_cheb_nel_r_2_z_2_vpa_8_vperp_8.toml
@@ -52,7 +52,6 @@ vpa_discretization = "chebyshev_pseudospectral"
 vperp_ngrid = 5
 vperp_nelement = 8
 vperp_L = 6.0
-vperp_bc = "periodic"
 #vperp_discretization = "finite_difference"
 vperp_discretization = "chebyshev_pseudospectral"
 
diff --git a/runs/2D-wall-Dirichlet_nel_r_2_z_2_vpa_16.toml b/runs/2D-wall-Dirichlet_nel_r_2_z_2_vpa_16.toml
index 2848ab359..b51c20e82 100644
--- a/runs/2D-wall-Dirichlet_nel_r_2_z_2_vpa_16.toml
+++ b/runs/2D-wall-Dirichlet_nel_r_2_z_2_vpa_16.toml
@@ -53,7 +53,6 @@ vpa_discretization = "chebyshev_pseudospectral"
 vperp_ngrid = 6
 vperp_nelement = 16
 vperp_L = 6.0
-vperp_bc = "periodic"
 #vperp_discretization = "finite_difference"
 vperp_discretization = "chebyshev_pseudospectral"
 
diff --git a/runs/2D-wall-Dirichlet_nel_r_2_z_2_vpa_2.toml b/runs/2D-wall-Dirichlet_nel_r_2_z_2_vpa_2.toml
index 71f5912a2..468a3e838 100644
--- a/runs/2D-wall-Dirichlet_nel_r_2_z_2_vpa_2.toml
+++ b/runs/2D-wall-Dirichlet_nel_r_2_z_2_vpa_2.toml
@@ -53,7 +53,6 @@ vpa_discretization = "chebyshev_pseudospectral"
 vperp_ngrid = 6
 vperp_nelement = 2
 vperp_L = 6.0
-vperp_bc = "periodic"
 #vperp_discretization = "finite_difference"
 vperp_discretization = "chebyshev_pseudospectral"
 
diff --git a/runs/2D-wall-Dirichlet_nel_r_2_z_2_vpa_4.toml b/runs/2D-wall-Dirichlet_nel_r_2_z_2_vpa_4.toml
index 0e21a43a4..77521a9d9 100644
--- a/runs/2D-wall-Dirichlet_nel_r_2_z_2_vpa_4.toml
+++ b/runs/2D-wall-Dirichlet_nel_r_2_z_2_vpa_4.toml
@@ -53,7 +53,6 @@ vpa_discretization = "chebyshev_pseudospectral"
 vperp_ngrid = 6
 vperp_nelement = 4
 vperp_L = 6.0
-vperp_bc = "periodic"
 #vperp_discretization = "finite_difference"
 vperp_discretization = "chebyshev_pseudospectral"
 
diff --git a/runs/2D-wall-Dirichlet_nel_r_2_z_2_vpa_8.toml b/runs/2D-wall-Dirichlet_nel_r_2_z_2_vpa_8.toml
index f7dcd08c8..dab39facd 100644
--- a/runs/2D-wall-Dirichlet_nel_r_2_z_2_vpa_8.toml
+++ b/runs/2D-wall-Dirichlet_nel_r_2_z_2_vpa_8.toml
@@ -53,7 +53,6 @@ vpa_discretization = "chebyshev_pseudospectral"
 vperp_ngrid = 6
 vperp_nelement = 8
 vperp_L = 6.0
-vperp_bc = "periodic"
 #vperp_discretization = "finite_difference"
 vperp_discretization = "chebyshev_pseudospectral"
 
diff --git a/runs/2D-wall-bc_cheb.toml b/runs/2D-wall-bc_cheb.toml
index ac8bb46d0..93d4da750 100644
--- a/runs/2D-wall-bc_cheb.toml
+++ b/runs/2D-wall-bc_cheb.toml
@@ -65,7 +65,6 @@ vpa_discretization = "chebyshev_pseudospectral"
 vperp_ngrid = 8
 vperp_nelement = 8
 vperp_L = 8.0
-vperp_bc = "periodic"
 vperp_discretization = "chebyshev_pseudospectral"
 
 [manufactured_solns]
diff --git a/runs/2D-wall_MMS_nel_r_16_z_16_vpa_16_vperp_1_diss.toml b/runs/2D-wall_MMS_nel_r_16_z_16_vpa_16_vperp_1_diss.toml
index 9721aa455..52f761612 100644
--- a/runs/2D-wall_MMS_nel_r_16_z_16_vpa_16_vperp_1_diss.toml
+++ b/runs/2D-wall_MMS_nel_r_16_z_16_vpa_16_vperp_1_diss.toml
@@ -56,7 +56,6 @@ vpa_discretization = "chebyshev_pseudospectral"
 vperp_ngrid = 1
 vperp_nelement = 1
 vperp_L = 6.0
-vperp_bc = "periodic"
 #vperp_discretization = "finite_difference"
 vperp_discretization = "chebyshev_pseudospectral"
 
diff --git a/runs/2D-wall_MMS_nel_r_2_z_2_vpa_16_vperp_1_diss.toml b/runs/2D-wall_MMS_nel_r_2_z_2_vpa_16_vperp_1_diss.toml
index 127af686c..e67fc2df9 100644
--- a/runs/2D-wall_MMS_nel_r_2_z_2_vpa_16_vperp_1_diss.toml
+++ b/runs/2D-wall_MMS_nel_r_2_z_2_vpa_16_vperp_1_diss.toml
@@ -56,7 +56,6 @@ vpa_discretization = "chebyshev_pseudospectral"
 vperp_ngrid = 1
 vperp_nelement = 1
 vperp_L = 6.0
-vperp_bc = "periodic"
 #vperp_discretization = "finite_difference"
 vperp_discretization = "chebyshev_pseudospectral"
 
diff --git a/runs/2D-wall_MMS_nel_r_32_z_32_vpa_16_vperp_1_diss.toml b/runs/2D-wall_MMS_nel_r_32_z_32_vpa_16_vperp_1_diss.toml
index ddc151d63..5bbb30479 100644
--- a/runs/2D-wall_MMS_nel_r_32_z_32_vpa_16_vperp_1_diss.toml
+++ b/runs/2D-wall_MMS_nel_r_32_z_32_vpa_16_vperp_1_diss.toml
@@ -56,7 +56,6 @@ vpa_discretization = "chebyshev_pseudospectral"
 vperp_ngrid = 1
 vperp_nelement = 1
 vperp_L = 6.0
-vperp_bc = "periodic"
 #vperp_discretization = "finite_difference"
 vperp_discretization = "chebyshev_pseudospectral"
 
diff --git a/runs/2D-wall_MMS_nel_r_32_z_32_vpa_16_vperp_1_diss5.toml b/runs/2D-wall_MMS_nel_r_32_z_32_vpa_16_vperp_1_diss5.toml
index 60623634b..0ebf89144 100644
--- a/runs/2D-wall_MMS_nel_r_32_z_32_vpa_16_vperp_1_diss5.toml
+++ b/runs/2D-wall_MMS_nel_r_32_z_32_vpa_16_vperp_1_diss5.toml
@@ -56,7 +56,6 @@ vpa_discretization = "chebyshev_pseudospectral"
 vperp_ngrid = 1
 vperp_nelement = 1
 vperp_L = 6.0
-vperp_bc = "periodic"
 #vperp_discretization = "finite_difference"
 vperp_discretization = "chebyshev_pseudospectral"
 
diff --git a/runs/2D-wall_MMS_nel_r_4_z_4_vpa_16_vperp_1_diss.toml b/runs/2D-wall_MMS_nel_r_4_z_4_vpa_16_vperp_1_diss.toml
index 60defe401..7ace633dd 100644
--- a/runs/2D-wall_MMS_nel_r_4_z_4_vpa_16_vperp_1_diss.toml
+++ b/runs/2D-wall_MMS_nel_r_4_z_4_vpa_16_vperp_1_diss.toml
@@ -56,7 +56,6 @@ vpa_discretization = "chebyshev_pseudospectral"
 vperp_ngrid = 1
 vperp_nelement = 1
 vperp_L = 6.0
-vperp_bc = "periodic"
 #vperp_discretization = "finite_difference"
 vperp_discretization = "chebyshev_pseudospectral"
 
diff --git a/runs/2D-wall_MMS_nel_r_8_z_8_vpa_16_vperp_1_diss.toml b/runs/2D-wall_MMS_nel_r_8_z_8_vpa_16_vperp_1_diss.toml
index b2ec23681..672f7d289 100644
--- a/runs/2D-wall_MMS_nel_r_8_z_8_vpa_16_vperp_1_diss.toml
+++ b/runs/2D-wall_MMS_nel_r_8_z_8_vpa_16_vperp_1_diss.toml
@@ -56,7 +56,6 @@ vpa_discretization = "chebyshev_pseudospectral"
 vperp_ngrid = 1
 vperp_nelement = 1
 vperp_L = 6.0
-vperp_bc = "periodic"
 #vperp_discretization = "finite_difference"
 vperp_discretization = "chebyshev_pseudospectral"
 
diff --git a/runs/2D-wall_cheb-with-neutrals-small.toml b/runs/2D-wall_cheb-with-neutrals-small.toml
index 655bbd490..a8134e276 100644
--- a/runs/2D-wall_cheb-with-neutrals-small.toml
+++ b/runs/2D-wall_cheb-with-neutrals-small.toml
@@ -50,7 +50,6 @@ vpa_discretization = "chebyshev_pseudospectral"
 vperp_ngrid = 5
 vperp_nelement = 2
 vperp_L = 8.0
-vperp_bc = "periodic"
 #vperp_discretization = "finite_difference"
 vperp_discretization = "chebyshev_pseudospectral"
 
diff --git a/runs/2D-wall_cheb-with-neutrals-with-sheath_nel_r_1_z_2_vpa_12_vperp_12.toml b/runs/2D-wall_cheb-with-neutrals-with-sheath_nel_r_1_z_2_vpa_12_vperp_12.toml
index ac9ab4880..2d5ba9fc2 100644
--- a/runs/2D-wall_cheb-with-neutrals-with-sheath_nel_r_1_z_2_vpa_12_vperp_12.toml
+++ b/runs/2D-wall_cheb-with-neutrals-with-sheath_nel_r_1_z_2_vpa_12_vperp_12.toml
@@ -51,7 +51,6 @@ vpa_discretization = "chebyshev_pseudospectral"
 vperp_ngrid = 6
 vperp_nelement = 12
 vperp_L = 6.0
-vperp_bc = "periodic"
 #vperp_discretization = "finite_difference"
 vperp_discretization = "chebyshev_pseudospectral"
 
diff --git a/runs/2D-wall_cheb-with-neutrals-with-sheath_nel_r_1_z_2_vpa_16_vperp_16.toml b/runs/2D-wall_cheb-with-neutrals-with-sheath_nel_r_1_z_2_vpa_16_vperp_16.toml
index 02899d9fa..6ed59d97d 100644
--- a/runs/2D-wall_cheb-with-neutrals-with-sheath_nel_r_1_z_2_vpa_16_vperp_16.toml
+++ b/runs/2D-wall_cheb-with-neutrals-with-sheath_nel_r_1_z_2_vpa_16_vperp_16.toml
@@ -51,7 +51,6 @@ vpa_discretization = "chebyshev_pseudospectral"
 vperp_ngrid = 6
 vperp_nelement = 16
 vperp_L = 6.0
-vperp_bc = "periodic"
 #vperp_discretization = "finite_difference"
 vperp_discretization = "chebyshev_pseudospectral"
 
diff --git a/runs/2D-wall_cheb-with-neutrals-with-sheath_nel_r_1_z_2_vpa_24_vperp_24.toml b/runs/2D-wall_cheb-with-neutrals-with-sheath_nel_r_1_z_2_vpa_24_vperp_24.toml
index 416d6b116..566d6744d 100644
--- a/runs/2D-wall_cheb-with-neutrals-with-sheath_nel_r_1_z_2_vpa_24_vperp_24.toml
+++ b/runs/2D-wall_cheb-with-neutrals-with-sheath_nel_r_1_z_2_vpa_24_vperp_24.toml
@@ -51,7 +51,6 @@ vpa_discretization = "chebyshev_pseudospectral"
 vperp_ngrid = 6
 vperp_nelement = 24
 vperp_L = 6.0
-vperp_bc = "periodic"
 #vperp_discretization = "finite_difference"
 vperp_discretization = "chebyshev_pseudospectral"
 
diff --git a/runs/2D-wall_cheb-with-neutrals-with-sheath_nel_r_1_z_2_vpa_2_vperp_2.toml b/runs/2D-wall_cheb-with-neutrals-with-sheath_nel_r_1_z_2_vpa_2_vperp_2.toml
index 2fe1ec1fc..d6e47771d 100644
--- a/runs/2D-wall_cheb-with-neutrals-with-sheath_nel_r_1_z_2_vpa_2_vperp_2.toml
+++ b/runs/2D-wall_cheb-with-neutrals-with-sheath_nel_r_1_z_2_vpa_2_vperp_2.toml
@@ -51,7 +51,6 @@ vpa_discretization = "chebyshev_pseudospectral"
 vperp_ngrid = 6
 vperp_nelement = 2
 vperp_L = 6.0
-vperp_bc = "periodic"
 #vperp_discretization = "finite_difference"
 vperp_discretization = "chebyshev_pseudospectral"
 
diff --git a/runs/2D-wall_cheb-with-neutrals-with-sheath_nel_r_1_z_2_vpa_4_vperp_4.toml b/runs/2D-wall_cheb-with-neutrals-with-sheath_nel_r_1_z_2_vpa_4_vperp_4.toml
index f424f965a..f9e95e223 100644
--- a/runs/2D-wall_cheb-with-neutrals-with-sheath_nel_r_1_z_2_vpa_4_vperp_4.toml
+++ b/runs/2D-wall_cheb-with-neutrals-with-sheath_nel_r_1_z_2_vpa_4_vperp_4.toml
@@ -51,7 +51,6 @@ vpa_discretization = "chebyshev_pseudospectral"
 vperp_ngrid = 6
 vperp_nelement = 4
 vperp_L = 6.0
-vperp_bc = "periodic"
 #vperp_discretization = "finite_difference"
 vperp_discretization = "chebyshev_pseudospectral"
 
diff --git a/runs/2D-wall_cheb-with-neutrals-with-sheath_nel_r_1_z_2_vpa_8_vperp_8.toml b/runs/2D-wall_cheb-with-neutrals-with-sheath_nel_r_1_z_2_vpa_8_vperp_8.toml
index 5fdc2e510..8f2478bd2 100644
--- a/runs/2D-wall_cheb-with-neutrals-with-sheath_nel_r_1_z_2_vpa_8_vperp_8.toml
+++ b/runs/2D-wall_cheb-with-neutrals-with-sheath_nel_r_1_z_2_vpa_8_vperp_8.toml
@@ -51,7 +51,6 @@ vpa_discretization = "chebyshev_pseudospectral"
 vperp_ngrid = 6
 vperp_nelement = 8
 vperp_L = 6.0
-vperp_bc = "periodic"
 #vperp_discretization = "finite_difference"
 vperp_discretization = "chebyshev_pseudospectral"
 
diff --git a/runs/2D-wall_cheb-with-neutrals.toml b/runs/2D-wall_cheb-with-neutrals.toml
index 6288ec559..e7d60078f 100644
--- a/runs/2D-wall_cheb-with-neutrals.toml
+++ b/runs/2D-wall_cheb-with-neutrals.toml
@@ -50,7 +50,6 @@ vpa_discretization = "chebyshev_pseudospectral"
 vperp_ngrid = 6
 vperp_nelement = 11
 vperp_L = 16.0
-vperp_bc = "periodic"
 #vperp_discretization = "finite_difference"
 vperp_discretization = "chebyshev_pseudospectral"
 
diff --git a/runs/2D-wall_cheb-with-neutrals_nel_r_16_z_16_vpa_16_vperp_16.toml b/runs/2D-wall_cheb-with-neutrals_nel_r_16_z_16_vpa_16_vperp_16.toml
index 147cfc805..c084f3d28 100644
--- a/runs/2D-wall_cheb-with-neutrals_nel_r_16_z_16_vpa_16_vperp_16.toml
+++ b/runs/2D-wall_cheb-with-neutrals_nel_r_16_z_16_vpa_16_vperp_16.toml
@@ -51,7 +51,6 @@ vpa_discretization = "chebyshev_pseudospectral"
 vperp_ngrid = 9
 vperp_nelement = 16
 vperp_L = 6.0
-vperp_bc = "periodic"
 #vperp_discretization = "finite_difference"
 vperp_discretization = "chebyshev_pseudospectral"
 
diff --git a/runs/2D-wall_cheb-with-neutrals_nel_r_1_z_12_vpa_12_vperp_12.toml b/runs/2D-wall_cheb-with-neutrals_nel_r_1_z_12_vpa_12_vperp_12.toml
index a1d8ebd6d..976392adc 100644
--- a/runs/2D-wall_cheb-with-neutrals_nel_r_1_z_12_vpa_12_vperp_12.toml
+++ b/runs/2D-wall_cheb-with-neutrals_nel_r_1_z_12_vpa_12_vperp_12.toml
@@ -51,7 +51,6 @@ vpa_discretization = "chebyshev_pseudospectral"
 vperp_ngrid = 9
 vperp_nelement = 12
 vperp_L = 6.0
-vperp_bc = "periodic"
 #vperp_discretization = "finite_difference"
 vperp_discretization = "chebyshev_pseudospectral"
 
diff --git a/runs/2D-wall_cheb-with-neutrals_nel_r_1_z_16_vpa_16_vperp_16.toml b/runs/2D-wall_cheb-with-neutrals_nel_r_1_z_16_vpa_16_vperp_16.toml
index fb25575bc..5b22dd944 100644
--- a/runs/2D-wall_cheb-with-neutrals_nel_r_1_z_16_vpa_16_vperp_16.toml
+++ b/runs/2D-wall_cheb-with-neutrals_nel_r_1_z_16_vpa_16_vperp_16.toml
@@ -54,7 +54,6 @@ vpa_discretization = "chebyshev_pseudospectral"
 vperp_ngrid = 9
 vperp_nelement = 16
 vperp_L = 6.0
-vperp_bc = "periodic"
 #vperp_discretization = "finite_difference"
 vperp_discretization = "chebyshev_pseudospectral"
 
diff --git a/runs/2D-wall_cheb-with-neutrals_nel_r_1_z_2_vpa_12_vperp_12.toml b/runs/2D-wall_cheb-with-neutrals_nel_r_1_z_2_vpa_12_vperp_12.toml
index 3673d632a..4ae06bc8c 100644
--- a/runs/2D-wall_cheb-with-neutrals_nel_r_1_z_2_vpa_12_vperp_12.toml
+++ b/runs/2D-wall_cheb-with-neutrals_nel_r_1_z_2_vpa_12_vperp_12.toml
@@ -51,7 +51,6 @@ vpa_discretization = "chebyshev_pseudospectral"
 vperp_ngrid = 6
 vperp_nelement = 12
 vperp_L = 6.0
-vperp_bc = "periodic"
 #vperp_discretization = "finite_difference"
 vperp_discretization = "chebyshev_pseudospectral"
 
diff --git a/runs/2D-wall_cheb-with-neutrals_nel_r_1_z_2_vpa_16_vperp_16.toml b/runs/2D-wall_cheb-with-neutrals_nel_r_1_z_2_vpa_16_vperp_16.toml
index 749dc2f14..cb5e4038c 100644
--- a/runs/2D-wall_cheb-with-neutrals_nel_r_1_z_2_vpa_16_vperp_16.toml
+++ b/runs/2D-wall_cheb-with-neutrals_nel_r_1_z_2_vpa_16_vperp_16.toml
@@ -51,7 +51,6 @@ vpa_discretization = "chebyshev_pseudospectral"
 vperp_ngrid = 6
 vperp_nelement = 16
 vperp_L = 6.0
-vperp_bc = "periodic"
 #vperp_discretization = "finite_difference"
 vperp_discretization = "chebyshev_pseudospectral"
 
diff --git a/runs/2D-wall_cheb-with-neutrals_nel_r_1_z_2_vpa_24_vperp_24.toml b/runs/2D-wall_cheb-with-neutrals_nel_r_1_z_2_vpa_24_vperp_24.toml
index aa25d5783..cb6dd76b3 100644
--- a/runs/2D-wall_cheb-with-neutrals_nel_r_1_z_2_vpa_24_vperp_24.toml
+++ b/runs/2D-wall_cheb-with-neutrals_nel_r_1_z_2_vpa_24_vperp_24.toml
@@ -51,7 +51,6 @@ vpa_discretization = "chebyshev_pseudospectral"
 vperp_ngrid = 6
 vperp_nelement = 24
 vperp_L = 6.0
-vperp_bc = "periodic"
 #vperp_discretization = "finite_difference"
 vperp_discretization = "chebyshev_pseudospectral"
 
diff --git a/runs/2D-wall_cheb-with-neutrals_nel_r_1_z_2_vpa_2_vperp_2.toml b/runs/2D-wall_cheb-with-neutrals_nel_r_1_z_2_vpa_2_vperp_2.toml
index cf8f33021..6f38efa9d 100644
--- a/runs/2D-wall_cheb-with-neutrals_nel_r_1_z_2_vpa_2_vperp_2.toml
+++ b/runs/2D-wall_cheb-with-neutrals_nel_r_1_z_2_vpa_2_vperp_2.toml
@@ -54,7 +54,6 @@ vpa_discretization = "chebyshev_pseudospectral"
 vperp_ngrid = 9
 vperp_nelement = 2
 vperp_L = 6.0
-vperp_bc = "periodic"
 #vperp_discretization = "finite_difference"
 vperp_discretization = "chebyshev_pseudospectral"
 
diff --git a/runs/2D-wall_cheb-with-neutrals_nel_r_1_z_2_vpa_4_vperp_4.toml b/runs/2D-wall_cheb-with-neutrals_nel_r_1_z_2_vpa_4_vperp_4.toml
index 7a53c6bcc..2ad591e4e 100644
--- a/runs/2D-wall_cheb-with-neutrals_nel_r_1_z_2_vpa_4_vperp_4.toml
+++ b/runs/2D-wall_cheb-with-neutrals_nel_r_1_z_2_vpa_4_vperp_4.toml
@@ -51,7 +51,6 @@ vpa_discretization = "chebyshev_pseudospectral"
 vperp_ngrid = 6
 vperp_nelement = 4
 vperp_L = 6.0
-vperp_bc = "periodic"
 #vperp_discretization = "finite_difference"
 vperp_discretization = "chebyshev_pseudospectral"
 
diff --git a/runs/2D-wall_cheb-with-neutrals_nel_r_1_z_2_vpa_8_vperp_8.toml b/runs/2D-wall_cheb-with-neutrals_nel_r_1_z_2_vpa_8_vperp_8.toml
index 94feaf81f..65920af8b 100644
--- a/runs/2D-wall_cheb-with-neutrals_nel_r_1_z_2_vpa_8_vperp_8.toml
+++ b/runs/2D-wall_cheb-with-neutrals_nel_r_1_z_2_vpa_8_vperp_8.toml
@@ -51,7 +51,6 @@ vpa_discretization = "chebyshev_pseudospectral"
 vperp_ngrid = 6
 vperp_nelement = 8
 vperp_L = 6.0
-vperp_bc = "periodic"
 #vperp_discretization = "finite_difference"
 vperp_discretization = "chebyshev_pseudospectral"
 
diff --git a/runs/2D-wall_cheb-with-neutrals_nel_r_1_z_4_vpa_4_vperp_4.toml b/runs/2D-wall_cheb-with-neutrals_nel_r_1_z_4_vpa_4_vperp_4.toml
index 33eabe22e..7a7673f95 100644
--- a/runs/2D-wall_cheb-with-neutrals_nel_r_1_z_4_vpa_4_vperp_4.toml
+++ b/runs/2D-wall_cheb-with-neutrals_nel_r_1_z_4_vpa_4_vperp_4.toml
@@ -54,7 +54,6 @@ vpa_discretization = "chebyshev_pseudospectral"
 vperp_ngrid = 9
 vperp_nelement = 4
 vperp_L = 6.0
-vperp_bc = "periodic"
 #vperp_discretization = "finite_difference"
 vperp_discretization = "chebyshev_pseudospectral"
 
diff --git a/runs/2D-wall_cheb-with-neutrals_nel_r_1_z_8_vpa_8_vperp_8.toml b/runs/2D-wall_cheb-with-neutrals_nel_r_1_z_8_vpa_8_vperp_8.toml
index 28de00684..86725b3a7 100644
--- a/runs/2D-wall_cheb-with-neutrals_nel_r_1_z_8_vpa_8_vperp_8.toml
+++ b/runs/2D-wall_cheb-with-neutrals_nel_r_1_z_8_vpa_8_vperp_8.toml
@@ -54,7 +54,6 @@ vpa_discretization = "chebyshev_pseudospectral"
 vperp_ngrid = 9
 vperp_nelement = 8
 vperp_L = 6.0
-vperp_bc = "periodic"
 #vperp_discretization = "finite_difference"
 vperp_discretization = "chebyshev_pseudospectral"
 
diff --git a/runs/2D-wall_cheb-with-neutrals_nel_r_2_z_2_vpa_12_vperp_12.toml b/runs/2D-wall_cheb-with-neutrals_nel_r_2_z_2_vpa_12_vperp_12.toml
index 5c8e53a4a..69a41b50d 100644
--- a/runs/2D-wall_cheb-with-neutrals_nel_r_2_z_2_vpa_12_vperp_12.toml
+++ b/runs/2D-wall_cheb-with-neutrals_nel_r_2_z_2_vpa_12_vperp_12.toml
@@ -52,7 +52,6 @@ vpa_discretization = "chebyshev_pseudospectral"
 vperp_ngrid = 6
 vperp_nelement = 12
 vperp_L = 6.0
-vperp_bc = "periodic"
 #vperp_discretization = "finite_difference"
 vperp_discretization = "chebyshev_pseudospectral"
 
diff --git a/runs/2D-wall_cheb-with-neutrals_nel_r_2_z_2_vpa_16_vperp_16.toml b/runs/2D-wall_cheb-with-neutrals_nel_r_2_z_2_vpa_16_vperp_16.toml
index 54f59efbc..f5101a689 100644
--- a/runs/2D-wall_cheb-with-neutrals_nel_r_2_z_2_vpa_16_vperp_16.toml
+++ b/runs/2D-wall_cheb-with-neutrals_nel_r_2_z_2_vpa_16_vperp_16.toml
@@ -52,7 +52,6 @@ vpa_discretization = "chebyshev_pseudospectral"
 vperp_ngrid = 6
 vperp_nelement = 16
 vperp_L = 6.0
-vperp_bc = "periodic"
 #vperp_discretization = "finite_difference"
 vperp_discretization = "chebyshev_pseudospectral"
 
diff --git a/runs/2D-wall_cheb-with-neutrals_nel_r_2_z_2_vpa_2_vperp_2.toml b/runs/2D-wall_cheb-with-neutrals_nel_r_2_z_2_vpa_2_vperp_2.toml
index ed9beb1c8..a5fe8f4b8 100644
--- a/runs/2D-wall_cheb-with-neutrals_nel_r_2_z_2_vpa_2_vperp_2.toml
+++ b/runs/2D-wall_cheb-with-neutrals_nel_r_2_z_2_vpa_2_vperp_2.toml
@@ -51,7 +51,6 @@ vpa_discretization = "chebyshev_pseudospectral"
 vperp_ngrid = 9
 vperp_nelement = 2
 vperp_L = 6.0
-vperp_bc = "periodic"
 #vperp_discretization = "finite_difference"
 vperp_discretization = "chebyshev_pseudospectral"
 
diff --git a/runs/2D-wall_cheb-with-neutrals_nel_r_2_z_2_vpa_4_vperp_4.toml b/runs/2D-wall_cheb-with-neutrals_nel_r_2_z_2_vpa_4_vperp_4.toml
index e2df0c0de..3a28baf53 100644
--- a/runs/2D-wall_cheb-with-neutrals_nel_r_2_z_2_vpa_4_vperp_4.toml
+++ b/runs/2D-wall_cheb-with-neutrals_nel_r_2_z_2_vpa_4_vperp_4.toml
@@ -52,7 +52,6 @@ vpa_discretization = "chebyshev_pseudospectral"
 vperp_ngrid = 6
 vperp_nelement = 4
 vperp_L = 6.0
-vperp_bc = "periodic"
 #vperp_discretization = "finite_difference"
 vperp_discretization = "chebyshev_pseudospectral"
 
diff --git a/runs/2D-wall_cheb-with-neutrals_nel_r_2_z_2_vpa_8_vperp_8.toml b/runs/2D-wall_cheb-with-neutrals_nel_r_2_z_2_vpa_8_vperp_8.toml
index e8af4a3ba..84ba8ade6 100644
--- a/runs/2D-wall_cheb-with-neutrals_nel_r_2_z_2_vpa_8_vperp_8.toml
+++ b/runs/2D-wall_cheb-with-neutrals_nel_r_2_z_2_vpa_8_vperp_8.toml
@@ -52,7 +52,6 @@ vpa_discretization = "chebyshev_pseudospectral"
 vperp_ngrid = 6
 vperp_nelement = 8
 vperp_L = 6.0
-vperp_bc = "periodic"
 #vperp_discretization = "finite_difference"
 vperp_discretization = "chebyshev_pseudospectral"
 
diff --git a/runs/2D-wall_cheb-with-neutrals_nel_r_4_z_4_vpa_4_vperp_4.toml b/runs/2D-wall_cheb-with-neutrals_nel_r_4_z_4_vpa_4_vperp_4.toml
index 2153ff79b..c22916ed8 100644
--- a/runs/2D-wall_cheb-with-neutrals_nel_r_4_z_4_vpa_4_vperp_4.toml
+++ b/runs/2D-wall_cheb-with-neutrals_nel_r_4_z_4_vpa_4_vperp_4.toml
@@ -51,7 +51,6 @@ vpa_discretization = "chebyshev_pseudospectral"
 vperp_ngrid = 9
 vperp_nelement = 4
 vperp_L = 6.0
-vperp_bc = "periodic"
 #vperp_discretization = "finite_difference"
 vperp_discretization = "chebyshev_pseudospectral"
 
diff --git a/runs/2D-wall_cheb-with-neutrals_nel_r_6_z_6_vpa_6_vperp_6.toml b/runs/2D-wall_cheb-with-neutrals_nel_r_6_z_6_vpa_6_vperp_6.toml
index 3b47196eb..c6b74b038 100644
--- a/runs/2D-wall_cheb-with-neutrals_nel_r_6_z_6_vpa_6_vperp_6.toml
+++ b/runs/2D-wall_cheb-with-neutrals_nel_r_6_z_6_vpa_6_vperp_6.toml
@@ -51,7 +51,6 @@ vpa_discretization = "chebyshev_pseudospectral"
 vperp_ngrid = 9
 vperp_nelement = 6
 vperp_L = 6.0
-vperp_bc = "periodic"
 #vperp_discretization = "finite_difference"
 vperp_discretization = "chebyshev_pseudospectral"
 
diff --git a/runs/2D-wall_cheb-with-neutrals_nel_r_8_z_8_vpa_8_vperp_8.toml b/runs/2D-wall_cheb-with-neutrals_nel_r_8_z_8_vpa_8_vperp_8.toml
index 82c82d23a..af575b988 100644
--- a/runs/2D-wall_cheb-with-neutrals_nel_r_8_z_8_vpa_8_vperp_8.toml
+++ b/runs/2D-wall_cheb-with-neutrals_nel_r_8_z_8_vpa_8_vperp_8.toml
@@ -51,7 +51,6 @@ vpa_discretization = "chebyshev_pseudospectral"
 vperp_ngrid = 9
 vperp_nelement = 8
 vperp_L = 6.0
-vperp_bc = "periodic"
 #vperp_discretization = "finite_difference"
 vperp_discretization = "chebyshev_pseudospectral"
 
diff --git a/runs/2V-evolve_ngrid_3_nel_r_1_z_1_vpa_6_vperp_3_fkpl_test.toml b/runs/2V-evolve_ngrid_3_nel_r_1_z_1_vpa_6_vperp_3_fkpl_test.toml
new file mode 100644
index 000000000..2f32d9ca3
--- /dev/null
+++ b/runs/2V-evolve_ngrid_3_nel_r_1_z_1_vpa_6_vperp_3_fkpl_test.toml
@@ -0,0 +1,96 @@
+#use_manufactured_solns_for_init = true
+#use_manufactured_solns_for_advance = false
+n_ion_species = 1
+n_neutral_species = 0
+electron_physics = "boltzmann_electron_response"
+#electron_physics = "boltzmann_electron_response_with_simple_sheath"
+evolve_moments_density = false
+evolve_moments_parallel_flow = false
+evolve_moments_parallel_pressure = false
+evolve_moments_conservation = false
+#force_Er_zero_at_wall = false #true
+#Er_constant = 0.0
+#epsilon_offset = 0.1
+#use_vpabar_in_mms_dfni = true
+T_e = 1.0
+T_wall = 1.0
+rhostar = 1.0
+Bzed = 1.0
+Bmag = 1.0
+initial_density1 = 0.5
+initial_temperature1 = 1.0
+initial_density2 = 0.5
+initial_temperature2 = 1.0
+z_IC_option1 = "sinusoid"
+z_IC_density_amplitude1 = 0.001
+z_IC_density_phase1 = 0.0
+z_IC_upar_amplitude1 = 0.0
+z_IC_upar_phase1 = 0.0
+z_IC_temperature_amplitude1 = 0.0
+z_IC_temperature_phase1 = 0.0
+z_IC_option2 = "sinusoid"
+z_IC_density_amplitude2 = 0.001
+z_IC_density_phase2 = 0.0
+z_IC_upar_amplitude2 = 0.0
+z_IC_upar_phase2 = 0.0
+z_IC_temperature_amplitude2 = 0.0
+z_IC_temperature_phase2 = 0.0
+charge_exchange_frequency = 0.0
+ionization_frequency = 0.0
+constant_ionization_rate = false
+nuii = 1.0
+#nuii_krook = 0.0
+#nuii_pitch = 0.0
+nstep = 5000
+dt = 1.0e-2
+nwrite = 5000
+nwrite_dfns = 5000
+use_semi_lagrange = false
+n_rk_stages = 4
+split_operators = false
+z_ngrid = 1
+z_nelement = 1
+z_nelement_local = 1
+z_bc = "wall"
+z_discretization = "chebyshev_pseudospectral"
+r_ngrid = 1
+r_nelement = 1
+r_nelement_local = 1
+r_bc = "periodic"
+r_discretization = "chebyshev_pseudospectral"
+vpa_ngrid = 3
+vpa_nelement = 6
+vpa_L = 6.0
+vpa_bc = "zero"
+#vpa_discretization = "chebyshev_pseudospectral"
+vpa_discretization = "gausslegendre_pseudospectral"
+vperp_ngrid = 3
+vperp_nelement = 3
+vperp_L = 3.0
+#vperp_discretization = "finite_difference"
+#vperp_discretization = "chebyshev_pseudospectral"
+vperp_discretization = "gausslegendre_pseudospectral"
+
+#vz_ngrid = 17
+#vz_nelement = 4
+#vz_L = 12.0
+#vz_bc = "periodic"
+#vz_discretization = "chebyshev_pseudospectral"
+
+#vr_ngrid = 17
+#vr_nelement = 4
+#vr_L = 12.0
+#vr_bc = "periodic"
+#vr_discretization = "chebyshev_pseudospectral"
+
+#vzeta_ngrid = 17
+#vzeta_nelement = 4
+#vzeta_L = 12.0
+#vzeta_bc = "periodic"
+#vzeta_discretization = "chebyshev_pseudospectral"
+
+#[numerical_dissipation]
+#vpa_dissipation_coefficient = 0.0
+#vperp_dissipation_coefficient = 0.0
+#z_dissipation_coefficient = 0.1
+#r_dissipation_coefficient = 0.0
diff --git a/src/calculus.jl b/src/calculus.jl
index 36bdeb8b7..6c6305a7a 100644
--- a/src/calculus.jl
+++ b/src/calculus.jl
@@ -2,12 +2,12 @@
 """
 module calculus
 
-export derivative!, second_derivative!
+export derivative!, second_derivative!, laplacian_derivative!
 export reconcile_element_boundaries_MPI!
 export integral
 
 using ..moment_kinetics_structs: discretization_info, null_spatial_dimension_info,
-                                 null_velocity_dimension_info
+                                 null_velocity_dimension_info, weak_discretization_info
 using ..type_definitions: mk_float, mk_int
 using MPI
 using ..communication: block_rank
@@ -61,7 +61,7 @@ function derivative!(df, f, coord, spectral)
     # get the derivative at each grid point within each element and store in
     # coord.scratch_2d
     elementwise_derivative!(coord, f, spectral)
-    # map the derivative from the elem;ntal grid to the full grid;
+    # map the derivative from the elemental grid to the full grid;
     # at element boundaries, use the average of the derivatives from neighboring elements.
     derivative_elements_to_full_grid!(df, coord.scratch_2d, coord)
 end
@@ -73,8 +73,8 @@ function derivative!(df, f, coord, spectral::Union{null_spatial_dimension_info,
     return nothing
 end
 
-function second_derivative!(d2f, f, Q, coord, spectral)
-    # computes d / d coord ( Q . d f / d coord)
+function second_derivative!(d2f, f, coord, spectral)
+    # computes d^2f / d(coord)^2
     # For spectral element methods, calculate second derivative by applying first
     # derivative twice, with special treatment for element boundaries
 
@@ -86,9 +86,6 @@ function second_derivative!(d2f, f, Q, coord, spectral)
     # Save elementwise first derivative result
     coord.scratch2_2d .= coord.scratch_2d
 
-    #form Q . d f / d coord
-    coord.scratch3 .= Q .* coord.scratch3
-
     # Second derivative for element interiors
     elementwise_derivative!(coord, coord.scratch3, spectral)
     derivative_elements_to_full_grid!(d2f, coord.scratch_2d, coord)
@@ -143,6 +140,55 @@ function second_derivative!(d2f, f, Q, coord, spectral)
     return nothing
 end
 
+"""
+    mass_matrix_solve!(f, b, spectral::weak_discretization_info)
+
+Solve
+```math
+M.f = b
+```
+for \$f\$, where \$M\$ is the mass matrix of a weak-form finite element method and \$b\$
+is an input.
+"""
+function mass_matrix_solve! end
+
+"""
+Apply 'K-matrix' elementwise as part of a weak-form second derivative (`second_derivative!` reads the output from `coord.scratch_2d`)
+"""
+function elementwise_apply_Kmat! end
+
+function second_derivative!(d2f, f, coord, spectral::weak_discretization_info)
+    # Weak-form evaluation of g = d^2 f / d coord^2: multiplying by basis functions
+    # and integrating by parts gives M * g = K * f, with M the mass matrix and K an
+    # appropriate stiffness matrix. First form the elementwise K * f.
+    elementwise_apply_Kmat!(coord, f, spectral)
+    # map K * f from the elemental grid (coord.scratch_2d) to the full grid; at
+    # element boundaries, use the average of K * f from neighboring elements.
+    rhs = coord.scratch
+    derivative_elements_to_full_grid!(rhs, coord.scratch_2d, coord)
+    # solve the weak-form matrix problem M * g = K * f for g = d^2 f / d coord^2
+    mass_matrix_solve!(d2f, rhs, spectral)
+end
+
+"""
+Apply 'L-matrix' elementwise as part of a weak-form Laplacian derivative (`laplacian_derivative!` reads the output from `coord.scratch_2d`)
+"""
+function elementwise_apply_Lmat! end
+
+function laplacian_derivative!(d2f, f, coord, spectral::weak_discretization_info)
+    # Weak-form evaluation, for coord.name 'vperp', of
+    # g = (1/coord) d/d coord ( coord  d f / d coord ); for all other coord.name this
+    # does exactly the same as second_derivative!. Multiplying by basis functions and
+    # integrating by parts gives M * g = L * f, with M the mass matrix.
+    elementwise_apply_Lmat!(coord, f, spectral)
+    # map L * f from the elemental grid (coord.scratch_2d) to the full grid; at
+    # element boundaries, use the average of L * f from neighboring elements.
+    rhs = coord.scratch
+    derivative_elements_to_full_grid!(rhs, coord.scratch_2d, coord)
+    # solve the weak-form matrix problem M * g = L * f for g
+    mass_matrix_solve!(d2f, rhs, spectral)
+end
+
 """
 """
 function derivative_elements_to_full_grid!(df1d, df2d, coord, adv_fac::AbstractArray{mk_float,1})
diff --git a/src/chebyshev.jl b/src/chebyshev.jl
index 92413f219..9bac36633 100644
--- a/src/chebyshev.jl
+++ b/src/chebyshev.jl
@@ -6,9 +6,11 @@ export update_fcheby!
 export update_df_chebyshev!
 export setup_chebyshev_pseudospectral
 export scaled_chebyshev_grid
+export scaled_chebyshev_radau_grid
 export chebyshev_spectral_derivative!
 export chebyshev_info
 
+using LinearAlgebra: mul!
 using FFTW
 using ..type_definitions: mk_float, mk_int
 using ..array_allocation: allocate_float, allocate_complex
@@ -20,7 +22,7 @@ using ..moment_kinetics_structs: discretization_info
 """
 Chebyshev pseudospectral discretization
 """
-struct chebyshev_info{TForward <: FFTW.cFFTWPlan, TBackward <: AbstractFFTs.ScaledPlan} <: discretization_info
+struct chebyshev_base_info{TForward <: FFTW.cFFTWPlan, TBackward <: AbstractFFTs.ScaledPlan}
     # fext is an array for storing f(z) on the extended domain needed
     # to perform complex-to-complex FFT using the fact that f(theta) is even in theta
     fext::Array{Complex{mk_float},1}
@@ -30,11 +32,20 @@ struct chebyshev_info{TForward <: FFTW.cFFTWPlan, TBackward <: AbstractFFTs.Scal
     f::Array{mk_float,2}
     # Chebyshev spectral coefficients of derivative of f
     df::Array{mk_float,1}
-    # plan for the complex-to-complex, in-place, forward Fourier transform on Chebyshev-Gauss-Lobatto grid
+    # plan for the complex-to-complex, in-place, forward Fourier transform on Chebyshev-Gauss-Lobatto/Radau grid
     forward::TForward
-    # plan for the complex-to-complex, in-place, backward Fourier transform on Chebyshev-Gauss-Lobatto grid
-    #backward_transform::FFTW.cFFTWPlan
+    # plan for the complex-to-complex, in-place, backward Fourier transform on Chebyshev-Gauss-Lobatto/Radau grid
+    # backward_transform::FFTW.cFFTWPlan
     backward::TBackward
+    # elementwise differentiation matrix (ngrid*ngrid)
+    Dmat::Array{mk_float,2}
+    # elementwise differentiation vector (ngrid) for the point x = -1
+    D0::Array{mk_float,1}
+end
+
+struct chebyshev_info{TForward <: FFTW.cFFTWPlan, TBackward <: AbstractFFTs.ScaledPlan} <: discretization_info
+    lobatto::chebyshev_base_info{TForward, TBackward} # filled from setup_chebyshev_pseudospectral_lobatto(coord)
+    radau::chebyshev_base_info{TForward, TBackward} # filled from setup_chebyshev_pseudospectral_radau(coord)
+end
 
 """
@@ -42,6 +53,12 @@ create arrays needed for explicit Chebyshev pseudospectral treatment
 and create the plans for the forward and backward fast Fourier transforms
 """
 function setup_chebyshev_pseudospectral(coord)
+    return chebyshev_info(
+        setup_chebyshev_pseudospectral_lobatto(coord),
+        setup_chebyshev_pseudospectral_radau(coord))
+end
+
+function setup_chebyshev_pseudospectral_lobatto(coord)
     # ngrid_fft is the number of grid points in the extended domain
     # in z = cos(theta).  this is necessary to turn a cosine transform on [0,π]
     # into a complex transform on [0,2π], which is more efficient in FFTW
@@ -54,9 +71,37 @@ function setup_chebyshev_pseudospectral(coord)
     # setup the plans for the forward and backward Fourier transforms
     forward_transform = plan_fft!(fext, flags=FFTW.MEASURE)
     backward_transform = plan_ifft!(fext, flags=FFTW.MEASURE)
+    # create array for differentiation matrix 
+    Dmat = allocate_float(coord.ngrid, coord.ngrid)
+    cheb_derivative_matrix_elementwise!(Dmat,coord.ngrid)
+    D0 = allocate_float(coord.ngrid)
+    D0 .= Dmat[1,:]
     # return a structure containing the information needed to carry out
     # a 1D Chebyshev transform
-    return chebyshev_info(fext, fcheby, dcheby, forward_transform, backward_transform)
+    return chebyshev_base_info(fext, fcheby, dcheby, forward_transform, backward_transform, Dmat, D0)
+end
+
+function setup_chebyshev_pseudospectral_radau(coord)
+    ngrid = coord.ngrid
+    # size of the extended theta-domain used for the complex-to-complex FFT on the
+    # Radau element
+    n_extended = 2*ngrid - 1
+    # complex work array holding f on the extended domain
+    f_extended = allocate_complex(n_extended)
+    # storage for the Chebyshev coefficients of f (per element) and of f'
+    f_coeffs = allocate_float(ngrid, coord.nelement_local)
+    df_coeffs = allocate_float(ngrid)
+    # in-place forward and backward FFT plans acting on f_extended
+    fft_plan = plan_fft!(f_extended, flags=FFTW.MEASURE)
+    ifft_plan = plan_ifft!(f_extended, flags=FFTW.MEASURE)
+    # elementwise differentiation matrix (ngrid*ngrid), filled by the FFT-based routine
+    diff_matrix = allocate_float(ngrid, ngrid)
+    cheb_derivative_matrix_elementwise_radau_by_FFT!(diff_matrix, coord, f_coeffs, df_coeffs, f_extended, fft_plan)
+    # elementwise differentiation vector (ngrid) for the lower endpoint
+    lower_endpoint_diff = allocate_float(ngrid)
+    cheb_lower_endpoint_derivative_vector_elementwise_radau_by_FFT!(lower_endpoint_diff, coord, f_coeffs, df_coeffs, f_extended, fft_plan)
+    # bundle the information needed to carry out a 1D Chebyshev transform
+    return chebyshev_base_info(f_extended, f_coeffs, df_coeffs, fft_plan, ifft_plan, diff_matrix, lower_endpoint_diff)
+end
 
 """
@@ -107,6 +152,56 @@ function scaled_chebyshev_grid(ngrid, nelement_local, n,
     return grid, wgts
 end
 
+"""
+    scaled_chebyshev_radau_grid(ngrid, nelement_local, n, element_scale,
+                                element_shift, imin, imax, irank)
+
+Build the grid (allocated with `allocate_float(n)`) and its weights for a coordinate
+whose lowest element on the process with `irank == 0` is a Chebyshev-Gauss-Radau
+element; every other element uses the reversed `chebyshevpoints(ngrid)` points.
+
+Element `j` fills `grid[imin[j]:imax[j]]` with the reference points multiplied by
+`element_scale[j]` and shifted by `element_shift[j]`.
+
+Returns the tuple `(grid, wgts)`; `wgts` comes from `clenshaw_curtis_radau_weights`
+when `irank == 0` and from `clenshaw_curtis_weights` otherwise.
+"""
+function scaled_chebyshev_radau_grid(ngrid, nelement_local, n,
+                                     element_scale, element_shift, imin, imax, irank)
+    # chebyshevpoints gives a grid running from +1 to -1; reverse it once here (it is
+    # the same for every element) instead of re-reversing inside the element loop
+    lobatto_points = reverse(chebyshevpoints(ngrid))
+    chebyshev_radau_grid = chebyshev_radau_points(ngrid)
+    # create array for the full grid
+    grid = allocate_float(n)
+    if irank == 0
+        # use a Chebyshev-Gauss-Radau element for the lowest element on rank 0;
+        # this element keeps all ngrid of its points
+        @views grid[imin[1]:imax[1]] .=
+            chebyshev_radau_grid[1:ngrid] .* element_scale[1] .+ element_shift[1]
+        # the remaining elements on this rank are ordinary Lobatto elements
+        first_lobatto_element = 2
+    else
+        # no Radau element on this rank: every local element is a Lobatto element
+        first_lobatto_element = 1
+    end
+    for j ∈ first_lobatto_element:nelement_local
+        # only the first local element includes its lower boundary point; every later
+        # element starts from index 2 so that the point shared with its neighbour is
+        # not double-counted
+        k = j == 1 ? 1 : 2
+        # apply the scale factor and shift for this element
+        @views grid[imin[j]:imax[j]] .=
+            lobatto_points[k:ngrid] .* element_scale[j] .+ element_shift[j]
+    end
+    if irank == 0
+        wgts = clenshaw_curtis_radau_weights(ngrid, nelement_local, n, imin, imax, element_scale)
+    else
+        wgts = clenshaw_curtis_weights(ngrid, nelement_local, n, imin, imax, element_scale)
+    end
+    return grid, wgts
+end
+
 """
     elementwise_derivative!(coord, ff, chebyshev::chebyshev_info)
 
@@ -117,33 +212,98 @@ function elementwise_derivative!(coord, ff, chebyshev::chebyshev_info)
     # define local variable nelement for convenience
     nelement = coord.nelement_local
     # check array bounds
-    @boundscheck nelement == size(chebyshev.f,2) || throw(BoundsError(chebyshev.f))
+    @boundscheck nelement == size(chebyshev.lobatto.f,2) || throw(BoundsError(chebyshev.lobatto.f))
+    @boundscheck nelement == size(chebyshev.radau.f,2) || throw(BoundsError(chebyshev.radau.f))
     @boundscheck nelement == size(df,2) && coord.ngrid == size(df,1) || throw(BoundsError(df))
-    # note that one must multiply by a coordinate transform factor 1/element_scale[j] 
+    # note that one must multiply by a coordinate transform factor 1/element_scale[j]
     # for each element j to get derivative on the extended grid
     
-    # variable k will be used to avoid double counting of overlapping point
-    # at element boundaries (see below for further explanation)
-    k = 0
-    # calculate the Chebyshev derivative on each element
-    @inbounds for j ∈ 1:nelement
-        # imin is the minimum index on the full grid for this (jth) element
-        # the 'k' below accounts for the fact that the first element includes
-        # both boundary points, while each additional element shares a boundary
-        # point with neighboring elements.  the choice was made when defining
-        # coord.imin to exclude the lower boundary point in each element other
-        # than the first so that no point is double-counted
+    if coord.cheb_option == "matrix"
+        # variable k will be used to avoid double counting of overlapping point
+        # at element boundaries (see below for further explanation)
+        k = 0
+        j = 1 # the first element
         imin = coord.imin[j]-k
         # imax is the maximum index on the full grid for this (jth) element
-        imax = coord.imax[j]
-        @views chebyshev_derivative_single_element!(df[:,j], ff[imin:imax],
-            chebyshev.f[:,j], chebyshev.df, chebyshev.fext, chebyshev.forward, coord)
-        # and multiply by scaling factor needed to go
-        # from Chebyshev z coordinate to actual z
+        imax = coord.imax[j]        
+        if coord.name == "vperp" && coord.irank == 0 # differentiate this element with the Radau scheme
+            @views mul!(df[:,j],chebyshev.radau.Dmat[:,:],ff[imin:imax])
+        else #differentiate using the Lobatto scheme
+            @views mul!(df[:,j],chebyshev.lobatto.Dmat[:,:],ff[imin:imax])
+        end
         for i ∈ 1:coord.ngrid
             df[i,j] /= coord.element_scale[j]
         end
-        k = 1
+        # calculate the Chebyshev derivative on each element
+        @inbounds for j ∈ 2:nelement
+            # imin is the minimum index on the full grid for this (jth) element
+            # the 'k' below accounts for the fact that the first element includes
+            # both boundary points, while each additional element shares a boundary
+            # point with neighboring elements.  the choice was made when defining
+            # coord.imin to exclude the lower boundary point in each element other
+            # than the first so that no point is double-counted
+            k = 1 
+            imin = coord.imin[j]-k
+            # imax is the maximum index on the full grid for this (jth) element
+            imax = coord.imax[j]
+            @views mul!(df[:,j],chebyshev.lobatto.Dmat[:,:],ff[imin:imax])
+            for i ∈ 1:coord.ngrid
+                df[i,j] /= coord.element_scale[j]
+            end
+        end
+    elseif coord.cheb_option == "FFT"   
+        # note that one must multiply by 1/element_scale[j] to get the derivative
+        # in scaled coordinate on element j
+        
+        # variable k will be used to avoid double counting of overlapping point
+        # at element boundaries (see below for further explanation)
+        k = 0
+        j = 1 # the first element
+        if coord.name == "vperp" && coord.irank == 0 # differentiate this element with the Radau scheme
+            imin = coord.imin[j]-k
+            # imax is the maximum index on the full grid for this (jth) element
+            imax = coord.imax[j]
+            @views chebyshev_radau_derivative_single_element!(df[:,j], ff[imin:imax],
+                chebyshev.radau.f[:,j], chebyshev.radau.df, chebyshev.radau.fext, chebyshev.radau.forward, coord)
+            # and multiply by scaling factor needed to go
+            # from Chebyshev z coordinate to actual z
+            for i ∈ 1:coord.ngrid
+                df[i,j] /= coord.element_scale[j]
+            end
+        else #differentiate using the Lobatto scheme
+            imin = coord.imin[j]-k
+            # imax is the maximum index on the full grid for this (jth) element
+            imax = coord.imax[j]
+            @views chebyshev_derivative_single_element!(df[:,j], ff[imin:imax],
+                chebyshev.lobatto.f[:,j], chebyshev.lobatto.df, chebyshev.lobatto.fext, chebyshev.lobatto.forward, coord)
+            # and multiply by scaling factor needed to go
+            # from Chebyshev z coordinate to actual z
+            for i ∈ 1:coord.ngrid
+                df[i,j] /= coord.element_scale[j]
+            end
+        end
+        # calculate the Chebyshev derivative on each element
+        @inbounds for j ∈ 2:nelement
+            # imin is the minimum index on the full grid for this (jth) element
+            # the 'k' below accounts for the fact that the first element includes
+            # both boundary points, while each additional element shares a boundary
+            # point with neighboring elements.  the choice was made when defining
+            # coord.imin to exclude the lower boundary point in each element other
+            # than the first so that no point is double-counted
+            k = 1 
+            imin = coord.imin[j]-k
+            # imax is the maximum index on the full grid for this (jth) element
+            imax = coord.imax[j]
+            @views chebyshev_derivative_single_element!(df[:,j], ff[imin:imax],
+                chebyshev.lobatto.f[:,j], chebyshev.lobatto.df, chebyshev.lobatto.fext, chebyshev.lobatto.forward, coord)
+            # and multiply by scaling factor needed to go
+            # from Chebyshev z coordinate to actual z
+            for i ∈ 1:coord.ngrid
+                df[i,j] /= coord.element_scale[j]
+            end        
+        end
+    else
+        println("ERROR: ", coord.cheb_option, " NOT SUPPORTED")
     end
     return nothing
 end
@@ -255,12 +415,14 @@ coord : coordinate
     `coordinate` struct giving the coordinate along which f varies
 chebyshev : chebyshev_info
     struct containing information for Chebyshev transforms
+
+Note that this routine does not support Gauss-Chebyshev-Radau elements
 """
 function interpolate_to_grid_1d!(result, newgrid, f, coord, chebyshev::chebyshev_info)
     # define local variable nelement for convenience
     nelement = coord.nelement_local
     # check array bounds
-    @boundscheck nelement == size(chebyshev.f,2) || throw(BoundsError(chebyshev.f))
+    @boundscheck nelement == size(chebyshev.lobatto.f,2) || throw(BoundsError(chebyshev.lobatto.f))
 
     n_new = size(newgrid)[1]
     # Find which points belong to which element.
@@ -295,7 +457,7 @@ function interpolate_to_grid_1d!(result, newgrid, f, coord, chebyshev::chebyshev
         @views chebyshev_interpolate_single_element!(result[kmin:kmax],
                                                      newgrid[kmin:kmax],
                                                      f[imin:imax],
-                                                     imin, imax, coord, chebyshev)
+                                                     imin, imax, coord, chebyshev.lobatto)
     end
     @inbounds for j ∈ 2:nelement
         kmin = kstart[j]
@@ -306,7 +468,7 @@ function interpolate_to_grid_1d!(result, newgrid, f, coord, chebyshev::chebyshev
             @views chebyshev_interpolate_single_element!(result[kmin:kmax],
                                                          newgrid[kmin:kmax],
                                                          f[imin:imax],
-                                                         imin, imax, coord, chebyshev)
+                                                         imin, imax, coord, chebyshev.lobatto)
         end
     end
 
@@ -319,7 +481,7 @@ end
 
 """
 """
-function chebyshev_interpolate_single_element!(result, newgrid, f, imin, imax, coord, chebyshev)
+function chebyshev_interpolate_single_element!(result, newgrid, f, imin, imax, coord, chebyshev::chebyshev_base_info)
     # Temporary buffer to store Chebyshev coefficients
     cheby_f = chebyshev.df
 
@@ -372,6 +534,29 @@ function clenshaw_curtis_weights(ngrid, nelement_local, n, imin, imax, element_s
     return wgts
 end
 
+function clenshaw_curtis_radau_weights(ngrid, nelement_local, n, imin, imax, element_scale)
+    # integration weights on the full grid of n points, filled element by element
+    wgts = zeros(mk_float, n)
+    # modified Chebyshev moments of the first kind, used to build both sets of weights
+    moments = chebyshevmoments(ngrid)
+    lobatto_wgts = clenshawcurtisweights(moments)
+    radau_wgts = chebyshev_radau_weights(moments, ngrid)
+    @inbounds begin
+        # first element: Radau weights, scaled for the modified domain (not [-1,1])
+        scale = element_scale[1]
+        wgts[1:ngrid] .= radau_wgts[1:ngrid]*scale
+        # other elements: weights from clenshawcurtisweights (no-op for one element)
+        for ielem ∈ 2:nelement_local
+            scale = element_scale[ielem]
+            # the lower boundary point is shared with the previous element, so add to it
+            wgts[imin[ielem]-1] += lobatto_wgts[1]*scale
+            # interior points and the upper boundary point of this element
+            wgts[imin[ielem]:imax[ielem]] .= lobatto_wgts[2:ngrid]*scale
+        end
+    end
+    return wgts
+end
+
 """
 compute and return modified Chebyshev moments of the first kind:
 ∫dx Tᵢ(x) over range [-1,1]
@@ -399,6 +584,50 @@ function chebyshevpoints(n)
     return grid
 end
 
+function chebyshev_radau_points(n)
+    # Chebyshev-Radau nodes z = cos(θ) ∈ (-1,1], returned in ascending order
+    nodes = allocate_float(n)
+    # angular spacing in units of π; the -0.5 keeps θ = π (z = -1) off the grid
+    inv_span = 1.0/(n-0.5)
+    @inbounds for k ∈ 1:n
+        # θ decreases from π(n-1)/(n-1/2) at k = 1 to 0 (z = 1) at k = n
+        nodes[k] = cospi((n-k)*inv_span)
+    end
+    return nodes
+end
+
+function chebyshev_radau_weights(moments::Array{mk_float,1}, n)
+    # input should have values moments[j] = (cos(pi j) + 1)/(1-j^2) for j >= 0
+    # length of the even, periodic extension of the n moments
+    n_ext = 2*n - 1
+    # work array for the moments on the extended [0,2π] domain in theta = ArcCos[z]
+    spectrum = allocate_complex(n_ext)
+    # the in-place FFT plan is made before the array is filled with data
+    fft_plan = plan_fft!(spectrum, flags=FFTW.MEASURE)
+    # first n entries: the moments themselves
+    @inbounds for k ∈ 1:n
+        spectrum[k] = complex(moments[k],0.0)
+    end
+    # mirror entries n, n-1, ..., 2 into entries n+1, ..., 2n-1
+    @inbounds for k ∈ n+1:n_ext
+        spectrum[k] = spectrum[n_ext-k+2]
+    end
+    # forward, complex-to-complex FFT in-place (spectrum is overwritten)
+    fft_plan*spectrum
+    # the moments are real and even, so only the real part of the first n outputs
+    # is needed; normalise by n_ext and reverse the order, since the FFT output is
+    # ordered oppositely to the grid, which runs over (-1,1]
+    wgts = allocate_float(n)
+    @inbounds begin
+        # the zero-frequency entry does not carry the factor of 2
+        wgts[n] = real(spectrum[1])/n_ext
+        for k ∈ 2:n
+            wgts[n-k+1] = 2.0*real(spectrum[k])/n_ext
+        end
+    end
+    return wgts
+end
+
 """
 takes the real function ff on a Chebyshev grid in z (domain [-1, 1]),
 which corresponds to the domain [π, 2π] in variable theta = ArcCos(z).
@@ -487,4 +716,216 @@ function chebyshev_backward_transform!(ff, fext, chebyf, transform, n)
     return nothing
 end
 
+function chebyshev_radau_forward_transform!(chebyf, fext, ff, transform, n)
+    # number of points in the even, periodic extension of ff
+    nfft = 2*n - 1
+    # load ff in reversed order into the first n entries of fext ...
+    @inbounds for k ∈ 1:n
+        fext[k] = complex(ff[n-k+1],0.0)
+    end
+    # ... and mirror entries n, n-1, ..., 2 into entries n+1, ..., 2n-1
+    @inbounds for k ∈ n+1:nfft
+        fext[k] = fext[nfft-k+2]
+    end
+    # perform the forward, complex-to-complex FFT in-place (fext is overwritten)
+    transform*fext
+    # the extended data are real and even, so the real part of the first n outputs
+    # holds all the information: these give the Chebyshev spectral coefficients
+    # for this element once normalised by nfft
+    @inbounds begin
+        # zero-frequency coefficient, which does not carry the factor of 2
+        chebyf[1] = real(fext[1])/nfft
+        # remaining coefficients
+        for k ∈ 2:n
+            chebyf[k] = 2.0*real(fext[k])/nfft
+        end
+    end
+    return nothing
+end
+    
+    """
+    """
+    function chebyshev_radau_backward_transform!(ff, fext, chebyf, transform, n)
+        # chebyf as input contains n Chebyshev spectral coefficients; ff is overwritten
+        # with n real values and fext (length 2n-1 is indexed below) is used as workspace
+        @inbounds begin
+            # first, fill in values for fext corresponding to positive frequencies
+            for j ∈ 2:n
+                fext[j] = chebyf[j]*0.5
+            end
+            # next, fill in values for fext corresponding to negative frequencies
+            # using fext(-k) = conjg(fext(k)) = fext(k)
+            # usual FFT ordering with j=1 <-> k=0, followed by ascending k up to kmax
+            # and then descending from -kmax down to -dk
+            for j ∈ 1:n-1
+                fext[n+j] = fext[n-j+1]
+            end
+            # fill in zero frequency mode, which is special in that it does not require
+            # the 1/2 scale factor
+            fext[1] = chebyf[1]
+        end
+        # fext now holds chebyf[1], then chebyf[2:n]/2, then the mirror image of those
+        # halved entries, so each nonzero frequency appears twice with weight 1/2
+        # perform the backward, complex-to-complex FFT in-place (fext is overwritten)
+        transform*fext
+        # copy the real part of the first n outputs into ff in reversed order
+        
+        @inbounds begin
+            for j ∈ 1:n
+                ff[j] = real(fext[n-j+1])
+            end
+        end
+        return nothing
+    end
+    function chebyshev_radau_derivative_single_element!(df, ff, cheby_f, cheby_df, cheby_fext, forward, coord)
+        # calculate the Chebyshev coefficients of the real-space function ff and store
+        # them in cheby_f (cheby_fext is overwritten as FFT workspace)
+        chebyshev_radau_forward_transform!(cheby_f, cheby_fext, ff, forward, coord.ngrid)
+        # coefficients of the derivative, stored in cheby_df -- NOTE(review): presumably with respect to the unscaled element coordinate; confirm
+        chebyshev_spectral_derivative!(cheby_df, cheby_f)
+        # inverse Chebyshev transform writes the derivative into df; the plan `forward` is passed here as well
+        chebyshev_radau_backward_transform!(df, cheby_fext, cheby_df, forward, coord.ngrid)
+    end
+    function chebyshev_radau_derivative_lower_endpoint(ff, cheby_f, cheby_df, cheby_fext, forward, coord)
+        # calculate the Chebyshev coefficients of the real-space function ff and return
+        # as cheby_f
+        chebyshev_radau_forward_transform!(cheby_f, cheby_fext, ff, forward, coord.ngrid)
+        # calculate the Chebyshev coefficients of the derivative of ff with respect to coord.grid
+        chebyshev_spectral_derivative!(cheby_df, cheby_f)
+        # evaluate the derivative at x = -1, which is not a Radau grid point, using
+        # T_n(-1) = (-1)^n: df = d_0 + sum_{n>=1} (-1)^n d_n. The d_n enter with unit
+        # weight, matching chebyshev_radau_backward_transform!, whose factor 0.5 is
+        # cancelled by summing the positive and negative frequency contributions.
+        df = cheby_df[1]
+        for i in 2:coord.ngrid
+            df += ((-1)^(i-1))*cheby_df[i]
+        end
+        return df
+    end
+
+
+"""
+derivative matrix for Gauss-Lobatto points using the analytical specification from 
+Chapter 8.2 from Trefethen 1994 
+https://people.maths.ox.ac.uk/trefethen/8all.pdf
+full list of Chapters may be obtained here 
+https://people.maths.ox.ac.uk/trefethen/pdetext.html
+"""
+    function cheb_derivative_matrix_elementwise!(D::Array{Float64,2},n::Int64)
+        
+        # Fill D in place (indexed here as an n x n matrix) using the Gauss-Lobatto
+        # Chebyshev points in reversed order x_j = { -1, ... , 1}
+        x = Array{Float64,1}(undef,n)
+        for j in 1:n
+            x[j] = cospi((n-j)/(n-1))
+        end
+        
+        # zero matrix before assigning values; the diagonal stays zero until the end
+        D[:,:] .= 0.0
+        
+        # top row (j = 1): c_j = 2 for this boundary row, c_k = 1 for interior columns
+        j = 1
+        c_j = 2.0 
+        c_k = 1.0
+        for k in 2:n-1
+            D[j,k] = Djk(x,j,k,c_j,c_k)
+        end
+        k = n 
+        c_k = 2.0
+        D[j,k] = Djk(x,j,k,c_j,c_k)
+        
+        # bottom row (j = n): as for the top row, with the corner entry now at k = 1
+        j = n
+        c_j = 2.0 
+        c_k = 1.0
+        for k in 2:n-1
+            D[j,k] = Djk(x,j,k,c_j,c_k)
+        end
+        k = 1
+        c_k = 2.0
+        D[j,k] = Djk(x,j,k,c_j,c_k)
+        
+        # left column (k = 1), interior rows only: c_j = 1, c_k = 2
+        k = 1
+        c_j = 1.0 
+        c_k = 2.0
+        for j in 2:n-1
+            D[j,k] = Djk(x,j,k,c_j,c_k)
+        end
+        
+        # right column (k = n), interior rows only: c_j = 1, c_k = 2
+        k = n
+        c_j = 1.0 
+        c_k = 2.0
+        for j in 2:n-1
+            D[j,k] = Djk(x,j,k,c_j,c_k)
+        end
+        
+        
+        # the analytical corner values D[n,n] = (2.0*(n - 1.0)^2 + 1.0)/6.0 and
+        # D[1,1] = -D[n,n] are not assigned here; all diagonal entries are
+        # instead obtained from the row sums at the end of this function
+        # interior rows and columns (off-diagonal entries only)
+        for j in 2:n-1
+            # diagonal entries D[j,j] are skipped here and filled in below
+            for k in 2:n-1
+                if j == k 
+                    continue
+                end
+                c_k = 1.0
+                c_j = 1.0
+                D[j,k] = Djk(x,j,k,c_j,c_k)
+            end
+        end
+        
+        # calculate diagonal entries as minus the sum of the other entries of each row
+        # (D[j,j] is still zero here), so D * (1, 1, ..., 1, 1) = (0, 0, ..., 0, 0)
+        for j in 1:n
+            D[j,j] = -sum(D[j,:])
+        end
+    end
+    function Djk(x::Array{Float64,1},j::Int64,k::Int64,c_j::Float64,c_k::Float64)
+        return (c_j/c_k)*(iseven(k+j) ? 1 : -1)/(x[j] - x[k])
+    end
+ """
+ Derivative matrix for Chebyshev-Radau grid using the FFT.
+ Note that a similar function could be constructed for the 
+ Chebyshev-Lobatto grid, if desired.
+ """
+    function cheb_derivative_matrix_elementwise_radau_by_FFT!(D::Array{Float64,2}, coord, f, df, fext, forward)
+        impulse = Array{Float64,1}(undef,coord.ngrid)
+        response = Array{Float64,1}(undef,coord.ngrid)
+        # column icol of D is the FFT-based derivative of the icol-th unit vector
+        for icol in 1:coord.ngrid
+            fill!(impulse, 0.0)
+            impulse[icol] = 1.0
+            @views chebyshev_radau_derivative_single_element!(response[:], impulse[:],
+                f[:,1], df, fext, forward, coord)
+            D[:,icol] .= response
+        end
+        # overwrite each diagonal entry with minus the sum of the other entries of
+        # its row, which gives D*[1.0, 1.0, ... 1.0] = [0.0, 0.0, ... 0.0]
+        for irow in 1:coord.ngrid
+            D[irow,irow] = 0.0
+            D[irow,irow] = -sum(D[irow,:])
+        end
+    end
+    
+    function cheb_lower_endpoint_derivative_vector_elementwise_radau_by_FFT!(D::Array{Float64,1}, coord, f, df, fext, forward)
+        # unit-impulse input used to probe the (linear) endpoint-derivative operator
+        ff_buffer = Array{Float64,1}(undef,coord.ngrid)
+        # use response matrix approach to calculate derivative vector D:
+        # D[j] is the lower-endpoint derivative produced by the j-th unit vector
+        for j in 1:coord.ngrid
+            ff_buffer .= 0.0
+            ff_buffer[j] = 1.0
+            D[j] = @views chebyshev_radau_derivative_lower_endpoint(ff_buffer[:],
+                f[:,1], df, fext, forward, coord)
+        end
+        # correct the first entry so that the entries of D sum to zero, i.e. the
+        # derivative of a constant vanishes: D⋅[1.0, 1.0, ... 1.0] = 0.0
+        D[1] = 0.0
+        D[1] = -sum(D)
+    end
+
 end
diff --git a/src/communication.jl b/src/communication.jl
index e2c2789ae..395c57b51 100644
--- a/src/communication.jl
+++ b/src/communication.jl
@@ -16,6 +16,8 @@ export allocate_shared, block_rank, block_size, n_blocks, comm_block, comm_inter
        iblock_index, comm_world, finalize_comms!, initialize_comms!, global_rank,
        MPISharedArray, global_size
 export setup_distributed_memory_MPI
+export setup_distributed_memory_MPI_for_weights_precomputation
+export _block_synchronize
 
 using MPI
 using SHA
@@ -99,6 +101,13 @@ notation definitions:
     - block: group of processes that share data with shared memory
     - z group: group of processes that need to communicate data for z derivatives
     - r group: group of processes that need to communicate data for r derivatives
+This routine assumes that the number of processes is selected by the user
+to match exactly the ratio
+
+  nblocks = (r_nelement_global/r_nelement_local)*(z_nelement_global/z_nelement_local)
+  
+This guarantees perfect load balancing. Shared memory is used to parallelise the other
+dimensions within each distributed-memory parallelised rz block.   
 """
 function setup_distributed_memory_MPI(z_nelement_global,z_nelement_local,r_nelement_global,r_nelement_local; printout=false)
     # setup some local constants and dummy variables
@@ -106,11 +115,11 @@ function setup_distributed_memory_MPI(z_nelement_global,z_nelement_local,r_nelem
     nrank_global = global_size[] # number of processes 
     
     # get information about how the grid is divided up
-    # number of sections `chunks' of the x grid
+    # number of sections `chunks' of the r grid
     r_nchunks = floor(mk_int,r_nelement_global/r_nelement_local)
     # number of sections `chunks' of the z grid
 	z_nchunks = floor(mk_int,z_nelement_global/z_nelement_local) # number of sections of the z grid
-	# get the number of shared-memorz blocks in the z r decomposition
+	# get the number of shared-memory blocks in the z r decomposition
     nblocks = r_nchunks*z_nchunks
     # get the number of ranks per block
     nrank_per_zr_block = floor(mk_int,nrank_global/nblocks)
@@ -208,6 +217,139 @@ function setup_distributed_memory_MPI(z_nelement_global,z_nelement_local,r_nelem
     return z_irank, z_nrank_per_group, z_comm, r_irank, r_nrank_per_group, r_comm
 end
 
+"""
+Function to take information from user about vpa vperp grids and 
+number of processes allocated to set up communicators for 
+precomputation of the Rosenbluth potential integration weights
+notation definitions:
+    - block: group of processes that share data with shared memory
+    - vpa group: group of processes that need to communicate data for vpa derivatives/integrals
+    - vperp group: group of processes that need to communicate data for vperp derivatives/integrals
+This routine assumes that the number of processes is selected by the user
+to match or be larger than the ratio 
+
+  nblocks = (vpa_nelement_global/vpa_nelement_local)*(vperp_nelement_global/vperp_nelement_local)
+  
+We also need to know (from user input) the maximum number of cores per shared memory region.
+A fraction of the cores will not contribute to the calculation, as we cannot guarantee that 
+the same number of cores is required for the rz parallelisation as the vpa vperp parallelisation 
+"""
+function setup_distributed_memory_MPI_for_weights_precomputation(vpa_nelement_global,vpa_nelement_local,
+               vperp_nelement_global,vperp_nelement_local, max_cores_per_block; printout=false)
+    # setup some local constants and dummy variables
+    irank_global = global_rank[] # rank index within global processes
+    nrank_global = global_size[] # number of processes
+
+    # get information about how the grid is divided up
+    # number of sections `chunks' of the vperp grid
+    vperp_nchunks = floor(mk_int,vperp_nelement_global/vperp_nelement_local)
+    # number of sections `chunks' of the vpa grid
+    vpa_nchunks = floor(mk_int,vpa_nelement_global/vpa_nelement_local)
+    # get the number of shared-memory blocks in the vpa vperp decomposition
+    nblocks = vperp_nchunks*vpa_nchunks
+    # get the number of ranks per block
+    nrank_per_vpavperp_block = min(floor(mk_int,nrank_global/nblocks), max_cores_per_block)
+    # get the total number of useful cores
+    nrank_vpavperp = nrank_per_vpavperp_block*nblocks
+    # N.B. the user should pick the largest possible value for nblocks that is consistent
+    # with the total number of cores available and complete shared-memory regions. This
+    # should be done by choosing
+    #  (vperp_nelement_global/vperp_nelement_local)*(vpa_nelement_global/vpa_nelement_local)
+    # in the input file. For example, if there are 26 cores available, and 8 global elements in
+    # each dimension, we should choose 2 local elements, making nblocks = 16 and nrank_per_vpavperp_block = 1.
+    if printout
+        println("debug info:")
+        println("nrank_global: ",nrank_global)
+        println("vperp_nchunks: ",vperp_nchunks)
+        println("vpa_nchunks: ",vpa_nchunks)
+        println("nblocks: ",nblocks)
+        println("nrank_per_vpavperp_block: ",nrank_per_vpavperp_block)
+        println("max_cores_per_block: ",max_cores_per_block)
+    end
+
+    # With fewer processes than blocks, or max_cores_per_block < 1, there are no useful
+    # cores and the rank arithmetic below would divide by zero: stop with a clear message.
+    nrank_per_vpavperp_block > 0 || error("setup_distributed_memory_MPI_for_weights_precomputation requires at least nblocks = $nblocks processes and max_cores_per_block >= 1")
+
+    # Create a communicator which includes enough cores for the calculation
+    # and includes irank_global = 0. Excess cores have a copy of the communicator
+    # with a different color. After the calculation is completed a MPI broadcast
+    # on the world communicator should be carried out to get the data to the
+    # excess cores.
+    irank_vpavperp = mod(irank_global,nrank_vpavperp)
+    igroup_vpavperp = floor(mk_int,irank_global/nrank_vpavperp)
+    comm_vpavperp = MPI.Comm_split(comm_world,igroup_vpavperp,irank_vpavperp)
+    # MPI.Comm_split(comm,color,key)
+    # comm -> communicator to be split
+    # color -> label of group of processes
+    # key -> label of process in group
+    # if color == nothing then this process is excluded from the communicator
+
+    # assign information regarding shared-memory blocks
+    # block index -- which block is this process in
+    iblock = floor(mk_int,irank_vpavperp/nrank_per_vpavperp_block)
+    # rank index within a block
+    irank_block = mod(irank_vpavperp,nrank_per_vpavperp_block)
+
+    if printout
+        println("iblock: ",iblock)
+        println("irank_block: ",irank_block)
+    end
+    # assign the block rank to the global variables
+    iblock_index[] = iblock
+    block_rank[] = irank_block
+    block_size[] = nrank_per_vpavperp_block
+    # construct a communicator for intra-block communication
+    comm_block[] = MPI.Comm_split(comm_vpavperp,iblock,irank_block)
+
+    vpa_ngroup = vperp_nchunks
+    vpa_nrank_per_group = vpa_nchunks
+    vpa_igroup = floor(mk_int,iblock/vpa_nchunks) # iblock(irank) -> vpa_igroup
+    vpa_irank =  mod(iblock,vpa_nchunks) # iblock(irank) -> vpa_irank
+    # iblock = vpa_igroup * vpa_nchunks + vpa_irank
+
+    if printout
+        # useful information for debugging
+        println("vpa_ngroup: ",vpa_ngroup)
+        println("vpa_nrank_per_group: ",vpa_nrank_per_group)
+        println("vpa_igroup: ",vpa_igroup)
+        println("vpa_irank_sub: ",vpa_irank)
+        println("iblock: ",iblock, " ", vpa_igroup * vpa_nchunks + vpa_irank)
+        println("")
+    end
+
+    vperp_ngroup = vpa_nchunks
+    vperp_nrank_per_group = vperp_nchunks
+    vperp_igroup = vpa_irank # block(irank) -> vperp_igroup
+    vperp_irank = vpa_igroup # block(irank) -> vperp_irank
+    # iblock = vperp_igroup + vperp_ngroup * vperp_irank
+
+    if printout
+        # useful information for debugging
+        println("vperp_ngroup: ",vperp_ngroup)
+        println("vperp_nrank_per_group: ",vperp_nrank_per_group)
+        println("vperp_igroup: ",vperp_igroup)
+        println("vperp_irank: ",vperp_irank)
+        println("iblock: ",iblock, " ", vperp_irank * vperp_ngroup + vperp_igroup)
+        println("")
+    end
+
+    # construct communicators for inter-block communication
+    # only communicate between lead processes on a block
+    if block_rank[] == 0
+        comm_inter_block[] = MPI.Comm_split(comm_vpavperp, 0, iblock)
+        vperp_comm = MPI.Comm_split(comm_vpavperp,vperp_igroup,vperp_irank)
+        vpa_comm = MPI.Comm_split(comm_vpavperp,vpa_igroup,vpa_irank)
+    else # assign a dummy value
+        comm_inter_block[] = MPI.Comm_split(comm_vpavperp, nothing, iblock)
+        vperp_comm = MPI.Comm_split(comm_vpavperp,nothing,vperp_irank)
+        vpa_comm = MPI.Comm_split(comm_vpavperp,nothing,vpa_irank)
+    end
+    # as noted above, a color of `nothing` excludes the process from the new communicator
+
+    return vpa_irank, vpa_nrank_per_group, vpa_comm, vperp_irank, vperp_nrank_per_group, vperp_comm
+end
+
 @debug_shared_array begin
     """
     Special type for debugging race conditions in accesses to shared-memory arrays.
@@ -215,7 +357,7 @@ end
     """
     struct DebugMPISharedArray{T, N} <: AbstractArray{T, N}
         data::Array{T,N}
-        is_initialized::Array{Bool,N}
+        is_initialized::Array{mk_int,N}
         is_read::Array{Bool,N}
         is_written::Array{Bool, N}
         creation_stack_trace::String
@@ -230,8 +372,10 @@ end
     # Constructors
     function DebugMPISharedArray(array::Array)
         dims = size(array)
-        is_initialized = Array{Bool}(undef, dims)
-        is_initialized .= false
+        is_initialized = allocate_shared(mk_int, dims; maybe_debug=false)
+        if block_rank[] == 0
+            is_initialized .= 0
+        end
         is_read = Array{Bool}(undef, dims)
         is_read .= false
         is_written = Array{Bool}(undef, dims)
@@ -260,7 +404,7 @@ end
     Base.size(A::DebugMPISharedArray{T, N}) where {T, N} = size(A.data)
     function Base.getindex(A::DebugMPISharedArray{T, N}, I::Vararg{mk_int,N}) where {T, N}
         @debug_track_initialized begin
-            if !all(A.is_initialized[I...])
+            if !all(A.is_initialized[I...] .== 1)
                 if A.creation_stack_trace != ""
                     error("Shared memory array read at $I before being initialized. "
                           * "Array was created at:\n"
@@ -277,7 +421,7 @@ end
     end
     function Base.setindex!(A::DebugMPISharedArray{T, N}, v::T, I::Vararg{mk_int,N}) where {T, N}
         @debug_track_initialized begin
-            A.is_initialized[I...] = true
+            A.is_initialized[I...] = 1
         end
         A.is_written[I...] = true
         return setindex!(A.data, v, I...)
@@ -332,12 +476,16 @@ dims - mk_int or Tuple{mk_int}
     Dimensions of the array to be created. Dimensions passed define the size of the
     array which is being handled by the 'block' (rather than the global array, or a
     subset for a single process).
+maybe_debug - Bool
+    Can be set to `false` to force not creating a DebugMPISharedArray when debugging is
+    active. This avoids recursion when including a shared-memory array as a member of a
+    DebugMPISharedArray for debugging purposes.
 
 Returns
 -------
 Array{mk_float}
 """
-function allocate_shared(T, dims)
+function allocate_shared(T, dims; maybe_debug=true)
     br = block_rank[]
     bs = block_size[]
     n = prod(dims)
@@ -349,7 +497,9 @@ function allocate_shared(T, dims)
 
         @debug_shared_array begin
             # If @debug_shared_array is active, create DebugMPISharedArray instead of Array
-            array = DebugMPISharedArray(array)
+            if maybe_debug
+                array = DebugMPISharedArray(array)
+            end
         end
 
         return array
@@ -399,9 +549,11 @@ function allocate_shared(T, dims)
 
     @debug_shared_array begin
         # If @debug_shared_array is active, create DebugMPISharedArray instead of Array
-        debug_array = DebugMPISharedArray(array)
-        push!(global_debugmpisharedarray_store, debug_array)
-        return debug_array
+        if maybe_debug
+            debug_array = DebugMPISharedArray(array)
+            push!(global_debugmpisharedarray_store, debug_array)
+            return debug_array
+        end
     end
 
     return array
diff --git a/src/coordinates.jl b/src/coordinates.jl
index a2278585c..936695a10 100644
--- a/src/coordinates.jl
+++ b/src/coordinates.jl
@@ -9,8 +9,9 @@ export set_element_boundaries
 using ..type_definitions: mk_float, mk_int
 using ..array_allocation: allocate_float, allocate_int
 using ..calculus: derivative!
-using ..chebyshev: scaled_chebyshev_grid, setup_chebyshev_pseudospectral
+using ..chebyshev: scaled_chebyshev_grid, scaled_chebyshev_radau_grid, setup_chebyshev_pseudospectral
 using ..finite_differences: finite_difference_info
+using ..gauss_legendre: scaled_gauss_legendre_lobatto_grid, scaled_gauss_legendre_radau_grid, setup_gausslegendre_pseudospectral
 using ..quadrature: composite_simpson_weights
 using ..input_structs: advection_input
 using ..moment_kinetics_structs: null_spatial_dimension_info, null_velocity_dimension_info
@@ -51,10 +52,14 @@ struct coordinate
     imin::Array{mk_int,1}
     # imax[j] contains the maximum index on the full grid for element j
     imax::Array{mk_int,1}
+    # igrid_full[i,j] contains the index of the full grid for the elemental grid point i, on element j
+    igrid_full::Array{mk_int,2}
     # discretization option for the grid
     discretization::String
     # if the discretization is finite differences, fd_option provides the precise scheme
     fd_option::String
+    # if the discretization is chebyshev_pseudospectral, cheb_option chooses whether to use FFT or differentiation matrices for d / d coord
+    cheb_option::String
     # bc is the boundary condition option for this coordinate
     bc::String
     # wgts contains the integration weights associated with each grid point
@@ -100,7 +105,7 @@ create arrays associated with a given coordinate,
 setup the coordinate grid, and populate the coordinate structure
 containing all of this information
 """
-function define_coordinate(input, parallel_io::Bool=false)
+function define_coordinate(input, parallel_io::Bool=false; init_YY::Bool=true)
     # total number of grid points is ngrid for the first element
     # plus ngrid-1 unique points for each additional element due
     # to the repetition of a point at the element boundary
@@ -113,10 +118,10 @@ function define_coordinate(input, parallel_io::Bool=false)
         input.nelement_local, n_local)
     # obtain (local) index mapping from the grid within each element
     # to the full grid
-    imin, imax = elemental_to_full_grid_map(input.ngrid, input.nelement_local)
+    imin, imax, igrid_full = elemental_to_full_grid_map(input.ngrid, input.nelement_local)
     # initialise the data used to construct the grid
     # boundaries for each element
-    element_boundaries = set_element_boundaries(input.nelement_global, input.L, input.element_spacing_option)
+    element_boundaries = set_element_boundaries(input.nelement_global, input.L, input.element_spacing_option, input.name)
     # shift and scale factors for each local element
     element_scale, element_shift = set_element_scale_and_shift(input.nelement_global, input.nelement_local, input.irank, element_boundaries)
     # initialize the grid and the integration weights associated with the grid
@@ -158,7 +163,7 @@ function define_coordinate(input, parallel_io::Bool=false)
     end
     coord = coordinate(input.name, n_global, n_local, input.ngrid,
         input.nelement_global, input.nelement_local, input.nrank, input.irank, input.L, grid,
-        cell_width, igrid, ielement, imin, imax, input.discretization, input.fd_option,
+        cell_width, igrid, ielement, imin, imax, igrid_full, input.discretization, input.fd_option, input.cheb_option,
         input.bc, wgts, uniform_grid, duniform_dgrid, scratch, copy(scratch), copy(scratch),
         scratch_2d, copy(scratch_2d), advection, send_buffer, receive_buffer, input.comm,
         local_io_range, global_io_range, element_scale, element_shift, input.element_spacing_option)
@@ -176,6 +181,12 @@ function define_coordinate(input, parallel_io::Bool=false)
         spectral = setup_chebyshev_pseudospectral(coord)
         # obtain the local derivatives of the uniform grid with respect to the used grid
         derivative!(coord.duniform_dgrid, coord.uniform_grid, coord, spectral)
+    elseif input.discretization == "gausslegendre_pseudospectral"
+        # create arrays needed for explicit GaussLegendre pseudospectral treatment in this
+        # coordinate and create the matrices for differentiation
+        spectral = setup_gausslegendre_pseudospectral(coord,init_YY=init_YY)
+        # obtain the local derivatives of the uniform grid with respect to the used grid
+        derivative!(coord.duniform_dgrid, coord.uniform_grid, coord, spectral)
     else
         # finite_difference_info is just a type so that derivative methods, etc., dispatch
         # to the finite difference versions, it does not contain any information.
@@ -186,8 +197,8 @@ function define_coordinate(input, parallel_io::Bool=false)
     return coord, spectral
 end
 
-function set_element_boundaries(nelement_global, L, element_spacing_option)
-    # set global element boundaries
+function set_element_boundaries(nelement_global, L, element_spacing_option, coord_name)
+    # set global element boundaries between [-L/2,L/2]
     element_boundaries = allocate_float(nelement_global+1)
     if element_spacing_option == "sqrt" && nelement_global > 3
         # number of boundaries of sqrt grid
@@ -216,6 +227,12 @@ function set_element_boundaries(nelement_global, L, element_spacing_option)
     else 
         println("ERROR: element_spacing_option: ",element_spacing_option, " not supported")
     end
+    if coord_name == "vperp"
+        #shift so that the range of element boundaries is [0,L]
+        for j in 1:nelement_global+1
+            element_boundaries[j] += L/2.0
+        end
+    end
     return element_boundaries
 end
 
@@ -252,10 +269,9 @@ function init_grid(ngrid, nelement_local, n_global, n_local, irank, L, element_s
     elseif discretization == "chebyshev_pseudospectral"
         if name == "vperp"
             # initialize chebyshev grid defined on [-L/2,L/2]
-            grid, wgts = scaled_chebyshev_grid(ngrid, nelement_local, n_local, element_scale, element_shift, imin, imax)
-            grid .= grid .+ L/2.0 # shift to [0,L] appropriate to vperp variable
+            grid, wgts = scaled_chebyshev_radau_grid(ngrid, nelement_local, n_local, element_scale, element_shift, imin, imax, irank)
             wgts = 2.0 .* wgts .* grid # to include 2 vperp in jacobian of integral
-                                        # see note above on normalisation
+                                       # see note above on normalisation
         else
             # initialize chebyshev grid defined on [-L/2,L/2]
             # with n grid points chosen to facilitate
@@ -265,6 +281,15 @@ function init_grid(ngrid, nelement_local, n_global, n_local, irank, L, element_s
             # that are those associated with Clenshaw-Curtis quadrature
             grid, wgts = scaled_chebyshev_grid(ngrid, nelement_local, n_local, element_scale, element_shift, imin, imax)
         end
+    elseif discretization == "gausslegendre_pseudospectral"
+        if name == "vperp"
+            # use a radau grid for the 1st element near the origin
+            grid, wgts = scaled_gauss_legendre_radau_grid(ngrid, nelement_local, n_local, element_scale, element_shift, imin, imax, irank)
+            wgts = 2.0 .* wgts .* grid # to include 2 vperp in jacobian of integral
+                                       # see note above on normalisation
+        else
+            grid, wgts = scaled_gauss_legendre_lobatto_grid(ngrid, nelement_local, n_local, element_scale, element_shift, imin, imax)
+        end
     elseif discretization == "finite_difference"
         if name == "vperp"
             # initialize equally spaced grid defined on [0,L]
@@ -372,6 +397,7 @@ indices on the full grid for each element
 function elemental_to_full_grid_map(ngrid, nelement)
     imin = allocate_int(nelement)
     imax = allocate_int(nelement)
+    igrid_full = allocate_int(ngrid, nelement)
     @inbounds begin
         # the first element contains ngrid entries
         imin[1] = 1
@@ -384,8 +410,14 @@ function elemental_to_full_grid_map(ngrid, nelement)
                 imax[i] = imin[i] + ngrid - 2
             end
         end
+        
+        for j in 1:nelement
+            for i in 1:ngrid
+                igrid_full[i,j] = i + (j - 1)*(ngrid - 1)
+            end
+        end
     end
-    return imin, imax
+    return imin, imax, igrid_full
 end
 
 end
diff --git a/src/em_fields.jl b/src/em_fields.jl
index 131979e9a..ebbdba5eb 100644
--- a/src/em_fields.jl
+++ b/src/em_fields.jl
@@ -110,9 +110,9 @@ function update_phi!(fields, fvec, z, r, composition, z_spectral, r_spectral, sc
     ## calculate the electric fields after obtaining phi
     #Er = - d phi / dr 
     if r.n > 1
-        @views derivative_r!(fields.Er,-fields.phi,
-                scratch_dummy.buffer_zs_1[:,1], scratch_dummy.buffer_zs_2[:,1],
-                scratch_dummy.buffer_zs_3[:,1], scratch_dummy.buffer_zs_4[:,1],
+        derivative_r!(fields.Er,-fields.phi,
+                scratch_dummy.buffer_z_1, scratch_dummy.buffer_z_2,
+                scratch_dummy.buffer_z_3, scratch_dummy.buffer_z_4,
                 r_spectral,r)
         if z.irank == 0 && fields.force_Er_zero_at_wall
             fields.Er[1,:] .= 0.0
@@ -127,10 +127,16 @@ function update_phi!(fields, fvec, z, r, composition, z_spectral, r_spectral, sc
         end
     end
     #Ez = - d phi / dz
-    @views derivative_z!(fields.Ez,-fields.phi,
-                scratch_dummy.buffer_rs_1[:,1], scratch_dummy.buffer_rs_2[:,1],
-                scratch_dummy.buffer_rs_3[:,1], scratch_dummy.buffer_rs_4[:,1],
+    if z.n > 1
+        derivative_z!(fields.Ez,-fields.phi,
+                scratch_dummy.buffer_r_1, scratch_dummy.buffer_r_2,
+                scratch_dummy.buffer_r_3, scratch_dummy.buffer_r_4,
                 z_spectral,z)
+    else
+        @serial_region begin
+            fields.Ez[:,:] .= 0.0
+        end
+    end
 
 end
 
diff --git a/src/file_io.jl b/src/file_io.jl
index de90c3fd2..c6e40edca 100644
--- a/src/file_io.jl
+++ b/src/file_io.jl
@@ -69,6 +69,8 @@ struct io_moments_info{Tfile, Ttime, Tphi, Tmomi, Tmomn, Tchodura_lower,
     parallel_heat_flux::Tmomi
     # handle for the charged species thermal speed
     thermal_speed::Tmomi
+    # handle for the charged species entropy production
+    entropy_production::Tmomi
     # handle for chodura diagnostic (lower)
     chodura_integral_lower::Tchodura_lower
     # handle for chodura diagnostic (upper)
@@ -545,6 +547,9 @@ function define_io_coordinate!(parent, coord, coord_name, description, parallel_
         write_single_value!(group, "fd_option", coord.fd_option; parallel_io=parallel_io,
                             description="type of finite difference for $coord_name, if used")
 
+        write_single_value!(group, "cheb_option", coord.cheb_option; parallel_io=parallel_io,
+                            description="type of chebyshev differentiation used for $coord_name, if used")
+
         # write the boundary condition for the coordinate
         write_single_value!(group, "bc", coord.bc; parallel_io=parallel_io,
                             description="boundary condition for $coord_name")
@@ -642,7 +647,14 @@ function define_dynamic_moment_variables!(fid, n_ion_species, n_neutral_species,
                                           parallel_io=parallel_io,
                                           description="charged species thermal speed",
                                           units="c_ref")
-        
+
+        # io_dSdt is the handle for the entropy production (due to collisions)
+        io_dSdt = create_dynamic_variable!(dynamic, "entropy_production", mk_float, z, r;
+                                          n_ion_species=n_ion_species,
+                                          parallel_io=parallel_io,
+                                          description="charged species entropy production",
+                                          units="")
+
         if parallel_io || z.irank == 0
             # io_chodura_lower is the handle for the ion thermal speed
             io_chodura_lower = create_dynamic_variable!(dynamic, "chodura_integral_lower", mk_float, r;
@@ -812,7 +824,7 @@ function define_dynamic_moment_variables!(fid, n_ion_species, n_neutral_species,
             units="minutes")
 
         return io_moments_info(fid, io_time, io_phi, io_Er, io_Ez, io_density, io_upar,
-                               io_ppar, io_pperp, io_qpar, io_vth, io_chodura_lower, io_chodura_upper, io_density_neutral, io_uz_neutral,
+                               io_ppar, io_pperp, io_qpar, io_vth, io_dSdt, io_chodura_lower, io_chodura_upper, io_density_neutral, io_uz_neutral,
                                io_pz_neutral, io_qz_neutral, io_thermal_speed_neutral,
                                external_source_amplitude,
                                external_source_density_amplitude,
@@ -969,7 +981,8 @@ function reopen_moments_io(file_info)
                                getvar("Ez"), getvar("density"), getvar("parallel_flow"),
                                getvar("parallel_pressure"), getvar("perpendicular_pressure"),
                                getvar("parallel_heat_flux"),
-                               getvar("thermal_speed"), getvar("chodura_integral_lower"),
+                               getvar("thermal_speed"), getvar("entropy_production"),
+                               getvar("chodura_integral_lower"),
                                getvar("chodura_integral_upper"), getvar("density_neutral"),
                                getvar("uz_neutral"), getvar("pz_neutral"),
                                getvar("qz_neutral"), getvar("thermal_speed_neutral"),
@@ -1067,8 +1080,8 @@ function reopen_dfns_io(file_info)
                                      getvar("Ez"), getvar("density"),
                                      getvar("parallel_flow"), getvar("parallel_pressure"),
                                      getvar("perpendicular_pressure"),
-                                     getvar("parallel_heat_flux"),
-                                     getvar("thermal_speed"), getvar("chodura_integral_lower"),
+                                     getvar("parallel_heat_flux"), getvar("thermal_speed"),
+                                     getvar("entropy_production"), getvar("chodura_integral_lower"),
                                      getvar("chodura_integral_upper"), getvar("density_neutral"),
                                      getvar("uz_neutral"), getvar("pz_neutral"),
                                      getvar("qz_neutral"),
@@ -1105,6 +1118,12 @@ range is `1:n`).
 """
 function append_to_dynamic_var() end
 
+@debug_shared_array begin
+    # Forward to the method for the wrapped `data.data` field, passing all other arguments through.
+    append_to_dynamic_var(data::DebugMPISharedArray, args...; kwargs...) =
+        append_to_dynamic_var(data.data, args...; kwargs...)
+end
+
 """
 write time-dependent moments data to the binary output file
 """
@@ -1145,6 +1164,8 @@ function write_moments_data_to_binary(moments, fields, t, n_ion_species,
                               parallel_io, z, r, n_ion_species)
         append_to_dynamic_var(io_moments.thermal_speed, moments.charged.vth, t_idx,
                               parallel_io, z, r, n_ion_species)
+        append_to_dynamic_var(io_moments.entropy_production, moments.charged.dSdt, t_idx,
+                              parallel_io, z, r, n_ion_species)
         if z.irank == 0 # lower wall 
             append_to_dynamic_var(io_moments.chodura_integral_lower,
                                   moments.charged.chodura_integral_lower, t_idx,
@@ -1283,148 +1304,6 @@ function write_dfns_data_to_binary(ff, ff_neutral, moments, fields, t, n_ion_spe
     return nothing
 end
 
-@debug_shared_array begin
-    # Special versions when using DebugMPISharedArray to avoid implicit conversion to
-    # Array, which is forbidden.
-    function write_moments_data_to_binary(moments, fields, t, n_ion_species,
-                                          n_neutral_species, io_or_file_info_moments,
-                                          t_idx, time_for_run, r, z)
-        @serial_region begin
-            # Only read/write from first process in each 'block'
-
-            if isa(io_or_file_info_moments, io_moments_info)
-                io_moments = io_or_file_info_moments
-                closefile = false
-            else
-                io_moments = reopen_moments_io(io_or_file_info_moments)
-                closefile = true
-            end
-
-            parallel_io = io_moments.parallel_io
-
-            # add the time for this time slice to the hdf5 file
-            append_to_dynamic_var(io_moments.time, t, t_idx, parallel_io)
-
-            # add the electrostatic potential and electric field components at this time slice to the hdf5 file
-            append_to_dynamic_var(io_moments.phi, fields.phi.data, t_idx, parallel_io, z,
-                                  r)
-            append_to_dynamic_var(io_moments.Er, fields.Er.data, t_idx, parallel_io, z, r)
-            append_to_dynamic_var(io_moments.Ez, fields.Ez.data, t_idx, parallel_io, z, r)
-
-            # add the density data at this time slice to the output file
-            append_to_dynamic_var(io_moments.density, moments.charged.dens.data, t_idx,
-                                  parallel_io, z, r, n_ion_species)
-            append_to_dynamic_var(io_moments.parallel_flow, moments.charged.upar.data,
-                                  t_idx, parallel_io, z, r, n_ion_species)
-            append_to_dynamic_var(io_moments.parallel_pressure, moments.charged.ppar.data,
-                                  t_idx, parallel_io, z, r, n_ion_species)
-            append_to_dynamic_var(io_moments.perpendicular_pressure, moments.charged.pperp.data,
-                                  t_idx, parallel_io, z, r, n_ion_species)
-            append_to_dynamic_var(io_moments.parallel_heat_flux,
-                                  moments.charged.qpar.data, t_idx, parallel_io, z, r,
-                                  n_ion_species)
-            append_to_dynamic_var(io_moments.thermal_speed, moments.charged.vth.data,
-                                  t_idx, parallel_io, z, r, n_ion_species)
-            append_to_dynamic_var(io_moments.chodura_integral_lower, moments.charged.chodura_integral_lower.data,
-                                  t_idx, parallel_io, r, n_ion_species)
-            append_to_dynamic_var(io_moments.chodura_integral_upper, moments.charged.chodura_integral_upper.data,
-                                  t_idx, parallel_io, r, n_ion_species)
-            if io_moments.external_source_amplitude !== nothing
-                append_to_dynamic_var(io_moments.external_source_amplitude,
-                                      moments.charged.external_source_amplitude.data,
-                                      t_idx, parallel_io, z, r)
-            end
-            if io_moments.external_source_controller_integral !== nothing
-                if size(moments.charged.external_source_controller_integral) == (1,1)
-                    append_to_dynamic_var(io_moments.external_source_controller_integral,
-                                          moments.charged.external_source_controller_integral[1,1],
-                                          t_idx, parallel_io)
-                else
-                    append_to_dynamic_var(io_moments.external_source_controller_integral,
-                                          moments.charged.external_source_controller_integral,data,
-                                          t_idx, parallel_io, z, r)
-                end
-            end
-            if n_neutral_species > 0
-                append_to_dynamic_var(io_moments.density_neutral,
-                                      moments.neutral.dens.data, t_idx, parallel_io, z, r,
-                                      n_neutral_species)
-                append_to_dynamic_var(io_moments.uz_neutral, moments.neutral.uz.data,
-                                      t_idx, parallel_io, z, r, n_neutral_species)
-                append_to_dynamic_var(io_moments.pz_neutral, moments.neutral.pz.data,
-                                      t_idx, parallel_io, z, r, n_neutral_species)
-                append_to_dynamic_var(io_moments.qz_neutral, moments.neutral.qz.data,
-                                      t_idx, parallel_io, z, r, n_neutral_species)
-                append_to_dynamic_var(io_moments.thermal_speed_neutral,
-                                      moments.neutral.vth.data, t_idx, parallel_io, z, r,
-                                      n_neutral_species)
-
-                if io_moments.external_source_neutral_amplitude !== nothing
-                    append_to_dynamic_var(io_moments.external_source_neutral_amplitude,
-                                          moments.neutral.external_source_amplitude,
-                                          t_idx, parallel_io, z, r)
-                end
-                if io_moments.external_source_neutral_controller_integral !== nothing
-                    if size(moments.neutral.external_source_neutral_controller_integral) == (1,1)
-                        append_to_dynamic_var(io_moments.external_source_neutral_controller_integral,
-                                              moments.neutral.external_source_controller_integral[1,1],
-                                              t_idx, parallel_io)
-                    else
-                        append_to_dynamic_var(io_moments.external_source_neutral_controller_integral,
-                                              moments.neutral.external_source_controller_integral,
-                                              t_idx, parallel_io, z, r)
-                    end
-                end
-            end
-
-            append_to_dynamic_var(io_moments.time_for_run, time_for_run, t_idx,
-                                  parallel_io)
-
-            closefile && close(io_moments.fid)
-        end
-        return nothing
-    end
-
-    # Special versions when using DebugMPISharedArray to avoid implicit conversion to
-    # Array, which is forbidden.
-    function write_dfns_data_to_binary(ff::DebugMPISharedArray,
-                                       ff_neutral::DebugMPISharedArray, moments, fields,
-                                       t, n_ion_species, n_neutral_species,
-                                       io_or_file_info_dfns, t_idx, r, z, vperp, vpa,
-                                       vzeta, vr, vz)
-        @serial_region begin
-            # Only read/write from first process in each 'block'
-
-            if isa(io_or_file_info_dfns, io_dfns_info)
-                io_dfns = io_or_file_info_dfns
-                closefile = false
-            else
-                io_dfns = reopen_dfns_io(io_or_file_info_dfns)
-                closefile = true
-            end
-
-            # Write the moments for this time slice to the output file.
-            # This also updates the time.
-            write_moments_data_to_binary(moments, fields, t, n_ion_species,
-                                         n_neutral_species, io_dfns.io_moments, t_idx, r,
-                                         z)
-
-            parallel_io = io_dfns.parallel_io
-
-            # add the distribution function data at this time slice to the output file
-            append_to_dynamic_var(io_dfns.f, ff.data, t_idx, parallel_io, vpa, vperp, z,
-                                  r, n_ion_species)
-            if n_neutral_species > 0
-                append_to_dynamic_var(io_dfns.f_neutral, ff_neutral.data, t_idx,
-                                      parallel_io, vz, vr, vzeta, z, r, n_neutral_species)
-            end
-
-            closefile && close(io_dfns.fid)
-        end
-        return nothing
-    end
-end
-
 """
 close all opened output files
 """
diff --git a/src/finite_differences.jl b/src/finite_differences.jl
index 6d87cc2cf..17f3c58c2 100644
--- a/src/finite_differences.jl
+++ b/src/finite_differences.jl
@@ -65,15 +65,10 @@ function elementwise_second_derivative!(coord, f, not_spectral::finite_differenc
         coord.bc, coord.igrid, coord.ielement)
 end
 
-function second_derivative!(df, f, Q, coord, spectral::finite_difference_info)
+function second_derivative!(df, f, coord, spectral::finite_difference_info)
     # Finite difference version must use an appropriate second derivative stencil, not
     # apply the 1st derivative twice as for the spectral element method
 
-    if !all(Q .== 1.0)
-        error("Finite difference implementation of second derivative does not support "
-              * "Q!=1.")
-    end
-
     # get the derivative at each grid point within each element and store in
     # coord.scratch_2d
     elementwise_second_derivative!(coord, f, spectral)
diff --git a/src/fokker_planck.jl b/src/fokker_planck.jl
new file mode 100644
index 000000000..82d2b3a9c
--- /dev/null
+++ b/src/fokker_planck.jl
@@ -0,0 +1,485 @@
+"""
+module for including the Full-F Fokker-Planck Collision Operator
+
+The functions in this module are split into two groups. 
+
+The first set of functions implements the weak-form
+collision operator using the Rosenbluth-MacDonald-Judd
+formulation in a divergence form. The Green's functions
+for the Rosenbluth potentials are used to obtain the Rosenbluth
+potentials at the boundaries. To find the potentials
+everywhere else elliptic solves of the PDEs for the
+Rosenbluth potentials are performed with Dirichlet
+boundary conditions. These routines provide the default collision operator
+used in the code.
+
+The second set of functions are used to set up the necessary arrays to 
+compute the Rosenbluth potentials everywhere in vpa, vperp
+by direct integration of the Green's functions. These functions are 
+supported for the purposes of testing and debugging.
+
+"""
+module fokker_planck
+
+
+export init_fokker_planck_collisions, fokkerplanck_arrays_struct
+export init_fokker_planck_collisions_weak_form
+export explicit_fokker_planck_collisions_weak_form!
+export explicit_fokker_planck_collisions!
+export calculate_Maxwellian_Rosenbluth_coefficients
+export get_local_Cssp_coefficients!, init_fokker_planck_collisions
+# testing
+export symmetric_matrix_inverse
+export fokker_planck_collision_operator_weak_form!
+
+using SpecialFunctions: ellipk, ellipe, erf
+using FastGaussQuadrature
+using Dates
+using LinearAlgebra: lu
+using ..type_definitions: mk_float, mk_int
+using ..array_allocation: allocate_float, allocate_shared_float
+using ..communication: MPISharedArray, global_rank, _block_synchronize
+using ..velocity_moments: integrate_over_vspace
+using ..velocity_moments: get_density, get_upar, get_ppar, get_pperp, get_qpar, get_pressure, get_rmom
+using ..looping
+using ..fokker_planck_calculus: init_Rosenbluth_potential_integration_weights!
+using ..fokker_planck_calculus: init_Rosenbluth_potential_boundary_integration_weights!
+using ..fokker_planck_calculus: allocate_boundary_integration_weights
+using ..fokker_planck_calculus: allocate_rosenbluth_potential_boundary_data
+using ..fokker_planck_calculus: fokkerplanck_arrays_direct_integration_struct, fokkerplanck_weakform_arrays_struct
+using ..fokker_planck_calculus: assemble_matrix_operators_dirichlet_bc
+using ..fokker_planck_calculus: assemble_matrix_operators_dirichlet_bc_sparse
+using ..fokker_planck_calculus: assemble_explicit_collision_operator_rhs_serial!
+using ..fokker_planck_calculus: assemble_explicit_collision_operator_rhs_parallel!
+using ..fokker_planck_calculus: assemble_explicit_collision_operator_rhs_parallel_analytical_inputs!
+using ..fokker_planck_calculus: calculate_YY_arrays, enforce_vpavperp_BCs!
+using ..fokker_planck_calculus: calculate_rosenbluth_potential_boundary_data!
+using ..fokker_planck_calculus: enforce_zero_bc!, elliptic_solve!, algebraic_solve!, ravel_c_to_vpavperp_parallel!
+using ..fokker_planck_calculus: calculate_rosenbluth_potentials_via_elliptic_solve!
+using ..fokker_planck_test: Cssp_fully_expanded_form, calculate_collisional_fluxes, H_Maxwellian, dGdvperp_Maxwellian
+using ..fokker_planck_test: d2Gdvpa2_Maxwellian, d2Gdvperpdvpa_Maxwellian, d2Gdvperp2_Maxwellian, dHdvpa_Maxwellian, dHdvperp_Maxwellian
+using ..fokker_planck_test: F_Maxwellian, dFdvpa_Maxwellian, dFdvperp_Maxwellian
+
+########################################################
+# begin functions associated with the weak-form operator
+# where the potentials are computed by elliptic solve
+########################################################
+
+"""
+Set up the arrays needed for the weak-form Fokker-Planck collision operator.
+
+Allocates the boundary integration weights (filled here only if `precompute_weights`
+is true and `vperp.n > 1`) and the Rosenbluth potential boundary data, assembles the 2D
+matrix operators (with `assemble_matrix_operators_dirichlet_bc` instead of the `_sparse`
+variant if `test_dense_matrix_construction` is true), LU-factorises the MM, LP, LV and LB
+matrices, and allocates work arrays with `allocate_shared_float`. Progress messages are
+printed on global rank 0 if `print_to_screen`. Returns a `fokkerplanck_weakform_arrays_struct`.
+"""
+function init_fokker_planck_collisions_weak_form(vpa,vperp,vpa_spectral,vperp_spectral;
+                                                 precompute_weights=false, test_dense_matrix_construction=false,
+                                                 print_to_screen=true)
+    bwgt = allocate_boundary_integration_weights(vpa,vperp)
+    if precompute_weights && vperp.n > 1
+        init_Rosenbluth_potential_boundary_integration_weights!(
+            bwgt.G0_weights, bwgt.G1_weights, bwgt.H0_weights, bwgt.H1_weights,
+            bwgt.H2_weights, bwgt.H3_weights, vpa, vperp, print_to_screen=print_to_screen)
+    end
+    rpbd = allocate_rosenbluth_potential_boundary_data(vpa,vperp)
+    # both assembly routines are called identically and destructured into the same outputs
+    assemble_operators = test_dense_matrix_construction ?
+        assemble_matrix_operators_dirichlet_bc : assemble_matrix_operators_dirichlet_bc_sparse
+    MM2D_sparse, KKpar2D_sparse, KKperp2D_sparse,
+    KKpar2D_with_BC_terms_sparse, KKperp2D_with_BC_terms_sparse,
+    LP2D_sparse, LV2D_sparse, LB2D_sparse, KPperp2D_sparse,
+    PUperp2D_sparse, PPparPUperp2D_sparse, PPpar2D_sparse,
+    MMparMNperp2D_sparse = assemble_operators(vpa,vperp,vpa_spectral,vperp_spectral,print_to_screen=print_to_screen)
+    lu_obj_MM = lu(MM2D_sparse)
+    lu_obj_LP = lu(LP2D_sparse)
+    lu_obj_LV = lu(LV2D_sparse)
+    lu_obj_LB = lu(LB2D_sparse)
+    @serial_region begin
+        print_to_screen && global_rank[] == 0 &&
+            println("finished LU decomposition initialisation   ", Dates.format(now(), dateformat"H:MM:SS"))
+    end
+
+    YY_arrays = calculate_YY_arrays(vpa,vperp,vpa_spectral,vperp_spectral)
+    @serial_region begin
+        print_to_screen && global_rank[] == 0 &&
+            println("finished YY array calculation   ", Dates.format(now(), dateformat"H:MM:SS"))
+    end
+    vspace_dims = (vpa.n, vperp.n)
+    nc = vpa.n*vperp.n
+    S_dummy = allocate_shared_float(vspace_dims...)
+    Q_dummy = allocate_shared_float(vspace_dims...)
+    rhsvpavperp = allocate_shared_float(vspace_dims...)
+    rhsc = allocate_shared_float(nc)
+    rhqc = allocate_shared_float(nc)
+    sc = allocate_shared_float(nc)
+    qc = allocate_shared_float(nc)
+
+    CC = allocate_shared_float(vspace_dims...)
+    GG = allocate_shared_float(vspace_dims...)
+    HH = allocate_shared_float(vspace_dims...)
+    dHdvpa = allocate_shared_float(vspace_dims...)
+    dHdvperp = allocate_shared_float(vspace_dims...)
+    dGdvperp = allocate_shared_float(vspace_dims...)
+    d2Gdvperp2 = allocate_shared_float(vspace_dims...)
+    d2Gdvpa2 = allocate_shared_float(vspace_dims...)
+    d2Gdvperpdvpa = allocate_shared_float(vspace_dims...)
+
+    FF = allocate_shared_float(vspace_dims...)
+    dFdvpa = allocate_shared_float(vspace_dims...)
+    dFdvperp = allocate_shared_float(vspace_dims...)
+
+    # arguments are passed positionally, so their order here matters
+    return fokkerplanck_weakform_arrays_struct(bwgt,rpbd,MM2D_sparse,KKpar2D_sparse,KKperp2D_sparse,
+                                           KKpar2D_with_BC_terms_sparse,KKperp2D_with_BC_terms_sparse,
+                                           LP2D_sparse,LV2D_sparse,LB2D_sparse,PUperp2D_sparse,PPparPUperp2D_sparse,
+                                           PPpar2D_sparse,MMparMNperp2D_sparse,KPperp2D_sparse,
+                                           lu_obj_MM,lu_obj_LP,lu_obj_LV,lu_obj_LB,
+                                           YY_arrays, S_dummy, Q_dummy, rhsvpavperp, rhsc, rhqc, sc, qc,
+                                           CC, GG, HH, dHdvpa, dHdvperp, dGdvperp, d2Gdvperp2, d2Gdvpa2, d2Gdvperpdvpa,
+                                           FF, dFdvpa, dFdvperp)
+end
+
+"""
+Advance `pdf_out` by `dt` times the explicit, weak-form, self-collision operator.
+
+For each species, r and z point, `C[Fs,Fs]` is evaluated from `pdf_in` into
+`fkpl_arrays.CC`, boundary conditions and ad-hoc conserving corrections are applied, and
+`dt*CC` is added to `pdf_out`. If `diagnose_entropy_production` is true, `dSdt[iz,ir,is]`
+is set to minus `get_density` of `log(abs(pdf_in) + 1.0e-15)*CC`. The keyword arguments
+`test_assembly_serial` and `impose_zero_gradient_BC` are accepted but unused here.
+"""
+function explicit_fokker_planck_collisions_weak_form!(pdf_out,pdf_in,dSdt,composition,collisions,dt,
+                                             fkpl_arrays::fokkerplanck_weakform_arrays_struct,
+                                             r, z, vperp, vpa, vperp_spectral, vpa_spectral, scratch_dummy;
+                                             test_assembly_serial=false,impose_zero_gradient_BC=false,
+                                             diagnose_entropy_production=false)
+    # N.B. only self-collisions are supported; generalise by adding a loop over s' below
+    n_ion_species = composition.n_ion_species
+    @boundscheck vpa.n == size(pdf_out,1) || throw(BoundsError(pdf_out))
+    @boundscheck vperp.n == size(pdf_out,2) || throw(BoundsError(pdf_out))
+    @boundscheck z.n == size(pdf_out,3) || throw(BoundsError(pdf_out))
+    @boundscheck r.n == size(pdf_out,4) || throw(BoundsError(pdf_out))
+    @boundscheck n_ion_species == size(pdf_out,5) || throw(BoundsError(pdf_out))
+    @boundscheck vpa.n == size(pdf_in,1) || throw(BoundsError(pdf_in))
+    @boundscheck vperp.n == size(pdf_in,2) || throw(BoundsError(pdf_in))
+    @boundscheck z.n == size(pdf_in,3) || throw(BoundsError(pdf_in))
+    @boundscheck r.n == size(pdf_in,4) || throw(BoundsError(pdf_in))
+    @boundscheck n_ion_species == size(pdf_in,5) || throw(BoundsError(pdf_in))
+    @boundscheck z.n == size(dSdt,1) || throw(BoundsError(dSdt))
+    @boundscheck r.n == size(dSdt,2) || throw(BoundsError(dSdt))
+    @boundscheck n_ion_species == size(dSdt,3) || throw(BoundsError(dSdt))
+    ms, msp = 1.0, 1.0 # masses; generalise!
+    nussp = collisions.nuii # collision frequency; generalise!
+    Css = scratch_dummy.buffer_vpavperp_1
+    # N.B. parallelisation is only over vpa vperp
+    # ensure s, r, z are local before initiating the s, r, z loop
+    begin_vperp_vpa_region()
+    @loop_s_r_z is ir iz begin
+        # the functions called in this loop use begin_*_region() to synchronise the
+        # shared-memory arrays; first argument is Fs, second is Fs' in C[Fs,Fs']
+        @views fokker_planck_collision_operator_weak_form!(pdf_in[:,:,iz,ir,is],pdf_in[:,:,iz,ir,is],ms,msp,nussp,
+                                             fkpl_arrays,vperp,vpa,vperp_spectral,vpa_spectral)
+        # enforce the boundary conditions on CC before it is used for timestepping
+        enforce_vpavperp_BCs!(fkpl_arrays.CC,vpa,vperp,vpa_spectral,vperp_spectral)
+        # make ad-hoc conserving corrections; @views avoids copying the pdf_in slice
+        @views conserving_corrections!(fkpl_arrays.CC,pdf_in[:,:,iz,ir,is],vpa,vperp,scratch_dummy.dummy_vpavperp)
+        # advance this part of s,r,z with the resulting C[Fs,Fs]
+        begin_vperp_vpa_region()
+        @loop_vperp_vpa ivperp ivpa begin
+            Css[ivpa,ivperp] = fkpl_arrays.CC[ivpa,ivperp]
+            pdf_out[ivpa,ivperp,iz,ir,is] += dt*Css[ivpa,ivperp]
+        end
+        if diagnose_entropy_production
+            lnfC = fkpl_arrays.rhsvpavperp # reuse this buffer as a dummy array
+            @loop_vperp_vpa ivperp ivpa begin
+                lnfC[ivpa,ivperp] = log(abs(pdf_in[ivpa,ivperp,iz,ir,is]) + 1.0e-15)*Css[ivpa,ivperp]
+            end
+            begin_serial_region()
+            @serial_region begin
+                dSdt[iz,ir,is] = -get_density(lnfC,vpa,vperp)
+            end
+            begin_vperp_vpa_region()
+        end
+    end
+    return nothing
+end
+
+
+"""
+Evaluate the weak-form collision operator \$C_{ss'} = C_{ss'}[F_s,F_{s'}]\$. The Rosenbluth
+potentials are obtained from `ffsp_in`; `ffs_in` (or a Maxwellian built from its moments)
+enters the right-hand-side assembly. The result is stored in the array `fkpl_arrays.CC`.
+
+The normalised collision frequency is defined by
+```math
+\\nu_{ss'} = \\frac{\\gamma_{ss'} n_\\mathrm{ref}}{2 m_s^2 c_\\mathrm{ref}^3}
+```
+with \$\\gamma_{ss'} = 2 \\pi (Z_s Z_{s'})^2 e^4 \\ln \\Lambda_{ss'} / (4 \\pi
+\\epsilon_0)^2\$.
+"""
+function fokker_planck_collision_operator_weak_form!(ffs_in,ffsp_in,ms,msp,nussp,
+                                             fkpl_arrays::fokkerplanck_weakform_arrays_struct,
+                                             vperp, vpa, vperp_spectral, vpa_spectral;
+                                             test_assembly_serial=false,
+                                             use_Maxwellian_Rosenbluth_coefficients=false,
+                                             use_Maxwellian_field_particle_distribution=false,
+                                             algebraic_solve_for_d2Gdvperp2 = false,
+                                             calculate_GG=false,
+                                             calculate_dGdvperp=false)
+    @boundscheck vpa.n == size(ffsp_in,1) || throw(BoundsError(ffsp_in))
+    @boundscheck vperp.n == size(ffsp_in,2) || throw(BoundsError(ffsp_in))
+    @boundscheck vpa.n == size(ffs_in,1) || throw(BoundsError(ffs_in))
+    @boundscheck vperp.n == size(ffs_in,2) || throw(BoundsError(ffs_in))
+    # the functions called within this function will themselves call
+    # begin_vpa_region(), begin_vperp_region(), begin_vperp_vpa_region(), begin_serial_region() to synchronise the shared-memory arrays
+    
+    # extract the necessary precalculated and buffer arrays from fkpl_arrays
+    rhsc = fkpl_arrays.rhsc
+    sc = fkpl_arrays.sc
+    rhsvpavperp = fkpl_arrays.rhsvpavperp
+    lu_obj_MM = fkpl_arrays.lu_obj_MM
+    YY_arrays = fkpl_arrays.YY_arrays    
+    
+    CC = fkpl_arrays.CC
+    GG = fkpl_arrays.GG
+    HH = fkpl_arrays.HH
+    dHdvpa = fkpl_arrays.dHdvpa
+    dHdvperp = fkpl_arrays.dHdvperp
+    dGdvperp = fkpl_arrays.dGdvperp
+    d2Gdvperp2 = fkpl_arrays.d2Gdvperp2
+    d2Gdvpa2 = fkpl_arrays.d2Gdvpa2
+    d2Gdvperpdvpa = fkpl_arrays.d2Gdvperpdvpa
+    FF = fkpl_arrays.FF
+    dFdvpa = fkpl_arrays.dFdvpa
+    dFdvperp = fkpl_arrays.dFdvperp
+    
+    if use_Maxwellian_Rosenbluth_coefficients # analytical potentials from the moments of ffsp_in
+        begin_serial_region()
+        dens = get_density(@view(ffsp_in[:,:]),vpa,vperp)
+        upar = get_upar(@view(ffsp_in[:,:]), vpa, vperp, dens)
+        ppar = get_ppar(@view(ffsp_in[:,:]), vpa, vperp, upar)
+        pperp = get_pperp(@view(ffsp_in[:,:]), vpa, vperp)
+        pressure = get_pressure(ppar,pperp)
+        vth = sqrt(2.0*pressure/dens)
+        begin_vperp_vpa_region()
+        @loop_vperp_vpa ivperp ivpa begin
+            HH[ivpa,ivperp] = H_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
+            d2Gdvpa2[ivpa,ivperp] = d2Gdvpa2_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
+            d2Gdvperp2[ivpa,ivperp] = d2Gdvperp2_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
+            dGdvperp[ivpa,ivperp] = dGdvperp_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
+            d2Gdvperpdvpa[ivpa,ivperp] = d2Gdvperpdvpa_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
+            dHdvpa[ivpa,ivperp] = dHdvpa_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
+            dHdvperp[ivpa,ivperp] = dHdvperp_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
+        end
+        # Need to synchronize as these arrays may be read outside the locally-owned set of
+        # ivperp, ivpa indices in assemble_explicit_collision_operator_rhs_parallel!()
+        _block_synchronize()
+    else # potentials computed numerically from ffsp_in
+        calculate_rosenbluth_potentials_via_elliptic_solve!(GG,HH,dHdvpa,dHdvperp,
+             d2Gdvpa2,dGdvperp,d2Gdvperpdvpa,d2Gdvperp2,@view(ffsp_in[:,:]),
+             vpa,vperp,vpa_spectral,vperp_spectral,fkpl_arrays,
+             algebraic_solve_for_d2Gdvperp2=algebraic_solve_for_d2Gdvperp2,
+             calculate_GG=calculate_GG,calculate_dGdvperp=calculate_dGdvperp)
+    end
+    # assemble the RHS of the collision operator matrix eq (into rhsc), using one of three routines
+    if use_Maxwellian_field_particle_distribution # use a Maxwellian built from the moments of ffs_in
+        begin_serial_region()
+        dens = get_density(ffs_in,vpa,vperp)
+        upar = get_upar(ffs_in, vpa, vperp, dens)
+        ppar = get_ppar(ffs_in, vpa, vperp, upar)
+        pperp = get_pperp(ffs_in, vpa, vperp)
+        pressure = get_pressure(ppar,pperp)
+        vth = sqrt(2.0*pressure/dens)
+        begin_vperp_vpa_region()
+        @loop_vperp_vpa ivperp ivpa begin
+            FF[ivpa,ivperp] = F_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
+            dFdvpa[ivpa,ivperp] = dFdvpa_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
+            dFdvperp[ivpa,ivperp] = dFdvperp_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
+        end
+        # Need to synchronize as FF, dFdvpa, dFdvperp may be read outside the
+        # locally-owned set of ivperp, ivpa indices in
+        # assemble_explicit_collision_operator_rhs_parallel_analytical_inputs!()
+        _block_synchronize()
+        assemble_explicit_collision_operator_rhs_parallel_analytical_inputs!(rhsc,rhsvpavperp,
+          FF,dFdvpa,dFdvperp,
+          d2Gdvpa2,d2Gdvperpdvpa,d2Gdvperp2,
+          dHdvpa,dHdvperp,ms,msp,nussp,
+          vpa,vperp,YY_arrays)
+    elseif test_assembly_serial # serial assembly routine (takes no rhsvpavperp buffer)
+        assemble_explicit_collision_operator_rhs_serial!(rhsc,@view(ffs_in[:,:]),
+          d2Gdvpa2,d2Gdvperpdvpa,d2Gdvperp2,
+          dHdvpa,dHdvperp,ms,msp,nussp,
+          vpa,vperp,YY_arrays)
+    else
+        assemble_explicit_collision_operator_rhs_parallel!(rhsc,rhsvpavperp,@view(ffs_in[:,:]),
+          d2Gdvpa2,d2Gdvperpdvpa,d2Gdvperp2,
+          dHdvpa,dHdvperp,ms,msp,nussp,
+          vpa,vperp,YY_arrays)
+    end
+    # solve the collision operator matrix eq
+    begin_serial_region()
+    @serial_region begin
+        # invert the mass matrix using its precomputed LU factorisation, filling sc
+        sc .= lu_obj_MM \ rhsc
+    end
+    ravel_c_to_vpavperp_parallel!(CC,sc,vpa.n) # fill the result array CC from sc
+    return nothing
+end
+
+# Solve the 3x3 symmetric linear system A x = b, where A has the sparsity pattern
+#   A00  0    A02
+#   0    A11  A12
+#   A02  A12  A22
+# (the form appropriate for the numerical moment-conserving terms).
+# Uses x = C * b / det(A), with C the (symmetric) cofactor matrix of A.
+# Returns the tuple (x0, x1, x2). No check is made for det(A) == 0.
+function symmetric_matrix_inverse(A00,A02,A11,A12,A22,b0,b1,b2)
+    # independent entries of the cofactor matrix
+    cof00 = A11*A22 - A12^2
+    cof01 = A12*A02
+    cof02 = -A11*A02
+    cof11 = A00*A22 - A02^2
+    cof12 = -A00*A12
+    cof22 = A00*A11
+    # determinant of A
+    determinant = A00*(A11*A22 - A12^2) - A11*A02^2
+    # one component of the solution: a row of the cofactor matrix dotted with b,
+    # divided by the determinant
+    solve_component(c0, c1, c2) = ( c0*b0 + c1*b1 + c2*b2 )/determinant
+    return (solve_component(cof00, cof01, cof02),
+            solve_component(cof01, cof11, cof12),
+            solve_component(cof02, cof12, cof22))
+end
+
+# Solve the 3x3 symmetric linear system A x = b for the general symmetric matrix
+#   A00  A01  A02
+#   A01  A11  A12
+#   A02  A12  A22
+# (the form appropriate for the numerical moment-conserving terms).
+# Uses x = C * b / det(A), with C the (symmetric) cofactor matrix of A.
+# Returns the tuple (x0, x1, x2). No check is made for det(A) == 0.
+function symmetric_matrix_inverse(A00,A01,A02,A11,A12,A22,b0,b1,b2)
+    # independent entries of the cofactor matrix
+    cof00 = A11*A22 - A12^2
+    cof01 = A12*A02 - A01*A22
+    cof02 = A01*A12 - A11*A02
+    cof11 = A00*A22 - A02^2
+    cof12 = A01*A02 - A00*A12
+    cof22 = A00*A11 - A01^2
+    # determinant of A, expanded along the first row
+    determinant = A00*(A11*A22 - A12^2) - A01*(A01*A22 - A12*A02) + A02*(A01*A12 - A11*A02)
+    # one component of the solution: a row of the cofactor matrix dotted with b,
+    # divided by the determinant
+    solve_component(c0, c1, c2) = ( c0*b0 + c1*b1 + c2*b2 )/determinant
+    return (solve_component(cof00, cof01, cof02),
+            solve_component(cof01, cof11, cof12),
+            solve_component(cof02, cof12, cof22))
+end
+
+# Subtract from CC a correction of the form (x0 + x1*wpar + x2*(vperp^2 + wpar^2))*pdf_in,
+# with wpar = vpa - upar, where (x0, x1, x2) solve a 3x3 system whose matrix is built from
+# the moments of pdf_in and whose right-hand side is built from the moments of CC.
+function conserving_corrections!(CC,pdf_in,vpa,vperp,dummy_vpavperp)
+    begin_serial_region()
+    pdf = @view(pdf_in[:,:])
+    # moments of the input pdf
+    dens = get_density(pdf, vpa, vperp)
+    upar = get_upar(pdf, vpa, vperp, dens)
+    ppar = get_ppar(pdf, vpa, vperp, upar)
+    pperp = get_pperp(pdf, vpa, vperp)
+    pressure = get_pressure(ppar,pperp)
+    qpar = get_qpar(pdf, vpa, vperp, upar, dummy_vpavperp)
+    rmom = get_rmom(pdf, vpa, vperp, upar, dummy_vpavperp)
+    # moments of the numerical collision operator
+    dens_C = get_density(CC, vpa, vperp)
+    upar_C = get_upar(CC, vpa, vperp, 1.0)
+    ppar_C = get_ppar(CC, vpa, vperp, upar)
+    pperp_C = get_pperp(CC, vpa, vperp)
+    pressure_C = get_pressure(ppar_C,pperp_C)
+    # coefficients of the correction: arguments are the matrix entries
+    # (A00, A02, A11, A12, A22) followed by the right-hand side (b0, b1, b2)
+    (x0, x1, x2) = symmetric_matrix_inverse(dens, 3.0*pressure, ppar, qpar, rmom,
+                                            dens_C, upar_C - upar*dens_C, 3.0*pressure_C)
+    # apply the correction to CC in place
+    begin_vperp_vpa_region()
+    @loop_vperp_vpa ivperp ivpa begin
+        wpar = vpa.grid[ivpa] - upar
+        correction = x0 + x1*wpar + x2*(vperp.grid[ivperp]^2 + wpar^2)
+        CC[ivpa,ivperp] -= correction*pdf_in[ivpa,ivperp]
+    end
+end
+
+
+######################################################
+# end functions associated with the weak-form operator
+# where the potentials are computed by elliptic solve
+######################################################
+
+
+
+##########################################################
+# begin functions associated with the direct integration
+# method for computing the Rosenbluth potentials
+##########################################################
+
+
+"""
+Allocate the ancillary arrays required by the direct integration method and return them
+in a `fokkerplanck_arrays_direct_integration_struct`. The six weight arrays have size
+`(vpa.n,vperp.n,vpa.n,vperp.n)`, all other arrays `(vpa.n,vperp.n)`. The two
+`buffer_vpavperp_*` arrays come from `allocate_float`, the rest from
+`allocate_shared_float`.
+"""
+function allocate_fokkerplanck_arrays_direct_integration(vperp,vpa)
+    nvpa, nvperp = vpa.n, vperp.n
+    # short-hands for the three kinds of allocation used below
+    new_weights() = allocate_shared_float(nvpa,nvperp,nvpa,nvperp)
+    new_shared() = allocate_shared_float(nvpa,nvperp)
+    new_buffer() = allocate_float(nvpa,nvperp)
+    # arguments are evaluated left to right, so allocation follows the struct field order
+    return fokkerplanck_arrays_direct_integration_struct(
+        new_weights(), # G0_weights
+        new_weights(), # G1_weights
+        new_weights(), # H0_weights
+        new_weights(), # H1_weights
+        new_weights(), # H2_weights
+        new_weights(), # H3_weights
+        new_shared(), # GG
+        new_shared(), # d2Gdvpa2
+        new_shared(), # d2Gdvperpdvpa
+        new_shared(), # d2Gdvperp2
+        new_shared(), # dGdvperp
+        new_shared(), # HH
+        new_shared(), # dHdvpa
+        new_shared(), # dHdvperp
+        new_buffer(), # buffer_vpavperp_1
+        new_buffer(), # buffer_vpavperp_2
+        new_shared(), # Cssp_result_vpavperp
+        new_shared(), # dfdvpa
+        new_shared(), # d2fdvpa2
+        new_shared(), # d2fdvperpdvpa
+        new_shared(), # dfdvperp
+        new_shared()) # d2fdvperp2
+end
+
+"""
+Set up the arrays needed to calculate the Rosenbluth potentials by direct integration.
+The integration weights are precomputed only when `precompute_weights=true` and
+`vperp.n > 1`. This function is only kept to support testing of the direct integration
+method: the struct returned here does not contain all of the arrays necessary to compute
+the weak-form operator. That functionality could be ported if necessary.
+"""
+function init_fokker_planck_collisions_direct_integration(vperp,vpa; precompute_weights=false, print_to_screen=false)
+    arrays = allocate_fokkerplanck_arrays_direct_integration(vperp,vpa)
+    (precompute_weights && vperp.n > 1) || return arrays
+    init_Rosenbluth_potential_integration_weights!(arrays.G0_weights, arrays.G1_weights,
+        arrays.H0_weights, arrays.H1_weights, arrays.H2_weights, arrays.H3_weights,
+        vperp, vpa, print_to_screen=print_to_screen)
+    return arrays
+end
+
+
+end
diff --git a/src/fokker_planck_calculus.jl b/src/fokker_planck_calculus.jl
new file mode 100644
index 000000000..46d4d7111
--- /dev/null
+++ b/src/fokker_planck_calculus.jl
@@ -0,0 +1,2287 @@
+"""
+module for functions used 
+in calculating the integrals and doing 
+the numerical differentiation for 
+the implementation of the 
+Full-F Fokker-Planck Collision Operator
+"""
+module fokker_planck_calculus
+
+export assemble_matrix_operators_dirichlet_bc
+export assemble_matrix_operators_dirichlet_bc_sparse
+export assemble_explicit_collision_operator_rhs_serial!
+export assemble_explicit_collision_operator_rhs_parallel!
+export assemble_explicit_collision_operator_rhs_parallel_analytical_inputs!
+export YY_collision_operator_arrays, calculate_YY_arrays
+export calculate_rosenbluth_potential_boundary_data!
+export elliptic_solve!, algebraic_solve!
+export fokkerplanck_arrays_direct_integration_struct
+export fokkerplanck_weakform_arrays_struct
+export enforce_vpavperp_BCs!
+export calculate_rosenbluth_potentials_via_elliptic_solve!
+
+# testing
+export calculate_rosenbluth_potential_boundary_data_exact!
+export ravel_c_to_vpavperp!, ravel_vpavperp_to_c!
+export enforce_zero_bc!, ravel_c_to_vpavperp_parallel!
+export allocate_rosenbluth_potential_boundary_data
+export calculate_rosenbluth_potential_boundary_data_exact!
+export test_rosenbluth_potential_boundary_data
+
+using ..type_definitions: mk_float, mk_int
+using ..array_allocation: allocate_float, allocate_shared_float
+using ..calculus: derivative!
+using ..communication
+using ..communication: MPISharedArray, global_rank
+using ..looping
+using moment_kinetics.gauss_legendre: get_QQ_local!
+using Dates
+using SpecialFunctions: ellipk, ellipe
+using SparseArrays: sparse, AbstractSparseArray
+using SuiteSparse
+using LinearAlgebra: mul!, LU
+using FastGaussQuadrature
+using Printf
+
+# Print `name`, then rows 1:n and columns 1:m of `matrix`, one row per output line,
+# with each entry formatted to two decimal places.
+function print_matrix(matrix,name::String,n::mk_int,m::mk_int)
+    println("\n ",name," \n")
+    for irow in 1:n
+        foreach(icol -> @printf("%.2f ", matrix[irow,icol]), 1:m)
+        println("")
+    end
+    println("\n")
+end
+
+# Print `name`, then entries 1:m of `vector` on a single output line,
+# with each entry formatted to three decimal places.
+function print_vector(vector,name::String,m::mk_int)
+    println("\n ",name," \n")
+    foreach(idx -> @printf("%.3f ", vector[idx]), 1:m)
+    println("")
+    println("\n")
+end
+
+"""
+a struct of dummy arrays and precalculated coefficients
+for the strong-form Fokker-Planck collision operator, used when the
+Rosenbluth potentials are calculated by direct integration
+"""
+struct fokkerplanck_arrays_direct_integration_struct
+    # integration weights, filled with the index order [ivpap,ivperpp,ivpa,ivperp]
+    G0_weights::MPISharedArray{mk_float,4}
+    G1_weights::MPISharedArray{mk_float,4}
+    H0_weights::MPISharedArray{mk_float,4}
+    H1_weights::MPISharedArray{mk_float,4}
+    H2_weights::MPISharedArray{mk_float,4}
+    H3_weights::MPISharedArray{mk_float,4}
+    GG::MPISharedArray{mk_float,2}
+    d2Gdvpa2::MPISharedArray{mk_float,2}
+    d2Gdvperpdvpa::MPISharedArray{mk_float,2}
+    d2Gdvperp2::MPISharedArray{mk_float,2}
+    dGdvperp::MPISharedArray{mk_float,2}
+    HH::MPISharedArray{mk_float,2}
+    dHdvpa::MPISharedArray{mk_float,2}
+    dHdvperp::MPISharedArray{mk_float,2}
+    # the two buffers are plain `Array`s rather than `MPISharedArray`s
+    buffer_vpavperp_1::Array{mk_float,2}
+    buffer_vpavperp_2::Array{mk_float,2}
+    Cssp_result_vpavperp::MPISharedArray{mk_float,2}
+    dfdvpa::MPISharedArray{mk_float,2}
+    d2fdvpa2::MPISharedArray{mk_float,2}
+    d2fdvperpdvpa::MPISharedArray{mk_float,2}
+    dfdvperp::MPISharedArray{mk_float,2}
+    d2fdvperp2::MPISharedArray{mk_float,2}
+end
+
+"""
+a struct to contain the integration weights for the boundary points
+in the (vpa,vperp) domain, with one 3D array of weights per boundary
+"""
+struct boundary_integration_weights_struct
+    lower_vpa_boundary::MPISharedArray{mk_float,3} # filled as [ivpap,ivperpp,ivperp]
+    upper_vpa_boundary::MPISharedArray{mk_float,3} # filled as [ivpap,ivperpp,ivperp]
+    upper_vperp_boundary::MPISharedArray{mk_float,3} # filled as [ivpap,ivperpp,ivpa]
+end
+
+"""
+a struct used for calculating the integration weights for the 
+boundary of the velocity space domain in (vpa,vperp) coordinates:
+six sets of boundary weights plus three 2D arrays (presumably pdf-derivative buffers)
+"""
+struct fokkerplanck_boundary_data_arrays_struct
+    G0_weights::boundary_integration_weights_struct
+    G1_weights::boundary_integration_weights_struct
+    H0_weights::boundary_integration_weights_struct
+    H1_weights::boundary_integration_weights_struct
+    H2_weights::boundary_integration_weights_struct
+    H3_weights::boundary_integration_weights_struct
+    dfdvpa::MPISharedArray{mk_float,2} # allocated with size (vpa.n,vperp.n), as are the next two
+    d2fdvperpdvpa::MPISharedArray{mk_float,2}
+    dfdvperp::MPISharedArray{mk_float,2}    
+end
+
+struct vpa_vperp_boundary_data # one 1D array per boundary of the (vpa,vperp) domain
+    lower_boundary_vpa::MPISharedArray{mk_float,1}
+    upper_boundary_vpa::MPISharedArray{mk_float,1}
+    upper_boundary_vperp::MPISharedArray{mk_float,1}
+end
+
+struct rosenbluth_potential_boundary_data # one vpa_vperp_boundary_data per quantity below
+    H_data::vpa_vperp_boundary_data
+    dHdvpa_data::vpa_vperp_boundary_data
+    dHdvperp_data::vpa_vperp_boundary_data
+    G_data::vpa_vperp_boundary_data
+    dGdvperp_data::vpa_vperp_boundary_data
+    d2Gdvperp2_data::vpa_vperp_boundary_data
+    d2Gdvperpdvpa_data::vpa_vperp_boundary_data
+    d2Gdvpa2_data::vpa_vperp_boundary_data
+end
+
+struct YY_collision_operator_arrays
+    # phi_j is the jth Lagrange basis function on the iel^th element, phi'_j its first
+    # derivative. NOTE(review): the `vpa` weight written in the YY*par integrals mirrors
+    # the `vperp` weight of the YY*perp ones -- confirm against calculate_YY_arrays.
+    # YY0perp[i,j,k,iel] = \int phi_i(vperp) phi_j(vperp) phi_k(vperp) vperp d vperp
+    YY0perp::Array{mk_float,4}
+    # YY1perp[i,j,k,iel] = \int phi_i(vperp) phi_j(vperp) phi'_k(vperp) vperp d vperp
+    YY1perp::Array{mk_float,4}
+    # YY2perp[i,j,k,iel] = \int phi_i(vperp) phi'_j(vperp) phi'_k(vperp) vperp d vperp
+    YY2perp::Array{mk_float,4}
+    # YY3perp[i,j,k,iel] = \int phi_i(vperp) phi'_j(vperp) phi_k(vperp) vperp d vperp
+    YY3perp::Array{mk_float,4}
+    # YY0par[i,j,k,iel] = \int phi_i(vpa) phi_j(vpa) phi_k(vpa) vpa d vpa
+    YY0par::Array{mk_float,4}
+    # YY1par[i,j,k,iel] = \int phi_i(vpa) phi_j(vpa) phi'_k(vpa) vpa d vpa
+    YY1par::Array{mk_float,4}
+    # YY2par[i,j,k,iel] = \int phi_i(vpa) phi'_j(vpa) phi'_k(vpa) vpa d vpa
+    YY2par::Array{mk_float,4}
+    # YY3par[i,j,k,iel] = \int phi_i(vpa) phi'_j(vpa) phi_k(vpa) vpa d vpa
+    YY3par::Array{mk_float,4}
+end
+
+"""
+a struct of dummy arrays and precalculated coefficients
+for the weak-form Fokker-Planck collision operator; `N` is the dimension parameter of the sparse arrays
+"""
+struct fokkerplanck_weakform_arrays_struct{N}
+    # boundary weights (Green's function) data
+    bwgt::fokkerplanck_boundary_data_arrays_struct
+    # dummy arrays for boundary data calculation
+    rpbd::rosenbluth_potential_boundary_data
+    # assembled 2D weak-form matrices (NOTE(review): AbstractSparseArray is an abstract field type)
+    MM2D_sparse::AbstractSparseArray{mk_float,mk_int,N}
+    KKpar2D_sparse::AbstractSparseArray{mk_float,mk_int,N}
+    KKperp2D_sparse::AbstractSparseArray{mk_float,mk_int,N}
+    KKpar2D_with_BC_terms_sparse::AbstractSparseArray{mk_float,mk_int,N}
+    KKperp2D_with_BC_terms_sparse::AbstractSparseArray{mk_float,mk_int,N}
+    LP2D_sparse::AbstractSparseArray{mk_float,mk_int,N}
+    LV2D_sparse::AbstractSparseArray{mk_float,mk_int,N}
+    LB2D_sparse::AbstractSparseArray{mk_float,mk_int,N}
+    PUperp2D_sparse::AbstractSparseArray{mk_float,mk_int,N}
+    PPparPUperp2D_sparse::AbstractSparseArray{mk_float,mk_int,N}
+    PPpar2D_sparse::AbstractSparseArray{mk_float,mk_int,N}
+    MMparMNperp2D_sparse::AbstractSparseArray{mk_float,mk_int,N}
+    KPperp2D_sparse::AbstractSparseArray{mk_float,mk_int,N}
+    # lu decomposition objects (lu_obj_MM is used to solve for the collision operator result)
+    lu_obj_MM::SuiteSparse.UMFPACK.UmfpackLU{mk_float,mk_int}
+    lu_obj_LP::SuiteSparse.UMFPACK.UmfpackLU{mk_float,mk_int}
+    lu_obj_LV::SuiteSparse.UMFPACK.UmfpackLU{mk_float,mk_int}
+    lu_obj_LB::SuiteSparse.UMFPACK.UmfpackLU{mk_float,mk_int}
+    # elemental matrices for the assembly of C[Fs,Fsp]
+    YY_arrays::YY_collision_operator_arrays
+    # dummy arrays for elliptic solvers
+    S_dummy::MPISharedArray{mk_float,2}
+    Q_dummy::MPISharedArray{mk_float,2}
+    rhsvpavperp::MPISharedArray{mk_float,2}
+    rhsc::MPISharedArray{mk_float,1}
+    rhqc::MPISharedArray{mk_float,1}
+    sc::MPISharedArray{mk_float,1}
+    qc::MPISharedArray{mk_float,1}
+    # dummy array for the result of the calculation
+    CC::MPISharedArray{mk_float,2}
+    # dummy arrays for storing Rosenbluth potentials
+    GG::MPISharedArray{mk_float,2}
+    HH::MPISharedArray{mk_float,2}
+    dHdvpa::MPISharedArray{mk_float,2}
+    dHdvperp::MPISharedArray{mk_float,2}
+    dGdvperp::MPISharedArray{mk_float,2}
+    d2Gdvperp2::MPISharedArray{mk_float,2}
+    d2Gdvpa2::MPISharedArray{mk_float,2}
+    d2Gdvperpdvpa::MPISharedArray{mk_float,2}
+    FF::MPISharedArray{mk_float,2} # FF, dFdvpa, dFdvperp: filled with Maxwellian values in fokker_planck_collision_operator_weak_form!
+    dFdvpa::MPISharedArray{mk_float,2}
+    dFdvperp::MPISharedArray{mk_float,2}
+end
+
+# Allocate the three shared arrays of one `boundary_integration_weights_struct`:
+# size (vpa.n,vperp.n,vperp.n) for each vpa boundary, (vpa.n,vperp.n,vpa.n) for vperp.
+function allocate_boundary_integration_weight(vpa,vperp)
+    nvpa, nvperp = vpa.n, vperp.n
+    return boundary_integration_weights_struct(
+        allocate_shared_float(nvpa,nvperp,nvperp), # lower_vpa_boundary
+        allocate_shared_float(nvpa,nvperp,nvperp), # upper_vpa_boundary
+        allocate_shared_float(nvpa,nvperp,nvpa)) # upper_vperp_boundary
+end
+
+# Allocate the six sets of boundary integration weights (G0, G1, H0, H1, H2, H3),
+# each via allocate_boundary_integration_weight, together with three shared arrays
+# of size (vpa.n,vperp.n), and return them bundled in a
+# `fokkerplanck_boundary_data_arrays_struct`.
+function allocate_boundary_integration_weights(vpa,vperp)
+    new_weights() = allocate_boundary_integration_weight(vpa,vperp)
+    new_shared() = allocate_shared_float(vpa.n,vperp.n)
+    # arguments are evaluated left to right, i.e. in the field order of the struct
+    return fokkerplanck_boundary_data_arrays_struct(
+        new_weights(), new_weights(), # G0_weights, G1_weights
+        new_weights(), new_weights(), # H0_weights, H1_weights
+        new_weights(), new_weights(), # H2_weights, H3_weights
+        new_shared(), # dfdvpa
+        new_shared(), # d2fdvperpdvpa
+        new_shared()) # dfdvperp
+end
+
+
+"""
+Precompute the integration weights used to obtain the Rosenbluth potentials by direct
+integration.
+
+For each point `(ivpa,ivperp)` visited by `@loop_vperp_vpa`, the slices
+`X_weights[:,:,ivpa,ivperp]` (X = G0, G1, H0, H1, H2, H3) are zeroed and then passed to
+`loop_over_vperp_vpa_elements!` together with the quadratures from
+`setup_basic_quadratures`. Global rank 0 prints progress when `print_to_screen=true`.
+"""
+function init_Rosenbluth_potential_integration_weights!(G0_weights,G1_weights,H0_weights,H1_weights,H2_weights,H3_weights,vperp,vpa;print_to_screen=true)
+    x_vpa, w_vpa, x_vperp, w_vperp, x_legendre, w_legendre, x_laguerre, w_laguerre = setup_basic_quadratures(vpa,vperp,print_to_screen=print_to_screen)
+
+    @serial_region begin
+        global_rank[] == 0 && print_to_screen &&
+            println("beginning weights calculation   ", Dates.format(now(), dateformat"H:MM:SS"))
+    end
+
+    # precalculated weights, integrating over Lagrange polynomials
+    begin_vperp_vpa_region()
+    # all six weight arrays are reset in the same way inside the loop
+    all_weights = (G0_weights, G1_weights, H0_weights, H1_weights, H2_weights, H3_weights)
+    @loop_vperp_vpa ivperp ivpa begin
+        # limits where checks are required to determine which divergence-safe grid is needed
+        # (the element indices of the point itself are returned too, but not used here)
+        igrid_vpa, _, ielement_vpa_low, ielement_vpa_hi, igrid_vperp, _, ielement_vperp_low, ielement_vperp_hi = get_element_limit_indices(ivpa,ivperp,vpa,vperp)
+
+        vpa_val = vpa.grid[ivpa]
+        vperp_val = vperp.grid[ivperp]
+        # zero the weights for this point before they are filled below
+        for weights in all_weights
+            for ivperpp in 1:vperp.n, ivpap in 1:vpa.n
+                weights[ivpap,ivperpp,ivpa,ivperp] = 0.0
+            end
+        end
+        # loop over elements and grid points within elements on primed coordinate
+        @views loop_over_vperp_vpa_elements!(
+                G0_weights[:,:,ivpa,ivperp], G1_weights[:,:,ivpa,ivperp],
+                H0_weights[:,:,ivpa,ivperp], H1_weights[:,:,ivpa,ivperp],
+                H2_weights[:,:,ivpa,ivperp], H3_weights[:,:,ivpa,ivperp],
+                vpa, ielement_vpa_low, ielement_vpa_hi, # info about primed vpa grids
+                vperp, ielement_vperp_low, ielement_vperp_hi, # info about primed vperp grids
+                x_vpa, w_vpa, x_vperp, w_vperp, # arrays to store points and weights for primed (source) grids
+                x_legendre, w_legendre, x_laguerre, w_laguerre,
+                igrid_vpa, igrid_vperp, vpa_val, vperp_val)
+    end
+
+    @serial_region begin
+        global_rank[] == 0 && print_to_screen &&
+            println("finished weights calculation   ", Dates.format(now(), dateformat"H:MM:SS"))
+    end
+
+    return nothing
+end
+
+"""
+Return the basic quadratures used for the numerical integration of the Lagrange
+polynomials and the Green's function: uninitialised scratch vectors `x_vpa`, `w_vpa`,
+`x_vperp`, `w_vperp` of length `4*nquad`, followed by Gauss-Legendre and Gauss-Laguerre
+points and weights with `nquad = 2*max(vpa.ngrid,vperp.ngrid)` points each.
+"""
+function setup_basic_quadratures(vpa,vperp;print_to_screen=true)
+    @serial_region begin
+        global_rank[] == 0 && print_to_screen &&
+            println("setting up GL quadrature   ", Dates.format(now(), dateformat"H:MM:SS"))
+    end
+
+    # Gauss-Legendre points and weights on (-1,1)
+    nquad = 2*max(vpa.ngrid,vperp.ngrid)
+    x_legendre, w_legendre = gausslegendre(nquad)
+    # Gauss-Laguerre rule with the same number of points
+    x_laguerre, w_laguerre = gausslaguerre(nquad)
+
+    # scratch storage, later used for points and weights on the primed (source) grids
+    scratch() = Vector{mk_float}(undef, 4*nquad)
+    x_vpa, w_vpa = scratch(), scratch()
+    x_vperp, w_vperp = scratch(), scratch()
+
+    return x_vpa, w_vpa, x_vperp, w_vperp, x_legendre, w_legendre, x_laguerre, w_laguerre
+end
+
+
+"""
+function for getting the indices used to choose the integration quadrature: for vpa
+and then for vperp, the grid index within the element, the element index, and the
+lower and upper element limits where a divergence-safe grid is needed
+"""
+function get_element_limit_indices(ivpa,ivperp,vpa,vperp)
+    # the calculation is identical for both coordinates, so do it in a local helper
+    # that takes the coordinate struct and the global grid index of the point
+    function limits(coord, ipoint)
+        ngrid, nelement = coord.ngrid, coord.nelement_local
+        igrid, ielement = coord.igrid[ipoint], coord.ielement[ipoint]
+        # shift the limits by ng_low*nel_low (below) and ng_hi*nel_hi (above)
+        ielement_low = ielement - ng_low(igrid,ngrid)*nel_low(ielement,nelement)
+        ielement_hi = ielement + ng_hi(igrid,ngrid)*nel_hi(ielement,nelement)
+        return igrid, ielement, ielement_low, ielement_hi
+    end
+    # vpa results first, then vperp, as a flat tuple of eight values
+    return (limits(vpa, ivpa)..., limits(vperp, ivperp)...)
+end
+"""
+function that precomputes the required integration weights
+only along the velocity space boundaries
+"""
+function init_Rosenbluth_potential_boundary_integration_weights!(G0_weights,
+      G1_weights,H0_weights,H1_weights,H2_weights,H3_weights,vpa,vperp;print_to_screen=true)
+    
+    x_vpa, w_vpa, x_vperp, w_vperp, x_legendre, w_legendre, x_laguerre, w_laguerre = setup_basic_quadratures(vpa,vperp,print_to_screen=print_to_screen)
+    
+    @serial_region begin
+        if global_rank[] == 0 && print_to_screen
+            println("beginning (boundary) weights calculation   ", Dates.format(now(), dateformat"H:MM:SS"))
+        end
+    end
+
+    # precalculate weights, integrating over Lagrange polynomials
+    # first compute weights along lower vpa boundary
+    begin_vperp_region()
+    ivpa = 1 # lower_vpa_boundary
+    @loop_vperp ivperp begin
+        #limits where checks required to determine which divergence-safe grid is needed
+        igrid_vpa, ielement_vpa, ielement_vpa_low, ielement_vpa_hi, igrid_vperp, ielement_vperp, ielement_vperp_low, ielement_vperp_hi = get_element_limit_indices(ivpa,ivperp,vpa,vperp)
+        
+        vperp_val = vperp.grid[ivperp]
+        vpa_val = vpa.grid[ivpa]
+        for ivperpp in 1:vperp.n
+            for ivpap in 1:vpa.n
+                G0_weights.lower_vpa_boundary[ivpap,ivperpp,ivperp] = 0.0  
+                G1_weights.lower_vpa_boundary[ivpap,ivperpp,ivperp] = 0.0  
+                # G2_weights[ivpap,ivperpp,ivpa,ivperp] = 0.0  
+                # G3_weights[ivpap,ivperpp,ivpa,ivperp] = 0.0  
+                H0_weights.lower_vpa_boundary[ivpap,ivperpp,ivperp] = 0.0  
+                H1_weights.lower_vpa_boundary[ivpap,ivperpp,ivperp] = 0.0  
+                H2_weights.lower_vpa_boundary[ivpap,ivperpp,ivperp] = 0.0  
+                H3_weights.lower_vpa_boundary[ivpap,ivperpp,ivperp] = 0.0  
+                #@. n_weights[ivpap,ivperpp,ivpa,ivperp] = 0.0  
+            end
+        end
+        # loop over elements and grid points within elements on primed coordinate
+        @views loop_over_vperp_vpa_elements!(G0_weights.lower_vpa_boundary[:,:,ivperp],
+                G1_weights.lower_vpa_boundary[:,:,ivperp],
+                H0_weights.lower_vpa_boundary[:,:,ivperp],
+                H1_weights.lower_vpa_boundary[:,:,ivperp],
+                H2_weights.lower_vpa_boundary[:,:,ivperp],
+                H3_weights.lower_vpa_boundary[:,:,ivperp],
+                vpa,ielement_vpa_low,ielement_vpa_hi, # info about primed vpa grids
+                vperp,ielement_vperp_low,ielement_vperp_hi, # info about primed vperp grids
+                x_vpa, w_vpa, x_vperp, w_vperp, # arrays to store points and weights for primed (source) grids
+                x_legendre,w_legendre,x_laguerre,w_laguerre,
+                igrid_vpa, igrid_vperp, vpa_val, vperp_val)
+    end
+    # second compute weights along upper vpa boundary
+    ivpa = vpa.n # upper_vpa_boundary
+    @loop_vperp ivperp begin
+        #limits where checks required to determine which divergence-safe grid is needed
+        igrid_vpa, ielement_vpa, ielement_vpa_low, ielement_vpa_hi, igrid_vperp, ielement_vperp, ielement_vperp_low, ielement_vperp_hi = get_element_limit_indices(ivpa,ivperp,vpa,vperp)
+        
+        vperp_val = vperp.grid[ivperp]
+        vpa_val = vpa.grid[ivpa]
+        for ivperpp in 1:vperp.n
+            for ivpap in 1:vpa.n
+                G0_weights.upper_vpa_boundary[ivpap,ivperpp,ivperp] = 0.0  
+                G1_weights.upper_vpa_boundary[ivpap,ivperpp,ivperp] = 0.0  
+                # G2_weights[ivpap,ivperpp,ivpa,ivperp] = 0.0  
+                # G3_weights[ivpap,ivperpp,ivpa,ivperp] = 0.0  
+                H0_weights.upper_vpa_boundary[ivpap,ivperpp,ivperp] = 0.0  
+                H1_weights.upper_vpa_boundary[ivpap,ivperpp,ivperp] = 0.0  
+                H2_weights.upper_vpa_boundary[ivpap,ivperpp,ivperp] = 0.0  
+                H3_weights.upper_vpa_boundary[ivpap,ivperpp,ivperp] = 0.0  
+                #@. n_weights[ivpap,ivperpp,ivpa,ivperp] = 0.0  
+            end
+        end
+        # loop over elements and grid points within elements on primed coordinate
+        @views loop_over_vperp_vpa_elements!(G0_weights.upper_vpa_boundary[:,:,ivperp],
+                G1_weights.upper_vpa_boundary[:,:,ivperp],
+                H0_weights.upper_vpa_boundary[:,:,ivperp],
+                H1_weights.upper_vpa_boundary[:,:,ivperp],
+                H2_weights.upper_vpa_boundary[:,:,ivperp],
+                H3_weights.upper_vpa_boundary[:,:,ivperp],
+                vpa,ielement_vpa_low,ielement_vpa_hi, # info about primed vpa grids
+                vperp,ielement_vperp_low,ielement_vperp_hi, # info about primed vperp grids
+                x_vpa, w_vpa, x_vperp, w_vperp, # arrays to store points and weights for primed (source) grids
+                x_legendre,w_legendre,x_laguerre,w_laguerre,
+                igrid_vpa, igrid_vperp, vpa_val, vperp_val)
+    end
+    # finally compute weight along upper vperp boundary
+    begin_vpa_region()
+    ivperp = vperp.n # upper_vperp_boundary
+    @loop_vpa ivpa begin
+        #limits where checks required to determine which divergence-safe grid is needed
+        igrid_vpa, ielement_vpa, ielement_vpa_low, ielement_vpa_hi, igrid_vperp, ielement_vperp, ielement_vperp_low, ielement_vperp_hi = get_element_limit_indices(ivpa,ivperp,vpa,vperp)
+        
+        vperp_val = vperp.grid[ivperp]
+        vpa_val = vpa.grid[ivpa]
+        for ivperpp in 1:vperp.n
+            for ivpap in 1:vpa.n
+                G0_weights.upper_vperp_boundary[ivpap,ivperpp,ivpa] = 0.0  
+                G1_weights.upper_vperp_boundary[ivpap,ivperpp,ivpa] = 0.0  
+                # G2_weights[ivpap,ivperpp,ivpa,ivperp] = 0.0  
+                # G3_weights[ivpap,ivperpp,ivpa,ivperp] = 0.0  
+                H0_weights.upper_vperp_boundary[ivpap,ivperpp,ivpa] = 0.0  
+                H1_weights.upper_vperp_boundary[ivpap,ivperpp,ivpa] = 0.0  
+                H2_weights.upper_vperp_boundary[ivpap,ivperpp,ivpa] = 0.0  
+                H3_weights.upper_vperp_boundary[ivpap,ivperpp,ivpa] = 0.0  
+                #@. n_weights[ivpap,ivperpp,ivpa,ivperp] = 0.0  
+            end
+        end
+        # loop over elements and grid points within elements on primed coordinate
+        @views loop_over_vperp_vpa_elements!(G0_weights.upper_vperp_boundary[:,:,ivpa],
+                G1_weights.upper_vperp_boundary[:,:,ivpa],
+                H0_weights.upper_vperp_boundary[:,:,ivpa],
+                H1_weights.upper_vperp_boundary[:,:,ivpa],
+                H2_weights.upper_vperp_boundary[:,:,ivpa],
+                H3_weights.upper_vperp_boundary[:,:,ivpa],
+                vpa,ielement_vpa_low,ielement_vpa_hi, # info about primed vpa grids
+                vperp,ielement_vperp_low,ielement_vperp_hi, # info about primed vperp grids
+                x_vpa, w_vpa, x_vperp, w_vperp, # arrays to store points and weights for primed (source) grids
+                x_legendre,w_legendre,x_laguerre,w_laguerre,
+                igrid_vpa, igrid_vperp, vpa_val, vperp_val)
+    end
+    # return the parallelisation status to serial
+    begin_serial_region()
+    @serial_region begin 
+        if global_rank[] == 0 && print_to_screen
+            println("finished (boundary) weights calculation   ", Dates.format(now(), dateformat"H:MM:SS"))
+        end
+    end
+    return nothing
+end
+
+"""
+    get_imin_imax(coord, iel)
+
+Return the tuple `(imin, imax)` of full-grid indices bounding element `iel` of `coord`.
+
+`imax` is `coord.imax[iel]`. `imin` is `coord.imin[iel]` for the first element and
+`coord.imin[iel] - 1` for every later element (presumably so that the node shared with
+the preceding element is included -- confirm against how `coord.imin` is defined).
+"""
+function get_imin_imax(coord,iel)
+    # only elements after the first have their lower index moved down by one
+    lower_offset = iel > 1 ? 1 : 0
+    return coord.imin[iel] - lower_offset, coord.imax[iel]
+end
+
+"""
+    get_nodes(coord, iel)
+
+Return the entries of `coord.grid` that belong to element `iel`, using the index
+range given by `get_imin_imax(coord, iel)`.
+"""
+function get_nodes(coord,iel)
+    first_index, last_index = get_imin_imax(coord,iel)
+    # plain range indexing (no view), so for an ordinary `Array` this is a copy
+    return coord.grid[first_index:last_index]
+end
+"""
+Lagrange polynomial
+args: 
+j - index of l_j from list of nodes
+x_nodes - array of x node values
+x - point where interpolated value is returned
+"""
+function lagrange_poly(j,x_nodes,x)
+    # node at which this basis polynomial takes the value one
+    x_j = x_nodes[j]
+    # multiply together the factors for every node except the j-th,
+    # visiting the nodes in increasing index order
+    result = 1.0
+    for i in 1:size(x_nodes,1)
+        if i != j
+            result *= (x - x_nodes[i])/(x_j - x_nodes[i])
+        end
+    end
+    return result
+end
+
+# Function to get the local integration grid and quadrature weights
+# to integrate a 1D element in the 2D representation of the 
+# velocity space distribution functions. This function assumes that
+# there is a divergence at the point coord_val, and splits the grid 
+# and integration weights appropriately, using Gauss-Laguerre points
+# near the divergence and Gauss-Legendre points away from the divergence. 
+#
+# Arguments:
+#   x_scaled, w_scaled     - output buffers; zeroed, then filled from index 1 with
+#                            the scaled points and weights
+#   x_legendre, w_legendre - reference points/weights that are mapped linearly onto
+#                            sub-intervals (presumably Gauss-Legendre on [-1,1] --
+#                            confirm at the caller)
+#   x_laguerre, w_laguerre - reference points/weights used through x -> exp(-x)
+#                            (presumably Gauss-Laguerre -- confirm at the caller)
+#   node_min, node_max     - integration limits of the element
+#   nodes                  - node positions of the element
+#   igrid_coord            - index into `nodes` used by the final (`else`) branch to
+#                            find the neighbours of coord_val
+#   coord_val              - location of the divergence
+# Returns the number of leading entries of x_scaled/w_scaled that were filled.
+# NOTE(review): the final branch fills up to 2*(nquad_laguerre + nquad_legendre)
+# entries, so the output buffers must be at least that long.
+function get_scaled_x_w_with_divergences!(x_scaled, w_scaled, x_legendre, w_legendre, x_laguerre, w_laguerre, node_min, node_max, nodes, igrid_coord, coord_val)
+    #println("nodes ",nodes)
+    # tolerance for deciding that coord_val coincides with an element endpoint
+    # (this local name shadows Base.zero inside the function)
+    zero = 1.0e-10 
+    @. x_scaled = 0.0
+    @. w_scaled = 0.0
+    nnodes = size(nodes,1)
+    nquad_legendre = size(x_legendre,1)
+    nquad_laguerre = size(x_laguerre,1)
+    # assume x_scaled, w_scaled are arrays of length 2*nquad
+    # use only nquad points for most elements, but use 2*nquad for
+    # elements with interior divergences
+    #println("coord: ",coord_val," node_max: ",node_max," node_min: ",node_min) 
+    if abs(coord_val - node_max) < zero # divergence at upper endpoint 
+        # split the element at the midpoint of its last two nodes
+        node_cut = (nodes[nnodes-1] + nodes[nnodes])/2.0
+        
+        n = nquad_laguerre + nquad_legendre
+        # linear map of the Legendre points onto [node_min, node_cut]
+        shift = 0.5*(node_min + node_cut)
+        scale = 0.5*(node_cut - node_min)
+        @. x_scaled[1:nquad_legendre] = scale*x_legendre + shift
+        @. w_scaled[1:nquad_legendre] = scale*w_legendre
+
+        # exponential map of the Laguerre points: x_laguerre = 0 gives node_cut and
+        # x_laguerre -> infinity approaches node_max, so the points cluster at node_max
+        @. x_scaled[1+nquad_legendre:n] = node_max + (node_cut - node_max)*exp(-x_laguerre)
+        @. w_scaled[1+nquad_legendre:n] = (node_max - node_cut)*w_laguerre
+        
+        nquad_coord = n
+        #println("upper divergence")
+    elseif abs(coord_val - node_min) < zero # divergence at lower endpoint
+        n = nquad_laguerre + nquad_legendre
+        # NOTE(review): nquad is assigned but not used in this branch
+        nquad = size(x_laguerre,1)
+        # split the element at the midpoint of its first two nodes
+        node_cut = (nodes[1] + nodes[2])/2.0
+        # Laguerre points are stored in reverse order (index nquad_laguerre+1-j) so that
+        # x_scaled increases with index when x_laguerre increases with j
+        for j in 1:nquad_laguerre
+            x_scaled[nquad_laguerre+1-j] = node_min + (node_cut - node_min)*exp(-x_laguerre[j])
+            w_scaled[nquad_laguerre+1-j] = (node_cut - node_min)*w_laguerre[j]
+        end
+        # linear map of the Legendre points onto [node_cut, node_max]
+        shift = 0.5*(node_max + node_cut)
+        scale = 0.5*(node_max - node_cut)
+        @. x_scaled[1+nquad_laguerre:n] = scale*x_legendre + shift
+        @. w_scaled[1+nquad_laguerre:n] = scale*w_legendre
+
+        nquad_coord = n
+        #println("lower divergence")
+    else #if (coord_val - node_min)*(coord_val - node_max) < - zero # interior divergence
+        # NOTE(review): the interior test above is commented out, so every coord_val
+        # that is not within `zero` of an endpoint is handled by this branch
+        #println(nodes[igrid_coord]," ", coord_val)
+        # number of Laguerre points used in total (one set on each side of coord_val)
+        n = 2*nquad_laguerre
+        node_cut_high = (nodes[igrid_coord+1] + nodes[igrid_coord])/2.0
+        if igrid_coord == 1
+            # exception for vperp coordinate near orgin
+            # no lower Legendre segment: the lower Laguerre segment starts at node_min
+            k = 0
+            node_cut_low = node_min
+            nquad_coord = nquad_legendre + 2*nquad_laguerre
+        else
+            # fill in lower Gauss-Legendre points
+            node_cut_low = (nodes[igrid_coord-1]+nodes[igrid_coord])/2.0
+            shift = 0.5*(node_cut_low + node_min)
+            scale = 0.5*(node_cut_low - node_min)
+            @. x_scaled[1:nquad_legendre] = scale*x_legendre + shift
+            @. w_scaled[1:nquad_legendre] = scale*w_legendre
+            # k is the number of entries already occupied by the lower Legendre segment
+            k = nquad_legendre
+            nquad_coord = 2*(nquad_laguerre + nquad_legendre)
+        end
+        # lower half of domain  
+        # points run from node_cut_low (x_laguerre = 0) towards coord_val
+        for j in 1:nquad_laguerre  
+            x_scaled[k+j] = coord_val + (node_cut_low - coord_val)*exp(-x_laguerre[j])
+            w_scaled[k+j] = (coord_val - node_cut_low)*w_laguerre[j]
+        end  
+        # upper half of domain
+        # stored in reverse order (index k+n+1-j), mirroring the lower half
+        for j in 1:nquad_laguerre
+            x_scaled[k+n+1-j] = coord_val + (node_cut_high - coord_val)*exp(-x_laguerre[j])
+            w_scaled[k+n+1-j] = (node_cut_high - coord_val)*w_laguerre[j]
+        end
+        # fill in upper Gauss-Legendre points
+        shift = 0.5*(node_cut_high + node_max)
+        scale = 0.5*(node_max - node_cut_high)
+        @. x_scaled[k+n+1:nquad_coord] = scale*x_legendre + shift
+        @. w_scaled[k+n+1:nquad_coord] = scale*w_legendre
+        
+        #println("intermediate divergence")
+    #else # no divergences
+    #    nquad = size(x_legendre,1) 
+    #    shift = 0.5*(node_min + node_max)
+    #    scale = 0.5*(node_max - node_min)
+    #    @. x_scaled[1:nquad] = scale*x_legendre + shift
+    #    @. w_scaled[1:nquad] = scale*w_legendre
+    #    #println("no divergence")
+    #    nquad_coord = nquad
+    end
+    #println("x_scaled",x_scaled)
+    #println("w_scaled",w_scaled)
+    return nquad_coord
+end
+# Function to get the local grid and integration weights assuming
+# no divergences of the function on the 1D element. The reference points
+# x_legendre and weights w_legendre are mapped linearly onto
+# [node_min, node_max] and written to the leading entries of x_scaled and
+# w_scaled; all other entries of the two buffers are set to zero.
+# Returns the number of points written (the length of x_legendre).
+function get_scaled_x_w_no_divergences!(x_scaled, w_scaled, x_legendre, w_legendre, node_min, node_max)
+    # clear the whole of both buffers before filling the leading entries
+    x_scaled .= 0.0
+    w_scaled .= 0.0
+    npoints = size(x_legendre,1)
+    midpoint = 0.5*(node_min + node_max)
+    half_width = 0.5*(node_max - node_min)
+    filled = 1:npoints
+    x_scaled[filled] .= half_width .* x_legendre .+ midpoint
+    w_scaled[filled] .= half_width .* w_legendre
+    return npoints
+end
+
+# Indicator for the first grid point of an element:
+# returns 1 if igrid = 1 and 0 if 1 < igrid <= ngrid.
+# The value is floor((ngrid - igrid)/(ngrid - 1)) converted to mk_int.
+function ng_low(igrid,ngrid)
+    ratio = (ngrid - igrid)/(ngrid - 1)
+    return floor(mk_int, ratio)
+end
+# Indicator for the last grid point of an element:
+# returns 1 if igrid = ngrid and 0 if 1 <= igrid < ngrid.
+# The value is floor(igrid/ngrid) converted to mk_int.
+function ng_hi(igrid,ngrid)
+    ratio = igrid/ngrid
+    return floor(mk_int, ratio)
+end
+# Indicator for "not the first element":
+# returns 1 for nelement >= ielement > 1 and 0 for ielement = 1.
+# The value is floor((ielement - 2 + nelement)/nelement) converted to mk_int.
+function nel_low(ielement,nelement)
+    ratio = (ielement - 2 + nelement)/nelement
+    return floor(mk_int, ratio)
+end
+# Indicator for "not the last element":
+# returns 1 for nelement > ielement >= 1 and 0 for ielement = nelement.
+# The value is 1 - floor(ielement/nelement), with the floor converted to mk_int.
+function nel_hi(ielement,nelement)
+    is_last = floor(mk_int, ielement/nelement)
+    return 1 - is_last
+end
+
+# base level function for computing the Green's function weights
+# note the definitions of ellipe & ellipk
+# `https://specialfunctions.juliamath.org/stable/functions_list/#SpecialFunctions.ellipe`
+# `https://specialfunctions.juliamath.org/stable/functions_list/#SpecialFunctions.ellipk`
+# `ellipe(m) = \int^{\pi/2}\_0 \sqrt{ 1 - m \sin^2(\theta)} d \theta`
+# `ellipk(m) = \int^{\pi/2}\_0 \frac{1}{\sqrt{ 1 - m \sin^2(\theta)}} d \theta`
+
+# Adds (with +=) the contribution of one 2D element (ielement_vpa, ielement_vperp)
+# to the six weight arrays, which are indexed [ivpap,ivperpp] on the full primed grid.
+# For every pair of local nodes of the element, the sum over the quadrature points
+# (x_vpa[1:nquad_vpa], x_vperp[1:nquad_vperp]) of
+#   lagrange_poly_vpa * lagrange_poly_vperp * kernel * x_kvperp*w_kvperp*w_kvpa*2/sqrt(pi)
+# is accumulated, where `kernel` is the elliptic-integral factor belonging to each array.
+# vpa_val and vperp_val are the unprimed (field) coordinates at which the kernels are
+# evaluated. The arrays are not zeroed here. Returns nothing.
+function local_element_integration!(G0_weights,G1_weights,H0_weights,H1_weights,H2_weights,H3_weights,
+                            nquad_vpa,ielement_vpa,vpa_nodes,vpa, # info about primed vpa grids
+                            nquad_vperp,ielement_vperp,vperp_nodes,vperp, # info about primed vperp grids
+                            x_vpa, w_vpa, x_vperp, w_vperp, # points and weights for primed (source) grids
+                            vpa_val, vperp_val) # values and indices for unprimed (field) grids
+    for igrid_vperp in 1:vperp.ngrid
+        for igrid_vpa in 1:vpa.ngrid
+            # get grid index for point on full grid  
+            ivpap = vpa.igrid_full[igrid_vpa,ielement_vpa]   
+            ivperpp = vperp.igrid_full[igrid_vperp,ielement_vperp]   
+            # carry out integration over Lagrange polynomial at this node, on this element
+            for kvperp in 1:nquad_vperp
+                for kvpa in 1:nquad_vpa 
+                    x_kvpa = x_vpa[kvpa]
+                    x_kvperp = x_vperp[kvperp]
+                    w_kvperp = w_vperp[kvperp]
+                    w_kvpa = w_vpa[kvpa]
+                    denom = (vpa_val - x_kvpa)^2 + (vperp_val + x_kvperp)^2 
+                    # argument of the elliptic integrals, capped just below 1
+                    # (presumably because ellipk diverges at 1 -- see the linked docs)
+                    mm = min(4.0*vperp_val*x_kvperp/denom,1.0 - 1.0e-15)
+                    #mm = 4.0*vperp_val*x_kvperp/denom/(1.0 + 10^-15)
+                    #mm = 4.0*vperp_val*x_kvperp/denom
+                    prefac = sqrt(denom)
+                    ellipe_mm = ellipe(mm) 
+                    ellipk_mm = ellipk(mm) 
+                    #if mm_test > 1.0
+                    #    println("mm: ",mm_test," ellipe: ",ellipe_mm," ellipk: ",ellipk_mm)
+                    #end
+                    # NOTE(review): the G1, H1 and H2 factors below divide by mm (or mm^2),
+                    # so mm == 0 (vperp_val == 0 or x_kvperp == 0) would give a non-finite
+                    # result -- confirm that callers never evaluate at such points
+                    G_elliptic_integral_factor = 2.0*ellipe_mm*prefac/pi
+                    G1_elliptic_integral_factor = -(2.0*prefac/pi)*( (2.0 - mm)*ellipe_mm - 2.0*(1.0 - mm)*ellipk_mm )/(3.0*mm)
+                    #G2_elliptic_integral_factor = (2.0*prefac/pi)*( (7.0*mm^2 + 8.0*mm - 8.0)*ellipe_mm + 4.0*(2.0 - mm)*(1.0 - mm)*ellipk_mm )/(15.0*mm^2)
+                    #G3_elliptic_integral_factor = (2.0*prefac/pi)*( 8.0*(mm^2 - mm + 1.0)*ellipe_mm - 4.0*(2.0 - mm)*(1.0 - mm)*ellipk_mm )/(15.0*mm^2)
+                    H_elliptic_integral_factor = 2.0*ellipk_mm/(pi*prefac)
+                    H1_elliptic_integral_factor = -(2.0/(pi*prefac))*( (mm-2.0)*(ellipk_mm/mm) + (2.0*ellipe_mm/mm) )
+                    H2_elliptic_integral_factor = (2.0/(pi*prefac))*( (3.0*mm^2 - 8.0*mm + 8.0)*(ellipk_mm/(3.0*mm^2)) + (4.0*mm - 8.0)*ellipe_mm/(3.0*mm^2) )
+                    # basis functions of the current local node at this quadrature point
+                    lagrange_poly_vpa = lagrange_poly(igrid_vpa,vpa_nodes,x_kvpa)
+                    lagrange_poly_vperp = lagrange_poly(igrid_vperp,vperp_nodes,x_kvperp)
+                    
+                    (G0_weights[ivpap,ivperpp] += 
+                        lagrange_poly_vpa*lagrange_poly_vperp*
+                        G_elliptic_integral_factor*x_kvperp*w_kvperp*w_kvpa*2.0/sqrt(pi))
+                    
+                    (G1_weights[ivpap,ivperpp] += 
+                        lagrange_poly_vpa*lagrange_poly_vperp*
+                        G1_elliptic_integral_factor*x_kvperp*w_kvperp*w_kvpa*2.0/sqrt(pi))
+                    
+                    #(G2_weights[ivpap,ivperpp] += 
+                    #    lagrange_poly_vpa*lagrange_poly_vperp*
+                    #    G2_elliptic_integral_factor*x_kvperp*w_kvperp*w_kvpa*2.0/sqrt(pi))
+                    
+                    #(G3_weights[ivpap,ivperpp] += 
+                    #    lagrange_poly_vpa*lagrange_poly_vperp*
+                    #    G3_elliptic_integral_factor*w_kvperp*w_kvpa*2.0/sqrt(pi))
+                    
+                    (H0_weights[ivpap,ivperpp] += 
+                        lagrange_poly_vpa*lagrange_poly_vperp*
+                        H_elliptic_integral_factor*x_kvperp*w_kvperp*w_kvpa*2.0/sqrt(pi))
+                        
+                    (H1_weights[ivpap,ivperpp] += 
+                        lagrange_poly_vpa*lagrange_poly_vperp*
+                        H1_elliptic_integral_factor*x_kvperp*w_kvperp*w_kvpa*2.0/sqrt(pi))
+                        
+                    # H2 uses the combination H1*vperp_val - H2*x_kvperp of the factors above
+                    (H2_weights[ivpap,ivperpp] += 
+                        lagrange_poly_vpa*lagrange_poly_vperp*
+                        (H1_elliptic_integral_factor*vperp_val - H2_elliptic_integral_factor*x_kvperp)*
+                        x_kvperp*w_kvperp*w_kvpa*2.0/sqrt(pi))
+                    # H3 uses the H factor multiplied by the vpa separation (vpa_val - x_kvpa)
+                    (H3_weights[ivpap,ivperpp] += 
+                        lagrange_poly_vpa*lagrange_poly_vperp*
+                        H_elliptic_integral_factor*(vpa_val - x_kvpa)*
+                        x_kvperp*w_kvperp*w_kvpa*2.0/sqrt(pi))
+                    
+                    #(n_weights[ivpap,ivperpp] += 
+                    #    lagrange_poly_vpa*lagrange_poly_vperp*
+                    #    x_kvperp*w_kvperp*w_kvpa*2.0/sqrt(pi))
+                end
+            end
+        end
+    end
+    return nothing
+end
+
+# Accumulate into the weight arrays the contributions of every primed vpa element
+# for the single primed vperp element ielement_vperpp.
+# The vpa elements are visited in three consecutive ranges:
+#   1:ielement_vpa_low-1                  - quadrature without divergences in both
+#                                           vpa and vperp
+#   ielement_vpa_low:ielement_vpa_hi      - divergence-aware quadrature in both vpa
+#                                           and vperp
+#   ielement_vpa_hi+1:vpa.nelement_local  - quadrature without divergences in both
+#                                           vpa and vperp
+# x_vpa, w_vpa, x_vperp and w_vperp are scratch buffers that are overwritten.
+# Returns nothing.
+function loop_over_vpa_elements!(G0_weights,G1_weights,H0_weights,H1_weights,H2_weights,H3_weights,
+                            vpa,ielement_vpa_low,ielement_vpa_hi, # info about primed vpa grids
+                            vperp,ielement_vperpp, # info about primed vperp grids
+                            x_vpa, w_vpa, x_vperp, w_vperp, # arrays to store points and weights for primed (source) grids
+                            x_legendre,w_legendre,x_laguerre,w_laguerre,
+                            igrid_vpa, igrid_vperp, vpa_val, vperp_val)
+    nodes_vperp = get_nodes(vperp,ielement_vperpp)
+    # the lower limit is the first node multiplied by nel_low(...), i.e. it is
+    # multiplied by zero for the first vperp element
+    lower_vperp = nodes_vperp[1]*nel_low(ielement_vperpp,vperp.nelement_local)
+    upper_vperp = nodes_vperp[end]
+
+    # vpa elements below the divergence-aware range
+    npts_vperp = get_scaled_x_w_no_divergences!(x_vperp, w_vperp, x_legendre, w_legendre, lower_vperp, upper_vperp)
+    for iel in 1:ielement_vpa_low-1
+        nodes_vpa = get_nodes(vpa,iel)
+        npts_vpa = get_scaled_x_w_no_divergences!(x_vpa, w_vpa, x_legendre, w_legendre, nodes_vpa[1], nodes_vpa[end])
+        @views local_element_integration!(G0_weights,G1_weights,H0_weights,H1_weights,H2_weights,H3_weights,
+                    npts_vpa,iel,nodes_vpa,vpa,
+                    npts_vperp,ielement_vperpp,nodes_vperp,vperp,
+                    x_vpa, w_vpa, x_vperp, w_vperp,
+                    vpa_val, vperp_val)
+    end
+
+    # vpa elements inside the divergence-aware range: the vperp buffers are
+    # rebuilt with the divergence-aware rule before this loop
+    npts_vperp = get_scaled_x_w_with_divergences!(x_vperp, w_vperp, x_legendre, w_legendre, x_laguerre, w_laguerre, lower_vperp, upper_vperp, nodes_vperp, igrid_vperp, vperp_val)
+    for iel in ielement_vpa_low:ielement_vpa_hi
+        nodes_vpa = get_nodes(vpa,iel)
+        npts_vpa = get_scaled_x_w_with_divergences!(x_vpa, w_vpa, x_legendre, w_legendre, x_laguerre, w_laguerre, nodes_vpa[1], nodes_vpa[end], nodes_vpa, igrid_vpa, vpa_val)
+        @views local_element_integration!(G0_weights,G1_weights,H0_weights,H1_weights,H2_weights,H3_weights,
+                    npts_vpa,iel,nodes_vpa,vpa,
+                    npts_vperp,ielement_vperpp,nodes_vperp,vperp,
+                    x_vpa, w_vpa, x_vperp, w_vperp,
+                    vpa_val, vperp_val)
+    end
+
+    # vpa elements above the divergence-aware range: back to the rule without divergences
+    npts_vperp = get_scaled_x_w_no_divergences!(x_vperp, w_vperp, x_legendre, w_legendre, lower_vperp, upper_vperp)
+    for iel in ielement_vpa_hi+1:vpa.nelement_local
+        nodes_vpa = get_nodes(vpa,iel)
+        npts_vpa = get_scaled_x_w_no_divergences!(x_vpa, w_vpa, x_legendre, w_legendre, nodes_vpa[1], nodes_vpa[end])
+        @views local_element_integration!(G0_weights,G1_weights,H0_weights,H1_weights,H2_weights,H3_weights,
+                    npts_vpa,iel,nodes_vpa,vpa,
+                    npts_vperp,ielement_vperpp,nodes_vperp,vperp,
+                    x_vpa, w_vpa, x_vperp, w_vperp,
+                    vpa_val, vperp_val)
+    end
+    return nothing
+end
+
+# Accumulate into the weight arrays the contributions of every primed vpa element
+# (1:vpa.nelement_local) for one primed vperp element, using the quadrature rule
+# without divergences in vpa. The vperp quadrature (nquad_vperp points held in
+# x_vperp, w_vperp) and vperp_nodes must already have been set up by the caller.
+# ielement_vpa_low and ielement_vpa_hi are accepted but not used in the body.
+# x_vpa and w_vpa are scratch buffers that are overwritten. Returns nothing.
+function loop_over_vpa_elements_no_divergences!(G0_weights,G1_weights,H0_weights,H1_weights,H2_weights,H3_weights,
+                            vpa,ielement_vpa_low,ielement_vpa_hi, # info about primed vpa grids
+                            nquad_vperp,ielement_vperpp,vperp_nodes,vperp, # info about primed vperp grids
+                            x_vpa, w_vpa, x_vperp, w_vperp, # arrays to store points and weights for primed (source) grids
+                            x_legendre,w_legendre,
+                            vpa_val, vperp_val)
+    for iel in 1:vpa.nelement_local
+        nodes_vpa = get_nodes(vpa,iel)
+        # map the reference rule onto this vpa element
+        npts_vpa = get_scaled_x_w_no_divergences!(x_vpa, w_vpa, x_legendre, w_legendre, nodes_vpa[1], nodes_vpa[end])
+        @views local_element_integration!(G0_weights,G1_weights,H0_weights,H1_weights,H2_weights,H3_weights,
+                    npts_vpa,iel,nodes_vpa,vpa,
+                    nquad_vperp,ielement_vperpp,vperp_nodes,vperp,
+                    x_vpa, w_vpa, x_vperp, w_vperp,
+                    vpa_val, vperp_val)
+    end
+    return nothing
+end
+
+# Accumulate into the weight arrays the contributions of every primed
+# (vperp, vpa) element for the field point (vpa_val, vperp_val).
+# The vperp elements are visited in three consecutive ranges:
+#   1:ielement_vperp_low-1                    - rule without divergences in vperp,
+#                                               all vpa elements without divergences
+#   ielement_vperp_low:ielement_vperp_hi      - delegated to loop_over_vpa_elements!,
+#                                               which selects the rules itself
+#   ielement_vperp_hi+1:vperp.nelement_local  - as for the first range
+# x_vpa, w_vpa, x_vperp and w_vperp are scratch buffers that are overwritten.
+# Returns nothing.
+function loop_over_vperp_vpa_elements!(G0_weights,G1_weights,H0_weights,H1_weights,H2_weights,H3_weights,
+                vpa,ielement_vpa_low,ielement_vpa_hi, # info about primed vpa grids
+                vperp,ielement_vperp_low,ielement_vperp_hi, # info about primed vperp grids
+                x_vpa, w_vpa, x_vperp, w_vperp, # arrays to store points and weights for primed (source) grids
+                x_legendre,w_legendre,x_laguerre,w_laguerre,
+                igrid_vpa, igrid_vperp, vpa_val, vperp_val)
+    n_vperp_elements = vperp.nelement_local
+
+    # vperp elements below the divergence-aware range
+    for iel_vperp in 1:ielement_vperp_low-1
+        nodes_vperp = get_nodes(vperp,iel_vperp)
+        # lower limit is multiplied by nel_low(...), i.e. by zero for the first element
+        lower_vperp = nodes_vperp[1]*nel_low(iel_vperp,n_vperp_elements)
+        upper_vperp = nodes_vperp[end]
+        npts_vperp = get_scaled_x_w_no_divergences!(x_vperp, w_vperp, x_legendre, w_legendre, lower_vperp, upper_vperp)
+        @views loop_over_vpa_elements_no_divergences!(G0_weights,G1_weights,H0_weights,H1_weights,H2_weights,H3_weights,
+                vpa,ielement_vpa_low,ielement_vpa_hi,
+                npts_vperp,iel_vperp,nodes_vperp,vperp,
+                x_vpa, w_vpa, x_vperp, w_vperp,
+                x_legendre,w_legendre,
+                vpa_val, vperp_val)
+    end
+
+    # vperp elements inside the divergence-aware range; the vperp quadrature for
+    # these elements is built inside loop_over_vpa_elements!
+    for iel_vperp in ielement_vperp_low:ielement_vperp_hi
+        @views loop_over_vpa_elements!(G0_weights,G1_weights,H0_weights,H1_weights,H2_weights,H3_weights,
+                vpa,ielement_vpa_low,ielement_vpa_hi,
+                vperp,iel_vperp,
+                x_vpa, w_vpa, x_vperp, w_vperp,
+                x_legendre,w_legendre,x_laguerre,w_laguerre,
+                igrid_vpa, igrid_vperp, vpa_val, vperp_val)
+    end
+
+    # vperp elements above the divergence-aware range
+    for iel_vperp in ielement_vperp_hi+1:n_vperp_elements
+        nodes_vperp = get_nodes(vperp,iel_vperp)
+        lower_vperp = nodes_vperp[1]*nel_low(iel_vperp,n_vperp_elements)
+        upper_vperp = nodes_vperp[end]
+        npts_vperp = get_scaled_x_w_no_divergences!(x_vperp, w_vperp, x_legendre, w_legendre, lower_vperp, upper_vperp)
+        @views loop_over_vpa_elements_no_divergences!(G0_weights,G1_weights,H0_weights,H1_weights,H2_weights,H3_weights,
+                vpa,ielement_vpa_low,ielement_vpa_hi,
+                npts_vperp,iel_vperp,nodes_vperp,vperp,
+                x_vpa, w_vpa, x_vperp, w_vperp,
+                x_legendre,w_legendre,
+                vpa_val, vperp_val)
+    end
+    return nothing
+end
+
+# The function loop_over_vperp_vpa_elements_no_divergences!() was for debugging.
+# By changing the source where loop_over_vperp_vpa_elements!() is called to
+# instead call this function we can verify that the Gauss-Legendre quadrature
+# is adequate for integrating a divergence-free integrand. This function should be 
+# kept until the problems with the pure integration method of computing the
+# Rosenbluth potentials are understood.
+#
+# Every primed vperp element (1:vperp.nelement_local) is integrated with the rule
+# without divergences, and for each of them every primed vpa element is handled by
+# loop_over_vpa_elements_no_divergences!(). The element limits ielement_*_low/hi and
+# the indices igrid_vpa, igrid_vperp are accepted so that the argument list mirrors
+# loop_over_vperp_vpa_elements!() (without the Laguerre arrays); igrid_vpa and
+# igrid_vperp are not used. x_vpa, w_vpa, x_vperp and w_vperp are scratch buffers
+# that are overwritten. Returns nothing.
+function loop_over_vperp_vpa_elements_no_divergences!(G0_weights,G1_weights,H0_weights,H1_weights,H2_weights,H3_weights,
+                vpa,ielement_vpa_low,ielement_vpa_hi, # info about primed vpa grids
+                vperp,ielement_vperp_low,ielement_vperp_hi, # info about primed vperp grids
+                x_vpa, w_vpa, x_vperp, w_vperp, # arrays to store points and weights for primed (source) grids
+                x_legendre,w_legendre,
+                igrid_vpa, igrid_vperp, vpa_val, vperp_val)
+    for ielement_vperpp in 1:vperp.nelement_local
+        vperp_nodes = get_nodes(vperp,ielement_vperpp)
+        vperp_max = vperp_nodes[end]
+        # the element count is read from the coordinate itself; the name
+        # `nelement_vperp` previously used here is not defined in this function
+        vperp_min = vperp_nodes[1]*nel_low(ielement_vperpp,vperp.nelement_local)
+        nquad_vperp = get_scaled_x_w_no_divergences!(x_vperp, w_vperp, x_legendre, w_legendre, vperp_min, vperp_max)
+        @views loop_over_vpa_elements_no_divergences!(G0_weights,G1_weights,H0_weights,H1_weights,H2_weights,H3_weights,
+                vpa,ielement_vpa_low,ielement_vpa_hi, # info about primed vpa grids
+                nquad_vperp,ielement_vperpp,vperp_nodes,vperp, # info about primed vperp grids
+                x_vpa, w_vpa, x_vperp, w_vperp, # arrays to store points and weights for primed (source) grids
+                x_legendre,w_legendre,
+                vpa_val, vperp_val)
+    end
+    return nothing
+end 
+
+
+# Array indices for compound 1D form 
+# Compound 1D index of the point (ivpa, ivperp) on a grid with nvpa points in vpa:
+# ic = ivpa + nvpa*(ivperp-1), so ivpa varies fastest.
+function ic_func(ivpa::mk_int,ivperp::mk_int,nvpa::mk_int)
+    points_in_lower_rows = nvpa*(ivperp-1)
+    return ivpa + points_in_lower_rows
+end
+# vperp index belonging to the compound index ic on a grid with nvpa points in vpa:
+# floor((ic-1)/nvpa) + 1, with the floor returned as an Int64.
+function ivperp_func(ic::mk_int,nvpa::mk_int)
+    zero_based_ic = ic - 1
+    return floor(Int64, zero_based_ic/nvpa) + 1
+end
+# vpa index belonging to the compound index ic on a grid with nvpa points in vpa:
+# ic minus the nvpa*(ivperp-1) points that precede the row given by ivperp_func.
+function ivpa_func(ic::mk_int,nvpa::mk_int)
+    complete_rows = ivperp_func(ic,nvpa) - 1
+    return ic - nvpa*complete_rows
+end
+
+# Copy the 2D array pdf_vpavperp[ivpa,ivperp] into the 1D array pdf_c, using the
+# compound index from ic_func. Serial loop over 1:nvperp (outer) and 1:nvpa (inner).
+# Returns nothing.
+function ravel_vpavperp_to_c!(pdf_c,pdf_vpavperp,nvpa::mk_int,nvperp::mk_int)
+    for ivperp in 1:nvperp, ivpa in 1:nvpa
+        pdf_c[ic_func(ivpa,ivperp,nvpa)] = pdf_vpavperp[ivpa,ivperp]
+    end
+    return nothing
+end
+
+# Copy the 2D array pdf_vpavperp[ivpa,ivperp] into the 1D array pdf_c, using the
+# compound index from ic_func. The points are visited with @loop_vperp_vpa after
+# begin_vperp_vpa_region() has been called. Returns nothing.
+function ravel_vpavperp_to_c_parallel!(pdf_c,pdf_vpavperp,nvpa::mk_int)
+    begin_vperp_vpa_region()
+    @loop_vperp_vpa ivperp ivpa begin
+        pdf_c[ic_func(ivpa,ivperp,nvpa)] = pdf_vpavperp[ivpa,ivperp]
+    end
+    return nothing
+end
+
+# Copy the 1D array pdf_c (entries 1:nc) into the 2D array pdf_vpavperp[ivpa,ivperp],
+# recovering the two indices of each entry with ivpa_func and ivperp_func.
+# Serial loop. Returns nothing.
+function ravel_c_to_vpavperp!(pdf_vpavperp,pdf_c,nc::mk_int,nvpa::mk_int)
+    for ic in 1:nc
+        pdf_vpavperp[ivpa_func(ic,nvpa),ivperp_func(ic,nvpa)] = pdf_c[ic]
+    end
+    return nothing
+end
+
+# Copy the 1D array pdf_c into the 2D array pdf_vpavperp[ivpa,ivperp], using the
+# compound index from ic_func. The points are visited with @loop_vperp_vpa after
+# begin_vperp_vpa_region() has been called. Returns nothing.
+function ravel_c_to_vpavperp_parallel!(pdf_vpavperp,pdf_c,nvpa::mk_int)
+    begin_vperp_vpa_region()
+    @loop_vperp_vpa ivperp ivpa begin
+        pdf_vpavperp[ivpa,ivperp] = pdf_c[ic_func(ivpa,ivperp,nvpa)]
+    end
+    return nothing
+end
+
+# Index on the full vpa grid of local point ivpa_local of element ielement_vpa,
+# for elements with ngrid_vpa points each: consecutive elements are offset by
+# ngrid_vpa - 1 points.
+function ivpa_global_func(ivpa_local::mk_int,ielement_vpa::mk_int,ngrid_vpa::mk_int)
+    element_offset = (ielement_vpa - 1)*(ngrid_vpa - 1)
+    return ivpa_local + element_offset
+end
+
+# function that returns the sparse matrix index
+# used to directly construct the nonzero entries
+# of a 2D assembled sparse matrix
+#
+# A zero-based 1D index is formed for vpa and for vperp from the local row and
+# column indices within the element plus an offset of (ngrid^2 - 1) per preceding
+# element. The two are combined as 1 + icsc_vpa + ntot_vpa*icsc_vperp, where
+# ntot_vpa = (nelement_vpa - 1)*(ngrid_vpa^2 - 1) + ngrid_vpa^2.
+# nelement_vperp is accepted but not used in the calculation.
+function icsc_func(ivpa_local::mk_int,ivpap_local::mk_int,
+                   ielement_vpa::mk_int,
+                   ngrid_vpa::mk_int,nelement_vpa::mk_int,
+                   ivperp_local::mk_int,ivperpp_local::mk_int,
+                   ielement_vperp::mk_int,
+                   ngrid_vperp::mk_int,nelement_vperp::mk_int)
+    entries_per_vpa_element = ngrid_vpa^2
+    entries_per_vperp_element = ngrid_vperp^2
+    # number of distinct vpa indices
+    ntot_vpa = (nelement_vpa - 1)*(entries_per_vpa_element - 1) + entries_per_vpa_element
+
+    # zero-based index within the vpa part
+    offset_vpa = ((ivpap_local - 1) + (ivpa_local - 1)*ngrid_vpa +
+                  (ielement_vpa - 1)*(entries_per_vpa_element - 1))
+    # zero-based index within the vperp part
+    offset_vperp = ((ivperpp_local - 1) + (ivperp_local - 1)*ngrid_vperp +
+                    (ielement_vperp - 1)*(entries_per_vperp_element - 1))
+    return 1 + offset_vpa + ntot_vpa*offset_vperp
+end
+
+# Storage for the (row, column, value) triplets from which a sparse matrix is
+# built with sparse(II,JJ,SS). Entry k of the three arrays describes one
+# candidate nonzero S[II[k],JJ[k]] = SS[k].
+# The index arrays are stored as integers: they hold row and column numbers,
+# so keeping them as floats would force a conversion when the struct is built
+# from integer arrays and again when the sparse matrix is created.
+struct sparse_matrix_constructor
+    # the Ith row
+    II::Array{mk_int,1}
+    # the Jth column
+    JJ::Array{mk_int,1}
+    # the data S[I,J]
+    SS::Array{mk_float,1}
+end
+
+# Allocate a sparse_matrix_constructor with room for nsparse triplets.
+# The row and column index arrays are created as zero-filled mk_int arrays and
+# the data array as a zero-filled mk_float array; they are then passed to the
+# default constructor, which converts them to the declared field types if those
+# differ.
+function allocate_sparse_matrix_constructor(nsparse::mk_int)
+    row_indices = zeros(mk_int, nsparse)
+    column_indices = zeros(mk_int, nsparse)
+    entries = zeros(mk_float, nsparse)
+    return sparse_matrix_constructor(row_indices,column_indices,entries)
+end
+
+# Set triplet number icsc of `data` to (row ii, column jj, value ss),
+# overwriting whatever value was stored there. Returns nothing.
+function assign_constructor_data!(data::sparse_matrix_constructor,icsc::mk_int,ii::mk_int,jj::mk_int,ss::mk_float)
+    rows, columns, entries = data.II, data.JJ, data.SS
+    rows[icsc] = ii
+    columns[icsc] = jj
+    entries[icsc] = ss
+    return nothing
+end
+# Set the row and column of triplet number icsc of `data` to (ii, jj) and ADD ss
+# to the value already stored there (in contrast to assign_constructor_data!,
+# which overwrites the value). Returns nothing.
+function assemble_constructor_data!(data::sparse_matrix_constructor,icsc::mk_int,ii::mk_int,jj::mk_int,ss::mk_float)
+    rows, columns, entries = data.II, data.JJ, data.SS
+    rows[icsc] = ii
+    columns[icsc] = jj
+    entries[icsc] += ss
+    return nothing
+end
+
+# Build and return the sparse matrix described by the triplets stored in `data`,
+# by calling sparse(rows, columns, values) on its three arrays.
+function create_sparse_matrix(data::sparse_matrix_constructor)
+    rows, columns, entries = data.II, data.JJ, data.SS
+    return sparse(rows,columns,entries)
+end
+
+# Allocate a vpa_vperp_boundary_data holding three 1D arrays created with
+# allocate_shared_float: the lower and upper vpa boundary arrays have vperp.n
+# entries each and the upper vperp boundary array has vpa.n entries.
+function allocate_boundary_data(vpa,vperp)
+    nvpa, nvperp = vpa.n, vperp.n
+    vpa_lower = allocate_shared_float(nvperp)
+    vpa_upper = allocate_shared_float(nvperp)
+    vperp_upper = allocate_shared_float(nvpa)
+    return vpa_vperp_boundary_data(vpa_lower,vpa_upper,vperp_upper)
+end
+
+
+# Fill the boundary arrays of func_data from the 2D array func_exact[ivpa,ivperp]:
+#   lower_boundary_vpa[ivperp]  = func_exact[1,ivperp]
+#   upper_boundary_vpa[ivperp]  = func_exact[vpa.n,ivperp]
+#   upper_boundary_vperp[ivpa]  = func_exact[ivpa,vperp.n]
+# The copy is done inside @serial_region after begin_serial_region().
+# Returns nothing.
+function assign_exact_boundary_data!(func_data::vpa_vperp_boundary_data,
+                                        func_exact,vpa,vperp)
+    begin_serial_region()
+    last_vpa = vpa.n
+    last_vperp = vperp.n
+    @serial_region begin
+        # values along the two vpa boundaries
+        for ivperp in 1:last_vperp
+            func_data.lower_boundary_vpa[ivperp] = func_exact[1,ivperp]
+            func_data.upper_boundary_vpa[ivperp] = func_exact[last_vpa,ivperp]
+        end
+        # values along the upper vperp boundary
+        for ivpa in 1:last_vpa
+            func_data.upper_boundary_vperp[ivpa] = func_exact[ivpa,last_vperp]
+        end
+    end
+    return nothing
+end
+    
+# Allocate a rosenbluth_potential_boundary_data made of eight separately allocated
+# boundary-data objects (one call to allocate_boundary_data each), passed to the
+# constructor in the order H, dHdvpa, dHdvperp, G, dGdvperp, d2Gdvperp2,
+# d2Gdvperpdvpa, d2Gdvpa2.
+function allocate_rosenbluth_potential_boundary_data(vpa,vperp)
+    # each call allocates a fresh, independent set of boundary arrays;
+    # the constructor arguments are evaluated left to right
+    new_boundary_data() = allocate_boundary_data(vpa,vperp)
+    return rosenbluth_potential_boundary_data(
+        new_boundary_data(), # H
+        new_boundary_data(), # dHdvpa
+        new_boundary_data(), # dHdvperp
+        new_boundary_data(), # G
+        new_boundary_data(), # dGdvperp
+        new_boundary_data(), # d2Gdvperp2
+        new_boundary_data(), # d2Gdvperpdvpa
+        new_boundary_data()) # d2Gdvpa2
+end
+
+# Fill every boundary-data member of rpbd from the corresponding 2D array of
+# exact values, by calling assign_exact_boundary_data! once per member in the
+# order H, dHdvpa, dHdvperp, G, dGdvperp, d2Gdvperp2, d2Gdvperpdvpa, d2Gdvpa2.
+# Returns nothing.
+function calculate_rosenbluth_potential_boundary_data_exact!(rpbd::rosenbluth_potential_boundary_data,
+  H_exact,dHdvpa_exact,dHdvperp_exact,G_exact,dGdvperp_exact,
+  d2Gdvperp2_exact,d2Gdvperpdvpa_exact,d2Gdvpa2_exact,
+  vpa,vperp)
+    # (destination, source) pairs, processed in order
+    targets_and_sources = (
+        (rpbd.H_data, H_exact),
+        (rpbd.dHdvpa_data, dHdvpa_exact),
+        (rpbd.dHdvperp_data, dHdvperp_exact),
+        (rpbd.G_data, G_exact),
+        (rpbd.dGdvperp_data, dGdvperp_exact),
+        (rpbd.d2Gdvperp2_data, d2Gdvperp2_exact),
+        (rpbd.d2Gdvperpdvpa_data, d2Gdvperpdvpa_exact),
+        (rpbd.d2Gdvpa2_data, d2Gdvpa2_exact),
+    )
+    for (boundary_data, exact_values) in targets_and_sources
+        assign_exact_boundary_data!(boundary_data,exact_values,vpa,vperp)
+    end
+    return nothing
+end
+
+
+# Compute the boundary values stored in func_data as weighted sums of func_input
+# over the whole primed grid, using the 4D array weight[ivpap,ivperpp,ivpa,ivperp]:
+#   lower_boundary_vpa[ivperp]  = sum of weight[:,:,1,ivperp]     .* func_input
+#   upper_boundary_vpa[ivperp]  = sum of weight[:,:,vpa.n,ivperp] .* func_input
+#   upper_boundary_vperp[ivpa]  = sum of weight[:,:,ivpa,vperp.n] .* func_input
+# The vpa-boundary sums are done in a @loop_vperp loop and the vperp-boundary sums
+# in a @loop_vpa loop; begin_serial_region() is called before returning nothing.
+function calculate_boundary_data!(func_data::vpa_vperp_boundary_data,
+                                        weight::MPISharedArray{mk_float,4},func_input,vpa,vperp)
+    nvpa, nvperp = vpa.n, vperp.n
+    # boundaries in vpa: one value for each vperp
+    begin_vperp_region()
+    @loop_vperp ivperp begin
+        func_data.lower_boundary_vpa[ivperp] = 0.0
+        func_data.upper_boundary_vpa[ivperp] = 0.0
+        for ivperpp in 1:nvperp, ivpap in 1:nvpa
+            func_data.lower_boundary_vpa[ivperp] += weight[ivpap,ivperpp,1,ivperp]*func_input[ivpap,ivperpp]
+            func_data.upper_boundary_vpa[ivperp] += weight[ivpap,ivperpp,nvpa,ivperp]*func_input[ivpap,ivperpp]
+        end
+    end
+    # upper boundary in vperp: one value for each vpa
+    begin_vpa_region()
+    @loop_vpa ivpa begin
+        func_data.upper_boundary_vperp[ivpa] = 0.0
+        for ivperpp in 1:nvperp, ivpap in 1:nvpa
+            func_data.upper_boundary_vperp[ivpa] += weight[ivpap,ivperpp,ivpa,nvperp]*func_input[ivpap,ivperpp]
+        end
+    end
+    # return to serial parallelisation
+    begin_serial_region()
+    return nothing
+end
+
+# Compute the boundary values stored in func_data as weighted sums of func_input
+# over the whole primed grid, using the three boundary-only weight arrays held in
+# `weight`:
+#   lower_boundary_vpa[ivperp]  = sum of weight.lower_vpa_boundary[:,:,ivperp]  .* func_input
+#   upper_boundary_vpa[ivperp]  = sum of weight.upper_vpa_boundary[:,:,ivperp]  .* func_input
+#   upper_boundary_vperp[ivpa]  = sum of weight.upper_vperp_boundary[:,:,ivpa]  .* func_input
+# The vpa-boundary sums are done in a @loop_vperp loop and the vperp-boundary sums
+# in a @loop_vpa loop; begin_serial_region() is called before returning nothing.
+function calculate_boundary_data!(func_data::vpa_vperp_boundary_data,
+                                  weight::boundary_integration_weights_struct,
+                                  func_input,vpa,vperp)
+    nvpa, nvperp = vpa.n, vperp.n
+    # boundaries in vpa: one value for each vperp
+    begin_vperp_region()
+    @loop_vperp ivperp begin
+        func_data.lower_boundary_vpa[ivperp] = 0.0
+        func_data.upper_boundary_vpa[ivperp] = 0.0
+        for ivperpp in 1:nvperp, ivpap in 1:nvpa
+            func_data.lower_boundary_vpa[ivperp] += weight.lower_vpa_boundary[ivpap,ivperpp,ivperp]*func_input[ivpap,ivperpp]
+            func_data.upper_boundary_vpa[ivperp] += weight.upper_vpa_boundary[ivpap,ivperpp,ivperp]*func_input[ivpap,ivperpp]
+        end
+    end
+    # upper boundary in vperp: one value for each vpa
+    begin_vpa_region()
+    @loop_vpa ivpa begin
+        func_data.upper_boundary_vperp[ivpa] = 0.0
+        for ivperpp in 1:nvperp, ivpap in 1:nvpa
+            func_data.upper_boundary_vperp[ivpa] += weight.upper_vperp_boundary[ivpap,ivperpp,ivpa]*func_input[ivpap,ivperpp]
+        end
+    end
+    # return to serial parallelisation
+    begin_serial_region()
+    return nothing
+end
+
+"""
+    calculate_rosenbluth_potential_boundary_data!(rpbd, fkpl, pdf, vpa, vperp,
+        vpa_spectral, vperp_spectral; calculate_GG=false, calculate_dGdvperp=false)
+
+Fill the boundary data in `rpbd` by numerical integration of `pdf` and its derivatives
+against the weights stored in `fkpl`.
+
+The work arrays `fkpl.dfdvperp`, `fkpl.dfdvpa` and `fkpl.d2fdvperpdvpa` are overwritten
+with derivatives of `pdf` obtained from `derivative!`. `calculate_boundary_data!` is then
+called with the following (target, weights, integrand) combinations:
+
+| target in `rpbd`      | weights in `fkpl` | integrand       | condition            |
+|:----------------------|:------------------|:----------------|:---------------------|
+| `H_data`              | `H0_weights`      | `pdf`           | always               |
+| `dHdvpa_data`         | `H0_weights`      | `dfdvpa`        | always               |
+| `dHdvperp_data`       | `H1_weights`      | `dfdvperp`      | always               |
+| `G_data`              | `G0_weights`      | `pdf`           | `calculate_GG`       |
+| `dGdvperp_data`       | `G1_weights`      | `dfdvperp`      | `calculate_dGdvperp` |
+| `d2Gdvperp2_data`     | `H2_weights`      | `dfdvperp`      | always               |
+| `d2Gdvperpdvpa_data`  | `G1_weights`      | `d2fdvperpdvpa` | always               |
+| `d2Gdvpa2_data`       | `H3_weights`      | `dfdvpa`        | always               |
+
+Returns `nothing`.
+"""
+function calculate_rosenbluth_potential_boundary_data!(rpbd::rosenbluth_potential_boundary_data,
+    fkpl::Union{fokkerplanck_arrays_direct_integration_struct,fokkerplanck_boundary_data_arrays_struct},pdf,vpa,vperp,vpa_spectral,vperp_spectral;
+    calculate_GG=false,calculate_dGdvperp=false)
+    # work arrays that receive the derivatives of pdf
+    df_dvperp = fkpl.dfdvperp
+    df_dvpa = fkpl.dfdvpa
+    d2f_dvperpdvpa = fkpl.d2fdvperpdvpa
+
+    # vperp derivative, one vpa point at a time
+    begin_vpa_region()
+    @loop_vpa ivpa begin
+        @views derivative!(vperp.scratch, pdf[ivpa,:], vperp, vperp_spectral)
+        df_dvperp[ivpa,:] .= vperp.scratch
+    end
+
+    # vpa derivatives of pdf and of its vperp derivative, one vperp point at a time
+    begin_vperp_region()
+    @loop_vperp ivperp begin
+        @views derivative!(vpa.scratch, pdf[:,ivperp], vpa, vpa_spectral)
+        df_dvpa[:,ivperp] .= vpa.scratch
+        @views derivative!(vpa.scratch, df_dvperp[:,ivperp], vpa, vpa_spectral)
+        d2f_dvperpdvpa[:,ivperp] .= vpa.scratch
+    end
+
+    # ensure data is synchronized before it is used as an integrand
+    begin_serial_region()
+
+    # numerical integration for each set of boundary data
+    calculate_boundary_data!(rpbd.H_data, fkpl.H0_weights, pdf, vpa, vperp)
+    calculate_boundary_data!(rpbd.dHdvpa_data, fkpl.H0_weights, df_dvpa, vpa, vperp)
+    calculate_boundary_data!(rpbd.dHdvperp_data, fkpl.H1_weights, df_dvperp, vpa, vperp)
+    calculate_GG &&
+        calculate_boundary_data!(rpbd.G_data, fkpl.G0_weights, pdf, vpa, vperp)
+    calculate_dGdvperp &&
+        calculate_boundary_data!(rpbd.dGdvperp_data, fkpl.G1_weights, df_dvperp, vpa, vperp)
+    calculate_boundary_data!(rpbd.d2Gdvperp2_data, fkpl.H2_weights, df_dvperp, vpa, vperp)
+    calculate_boundary_data!(rpbd.d2Gdvperpdvpa_data, fkpl.G1_weights, d2f_dvperpdvpa, vpa, vperp)
+    calculate_boundary_data!(rpbd.d2Gdvpa2_data, fkpl.H3_weights, df_dvpa, vpa, vperp)
+
+    return nothing
+end
+
+"""
+    test_rosenbluth_potential_boundary_data(rpbd, rpbd_exact, vpa, vperp;
+                                            print_to_screen=true)
+
+Compare every set of boundary data in `rpbd` with the matching set in `rpbd_exact` by
+calling `test_boundary_data` on each pair.
+
+Returns the eight values produced by `test_boundary_data`, in the order
+`H`, `dHdvpa`, `dHdvperp`, `G`, `dGdvperp`, `d2Gdvperp2`, `d2Gdvperpdvpa`, `d2Gdvpa2`.
+`print_to_screen` is forwarded unchanged to `test_boundary_data`.
+"""
+function test_rosenbluth_potential_boundary_data(rpbd::rosenbluth_potential_boundary_data,
+    rpbd_exact::rosenbluth_potential_boundary_data,vpa,vperp;print_to_screen=true)
+
+    # scratch buffers handed to every comparison
+    scratch_vpa = Vector{mk_float}(undef, vpa.n)
+    scratch_vperp_a = Vector{mk_float}(undef, vperp.n)
+    scratch_vperp_b = Vector{mk_float}(undef, vperp.n)
+
+    # one comparison: `label` is the name passed on for the printed header
+    compare(label, computed, exact) =
+        test_boundary_data(computed, exact, label, vpa, vperp,
+                           scratch_vpa, scratch_vperp_a, scratch_vperp_b, print_to_screen)
+
+    max_H_err = compare("H", rpbd.H_data, rpbd_exact.H_data)
+    max_dHdvpa_err = compare("dHdvpa", rpbd.dHdvpa_data, rpbd_exact.dHdvpa_data)
+    max_dHdvperp_err = compare("dHdvperp", rpbd.dHdvperp_data, rpbd_exact.dHdvperp_data)
+    max_G_err = compare("G", rpbd.G_data, rpbd_exact.G_data)
+    max_dGdvperp_err = compare("dGdvperp", rpbd.dGdvperp_data, rpbd_exact.dGdvperp_data)
+    max_d2Gdvperp2_err = compare("d2Gdvperp2", rpbd.d2Gdvperp2_data, rpbd_exact.d2Gdvperp2_data)
+    max_d2Gdvperpdvpa_err = compare("d2Gdvperpdvpa", rpbd.d2Gdvperpdvpa_data, rpbd_exact.d2Gdvperpdvpa_data)
+    max_d2Gdvpa2_err = compare("d2Gdvpa2", rpbd.d2Gdvpa2_data, rpbd_exact.d2Gdvpa2_data)
+
+    return (max_H_err, max_dHdvpa_err, max_dHdvperp_err, max_G_err,
+            max_dGdvperp_err, max_d2Gdvperp2_err, max_d2Gdvperpdvpa_err, max_d2Gdvpa2_err)
+end
+
+"""
+    test_boundary_data(func, func_exact, func_name, vpa, vperp,
+                       buffer_vpa, buffer_vperp_1, buffer_vperp_2, print_to_screen)
+
+Return the largest absolute difference between the boundary data in `func` and in
+`func_exact`.
+
+# Arguments
+- `func`, `func_exact`: objects with fields `lower_boundary_vpa`, `upper_boundary_vpa`
+  (indexed by `ivperp`) and `upper_boundary_vperp` (indexed by `ivpa`).
+- `func_name`: label used in the printed header.
+- `vpa`, `vperp`: only their field `n` (number of grid points) is read.
+- `buffer_vpa`: overwritten with the pointwise error on the upper vperp boundary;
+  expected to have length `vpa.n`.
+- `buffer_vperp_1`, `buffer_vperp_2`: overwritten with the pointwise errors on the lower
+  and upper vpa boundaries respectively; expected to have length `vperp.n`.
+- `print_to_screen`: when `true`, print the maximum error on each boundary.
+
+# Returns
+The maximum of the three per-boundary maximum errors.
+"""
+function test_boundary_data(func,func_exact,func_name,vpa,vperp,buffer_vpa,buffer_vperp_1,buffer_vperp_2,print_to_screen)
+    nvpa = vpa.n
+    nvperp = vperp.n
+    # Store the pointwise errors *into* the buffers. Assigning to the bare buffer name
+    # would rebind the local variable to a scalar, so only the last grid point would
+    # contribute to the maxima taken below.
+    for ivperp in 1:nvperp
+        buffer_vperp_1[ivperp] = abs(func.lower_boundary_vpa[ivperp] - func_exact.lower_boundary_vpa[ivperp])
+        buffer_vperp_2[ivperp] = abs(func.upper_boundary_vpa[ivperp] - func_exact.upper_boundary_vpa[ivperp])
+    end
+    for ivpa in 1:nvpa
+        buffer_vpa[ivpa] = abs(func.upper_boundary_vperp[ivpa] - func_exact.upper_boundary_vperp[ivpa])
+    end
+    max_lower_vpa_err = maximum(buffer_vperp_1)
+    max_upper_vpa_err = maximum(buffer_vperp_2)
+    max_upper_vperp_err = maximum(buffer_vpa)
+    if print_to_screen
+        println(string(func_name*" boundary data:"))
+        println("max(lower_vpa_err) = ",max_lower_vpa_err)
+        println("max(upper_vpa_err) = ",max_upper_vpa_err)
+        println("max(upper_vperp_err) = ",max_upper_vperp_err)
+    end
+    max_err = max(max_lower_vpa_err,max_upper_vpa_err,max_upper_vperp_err)
+    return max_err
+end
+
+"""
+    get_global_compound_index(vpa, vperp, ielement_vpa, ielement_vperp,
+                              ivpa_local, ivperp_local)
+
+Translate element-local grid indices into global indices.
+
+The global grid indices are looked up in `vpa.igrid_full` and `vperp.igrid_full`, and the
+compound index is obtained from `ic_func(ivpa_global, ivperp_global, vpa.n)`.
+
+Returns the tuple `(ic_global, ivpa_global, ivperp_global)`.
+"""
+function get_global_compound_index(vpa,vperp,ielement_vpa,ielement_vperp,ivpa_local,ivperp_local)
+    # positions on the full 1D grids
+    igrid_vpa = vpa.igrid_full[ivpa_local,ielement_vpa]
+    igrid_vperp = vperp.igrid_full[ivperp_local,ielement_vperp]
+    # compound index first, followed by the two 1D indices it was built from
+    return ic_func(igrid_vpa,igrid_vperp,vpa.n), igrid_vpa, igrid_vperp
+end
+"""
+    enforce_zero_bc!(fc, vpa, vperp; impose_BC_at_zero_vperp=false)
+
+Set the entries of `fc` (addressed by global compound index) to zero on the lower and
+upper `vpa` boundaries and on the upper `vperp` boundary. The lower `vperp` boundary is
+zeroed as well when `impose_BC_at_zero_vperp` is `true`.
+
+Boundary nodes are located as (element, local grid point) pairs: `(1, 1)` for a lower
+boundary and `(nelement_local, ngrid)` for an upper boundary. Returns `nothing`.
+"""
+function enforce_zero_bc!(fc,vpa,vperp;impose_BC_at_zero_vperp=false)
+    # (element, local grid point) of the end points of each coordinate
+    vpa_ends = ((1, 1), (vpa.nelement_local, vpa.ngrid))
+    vperp_upper = (vperp.nelement_local, vperp.ngrid)
+    # lower vperp end point is only included on request
+    vperp_ends = impose_BC_at_zero_vperp ? ((1, 1), vperp_upper) : (vperp_upper,)
+
+    # lower, then upper vpa boundary: sweep all vperp nodes
+    for (ielement_vpa, ivpa_local) in vpa_ends
+        for ielement_vperp in 1:vperp.nelement_local, ivperp_local in 1:vperp.ngrid
+            ic, _, _ = get_global_compound_index(vpa,vperp,ielement_vpa,ielement_vperp,ivpa_local,ivperp_local)
+            fc[ic] = 0.0
+        end
+    end
+
+    # (optional lower, then) upper vperp boundary: sweep all vpa nodes
+    for (ielement_vperp, ivperp_local) in vperp_ends
+        for ielement_vpa in 1:vpa.nelement_local, ivpa_local in 1:vpa.ngrid
+            ic, _, _ = get_global_compound_index(vpa,vperp,ielement_vpa,ielement_vperp,ivpa_local,ivperp_local)
+            fc[ic] = 0.0
+        end
+    end
+    return nothing
+end
+
+"""
+    enforce_dirichlet_bc!(fc, vpa, vperp, f_bc; dirichlet_vperp_BC=false)
+
+Copy boundary values from `f_bc` into `fc` on the lower and upper `vpa` boundaries and on
+the upper `vperp` boundary. The lower `vperp` boundary is copied as well when
+`dirichlet_vperp_BC` is `true`.
+
+`fc` is addressed by global compound index, `f_bc` by `[ivpa_global, ivperp_global]`;
+both indices come from `get_global_compound_index`. Returns `nothing`.
+"""
+function enforce_dirichlet_bc!(fc,vpa,vperp,f_bc;dirichlet_vperp_BC=false)
+    # copy the value of a single node, given its element and local grid indices
+    function copy_node!(iel_vpa, iel_vperp, igrid_vpa, igrid_vperp)
+        ic, ivpa, ivperp = get_global_compound_index(vpa,vperp,iel_vpa,iel_vperp,igrid_vpa,igrid_vperp)
+        fc[ic] = f_bc[ivpa,ivperp]
+        return nothing
+    end
+    # all nodes sharing one vpa position
+    function copy_vpa_edge!(iel_vpa, igrid_vpa)
+        for iel_vperp in 1:vperp.nelement_local, igrid_vperp in 1:vperp.ngrid
+            copy_node!(iel_vpa, iel_vperp, igrid_vpa, igrid_vperp)
+        end
+        return nothing
+    end
+    # all nodes sharing one vperp position
+    function copy_vperp_edge!(iel_vperp, igrid_vperp)
+        for iel_vpa in 1:vpa.nelement_local, igrid_vpa in 1:vpa.ngrid
+            copy_node!(iel_vpa, iel_vperp, igrid_vpa, igrid_vperp)
+        end
+        return nothing
+    end
+
+    # lower vpa boundary
+    copy_vpa_edge!(1, 1)
+    # upper vpa boundary
+    copy_vpa_edge!(vpa.nelement_local, vpa.ngrid)
+    # lower vperp boundary (first point of the first element), only on request
+    if dirichlet_vperp_BC
+        copy_vperp_edge!(1, 1)
+    end
+    # upper vperp boundary
+    copy_vperp_edge!(vperp.nelement_local, vperp.ngrid)
+    return nothing
+end
+
+"""
+    enforce_dirichlet_bc!(fc, vpa, vperp, f_bc::vpa_vperp_boundary_data)
+
+Copy the boundary values stored in `f_bc` into `fc` (addressed by global compound index).
+
+Values are written in this order:
+1. lower `vpa` boundary from `f_bc.lower_boundary_vpa[ivperp_global]`,
+2. upper `vpa` boundary from `f_bc.upper_boundary_vpa[ivperp_global]`,
+3. upper `vperp` boundary from `f_bc.upper_boundary_vperp[ivpa_global]`.
+
+A node written by more than one step keeps the value from the last step that touched
+it. The lower `vperp` boundary is not modified. Returns `nothing`.
+"""
+function enforce_dirichlet_bc!(fc,vpa,vperp,f_bc::vpa_vperp_boundary_data)
+    # (element, local grid point, boundary values) for the lower and upper vpa boundary
+    vpa_edges = ((1, 1, f_bc.lower_boundary_vpa),
+                 (vpa.nelement_local, vpa.ngrid, f_bc.upper_boundary_vpa))
+    for (ielement_vpa, ivpa_local, edge_values) in vpa_edges
+        for ielement_vperp in 1:vperp.nelement_local, ivperp_local in 1:vperp.ngrid
+            ic, _, ivperp = get_global_compound_index(vpa,vperp,ielement_vpa,ielement_vperp,ivpa_local,ivperp_local)
+            fc[ic] = edge_values[ivperp]
+        end
+    end
+
+    # upper vperp boundary, written last
+    last_element_vperp = vperp.nelement_local
+    last_point_vperp = vperp.ngrid
+    for ielement_vpa in 1:vpa.nelement_local, ivpa_local in 1:vpa.ngrid
+        ic, ivpa, _ = get_global_compound_index(vpa,vperp,ielement_vpa,last_element_vperp,ivpa_local,last_point_vperp)
+        fc[ic] = f_bc.upper_boundary_vperp[ivpa]
+    end
+    return nothing
+end
+
+"""
+    assemble_matrix_operators_dirichlet_bc(vpa, vperp, vpa_spectral, vperp_spectral;
+                                           print_to_screen=true)
+
+Assemble dense 2D operators in the global compound `(vpa, vperp)` index from the
+elemental matrices provided by `get_QQ_local!`, then convert them with `sparse`.
+
+Every operator is accumulated element by element as a product of one `vpa` elemental
+matrix entry and one `vperp` elemental matrix entry (or a combination of such products).
+For the three elliptic operators `LP2D`, `LV2D` and `LB2D`, rows on the lower and upper
+`vpa` boundaries and on the upper `vperp` boundary are not accumulated: their entries are
+set to 1 on the diagonal and 0 elsewhere within the element. The lower `vperp` boundary
+is treated like an interior row because `impose_BC_at_zero_vperp` is fixed to `false`.
+
+When `print_to_screen` is `true`, progress messages with a timestamp are printed inside
+`@serial_region` on global rank 0.
+
+# Returns
+A tuple of 13 sparse matrices in the order `MM2D`, `KKpar2D`, `KKperp2D`,
+`KKpar2D_with_BC_terms`, `KKperp2D_with_BC_terms`, `LP2D`, `LV2D`, `LB2D`, `KPperp2D`,
+`PUperp2D`, `PPparPUperp2D`, `PPpar2D`, `MMparMNperp2D`.
+"""
+function assemble_matrix_operators_dirichlet_bc(vpa,vperp,vpa_spectral,vperp_spectral;print_to_screen=true)
+    nc_global = vpa.n*vperp.n
+    # allocators: zero-filled global operators and uninitialised elemental matrices
+    new_global() = zeros(mk_float, nc_global, nc_global)
+    new_local(coord) = Array{mk_float,2}(undef, coord.ngrid, coord.ngrid)
+
+    # 2D mass matrix and the other operators in the global compound coordinate
+    MM2D = new_global()
+    KKpar2D = new_global()
+    KKperp2D = new_global()
+    KPperp2D = new_global()
+    KKpar2D_with_BC_terms = new_global()
+    KKperp2D_with_BC_terms = new_global()
+    PUperp2D = new_global()
+    PPparPUperp2D = new_global()
+    PPpar2D = new_global()
+    MMparMNperp2D = new_global()
+    # Laplacian matrix and the two modified Laplacian matrices
+    LP2D = new_global()
+    LV2D = new_global()
+    LB2D = new_global()
+
+    # elemental matrices, passed to get_QQ_local! for every element
+    MMpar = new_local(vpa)
+    KKpar = new_local(vpa)
+    KKpar_with_BC_terms = new_local(vpa)
+    PPpar = new_local(vpa)
+    MMperp = new_local(vperp)
+    MRperp = new_local(vperp)
+    MNperp = new_local(vperp)
+    KKperp = new_local(vperp)
+    KKperp_with_BC_terms = new_local(vperp)
+    KJperp = new_local(vperp)
+    LLperp = new_local(vperp)
+    PPperp = new_local(vperp)
+    PUperp = new_local(vperp)
+
+    # (matrix, option string) pairs in the order they are requested from get_QQ_local!
+    vperp_blocks = ((MMperp,"M"), (MRperp,"R"), (MNperp,"N"), (KKperp,"K"),
+                    (KKperp_with_BC_terms,"K_with_BC_terms"), (KJperp,"J"),
+                    (LLperp,"L"), (PPperp,"P"), (PUperp,"U"))
+    vpa_blocks = ((MMpar,"M"), (KKpar,"K"),
+                  (KKpar_with_BC_terms,"K_with_BC_terms"), (PPpar,"P"))
+
+    # overwrite one entry of the three elliptic operators in a boundary row:
+    # 1 where the column coincides with the row, 0 otherwise
+    function set_dirichlet_entry!(irow, icol, on_diagonal)
+        entry = on_diagonal ? 1.0 : 0.0
+        LP2D[irow,icol] = entry
+        LV2D[irow,icol] = entry
+        LB2D[irow,icol] = entry
+        return nothing
+    end
+
+    impose_BC_at_zero_vperp = false
+    @serial_region begin
+        if global_rank[] == 0 && print_to_screen
+            println("begin elliptic operator assignment   ", Dates.format(now(), dateformat"H:MM:SS"))
+        end
+    end
+    for ielement_vperp in 1:vperp.nelement_local
+        for (QQ, option) in vperp_blocks
+            get_QQ_local!(QQ,ielement_vperp,vperp_spectral.lobatto,vperp_spectral.radau,vperp,option)
+        end
+
+        for ielement_vpa in 1:vpa.nelement_local
+            for (QQ, option) in vpa_blocks
+                get_QQ_local!(QQ,ielement_vpa,vpa_spectral.lobatto,vpa_spectral.radau,vpa,option)
+            end
+
+            # primed indices label columns, unprimed indices label rows
+            for ivperpp_local in 1:vperp.ngrid, ivperp_local in 1:vperp.ngrid,
+                ivpap_local in 1:vpa.ngrid, ivpa_local in 1:vpa.ngrid
+
+                ic_global, _, _ = get_global_compound_index(vpa,vperp,ielement_vpa,ielement_vperp,ivpa_local,ivperp_local)
+                icp_global, _, _ = get_global_compound_index(vpa,vperp,ielement_vpa,ielement_vperp,ivpap_local,ivperpp_local)
+
+                # elemental entries for this (row, column) pair
+                mm_par = MMpar[ivpa_local,ivpap_local]
+                kk_par = KKpar[ivpa_local,ivpap_local]
+                kk_par_bc = KKpar_with_BC_terms[ivpa_local,ivpap_local]
+                pp_par = PPpar[ivpa_local,ivpap_local]
+                mm_perp = MMperp[ivperp_local,ivperpp_local]
+                mr_perp = MRperp[ivperp_local,ivperpp_local]
+                mn_perp = MNperp[ivperp_local,ivperpp_local]
+                kk_perp = KKperp[ivperp_local,ivperpp_local]
+                kk_perp_bc = KKperp_with_BC_terms[ivperp_local,ivperpp_local]
+                kj_perp = KJperp[ivperp_local,ivperpp_local]
+                ll_perp = LLperp[ivperp_local,ivperpp_local]
+                pp_perp = PPperp[ivperp_local,ivperpp_local]
+                pu_perp = PUperp[ivperp_local,ivperpp_local]
+
+                # boundary condition possibilities for the current row
+                row_at_lower_vpa = (ielement_vpa == 1 && ivpa_local == 1)
+                row_at_upper_vpa = (ielement_vpa == vpa.nelement_local && ivpa_local == vpa.ngrid)
+                row_at_lower_vperp = (ielement_vperp == 1 && ivperp_local == 1)
+                row_at_upper_vperp = (ielement_vperp == vperp.nelement_local && ivperp_local == vperp.ngrid)
+
+                if row_at_lower_vpa
+                    set_dirichlet_entry!(ic_global, icp_global,
+                                         ivpap_local == 1 && ivperp_local == ivperpp_local)
+                elseif row_at_upper_vpa
+                    set_dirichlet_entry!(ic_global, icp_global,
+                                         ivpap_local == vpa.ngrid && ivperp_local == ivperpp_local)
+                elseif row_at_lower_vperp && impose_BC_at_zero_vperp
+                    set_dirichlet_entry!(ic_global, icp_global,
+                                         ivperpp_local == 1 && ivpa_local == ivpap_local)
+                elseif row_at_upper_vperp
+                    set_dirichlet_entry!(ic_global, icp_global,
+                                         ivperpp_local == vperp.ngrid && ivpa_local == ivpap_local)
+                else
+                    # interior row: accumulate Laplacian and modified Laplacian data
+                    LP2D[ic_global,icp_global] += (kk_par*mm_perp + mm_par*ll_perp)
+                    LV2D[ic_global,icp_global] += (kk_par*mr_perp +
+                                                   mm_par*(kj_perp - pp_perp - mn_perp))
+                    LB2D[ic_global,icp_global] += (kk_par*mr_perp +
+                                                   mm_par*(kj_perp - pp_perp - 4.0*mn_perp))
+                end
+
+                # mass matrix
+                MM2D[ic_global,icp_global] += mm_par*mm_perp
+                # K matrices
+                KKpar2D[ic_global,icp_global] += kk_par*mm_perp
+                KKperp2D[ic_global,icp_global] += mm_par*kk_perp
+                KPperp2D[ic_global,icp_global] += mm_par*(kj_perp - 2.0*pp_perp - 2.0*mn_perp)
+                # K matrices with explicit boundary terms from integration by parts
+                KKpar2D_with_BC_terms[ic_global,icp_global] += kk_par_bc*mm_perp
+                KKperp2D_with_BC_terms[ic_global,icp_global] += mm_par*kk_perp_bc
+                # PU and PP matrices
+                PUperp2D[ic_global,icp_global] += mm_par*pu_perp
+                PPparPUperp2D[ic_global,icp_global] += pp_par*pu_perp
+                PPpar2D[ic_global,icp_global] += pp_par*mm_perp
+                # RHS mass matrix for d2Gdvperp2
+                MMparMNperp2D[ic_global,icp_global] += mm_par*mn_perp
+            end
+        end
+    end
+    @serial_region begin
+        if global_rank[] == 0 && print_to_screen
+            println("finished elliptic operator assignment   ", Dates.format(now(), dateformat"H:MM:SS"))
+            println("begin conversion to sparse matrices   ", Dates.format(now(), dateformat"H:MM:SS"))
+        end
+    end
+
+    # convert the dense operators to sparse matrices, keeping the original return order
+    return sparse(MM2D), sparse(KKpar2D), sparse(KKperp2D),
+           sparse(KKpar2D_with_BC_terms), sparse(KKperp2D_with_BC_terms),
+           sparse(LP2D), sparse(LV2D), sparse(LB2D),
+           sparse(KPperp2D), sparse(PUperp2D), sparse(PPparPUperp2D),
+           sparse(PPpar2D), sparse(MMparMNperp2D)
+end
+
+function assemble_matrix_operators_dirichlet_bc_sparse(vpa,vperp,vpa_spectral,vperp_spectral;print_to_screen=true)
+    # Assemble a 2D mass matrix in the global compound coordinate
+    nc_global = vpa.n*vperp.n
+    ntot_vpa = (vpa.nelement_local - 1)*(vpa.ngrid^2 - 1) + vpa.ngrid^2
+    ntot_vperp = (vperp.nelement_local - 1)*(vperp.ngrid^2 - 1) + vperp.ngrid^2
+    nsparse = ntot_vpa*ntot_vperp
+    ngrid_vpa = vpa.ngrid
+    nelement_vpa = vpa.nelement_local
+    ngrid_vperp = vperp.ngrid
+    nelement_vperp = vperp.nelement_local
+    
+    MM2D = allocate_sparse_matrix_constructor(nsparse)
+    KKpar2D = allocate_sparse_matrix_constructor(nsparse)
+    KKperp2D = allocate_sparse_matrix_constructor(nsparse)
+    KKpar2D_with_BC_terms = allocate_sparse_matrix_constructor(nsparse)
+    KKperp2D_with_BC_terms = allocate_sparse_matrix_constructor(nsparse)
+    PUperp2D = allocate_sparse_matrix_constructor(nsparse)
+    PPparPUperp2D = allocate_sparse_matrix_constructor(nsparse)
+    PPpar2D = allocate_sparse_matrix_constructor(nsparse)
+    MMparMNperp2D = allocate_sparse_matrix_constructor(nsparse)
+    KPperp2D = allocate_sparse_matrix_constructor(nsparse)
+    # Laplacian matrix
+    LP2D = allocate_sparse_matrix_constructor(nsparse)
+    # Modified Laplacian matrix (for d / d vperp potentials)
+    LV2D = allocate_sparse_matrix_constructor(nsparse)
+    # Modified Laplacian matrix (for d^2 / d vperp^2 potentials)
+    LB2D = allocate_sparse_matrix_constructor(nsparse)
+    
+    # local dummy arrays
+    MMpar = Array{mk_float,2}(undef,ngrid_vpa,ngrid_vpa)
+    MMperp = Array{mk_float,2}(undef,ngrid_vperp,ngrid_vperp)
+    MNperp = Array{mk_float,2}(undef,ngrid_vperp,ngrid_vperp)
+    MRperp = Array{mk_float,2}(undef,ngrid_vperp,ngrid_vperp)
+    KKpar = Array{mk_float,2}(undef,ngrid_vpa,ngrid_vpa)
+    KKpar_with_BC_terms = Array{mk_float,2}(undef,ngrid_vpa,ngrid_vpa)
+    KKperp = Array{mk_float,2}(undef,ngrid_vperp,ngrid_vperp)
+    KKperp_with_BC_terms = Array{mk_float,2}(undef,ngrid_vperp,ngrid_vperp)
+    KJperp = Array{mk_float,2}(undef,ngrid_vperp,ngrid_vperp)
+    LLperp = Array{mk_float,2}(undef,ngrid_vperp,ngrid_vperp)
+    PPperp = Array{mk_float,2}(undef,ngrid_vperp,ngrid_vperp)
+    PUperp = Array{mk_float,2}(undef,ngrid_vperp,ngrid_vperp)
+    PPpar = Array{mk_float,2}(undef,ngrid_vpa,ngrid_vpa)
+        
+    impose_BC_at_zero_vperp = false
+    @serial_region begin
+        if global_rank[] == 0 && print_to_screen
+            println("begin elliptic operator assignment   ", Dates.format(now(), dateformat"H:MM:SS"))
+        end
+    end
+    for ielement_vperp in 1:nelement_vperp
+        get_QQ_local!(MMperp,ielement_vperp,vperp_spectral.lobatto,vperp_spectral.radau,vperp,"M")
+        get_QQ_local!(MRperp,ielement_vperp,vperp_spectral.lobatto,vperp_spectral.radau,vperp,"R")
+        get_QQ_local!(MNperp,ielement_vperp,vperp_spectral.lobatto,vperp_spectral.radau,vperp,"N")
+        get_QQ_local!(KKperp,ielement_vperp,vperp_spectral.lobatto,vperp_spectral.radau,vperp,"K")
+        get_QQ_local!(KKperp_with_BC_terms,ielement_vperp,vperp_spectral.lobatto,vperp_spectral.radau,vperp,"K_with_BC_terms")
+        get_QQ_local!(KJperp,ielement_vperp,vperp_spectral.lobatto,vperp_spectral.radau,vperp,"J")
+        get_QQ_local!(LLperp,ielement_vperp,vperp_spectral.lobatto,vperp_spectral.radau,vperp,"L")
+        get_QQ_local!(PPperp,ielement_vperp,vperp_spectral.lobatto,vperp_spectral.radau,vperp,"P")
+        get_QQ_local!(PUperp,ielement_vperp,vperp_spectral.lobatto,vperp_spectral.radau,vperp,"U")
+        #print_matrix(MMperp,"MMperp",vperp.ngrid,vperp.ngrid)
+        #print_matrix(MRperp,"MRperp",vperp.ngrid,vperp.ngrid)
+        #print_matrix(MNperp,"MNperp",vperp.ngrid,vperp.ngrid)
+        #print_matrix(KKperp,"KKperp",vperp.ngrid,vperp.ngrid)
+        #print_matrix(KJperp,"KJperp",vperp.ngrid,vperp.ngrid)
+        #print_matrix(LLperp,"LLperp",vperp.ngrid,vperp.ngrid)
+        #print_matrix(PPperp,"PPperp",vperp.ngrid,vperp.ngrid)
+        #print_matrix(PUperp,"PUperp",vperp.ngrid,vperp.ngrid)
+        
+        for ielement_vpa in 1:nelement_vpa
+            get_QQ_local!(MMpar,ielement_vpa,vpa_spectral.lobatto,vpa_spectral.radau,vpa,"M")
+            get_QQ_local!(KKpar_with_BC_terms,ielement_vpa,vpa_spectral.lobatto,vpa_spectral.radau,vpa,"K_with_BC_terms")
+            get_QQ_local!(KKpar,ielement_vpa,vpa_spectral.lobatto,vpa_spectral.radau,vpa,"K")
+            get_QQ_local!(PPpar,ielement_vpa,vpa_spectral.lobatto,vpa_spectral.radau,vpa,"P")
+            #print_matrix(MMpar,"MMpar",vpa.ngrid,vpa.ngrid)
+            #print_matrix(KKpar,"KKpar",vpa.ngrid,vpa.ngrid)
+            #print_matrix(PPpar,"PPpar",vpa.ngrid,vpa.ngrid)
+            
+            for ivperpp_local in 1:ngrid_vperp
+                for ivperp_local in 1:ngrid_vperp
+                    for ivpap_local in 1:ngrid_vpa
+                        for ivpa_local in 1:ngrid_vpa
+                            ic_global, ivpa_global, ivperp_global = get_global_compound_index(vpa,vperp,ielement_vpa,ielement_vperp,ivpa_local,ivperp_local)
+                            icp_global, ivpa_global, ivperp_global = get_global_compound_index(vpa,vperp,ielement_vpa,ielement_vperp,ivpap_local,ivperpp_local) #get_indices(vpa,vperp,ielement_vpa,ielement_vperp,ivpa_local,ivpap_local,ivperp_local,ivperpp_local)
+                            icsc = icsc_func(ivpa_local,ivpap_local,ielement_vpa::mk_int,
+                                           ngrid_vpa,nelement_vpa,
+                                           ivperp_local,ivperpp_local,
+                                           ielement_vperp,
+                                           ngrid_vperp,nelement_vperp)
+                            #println("ielement_vpa: ",ielement_vpa," ielement_vperp: ",ielement_vperp)
+                            #println("ivpa_local: ",ivpa_local," ivpap_local: ",ivpap_local)
+                            #println("ivperp_local: ",ivperp_local," ivperpp_local: ",ivperpp_local)
+                            #println("ic: ",ic_global," icp: ",icp_global)
+                            # boundary condition possibilities
+                            lower_boundary_row_vpa = (ielement_vpa == 1 && ivpa_local == 1)
+                            upper_boundary_row_vpa = (ielement_vpa == vpa.nelement_local && ivpa_local == vpa.ngrid)
+                            lower_boundary_row_vperp = (ielement_vperp == 1 && ivperp_local == 1)
+                            upper_boundary_row_vperp = (ielement_vperp == vperp.nelement_local && ivperp_local == vperp.ngrid)
+                            
+
+                            if lower_boundary_row_vpa
+                                if ivpap_local == 1 && ivperp_local == ivperpp_local
+                                    assign_constructor_data!(LP2D,icsc,ic_global,icp_global,1.0)
+                                    assign_constructor_data!(LV2D,icsc,ic_global,icp_global,1.0)
+                                    assign_constructor_data!(LB2D,icsc,ic_global,icp_global,1.0)
+                                else 
+                                    assign_constructor_data!(LP2D,icsc,ic_global,icp_global,0.0)
+                                    assign_constructor_data!(LV2D,icsc,ic_global,icp_global,0.0)
+                                    assign_constructor_data!(LB2D,icsc,ic_global,icp_global,0.0)
+                                end
+                            elseif upper_boundary_row_vpa
+                                if ivpap_local == vpa.ngrid && ivperp_local == ivperpp_local 
+                                    assign_constructor_data!(LP2D,icsc,ic_global,icp_global,1.0)
+                                    assign_constructor_data!(LV2D,icsc,ic_global,icp_global,1.0)
+                                    assign_constructor_data!(LB2D,icsc,ic_global,icp_global,1.0)
+                                else 
+                                    assign_constructor_data!(LP2D,icsc,ic_global,icp_global,0.0)
+                                    assign_constructor_data!(LV2D,icsc,ic_global,icp_global,0.0)
+                                    assign_constructor_data!(LB2D,icsc,ic_global,icp_global,0.0)
+                                end
+                            elseif lower_boundary_row_vperp && impose_BC_at_zero_vperp
+                                if ivperpp_local == 1 && ivpa_local == ivpap_local
+                                    assign_constructor_data!(LP2D,icsc,ic_global,icp_global,1.0)
+                                    assign_constructor_data!(LV2D,icsc,ic_global,icp_global,1.0)
+                                    assign_constructor_data!(LB2D,icsc,ic_global,icp_global,1.0)
+                                else 
+                                    assign_constructor_data!(LP2D,icsc,ic_global,icp_global,0.0)
+                                    assign_constructor_data!(LV2D,icsc,ic_global,icp_global,0.0)
+                                    assign_constructor_data!(LB2D,icsc,ic_global,icp_global,0.0)
+                                end
+                            elseif upper_boundary_row_vperp
+                                if ivperpp_local == vperp.ngrid && ivpa_local == ivpap_local
+                                    assign_constructor_data!(LP2D,icsc,ic_global,icp_global,1.0)
+                                    assign_constructor_data!(LV2D,icsc,ic_global,icp_global,1.0)
+                                    assign_constructor_data!(LB2D,icsc,ic_global,icp_global,1.0)
+                                else 
+                                    assign_constructor_data!(LP2D,icsc,ic_global,icp_global,0.0)
+                                    assign_constructor_data!(LV2D,icsc,ic_global,icp_global,0.0)
+                                    assign_constructor_data!(LB2D,icsc,ic_global,icp_global,0.0)
+                                end
+                            else
+                                # assign Laplacian matrix data
+                                assemble_constructor_data!(LP2D,icsc,ic_global,icp_global,
+                                            (KKpar[ivpa_local,ivpap_local]*
+                                             MMperp[ivperp_local,ivperpp_local] +
+                                             MMpar[ivpa_local,ivpap_local]*
+                                             LLperp[ivperp_local,ivperpp_local]))
+                                assemble_constructor_data!(LV2D,icsc,ic_global,icp_global,
+                                            (KKpar[ivpa_local,ivpap_local]*
+                                             MRperp[ivperp_local,ivperpp_local] +
+                                             MMpar[ivpa_local,ivpap_local]*
+                                            (KJperp[ivperp_local,ivperpp_local] -
+                                             PPperp[ivperp_local,ivperpp_local] - 
+                                             MNperp[ivperp_local,ivperpp_local])))
+                                assemble_constructor_data!(LB2D,icsc,ic_global,icp_global,
+                                            (KKpar[ivpa_local,ivpap_local]*
+                                             MRperp[ivperp_local,ivperpp_local] +
+                                             MMpar[ivpa_local,ivpap_local]*
+                                             (KJperp[ivperp_local,ivperpp_local] -
+                                              PPperp[ivperp_local,ivperpp_local] -
+                                          4.0*MNperp[ivperp_local,ivperpp_local])))
+                            end
+                            #assign mass matrix
+                            assemble_constructor_data!(MM2D,icsc,ic_global,icp_global,
+                                            (MMpar[ivpa_local,ivpap_local]*
+                                             MMperp[ivperp_local,ivperpp_local]))
+                                
+                            # assign K matrices (no explicit boundary terms)
+                            assemble_constructor_data!(KKpar2D,icsc,ic_global,icp_global,
+                                            (KKpar[ivpa_local,ivpap_local]*
+                                             MMperp[ivperp_local,ivperpp_local]))
+                            assemble_constructor_data!(KKperp2D,icsc,ic_global,icp_global,
+                                            (MMpar[ivpa_local,ivpap_local]*
+                                             KKperp[ivperp_local,ivperpp_local]))
+                            assemble_constructor_data!(KPperp2D,icsc,ic_global,icp_global,
+                                            (MMpar[ivpa_local,ivpap_local]*
+                                             (KJperp[ivperp_local,ivperpp_local] -
+                                              2.0*PPperp[ivperp_local,ivperpp_local] -
+                                              2.0*MNperp[ivperp_local,ivperpp_local])))
+                                             
+                            # assign K matrices (with explicit boundary terms from integration by parts)
+                            assemble_constructor_data!(KKpar2D_with_BC_terms,icsc,ic_global,icp_global,
+                                            (KKpar_with_BC_terms[ivpa_local,ivpap_local]*
+                                             MMperp[ivperp_local,ivperpp_local]))
+                            assemble_constructor_data!(KKperp2D_with_BC_terms,icsc,ic_global,icp_global,
+                                            (MMpar[ivpa_local,ivpap_local]*
+                                             KKperp_with_BC_terms[ivperp_local,ivperpp_local]))
+                            # assign PU matrix
+                            assemble_constructor_data!(PUperp2D,icsc,ic_global,icp_global,
+                                            (MMpar[ivpa_local,ivpap_local]*
+                                             PUperp[ivperp_local,ivperpp_local]))
+                            assemble_constructor_data!(PPparPUperp2D,icsc,ic_global,icp_global,
+                                            (PPpar[ivpa_local,ivpap_local]*
+                                             PUperp[ivperp_local,ivperpp_local]))
+                            assemble_constructor_data!(PPpar2D,icsc,ic_global,icp_global,
+                                            (PPpar[ivpa_local,ivpap_local]*
+                                             MMperp[ivperp_local,ivperpp_local]))
+                            # assign RHS mass matrix for d2Gdvperp2
+                            assemble_constructor_data!(MMparMNperp2D,icsc,ic_global,icp_global,
+                                            (MMpar[ivpa_local,ivpap_local]*
+                                             MNperp[ivperp_local,ivperpp_local]))
+                        end
+                    end
+                end
+            end
+        end
+    end
+    MM2D_sparse = create_sparse_matrix(MM2D)
+    KKpar2D_sparse = create_sparse_matrix(KKpar2D)
+    KKperp2D_sparse = create_sparse_matrix(KKperp2D)
+    KKpar2D_with_BC_terms_sparse = create_sparse_matrix(KKpar2D_with_BC_terms)
+    KKperp2D_with_BC_terms_sparse = create_sparse_matrix(KKperp2D_with_BC_terms)
+    LP2D_sparse = create_sparse_matrix(LP2D)
+    LV2D_sparse = create_sparse_matrix(LV2D)
+    LB2D_sparse = create_sparse_matrix(LB2D)
+    KPperp2D_sparse = create_sparse_matrix(KPperp2D)
+    PUperp2D_sparse = create_sparse_matrix(PUperp2D)
+    PPparPUperp2D_sparse = create_sparse_matrix(PPparPUperp2D)
+    PPpar2D_sparse = create_sparse_matrix(PPpar2D)
+    MMparMNperp2D_sparse = create_sparse_matrix(MMparMNperp2D)
+    @serial_region begin
+        if global_rank[] == 0 && print_to_screen
+            println("finished elliptic operator constructor assignment   ", Dates.format(now(), dateformat"H:MM:SS"))
+        end
+        #if nc_global < 60
+        #    println("MM2D_sparse \n",MM2D_sparse)
+        #    print_matrix(Array(MM2D_sparse),"MM2D_sparse",nc_global,nc_global)
+        #    print_matrix(KKpar2D,"KKpar2D",nc_global,nc_global)
+        #    print_matrix(KKperp2D,"KKperp2D",nc_global,nc_global)
+        #    print_matrix(LP2D,"LP",nc_global,nc_global)
+        #    print_matrix(LV2D,"LV",nc_global,nc_global)
+        #end
+    end
+    return MM2D_sparse, KKpar2D_sparse, KKperp2D_sparse, 
+           KKpar2D_with_BC_terms_sparse, KKperp2D_with_BC_terms_sparse, 
+           LP2D_sparse, LV2D_sparse, LB2D_sparse, 
+           KPperp2D_sparse, PUperp2D_sparse, PPparPUperp2D_sparse,
+           PPpar2D_sparse, MMparMNperp2D_sparse
+end
+
+"""
+    calculate_YY_arrays(vpa,vperp,vpa_spectral,vperp_spectral)
+
+Allocate and fill the elemental `YY0`, `YY1`, `YY2` and `YY3` arrays for the `vperp`
+and `vpa` coordinates, and return them packed in a `YY_collision_operator_arrays`
+struct (perpendicular arrays first, then parallel arrays).
+
+Each array has size `(ngrid,ngrid,ngrid,nelement_local)` for its coordinate. The block
+belonging to each element is filled in place by `get_QQ_local!`, which is passed the
+`lobatto` and `radau` fields of the matching spectral object together with one of the
+option strings `"YY0"`, `"YY1"`, `"YY2"`, `"YY3"`.
+"""
+function calculate_YY_arrays(vpa,vperp,vpa_spectral,vperp_spectral)
+    # uninitialised storage holding one (ngrid,ngrid,ngrid) block per element of `coord`
+    allocate_YY(coord) = Array{mk_float,4}(undef,coord.ngrid,coord.ngrid,coord.ngrid,coord.nelement_local)
+    # option strings understood by get_QQ_local!, in the order the arrays are stored
+    option_labels = ("YY0","YY1","YY2","YY3")
+
+    perp_blocks = (allocate_YY(vperp),allocate_YY(vperp),allocate_YY(vperp),allocate_YY(vperp))
+    par_blocks = (allocate_YY(vpa),allocate_YY(vpa),allocate_YY(vpa),allocate_YY(vpa))
+
+    # fill the perpendicular arrays element by element
+    for iel in 1:vperp.nelement_local
+        for (block,label) in zip(perp_blocks,option_labels)
+            @views get_QQ_local!(block[:,:,:,iel],iel,vperp_spectral.lobatto,vperp_spectral.radau,vperp,label)
+        end
+    end
+    # fill the parallel arrays element by element
+    for iel in 1:vpa.nelement_local
+        for (block,label) in zip(par_blocks,option_labels)
+            @views get_QQ_local!(block[:,:,:,iel],iel,vpa_spectral.lobatto,vpa_spectral.radau,vpa,label)
+        end
+    end
+
+    return YY_collision_operator_arrays(perp_blocks...,par_blocks...)
+end
+
+"""
+    assemble_explicit_collision_operator_rhs_serial!(rhsc,pdfs,d2Gspdvpa2,d2Gspdvperpdvpa,
+        d2Gspdvperp2,dHspdvpa,dHspdvperp,ms,msp,nussp,vpa,vperp,YY_arrays)
+
+Assemble the right-hand side of the explicit collision operator into `rhsc`, doing all
+of the work inside a serial region.
+
+# Arguments
+- `rhsc`: output vector in the compound `(vpa,vperp)` index; it is zeroed, accumulated
+  into element by element, and finally multiplied by `-nussp`.
+- `pdfs`: distribution function, indexed `[ivpa,ivperp]`.
+- `d2Gspdvpa2`, `d2Gspdvperpdvpa`, `d2Gspdvperp2`, `dHspdvpa`, `dHspdvperp`: Rosenbluth
+  potential derivative arrays, indexed `[ivpa,ivperp]`.
+- `ms`, `msp`: masses, entering only through the ratio `ms/msp`.
+- `nussp`: normalised collision frequency.
+- `vpa`, `vperp`: coordinate structs (`ngrid`, `nelement_local` and `igrid_full` are read).
+- `YY_arrays`: precalculated elemental `YY0`-`YY3` arrays for both coordinates.
+
+Returns `nothing`.
+"""
+function assemble_explicit_collision_operator_rhs_serial!(rhsc,pdfs,d2Gspdvpa2,d2Gspdvperpdvpa,
+    d2Gspdvperp2,dHspdvpa,dHspdvperp,ms,msp,nussp,
+    vpa,vperp,YY_arrays::YY_collision_operator_arrays)
+    begin_serial_region()
+    @serial_region begin
+        # assemble RHS of collision operator
+        @. rhsc = 0.0
+
+        # loop over elements
+        for ielement_vperp in 1:vperp.nelement_local
+            # take views (as the parallel version of this routine does) so that the
+            # elemental blocks are not copied on every pass through the element loops
+            @views YY0perp = YY_arrays.YY0perp[:,:,:,ielement_vperp]
+            @views YY1perp = YY_arrays.YY1perp[:,:,:,ielement_vperp]
+            @views YY2perp = YY_arrays.YY2perp[:,:,:,ielement_vperp]
+            @views YY3perp = YY_arrays.YY3perp[:,:,:,ielement_vperp]
+
+            for ielement_vpa in 1:vpa.nelement_local
+                @views YY0par = YY_arrays.YY0par[:,:,:,ielement_vpa]
+                @views YY1par = YY_arrays.YY1par[:,:,:,ielement_vpa]
+                @views YY2par = YY_arrays.YY2par[:,:,:,ielement_vpa]
+                @views YY3par = YY_arrays.YY3par[:,:,:,ielement_vpa]
+
+                # loop over field positions in each element
+                for ivperp_local in 1:vperp.ngrid
+                    for ivpa_local in 1:vpa.ngrid
+                        # only the compound index is needed here; the separate global
+                        # vpa and vperp indices that are also returned are discarded
+                        ic_global, _, _ = get_global_compound_index(vpa,vperp,ielement_vpa,ielement_vperp,ivpa_local,ivperp_local)
+                        # carry out the matrix sum on each 2D element
+                        for jvperpp_local in 1:vperp.ngrid
+                            jvperpp = vperp.igrid_full[jvperpp_local,ielement_vperp]
+                            for kvperpp_local in 1:vperp.ngrid
+                                kvperpp = vperp.igrid_full[kvperpp_local,ielement_vperp]
+                                for jvpap_local in 1:vpa.ngrid
+                                    jvpap = vpa.igrid_full[jvpap_local,ielement_vpa]
+                                    pdfjj = pdfs[jvpap,jvperpp]
+                                    for kvpap_local in 1:vpa.ngrid
+                                        kvpap = vpa.igrid_full[kvpap_local,ielement_vpa]
+                                        # first three lines represent parallel flux terms
+                                        # second three lines represent perpendicular flux terms
+                                        rhsc[ic_global] += (YY0perp[kvperpp_local,jvperpp_local,ivperp_local]*YY2par[kvpap_local,jvpap_local,ivpa_local]*pdfjj*d2Gspdvpa2[kvpap,kvperpp] +
+                                                            YY3perp[kvperpp_local,jvperpp_local,ivperp_local]*YY1par[kvpap_local,jvpap_local,ivpa_local]*pdfjj*d2Gspdvperpdvpa[kvpap,kvperpp] -
+                                                            2.0*(ms/msp)*YY0perp[kvperpp_local,jvperpp_local,ivperp_local]*YY1par[kvpap_local,jvpap_local,ivpa_local]*pdfjj*dHspdvpa[kvpap,kvperpp] +
+                                                            # end parallel flux, start of perpendicular flux
+                                                            YY1perp[kvperpp_local,jvperpp_local,ivperp_local]*YY3par[kvpap_local,jvpap_local,ivpa_local]*pdfjj*d2Gspdvperpdvpa[kvpap,kvperpp] +
+                                                            YY2perp[kvperpp_local,jvperpp_local,ivperp_local]*YY0par[kvpap_local,jvpap_local,ivpa_local]*pdfjj*d2Gspdvperp2[kvpap,kvperpp] -
+                                                            2.0*(ms/msp)*YY1perp[kvperpp_local,jvperpp_local,ivperp_local]*YY0par[kvpap_local,jvpap_local,ivpa_local]*pdfjj*dHspdvperp[kvpap,kvperpp])
+                                    end
+                                end
+                            end
+                        end
+                    end
+                end
+            end
+        end
+        # correct for minus sign due to integration by parts
+        # and multiply by the normalised collision frequency
+        @. rhsc *= -nussp
+    end
+    return nothing
+end
+
+"""
+    assemble_explicit_collision_operator_rhs_parallel!(rhsc,rhsvpavperp,pdfs,d2Gspdvpa2,
+        d2Gspdvperpdvpa,d2Gspdvperp2,dHspdvpa,dHspdvperp,ms,msp,nussp,vpa,vperp,YY_arrays)
+
+Assemble the right-hand side of the explicit collision operator, looping over
+collocation points with `@loop_vperp_vpa` rather than over elements.
+
+The result is accumulated in `rhsvpavperp[ivpa,ivperp]` (zeroed first, and already
+including the factor `-nussp`) and then copied into the compound-index vector `rhsc`
+by `ravel_vpavperp_to_c_parallel!`. `pdfs` and the `d2G...`/`dH...` arrays are indexed
+`[ivpa,ivperp]`; `ms` and `msp` enter only through the ratio `ms/msp`; `YY_arrays`
+holds the precalculated elemental `YY0`-`YY3` arrays. Returns `nothing`.
+"""
+function assemble_explicit_collision_operator_rhs_parallel!(rhsc,rhsvpavperp,pdfs,d2Gspdvpa2,d2Gspdvperpdvpa,
+    d2Gspdvperp2,dHspdvpa,dHspdvperp,ms,msp,nussp,
+    vpa,vperp,YY_arrays::YY_collision_operator_arrays)
+    begin_vperp_vpa_region()
+    # start from an empty accumulation buffer
+    @loop_vperp_vpa ivperp ivpa begin
+        rhsvpavperp[ivpa,ivperp] = 0.0
+    end
+
+    npts_vpa, npts_vperp = vpa.ngrid, vperp.ngrid
+    # mass-ratio prefactor multiplying the dH/dvpa and dH/dvperp terms
+    mass_factor = 2.0*(ms/msp)
+    # loop over collocation points to benefit from shared-memory parallelism
+    @loop_vperp_vpa ivperp_global ivpa_global begin
+        igrid_vpa, _, iel_vpa_first, iel_vpa_last, igrid_vperp, _, iel_vperp_first, iel_vperp_last = get_element_limit_indices(ivpa_global,ivperp_global,vpa,vperp)
+        # visit every element to which this collocation point belongs
+        for iel_vperp in iel_vperp_first:iel_vperp_last
+            # local vperp index of the point within this element, corrected for the
+            # case that we are on a boundary point shared between elements
+            iperp = igrid_vperp + (iel_vperp - iel_vperp_first)*(1-npts_vperp)
+            Y0perp = view(YY_arrays.YY0perp,:,:,:,iel_vperp)
+            Y1perp = view(YY_arrays.YY1perp,:,:,:,iel_vperp)
+            Y2perp = view(YY_arrays.YY2perp,:,:,:,iel_vperp)
+            Y3perp = view(YY_arrays.YY3perp,:,:,:,iel_vperp)
+
+            for iel_vpa in iel_vpa_first:iel_vpa_last
+                # local vpa index of the point within this element, corrected for the
+                # case that we are on a boundary point shared between elements
+                ipar = igrid_vpa + (iel_vpa - iel_vpa_first)*(1-npts_vpa)
+                Y0par = view(YY_arrays.YY0par,:,:,:,iel_vpa)
+                Y1par = view(YY_arrays.YY1par,:,:,:,iel_vpa)
+                Y2par = view(YY_arrays.YY2par,:,:,:,iel_vpa)
+                Y3par = view(YY_arrays.YY3par,:,:,:,iel_vpa)
+
+                # carry out the matrix sum on each 2D element
+                for jperp in 1:vperp.ngrid
+                    jvperpp = vperp.igrid_full[jperp,iel_vperp]
+                    for kperp in 1:vperp.ngrid
+                        kvperpp = vperp.igrid_full[kperp,iel_vperp]
+                        # the vperp factors do not depend on the vpa loops below
+                        y0perp = Y0perp[kperp,jperp,iperp]
+                        y1perp = Y1perp[kperp,jperp,iperp]
+                        y2perp = Y2perp[kperp,jperp,iperp]
+                        y3perp = Y3perp[kperp,jperp,iperp]
+                        for jpar in 1:vpa.ngrid
+                            jvpap = vpa.igrid_full[jpar,iel_vpa]
+                            pdfjj = pdfs[jvpap,jvperpp]
+                            for kpar in 1:vpa.ngrid
+                                kvpap = vpa.igrid_full[kpar,iel_vpa]
+                                y0par = Y0par[kpar,jpar,ipar]
+                                y1par = Y1par[kpar,jpar,ipar]
+                                y2par = Y2par[kpar,jpar,ipar]
+                                y3par = Y3par[kpar,jpar,ipar]
+                                # parallel flux terms
+                                flux_par_1 = y0perp*y2par*pdfjj*d2Gspdvpa2[kvpap,kvperpp]
+                                flux_par_2 = y3perp*y1par*pdfjj*d2Gspdvperpdvpa[kvpap,kvperpp]
+                                flux_par_3 = mass_factor*y0perp*y1par*pdfjj*dHspdvpa[kvpap,kvperpp]
+                                # perpendicular flux terms
+                                flux_perp_1 = y1perp*y3par*pdfjj*d2Gspdvperpdvpa[kvpap,kvperpp]
+                                flux_perp_2 = y2perp*y0par*pdfjj*d2Gspdvperp2[kvpap,kvperpp]
+                                flux_perp_3 = mass_factor*y1perp*y0par*pdfjj*dHspdvperp[kvpap,kvperpp]
+                                # the terms are summed in the same order as they are listed above
+                                rhsvpavperp[ivpa_global,ivperp_global] += -nussp*(flux_par_1 + flux_par_2 - flux_par_3 +
+                                                                                  flux_perp_1 + flux_perp_2 - flux_perp_3)
+                            end
+                        end
+                    end
+                end
+            end
+        end
+    end
+    # ravel to compound index
+    ravel_vpavperp_to_c_parallel!(rhsc,rhsvpavperp,vpa.n)
+    return nothing
+end
+
+"""
+    assemble_explicit_collision_operator_rhs_parallel_analytical_inputs!(rhsc,rhsvpavperp,
+        pdfs,dpdfsdvpa,dpdfsdvperp,d2Gspdvpa2,d2Gspdvperpdvpa,d2Gspdvperp2,dHspdvpa,
+        dHspdvperp,ms,msp,nussp,vpa,vperp,YY_arrays)
+
+Variant of the parallel right-hand-side assembly in which the arrays `dpdfsdvpa` and
+`dpdfsdvperp` (presumably the `vpa` and `vperp` derivatives of `pdfs`, supplied by the
+caller) are used directly, so that only the `YY0` and `YY1` elemental arrays appear.
+
+The result is accumulated in `rhsvpavperp[ivpa,ivperp]` (zeroed first, and already
+including the factor `-nussp`) and then copied into the compound-index vector `rhsc`
+by `ravel_vpavperp_to_c_parallel!`. All input fields are indexed `[ivpa,ivperp]`;
+`ms` and `msp` enter only through the ratio `ms/msp`. Returns `nothing`.
+"""
+function assemble_explicit_collision_operator_rhs_parallel_analytical_inputs!(rhsc,rhsvpavperp,pdfs,dpdfsdvpa,dpdfsdvperp,d2Gspdvpa2,d2Gspdvperpdvpa,
+    d2Gspdvperp2,dHspdvpa,dHspdvperp,ms,msp,nussp,
+    vpa,vperp,YY_arrays::YY_collision_operator_arrays)
+    begin_vperp_vpa_region()
+    # start from an empty accumulation buffer
+    @loop_vperp_vpa ivperp ivpa begin
+        rhsvpavperp[ivpa,ivperp] = 0.0
+    end
+
+    npts_vpa, npts_vperp = vpa.ngrid, vperp.ngrid
+    # mass-ratio prefactor multiplying the dH/dvpa and dH/dvperp terms
+    mass_factor = 2.0*(ms/msp)
+    # loop over collocation points to benefit from shared-memory parallelism
+    @loop_vperp_vpa ivperp_global ivpa_global begin
+        igrid_vpa, _, iel_vpa_first, iel_vpa_last, igrid_vperp, _, iel_vperp_first, iel_vperp_last = get_element_limit_indices(ivpa_global,ivperp_global,vpa,vperp)
+        # visit every element to which this collocation point belongs
+        for iel_vperp in iel_vperp_first:iel_vperp_last
+            # local vperp index of the point within this element, corrected for the
+            # case that we are on a boundary point shared between elements
+            iperp = igrid_vperp + (iel_vperp - iel_vperp_first)*(1-npts_vperp)
+            # only the YY0 and YY1 arrays are needed in this variant
+            Y0perp = view(YY_arrays.YY0perp,:,:,:,iel_vperp)
+            Y1perp = view(YY_arrays.YY1perp,:,:,:,iel_vperp)
+
+            for iel_vpa in iel_vpa_first:iel_vpa_last
+                # local vpa index of the point within this element, corrected for the
+                # case that we are on a boundary point shared between elements
+                ipar = igrid_vpa + (iel_vpa - iel_vpa_first)*(1-npts_vpa)
+                Y0par = view(YY_arrays.YY0par,:,:,:,iel_vpa)
+                Y1par = view(YY_arrays.YY1par,:,:,:,iel_vpa)
+
+                # carry out the matrix sum on each 2D element
+                for jperp in 1:vperp.ngrid
+                    jvperpp = vperp.igrid_full[jperp,iel_vperp]
+                    for kperp in 1:vperp.ngrid
+                        kvperpp = vperp.igrid_full[kperp,iel_vperp]
+                        # the vperp factors do not depend on the vpa loops below
+                        y0perp = Y0perp[kperp,jperp,iperp]
+                        y1perp = Y1perp[kperp,jperp,iperp]
+                        for jpar in 1:vpa.ngrid
+                            jvpap = vpa.igrid_full[jpar,iel_vpa]
+                            # values at the (j,j) point are shared by all k terms
+                            pdfjj = pdfs[jvpap,jvperpp]
+                            dpdfdvpa_jj = dpdfsdvpa[jvpap,jvperpp]
+                            dpdfdvperp_jj = dpdfsdvperp[jvpap,jvperpp]
+                            for kpar in 1:vpa.ngrid
+                                kvpap = vpa.igrid_full[kpar,iel_vpa]
+                                y0par = Y0par[kpar,jpar,ipar]
+                                y1par = Y1par[kpar,jpar,ipar]
+                                # parallel flux terms
+                                flux_par_1 = y0perp*y1par*dpdfdvpa_jj*d2Gspdvpa2[kvpap,kvperpp]
+                                flux_par_2 = y0perp*y1par*dpdfdvperp_jj*d2Gspdvperpdvpa[kvpap,kvperpp]
+                                flux_par_3 = mass_factor*y0perp*y1par*pdfjj*dHspdvpa[kvpap,kvperpp]
+                                # perpendicular flux terms
+                                flux_perp_1 = y1perp*y0par*dpdfdvpa_jj*d2Gspdvperpdvpa[kvpap,kvperpp]
+                                flux_perp_2 = y1perp*y0par*dpdfdvperp_jj*d2Gspdvperp2[kvpap,kvperpp]
+                                flux_perp_3 = mass_factor*y1perp*y0par*pdfjj*dHspdvperp[kvpap,kvperpp]
+                                # the terms are summed in the same order as they are listed above
+                                rhsvpavperp[ivpa_global,ivperp_global] += -nussp*(flux_par_1 + flux_par_2 - flux_par_3 +
+                                                                                  flux_perp_1 + flux_perp_2 - flux_perp_3)
+                            end
+                        end
+                    end
+                end
+            end
+        end
+    end
+    # ravel to compound index
+    ravel_vpavperp_to_c_parallel!(rhsc,rhsvpavperp,vpa.n)
+    return nothing
+end
+
+
+# Elliptic solve function: forms rhsc = matrix_rhs*source, imposes Dirichlet data, then solves with lu_object_lhs.
+# field: the solution, in (vpa,vperp) index format; overwritten on output
+# source: the source function on the RHS, in (vpa,vperp) index format
+# boundary_data: the known values of field at infinity, handed to enforce_dirichlet_bc!
+# lu_object_lhs: the object for the differential operator that defines field (applied as lu_object_lhs \ rhsc)
+# matrix_rhs: the weak matrix acting on the source vector
+# rhsc, sc: dummy arrays in the compound index (assumed MPISharedArray or SubArray type); both are overwritten
+# vpa, vperp: coordinate structs (vpa.n is the only field read directly here); the function returns nothing
+function elliptic_solve!(field,source,boundary_data::vpa_vperp_boundary_data,
+            lu_object_lhs,matrix_rhs,rhsc,sc,vpa,vperp)
+    # get data into the compound index format: sc receives the raveled source
+    begin_vperp_vpa_region()
+    ravel_vpavperp_to_c_parallel!(sc,source,vpa.n)
+    # assemble the rhs of the weak system, rhsc = matrix_rhs*sc, inside the serial region
+    begin_serial_region()
+    @serial_region begin
+        mul!(rhsc,matrix_rhs,sc)
+        # enforce the boundary conditions (presumably overwrites the boundary entries of rhsc -- confirm in enforce_dirichlet_bc!)
+        enforce_dirichlet_bc!(rhsc,vpa,vperp,boundary_data)
+        # solve the linear system; the solution overwrites sc, whose raveled source is no longer needed
+        sc .= lu_object_lhs \ rhsc
+    end
+    # get data into the vpa vperp indices format: field receives the unraveled solution
+    begin_vperp_vpa_region()
+    ravel_c_to_vpavperp_parallel!(field,sc,vpa.n)
+    return nothing
+end
+# same as above but source is made of two different terms with different weak matrices:
+# the RHS is matrix_rhs_1*source_1 + matrix_rhs_2*source_2; rhsc_1, rhsc_2, sc_1, sc_2 are compound-index buffers (all overwritten)
+function elliptic_solve!(field,source_1,source_2,boundary_data::vpa_vperp_boundary_data,
+            lu_object_lhs,matrix_rhs_1,matrix_rhs_2,rhsc_1,rhsc_2,sc_1,sc_2,vpa,vperp)
+    # get data into the compound index format: sc_1 and sc_2 receive the raveled sources
+    begin_vperp_vpa_region()
+    ravel_vpavperp_to_c_parallel!(sc_1,source_1,vpa.n)
+    ravel_vpavperp_to_c_parallel!(sc_2,source_2,vpa.n)
+    
+    # assemble the rhs of the weak system: one matrix-vector product per source term
+    begin_serial_region()
+    @serial_region begin
+        mul!(rhsc_1,matrix_rhs_1,sc_1)
+        mul!(rhsc_2,matrix_rhs_2,sc_2)
+    end
+    begin_vperp_vpa_region() # the sum of the two contributions is done in the vperp-vpa loop region
+    @loop_vperp_vpa ivperp ivpa begin
+        ic = ic_func(ivpa,ivperp,vpa.n) # compound index of the point (ivpa,ivperp)
+        rhsc_1[ic] += rhsc_2[ic] # rhsc_1 now holds the full RHS
+    end
+    begin_serial_region()
+    @serial_region begin
+        # enforce the boundary conditions (presumably overwrites the boundary entries of rhsc_1 -- confirm in enforce_dirichlet_bc!)
+        enforce_dirichlet_bc!(rhsc_1,vpa,vperp,boundary_data)
+        # solve the linear system; the solution overwrites sc_1
+        sc_1 .= lu_object_lhs \ rhsc_1
+    end
+    # get data into the vpa vperp indices format: field receives the unraveled solution
+    begin_vperp_vpa_region()
+    ravel_c_to_vpavperp_parallel!(field,sc_1,vpa.n)
+    return nothing
+end
+
+# Same as elliptic_solve!() above but no Dirichlet boundary conditions are imposed,
+# because the function is only used where the lu_object_lhs is derived from a mass matrix.
+# The source is made of two different terms with different weak matrices
+# because of the form of the only algebraic equation that we consider. boundary_data is accepted but not used in the body.
+function algebraic_solve!(field,source_1,source_2,boundary_data::vpa_vperp_boundary_data,
+            lu_object_lhs,matrix_rhs_1,matrix_rhs_2,rhsc_1,rhsc_2,sc_1,sc_2,vpa,vperp)
+    # get data into the compound index format: sc_1 and sc_2 receive the raveled sources
+    begin_vperp_vpa_region()
+    ravel_vpavperp_to_c_parallel!(sc_1,source_1,vpa.n)
+    ravel_vpavperp_to_c_parallel!(sc_2,source_2,vpa.n)
+    
+    # assemble the rhs of the weak system: one matrix-vector product per source term
+    begin_serial_region()
+    @serial_region begin
+        mul!(rhsc_1,matrix_rhs_1,sc_1)
+        mul!(rhsc_2,matrix_rhs_2,sc_2)
+    end
+    begin_vperp_vpa_region() # the sum of the two contributions is done in the vperp-vpa loop region
+    @loop_vperp_vpa ivperp ivpa begin
+        ic = ic_func(ivpa,ivperp,vpa.n) # compound index of the point (ivpa,ivperp)
+        rhsc_1[ic] += rhsc_2[ic] # rhsc_1 now holds the full RHS
+    end
+    begin_serial_region()
+    @serial_region begin
+        # solve the linear system (no boundary rows are modified first); the solution overwrites sc_1
+        sc_1 .= lu_object_lhs \ rhsc_1
+    end
+    # get data into the vpa vperp indices format: field receives the unraveled solution
+    begin_vperp_vpa_region()
+    ravel_c_to_vpavperp_parallel!(field,sc_1,vpa.n)
+    return nothing
+end
+
+"""
+    calculate_rosenbluth_potentials_via_elliptic_solve!(GG,HH,dHdvpa,dHdvperp,
+        d2Gdvpa2,dGdvperp,d2Gdvperpdvpa,d2Gdvperp2,ffsp_in,
+        vpa,vperp,vpa_spectral,vperp_spectral,fkpl_arrays;
+        algebraic_solve_for_d2Gdvperp2=false,calculate_GG=false,calculate_dGdvperp=false)
+
+Fill the Rosenbluth potential arrays from the distribution function `ffsp_in` using a
+sequence of weak-form solves with the precalculated matrices, factorisation objects
+and buffers stored in `fkpl_arrays`.
+
+`HH`, `dHdvpa`, `dHdvperp`, `d2Gdvpa2`, `d2Gdvperpdvpa` and `d2Gdvperp2` are always
+written. `GG` is written only if `calculate_GG` is true, and `dGdvperp` only if
+`calculate_dGdvperp` or `algebraic_solve_for_d2Gdvperp2` is true.
+
+If `algebraic_solve_for_d2Gdvperp2` is true, `d2Gdvperp2` is obtained from
+`d2Gdvperp2 = 2H - d2Gdvpa2 - (1/vperp)dGdvperp` with `algebraic_solve!`; otherwise it
+is obtained from a weak-form PDE with `elliptic_solve!`.
+
+The function finishes with `begin_serial_region()` and returns `nothing`.
+"""
+function calculate_rosenbluth_potentials_via_elliptic_solve!(GG,HH,dHdvpa,dHdvperp,
+             d2Gdvpa2,dGdvperp,d2Gdvperpdvpa,d2Gdvperp2,ffsp_in,
+             vpa,vperp,vpa_spectral,vperp_spectral,fkpl_arrays::fokkerplanck_weakform_arrays_struct;
+             algebraic_solve_for_d2Gdvperp2=false,calculate_GG=false,calculate_dGdvperp=false)
+
+    # dGdvperp is needed either for its own sake or as an input to the algebraic solve
+    need_dGdvperp = calculate_dGdvperp || algebraic_solve_for_d2Gdvperp2
+    # boundary data struct and the two source buffers in (vpa,vperp) index format
+    bdata = fkpl_arrays.rpbd
+    src_S = fkpl_arrays.S_dummy
+    src_Q = fkpl_arrays.Q_dummy
+    # single-source elliptic solve using src_S and the shared compound-index buffers
+    solve_with_S!(out,bdry,lu_lhs,mat_rhs) = elliptic_solve!(out,src_S,bdry,
+                lu_lhs,mat_rhs,fkpl_arrays.rhsc,fkpl_arrays.sc,vpa,vperp)
+
+    # calculate the boundary data
+    calculate_rosenbluth_potential_boundary_data!(bdata,fkpl_arrays.bwgt,@view(ffsp_in[:,:]),vpa,vperp,vpa_spectral,vperp_spectral,
+      calculate_GG=calculate_GG,calculate_dGdvperp=need_dGdvperp)
+
+    # source for the H solves
+    H_source_factor = -(4.0/sqrt(pi))
+    begin_vperp_vpa_region()
+    @loop_vperp_vpa ivperp ivpa begin
+        src_S[ivpa,ivperp] = H_source_factor*ffsp_in[ivpa,ivperp]
+    end
+    solve_with_S!(HH,bdata.H_data,fkpl_arrays.lu_obj_LP,fkpl_arrays.MM2D_sparse)
+    solve_with_S!(dHdvpa,bdata.dHdvpa_data,fkpl_arrays.lu_obj_LP,fkpl_arrays.PPpar2D_sparse)
+    solve_with_S!(dHdvperp,bdata.dHdvperp_data,fkpl_arrays.lu_obj_LV,fkpl_arrays.PUperp2D_sparse)
+
+    # source for the G solves
+    begin_vperp_vpa_region()
+    @loop_vperp_vpa ivperp ivpa begin
+        src_S[ivpa,ivperp] = 2.0*HH[ivpa,ivperp]
+    end
+    if calculate_GG
+        solve_with_S!(GG,bdata.G_data,fkpl_arrays.lu_obj_LP,fkpl_arrays.MM2D_sparse)
+    end
+    if need_dGdvperp
+        solve_with_S!(dGdvperp,bdata.dGdvperp_data,fkpl_arrays.lu_obj_LV,fkpl_arrays.PUperp2D_sparse)
+    end
+    solve_with_S!(d2Gdvpa2,bdata.d2Gdvpa2_data,fkpl_arrays.lu_obj_LP,fkpl_arrays.KKpar2D_sparse)
+    solve_with_S!(d2Gdvperpdvpa,bdata.d2Gdvperpdvpa_data,fkpl_arrays.lu_obj_LV,fkpl_arrays.PPparPUperp2D_sparse)
+
+    if algebraic_solve_for_d2Gdvperp2
+        begin_vperp_vpa_region()
+        @loop_vperp_vpa ivperp ivpa begin
+            src_S[ivpa,ivperp] = 2.0*HH[ivpa,ivperp] - d2Gdvpa2[ivpa,ivperp]
+            src_Q[ivpa,ivperp] = -dGdvperp[ivpa,ivperp]
+        end
+        # use the algebraic solve function to find
+        # d2Gdvperp2 = 2H - d2Gdvpa2 - (1/vperp)dGdvperp
+        # using a weak form
+        algebraic_solve!(d2Gdvperp2,src_S,src_Q,bdata.d2Gdvperp2_data,
+                    fkpl_arrays.lu_obj_MM,fkpl_arrays.MM2D_sparse,fkpl_arrays.MMparMNperp2D_sparse,
+                    fkpl_arrays.rhsc,fkpl_arrays.rhqc,fkpl_arrays.sc,fkpl_arrays.qc,vpa,vperp)
+    else
+        # solve a weak-form PDE for d2Gdvperp2
+        begin_vperp_vpa_region()
+        @loop_vperp_vpa ivperp ivpa begin
+            src_S[ivpa,ivperp] = 2.0*HH[ivpa,ivperp]
+            src_Q[ivpa,ivperp] = 2.0*d2Gdvpa2[ivpa,ivperp]
+        end
+        elliptic_solve!(d2Gdvperp2,src_S,src_Q,bdata.d2Gdvperp2_data,
+                    fkpl_arrays.lu_obj_LB,fkpl_arrays.KPperp2D_sparse,fkpl_arrays.MMparMNperp2D_sparse,
+                    fkpl_arrays.rhsc,fkpl_arrays.rhqc,fkpl_arrays.sc,fkpl_arrays.qc,vpa,vperp)
+    end
+    begin_serial_region()
+    return nothing
+end
+
+"""
+Calculate the Rosenbluth potentials and their derivatives by direct integration:
+the derivatives of `ffsp_in` are computed first, then summed against precomputed weights.
+"""
+function calculate_rosenbluth_potentials_via_direct_integration!(GG,HH,dHdvpa,dHdvperp,
+             d2Gdvpa2,dGdvperp,d2Gdvperpdvpa,d2Gdvperp2,ffsp_in,
+             vpa,vperp,vpa_spectral,vperp_spectral,fkpl_arrays::fokkerplanck_arrays_direct_integration_struct)
+    dfdvpa = fkpl_arrays.dfdvpa
+    dfdvperp = fkpl_arrays.dfdvperp
+    d2fdvperpdvpa = fkpl_arrays.d2fdvperpdvpa
+    G0_weights = fkpl_arrays.G0_weights
+    G1_weights = fkpl_arrays.G1_weights
+    H0_weights = fkpl_arrays.H0_weights
+    H1_weights = fkpl_arrays.H1_weights
+    H2_weights = fkpl_arrays.H2_weights
+    H3_weights = fkpl_arrays.H3_weights
+    # first compute the derivatives of fs' (the integration weights assume d fs' dvpa and d fs' dvperp are known)
+    begin_vperp_region()
+    @loop_vperp ivperp begin
+        @views derivative!(vpa.scratch, ffsp_in[:,ivperp], vpa, vpa_spectral)
+        @. dfdvpa[:,ivperp] = vpa.scratch
+    end
+    begin_vpa_region()
+    @loop_vpa ivpa begin
+        @views derivative!(vperp.scratch, ffsp_in[ivpa,:], vperp, vperp_spectral)
+        @. dfdvperp[ivpa,:] = vperp.scratch
+        @views derivative!(vperp.scratch, dfdvpa[ivpa,:], vperp, vperp_spectral)
+        @. d2fdvperpdvpa[ivpa,:] = vperp.scratch
+    end
+    # with the integrands calculated, compute the integrals (all eight outputs are overwritten)
+    calculate_rosenbluth_integrals!(GG,d2Gdvpa2,dGdvperp,d2Gdvperpdvpa,
+                                        d2Gdvperp2,HH,dHdvpa,dHdvperp,
+                                        ffsp_in,dfdvpa,dfdvperp,d2fdvperpdvpa,
+                                        G0_weights,G1_weights,H0_weights,H1_weights,H2_weights,H3_weights,
+                                        vpa.n,vperp.n)
+    return nothing
+end
+
+
+"""
+Integrate the relevant distribution functions against precomputed weights
+to form the coefficients required by the full-F operator. The arguments
+are the coefficient arrays (which are overwritten), the distribution
+function `fsp` and its derivatives, the precomputed weight arrays
+(indexed `[ivpap,ivperpp,ivpa,ivperp]`), and the sizes `nvpa` and `nvperp`
+of the primed vpa and vperp coordinate arrays that are summed over.
+Every output entry is reset to zero before the sums are accumulated.
+"""
+function calculate_rosenbluth_integrals!(GG,d2Gspdvpa2,dGspdvperp,d2Gspdvperpdvpa,
+                                        d2Gspdvperp2,HH,dHspdvpa,dHspdvperp,
+                                        fsp,dfspdvpa,dfspdvperp,d2fspdvperpdvpa,
+                                        G0_weights,G1_weights,H0_weights,H1_weights,H2_weights,H3_weights,
+                                        nvpa,nvperp)
+    begin_vperp_vpa_region()
+    @loop_vperp_vpa ivperp ivpa begin
+        # reset the outputs at this field point before accumulating
+        HH[ivpa,ivperp] = 0.0
+        dHspdvpa[ivpa,ivperp] = 0.0
+        dHspdvperp[ivpa,ivperp] = 0.0
+        GG[ivpa,ivperp] = 0.0
+        dGspdvperp[ivpa,ivperp] = 0.0
+        d2Gspdvpa2[ivpa,ivperp] = 0.0
+        d2Gspdvperpdvpa[ivpa,ivperp] = 0.0
+        d2Gspdvperp2[ivpa,ivperp] = 0.0
+        # sum over the primed grid, with the primed vpa index varying fastest;
+        # note that d2Gspdvpa2 is formed from H3_weights and dfspdvpa, and
+        # d2Gspdvperp2 from H2_weights and dfspdvperp
+        for jvperp in 1:nvperp, jvpa in 1:nvpa
+            HH[ivpa,ivperp] += H0_weights[jvpa,jvperp,ivpa,ivperp]*fsp[jvpa,jvperp]
+            dHspdvpa[ivpa,ivperp] += H0_weights[jvpa,jvperp,ivpa,ivperp]*dfspdvpa[jvpa,jvperp]
+            dHspdvperp[ivpa,ivperp] += H1_weights[jvpa,jvperp,ivpa,ivperp]*dfspdvperp[jvpa,jvperp]
+            GG[ivpa,ivperp] += G0_weights[jvpa,jvperp,ivpa,ivperp]*fsp[jvpa,jvperp]
+            dGspdvperp[ivpa,ivperp] += G1_weights[jvpa,jvperp,ivpa,ivperp]*dfspdvperp[jvpa,jvperp]
+            d2Gspdvpa2[ivpa,ivperp] += H3_weights[jvpa,jvperp,ivpa,ivperp]*dfspdvpa[jvpa,jvperp]
+            d2Gspdvperpdvpa[ivpa,ivperp] += G1_weights[jvpa,jvperp,ivpa,ivperp]*d2fspdvperpdvpa[jvpa,jvperp]
+            d2Gspdvperp2[ivpa,ivperp] += H2_weights[jvpa,jvperp,ivpa,ivperp]*dfspdvperp[jvpa,jvperp]
+        end
+    end
+    return nothing
+end
+
+"""
+Enforce boundary conditions on the collision operator result `pdf` (modified
+in place) so that it is consistent with the boundary conditions imposed on the pdf.
+"""
+function enforce_vpavperp_BCs!(pdf,vpa,vperp,vpa_spectral,vperp_spectral)
+    nvpa = vpa.n
+    nvperp = vperp.n
+    ngrid_vperp = vperp.ngrid
+    D0 = vperp_spectral.radau.D0
+    # vpa boundary conditions: zero at both ends of the vpa grid
+    begin_vperp_region()
+    @loop_vperp ivperp begin
+        pdf[1,ivperp] = 0.0
+        pdf[nvpa,ivperp] = 0.0
+    end
+    # vperp boundary conditions: zero at the upper end of the vperp grid, and
+    # regularity d F / d vperp = 0 at vperp = 0, imposed by adjusting F(vperp = 0)
+    # so that the D0-weighted sum over the first ngrid_vperp points vanishes.
+    begin_vpa_region()
+    @loop_vpa ivpa begin
+        pdf[ivpa,nvperp] = 0.0
+        # @views avoids copying both slices on every iteration; the result is unchanged
+        @views pdf[ivpa,1] = -sum(D0[2:ngrid_vperp].*pdf[ivpa,2:ngrid_vperp])/D0[1]
+    end
+    return nothing
+end
+
+end
diff --git a/src/fokker_planck_test.jl b/src/fokker_planck_test.jl
new file mode 100644
index 000000000..b0391a087
--- /dev/null
+++ b/src/fokker_planck_test.jl
@@ -0,0 +1,358 @@
+"""
+module for including functions used 
+in testing the implementation of the 
+Full-F Fokker-Planck Collision Operator
+"""
+module fokker_planck_test
+
+export Cflux_vpa_Maxwellian_inputs, Cflux_vperp_Maxwellian_inputs
+export d2Gdvpa2_Maxwellian, dGdvperp_Maxwellian, d2Gdvperpdvpa_Maxwellian, d2Gdvperp2_Maxwellian
+export dHdvpa_Maxwellian, dHdvperp_Maxwellian, Cssp_Maxwellian_inputs
+export F_Maxwellian, dFdvpa_Maxwellian, dFdvperp_Maxwellian
+export d2Fdvpa2_Maxwellian, d2Fdvperpdvpa_Maxwellian, d2Fdvperp2_Maxwellian
+export H_Maxwellian, G_Maxwellian
+
+export Cssp_fully_expanded_form, calculate_collisional_fluxes
+
+export print_test_data, plot_test_data, fkpl_error_data, allocate_error_data
+
+using Plots
+using LaTeXStrings
+using Measures
+using ..type_definitions: mk_float, mk_int
+using SpecialFunctions: erf
+using ..velocity_moments: get_density
+# below are a series of functions that can be used to test the calculation 
+# of the Rosenbluth potentials for a shifted Maxwellian
+# or provide an estimate for collisional coefficients 
+
+# Rosenbluth potential G for a shifted Maxwellian, where G is defined by
+# Del^4 G = -(8/sqrt(pi))*F, with F = cref^3 pi^(3/2) F_Maxwellian / nref
+# the normalised Maxwellian. The value at grid point (ivpa,ivperp) is returned.
+function G_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
+    # speed relative to upar, in units of vth
+    eta = eta_func(upar,vth,vpa,vperp,ivpa,ivperp)
+    eta_small = 1.0e-10
+    # below the cutoff use 2/sqrt(pi); else G_M = (1/2 eta)*( eta erf'(eta) + (1 + 2 eta^2) erf(eta))
+    G_normalised = if eta < eta_small
+        2.0/sqrt(pi)
+    else
+        (1.0/sqrt(pi))*exp(-eta^2) + ((0.5/eta) + eta)*erf(eta)
+    end
+    return G_normalised*dens*vth
+end
+
+# Rosenbluth potential H for a shifted Maxwellian, where H is defined by
+# Del^2 H = -(4/sqrt(pi))*F, with F = cref^3 pi^(3/2) F_Maxwellian / nref
+# the normalised Maxwellian. The value at grid point (ivpa,ivperp) is returned.
+function H_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
+    # speed relative to upar, in units of vth
+    eta = eta_func(upar,vth,vpa,vperp,ivpa,ivperp)
+    eta_small = 1.0e-10
+    # H_M = erf(eta)/eta, and erf(eta)/eta ~ 2/sqrt(pi) + O(eta^2) for eta << 1,
+    # so the leading-order value is used below the cutoff instead of dividing by eta
+    H_normalised = if eta < eta_small
+        2.0/sqrt(pi)
+    else
+        erf(eta)/eta
+    end
+    return H_normalised*dens/vth
+end
+
+# 1D derivative functions
+
+# d \tilde{G} / d eta (divides by eta, so eta = 0 is not handled here)
+function dGdeta(eta::mk_float)
+    erf_part = (1.0 - 0.5/(eta^2))*erf(eta)
+    return (1.0/sqrt(pi))*exp(-eta^2)/eta + erf_part
+end
+
+# d^2 \tilde{G} / d eta^2 (divides by powers of eta, so eta = 0 is not handled here)
+function d2Gdeta2(eta::mk_float)
+    exp_part = (2.0/sqrt(pi))*exp(-eta^2)/(eta^2)
+    return erf(eta)/(eta^3) - exp_part
+end
+
+# d / d eta ( (1/ eta) d \tilde{G} / d eta ); eta = 0 is not handled here
+function ddGddeta(eta::mk_float)
+    exp_part = (3.0/sqrt(pi))*exp(-eta^2)/(eta^3)
+    return (1.5/(eta^2) - 1.0)*erf(eta)/(eta^2) - exp_part
+end
+
+# d / d eta ( erf(eta) / eta ); divides by eta, so eta = 0 is not handled here
+function dHdeta(eta::mk_float)
+    return (2.0/sqrt(pi))*(exp(-eta^2))/eta - erf(eta)/(eta^2)
+end
+
+# functions of vpa & vperp 
+# speed at grid point (ivpa,ivperp) relative to the flow upar, in units of vth
+function eta_func(upar::mk_float,vth::mk_float,vpa,vperp,ivpa,ivperp)
+    wpa = vpa.grid[ivpa] - upar
+    return sqrt( wpa^2 + vperp.grid[ivperp]^2)/vth
+end
+
+# d^2 G / d vpa^2 for a shifted Maxwellian, evaluated at grid point (ivpa,ivperp)
+function d2Gdvpa2_Maxwellian(dens::mk_float,upar::mk_float,vth::mk_float,
+                            vpa,vperp,ivpa,ivperp)
+    eta = eta_func(upar,vth,vpa,vperp,ivpa,ivperp)
+    wpa = vpa.grid[ivpa] - upar
+    return (dGdeta(eta) + ddGddeta(eta)*(wpa^2)/(vth^2))*dens/(eta*vth)
+end
+
+# d^2 G / d vperp d vpa for a shifted Maxwellian, evaluated at grid point (ivpa,ivperp)
+function d2Gdvperpdvpa_Maxwellian(dens::mk_float,upar::mk_float,vth::mk_float,
+                            vpa,vperp,ivpa,ivperp)
+    eta = eta_func(upar,vth,vpa,vperp,ivpa,ivperp)
+    wpa = vpa.grid[ivpa] - upar
+    return (ddGddeta(eta)*vperp.grid[ivperp]*wpa/(vth^2))*dens/(eta*vth)
+end
+
+# d^2 G / d vperp^2 for a shifted Maxwellian, evaluated at grid point (ivpa,ivperp)
+function d2Gdvperp2_Maxwellian(dens::mk_float,upar::mk_float,vth::mk_float,
+                            vpa,vperp,ivpa,ivperp)
+    eta = eta_func(upar,vth,vpa,vperp,ivpa,ivperp)
+    wperp = vperp.grid[ivperp]
+    return (dGdeta(eta) + ddGddeta(eta)*(wperp^2)/(vth^2))*dens/(eta*vth)
+end
+
+# d G / d vperp for a shifted Maxwellian, evaluated at grid point (ivpa,ivperp)
+function dGdvperp_Maxwellian(dens::mk_float,upar::mk_float,vth::mk_float,
+                            vpa,vperp,ivpa,ivperp)
+    eta = eta_func(upar,vth,vpa,vperp,ivpa,ivperp)
+    return dGdeta(eta)*vperp.grid[ivperp]*dens/(vth*eta)
+end
+
+# d H / d vperp for a shifted Maxwellian, evaluated at grid point (ivpa,ivperp)
+function dHdvperp_Maxwellian(dens::mk_float,upar::mk_float,vth::mk_float,
+                            vpa,vperp,ivpa,ivperp)
+    eta = eta_func(upar,vth,vpa,vperp,ivpa,ivperp)
+    return dHdeta(eta)*vperp.grid[ivperp]*dens/(eta*vth^3)
+end
+
+# d H / d vpa for a shifted Maxwellian, evaluated at grid point (ivpa,ivperp)
+function dHdvpa_Maxwellian(dens::mk_float,upar::mk_float,vth::mk_float,
+                            vpa,vperp,ivpa,ivperp)
+    eta = eta_func(upar,vth,vpa,vperp,ivpa,ivperp)
+    return dHdeta(eta)*(vpa.grid[ivpa]-upar)*dens/(eta*vth^3)
+end
+
+# shifted Maxwellian (dens/vth^3) exp(-eta^2), evaluated at grid point (ivpa,ivperp)
+function F_Maxwellian(dens::mk_float,upar::mk_float,vth::mk_float,
+                        vpa,vperp,ivpa,ivperp)
+    eta = eta_func(upar,vth,vpa,vperp,ivpa,ivperp)
+    return (dens/(vth^3))*exp(-eta^2)
+end
+
+# d F / d vpa for the shifted Maxwellian, evaluated at grid point (ivpa,ivperp)
+function dFdvpa_Maxwellian(dens::mk_float,upar::mk_float,vth::mk_float,
+                        vpa,vperp,ivpa,ivperp)
+    eta = eta_func(upar,vth,vpa,vperp,ivpa,ivperp)
+    return -2.0*(dens/(vth^4))*((vpa.grid[ivpa] - upar)/vth)*exp(-eta^2)
+end
+
+# d F / d vperp for the shifted Maxwellian, evaluated at grid point (ivpa,ivperp)
+function dFdvperp_Maxwellian(dens::mk_float,upar::mk_float,vth::mk_float,
+                        vpa,vperp,ivpa,ivperp)
+    eta = eta_func(upar,vth,vpa,vperp,ivpa,ivperp)
+    return -2.0*(dens/(vth^4))*(vperp.grid[ivperp]/vth)*exp(-eta^2)
+end
+
+# d^2 F / d vperp d vpa for the shifted Maxwellian, evaluated at grid point (ivpa,ivperp)
+function d2Fdvperpdvpa_Maxwellian(dens::mk_float,upar::mk_float,vth::mk_float,
+                        vpa,vperp,ivpa,ivperp)
+    eta = eta_func(upar,vth,vpa,vperp,ivpa,ivperp)
+    return 4.0*(dens/(vth^5))*(vperp.grid[ivperp]/vth)*((vpa.grid[ivpa] - upar)/vth)*exp(-eta^2)
+end
+
+# d^2 F / d vpa^2 for the shifted Maxwellian, evaluated at grid point (ivpa,ivperp)
+function d2Fdvpa2_Maxwellian(dens::mk_float,upar::mk_float,vth::mk_float,
+                        vpa,vperp,ivpa,ivperp)
+    eta = eta_func(upar,vth,vpa,vperp,ivpa,ivperp)
+    return 4.0*(dens/(vth^5))*( ((vpa.grid[ivpa] - upar)/vth)^2 - 0.5 )*exp(-eta^2)
+end
+
+# d^2 F / d vperp^2 for the shifted Maxwellian, evaluated at grid point (ivpa,ivperp)
+function d2Fdvperp2_Maxwellian(dens::mk_float,upar::mk_float,vth::mk_float,
+                        vpa,vperp,ivpa,ivperp)
+    eta = eta_func(upar,vth,vpa,vperp,ivpa,ivperp)
+    return 4.0*(dens/(vth^5))*((vperp.grid[ivperp]/vth)^2 - 0.5)*exp(-eta^2)
+end
+
+# Collision operator at grid point (ivpa,ivperp) for Maxwellian F_s and F_sp, assembled
+# from the analytical Maxwellian expressions for F_s, G_sp and H_sp and their derivatives.
+function Cssp_Maxwellian_inputs(denss::mk_float,upars::mk_float,vths::mk_float,ms::mk_float,
+                                denssp::mk_float,uparsp::mk_float,vthsp::mk_float,msp::mk_float,
+                                nussp::mk_float,vpa,vperp,ivpa,ivperp)
+    # F_s and its derivatives
+    Fs = F_Maxwellian(denss,upars,vths,vpa,vperp,ivpa,ivperp)
+    dFsdvpa = dFdvpa_Maxwellian(denss,upars,vths,vpa,vperp,ivpa,ivperp)
+    dFsdvperp = dFdvperp_Maxwellian(denss,upars,vths,vpa,vperp,ivpa,ivperp)
+    d2Fsdvpa2 = d2Fdvpa2_Maxwellian(denss,upars,vths,vpa,vperp,ivpa,ivperp)
+    d2Fsdvperpdvpa = d2Fdvperpdvpa_Maxwellian(denss,upars,vths,vpa,vperp,ivpa,ivperp)
+    d2Fsdvperp2 = d2Fdvperp2_Maxwellian(denss,upars,vths,vpa,vperp,ivpa,ivperp)
+    # F_sp and the derivatives of the potentials H_sp and G_sp
+    Fsp = F_Maxwellian(denssp,uparsp,vthsp,vpa,vperp,ivpa,ivperp)
+    dHspdvpa = dHdvpa_Maxwellian(denssp,uparsp,vthsp,vpa,vperp,ivpa,ivperp)
+    dHspdvperp = dHdvperp_Maxwellian(denssp,uparsp,vthsp,vpa,vperp,ivpa,ivperp)
+    dGspdvperp = dGdvperp_Maxwellian(denssp,uparsp,vthsp,vpa,vperp,ivpa,ivperp)
+    d2Gspdvpa2 = d2Gdvpa2_Maxwellian(denssp,uparsp,vthsp,vpa,vperp,ivpa,ivperp)
+    d2Gspdvperpdvpa = d2Gdvperpdvpa_Maxwellian(denssp,uparsp,vthsp,vpa,vperp,ivpa,ivperp)
+    d2Gspdvperp2 = d2Gdvperp2_Maxwellian(denssp,uparsp,vthsp,vpa,vperp,ivpa,ivperp)
+    # terms involving derivatives of G, then derivatives of H, then the F_s F_sp product
+    diffusion = ( d2Fsdvpa2*d2Gspdvpa2 +
+        d2Fsdvperp2*d2Gspdvperp2 +
+        2.0*d2Fsdvperpdvpa*d2Gspdvperpdvpa +
+        (1.0/(vperp.grid[ivperp]^2))*dFsdvperp*dGspdvperp )
+    drag = 2.0*(1.0 - (ms/msp))*(dFsdvpa*dHspdvpa + dFsdvperp*dHspdvperp)
+    source = (8.0/sqrt(pi))*(ms/msp)*Fs*Fsp
+    Cssp_Maxwellian = diffusion + drag + source
+    return Cssp_Maxwellian*nussp
+end
+
+# vpa component of the collisional flux for Maxwellian F_s and F_sp,
+# evaluated analytically at grid point (ivpa,ivperp)
+function Cflux_vpa_Maxwellian_inputs(ms::mk_float,denss::mk_float,upars::mk_float,vths::mk_float,
+                                     msp::mk_float,denssp::mk_float,uparsp::mk_float,vthsp::mk_float,
+                                     vpa,vperp,ivpa,ivperp)
+    etap = eta_func(uparsp,vthsp,vpa,vperp,ivpa,ivperp)
+    eta = eta_func(upars,vths,vpa,vperp,ivpa,ivperp)
+    wpap = vpa.grid[ivpa]-uparsp
+    prefac = -2.0*denss*denssp*exp( -eta^2)/(vthsp*vths^5)
+    isotropic = wpap*(d2Gdeta2(etap) + (ms/msp)*((vths/vthsp)^2)*dHdeta(etap)/etap)
+    drift = (uparsp - upars)*( dGdeta(etap) + (wpap^2/vthsp^2)*ddGddeta(etap) )/etap
+    return prefac*(isotropic + drift)
+end
+
+# vperp component of the collisional flux for Maxwellian F_s and F_sp,
+# evaluated analytically at grid point (ivpa,ivperp)
+function Cflux_vperp_Maxwellian_inputs(ms::mk_float,denss::mk_float,upars::mk_float,vths::mk_float,
+                                     msp::mk_float,denssp::mk_float,uparsp::mk_float,vthsp::mk_float,
+                                     vpa,vperp,ivpa,ivperp)
+    etap = eta_func(uparsp,vthsp,vpa,vperp,ivpa,ivperp)
+    eta = eta_func(upars,vths,vpa,vperp,ivpa,ivperp)
+    prefac = -2.0*(vperp.grid[ivperp])*denss*denssp*exp( -eta^2)/(vthsp*vths^5)
+    # the second term is proportional to the flow difference (uparsp - upars)
+    isotropic = d2Gdeta2(etap) + (ms/msp)*((vths/vthsp)^2)*dHdeta(etap)/etap
+    drift = ((uparsp - upars)*(vpa.grid[ivpa]-uparsp)/vthsp^2)*ddGddeta(etap)/etap
+    return prefac*(isotropic + drift)
+end
+
+"""
+Evaluate the fully expanded form of the collision operator from scalar
+inputs. Intended for use at the lowest level of a coordinate loop, where
+the derivatives of f_s and the derivatives of G_sp and H_sp have already
+been calculated; `vperp_val` is the local value of vperp.
+"""
+function Cssp_fully_expanded_form(nussp,ms,msp,
+            d2fsdvpa2,d2fsdvperp2,d2fsdvperpdvpa,dfsdvpa,dfsdvperp,fs,
+            d2Gspdvpa2,d2Gspdvperp2,d2Gspdvperpdvpa,dGspdvperp,
+            dHspdvpa,dHspdvperp,fsp,vperp_val)
+    # terms involving derivatives of G, then derivatives of H, then the f_s f_sp product
+    diffusion = ( d2fsdvpa2*d2Gspdvpa2 + d2fsdvperp2*d2Gspdvperp2 +
+                  2.0*d2fsdvperpdvpa*d2Gspdvperpdvpa +
+                  (1.0/(vperp_val^2))*dfsdvperp*dGspdvperp )
+    drag = 2.0*(1.0 - (ms/msp))*(dfsdvpa*dHspdvpa + dfsdvperp*dHspdvperp)
+    source = (8.0/sqrt(pi))*(ms/msp)*fs*fsp
+    return nussp*(diffusion + drag + source)
+end
+
+
+"""
+Return the pair `(Cflux_vpa, Cflux_vperp)` of collisional fluxes at one grid point,
+given input F_s and its first derivatives and the derivatives of G_sp and H_sp.
+"""
+function calculate_collisional_fluxes(F,dFdvpa,dFdvperp,
+                            d2Gdvpa2,d2Gdvperpdvpa,d2Gdvperp2,dHdvpa,dHdvperp,
+                            ms,msp)
+    # common factor of the terms involving derivatives of H
+    drag_prefactor = 2.0*(ms/msp)*F
+    Cflux_vpa = dFdvpa*d2Gdvpa2 + dFdvperp*d2Gdvperpdvpa - drag_prefactor*dHdvpa
+    Cflux_vperp = dFdvpa*d2Gdvperpdvpa + dFdvperp*d2Gdvperp2 - drag_prefactor*dHdvperp
+    return Cflux_vpa, Cflux_vperp
+end
+
+
+"""
+Below are functions which are used for storing and printing data from the tests 
+"""
+
+# Plot the numerical, exact and error fields as heatmaps and save them to
+# func_name*"_num.pdf", func_name*"_exact.pdf" and func_name*"_err.pdf".
+function plot_test_data(func_exact,func_num,func_err,func_name,vpa,vperp)
+    # (field, file suffix) pairs, in the order in which the plots are produced
+    panels = ((func_num, "_num.pdf"),
+              (func_exact, "_exact.pdf"),
+              (func_err, "_err.pdf"))
+    for (field, suffix) in panels
+        @views heatmap(vperp.grid, vpa.grid, field[:,:], ylabel=L"v_{\|\|}", xlabel=L"v_{\perp}", c = :deep, interpolation = :cubic,
+                    windowsize = (360,240), margin = 15pt)
+        outfile = string(func_name*suffix)
+        savefig(outfile)
+    end
+    return nothing
+end
+
+function print_test_data(func_exact,func_num,func_err,func_name)
+    func_err .= abs.(func_num .- func_exact)  # pointwise absolute error, stored in place
+    largest_err = maximum(func_err)
+    println("maximum("*func_name*"_err): ",largest_err)
+    return largest_err
+end
+
+# Return (max_err, L2norm) of func_num - func_exact, optionally printing both values.
+function print_test_data(func_exact,func_num,func_err,func_name,vpa,vperp,dummy;print_to_screen=true)
+    # pointwise absolute error, stored in place in func_err
+    func_err .= abs.(func_num .- func_exact)
+    max_err = maximum(func_err)
+    # numerator: get_density applied to the squared error (dummy is overwritten)
+    dummy .= func_err.^2
+    err2_integral = get_density(dummy,vpa,vperp)
+    # denominator: get_density applied to an array of ones
+    dummy .= 1.0
+    unit_integral = get_density(dummy,vpa,vperp)
+    L2norm = sqrt(err2_integral/unit_integral)
+    print_to_screen && println("maximum("*func_name*"_err): ",max_err," L2("*func_name*"_err): ",L2norm)
+    return max_err, L2norm
+end
+
+mutable struct error_data
+    max::mk_float # maximum error (presumably max_err from print_test_data; confirm at call site)
+    L2::mk_float # L2-norm error (presumably L2norm from print_test_data; confirm at call site)
+end
+
+mutable struct moments_error_data
+    delta_density::mk_float # presumably the error in the density moment (confirm at call site)
+    delta_upar::mk_float # presumably the error in the parallel flow moment (confirm at call site)
+    delta_pressure::mk_float # presumably the error in the pressure moment (confirm at call site)
+end
+
+struct fkpl_error_data # one (mutable) error_data per tested quantity, plus moments_error_data
+    C_M::error_data # field names follow the *_Maxwellian test functions in this module
+    H_M::error_data
+    dHdvpa_M::error_data
+    dHdvperp_M::error_data
+    G_M::error_data
+    dGdvperp_M::error_data
+    d2Gdvpa2_M::error_data
+    d2Gdvperpdvpa_M::error_data
+    d2Gdvperp2_M::error_data
+    moments::moments_error_data
+end
+
+# Build a fkpl_error_data whose error entries are all initialised to zero.
+function allocate_error_data()
+    # error_data is mutable, so every field gets its own instance rather
+    # than sharing one; the instances are created in the field order of
+    # fkpl_error_data:
+    #   C_M, H_M, dHdvpa_M, dHdvperp_M, G_M,
+    #   dGdvperp_M, d2Gdvpa2_M, d2Gdvperpdvpa_M, d2Gdvperp2_M
+    n_error_fields = 9
+    errors = ntuple(_ -> error_data(0.0,0.0), n_error_fields)
+    # errors in the moments come last
+    moments = moments_error_data(0.0,0.0,0.0)
+    # splat the nine error_data entries into the positional constructor
+    result = fkpl_error_data(errors..., moments)
+    return result
+end
+
+end
diff --git a/src/gauss_legendre.jl b/src/gauss_legendre.jl
new file mode 100644
index 000000000..c93d263c2
--- /dev/null
+++ b/src/gauss_legendre.jl
@@ -0,0 +1,1286 @@
+"""
+module for Gauss-Legendre-Lobatto and Gauss-Legendre-Radau spectral element grids
+"""
+module gauss_legendre
+
+export gausslobattolegendre_differentiation_matrix!
+export gaussradaulegendre_differentiation_matrix!
+export GaussLegendreLobatto_mass_matrix!
+export GaussLegendre_mass_matrix_1!
+export GaussLegendreLobatto_inverse_mass_matrix!
+export GaussLegendreLobatto_K_matrix!
+export GaussLegendreLobatto_S_matrix!
+export GaussLegendre_S_matrix_1!
+export scaled_gauss_legendre_lobatto_grid
+export scaled_gauss_legendre_radau_grid
+export gausslegendre_derivative!
+export gausslegendre_apply_Kmat!
+export gausslegendre_apply_Lmat!
+export setup_gausslegendre_pseudospectral
+export GaussLegendre_weak_product_matrix!
+export ielement_global_func
+export get_QQ_local!
+
+using FastGaussQuadrature
+using LegendrePolynomials: Pl, dnPl
+using LinearAlgebra: mul!, lu, LU
+using SparseArrays: sparse, AbstractSparseArray
+using ..type_definitions: mk_float, mk_int
+using ..array_allocation: allocate_float
+import ..calculus: elementwise_derivative!, elementwise_apply_Kmat!,
+                   elementwise_apply_Lmat!, mass_matrix_solve!
+using ..moment_kinetics_structs: weak_discretization_info
+
+
+"""
+struct for passing around the elemental (single-element) matrices used for
+taking derivatives and forming weak-form operators on Gauss-Legendre points in 1D
+"""
+struct gausslegendre_base_info
+    # elementwise differentiation matrix (ngrid*ngrid)
+    Dmat::Array{mk_float,2}
+    # local mass matrix type 0
+    M0::Array{mk_float,2}
+    # local mass matrix type 1
+    M1::Array{mk_float,2}
+    # local mass matrix type 2
+    M2::Array{mk_float,2}
+    # local S (weak derivative) matrix type 0
+    S0::Array{mk_float,2}
+    # local S (weak derivative) matrix type 1
+    S1::Array{mk_float,2}
+    # local K (weak second derivative) matrix type 0
+    K0::Array{mk_float,2}
+    # local K (weak second derivative) matrix type 1
+    K1::Array{mk_float,2}
+    # local K (weak second derivative) matrix type 2
+    K2::Array{mk_float,2}
+    # local P (weak derivative no integration by parts) matrix type 0
+    P0::Array{mk_float,2}
+    # local P (weak derivative no integration by parts) matrix type 1
+    P1::Array{mk_float,2}
+    # local P (weak derivative no integration by parts) matrix type 2
+    P2::Array{mk_float,2}
+    # boundary condition differentiation vector of length ngrid (for vperp grid using radau points)
+    D0::Array{mk_float,1}
+    # local nonlinear diffusion array Y00 (rank 3, ngrid*ngrid*ngrid)
+    Y00::Array{mk_float,3}
+    # local nonlinear diffusion array Y01 (rank 3)
+    Y01::Array{mk_float,3}
+    # local nonlinear diffusion array Y10 (rank 3)
+    Y10::Array{mk_float,3}
+    # local nonlinear diffusion array Y11 (rank 3)
+    Y11::Array{mk_float,3}
+    # local nonlinear diffusion array Y20 (rank 3)
+    Y20::Array{mk_float,3}
+    # local nonlinear diffusion array Y21 (rank 3)
+    Y21::Array{mk_float,3}
+    # local nonlinear diffusion array Y30 (rank 3)
+    Y30::Array{mk_float,3}
+    # local nonlinear diffusion array Y31 (rank 3)
+    Y31::Array{mk_float,3}
+end
+
+struct gausslegendre_info{TSparse <: AbstractSparseArray{mk_float,<:Any,2}, TLU} <: weak_discretization_info
+    # elemental matrices for Gauss-Legendre-Lobatto and Gauss-Legendre-Radau points
+    lobatto::gausslegendre_base_info
+    radau::gausslegendre_base_info
+    # global (1D) mass matrix
+    mass_matrix::Array{mk_float,2}
+    # global (1D) weak derivative matrix; sparse, with the concrete type a struct parameter
+    S_matrix::TSparse
+    # global (1D) weak second derivative matrix
+    K_matrix::Array{mk_float,2}
+    # global (1D) weak Laplacian derivative matrix
+    L_matrix::Array{mk_float,2}
+    # global (1D) LU object; concrete type is a struct parameter so field access is type-stable
+    mass_matrix_lu::TLU
+    # dummy matrix for local operators
+    Qmat::Array{mk_float,2}
+end
+
+function setup_gausslegendre_pseudospectral(coord;init_YY=true)
+    lobatto = setup_gausslegendre_pseudospectral_lobatto(coord,init_YY=init_YY)
+    radau = setup_gausslegendre_pseudospectral_radau(coord,init_YY=init_YY)
+    # dense global (coord.n x coord.n) matrices, all allocated before any is filled
+    n = coord.n
+    mass_matrix, S_dense, K_matrix, L_matrix = (allocate_float(n,n) for _ in 1:4)
+    # fill each global matrix from the elemental Lobatto and Radau matrices
+    for (QQ, option) in ((mass_matrix, "M"), (S_dense, "S"),
+                         (K_matrix, "K_with_BC_terms"), (L_matrix, "L_with_BC_terms"))
+        setup_global_weak_form_matrix!(QQ, lobatto, radau, coord, option)
+    end
+    mass_matrix_lu = lu(sparse(mass_matrix))
+    Qmat = allocate_float(coord.ngrid,coord.ngrid)
+    # S is stored in sparse form; the other global matrices stay dense
+    return gausslegendre_info(lobatto,radau,mass_matrix,sparse(S_dense),K_matrix,L_matrix,mass_matrix_lu,Qmat)
+end
+
+function setup_gausslegendre_pseudospectral_lobatto(coord;init_YY=true)
+    ngrid = coord.ngrid
+    nodes, wgts = gausslobatto(ngrid)
+    # elemental differentiation matrix
+    Dmat = allocate_float(ngrid, ngrid)
+    gausslobattolegendre_differentiation_matrix!(Dmat,nodes,ngrid)
+    # allocate an ngrid x ngrid matrix and fill it with the weak-form matrix `option`
+    function weak_matrix(option)
+        QQ = allocate_float(ngrid, ngrid)
+        GaussLegendre_weak_product_matrix!(QQ,ngrid,nodes,wgts,option)
+        return QQ
+    end
+    # local mass matrices
+    M0 = weak_matrix("M0")
+    M1 = weak_matrix("M1")
+    M2 = weak_matrix("M2")
+    # local S (weak derivative) matrices
+    S0 = weak_matrix("S0")
+    S1 = weak_matrix("S1")
+    # local K (weak second derivative) matrices
+    K0 = weak_matrix("K0")
+    K1 = weak_matrix("K1")
+    K2 = weak_matrix("K2")
+    # local P (weak derivative no integration by parts) matrices
+    P0 = weak_matrix("P0")
+    P1 = weak_matrix("P1")
+    P2 = weak_matrix("P2")
+    # derivative vector evaluated at x = -1, the lower extreme of the element
+    D0 = allocate_float(ngrid)
+    GaussLegendre_derivative_vector!(D0,-1.0,ngrid,nodes,wgts)
+    # local nonlinear diffusion arrays; allocated always, filled only if init_YY
+    Y00 = allocate_float(ngrid, ngrid, ngrid)
+    Y01 = allocate_float(ngrid, ngrid, ngrid)
+    Y10 = allocate_float(ngrid, ngrid, ngrid)
+    Y11 = allocate_float(ngrid, ngrid, ngrid)
+    Y20 = allocate_float(ngrid, ngrid, ngrid)
+    Y21 = allocate_float(ngrid, ngrid, ngrid)
+    Y30 = allocate_float(ngrid, ngrid, ngrid)
+    Y31 = allocate_float(ngrid, ngrid, ngrid)
+    if init_YY
+        GaussLegendre_weak_product_matrix!(Y00,ngrid,nodes,wgts,"Y00")
+        GaussLegendre_weak_product_matrix!(Y01,ngrid,nodes,wgts,"Y01")
+        GaussLegendre_weak_product_matrix!(Y10,ngrid,nodes,wgts,"Y10")
+        GaussLegendre_weak_product_matrix!(Y11,ngrid,nodes,wgts,"Y11")
+        GaussLegendre_weak_product_matrix!(Y20,ngrid,nodes,wgts,"Y20")
+        GaussLegendre_weak_product_matrix!(Y21,ngrid,nodes,wgts,"Y21")
+        GaussLegendre_weak_product_matrix!(Y30,ngrid,nodes,wgts,"Y30")
+        GaussLegendre_weak_product_matrix!(Y31,ngrid,nodes,wgts,"Y31")
+    end
+    return gausslegendre_base_info(Dmat,M0,M1,M2,S0,S1,
+            K0,K1,K2,P0,P1,P2,D0,Y00,Y01,Y10,Y11,Y20,Y21,Y30,Y31)
+end
+
+function setup_gausslegendre_pseudospectral_radau(coord;init_YY=true)
+    ngrid = coord.ngrid
+    # Gauss-Radau points on [-1,1)
+    x, w = gaussradau(ngrid)
+    # Gauss-Radau points on (-1,1]
+    xreverse, wreverse = -reverse(x), reverse(w)
+    # elemental differentiation matrix (passed the unreversed points x)
+    Dmat = allocate_float(ngrid, ngrid)
+    gaussradaulegendre_differentiation_matrix!(Dmat,x,ngrid)
+    # allocate an ngrid x ngrid matrix and fill it with the weak-form matrix `option`
+    function weak_matrix(option)
+        QQ = allocate_float(ngrid, ngrid)
+        GaussLegendre_weak_product_matrix!(QQ,ngrid,xreverse,wreverse,option,radau=true)
+        return QQ
+    end
+    # local mass matrices
+    M0 = weak_matrix("M0")
+    M1 = weak_matrix("M1")
+    M2 = weak_matrix("M2")
+    # local S (weak derivative) matrices
+    S0 = weak_matrix("S0")
+    S1 = weak_matrix("S1")
+    # local K (weak second derivative) matrices
+    K0 = weak_matrix("K0")
+    K1 = weak_matrix("K1")
+    K2 = weak_matrix("K2")
+    # local P (weak derivative no integration by parts) matrices
+    P0 = weak_matrix("P0")
+    P1 = weak_matrix("P1")
+    P2 = weak_matrix("P2")
+    # derivative vector evaluated at x = -1; Y arrays below are filled only if init_YY
+    D0 = allocate_float(ngrid)
+    GaussLegendre_derivative_vector!(D0,-1.0,ngrid,xreverse,wreverse,radau=true)
+    Y00 = allocate_float(ngrid, ngrid, ngrid)
+    Y01 = allocate_float(ngrid, ngrid, ngrid)
+    Y10 = allocate_float(ngrid, ngrid, ngrid)
+    Y11 = allocate_float(ngrid, ngrid, ngrid)
+    Y20 = allocate_float(ngrid, ngrid, ngrid)
+    Y21 = allocate_float(ngrid, ngrid, ngrid)
+    Y30 = allocate_float(ngrid, ngrid, ngrid)
+    Y31 = allocate_float(ngrid, ngrid, ngrid)
+    if init_YY
+        GaussLegendre_weak_product_matrix!(Y00,ngrid,xreverse,wreverse,"Y00",radau=true)
+        GaussLegendre_weak_product_matrix!(Y01,ngrid,xreverse,wreverse,"Y01",radau=true)
+        GaussLegendre_weak_product_matrix!(Y10,ngrid,xreverse,wreverse,"Y10",radau=true)
+        GaussLegendre_weak_product_matrix!(Y11,ngrid,xreverse,wreverse,"Y11",radau=true)
+        GaussLegendre_weak_product_matrix!(Y20,ngrid,xreverse,wreverse,"Y20",radau=true)
+        GaussLegendre_weak_product_matrix!(Y21,ngrid,xreverse,wreverse,"Y21",radau=true)
+        GaussLegendre_weak_product_matrix!(Y30,ngrid,xreverse,wreverse,"Y30",radau=true)
+        GaussLegendre_weak_product_matrix!(Y31,ngrid,xreverse,wreverse,"Y31",radau=true)
+    end
+    return gausslegendre_base_info(Dmat,M0,M1,M2,S0,S1,
+            K0,K1,K2,P0,P1,P2,D0,Y00,Y01,Y10,Y11,Y20,Y21,Y30,Y31)
+end
+
+"""
+    elementwise_derivative!(coord, ff, gausslegendre::gausslegendre_info)
+
+Differentiate `ff` element by element, storing the derivative on element `j` in
+column `j` of `coord.scratch_2d`.
+
+The first element is differentiated with the Radau matrix when
+`coord.name == "vperp"` and `coord.irank == 0`, and with the Lobatto matrix
+otherwise; all remaining elements use the Lobatto matrix. Every column is divided
+by `coord.element_scale[j]` to transform back to the physical coordinate scale.
+"""
+function elementwise_derivative!(coord, ff, gausslegendre::gausslegendre_info)
+    df = coord.scratch_2d
+    nelement = coord.nelement_local
+    ngrid = coord.ngrid
+    # check array bounds
+    @boundscheck nelement == size(df,2) && ngrid == size(df,1) || throw(BoundsError(df))
+
+    # first element: no index offset, since there is no overlapping point to
+    # its left
+    Dfirst = if coord.name == "vperp" && coord.irank == 0
+        gausslegendre.radau.Dmat
+    else
+        gausslegendre.lobatto.Dmat
+    end
+    @views mul!(df[:,1], Dfirst[:,:], ff[coord.imin[1]:coord.imax[1]])
+    @views df[1:ngrid,1] ./= coord.element_scale[1]
+
+    # remaining elements: the lowest point of the element is stored at imin-1
+    # on the full grid (it is shared with the previous element)
+    @inbounds for ielement ∈ 2:nelement
+        lower = coord.imin[ielement] - 1
+        upper = coord.imax[ielement]
+        @views mul!(df[:,ielement], gausslegendre.lobatto.Dmat[:,:], ff[lower:upper])
+        @views df[1:ngrid,ielement] ./= coord.element_scale[ielement]
+    end
+
+    return nothing
+end
+
+# The spectral element method does not use upwinding within an element, so the
+# advection factor `adv_fac` is ignored and the call is forwarded unchanged.
+elementwise_derivative!(coord, ff, adv_fac, spectral::gausslegendre_info) =
+    elementwise_derivative!(coord, ff, spectral)
+
+"""
+    elementwise_apply_Kmat!(coord, ff, gausslegendre::gausslegendre_info)
+
+Apply, element by element, the local matrix built by `get_KK_local!` (called with
+`explicit_BC_terms=true`) to `ff`. The result for element `j` is stored in column
+`j` of `coord.scratch_2d`. `gausslegendre.Qmat` is used as the work matrix and is
+overwritten for every element.
+"""
+function elementwise_apply_Kmat!(coord, ff, gausslegendre::gausslegendre_info)
+    df = coord.scratch_2d
+    nelement = coord.nelement_local
+    # check array bounds
+    @boundscheck nelement == size(df,2) && coord.ngrid == size(df,1) || throw(BoundsError(df))
+
+    Qmat = gausslegendre.Qmat
+    lobatto = gausslegendre.lobatto
+    radau = gausslegendre.radau
+
+    # first element: no index offset, there is no shared point to its left
+    get_KK_local!(Qmat,1,lobatto,radau,coord,explicit_BC_terms=true)
+    @views mul!(df[:,1],Qmat[:,:],ff[coord.imin[1]:coord.imax[1]])
+    # hard-coded switch, currently off: when enabled the first entry of the
+    # RHS vector is zeroed, consistent with use with the mass matrix with
+    # D f = 0 boundary conditions
+    zero_gradient_bc_lower_boundary = false
+    if zero_gradient_bc_lower_boundary && coord.name == "vperp"
+        df[1,1] = 0.0
+    end
+
+    # remaining elements: the lowest point of each element sits at imin-1
+    @inbounds for ielement ∈ 2:nelement
+        lower = coord.imin[ielement] - 1
+        upper = coord.imax[ielement]
+        get_KK_local!(Qmat,ielement,lobatto,radau,coord,explicit_BC_terms=true)
+        @views mul!(df[:,ielement],Qmat[:,:],ff[lower:upper])
+    end
+    return nothing
+end
+
+"""
+    elementwise_apply_Lmat!(coord, ff, gausslegendre::gausslegendre_info)
+
+Apply, element by element, the local matrix built by `get_LL_local!` (called with
+`explicit_BC_terms=true`) to `ff`. The result for element `j` is stored in column
+`j` of `coord.scratch_2d`. `gausslegendre.Qmat` is used as the work matrix and is
+overwritten for every element.
+"""
+function elementwise_apply_Lmat!(coord, ff, gausslegendre::gausslegendre_info)
+    df = coord.scratch_2d
+    nelement = coord.nelement_local
+    # check array bounds
+    @boundscheck nelement == size(df,2) && coord.ngrid == size(df,1) || throw(BoundsError(df))
+
+    Qmat = gausslegendre.Qmat
+    lobatto = gausslegendre.lobatto
+    radau = gausslegendre.radau
+
+    # first element: no index offset, there is no shared point to its left
+    get_LL_local!(Qmat,1,lobatto,radau,coord,explicit_BC_terms=true)
+    @views mul!(df[:,1],Qmat[:,:],ff[coord.imin[1]:coord.imax[1]])
+    # hard-coded switch, currently off: when enabled the first entry of the
+    # RHS vector is zeroed, consistent with use with the mass matrix with
+    # D f = 0 boundary conditions
+    zero_gradient_bc_lower_boundary = false
+    if zero_gradient_bc_lower_boundary && coord.name == "vperp"
+        df[1,1] = 0.0
+    end
+
+    # remaining elements: the lowest point of each element sits at imin-1
+    @inbounds for ielement ∈ 2:nelement
+        lower = coord.imin[ielement] - 1
+        upper = coord.imax[ielement]
+        get_LL_local!(Qmat,ielement,lobatto,radau,coord,explicit_BC_terms=true)
+        @views mul!(df[:,ielement],Qmat[:,:],ff[lower:upper])
+    end
+    return nothing
+end
+
+"""
+    mass_matrix_solve!(f, b, spectral::gausslegendre_info)
+
+Solve the mass matrix system with right-hand side `b` using
+`spectral.mass_matrix_lu`, and copy the solution into `f`.
+"""
+function mass_matrix_solve!(f, b, spectral::gausslegendre_info)
+    f .= spectral.mass_matrix_lu \ b
+    return nothing
+end
+
+"""
+    gausslobattolegendre_differentiation_matrix!(D, x, ngrid)
+
+Fill `D` with the differentiation matrix on Gauss-Legendre-Lobatto points.
+
+Formula for the differentiation matrix taken from p196 of the chapter
+`The Spectral Element Method' of `Computational Seismology', Heiner Igel,
+First Edition, published in 2017 by Oxford University Press.
+Or https://doc.nektar.info/tutorials/latest/fundamentals/differentiation/fundamentals-differentiationch2.html
+
+# Arguments
+- `D`: differentiation matrix (overwritten)
+- `x`: Gauss-Legendre-Lobatto points in [-1,1]
+- `ngrid`: number of points per element (incl. boundary points)
+
+Note that `D` does not include a scaling factor.
+"""
+function gausslobattolegendre_differentiation_matrix!(D::Array{Float64,2},x::Array{Float64,1},ngrid::Int64)
+    fill!(D, 0.0)
+    order = ngrid - 1
+    # off-diagonal entries
+    for col in 1:ngrid, row in 1:ngrid
+        row == col && continue
+        D[row,col] = (Pl(x[row],order)/Pl(x[col],order))/(x[row]-x[col])
+    end
+    # Diagonal entries are obtained from the sum of the off-diagonal values of
+    # each row (the diagonal is still zero when the sum is taken). The analytical
+    # alternative would be D[1,1] = -0.25*(ngrid - 1)*ngrid,
+    # D[ngrid,ngrid] = 0.25*(ngrid - 1)*ngrid and zero on the interior diagonal.
+    for row in 1:ngrid
+        D[row,row] = -sum(D[row,:])
+    end
+    return nothing
+end
+"""
+    gaussradaulegendre_differentiation_matrix!(D, x, ngrid)
+
+Fill `D` with the differentiation matrix on Gauss-Legendre-Radau points, reordered
+for a grid on (-1,1].
+
+From
+https://doc.nektar.info/tutorials/latest/fundamentals/differentiation/fundamentals-differentiationch2.html
+
+# Arguments
+- `D`: differentiation matrix (overwritten)
+- `x`: Gauss-Legendre-Radau points in [-1,1)
+- `ngrid`: number of points per element (incl. boundary points)
+
+Note that `D` does not include a scaling factor.
+"""
+function gaussradaulegendre_differentiation_matrix!(D::Array{Float64,2},x::Array{Float64,1},ngrid::Int64)
+    fill!(D, 0.0)
+    order = ngrid - 1
+    # off-diagonal entries on the grid [-1,1)
+    for col in 1:ngrid, row in 1:ngrid
+        row == col && continue
+        D[row,col] = (Pl(x[row],order)/Pl(x[col],order))*((1.0 - x[col])/(1.0 - x[row]))/(x[row]-x[col])
+    end
+    # Diagonal entries are obtained from the sum of the off-diagonal values of
+    # each row (the diagonal is still zero when the sum is taken). The analytical
+    # alternative would be D[1,1] = -0.25*(ngrid - 1)*(ngrid + 1) and
+    # D[ix,ix] = 0.5/(1.0 - x[ix]) for ix in 2:ngrid.
+    for row in 1:ngrid
+        D[row,row] = -sum(D[row,:])
+    end
+
+    # get into correct order for a grid on (-1,1]: reverse both index
+    # directions and flip the sign. The indexed right-hand side is a copy, so
+    # the in-place assignment does not read entries it has already overwritten.
+    D[1:ngrid,1:ngrid] .= .-D[ngrid:-1:1,ngrid:-1:1]
+    return nothing
+end
+
+"""
+    GaussLegendre_derivative_vector!(D0, xj, ngrid, x, wgts; radau=false)
+
+Gauss-Legendre derivative at an arbitrary location, used for the boundary condition
+on Radau points.
+
+# Arguments
+- `D0`: the vector to fill (overwritten)
+- `xj`: the x location where the derivative is evaluated
+- `ngrid`: number of points in `x`
+- `x`: the grid from -1 to 1
+- `wgts`: weights associated with the points `x`
+- `radau`: selects the normalisation used for the highest-order Legendre coefficient
+
+Note that `D0` is not scaled to the physical grid.
+"""
+function GaussLegendre_derivative_vector!(D0,xj,ngrid,x,wgts;radau=false)
+    # coefficients in the expansion of the Lagrange polynomials in terms of
+    # Legendre polynomials
+    gamma = allocate_float(ngrid)
+    for n in 0:ngrid-2
+        gamma[n+1] = Legendre_h_n(n)
+    end
+    gamma[ngrid] = radau ? Legendre_h_n(ngrid-1) : 2.0/(ngrid - 1)
+
+    fill!(D0, 0.0)
+    for i in 1:ngrid, k in 1:ngrid
+        D0[i] += wgts[i]*Pl(x[i],k-1)*dnPl(xj,k-1,1)/gamma[k]
+    end
+    # set the `diagonal' value: the first entry is replaced by minus the sum of
+    # all the other entries. There is no explicit return, so the value of this
+    # final assignment is what the function returns.
+    D0[1] = 0.0
+    D0[1] = -sum(D0[:])
+end
+
+"""
+    Legendre_h_n(k)
+
+Return `2/(2k + 1)`, the result of the inner product of the Legendre polynomial of
+order `k` with itself.
+"""
+Legendre_h_n(k) = 2.0/(2.0*k + 1)
+
+
+# Translate a matrix `option` into the description of its integrand
+#   sign * < F_i F_j x^power >
+# returned as (sign, power, derivative on P_i, derivative on P_j), where F is the
+# Legendre polynomial P or its first derivative P'.
+# Throws an ArgumentError for an unrecognised option.
+function _weak_product_matrix_terms(option)
+    terms = Dict(
+        "M0" => ( 1.0, 0, false, false), # < P_i P_j >
+        "M1" => ( 1.0, 1, false, false), # < P_i P_j x >
+        "M2" => ( 1.0, 2, false, false), # < P_i P_j x^2 >
+        "S0" => (-1.0, 0, true,  false), # -< P'_i P_j >
+        "S1" => (-1.0, 1, true,  false), # -< P'_i P_j x >
+        "K0" => (-1.0, 0, true,  true),  # -< P'_i P'_j >
+        "K1" => (-1.0, 1, true,  true),  # -< P'_i P'_j x >
+        "K2" => (-1.0, 2, true,  true),  # -< P'_i P'_j x^2 >
+        "P0" => ( 1.0, 0, false, true),  # < P_i P'_j >
+        "P1" => ( 1.0, 1, false, true),  # < P_i P'_j x >
+        "P2" => ( 1.0, 2, false, true),  # < P_i P'_j x^2 >
+    )
+    haskey(terms, option) || throw(ArgumentError(
+        "unknown weak product matrix option $(repr(option)); expected one of " *
+        join(sort(collect(keys(terms))), ", ")))
+    return terms[option]
+end
+
+"""
+    GaussLegendre_weak_product_matrix!(QQ::Array{mk_float,2}, ngrid, x, wgts, option; radau=false)
+
+Assign an arbitrary weak inner product matrix `QQ` on a 1D line with Jacobian = 1.
+The matrix acts on a single vector `x` such that `y = Q * x` is also a vector.
+
+# Arguments
+- `QQ`: matrix to fill (overwritten)
+- `ngrid`: number of collocation points
+- `x`, `wgts`: collocation points and the associated weights
+- `option`: one of "M0", "M1", "M2", "S0", "S1", "K0", "K1", "K2", "P0", "P1", "P2"
+  selecting the inner product of Legendre polynomials that is used:
+  M0: < P_i P_j >, M1: < P_i P_j x >, M2: < P_i P_j x^2 >,
+  S0: -< P'_i P_j >, S1: -< P'_i P_j x >,
+  K0: -< P'_i P'_j >, K1: -< P'_i P'_j x >, K2: -< P'_i P'_j x^2 >,
+  P0: < P_i P'_j >, P1: < P_i P'_j x >, P2: < P_i P'_j x^2 >
+- `radau`: selects the normalisation used for the highest-order Legendre coefficient
+
+# Throws
+- `ArgumentError` if `option` is not one of the values listed above (previously an
+  unknown option silently produced a matrix of zeros).
+"""
+function GaussLegendre_weak_product_matrix!(QQ::Array{mk_float,2},ngrid,x,wgts,option;radau=false)
+    # validate the option before any work is done or QQ is modified
+    sgn, xpower, deriv_i, deriv_j = _weak_product_matrix_terms(option)
+
+    # coefficient in expansion of
+    # lagrange polys in terms of Legendre polys
+    gamma = allocate_float(ngrid)
+    for i in 1:ngrid-1
+        gamma[i] = Legendre_h_n(i-1)
+    end
+    gamma[ngrid] = radau ? Legendre_h_n(ngrid-1) : 2.0/(ngrid - 1)
+
+    # quadrature used to evaluate the inner products of Legendre polys
+    nquad = 2*ngrid
+    zz, wz = gausslegendre(nquad)
+    # tabulate the Legendre polys and their first derivatives on the quadrature
+    # points once, rather than re-evaluating them inside the innermost loop
+    Pz = [Pl(zz[q],n-1) for q in 1:nquad, n in 1:ngrid]
+    dPz = [dnPl(zz[q],n-1,1) for q in 1:nquad, n in 1:ngrid]
+    Fi = deriv_i ? dPz : Pz
+    Fj = deriv_j ? dPz : Pz
+
+    AA = allocate_float(ngrid,ngrid)
+    @. AA = 0.0
+    for j in 1:ngrid, i in 1:ngrid, q in 1:nquad
+        AA[i,j] += sgn*((zz[q]^xpower)*wz[q]*Fi[q,i]*Fj[q,j])
+    end
+
+    # transform from the Legendre basis to the collocation (Lagrange) basis
+    Px = [Pl(x[i],n-1) for i in 1:ngrid, n in 1:ngrid]
+    QQ .= 0.0
+    for j in 1:ngrid, i in 1:ngrid, l in 1:ngrid, k in 1:ngrid
+        QQ[i,j] += wgts[i]*wgts[j]*Px[i,k]*Px[j,l]*AA[k,l]/(gamma[k]*gamma[l])
+    end
+    return nothing
+end
+
+# Translate a rank-3 `option` into the description of its integrand
+#   < P_i F_j F_k x^power >
+# returned as (power, derivative on P_j, derivative on P_k), where F is the
+# Legendre polynomial P or its first derivative P'.
+# Throws an ArgumentError for an unrecognised option.
+function _weak_product_tensor_terms(option)
+    terms = Dict(
+        "Y00" => (0, false, false), # < P_i P_j P_k >
+        "Y01" => (1, false, false), # < P_i P_j P_k x >
+        "Y10" => (0, false, true),  # < P_i P_j P'_k >
+        "Y11" => (1, false, true),  # < P_i P_j P'_k x >
+        "Y20" => (0, true,  true),  # < P_i P'_j P'_k >
+        "Y21" => (1, true,  true),  # < P_i P'_j P'_k x >
+        "Y30" => (0, true,  false), # < P_i P'_j P_k >
+        "Y31" => (1, true,  false), # < P_i P'_j P_k x >
+    )
+    haskey(terms, option) || throw(ArgumentError(
+        "unknown weak product matrix option $(repr(option)); expected one of " *
+        join(sort(collect(keys(terms))), ", ")))
+    return terms[option]
+end
+
+"""
+    GaussLegendre_weak_product_matrix!(QQ::Array{mk_float,3}, ngrid, x, wgts, option; radau=false)
+
+Assign an arbitrary weak inner product matrix `QQ` on a 1D line with Jacobian = 1.
+The matrix acts on two vectors `x1` and `x2` such that the quadratic form
+`y = x1 * Q * x2` is also a vector.
+
+# Arguments
+- `QQ`: rank-3 array to fill (overwritten)
+- `ngrid`: number of collocation points
+- `x`, `wgts`: collocation points and the associated weights
+- `option`: one of "Y00", "Y01", "Y10", "Y11", "Y20", "Y21", "Y30", "Y31" selecting
+  the inner product of Legendre polynomials that is used:
+  Y00: < P_i P_j P_k >, Y01: < P_i P_j P_k x >,
+  Y10: < P_i P_j P'_k >, Y11: < P_i P_j P'_k x >,
+  Y20: < P_i P'_j P'_k >, Y21: < P_i P'_j P'_k x >,
+  Y30: < P_i P'_j P_k >, Y31: < P_i P'_j P_k x >
+- `radau`: selects the normalisation used for the highest-order Legendre coefficient
+
+# Throws
+- `ArgumentError` if `option` is not one of the values listed above (previously an
+  unknown option silently produced an array of zeros).
+"""
+function GaussLegendre_weak_product_matrix!(QQ::Array{mk_float,3},ngrid,x,wgts,option;radau=false)
+    # validate the option before any work is done or QQ is modified
+    xpower, deriv_j, deriv_k = _weak_product_tensor_terms(option)
+
+    # coefficient in expansion of
+    # lagrange polys in terms of Legendre polys
+    gamma = allocate_float(ngrid)
+    for i in 1:ngrid-1
+        gamma[i] = Legendre_h_n(i-1)
+    end
+    gamma[ngrid] = radau ? Legendre_h_n(ngrid-1) : 2.0/(ngrid - 1)
+
+    # quadrature used to evaluate the inner products of Legendre polys
+    nquad = 2*ngrid
+    zz, wz = gausslegendre(nquad)
+    # tabulate the Legendre polys and their first derivatives on the quadrature
+    # points once, rather than re-evaluating them inside the innermost loop
+    Pz = [Pl(zz[q],n-1) for q in 1:nquad, n in 1:ngrid]
+    dPz = [dnPl(zz[q],n-1,1) for q in 1:nquad, n in 1:ngrid]
+    Fj = deriv_j ? dPz : Pz
+    Fk = deriv_k ? dPz : Pz
+
+    AA = allocate_float(ngrid,ngrid,ngrid)
+    @. AA = 0.0
+    for k in 1:ngrid, j in 1:ngrid, i in 1:ngrid, q in 1:nquad
+        AA[i,j,k] += (zz[q]^xpower)*wz[q]*Pz[q,i]*Fj[q,j]*Fk[q,k]
+    end
+
+    # transform from the Legendre basis to the collocation (Lagrange) basis
+    Px = [Pl(x[i],n-1) for i in 1:ngrid, n in 1:ngrid]
+    QQ .= 0.0
+    for k in 1:ngrid, j in 1:ngrid, i in 1:ngrid
+        for l in 1:ngrid, m in 1:ngrid, n in 1:ngrid
+            QQ[i,j,k] += wgts[i]*wgts[j]*wgts[k]*Px[i,n]*Px[j,m]*Px[k,l]*AA[n,m,l]/(gamma[n]*gamma[m]*gamma[l])
+        end
+    end
+    return nothing
+end
+
+# Half of the width L/nelement_global obtained by dividing a domain of length L
+# into nelement_global equal parts.
+scale_factor_func(L,nelement_global) = 0.5*L/float(nelement_global)
+
+# Shift associated with an element: L*((ielement_global - 0.5)/nelement_global - 0.5),
+# where the global element index is the local index offset by irank*nelement_local
+# (for distributed memory MPI use).
+function shift_factor_func(L,nelement_global,nelement_local,irank,ielement_local)
+    global_index = ielement_local + irank*nelement_local
+    return L*((float(global_index)-0.5)/float(nelement_global) - 0.5)
+end
+
+# Global element index: the local index offset by irank*nelement_local.
+ielement_global_func(nelement_local,irank,ielement_local) = ielement_local + irank*nelement_local
+
+"""
+    scaled_gauss_legendre_lobatto_grid(ngrid, nelement_local, n_local, element_scale, element_shift, imin, imax)
+
+Set up the full Gauss-Legendre-Lobatto grid and collocation point weights.
+
+The reference points on [-1,1] are mapped onto each element with
+`element_scale[j]*x + element_shift[j]`. Returns the tuple `(grid, wgts)`, both of
+length `n_local`.
+"""
+function scaled_gauss_legendre_lobatto_grid(ngrid, nelement_local, n_local, element_scale, element_shift, imin, imax)
+    # Gauss-Legendre-Lobatto points and weights on [-1,1]
+    x, w = gausslobatto(ngrid)
+    grid = allocate_float(n_local)
+    wgts = allocate_float(n_local)
+    fill!(wgts, 0.0)
+    @inbounds for ielement in 1:nelement_local
+        # every element after the first shares its lowest point with the
+        # previous element; that point is stored at index imin-1
+        shared = ielement == 1 ? 0 : 1
+        scale = element_scale[ielement]
+        shift = element_shift[ielement]
+        lower = imin[ielement]
+        upper = imax[ielement]
+        # the shared point was already written by the previous element
+        grid[lower:upper] .= scale .* x[1+shared:ngrid] .+ shift
+        # on boundary points the weights from both the left and the right
+        # element are accumulated
+        wgts[lower-shared:upper] .+= scale .* w[1:ngrid]
+    end
+    return grid, wgts
+end
+
+"""
+    scaled_gauss_legendre_radau_grid(ngrid, nelement_local, n_local, element_scale, element_shift, imin, imax, irank)
+
+Set up the full grid and collocation point weights where, for `irank == 0`, the
+first element uses Gauss-Legendre-Radau points on (-1,1] and all other elements use
+Gauss-Legendre-Lobatto points. For any other `irank` every element is
+Gauss-Legendre-Lobatto. Returns the tuple `(grid, wgts)`, both of length `n_local`.
+"""
+function scaled_gauss_legendre_radau_grid(ngrid, nelement_local, n_local, element_scale, element_shift, imin, imax, irank)
+    # Gauss-Legendre-Lobatto points and weights on [-1,1]
+    x_lob, w_lob = gausslobatto(ngrid)
+    # Gauss-Legendre-Radau points and weights on [-1,1) ...
+    x_fwd, w_fwd = gaussradau(ngrid)
+    # ... transformed to a Gauss-Legendre-Radau grid on (-1,1]
+    x_rad = -reverse(x_fwd)
+    w_rad = reverse(w_fwd)
+
+    grid = allocate_float(n_local)
+    wgts = allocate_float(n_local)
+    fill!(wgts, 0.0)
+    if irank == 0
+        # 1st element: fill in with Gauss-Legendre-Radau points
+        scale = element_scale[1]
+        grid[imin[1]:imax[1]] .= scale .* x_rad[1:ngrid] .+ element_shift[1]
+        wgts[imin[1]:imax[1]] .+= scale .* w_rad[1:ngrid]
+        # remaining elements: Lobatto points; the lowest point is shared with the
+        # previous element and stored at index imin-1
+        @inbounds for ielement in 2:nelement_local
+            scale = element_scale[ielement]
+            lower = imin[ielement]
+            upper = imax[ielement]
+            grid[lower:upper] .= scale .* x_lob[2:ngrid] .+ element_shift[ielement]
+            wgts[lower-1:upper] .+= scale .* w_lob[1:ngrid]
+        end
+    else
+        # all elements are Gauss-Legendre-Lobatto
+        @inbounds for ielement in 1:nelement_local
+            shared = ielement == 1 ? 0 : 1
+            scale = element_scale[ielement]
+            lower = imin[ielement]
+            upper = imax[ielement]
+            grid[lower:upper] .= scale .* x_lob[1+shared:ngrid] .+ element_shift[ielement]
+            wgts[lower-shared:upper] .+= scale .* w_lob[1:ngrid]
+        end
+    end
+    return grid, wgts
+end
+
+"""
+    setup_global_weak_form_matrix!(QQ_global, lobatto, radau, coord, option)
+
+Assign the local weak-form matrices to the global array `QQ_global` for later
+solving the weak form of the required 1D equation. This function only supports
+fully local grids that have `coord.nelement_local = coord.nelement_global`.
+
+The `option` variable is a flag for choosing the type of matrix to be constructed;
+it is passed on to `get_QQ_local!`. The elemental matrices are assembled without
+imposing boundary conditions on the first and final rows of the matrix. This means
+that the operators constructed from this function can only be used for
+differentiation, and not for solving 1D ODEs.
+The shared points in the element assembly are averaged (instead of simply added) to
+be consistent with the derivative_elements_to_full_grid!() function in calculus.jl,
+which is used to form the RHS of the equation
+
+    M * d2f = K * f
+
+where M is the mass matrix and K is the stiffness matrix.
+"""
+function setup_global_weak_form_matrix!(QQ_global::Array{mk_float,2},
+                               lobatto::gausslegendre_base_info,
+                               radau::gausslegendre_base_info,
+                               coord,option)
+    ngrid = coord.ngrid
+    imin = coord.imin
+    imax = coord.imax
+    nelement = coord.nelement_local
+    # work matrix holding the local matrix of the current element
+    QQ_j = allocate_float(ngrid,ngrid)
+    fill!(QQ_global, 0.0)
+
+    for j in 1:nelement
+        # N.B. QQ varies with ielement for vperp, but not vpa
+        # a radau element is used for the vperp grid (see get_QQ_local!())
+        get_QQ_local!(QQ_j,j,lobatto,radau,coord,option)
+        # Global index of the lowest point of the element. Elements after the
+        # first share this point with the previous element, which stores it at
+        # imin[j]-1.
+        row_lo = j == 1 ? imin[j] : imin[j] - 1
+        cols = row_lo:imax[j]
+        # rows on points shared between two elements get half of the
+        # contribution from each element, so that the sum is an average
+        lower_fac = j == 1 ? 1.0 : 0.5
+        upper_fac = j == nelement ? 1.0 : 0.5
+
+        # lower boundary assembly on element
+        @views QQ_global[row_lo,cols] .+= lower_fac .* QQ_j[1,:]
+        # interior rows, offset consistently from the lowest point of the element
+        for k in 2:ngrid-1
+            @views QQ_global[row_lo+k-1,cols] .+= QQ_j[k,:]
+        end
+        # upper boundary assembly on element
+        @views QQ_global[imax[j],cols] .+= upper_fac .* QQ_j[ngrid,:]
+    end
+
+    return nothing
+end
+
+"""
+    get_QQ_local!(QQ, ielement, lobatto, radau, coord, option)
+
+Fill `QQ` with the local matrix for element `ielement` selected by `option`, by
+forwarding to the matching `get_*_local!` function:
+"M" -> `get_MM_local!`, "R" -> `get_MR_local!`, "N" -> `get_MN_local!`,
+"P" -> `get_PP_local!`, "U" -> `get_PU_local!`, "S" -> `get_SS_local!`,
+"K" -> `get_KK_local!`, "J" -> `get_KJ_local!`, "L" -> `get_LL_local!`.
+The options "K_with_BC_terms" and "L_with_BC_terms" call `get_KK_local!` and
+`get_LL_local!` with `explicit_BC_terms=true`.
+
+# Throws
+- `ArgumentError` if `option` is not one of the values above. Previously an unknown
+  option returned without writing to `QQ`, leaving whatever data it held before.
+"""
+function get_QQ_local!(QQ::Array{mk_float,2},ielement,
+        lobatto::gausslegendre_base_info,
+        radau::gausslegendre_base_info, 
+        coord,option)
+  
+        if option == "M"
+            get_MM_local!(QQ,ielement,lobatto,radau,coord)
+        elseif option == "R"
+            get_MR_local!(QQ,ielement,lobatto,radau,coord)
+        elseif option == "N"
+            get_MN_local!(QQ,ielement,lobatto,radau,coord)
+        elseif option == "P"
+            get_PP_local!(QQ,ielement,lobatto,radau,coord)
+        elseif option == "U"
+            get_PU_local!(QQ,ielement,lobatto,radau,coord)
+        elseif option == "S"
+            get_SS_local!(QQ,ielement,lobatto,radau,coord)
+        elseif option == "K"
+            get_KK_local!(QQ,ielement,lobatto,radau,coord)
+        elseif option == "K_with_BC_terms"
+            get_KK_local!(QQ,ielement,lobatto,radau,coord,explicit_BC_terms=true)
+        elseif option == "J"
+            get_KJ_local!(QQ,ielement,lobatto,radau,coord)
+        elseif option == "L"
+            get_LL_local!(QQ,ielement,lobatto,radau,coord)
+        elseif option == "L_with_BC_terms"
+            get_LL_local!(QQ,ielement,lobatto,radau,coord,explicit_BC_terms=true)
+        else
+            # fail loudly rather than leaving QQ unassigned
+            throw(ArgumentError("unknown local matrix option $(repr(option)); expected one of " *
+                                "M, R, N, P, U, S, K, K_with_BC_terms, J, L, L_with_BC_terms"))
+        end
+        return nothing
+end
+
+"""
+    get_MM_local!(QQ, ielement, lobatto, radau, coord)
+
+Fill `QQ` with the local mass matrix of element `ielement`.
+
+For `coord.name == "vperp"` integrals of the form int^infty_0 (.) vperp d vperp are
+assumed, which requires extra scale and shift factors; the Radau matrices are used
+for the first element on `coord.irank == 0` and the Lobatto matrices otherwise.
+For any other coordinate integrals of the form int^infty_-infty (.) d vpa are
+assumed.
+"""
+function get_MM_local!(QQ,ielement,
+        lobatto::gausslegendre_base_info,
+        radau::gausslegendre_base_info, 
+        coord)
+        
+        scale_factor = coord.element_scale[ielement]
+        shift_factor = coord.element_shift[ielement]
+        if coord.name != "vperp"
+            @. QQ = lobatto.M0*scale_factor
+            return nothing
+        end
+        # vperp: pick the set of reference matrices matching the points used
+        # on this element
+        basis = (ielement > 1 || coord.irank > 0) ? lobatto : radau
+        @. QQ =  (shift_factor*basis.M0 + scale_factor*basis.M1)*scale_factor
+        return nothing
+end
+
+"""
+    get_SS_local!(QQ, ielement, lobatto, radau, coord)
+
+Fill `QQ` with the local first-derivative (S) matrix of element `ielement`,
+including the boundary terms from integration by parts.
+
+For `coord.name == "vperp"` integrals of the form int^infty_0 (.) vperp d vperp are
+assumed, which requires extra scale and shift factors; the Radau matrices are used
+for the first element on `coord.irank == 0` and the Lobatto matrices otherwise.
+For any other coordinate integrals of the form int^infty_-infty (.) d vpa are
+assumed.
+"""
+function get_SS_local!(QQ,ielement,
+        lobatto::gausslegendre_base_info,
+        radau::gausslegendre_base_info, 
+        coord)
+        
+        scale_factor = coord.element_scale[ielement]
+        shift_factor = coord.element_shift[ielement]
+        if coord.name == "vperp" # assume integrals of form int^infty_0 (.) vperp d vperp
+            # extra scale and shift factors required because of vperp in integral
+            if ielement > 1 || coord.irank > 0 # lobatto points
+                @. QQ =  shift_factor*lobatto.S0 + scale_factor*lobatto.S1
+                # boundary terms from integration by parts
+                # Index of the lowest point of the element: elements after the
+                # first share it with the previous element, which stores it at
+                # imin-1; the first element starts at imin itself (using imin-1
+                # there would index outside the element).
+                imin = ielement > 1 ? coord.imin[ielement] - 1 : coord.imin[ielement]
+                imax = coord.imax[ielement]
+                QQ[1,1] -= coord.grid[imin]
+                QQ[coord.ngrid,coord.ngrid] += coord.grid[imax]
+            else # radau points 
+                @. QQ =  shift_factor*radau.S0 + scale_factor*radau.S1
+                # boundary terms from integration by parts
+                imax = coord.imax[ielement]
+                QQ[coord.ngrid,coord.ngrid] += coord.grid[imax]
+            end
+        else # assume integrals of form int^infty_-infty (.) d vpa
+            @. QQ = lobatto.S0
+            # boundary terms from integration by parts
+            QQ[1,1] -= 1.0
+            QQ[coord.ngrid,coord.ngrid] += 1.0
+        end
+        return nothing
+end
+
+"""
+    get_KK_local!(QQ, ielement, lobatto, radau, coord; explicit_BC_terms=false)
+
+Fill `QQ` with the local second-derivative (K) matrix of element `ielement`.
+
+For `coord.name == "vperp"` integrals of the form int^infty_0 (.) vperp d vperp are
+assumed; the `P0` term makes the operator d^2 / dvperp^2 rather than
+(1/vperp) d ( vperp d (.) / d vperp). The Radau matrices are used for the first
+element on `coord.irank == 0` and the Lobatto matrices otherwise. For any other
+coordinate integrals of the form int^infty_-infty (.) d vpa are assumed.
+
+# Keywords
+- `explicit_BC_terms`: when `true`, the boundary terms from integration by parts are
+  added to the first row of the matrix for `ielement == 1` (Lobatto elements only)
+  and to the last row for `ielement == coord.nelement_local`.
+"""
+function get_KK_local!(QQ,ielement,
+        lobatto::gausslegendre_base_info,
+        radau::gausslegendre_base_info, 
+        coord;explicit_BC_terms=false)
+        nelement = coord.nelement_local
+        scale_factor = coord.element_scale[ielement]
+        shift_factor = coord.element_shift[ielement]
+        if coord.name == "vperp" # assume integrals of form int^infty_0 (.) vperp d vperp
+            # extra scale and shift factors required because of vperp in integral
+            # P0 factors make this a d^2 / dvperp^2 rather than (1/vperp) d ( vperp d (.) / d vperp)
+            if ielement > 1 || coord.irank > 0 # lobatto points
+                @. QQ =  (shift_factor/scale_factor)*lobatto.K0 + lobatto.K1 - lobatto.P0
+                # boundary terms from integration by parts
+                if explicit_BC_terms && ielement == 1
+                    # The first element has no shared point to its left, so its
+                    # lowest point is stored at imin itself (imin-1 would index
+                    # outside the element).
+                    imin = coord.imin[ielement]
+                    @. QQ[1,:] -= coord.grid[imin]*lobatto.Dmat[1,:]/scale_factor
+                end
+                if explicit_BC_terms && ielement == nelement
+                    imax = coord.imax[ielement]
+                    @. QQ[coord.ngrid,:] += coord.grid[imax]*lobatto.Dmat[coord.ngrid,:]/scale_factor  
+                end
+            else # radau points 
+                @. QQ =  (shift_factor/scale_factor)*radau.K0 + radau.K1 - radau.P0
+                # boundary terms from integration by parts
+                if explicit_BC_terms && ielement == nelement  
+                    imax = coord.imax[ielement]
+                    @. QQ[coord.ngrid,:] += coord.grid[imax]*radau.Dmat[coord.ngrid,:]/scale_factor
+                end
+            end
+        else # assume integrals of form int^infty_-infty (.) d vpa
+            @. QQ = lobatto.K0/scale_factor
+            # boundary terms from integration by parts
+            if explicit_BC_terms && ielement == 1
+                @. QQ[1,:] -= lobatto.Dmat[1,:]/scale_factor
+            end
+            if explicit_BC_terms && ielement == nelement
+                @. QQ[coord.ngrid,:] += lobatto.Dmat[coord.ngrid,:]/scale_factor
+            end
+        end
+        return nothing
+end
+
+# Second derivative matrix with a vperp^2 Jacobian factor if coord is vperp.
+# Not useful for the vpa coordinate.
+# The scale and shift factors are computed from coord.L and the element counts
+# via scale_factor_func() and shift_factor_func(), with 0.5*coord.L added to the
+# shift.
+function get_KJ_local!(QQ,ielement,
+        lobatto::gausslegendre_base_info,
+        radau::gausslegendre_base_info, 
+        coord)
+        
+        scale_factor = scale_factor_func(coord.L,coord.nelement_global)
+        shift_factor = shift_factor_func(coord.L,coord.nelement_global,coord.nelement_local,coord.irank,ielement) + 0.5*coord.L
+        if coord.name == "vperp" # assume integrals of form int^infty_0 (.) vperp d vperp
+            # extra scale and shift factors required because of vperp^2 in integral;
+            # lobatto points are used everywhere except on the first element of
+            # irank 0, which uses radau points
+            basis = (ielement > 1 || coord.irank > 0) ? lobatto : radau
+            @. QQ = (basis.K0*((shift_factor^2)/scale_factor) +
+                     basis.K1*2.0*shift_factor +
+                     basis.K2*scale_factor)
+        else # assume integrals of form int^infty_-infty (.) d vpa
+            @. QQ = lobatto.K0/scale_factor
+        end
+        return nothing
+end
+
+# Element-local matrix, written into QQ, for the weak form of
+#   vperp: (1/vperp) d ( vperp d (.) / d vperp), integrals of form int^infty_0 (.) vperp d vperp
+#   other: d^2 (.) / d vpa^2, integrals of form int^infty_-infty (.) d vpa
+# explicit_BC_terms=true adds the boundary terms from integration by parts on
+# the first and last local elements.
+function get_LL_local!(QQ,ielement,
+        lobatto::gausslegendre_base_info,
+        radau::gausslegendre_base_info,
+        coord;explicit_BC_terms=false)
+    scale_factor = coord.element_scale[ielement]
+    shift_factor = coord.element_shift[ielement]
+    n_local = coord.nelement_local
+    at_lower = explicit_BC_terms && ielement == 1
+    at_upper = explicit_BC_terms && ielement == n_local
+    if coord.name != "vperp"
+        # vpa-like coordinate: Lobatto points on every element
+        @. QQ = lobatto.K0/scale_factor
+        if at_lower
+            @. QQ[1,:] -= lobatto.Dmat[1,:]/scale_factor
+        end
+        if at_upper
+            @. QQ[coord.ngrid,:] += lobatto.Dmat[coord.ngrid,:]/scale_factor
+        end
+        return nothing
+    end
+    # vperp: extra scale and shift factors required because of vperp in integral.
+    # Lobatto points when ielement > 1 or coord.irank > 0, else Radau points.
+    use_lobatto = ielement > 1 || coord.irank > 0
+    basis = use_lobatto ? lobatto : radau
+    @. QQ = (shift_factor/scale_factor)*basis.K0 + basis.K1
+    if at_lower && use_lobatto
+        # the lower boundary term is only added on Lobatto elements
+        imin = coord.imin[ielement] - 1
+        @. QQ[1,:] -= coord.grid[imin]*basis.Dmat[1,:]/scale_factor
+    end
+    if at_upper
+        imax = coord.imax[ielement]
+        @. QQ[coord.ngrid,:] += coord.grid[imax]*basis.Dmat[coord.ngrid,:]/scale_factor
+    end
+    return nothing
+end
+
+# mass matrix without vperp factor (matrix N), written into QQ;
+# only useful for the vperp coordinate
+function get_MN_local!(QQ,ielement,
+        lobatto::gausslegendre_base_info,
+        radau::gausslegendre_base_info,
+        coord)
+    scale_factor = coord.element_scale[ielement]
+    # Radau points are selected only for coord.name == "vperp" when neither
+    # ielement > 1 nor coord.irank > 0 holds; Lobatto points otherwise
+    # (including every element of a non-vperp coordinate, i.e. integrals of
+    # form int^infty_-infty (.) d vpa).
+    if coord.name == "vperp" && !(ielement > 1 || coord.irank > 0)
+        basis = radau
+    else
+        basis = lobatto
+    end
+    # every case reduces to M0 of the selected basis times the element scale
+    @. QQ = basis.M0*scale_factor
+    return nothing
+end
+
+# mass matrix with vperp^2 factor (matrix R), written into QQ;
+# only useful for the vperp coordinate
+function get_MR_local!(QQ,ielement,
+        lobatto::gausslegendre_base_info,
+        radau::gausslegendre_base_info,
+        coord)
+    scale_factor = coord.element_scale[ielement]
+    shift_factor = coord.element_shift[ielement]
+    if coord.name != "vperp"
+        # assume integrals of form int^infty_-infty (.) d vpa:
+        # Lobatto M0 times the element scale, no extra weighting
+        @. QQ = lobatto.M0*scale_factor
+        return nothing
+    end
+    # assume integrals of form int^infty_0 (.) vperp d vperp;
+    # extra scale and shift factors required because of vperp in integral.
+    # Lobatto points when ielement > 1 or coord.irank > 0,
+    # Radau points otherwise;
+    # both use the same combination of M0, M1 and M2.
+    basis = (ielement > 1 || coord.irank > 0) ? lobatto : radau
+    @. QQ = (basis.M0*shift_factor^2 +
+             basis.M1*2.0*shift_factor*scale_factor +
+             basis.M2*scale_factor^2)*scale_factor
+    return nothing
+end
+
+# derivative matrix (matrix P, no integration by parts), written into QQ,
+# with vperp Jacobian factor if coord is vperp
+function get_PP_local!(QQ,ielement,
+        lobatto::gausslegendre_base_info,
+        radau::gausslegendre_base_info,
+        coord)
+    scale_factor = coord.element_scale[ielement]
+    shift_factor = coord.element_shift[ielement]
+    if coord.name != "vperp"
+        # assume integrals of form int^infty_-infty (.) d vpa
+        @. QQ = lobatto.P0
+        return nothing
+    end
+    # assume integrals of form int^infty_0 (.) vperp d vperp;
+    # extra scale and shift factors required because of vperp in integral.
+    # Lobatto points when ielement > 1 or coord.irank > 0,
+    # Radau points otherwise.
+    basis = (ielement > 1 || coord.irank > 0) ? lobatto : radau
+    @. QQ = basis.P0*shift_factor + basis.P1*scale_factor
+    return nothing
+end
+
+# derivative matrix (no integration by parts), written into QQ,
+# with vperp^2 Jacobian factor if coord is vperp (matrix U);
+# not useful for vpa coordinate
+function get_PU_local!(QQ,ielement,
+        lobatto::gausslegendre_base_info,
+        radau::gausslegendre_base_info,
+        coord)
+    scale_factor = coord.element_scale[ielement]
+    shift_factor = coord.element_shift[ielement]
+    if coord.name != "vperp"
+        # assume integrals of form int^infty_-infty (.) d vpa
+        @. QQ = lobatto.P0
+        return nothing
+    end
+    # assume integrals of form int^infty_0 (.) vperp d vperp;
+    # extra scale and shift factors required because of vperp in integral
+    if ielement > 1 || coord.irank > 0
+        basis = lobatto # lobatto points
+    else
+        basis = radau # radau points
+    end
+    @. QQ = (basis.P0*shift_factor^2 +
+             basis.P1*2.0*shift_factor*scale_factor +
+             basis.P2*scale_factor^2)
+    return nothing
+end
+
+"""
+construction function for nonlinear diffusion matrices, only
+used in the assembly of the collision operator; fills the rank-3 `QQ` in place.
+`option` must be "YY0", "YY1", "YY2" or "YY3"; any other value throws an error.
+"""
+function get_QQ_local!(QQ::AbstractArray{mk_float,3},ielement,
+        lobatto::gausslegendre_base_info,radau::gausslegendre_base_info,
+        coord,option)
+        if option == "YY0" # mass-like matrix
+            get_YY0_local!(QQ,ielement,lobatto,radau,coord)
+        elseif option == "YY1" # first-derivative-like matrix
+            get_YY1_local!(QQ,ielement,lobatto,radau,coord)
+        elseif option == "YY2" # second-derivative-like matrix
+            get_YY2_local!(QQ,ielement,lobatto,radau,coord)
+        elseif option == "YY3" # first-derivative-like matrix
+            get_YY3_local!(QQ,ielement,lobatto,radau,coord)
+        else # fail loudly rather than return with QQ left unfilled
+            error("Unsupported option '$option' for get_QQ_local! (rank-3 QQ)")
+        end
+        return nothing
+end
+
+# "YY0" matrix (the mass-like option of get_QQ_local!), written into QQ
+function get_YY0_local!(QQ,ielement,
+        lobatto::gausslegendre_base_info,
+        radau::gausslegendre_base_info,
+        coord)
+    scale_factor = coord.element_scale[ielement]
+    shift_factor = coord.element_shift[ielement]
+    if coord.name != "vperp"
+        # assume integrals of form int^infty_-infty (.) d vpa
+        @. QQ = lobatto.Y00*scale_factor
+        return nothing
+    end
+    # assume integrals of form int^infty_0 (.) vperp d vperp;
+    # extra scale and shift factors required because of vperp in integral.
+    # Lobatto points when ielement > 1 or coord.irank > 0, else Radau points.
+    basis = (ielement > 1 || coord.irank > 0) ? lobatto : radau
+    @. QQ = (shift_factor*basis.Y00 + scale_factor*basis.Y01)*scale_factor
+    return nothing
+end
+
+# "YY1" matrix (a first-derivative-like option of get_QQ_local!), written into QQ
+function get_YY1_local!(QQ,ielement,
+        lobatto::gausslegendre_base_info,
+        radau::gausslegendre_base_info,
+        coord)
+    scale_factor = coord.element_scale[ielement]
+    shift_factor = coord.element_shift[ielement]
+    if coord.name != "vperp"
+        # assume integrals of form int^infty_-infty (.) d vpa
+        @. QQ = lobatto.Y10
+        return nothing
+    end
+    # assume integrals of form int^infty_0 (.) vperp d vperp;
+    # extra scale and shift factors required because of vperp in integral.
+    # Lobatto points when ielement > 1 or coord.irank > 0, else Radau points.
+    basis = (ielement > 1 || coord.irank > 0) ? lobatto : radau
+    @. QQ = shift_factor*basis.Y10 + scale_factor*basis.Y11
+    return nothing
+end
+
+# "YY2" matrix (the second-derivative-like option of get_QQ_local!), written into QQ
+function get_YY2_local!(QQ,ielement,
+        lobatto::gausslegendre_base_info,
+        radau::gausslegendre_base_info,
+        coord)
+    scale_factor = coord.element_scale[ielement]
+    shift_factor = coord.element_shift[ielement]
+    if coord.name != "vperp"
+        # assume integrals of form int^infty_-infty (.) d vpa
+        @. QQ = lobatto.Y20/scale_factor
+        return nothing
+    end
+    # assume integrals of form int^infty_0 (.) vperp d vperp;
+    # extra scale and shift factors required because of vperp in integral.
+    # Lobatto points when ielement > 1 or coord.irank > 0, else Radau points.
+    basis = (ielement > 1 || coord.irank > 0) ? lobatto : radau
+    @. QQ = (shift_factor/scale_factor)*basis.Y20 + basis.Y21
+    return nothing
+end
+
+# "YY3" matrix (a first-derivative-like option of get_QQ_local!), written into QQ
+function get_YY3_local!(QQ,ielement,
+        lobatto::gausslegendre_base_info,
+        radau::gausslegendre_base_info,
+        coord)
+    scale_factor = coord.element_scale[ielement]
+    shift_factor = coord.element_shift[ielement]
+    if coord.name != "vperp"
+        # assume integrals of form int^infty_-infty (.) d vpa
+        @. QQ = lobatto.Y30
+        return nothing
+    end
+    # assume integrals of form int^infty_0 (.) vperp d vperp;
+    # extra scale and shift factors required because of vperp in integral.
+    # Lobatto points when ielement > 1 or coord.irank > 0, else Radau points.
+    basis = (ielement > 1 || coord.irank > 0) ? lobatto : radau
+    @. QQ = shift_factor*basis.Y30 + scale_factor*basis.Y31
+    return nothing
+end
+
+
+end
diff --git a/src/initial_conditions.jl b/src/initial_conditions.jl
index 0bc2065d1..29b57d62a 100644
--- a/src/initial_conditions.jl
+++ b/src/initial_conditions.jl
@@ -4,13 +4,12 @@ module initial_conditions
 
 export allocate_pdf_and_moments
 export init_pdf_and_moments!
-export enforce_r_boundary_condition!
-export enforce_z_boundary_condition!
-export enforce_vpa_boundary_condition!
 export enforce_boundary_conditions!
 export enforce_neutral_boundary_conditions!
-export enforce_neutral_r_boundary_condition!
-export enforce_neutral_z_boundary_condition!
+
+# functional testing 
+export create_boundary_distributions
+export create_pdf
 
 # package
 using SpecialFunctions: erfc
@@ -217,6 +216,13 @@ function init_pdf_and_moments!(pdf, moments, boundary_distributions, geometry,
         end
     end
 
+    # Zero-initialise the dSdt diagnostic to avoid writing uninitialised values, as the
+    # collision operator will not be calculated before the initial values are written to
+    # file.
+    @serial_region begin
+        moments.charged.dSdt .= 0.0
+    end
+
     init_boundary_distributions!(boundary_distributions, pdf, vz, vr, vzeta, vpa, vperp,
                                  z, r, composition)
 
@@ -1123,42 +1129,51 @@ function init_boundary_distributions!(boundary_distributions, pdf, vz, vr, vzeta
                          vpa, vperp, z, r, composition)
     return nothing
 end
-
 """
 enforce boundary conditions in vpa and z on the evolved pdf;
 also enforce boundary conditions in z on all separately evolved velocity space moments of the pdf
 """
 function enforce_boundary_conditions!(f, f_r_bc, density, upar, ppar, moments, vpa_bc,
-        z_bc, r_bc, vpa, vperp, z, r, vpa_adv, z_adv, r_adv, composition, scratch_dummy,
+        z_bc, r_bc, vpa, vperp, z, r, vpa_spectral, vperp_spectral, vpa_adv, z_adv, r_adv, composition, scratch_dummy,
         r_diffusion, vpa_diffusion)
-
-    begin_s_r_z_vperp_region()
-    @loop_s_r_z_vperp is ir iz ivperp begin
-        # enforce the vpa BC
-        # use that adv.speed independent of vpa
-        @views enforce_v_boundary_condition_local!(f[:,ivperp,iz,ir,is], vpa_bc,
-                                                   vpa_adv[is].speed[:,ivperp,iz,ir],
-                                                   vpa_diffusion)
+    if vpa.n > 1
+        begin_s_r_z_vperp_region()
+        @loop_s_r_z_vperp is ir iz ivperp begin
+            # enforce the vpa BC
+            # use that adv.speed independent of vpa 
+            @views enforce_v_boundary_condition_local!(f[:,ivperp,iz,ir,is], vpa_bc,
+                             vpa_adv[is].speed[:,ivperp,iz,ir], vpa_diffusion,
+                             vpa, vpa_spectral)
+        end
+    end
+    if vperp.n > 1
+        begin_s_r_z_vpa_region()
+        @views enforce_vperp_boundary_condition!(f, vperp.bc, vperp, vperp_spectral)
+    end
+    if z.n > 1
+        begin_s_r_vperp_vpa_region()
+        # enforce the z BC on the evolved velocity space moments of the pdf
+        @views enforce_z_boundary_condition_moments!(density, moments, z_bc)
+        @views enforce_z_boundary_condition!(f, density, upar, ppar, moments, z_bc, z_adv, z,
+                                             vperp, vpa, composition,
+                                             scratch_dummy.buffer_vpavperprs_1, scratch_dummy.buffer_vpavperprs_2,
+                                             scratch_dummy.buffer_vpavperprs_3, scratch_dummy.buffer_vpavperprs_4)
+                                              
     end
-    begin_s_r_vperp_vpa_region()
-    # enforce the z BC on the evolved velocity space moments of the pdf
-    @views enforce_z_boundary_condition_moments!(density, moments, z_bc)
-    @views enforce_z_boundary_condition!(f, density, upar, ppar, moments, z_bc, z_adv, z,
-                                         vperp, vpa, composition)
     if r.n > 1
         begin_s_z_vperp_vpa_region()
         @views enforce_r_boundary_condition!(f, f_r_bc, r_bc, r_adv, vpa, vperp, z, r, composition,
             scratch_dummy.buffer_vpavperpzs_1, scratch_dummy.buffer_vpavperpzs_2,
             scratch_dummy.buffer_vpavperpzs_3, scratch_dummy.buffer_vpavperpzs_4,
-            scratch_dummy.buffer_vpavperpzrs_1, r_diffusion)
+            r_diffusion)
     end
 end
-
 function enforce_boundary_conditions!(fvec_out::scratch_pdf, moments, f_r_bc, vpa_bc,
-        z_bc, r_bc, vpa, vperp, z, r, vpa_adv, z_adv, r_adv, composition, scratch_dummy,
+        z_bc, r_bc, vpa, vperp, z, r, vpa_spectral, vperp_spectral, vpa_adv, z_adv, r_adv, composition, scratch_dummy,
         r_diffusion, vpa_diffusion)
     enforce_boundary_conditions!(fvec_out.pdf, f_r_bc, fvec_out.density, fvec_out.upar,
-        fvec_out.ppar, moments, vpa_bc, z_bc, r_bc, vpa, vperp, z, r, vpa_adv, z_adv,
+        fvec_out.ppar, moments, vpa_bc, z_bc, r_bc, vpa, vperp, z, r, 
+        vpa_spectral, vperp_spectral, vpa_adv, z_adv,
         r_adv, composition, scratch_dummy, r_diffusion, vpa_diffusion)
 end
 
@@ -1166,9 +1181,9 @@ end
 enforce boundary conditions on f in r
 """
 function enforce_r_boundary_condition!(f::AbstractArray{mk_float,5}, f_r_bc, bc::String,
-        adv::T, vpa, vperp, z, r, composition, end1::AbstractArray{mk_float,4},
+        adv, vpa, vperp, z, r, composition, end1::AbstractArray{mk_float,4},
         end2::AbstractArray{mk_float,4}, buffer1::AbstractArray{mk_float,4},
-        buffer2::AbstractArray{mk_float,4}, buffer_dfn::AbstractArray{mk_float,5}, r_diffusion::Bool) where T
+        buffer2::AbstractArray{mk_float,4}, r_diffusion::Bool)
 
     nr = r.n
 
@@ -1216,11 +1231,26 @@ end
 enforce boundary conditions on charged particle f in z
 """
 function enforce_z_boundary_condition!(pdf, density, upar, ppar, moments, bc::String, adv,
-                                       z, vperp, vpa, composition)
+                                       z, vperp, vpa, composition, end1::AbstractArray{mk_float,4},
+                                       end2::AbstractArray{mk_float,4}, buffer1::AbstractArray{mk_float,4},
+                                       buffer2::AbstractArray{mk_float,4})
+    # this block ensures periodic BC can be supported with distributed memory MPI
+    if z.nelement_global > z.nelement_local
+        # reconcile internal element boundaries across processes
+        # & enforce periodicity and external boundaries if needed
+        nz = z.n
+        @loop_s_r_vperp_vpa is ir ivperp ivpa begin
+            end1[ivpa,ivperp,ir,is] = pdf[ivpa,ivperp,1,ir,is]
+            end2[ivpa,ivperp,ir,is] = pdf[ivpa,ivperp,nz,ir,is]
+        end
+        # check on periodic bc happens inside this call below
+        @views reconcile_element_boundaries_MPI!(pdf,
+            end1, end2,	buffer1, buffer2, z)
+    end
     # define a zero that accounts for finite precision
     zero = 1.0e-14
     # 'constant' BC is time-independent f at upwind boundary
-    # and constant f beyond boundary
+    # and constant f beyond boundary 
     if bc == "constant"
         begin_s_r_vperp_vpa_region()
         density_offset = 1.0
@@ -1274,8 +1304,12 @@ enforce boundary conditions on neutral particle distribution function
 """
 function enforce_neutral_boundary_conditions!(f_neutral, f_charged,
         boundary_distributions, density_neutral, uz_neutral, pz_neutral, moments,
-        density_ion, upar_ion, Er, r_adv, z_adv, vzeta_adv, vr_adv, vz_adv, r, z, vzeta,
-        vr, vz, composition, geometry, scratch_dummy, r_diffusion, vz_diffusion)
+        density_ion, upar_ion, Er, vzeta_spectral, vr_spectral, vz_spectral, r_adv, z_adv,
+        vzeta_adv, vr_adv, vz_adv, r, z, vzeta, vr, vz, composition, geometry,
+        scratch_dummy, r_diffusion, vz_diffusion)
+
+    # without acceleration of neutrals bc on vz vr vzeta should not be required as no
+    # advection or diffusion in these coordinates
 
     if vzeta_adv !== nothing && vzeta.n_global > 1 && vzeta.bc != "none"
         begin_sn_r_z_vr_vz_region()
@@ -1284,7 +1318,7 @@ function enforce_neutral_boundary_conditions!(f_neutral, f_charged,
             @views enforce_v_boundary_condition_local!(f_neutral[ivz,ivr,:,iz,ir,isn],
                                                        vzeta.bc,
                                                        vzeta_adv[isn].speed[ivz,ivr,:,iz,ir],
-                                                       false)
+                                                       false, vzeta, vzeta_spectral)
         end
     end
     if vr_adv !== nothing && vr.n_global > 1 && vr.bc != "none"
@@ -1294,7 +1328,7 @@ function enforce_neutral_boundary_conditions!(f_neutral, f_charged,
             @views enforce_v_boundary_condition_local!(f_neutral[ivz,:,ivzeta,iz,ir,isn],
                                                        vr.bc,
                                                        vr_adv[isn].speed[ivz,:,ivzeta,iz,ir],
-                                                       false)
+                                                       false, vr, vr_spectral)
         end
     end
     if vz_adv !== nothing && vz.n_global > 1 && vz.bc != "none"
@@ -1304,30 +1338,33 @@ function enforce_neutral_boundary_conditions!(f_neutral, f_charged,
             @views enforce_v_boundary_condition_local!(f_neutral[:,ivr,ivzeta,iz,ir,isn],
                                                        vz.bc,
                                                        vz_adv[isn].speed[:,ivr,ivzeta,iz,ir],
-                                                       vz_diffusion)
+                                                       vz_diffusion, vz, vz_spectral)
         end
     end
     # f_initial contains the initial condition for enforcing a fixed-boundary-value condition
-    # no bc on vz vr vzeta required as no advection in these coordinates
-    begin_sn_r_vzeta_vr_vz_region()
-    @views enforce_neutral_z_boundary_condition!(f_neutral, density_neutral, uz_neutral,
-        pz_neutral, moments, density_ion, upar_ion, Er, boundary_distributions,
-        z_adv, z, vzeta, vr, vz, composition, geometry)
+    if z.n > 1
+        begin_sn_r_vzeta_vr_vz_region()
+        @views enforce_neutral_z_boundary_condition!(f_neutral, density_neutral, uz_neutral,
+            pz_neutral, moments, density_ion, upar_ion, Er, boundary_distributions,
+            z_adv, z, vzeta, vr, vz, composition, geometry, 
+            scratch_dummy.buffer_vzvrvzetarsn_1, scratch_dummy.buffer_vzvrvzetarsn_2,
+            scratch_dummy.buffer_vzvrvzetarsn_3, scratch_dummy.buffer_vzvrvzetarsn_4)
+    end
     if r.n > 1
         begin_sn_z_vzeta_vr_vz_region()
         @views enforce_neutral_r_boundary_condition!(f_neutral, boundary_distributions.pdf_rboundary_neutral,
                                     r_adv, vz, vr, vzeta, z, r, composition,
                                     scratch_dummy.buffer_vzvrvzetazsn_1, scratch_dummy.buffer_vzvrvzetazsn_2,
                                     scratch_dummy.buffer_vzvrvzetazsn_3, scratch_dummy.buffer_vzvrvzetazsn_4,
-                                    scratch_dummy.buffer_vzvrvzetazrsn_1, r_diffusion)
+                                    r_diffusion)
     end
 end
 
 function enforce_neutral_r_boundary_condition!(f::AbstractArray{mk_float,6},
-        f_r_bc::AbstractArray{mk_float,6}, adv::T, vz, vr, vzeta, z, r, composition,
+        f_r_bc::AbstractArray{mk_float,6}, adv, vz, vr, vzeta, z, r, composition,
         end1::AbstractArray{mk_float,5}, end2::AbstractArray{mk_float,5},
         buffer1::AbstractArray{mk_float,5}, buffer2::AbstractArray{mk_float,5},
-        buffer_dfn::AbstractArray{mk_float,6}, r_diffusion) where T #f_initial,
+        r_diffusion) #f_initial,
 
     bc = r.bc
     nr = r.n
@@ -1376,7 +1413,24 @@ enforce boundary conditions on neutral particle f in z
 """
 function enforce_neutral_z_boundary_condition!(pdf, density, uz, pz, moments, density_ion,
                                                upar_ion, Er, boundary_distributions, adv,
-                                               z, vzeta, vr, vz, composition, geometry)
+                                               z, vzeta, vr, vz, composition, geometry,
+                                               end1::AbstractArray{mk_float,5}, end2::AbstractArray{mk_float,5},
+                                               buffer1::AbstractArray{mk_float,5}, buffer2::AbstractArray{mk_float,5})
+    
+
+    if z.nelement_global > z.nelement_local
+        # reconcile internal element boundaries across processes
+        # & enforce periodicity and external boundaries if needed
+        nz = z.n
+        @loop_sn_r_vzeta_vr_vz isn ir ivzeta ivr ivz begin
+            end1[ivz,ivr,ivzeta,ir,isn] = pdf[ivz,ivr,ivzeta,1,ir,isn]
+            end2[ivz,ivr,ivzeta,ir,isn] = pdf[ivz,ivr,ivzeta,nz,ir,isn]
+        end
+        # check on periodic bc occurs within this call below
+        @views reconcile_element_boundaries_MPI!(pdf,
+            end1, end2,	buffer1, buffer2, z)
+    end
+
     zero = 1.0e-14
     # 'constant' BC is time-independent f at upwind boundary
     # and constant f beyond boundary
@@ -2084,24 +2138,10 @@ function enforce_z_boundary_condition_moments!(density, moments, bc::String)
     #    end
     #end
 end
-"""
-impose the prescribed vpa boundary condition on f
-at every z grid point
-"""
-function enforce_vpa_boundary_condition!(f, bc, src, v_diffusion)
-    nz = size(f,2)
-    nr = size(f,3)
-    for ir ∈ 1:nr
-        for iz ∈ 1:nz
-            enforce_v_boundary_condition_local!(view(f,:,iz,ir), bc, src.speed[:,iz,ir],
-                                                v_diffusion)
-        end
-    end
-end
 
 """
 """
-function enforce_v_boundary_condition_local!(f, bc, speed, v_diffusion)
+function enforce_v_boundary_condition_local!(f, bc, speed, v_diffusion, v, v_spectral)
     if bc == "zero"
         if v_diffusion || speed[1] > 0.0
             # 'upwind' boundary
@@ -2114,9 +2154,48 @@ function enforce_v_boundary_condition_local!(f, bc, speed, v_diffusion)
     elseif bc == "both_zero"
         f[1] = 0.0
         f[end] = 0.0
+    elseif bc == "zero_gradient"
+        D0 = v_spectral.lobatto.Dmat[1,:]
+        # adjust F(vpa = -L/2) so that d F / d vpa = 0 at vpa = -L/2
+        f[1] = -sum(D0[2:v.ngrid].*f[2:v.ngrid])/D0[1]
+
+        D0 = v_spectral.lobatto.Dmat[end,:]
+        # adjust F(vpa = L/2) so that d F / d vpa = 0 at vpa = L/2 (ngrid is a field of v, not a local)
+        f[end] = -sum(D0[1:v.ngrid-1].*f[end-v.ngrid+1:end-1])/D0[v.ngrid]
     elseif bc == "periodic"
         f[1] = 0.5*(f[1]+f[end])
         f[end] = f[1]
+    else
+        error("Unsupported boundary condition option '$bc' for $(v.name)")
+    end
+end
+
+"""
+enforce zero boundary condition at vperp -> infinity and, for the pseudospectral discretizations, d F / d vperp = 0 at vperp = 0 (bc == "zero"); bc == "none" leaves f untouched
+"""
+function enforce_vperp_boundary_condition!(f, bc, vperp, vperp_spectral)
+    if bc == "zero"
+        nvperp = vperp.n
+        ngrid = vperp.ngrid
+        # set zero boundary condition at the last vperp grid point (index vperp.n)
+        @loop_s_r_z_vpa is ir iz ivpa begin
+            f[ivpa,nvperp,iz,ir,is] = 0.0
+        end
+        # set regularity condition d F / d vperp = 0 at vperp = 0
+        if vperp.discretization == "gausslegendre_pseudospectral" || vperp.discretization == "chebyshev_pseudospectral"
+            D0 = vperp_spectral.radau.D0
+            @loop_s_r_z_vpa is ir iz ivpa begin
+                # adjust F(vperp = 0) so that d F / d vperp = 0 at vperp = 0, using points 2:ngrid of the first element
+                f[ivpa,1,iz,ir,is] = -sum(D0[2:ngrid].*f[ivpa,2:ngrid,iz,ir,is])/D0[1]
+            end
+        else
+            println("vperp.bc=\"$bc\" not supported by discretization "
+                    * "$(vperp.discretization)")
+        end
+    elseif bc == "none"
+        # Do nothing: f is returned unchanged
+    else
+        error("Unsupported boundary condition option '$bc' for vperp")
     end
 end
 
diff --git a/src/input_structs.jl b/src/input_structs.jl
index 89c821266..50c79486d 100644
--- a/src/input_structs.jl
+++ b/src/input_structs.jl
@@ -62,6 +62,7 @@ mutable struct advance_info
     ionization_collisions_1V::Bool
     ionization_source::Bool
     krook_collisions::Bool
+    explicit_weakform_fp_collisions::Bool
     external_source::Bool
     numerical_dissipation::Bool
     source_terms::Bool
@@ -128,6 +129,8 @@ mutable struct grid_input_mutable
     discretization::String
     # finite difference option (only used if discretization is "finite_difference")
     fd_option::String
+    # cheb option (only used if discretization is "chebyshev_pseudospectral")
+    cheb_option::String
     # boundary option
     bc::String
     # mutable struct containing advection speed options
@@ -157,6 +160,8 @@ struct grid_input
     discretization::String
     # finite difference option (only used if discretization is "finite_difference")
     fd_option::String
+    # cheb option (only used if discretization is "chebyshev_pseudospectral")
+    cheb_option::String
     # boundary option
     bc::String
     # struct containing advection speed options
@@ -313,6 +318,10 @@ mutable struct collisions_input
     krook_collision_frequency_prefactor::mk_float
     # Setting to switch between different options for Krook collision operator
     krook_collisions_option::String
+    # ion-ion self collision frequency
+    # nu_{ss'} = gamma_{ss'} n_{ref} / 2 (m_s)^2 (c_{ref})^3
+    # with gamma_ss' = 2 pi (Z_s Z_s')^2 e^4 ln \Lambda_{ss'} / (4 pi \epsilon_0)^2
+    nuii::mk_float
 end
 
 """
@@ -364,8 +373,12 @@ struct pp_input
     plot_upar0_vs_t::Bool
     # if plot_ppar0_vs_t = true, create plots of species ppar(z0) vs time
     plot_ppar0_vs_t::Bool
+    # if plot_pperp0_vs_t = true, create plots of species pperp(z0) vs time
+    plot_pperp0_vs_t::Bool
     # if plot_vth0_vs_t = true, create plots of species vth(z0) vs time
     plot_vth0_vs_t::Bool
+    # if plot_dSdt0_vs_t = true, create plots of species dSdt(z0) vs time
+    plot_dSdt0_vs_t::Bool
     # if plot_qpar0_vs_t = true, create plots of species qpar(z0) vs time
     plot_qpar0_vs_t::Bool
     # if plot_dens_vs_z_t = true, create plot of species density vs z and time
diff --git a/src/ionization.jl b/src/ionization.jl
index 147cb75ce..a921792e3 100644
--- a/src/ionization.jl
+++ b/src/ionization.jl
@@ -28,7 +28,14 @@ function constant_ionization_source!(f_out, vpa, vperp, z, r, moments, compositi
     # resolution, which then causes crashes due to overshoots giving
     # negative f??
     width = 0.5
-    rwidth = 0.25
+    vperpwidth = 0.5
+    rwidth = 0.5
+    if vperp.n > 1
+        vperpprefac = 1.0/vperpwidth^2
+    else
+        vperpprefac = 1.0
+    end
+    # loop below relies on vperp[1] = 0 when vperp.n = 1
     @loop_s_r is ir begin
         rfac = exp( - (r.grid[ir]/rwidth)^2)
 
@@ -49,8 +56,9 @@ function constant_ionization_source!(f_out, vpa, vperp, z, r, moments, compositi
                 @. vpa.scratch = vpa.grid
                 prefactor = 1.0
             end
-            @loop_vpa ivpa begin
-                f_out[ivpa,1,iz,ir,is] += dt*rfac*collisions.ionization/width*prefactor*exp(-(vpa.scratch[ivpa]/width)^2)
+            @loop_vperp_vpa ivperp ivpa begin
+                vperpfac = vperpprefac*exp( - (vperp.grid[ivperp]/vperpwidth)^2) 
+                f_out[ivpa,ivperp,iz,ir,is] += dt*rfac*vperpfac*collisions.ionization/width*prefactor*exp(-(vpa.scratch[ivpa]/width)^2)
             end
         end
     end
diff --git a/src/load_data.jl b/src/load_data.jl
index e33cb01d1..7a2152a76 100644
--- a/src/load_data.jl
+++ b/src/load_data.jl
@@ -258,6 +258,7 @@ function load_coordinate_data(fid, name; printout=false, irank=nothing, nrank=no
     L = load_variable(coord_group, "L")
     discretization = load_variable(coord_group, "discretization")
     fd_option = load_variable(coord_group, "fd_option")
+    cheb_option = load_variable(coord_group, "cheb_option")
     bc = load_variable(coord_group, "bc")
     if "element_spacing_option" ∈ keys(coord_group)
         element_spacing_option = load_variable(coord_group, "element_spacing_option")
@@ -266,7 +267,7 @@ function load_coordinate_data(fid, name; printout=false, irank=nothing, nrank=no
     end
     # Define input to create coordinate struct
     input = grid_input(name, ngrid, nelement_global, nelement_local, nrank, irank, L,
-                       discretization, fd_option, bc, advection_input("", 0.0, 0.0, 0.0),
+                       discretization, fd_option, cheb_option, bc, advection_input("", 0.0, 0.0, 0.0),
                        MPI.COMM_NULL, element_spacing_option)
 
     coord, spectral = define_coordinate(input, parallel_io)
@@ -420,7 +421,7 @@ end
 
 """
 """
-function load_charged_particle_moments_data(fid; printout=false)
+function load_charged_particle_moments_data(fid; printout=false, extended_moments = false)
     if printout
         print("Loading charged particle velocity moments data...")
     end
@@ -442,11 +443,22 @@ function load_charged_particle_moments_data(fid; printout=false)
     # Read charged species thermal speed
     thermal_speed = load_variable(group, "thermal_speed")
 
+    if extended_moments
+        # Read charged species perpendicular pressure
+        perpendicular_pressure = load_variable(group, "perpendicular_pressure")
+
+        # Read charged species entropy_production
+        entropy_production = load_variable(group, "entropy_production")
+    end
+
     if printout
         println("done.")
     end
-
-    return density, parallel_flow, parallel_pressure, parallel_heat_flux, thermal_speed
+    if extended_moments
+        return density, parallel_flow, parallel_pressure, perpendicular_pressure, parallel_heat_flux, thermal_speed, entropy_production
+    else
+        return density, parallel_flow, parallel_pressure, parallel_heat_flux, thermal_speed
+    end
 end
 
 function load_neutral_particle_moments_data(fid; printout=false)
diff --git a/src/looping.jl b/src/looping.jl
index 08018cdd8..d3f2a0451 100644
--- a/src/looping.jl
+++ b/src/looping.jl
@@ -366,9 +366,17 @@ eval(quote
              loop_ranges_store[dims] = LoopRanges(;
                  parallel_dims=dims, rank0 = rank0, ranges...)
          else
-             # Use the same ranges as serial loops
+             # Loop over all indices for non-parallelised dimensions (dimensions not in
+             # `dims`), but only loop over parallel dimensions (dimensions in `dims`) on
+             # rank0.
+             this_ranges = Dict(d=>1:n for (d,n) in dim_sizes)
+             if !rank0
+                 for d ∈ dims
+                     this_ranges[d] = 1:0
+                 end
+             end
              loop_ranges_store[dims] = LoopRanges(;
-                 parallel_dims=dims, rank0 = rank0, serial_ranges...)
+                 parallel_dims=dims, rank0 = rank0, this_ranges...)
          end
      end
 
diff --git a/src/makie_post_processing.jl b/src/makie_post_processing.jl
index ea4fcda0a..6cacb3a30 100644
--- a/src/makie_post_processing.jl
+++ b/src/makie_post_processing.jl
@@ -905,7 +905,7 @@ function get_run_info(run_dir::Union{AbstractString,Tuple{AbstractString,Union{I
             dummy_adv_input = advection_input("default", 1.0, 0.0, 0.0)
             dummy_comm = MPI.COMM_NULL
             dummy_input = grid_input("dummy", 1, 1, 1, 1, 0, 1.0,
-                                     "chebyshev_pseudospectral", "", "periodic",
+                                     "chebyshev_pseudospectral", "", "", "periodic",
                                      dummy_adv_input, dummy_comm, "uniform")
             vzeta, vzeta_spectral = define_coordinate(dummy_input)
             vzeta_chunk_size = 1
diff --git a/src/manufactured_solns.jl b/src/manufactured_solns.jl
index df267400e..3130bd674 100644
--- a/src/manufactured_solns.jl
+++ b/src/manufactured_solns.jl
@@ -205,7 +205,7 @@ using IfElse
     # ion mean parallel flow symbolic function 
     function upari_sym(Lr,Lz,r_bc,z_bc,composition,geometry,nr,manufactured_solns_input,species)
         if z_bc == "periodic"
-            upari = 0.0 #not supported
+            upari = 0.0
         elseif z_bc == "wall"
             densi = densi_sym(Lr,Lz,r_bc,z_bc,composition,manufactured_solns_input,species)
             Er, Ez, phi = electric_fields(Lr,Lz,r_bc,z_bc,composition,nr,manufactured_solns_input,species)
@@ -222,10 +222,11 @@ using IfElse
     
     # ion parallel pressure symbolic function 
     function ppari_sym(Lr,Lz,r_bc,z_bc,composition,manufactured_solns_input,species)
-        # normalisation factor due to strange pressure normalisation convention in master
+        # normalisation factor due to the pressure normalisation convention in master: pref = nref mref cref^2
         norm_fac = 0.5
         if z_bc == "periodic"
-            ppari = 0.0 # not supported
+            densi = densi_sym(Lr,Lz,r_bc,z_bc,composition,manufactured_solns_input,species)
+            ppari = densi
         elseif z_bc == "wall"
             densi = densi_sym(Lr,Lz,r_bc,z_bc,composition,manufactured_solns_input,species)
             epsilon = manufactured_solns_input.epsilon_offset
@@ -483,6 +484,7 @@ using IfElse
         Dz = Differential(z) 
         Dvpa = Differential(vpa) 
         Dvperp = Differential(vperp) 
+        Dvz = Differential(vz) 
         Dt = Differential(t) 
 
         # get geometric/composition data
@@ -511,17 +513,6 @@ using IfElse
         # the ion source to maintain the manufactured solution
         Si = ( Dt(dfni) + ( vpa * (Bzed/Bmag) - 0.5*rhostar*Er ) * Dz(dfni) + ( 0.5*rhostar*Ez*rfac ) * Dr(dfni) + ( 0.5*Ez*Bzed/Bmag ) * Dvpa(dfni)
                + cx_frequency*( densn*dfni - densi*gav_dfnn )  - ionization_frequency*dense*gav_dfnn)
-
-        include_num_diss_in_MMS = true
-        if num_diss_params.vpa_dissipation_coefficient > 0.0 && include_num_diss_in_MMS
-            Si += - num_diss_params.vpa_dissipation_coefficient*Dvpa(Dvpa(dfni))
-        end
-        if num_diss_params.r_dissipation_coefficient > 0.0 && include_num_diss_in_MMS
-            Si += - rfac*num_diss_params.r_dissipation_coefficient*Dr(Dr(dfni))
-        end
-        if num_diss_params.z_dissipation_coefficient > 0.0 && include_num_diss_in_MMS
-            Si += - num_diss_params.z_dissipation_coefficient*Dz(Dz(dfni))
-        end
         nu_krook = collisions.krook_collision_frequency_prefactor
         if nu_krook > 0.0
             Ti_over_Tref = vthi^2
@@ -536,14 +527,36 @@ using IfElse
                 pvth = 1
             end
             FMaxwellian = (densi/vthi^pvth)*exp( -( ( vpa-upari)^2 + vperp^2 )/vthi^2)
-            Si += -nuii_krook*(FMaxwellian - dfni)
+            Si += - nuii_krook*(FMaxwellian - dfni)
+        end
+        include_num_diss_in_MMS = true
+        if num_diss_params.vpa_dissipation_coefficient > 0.0 && include_num_diss_in_MMS
+            Si += - num_diss_params.vpa_dissipation_coefficient*Dvpa(Dvpa(dfni))
+        end
+        if num_diss_params.vperp_dissipation_coefficient > 0.0 && include_num_diss_in_MMS
+            Si += - num_diss_params.vperp_dissipation_coefficient*Dvperp(Dvperp(dfni))
+        end
+        if num_diss_params.r_dissipation_coefficient > 0.0 && include_num_diss_in_MMS
+            Si += - rfac*num_diss_params.r_dissipation_coefficient*Dr(Dr(dfni))
+        end
+        if num_diss_params.z_dissipation_coefficient > 0.0 && include_num_diss_in_MMS
+            Si += - num_diss_params.z_dissipation_coefficient*Dz(Dz(dfni))
         end
-
 
         Source_i = expand_derivatives(Si)
         
         # the neutral source to maintain the manufactured solution
         Sn = Dt(dfnn) + vz * Dz(dfnn) + rfac*vr * Dr(dfnn) + cx_frequency* (densi*dfnn - densn*vrvzvzeta_dfni) + ionization_frequency*dense*dfnn
+        if num_diss_params.vz_dissipation_coefficient > 0.0 && include_num_diss_in_MMS
+            Sn += - num_diss_params.vz_dissipation_coefficient*Dvz(Dvz(dfnn))
+        end
+        if num_diss_params.r_dissipation_coefficient > 0.0 && include_num_diss_in_MMS
+            Sn += - rfac*num_diss_params.r_dissipation_coefficient*Dr(Dr(dfnn))
+        end
+        if num_diss_params.z_dissipation_coefficient > 0.0 && include_num_diss_in_MMS
+            Sn += - num_diss_params.z_dissipation_coefficient*Dz(Dz(dfnn))
+        end
+        
         Source_n = expand_derivatives(Sn)
         
         Source_i_func = build_function(Source_i, vpa, vperp, z, r, t, expression=Val{false})
diff --git a/src/moment_kinetics.jl b/src/moment_kinetics.jl
index f4c1ad052..a09a68bbb 100644
--- a/src/moment_kinetics.jl
+++ b/src/moment_kinetics.jl
@@ -21,6 +21,7 @@ include("array_allocation.jl")
 include("interpolation.jl")
 include("calculus.jl")
 include("clenshaw_curtis.jl")
+include("gauss_legendre.jl")
 include("chebyshev.jl")
 include("finite_differences.jl")
 include("quadrature.jl")
@@ -38,6 +39,9 @@ include("manufactured_solns.jl") # MRH Here?
 include("external_sources.jl")
 include("initial_conditions.jl")
 include("moment_constraints.jl")
+include("fokker_planck_test.jl")
+include("fokker_planck_calculus.jl")
+include("fokker_planck.jl")
 include("advection.jl")
 include("vpa_advection.jl")
 include("z_advection.jl")
@@ -388,7 +392,7 @@ function setup_moment_kinetics(input_dict::AbstractDict;
     # the main time advance loop -- including normalisation of f by density if requested
 
     moments, fields, spectral_objects, advect_objects,
-    scratch, advance, scratch_dummy, manufactured_source_list =
+    scratch, advance, fp_arrays, scratch_dummy, manufactured_source_list =
         setup_time_advance!(pdf, vz, vr, vzeta, vpa, vperp, z, r, vz_spectral,
             vr_spectral, vzeta_spectral, vpa_spectral, vperp_spectral, z_spectral,
             r_spectral, composition, drive_input, moments, t_input, collisions, species,
@@ -420,7 +424,7 @@ function setup_moment_kinetics(input_dict::AbstractDict;
     return pdf, scratch, code_time, t_input, vz, vr, vzeta, vpa, vperp, gyrophase, z, r,
            moments, fields, spectral_objects, advect_objects,
            composition, collisions, geometry, boundary_distributions,
-           external_source_settings, num_diss_params, advance, scratch_dummy,
+           external_source_settings, num_diss_params, advance, fp_arrays, scratch_dummy,
            manufactured_source_list, ascii_io, io_moments, io_dfns
 end
 
diff --git a/src/moment_kinetics_input.jl b/src/moment_kinetics_input.jl
index 7fa693b68..1164f22cb 100644
--- a/src/moment_kinetics_input.jl
+++ b/src/moment_kinetics_input.jl
@@ -185,7 +185,9 @@ function mk_input(scan_input=Dict(); save_inputs_to_txt=false, ignore_MPI=true)
         error("Invalid option "
               * "krook_collisions_option=$(collisions.krook_collisions_option) passed")
     end
-
+    # set the Fokker-Planck collision frequency
+    collisions.nuii = get(scan_input, "nuii", 0.0)
+    
     # parameters related to the time stepping
     nstep = get(scan_input, "nstep", 5)
     dt = get(scan_input, "dt", 0.00025/sqrt(species.charged[1].initial_temperature))
@@ -279,9 +281,6 @@ function mk_input(scan_input=Dict(); save_inputs_to_txt=false, ignore_MPI=true)
     vpa.fd_option = get(scan_input, "vpa_finite_difference_option", "third_order_upwind")
     vpa.element_spacing_option = get(scan_input, "vpa_element_spacing_option", "uniform")
     
-    num_diss_params = setup_numerical_dissipation(
-        get(scan_input, "numerical_dissipation", Dict{String,Any}()), true)
-
     # overwrite some default parameters related to the vperp grid
     # ngrid is the number of grid points per element
     vperp.ngrid = get(scan_input, "vperp_ngrid", 1)
@@ -291,11 +290,9 @@ function mk_input(scan_input=Dict(); save_inputs_to_txt=false, ignore_MPI=true)
     vperp.nelement_local = vperp.nelement_global 
     # L is the box length in units of vthermal_species
     vperp.L = get(scan_input, "vperp_L", 8.0*sqrt(species.charged[1].initial_temperature))
-    # determine the boundary condition
-    # only supported option at present is "zero" and "periodic"
-    # MRH probably need to add new bc option here
-    # MRH no vperp bc currently imposed so option below not used
-    vperp.bc = get(scan_input, "vperp_bc", "periodic")
+    # Note vperp.bc is set below, after numerical dissipation is initialized, so that it
+    # can use the numerical dissipation settings to set its default value.
+    #
     # determine the discretization option for the vperp grid
     # supported options are "finite_difference_vperp" "chebyshev_pseudospectral"
     vperp.discretization = get(scan_input, "vperp_discretization", "chebyshev_pseudospectral")
@@ -367,6 +364,13 @@ function mk_input(scan_input=Dict(); save_inputs_to_txt=false, ignore_MPI=true)
              vzeta.nelement_global == 1 && vr.ngrid == vr.nelement_global == 1)
     num_diss_params = setup_numerical_dissipation(
         get(scan_input, "numerical_dissipation", Dict{String,Any}()), is_1V)
+
+    # vperp.bc is set here (a bit out of place) so that we can use
+    # num_diss_params.vperp_dissipation_coefficient to set the default.
+    vperp.bc = get(scan_input, "vperp_bc",
+                   (collisions.nuii > 0.0 ||
+                    num_diss_params.vperp_dissipation_coefficient > 0.0) ?
+                    "zero" : "none")
     
     #########################################################################
     ########## end user inputs. do not modify following code! ###############
@@ -398,37 +402,37 @@ function mk_input(scan_input=Dict(); save_inputs_to_txt=false, ignore_MPI=true)
 	z_advection_immutable = advection_input(z.advection.option, z.advection.constant_speed,
         z.advection.frequency, z.advection.oscillation_amplitude)
     z_immutable = grid_input("z", z.ngrid, z.nelement_global, z.nelement_local, nrank_z, irank_z, z.L, 
-        z.discretization, z.fd_option, z.bc, z_advection_immutable, comm_sub_z, z.element_spacing_option)
+        z.discretization, z.fd_option, z.cheb_option, z.bc, z_advection_immutable, comm_sub_z, z.element_spacing_option)
     r_advection_immutable = advection_input(r.advection.option, r.advection.constant_speed,
         r.advection.frequency, r.advection.oscillation_amplitude)
     r_immutable = grid_input("r", r.ngrid, r.nelement_global, r.nelement_local, nrank_r, irank_r, r.L,
-        r.discretization, r.fd_option, r.bc, r_advection_immutable, comm_sub_r, r.element_spacing_option)
+        r.discretization, r.fd_option, r.cheb_option, r.bc, r_advection_immutable, comm_sub_r, r.element_spacing_option)
 	# for dimensions below which do not currently use distributed-memory MPI
 	# assign dummy values to nrank, irank and comm of coord struct
     vpa_advection_immutable = advection_input(vpa.advection.option, vpa.advection.constant_speed,
         vpa.advection.frequency, vpa.advection.oscillation_amplitude)
     vpa_immutable = grid_input("vpa", vpa.ngrid, vpa.nelement_global, vpa.nelement_local, 1, 0, vpa.L,
-        vpa.discretization, vpa.fd_option, vpa.bc, vpa_advection_immutable, MPI.COMM_NULL, vpa.element_spacing_option)
+        vpa.discretization, vpa.fd_option, vpa.cheb_option, vpa.bc, vpa_advection_immutable, MPI.COMM_NULL, vpa.element_spacing_option)
     vperp_advection_immutable = advection_input(vperp.advection.option, vperp.advection.constant_speed,
         vperp.advection.frequency, vperp.advection.oscillation_amplitude)
     vperp_immutable = grid_input("vperp", vperp.ngrid, vperp.nelement_global, vperp.nelement_local, 1, 0, vperp.L,
-        vperp.discretization, vperp.fd_option, vperp.bc, vperp_advection_immutable, MPI.COMM_NULL, vperp.element_spacing_option)
+        vperp.discretization, vperp.fd_option, vperp.cheb_option, vperp.bc, vperp_advection_immutable, MPI.COMM_NULL, vperp.element_spacing_option)
     gyrophase_advection_immutable = advection_input(gyrophase.advection.option, gyrophase.advection.constant_speed,
         gyrophase.advection.frequency, gyrophase.advection.oscillation_amplitude)
     gyrophase_immutable = grid_input("gyrophase", gyrophase.ngrid, gyrophase.nelement_global, gyrophase.nelement_local, 1, 0, gyrophase.L,
-        gyrophase.discretization, gyrophase.fd_option, gyrophase.bc, gyrophase_advection_immutable, MPI.COMM_NULL, gyrophase.element_spacing_option)
+        gyrophase.discretization, gyrophase.fd_option, gyrophase.cheb_option, gyrophase.bc, gyrophase_advection_immutable, MPI.COMM_NULL, gyrophase.element_spacing_option)
     vz_advection_immutable = advection_input(vz.advection.option, vz.advection.constant_speed,
         vz.advection.frequency, vz.advection.oscillation_amplitude)
     vz_immutable = grid_input("vz", vz.ngrid, vz.nelement_global, vz.nelement_local, 1, 0, vz.L,
-        vz.discretization, vz.fd_option, vz.bc, vz_advection_immutable, MPI.COMM_NULL, vz.element_spacing_option)
+        vz.discretization, vz.fd_option, vz.cheb_option, vz.bc, vz_advection_immutable, MPI.COMM_NULL, vz.element_spacing_option)
     vr_advection_immutable = advection_input(vr.advection.option, vr.advection.constant_speed,
         vr.advection.frequency, vr.advection.oscillation_amplitude)
     vr_immutable = grid_input("vr", vr.ngrid, vr.nelement_global, vr.nelement_local, 1, 0, vr.L,
-        vr.discretization, vr.fd_option, vr.bc, vr_advection_immutable, MPI.COMM_NULL, vr.element_spacing_option)
+        vr.discretization, vr.fd_option, vr.cheb_option, vr.bc, vr_advection_immutable, MPI.COMM_NULL, vr.element_spacing_option)
     vzeta_advection_immutable = advection_input(vzeta.advection.option, vzeta.advection.constant_speed,
         vzeta.advection.frequency, vzeta.advection.oscillation_amplitude)
     vzeta_immutable = grid_input("vzeta", vzeta.ngrid, vzeta.nelement_global, vzeta.nelement_local, 1, 0, vzeta.L,
-        vzeta.discretization, vzeta.fd_option, vzeta.bc, vzeta_advection_immutable, MPI.COMM_NULL, vzeta.element_spacing_option)
+        vzeta.discretization, vzeta.fd_option, vzeta.cheb_option, vzeta.bc, vzeta_advection_immutable, MPI.COMM_NULL, vzeta.element_spacing_option)
     
     species_charged_immutable = Array{species_parameters,1}(undef,n_ion_species)
     species_neutral_immutable = Array{species_parameters,1}(undef,n_neutral_species)
@@ -526,10 +530,10 @@ function mk_input(scan_input=Dict(); save_inputs_to_txt=false, ignore_MPI=true)
         io = devnull
     end
 
-    # check input to catch errors/unsupported options
-    check_input(io, output_dir, nstep, dt, r_immutable, z_immutable, vpa_immutable,
-                composition, species_immutable, evolve_moments, num_diss_params,
-                save_inputs_to_txt)
+    # check input (and initialized coordinate structs) to catch errors/unsupported options
+    check_input(io, output_dir, nstep, dt, r, z, vpa, vperp, composition,
+                species_immutable, evolve_moments, num_diss_params, save_inputs_to_txt,
+                collisions)
 
     # return immutable structs for z, vpa, species and composition
     all_inputs = (io_immutable, evolve_moments, t_input, z, z_spectral, r, r_spectral,
@@ -554,6 +558,8 @@ function load_defaults(n_ion_species, n_neutral_species, electron_physics)
     conservation = true
     #advective_form = false
     evolve_moments = evolve_moments_options(evolve_density, evolve_parallel_flow, evolve_parallel_pressure, conservation)#advective_form)
+    # default cheb_option, copied into every coordinate's input below; the other value noted here is "matrix"
+    cheb_option = "FFT"
     #################### parameters related to the z grid ######################
     # ngrid_z is number of grid points per element
     ngrid_z = 100
@@ -577,6 +583,8 @@ function load_defaults(n_ion_species, n_neutral_species, electron_physics)
     #finite_difference_option_z = "first_order_upwind"
     #finite_difference_option_z = "second_order_upwind"
     finite_difference_option_z = "third_order_upwind"
+    #cheb_option_z = "FFT" # "matrix"
+    cheb_option_z = cheb_option
     # determine the option used for the advection speed in z
     # supported options are "constant" and "oscillating",
     # in addition to the "default" option which uses dz/dt = vpa as the advection speed
@@ -593,7 +601,7 @@ function load_defaults(n_ion_species, n_neutral_species, electron_physics)
     element_spacing_option_z = "uniform"
     # create a mutable structure containing the input info related to the z grid
     z = grid_input_mutable("z", ngrid_z, nelement_global_z, nelement_local_z, L_z,
-        discretization_option_z, finite_difference_option_z, boundary_option_z,
+        discretization_option_z, finite_difference_option_z, cheb_option_z,  boundary_option_z,
         advection_z, element_spacing_option_z)
     #################### parameters related to the r grid ######################
     # ngrid_r is number of grid points per element
@@ -618,6 +626,8 @@ function load_defaults(n_ion_species, n_neutral_species, electron_physics)
     #finite_difference_option_r = "first_order_upwind"
     #finite_difference_option_r = "second_order_upwind"
     finite_difference_option_r = "third_order_upwind"
+    #cheb_option_r = "FFT" #"matrix"
+    cheb_option_r = cheb_option
     # determine the option used for the advection speed in r
     # supported options are "constant" and "oscillating",
     # in addition to the "default" option which uses dr/dt = vpa as the advection speed
@@ -634,7 +644,7 @@ function load_defaults(n_ion_species, n_neutral_species, electron_physics)
     element_spacing_option_r = "uniform"
     # create a mutable structure containing the input info related to the r grid
     r = grid_input_mutable("r", ngrid_r, nelement_global_r, nelement_local_r, L_r,
-        discretization_option_r, finite_difference_option_r, boundary_option_r,
+        discretization_option_r, finite_difference_option_r, cheb_option_r, boundary_option_r,
         advection_r, element_spacing_option_r)
     ############################################################################
     ################### parameters related to the vpa grid #####################
@@ -657,6 +667,8 @@ function load_defaults(n_ion_species, n_neutral_species, electron_physics)
     # supported options are "third_order_upwind", "second_order_upwind" and "first_order_upwind"
     #finite_difference_option_vpa = "second_order_upwind"
     finite_difference_option_vpa = "third_order_upwind"
+    #cheb_option_vpa = "FFT" # "matrix"
+    cheb_option_vpa = cheb_option
     # determine the option used for the advection speed in vpa
     # supported options are "constant" and "oscillating",
     # in addition to the "default" option which uses dvpa/dt = q*Ez/m as the advection speed
@@ -673,7 +685,7 @@ function load_defaults(n_ion_species, n_neutral_species, electron_physics)
     element_spacing_option_vpa = "uniform"
     # create a mutable structure containing the input info related to the vpa grid
     vpa = grid_input_mutable("vpa", ngrid_vpa, nelement_vpa, nelement_vpa, L_vpa,
-        discretization_option_vpa, finite_difference_option_vpa, boundary_option_vpa,
+        discretization_option_vpa, finite_difference_option_vpa, cheb_option_vpa, boundary_option_vpa,
         advection_vpa, element_spacing_option_vpa)
     ############################################################################
     ################### parameters related to the vperp grid #####################
@@ -696,6 +708,8 @@ function load_defaults(n_ion_species, n_neutral_species, electron_physics)
     # supported options are "third_order_upwind", "second_order_upwind" and "first_order_upwind"
     #finite_difference_option_vperp = "second_order_upwind"
     finite_difference_option_vperp = "third_order_upwind"
+    #cheb_option_vperp = "FFT" # "matrix"
+    cheb_option_vperp = cheb_option
     # determine the option used for the advection speed in vperp
     # supported options are "constant" and "oscillating",
     advection_option_vperp = "default"
@@ -711,7 +725,7 @@ function load_defaults(n_ion_species, n_neutral_species, electron_physics)
     element_spacing_option_vperp = "uniform"
     # create a mutable structure containing the input info related to the vperp grid
     vperp = grid_input_mutable("vperp", ngrid_vperp, nelement_vperp, nelement_vperp, L_vperp,
-        discretization_option_vperp, finite_difference_option_vperp, boundary_option_vperp,
+        discretization_option_vperp, finite_difference_option_vperp, cheb_option_vperp, boundary_option_vperp,
         advection_vperp, element_spacing_option_vperp)
     ############################################################################
     ################### parameters related to the gyrophase grid #####################
@@ -726,6 +740,8 @@ function load_defaults(n_ion_species, n_neutral_species, electron_physics)
     boundary_option_gyrophase = "periodic"
     discretization_option_gyrophase = "finite_difference"
     finite_difference_option_gyrophase = "third_order_upwind"
+    #cheb_option_gyrophase = "FFT" #"matrix"
+    cheb_option_gyrophase = cheb_option
     advection_option_gyrophase = "default"
     advection_speed_gyrophase = 0.0
     frequency_gyrophase = 1.0
@@ -735,7 +751,7 @@ function load_defaults(n_ion_species, n_neutral_species, electron_physics)
     element_spacing_option_gyrophase = "uniform"
     # create a mutable structure containing the input info related to the gyrophase grid
     gyrophase = grid_input_mutable("gyrophase", ngrid_gyrophase, nelement_gyrophase, nelement_gyrophase, L_gyrophase,
-        discretization_option_gyrophase, finite_difference_option_gyrophase, boundary_option_gyrophase,
+        discretization_option_gyrophase, finite_difference_option_gyrophase, cheb_option_gyrophase, boundary_option_gyrophase,
         advection_gyrophase, element_spacing_option_gyrophase)
     ############################################################################
     ################### parameters related to the vr grid #####################
@@ -756,6 +772,8 @@ function load_defaults(n_ion_species, n_neutral_species, electron_physics)
     # supported options are "third_order_upwind", "second_order_upwind" and "first_order_upwind"
     #finite_difference_option_vr = "second_order_upwind"
     finite_difference_option_vr = "third_order_upwind"
+    #cheb_option_vr = "FFT" # "matrix"
+    cheb_option_vr = cheb_option
     # determine the option used for the advection speed in vr
     # supported options are "constant" and "oscillating",
     advection_option_vr = "default"
@@ -771,7 +789,7 @@ function load_defaults(n_ion_species, n_neutral_species, electron_physics)
     element_spacing_option_vr = "uniform"
     # create a mutable structure containing the input info related to the vr grid
     vr = grid_input_mutable("vr", ngrid_vr, nelement_vr, nelement_vr, L_vr,
-        discretization_option_vr, finite_difference_option_vr, boundary_option_vr,
+        discretization_option_vr, finite_difference_option_vr, cheb_option_vr, boundary_option_vr,
         advection_vr, element_spacing_option_vr)
     ############################################################################
     ################### parameters related to the vz grid #####################
@@ -792,6 +810,8 @@ function load_defaults(n_ion_species, n_neutral_species, electron_physics)
     # supported options are "third_order_upwind", "second_order_upwind" and "first_order_upwind"
     #finite_difference_option_vz = "second_order_upwind"
     finite_difference_option_vz = "third_order_upwind"
+    #cheb_option_vz = "FFT" # "matrix"
+    cheb_option_vz = cheb_option
     # determine the option used for the advection speed in vz
     # supported options are "constant" and "oscillating",
     advection_option_vz = "default"
@@ -807,7 +827,7 @@ function load_defaults(n_ion_species, n_neutral_species, electron_physics)
     element_spacing_option_vz = "uniform"
     # create a mutable structure containing the input info related to the vz grid
     vz = grid_input_mutable("vz", ngrid_vz, nelement_vz, nelement_vz, L_vz,
-        discretization_option_vz, finite_difference_option_vz, boundary_option_vz,
+        discretization_option_vz, finite_difference_option_vz, cheb_option_vz, boundary_option_vz,
         advection_vz, element_spacing_option_vz)
     ############################################################################
     ################### parameters related to the vzeta grid #####################
@@ -828,6 +848,8 @@ function load_defaults(n_ion_species, n_neutral_species, electron_physics)
     # supported options are "third_order_upwind", "second_order_upwind" and "first_order_upwind"
     #finite_difference_option_vzeta = "second_order_upwind"
     finite_difference_option_vzeta = "third_order_upwind"
+    #cheb_option_vzeta = "FFT" # "matrix"
+    cheb_option_vzeta = cheb_option
     # determine the option used for the advection speed in vzeta
     # supported options are "constant" and "oscillating",
     advection_option_vzeta = "default"
@@ -843,7 +865,7 @@ function load_defaults(n_ion_species, n_neutral_species, electron_physics)
     element_spacing_option_vzeta = "uniform"
     # create a mutable structure containing the input info related to the vzeta grid
     vzeta = grid_input_mutable("vzeta", ngrid_vzeta, nelement_vzeta, nelement_vzeta, L_vzeta,
-        discretization_option_vzeta, finite_difference_option_vzeta, boundary_option_vzeta,
+        discretization_option_vzeta, finite_difference_option_vzeta, cheb_option_vzeta, boundary_option_vzeta,
         advection_vzeta, element_spacing_option_vzeta)
     #############################################################################
     # define default values and create corresponding mutable structs holding
@@ -970,9 +992,9 @@ function load_defaults(n_ion_species, n_neutral_species, electron_physics)
     ionization = 0.0
     constant_ionization_rate = false
     krook_collision_frequency_prefactor = -1.0
+    nuii = 0.0
     collisions = collisions_input(charge_exchange, ionization, constant_ionization_rate,
-                                  krook_collision_frequency_prefactor,"none")
-
+                                  krook_collision_frequency_prefactor,"none", nuii)
     Bzed = 1.0 # magnetic field component along z
     Bmag = 1.0 # magnetic field strength
     bzed = 1.0 # component of b unit vector along z
@@ -987,8 +1009,8 @@ end
 """
 check various input options to ensure they are all valid/consistent
 """
-function check_input(io, output_dir, nstep, dt, r, z, vpa, composition, species,
-                     evolve_moments, num_diss_params, save_inputs_to_txt)
+function check_input(io, output_dir, nstep, dt, r, z, vpa, vperp, composition, species,
+                     evolve_moments, num_diss_params, save_inputs_to_txt, collisions)
     # copy the input file to the output directory to be saved
     if save_inputs_to_txt && global_rank[] == 0
         cp(joinpath(@__DIR__, "moment_kinetics_input.jl"), joinpath(output_dir, "moment_kinetics_input.jl"), force=true)
@@ -998,12 +1020,22 @@ function check_input(io, output_dir, nstep, dt, r, z, vpa, composition, species,
     check_coordinate_input(r, "r", io)
     check_coordinate_input(z, "z", io)
     check_coordinate_input(vpa, "vpa", io)
+    check_coordinate_input(vperp, "vperp", io)
     # if the parallel flow is evolved separately, then the density must also be evolved separately
     if evolve_moments.parallel_flow && !evolve_moments.density
         print(io,">evolve_moments.parallel_flow = true, but evolve_moments.density = false.")
         println(io, "this is not a supported option.  forcing evolve_moments.density = true.")
         evolve_moments.density = true
     end
+    if collisions.nuii > 0.0
+        # check that the grids support the collision operator (vpa and vperp must both be Gauss-Legendre)
+        print(io, "The self-collision operator is switched on \n nuii = $(collisions.nuii) \n")
+        if !(vpa.discretization == "gausslegendre_pseudospectral") || !(vperp.discretization == "gausslegendre_pseudospectral")
+            error("ERROR: you are using \n      vpa.discretization='"*vpa.discretization*
+              "' \n      vperp.discretization='"*vperp.discretization*"' \n      with the ion self-collision operator \n"*
+              "ERROR: you should use \n       vpa.discretization='gausslegendre_pseudospectral' \n       vperp.discretization='gausslegendre_pseudospectral'")
+        end
+    end
 end
 
 """
@@ -1031,6 +1063,9 @@ function check_coordinate_input(coord, coord_name, io)
     if coord.discretization == "chebyshev_pseudospectral"
         print(io,">$coord_name.discretization = 'chebyshev_pseudospectral'.  ")
         println(io,"using a Chebyshev pseudospectral method in $coord_name.")
+    elseif coord.discretization == "gausslegendre_pseudospectral"
+        print(io,">$coord_name.discretization = 'gausslegendre_pseudospectral'.  ")
+        println(io,"using a Gauss-Legendre-Lobatto pseudospectral method in $coord_name.")
     elseif coord.discretization == "finite_difference"
         println(io,">$coord_name.discretization = 'finite_difference', ",
             "and $coord_name.fd_option = ", coord.fd_option,
@@ -1040,11 +1075,12 @@ function check_coordinate_input(coord, coord_name, io)
         input_option_error("$coord_name.discretization", coord.discretization)
     end
     # boundary_option determines coord boundary condition
-    # supported options are "constant" and "periodic"
     if coord.bc == "constant"
         println(io,">$coord_name.bc = 'constant'.  enforcing constant incoming BC in $coord_name.")
     elseif coord.bc == "zero"
-        println(io,">$coord_name.bc = 'zero'.  enforcing zero incoming BC in $coord_name.")
+        println(io,">$coord_name.bc = 'zero'.  enforcing zero incoming BC in $coord_name. Enforcing zero at both boundaries if diffusion operator is present.")
+    elseif coord.bc == "zero_gradient"
+        println(io,">$coord_name.bc = 'zero_gradient'.  enforcing zero gradients at both limits of $coord_name domain.")
     elseif coord.bc == "both_zero"
         println(io,">$coord_name.bc = 'both_zero'.  enforcing zero BC in $coord_name.")
     elseif coord.bc == "periodic"
@@ -1056,9 +1092,20 @@ function check_coordinate_input(coord, coord_name, io)
     else
         input_option_error("$coord_name.bc", coord.bc)
     end
-    println(io,">using ", coord.ngrid, " grid points per $coord_name element on ",
-            coord.nelement_global, " elements across the $coord_name domain [",
-            -0.5*coord.L, ",", 0.5*coord.L, "].")
+    if coord.name == "vperp"
+        println(io,">using ", coord.ngrid, " grid points per $coord_name element on ",
+                coord.nelement_global, " elements across the $coord_name domain [",
+                0.0, ",", coord.L, "].")
+
+        if coord.bc != "zero" && coord.n_global > 1 && global_rank[] == 0
+            println("WARNING: regularity condition (df/dvperp=0 at vperp=0) not being "
+                    * "imposed. Collisions or vperp-diffusion will be unstable.")
+        end
+    else
+        println(io,">using ", coord.ngrid, " grid points per $coord_name element on ",
+                coord.nelement_global, " elements across the $coord_name domain [",
+                -0.5*coord.L, ",", 0.5*coord.L, "].")
+    end
 end
 
 """
diff --git a/src/moment_kinetics_structs.jl b/src/moment_kinetics_structs.jl
index 903068cd1..f2c16d4b0 100644
--- a/src/moment_kinetics_structs.jl
+++ b/src/moment_kinetics_structs.jl
@@ -51,6 +51,12 @@ All the specific discretizations in moment_kinetics are subtypes of this type.
 """
 abstract type discretization_info end
 
+"""
+discretization_info for a discretization that supports 'weak form' methods, for one
+dimension
+"""
+abstract type weak_discretization_info <: discretization_info end
+
 """
 Type representing a spatial dimension with only one grid point
 """
diff --git a/src/numerical_dissipation.jl b/src/numerical_dissipation.jl
index def10baeb..64eb28138 100644
--- a/src/numerical_dissipation.jl
+++ b/src/numerical_dissipation.jl
@@ -3,8 +3,11 @@
 module numerical_dissipation
 
 export setup_numerical_dissipation, vpa_boundary_buffer_decay!,
-       vpa_boundary_buffer_diffusion!, vpa_dissipation!, z_dissipation!,
-       r_dissipation!, force_minimum_pdf_value!, force_minimum_pdf_value_neutral!
+       vpa_boundary_buffer_diffusion!, force_minimum_pdf_value!, force_minimum_pdf_value_neutral!,
+        vpa_dissipation!, vperp_dissipation!, 
+        z_dissipation!, r_dissipation!, 
+        vz_dissipation_neutral!, z_dissipation_neutral!,
+        r_dissipation_neutral! 
 
 using Base.Iterators: flatten
 
@@ -18,6 +21,7 @@ Base.@kwdef struct numerical_dissipation_parameters
     vpa_boundary_buffer_diffusion_coefficient::mk_float = -1.0
     vpa_dissipation_coefficient::mk_float = -1.0
     vz_dissipation_coefficient::mk_float = -1.0
+    vperp_dissipation_coefficient::mk_float = -1.0
     z_dissipation_coefficient::mk_float = -1.0
     r_dissipation_coefficient::mk_float = -1.0
     moment_dissipation_coefficient::mk_float = -1.0
@@ -242,7 +246,7 @@ function vpa_dissipation!(f_out, f_in, vpa, spectral::T_spectral, dt,
         num_diss_params::numerical_dissipation_parameters) where T_spectral
 
     diffusion_coefficient = num_diss_params.vpa_dissipation_coefficient
-    if diffusion_coefficient <= 0.0
+    if diffusion_coefficient <= 0.0 || vpa.n == 1
         return nothing
     end
 
@@ -258,36 +262,62 @@ function vpa_dissipation!(f_out, f_in, vpa, spectral::T_spectral, dt,
     #     # expected convergence of Chebyshev pseudospectral scheme
     #     diffusion_coefficient *= (vpa.L/vpa.nelement)^(vpa.ngrid-1)
     # end
-
     @loop_s_r_z_vperp is ir iz ivperp begin
-        # # Don't want to dissipate the fluid moments, so divide out the Maxwellian, then
-        # # diffuse the result, i.e.
-        # # df/dt += diffusion_coefficient * f_M d2(f/f_M)/dvpa2
-        # # Store f_M in vpa.scratch
-        # if (moments.evolve_ppar || moments.evolve_vth) && moments.evolve_upar
-        #     @views @. vpa.scratch = exp(-vpa.grid^2)
-        # elseif moments.evolve_ppar || moments.evolve_vth
-        #     vth = sqrt(2.0*fvec_in.ppar[iz,ir,is]/fvec_in.density[iz,ir,is])
-        #     @views @. vpa.scratch = exp(-(vpa.grid - fvec_in.upar[iz,ir,is]/vth)^2)
-        # elseif moments.evolve_upar
-        #     vth = sqrt(2.0*fvec_in.ppar[iz,ir,is]/fvec_in.density[iz,ir,is])
-        #     @views @. vpa.scratch = exp(-(vpa.grid/vth)^2)
-        # elseif moments.evolve_density
-        #     vth = sqrt(2.0*fvec_in.ppar[iz,ir,is]/fvec_in.density[iz,ir,is])
-        #     @views @. vpa.scratch = exp(-((vpa.grid - fvec_in.upar[iz,ir,is])/vth)^2)
-        # else
-        #     vth = sqrt(2.0*fvec_in.ppar[iz,ir,is]/fvec_in.density[iz,ir,is])
-        #     @views @. vpa.scratch = (fvec_in.density[iz,ir,is] *
-        #                              exp(-((vpa.grid - fvec_in.upar[iz,ir,is])/vth)^2))
-        # end
-        # @views @. vpa.scratch2 = fvec_in.pdf[:,iz,ir,is] / vpa.scratch
-        # derivative!(vpa.scratch3, vpa.scratch2, vpa, spectral, Val(2))
-        # @views @. f_out[:,iz,ir,is] += dt * diffusion_coefficient * vpa.scratch *
-        #                                vpa.scratch3
-        vpa.scratch2 .= 1.0 # placeholder for Q in d / d vpa ( Q d f / d vpa)
-        @views second_derivative!(vpa.scratch, f_in[:,ivperp,iz,ir,is], vpa.scratch2, vpa, spectral)
+    # # Don't want to dissipate the fluid moments, so divide out the Maxwellian, then
+    # # diffuse the result, i.e.
+    # # df/dt += diffusion_coefficient * f_M d2(f/f_M)/dvpa2
+    # # Store f_M in vpa.scratch
+    # if (moments.evolve_ppar || moments.evolve_vth) && moments.evolve_upar
+    #     @views @. vpa.scratch = exp(-vpa.grid^2)
+    # elseif moments.evolve_ppar || moments.evolve_vth
+    #     vth = sqrt(2.0*fvec_in.ppar[iz,ir,is]/fvec_in.density[iz,ir,is])
+    #     @views @. vpa.scratch = exp(-(vpa.grid - fvec_in.upar[iz,ir,is]/vth)^2)
+    # elseif moments.evolve_upar
+    #     vth = sqrt(2.0*fvec_in.ppar[iz,ir,is]/fvec_in.density[iz,ir,is])
+    #     @views @. vpa.scratch = exp(-(vpa.grid/vth)^2)
+    # elseif moments.evolve_density
+    #     vth = sqrt(2.0*fvec_in.ppar[iz,ir,is]/fvec_in.density[iz,ir,is])
+    #     @views @. vpa.scratch = exp(-((vpa.grid - fvec_in.upar[iz,ir,is])/vth)^2)
+    # else
+    #     vth = sqrt(2.0*fvec_in.ppar[iz,ir,is]/fvec_in.density[iz,ir,is])
+    #     @views @. vpa.scratch = (fvec_in.density[iz,ir,is] *
+    #                              exp(-((vpa.grid - fvec_in.upar[iz,ir,is])/vth)^2))
+    # end
+    # @views @. vpa.scratch2 = fvec_in.pdf[:,iz,ir,is] / vpa.scratch
+    # derivative!(vpa.scratch3, vpa.scratch2, vpa, spectral, Val(2))
+    # @views @. f_out[:,iz,ir,is] += dt * diffusion_coefficient * vpa.scratch *
+    #                                vpa.scratch3
+        @views second_derivative!(vpa.scratch, f_in[:,ivperp,iz,ir,is], vpa, spectral)
         @views @. f_out[:,ivperp,iz,ir,is] += dt * diffusion_coefficient * vpa.scratch
     end
+    return nothing
+end
+
+"""
+Add diffusion in the vperp direction to suppress oscillations
+
+Disabled by default.
+
+The diffusion coefficient is set in the input TOML file by the parameter
+```
+[numerical_dissipation]
+vperp_dissipation_coefficient = 0.1
+```
+"""
+function vperp_dissipation!(f_out, f_in, vperp, spectral::T_spectral, dt,
+        num_diss_params::numerical_dissipation_parameters) where T_spectral
+    # NOTE(review): presumably sets up the loop ranges used by @loop_s_r_z_vpa below; it runs even when the term is disabled — confirm
+    begin_s_r_z_vpa_region()
+
+    diffusion_coefficient = num_diss_params.vperp_dissipation_coefficient
+    if diffusion_coefficient <= 0.0 || vperp.n == 1
+        return nothing
+    end
+    # for each (ivpa, iz, ir, is): take the second derivative of f_in along vperp into vperp.scratch, then add dt * coefficient * scratch to f_out
+    @loop_s_r_z_vpa is ir iz ivpa begin
+        @views second_derivative!(vperp.scratch, f_in[ivpa,:,iz,ir,is], vperp, spectral)
+        @views @. f_out[ivpa,:,iz,ir,is] += dt * diffusion_coefficient * vperp.scratch
+    end
 
     return nothing
 end
@@ -312,31 +342,26 @@ function z_dissipation!(f_out, f_in, z, z_spectral::T_spectral, dt,
         num_diss_params::numerical_dissipation_parameters, scratch_dummy) where T_spectral
 
     diffusion_coefficient = num_diss_params.z_dissipation_coefficient
-    if diffusion_coefficient <= 0.0
+    if diffusion_coefficient <= 0.0 || z.n == 1
         return nothing
     end
 
     begin_s_r_vperp_vpa_region()
 
-    # calculate d / d z ( Q d f / d z ) using distributed memory compatible routines
+    # calculate d^2 f / d z^2 using distributed memory compatible routines
     # first compute d f / d z using centred reconciliation and place in dummy array #1
     derivative_z!(scratch_dummy.buffer_vpavperpzrs_1, f_in[:,:,:,:,:],
                   scratch_dummy.buffer_vpavperprs_1, scratch_dummy.buffer_vpavperprs_2,
                   scratch_dummy.buffer_vpavperprs_3,scratch_dummy.buffer_vpavperprs_4,
                   z_spectral,z)
-    # form Q d f / d r and place in dummy array #2
-    @loop_s_r_vperp_vpa is ir ivperp ivpa begin
-        Q = 1.0 # placeholder for geometrical or velocity space dependent metric coefficient
-        @. scratch_dummy.buffer_vpavperpzrs_2[ivpa,ivperp,:,ir,is] =  Q * scratch_dummy.buffer_vpavperpzrs_1[ivpa,ivperp,:,ir,is]
-    end
-    # compute d / d z ( Q d f / d z ) using centred reconciliation and place in dummy array #1
-    derivative_z!(scratch_dummy.buffer_vpavperpzrs_1, scratch_dummy.buffer_vpavperpzrs_2[:,:,:,:,:],
+    # compute d^2 f / d z^2 using centred reconciliation and place in dummy array #2
+    derivative_z!(scratch_dummy.buffer_vpavperpzrs_2, scratch_dummy.buffer_vpavperpzrs_1,
                   scratch_dummy.buffer_vpavperprs_1, scratch_dummy.buffer_vpavperprs_2,
                   scratch_dummy.buffer_vpavperprs_3,scratch_dummy.buffer_vpavperprs_4,
                   z_spectral,z)
-    # advance f due to diffusion_coefficient * d / d z ( Q d f / d z )
+    # advance f due to diffusion_coefficient * d^2 f / d z^2
     @loop_s_r_vperp_vpa is ir ivperp ivpa begin
-        @views @. f_out[ivpa,ivperp,:,ir,is] += dt * diffusion_coefficient * scratch_dummy.buffer_vpavperpzrs_1[ivpa,ivperp,:,ir,is]
+        @views @. f_out[ivpa,ivperp,:,ir,is] += dt * diffusion_coefficient * scratch_dummy.buffer_vpavperpzrs_2[ivpa,ivperp,:,ir,is]
     end
 
     return nothing
@@ -369,25 +394,20 @@ function r_dissipation!(f_out, f_in, r, r_spectral::T_spectral, dt,
 
     begin_s_z_vperp_vpa_region()
 
-    # calculate d / d r ( Q d f / d r ) using distributed memory compatible routines
+    # calculate d^2 f / d r^2 using distributed memory compatible routines
     # first compute d f / d r using centred reconciliation and place in dummy array #1
     derivative_r!(scratch_dummy.buffer_vpavperpzrs_1, f_in[:,:,:,:,:],
                   scratch_dummy.buffer_vpavperpzs_1, scratch_dummy.buffer_vpavperpzs_2,
                   scratch_dummy.buffer_vpavperpzs_3,scratch_dummy.buffer_vpavperpzs_4,
                   r_spectral,r)
-    # form Q d f / d r and place in dummy array #2
-    @loop_s_z_vperp_vpa is iz ivperp ivpa begin
-        Q = 1.0 # placeholder for geometrical or velocity space dependent metric coefficient
-        @. scratch_dummy.buffer_vpavperpzrs_2[ivpa,ivperp,iz,:,is] =  Q * scratch_dummy.buffer_vpavperpzrs_1[ivpa,ivperp,iz,:,is]
-    end
-    # compute d / d r ( Q d f / d r ) using centred reconciliation and place in dummy array #1
-    derivative_r!(scratch_dummy.buffer_vpavperpzrs_1, scratch_dummy.buffer_vpavperpzrs_2[:,:,:,:,:],
+    # compute d^2 f / d r^2 using centred reconciliation and place in dummy array #2
+    derivative_r!(scratch_dummy.buffer_vpavperpzrs_2, scratch_dummy.buffer_vpavperpzrs_1,
                   scratch_dummy.buffer_vpavperpzs_1, scratch_dummy.buffer_vpavperpzs_2,
                   scratch_dummy.buffer_vpavperpzs_3,scratch_dummy.buffer_vpavperpzs_4,
                   r_spectral,r)
-    # advance f due to diffusion_coefficient * d / d r ( Q d f / d r )
+    # advance f due to diffusion_coefficient * d^2 f / d r^2
     @loop_s_z_vperp_vpa is iz ivperp ivpa begin
-        @views @. f_out[ivpa,ivperp,iz,:,is] += dt * diffusion_coefficient * scratch_dummy.buffer_vpavperpzrs_1[ivpa,ivperp,iz,:,is]
+        @views @. f_out[ivpa,ivperp,iz,:,is] += dt * diffusion_coefficient * scratch_dummy.buffer_vpavperpzrs_2[ivpa,ivperp,iz,:,is]
     end
 
     return nothing
@@ -415,8 +435,7 @@ function vz_dissipation_neutral!(f_out, f_in, vz, spectral::T_spectral, dt,
     begin_sn_r_z_vzeta_vr_region()
 
     @loop_sn_r_z_vzeta_vr isn ir iz ivzeta ivr begin
-        vz.scratch2 .= 1.0 # placeholder for Q in d / d vpa ( Q d f / d vpa)
-        @views second_derivative!(vz.scratch, f_in[:,ivr,ivzeta,iz,ir,isn], vz.scratch2, vz, spectral)
+        @views second_derivative!(vz.scratch, f_in[:,ivr,ivzeta,iz,ir,isn], vz, spectral)
         @views @. f_out[:,ivr,ivzeta,iz,ir,isn] += dt * diffusion_coefficient * vz.scratch
     end
 
@@ -449,25 +468,20 @@ function z_dissipation_neutral!(f_out, f_in, z, z_spectral::T_spectral, dt,
 
     begin_sn_r_vzeta_vr_vz_region()
 
-    # calculate d / d z ( Q d f / d z ) using distributed memory compatible routines
+    # calculate d^2 f / d z^2  using distributed memory compatible routines
     # first compute d f / d z using centred reconciliation and place in dummy array #1
     derivative_z!(scratch_dummy.buffer_vzvrvzetazrsn_1, f_in,
                   scratch_dummy.buffer_vzvrvzetarsn_1, scratch_dummy.buffer_vzvrvzetarsn_2,
                   scratch_dummy.buffer_vzvrvzetarsn_3,scratch_dummy.buffer_vzvrvzetarsn_4,
                   z_spectral,z)
-    # form Q d f / d r and place in dummy array #2
-    @loop_sn_r_vzeta_vr_vz isn ir ivzeta ivr ivz begin
-        Q = 1.0 # placeholder for geometrical or velocity space dependent metric coefficient
-        @. scratch_dummy.buffer_vzvrvzetazrsn_2[ivz,ivr,ivzeta,:,ir,isn] =  Q * scratch_dummy.buffer_vzvrvzetazrsn_1[ivz,ivr,ivzeta,:,ir,isn]
-    end
-    # compute d / d z ( Q d f / d z ) using centred reconciliation and place in dummy array #1
-    derivative_z!(scratch_dummy.buffer_vzvrvzetazrsn_1, scratch_dummy.buffer_vzvrvzetazrsn_2,
+    # compute d^2 f / d z^2 using centred reconciliation and place in dummy array #2
+    derivative_z!(scratch_dummy.buffer_vzvrvzetazrsn_2, scratch_dummy.buffer_vzvrvzetazrsn_1,
                   scratch_dummy.buffer_vzvrvzetarsn_1, scratch_dummy.buffer_vzvrvzetarsn_2,
                   scratch_dummy.buffer_vzvrvzetarsn_3,scratch_dummy.buffer_vzvrvzetarsn_4,
                   z_spectral,z)
-    # advance f due to diffusion_coefficient * d / d z ( Q d f / d z )
+    # advance f due to diffusion_coefficient * d^2 f/ d z^2
     @loop_sn_r_vzeta_vr_vz isn ir ivzeta ivr ivz begin
-        @views @. f_out[ivz,ivr,ivzeta,:,ir,isn] += dt * diffusion_coefficient * scratch_dummy.buffer_vpavperpzrs_1[ivz,ivr,ivzeta,:,ir,isn]
+        @views @. f_out[ivz,ivr,ivzeta,:,ir,isn] += dt * diffusion_coefficient * scratch_dummy.buffer_vzvrvzetazrsn_2[ivz,ivr,ivzeta,:,ir,isn]
     end
 
     return nothing
@@ -500,25 +514,20 @@ function r_dissipation_neutral!(f_out, f_in, r, r_spectral::T_spectral, dt,
 
     begin_sn_z_vzeta_vr_vz_region()
 
-    # calculate d / d r ( Q d f / d r ) using distributed memory compatible routines
+    # calculate d^2 f/ d r^2 using distributed memory compatible routines
     # first compute d f / d r using centred reconciliation and place in dummy array #1
     derivative_r!(scratch_dummy.buffer_vzvrvzetazrsn_1, f_in,
                   scratch_dummy.buffer_vzvrvzetazsn_1, scratch_dummy.buffer_vzvrvzetazsn_2,
                   scratch_dummy.buffer_vzvrvzetazsn_3,scratch_dummy.buffer_vzvrvzetazsn_4,
                   r_spectral,r)
-    # form Q d f / d r and place in dummy array #2
-    @loop_sn_z_vzeta_vr_vz isn iz ivzeta ivr ivz begin
-        Q = 1.0 # placeholder for geometrical or velocity space dependent metric coefficient
-        @. scratch_dummy.buffer_vzvrvzetazrsn_2[ivz,ivr,ivzeta,iz,:,isn] =  Q * scratch_dummy.buffer_vzvrvzetazrsn_1[ivz,ivr,ivzeta,iz,:,isn]
-    end
-    # compute d / d r ( Q d f / d r ) using centred reconciliation and place in dummy array #1
-    derivative_r!(scratch_dummy.buffer_vzvrvzetazrsn_1, scratch_dummy.buffer_vzvrvzetazrsn_2,
+    # compute d^2 f / d r^2  using centred reconciliation and place in dummy array #2
+    derivative_r!(scratch_dummy.buffer_vzvrvzetazrsn_2, scratch_dummy.buffer_vzvrvzetazrsn_1,
                   scratch_dummy.buffer_vzvrvzetazsn_1, scratch_dummy.buffer_vzvrvzetazsn_2,
                   scratch_dummy.buffer_vzvrvzetazsn_3,scratch_dummy.buffer_vzvrvzetazsn_4,
                   r_spectral,r)
     # advance f due to diffusion_coefficient * d / d r ( Q d f / d r )
     @loop_sn_z_vzeta_vr_vz isn iz ivzeta ivr ivz begin
-        @views @. f_out[ivz,ivr,ivzeta,iz,:,isn] += dt * diffusion_coefficient * scratch_dummy.buffer_vpavperpzrs_1[ivz,ivr,ivzeta,iz,:,isn]
+        @views @. f_out[ivz,ivr,ivzeta,iz,:,isn] += dt * diffusion_coefficient * scratch_dummy.buffer_vzvrvzetazrsn_2[ivz,ivr,ivzeta,iz,:,isn]
     end
 
     return nothing
diff --git a/src/plot_MMS_sequence.jl b/src/plot_MMS_sequence.jl
index 679bdee61..d11a53c10 100644
--- a/src/plot_MMS_sequence.jl
+++ b/src/plot_MMS_sequence.jl
@@ -18,7 +18,8 @@ using ..post_processing: compare_charged_pdf_symbolic_test, compare_fields_symbo
 using ..post_processing: compare_moments_symbolic_test, compare_neutral_pdf_symbolic_test
 using ..post_processing: read_distributed_zr_data!, construct_global_zr_coords
 using ..post_processing: allocate_global_zr_neutral_moments, allocate_global_zr_charged_moments
-using ..post_processing: allocate_global_zr_fields, get_geometry_and_composition, get_coords_nelement
+using ..post_processing: allocate_global_zr_fields, get_geometry_and_composition
+using ..post_processing: get_coords_nelement, get_coords_ngrid
 using ..array_allocation: allocate_float
 using ..type_definitions: mk_float, mk_int
 using ..load_data: open_readonly_output_file
@@ -32,6 +33,12 @@ using ..moment_kinetics_input: mk_input, read_input_file
 
 import Base: get
 
+function expected_nelement_scaling!(expected,nelement_list,ngrid,nscan)
+    # reference curve: expected[i] = (1/nelement_list[i])^(ngrid-1) for the first nscan entries
+    order = ngrid - 1
+    foreach(i -> (expected[i] = (1.0/nelement_list[i])^order), 1:nscan)
+end
+
 # assume in function below that we have a list of simulations 
 # where only a single nelement parameter is varied
 # we plot the MMS error measurements as a fn of nelement
@@ -47,7 +54,7 @@ function get_MMS_error_data(path_list,scan_type,scan_name)
     neutral_density_error_sequence = zeros(mk_float,nsimulation)
     neutral_pdf_error_sequence = zeros(mk_float,nsimulation)
     nelement_sequence = zeros(mk_int,nsimulation)
-    
+    expected_scaling = zeros(mk_float,nsimulation)
     # declare local variables that are needed outside "nsimulation" loop below
     local n_neutral_species
     
@@ -70,6 +77,8 @@ function get_MMS_error_data(path_list,scan_type,scan_name)
         #    manufactured_solns_input = mk_input(scan_input)
         z_nelement, r_nelement, vpa_nelement, vperp_nelement, 
           vz_nelement, vr_nelement, vzeta_nelement = get_coords_nelement(scan_input)
+        z_ngrid, r_ngrid, vpa_ngrid, vperp_ngrid, 
+          vz_ngrid, vr_ngrid, vzeta_ngrid = get_coords_ngrid(scan_input)
         if scan_type == "vpa_nelement"
             # get the number of elements for plot
             nelement_sequence[isim] = vpa_nelement
@@ -117,6 +126,13 @@ function get_MMS_error_data(path_list,scan_type,scan_name)
             else 
                 println("ERROR: scan_type = ",scan_type," requires vpa_nelement = z_nelement/4")
             end
+        elseif scan_type == "vpavperpz_nelement"
+            nelement = z_nelement
+            if nelement == vpa_nelement && nelement == vperp_nelement
+                nelement_sequence[isim] = nelement
+            else 
+                println("ERROR: scan_type = ",scan_type," requires vpa_nelement = vperp_nelement = z_nelement")
+            end
         elseif scan_type == "vpaz_nelement"
             nelement = z_nelement
             if nelement == vpa_nelement
@@ -135,6 +151,7 @@ function get_MMS_error_data(path_list,scan_type,scan_name)
             println("ERROR: scan_type = ",scan_type," is unsupported")
         end
 
+        expected_nelement_scaling!(expected_scaling,nelement_sequence,z_ngrid,nsimulation)
         # load block data on iblock=0
         nblocks, iblock = load_block_data(fid)
              
@@ -205,7 +222,7 @@ function get_MMS_error_data(path_list,scan_type,scan_name)
         end
         geometry, composition = get_geometry_and_composition(scan_input,n_ion_species,n_neutral_species)
         
-        manufactured_solns_list = manufactured_solutions(Lr_in,z.L,r_bc,z_bc,geometry,composition,r.n) 
+        manufactured_solns_list = manufactured_solutions(Lr_in,z.L,r_bc,z_bc,geometry,composition,r.n,vperp.n) 
         dfni_func = manufactured_solns_list.dfni_func
         densi_func = manufactured_solns_list.densi_func
         dfnn_func = manufactured_solns_list.dfnn_func
@@ -288,6 +305,7 @@ function get_MMS_error_data(path_list,scan_type,scan_name)
     ylabel_phi = L"\varepsilon(\widetilde{\phi})"
     ylabel_Er = L"\varepsilon(\widetilde{E}_r)"
     ylabel_Ez = L"\varepsilon(\widetilde{E}_z)"
+    expected_label = L"(1/N_{el})^{n_g - 1}"
 	if scan_type == "vpa_nelement"
         xlabel = L"v_{||}"*" "*L"N_{element}"
     elseif scan_type == "vperp_nelement"
@@ -304,6 +322,8 @@ function get_MMS_error_data(path_list,scan_type,scan_name)
         xlabel = L"z"*" "*L"N_{element}"
     elseif scan_type == "zr_nelement"
         xlabel = L"z "*" & "*L"r "*" "*L"N_{element}"
+    elseif scan_type == "vpavperpz_nelement"
+        xlabel = L"N_{element}(z) = N_{element}(v_\perp) = N_{element}(v_{||})"
     elseif scan_type == "vpazr_nelement0.25"
         xlabel = L"N_{element}(z) = N_{element}(r) = N_{element}(v_{||})/4"
     elseif scan_type == "vpaz_nelement0.25"
@@ -332,7 +352,7 @@ function get_MMS_error_data(path_list,scan_type,scan_name)
         ytick_sequence = Array([1.0e-3,1.0e-2,1.0e-1,1.0e-0,1.0e1])
     elseif scan_name == "2D-sound-wave_cheb_cxiz" 
         ytick_sequence = Array([1.0e-5,1.0e-4,1.0e-3,1.0e-2,1.0e-1,1.0e-0,1.0e1])
-    elseif scan_name == "1D-1V-wall_cheb"
+    elseif scan_name == "1D-1V-wall_cheb" || scan_name == "1D-2V-wall_cheb_krook"
         ytick_sequence = Array([1.0e-13,1.0e-12,1.0e-11,1.0e-10,1.0e-9,1.0e-8,1.0e-7,1.0e-6,1.0e-5,1.0e-4,1.0e-3,1.0e-2,1.0e-1,1.0e-0,1.0e1])
     elseif scan_name == "1D-3V-wall_cheb-updated" || scan_name == "1D-3V-wall_cheb-new-dfni-Er" || scan_name == "1D-3V-wall_cheb-new-dfni" || scan_name == "2D-sound-wave_cheb"
         ytick_sequence = Array([1.0e-10,1.0e-9,1.0e-8,1.0e-7,1.0e-6,1.0e-5,1.0e-4,1.0e-3,1.0e-2,1.0e-1,1.0e-0,1.0e1])
@@ -372,8 +392,8 @@ function get_MMS_error_data(path_list,scan_type,scan_name)
     savefig(outfile)
     println(outfile)
     
-    plot(nelement_sequence, [ion_density_error_sequence,phi_error_sequence,Ez_error_sequence,ion_pdf_error_sequence], xlabel=xlabel,
-	label=[ylabel_ion_density ylabel_phi ylabel_Ez ylabel_ion_pdf], ylabel="",
+    plot(nelement_sequence, [ion_density_error_sequence,phi_error_sequence,Ez_error_sequence,ion_pdf_error_sequence,expected_scaling], xlabel=xlabel,
+	label=[ylabel_ion_density ylabel_phi ylabel_Ez ylabel_ion_pdf expected_label], ylabel="",
      shape =:circle, xscale=:log10, yscale=:log10, xticks = (nelement_sequence, nelement_sequence), yticks = (ytick_sequence, ytick_sequence), markersize = 5, linewidth=2, 
       xtickfontsize = fontsize, xguidefontsize = fontsize, ytickfontsize = fontsize, yguidefontsize = fontsize, legendfontsize = fontsize)
     outfile = outprefix*"_fields_and_ion_pdf_no_Er.pdf"
diff --git a/src/post_processing.jl b/src/post_processing.jl
index 66fcc3e73..48c1b10ec 100644
--- a/src/post_processing.jl
+++ b/src/post_processing.jl
@@ -12,6 +12,7 @@ export allocate_global_zr_charged_moments
 export allocate_global_zr_neutral_moments
 export allocate_global_zr_fields
 export get_coords_nelement
+export get_coords_ngrid
 
 # Next three lines only used for workaround needed by plot_unnormalised()
 using PyCall
@@ -269,7 +270,7 @@ function construct_global_zr_coords(r_local, z_local)
     function make_global_input(coord_local)
         return grid_input(coord_local.name, coord_local.ngrid,
             coord_local.nelement_global, coord_local.nelement_global, 1, 0, coord_local.L,
-            coord_local.discretization, coord_local.fd_option, coord_local.bc,
+            coord_local.discretization, coord_local.fd_option, coord_local.cheb_option, coord_local.bc,
             coord_local.advection, MPI.COMM_NULL, coord_local.element_spacing_option)
     end
 
@@ -297,9 +298,10 @@ function allocate_global_zr_charged_moments(nz_global,nr_global,n_ion_species,nt
     perpendicular_pressure = allocate_float(nz_global,nr_global,n_ion_species,ntime)
     parallel_heat_flux = allocate_float(nz_global,nr_global,n_ion_species,ntime)
     thermal_speed = allocate_float(nz_global,nr_global,n_ion_species,ntime)
+    entropy_production = allocate_float(nz_global,nr_global,n_ion_species,ntime)
     chodura_integral_lower = allocate_float(nr_global,n_ion_species,ntime)
     chodura_integral_upper = allocate_float(nr_global,n_ion_species,ntime)
-    return density, parallel_flow, parallel_pressure, perpendicular_pressure, parallel_heat_flux, thermal_speed, chodura_integral_lower, chodura_integral_upper
+    return density, parallel_flow, parallel_pressure, perpendicular_pressure, parallel_heat_flux, thermal_speed, entropy_production, chodura_integral_lower, chodura_integral_upper
 end
 
 function allocate_global_zr_charged_dfns(nvpa_global, nvperp_global, nz_global, nr_global,
@@ -337,6 +339,18 @@ function get_coords_nelement(scan_input)
     return z_nelement, r_nelement, vpa_nelement, vperp_nelement, vz_nelement, vr_nelement, vzeta_nelement
 end
 
+function get_coords_ngrid(scan_input)
+    # Read the "<coord>_ngrid" entry of scan_input for every coordinate.
+    # use 1 as default because these values should be set in input .toml
+    coord_names = ("z", "r", "vpa", "vperp", "vz", "vr", "vzeta")
+    ngrids = map(coord_names) do coord_name
+        get(scan_input, coord_name * "_ngrid", 1)
+    end
+    # Tuple order seen by callers:
+    # z_ngrid, r_ngrid, vpa_ngrid, vperp_ngrid, vz_ngrid, vr_ngrid, vzeta_ngrid
+    return ngrids
+end
+
 function get_geometry_and_composition(scan_input,n_ion_species,n_neutral_species)
     # set geometry_input
     # MRH need to get this in way that does not duplicate code
@@ -370,6 +384,7 @@ function get_geometry_and_composition(scan_input,n_ion_species,n_neutral_species
     use_test_neutral_wall_pdf = get(scan_input, "use_test_neutral_wall_pdf", false)
     # constant to be used to test nonzero Er in wall boundary condition
     Er_constant = get(scan_input, "Er_constant", 0.0)
+    recycling_fraction = get(scan_input, "recycling_fraction", 1.0)
     # constant to be used to control Ez divergences
     epsilon_offset = get(scan_input, "epsilon_offset", 0.001)
     # bool to control if dfni is a function of vpa or vpabar in MMS test
@@ -385,7 +400,7 @@ function get_geometry_and_composition(scan_input,n_ion_species,n_neutral_species
     me_over_mi = 1.0/1836.0
     composition = species_composition(n_species, n_ion_species, n_neutral_species,
         electron_physics, use_test_neutral_wall_pdf, T_e, T_wall, phi_wall, Er_constant,
-        mn_over_mi, me_over_mi, allocate_float(n_species))
+        mn_over_mi, me_over_mi, recycling_fraction, allocate_float(n_species))
     return geometry, composition
 
 end
@@ -562,7 +577,7 @@ function analyze_and_plot_data(prefix...; run_index=nothing)
                                              Tuple(this_z.n_global for this_z ∈ z),
                                              Tuple(this_r.n_global for this_r ∈ r),
                                              ntime)
-    density, parallel_flow, parallel_pressure, perpendicular_pressure, parallel_heat_flux, thermal_speed, chodura_integral_lower, chodura_integral_upper =
+    density, parallel_flow, parallel_pressure, perpendicular_pressure, parallel_heat_flux, thermal_speed, entropy_production, chodura_integral_lower, chodura_integral_upper =
         get_tuple_of_return_values(allocate_global_zr_charged_moments,
                                    Tuple(this_z.n_global for this_z ∈ z),
                                    Tuple(this_r.n_global for this_r ∈ r),
@@ -617,6 +632,10 @@ function analyze_and_plot_data(prefix...; run_index=nothing)
                                run_names, "moments", nblocks,
                                Tuple(this_z.n for this_z ∈ z),
                                Tuple(this_r.n for this_r ∈ r), iskip)
+    get_tuple_of_return_values(read_distributed_zr_data!, entropy_production, "entropy_production",
+                               run_names, "moments", nblocks,
+                               Tuple(this_z.n for this_z ∈ z),
+                               Tuple(this_r.n for this_r ∈ r), iskip)
     get_tuple_of_return_values(read_distributed_zwallr_data!, chodura_integral_lower, "chodura_integral_lower",
                                run_names, "moments", nblocks,
                                Tuple(this_r.n for this_r ∈ r), iskip, "lower")
@@ -678,8 +697,9 @@ function analyze_and_plot_data(prefix...; run_index=nothing)
                                    Tuple(this_z.n_global for this_z ∈ z),
                                    Tuple(this_r.n_global for this_r ∈ r), ntime_pdfs)
     density_at_pdf_times, parallel_flow_at_pdf_times, parallel_pressure_at_pdf_times,
-    parallel_heat_flux_at_pdf_times, thermal_speed_at_pdf_times, chodura_integral_lower_at_pdf_times,
-    chodura_integral_upper_at_pdf_times =
+    perpendicular_pressure_at_pdf_times, parallel_heat_flux_at_pdf_times,
+    thermal_speed_at_pdf_times, entropy_production_at_pdf_times,
+    chodura_integral_lower_at_pdf_times, chodura_integral_upper_at_pdf_times =
         get_tuple_of_return_values(allocate_global_zr_charged_moments,
                                    Tuple(this_z.n_global for this_z ∈ z),
                                    Tuple(this_r.n_global for this_r ∈ r), n_ion_species,
@@ -1113,6 +1133,7 @@ function analyze_and_plot_data(prefix...; run_index=nothing)
     perpendicular_pressure = perpendicular_pressure[1]
     parallel_heat_flux = parallel_heat_flux[1]
     thermal_speed = thermal_speed[1]
+    entropy_production = entropy_production[1]
     chodura_integral_lower = chodura_integral_lower[1]
     chodura_integral_upper = chodura_integral_upper[1]
     time = time[1]
@@ -1154,6 +1175,7 @@ function analyze_and_plot_data(prefix...; run_index=nothing)
     if !is_1D1V
         # make plots and animations of the phi, Ez and Er
         plot_charged_moments_2D(density, parallel_flow, parallel_pressure, 
+                                perpendicular_pressure, thermal_speed, entropy_production,
                                 chodura_integral_lower, chodura_integral_upper, time,
                                 z_global.grid, r_global.grid, iz0, ir0, n_ion_species,
                                 itime_min, itime_max, nwrite_movie, run_name_label, pp)
@@ -1167,6 +1189,16 @@ function analyze_and_plot_data(prefix...; run_index=nothing)
                          ivpa0, ivperp0, iz0, ir0, spec_type, n_ion_species, ntime_pdfs,
                          nblocks, itime_min_pdfs, itime_max_pdfs, iskip_pdfs,
                          nwrite_movie_pdfs, pp)
+        Maxwellian_diagnostic = true
+        if Maxwellian_diagnostic
+            pressure = copy(parallel_pressure)
+            @. pressure = (2.0*perpendicular_pressure + parallel_pressure)/3.0
+            ff = load_distributed_charged_pdf_slice(run_name, nblocks, 1:ntime, n_ion_species, r,
+                                                   z, vperp, vpa; iz=iz0, ir=ir0)
+            plot_Maxwellian_diagnostic(ff[:,:,:,:], density[iz0,ir0,:,:],
+             parallel_flow[iz0,ir0,:,:], thermal_speed[iz0,ir0,:,:], vpa.grid, vpa.wgts, 
+             vperp.grid, vperp.wgts, time, iz0, ir0, run_name_label, n_ion_species)
+        end
         # make plots and animations of the neutral pdf
         if n_neutral_species > 0
             spec_type = "neutral"
@@ -2019,7 +2051,7 @@ function plot_moments(density, delta_density, density_fldline_avg,
             # plot the time trace of n_s(z=z0)-density_fldline_avg
             plot(legend=legend)
             for (t, dupar, run_label) ∈ zip(time, delta_upar, run_name_labels)
-                @views plot!(t, abs.(du[iz0,is,:]), yaxis=:log, label=run_label)
+                @views plot!(t, abs.(dupar[iz0,is,:]), yaxis=:log, label=run_label)
             end
             outfile = string(prefix, "_$(label)_delta_upar0_vs_t_spec", spec_string, ".pdf")
             trysavefig(outfile)
@@ -3278,12 +3310,27 @@ function plot_charged_pdf(run_name, run_name_label, vpa, vperp, z, r, z_local, r
                                                  itime_min:iskip:itime_max, n_species,
                                                  r_local, z_local, vperp, vpa; iz=iz0,
                                                  ir=ir0)
-        for is ∈ 1:n_species
-            anim = @animate for i ∈ itime_min:nwrite_movie:itime_max
-                @views heatmap(vperp.grid, vpa.grid, pdf[:,:,is,i], xlabel="vperp", ylabel="vpa", c = :deep, interpolation = :cubic)
+        if vperp.n > 1
+            for is ∈ 1:n_species
+                anim = @animate for i ∈ itime_min:nwrite_movie:itime_max
+                    @views heatmap(vperp.grid, vpa.grid, pdf[:,:,is,i], xlabel="vperp", ylabel="vpa", c = :deep, interpolation = :cubic)
+                end
+                outfile = string(run_name_label, "_pdf_vs_vperp_vpa", iz0_string, ir0_string, spec_string[is], ".gif")
+                trygif(anim, outfile, fps=5)
+
+                @views heatmap(vperp.grid, vpa.grid, pdf[:,:,is,itime_max], xlabel="vperp", ylabel="vpa", c = :deep, interpolation = :cubic)
+                outfile = string(run_name_label, "_pdf_vs_vpa_vperp", ir0_string, iz0_string, spec_string[is], ".pdf")
+                trysavefig(outfile)
+            end
+        elseif vperp.n == 1
+            for is ∈ 1:n_species
+                # make a gif animation of f(vpa) at different times
+                anim = @animate for i ∈ itime_min:nwrite_movie:itime_max
+                    @views plot(vpa.grid, pdf[:,1,is,i], xlabel="vpa", ylabel="f")
+                end
+                outfile = string(run_name_label, "_pdf_vs_vpa", ir0_string, iz0_string, spec_string[is], ".gif")
+                trygif(anim, outfile, fps=5)
             end
-            outfile = string(run_name_label, "_pdf_vs_vperp_vpa", iz0_string, ir0_string, spec_string[is], ".gif")
-            trygif(anim, outfile, fps=5)
         end
     end
     # make a gif animation of f(z,r,t) at a given (vpa,vperp) location
@@ -3470,10 +3517,13 @@ function plot_fields_2D(phi, Ez, Er, time, z, r, iz0, ir0,
     println("done.")
 end
 
-function plot_charged_moments_2D(density, parallel_flow, parallel_pressure, 
-    chodura_integral_lower, chodura_integral_upper, time, z, r, iz0, ir0, n_ion_species,
+function plot_charged_moments_2D(density, parallel_flow, parallel_pressure,
+    perpendicular_pressure, thermal_speed, 
+    entropy_production,chodura_integral_lower, chodura_integral_upper,
+    time, z, r, iz0, ir0, n_ion_species,
     itime_min, itime_max, nwrite_movie, run_name, pp)
     nr = size(r,1)
+    ntime = size(time,1)
     print("Plotting charged moments data...")
     for is in 1:n_ion_species
 		description = "_ion_spec"*string(is)*"_"
@@ -3504,7 +3554,14 @@ function plot_charged_moments_2D(density, parallel_flow, parallel_pressure,
 			outfile = string(run_name, "_density"*description*"_vs_r_z.pdf")
 			trysavefig(outfile)
 		end
-		
+		if pp.plot_dens0_vs_t
+            @views plot(time, density[iz0,ir0,is,:], xlabel=L"t/ (L_{ref}/c_{ref})", ylabel=L"n_i", label = "")
+			outfile = string(run_name, "_density"*description*"(iz0,ir0)_vs_t.pdf")
+			trysavefig(outfile)
+            @views plot(time, density[iz0,ir0,is,:] .- density[iz0,ir0,is,1], xlabel=L"t/ (L_{ref}/c_{ref})", ylabel=L"n_i(t) - n_i(0)", label = "")
+			outfile = string(run_name, "_delta_density"*description*"(iz0,ir0)_vs_t.pdf")
+			trysavefig(outfile)
+        end
 		# the parallel flow
 		parallel_flowmin = minimum(parallel_flow[:,:,is,:])
 		parallel_flowmax = maximum(parallel_flow)
@@ -3532,7 +3589,14 @@ function plot_charged_moments_2D(density, parallel_flow, parallel_pressure,
 			outfile = string(run_name, "_parallel_flow"*description*"_vs_r_z.pdf")
 			trysavefig(outfile)
 		end
-		
+		if pp.plot_upar0_vs_t
+            @views plot(time, parallel_flow[iz0,ir0,is,:], xlabel=L"t/ (L_{ref}/c_{ref})", ylabel=L"u_{i\|\|}(t)", label = "")
+			outfile = string(run_name, "_parallel_flow"*description*"(iz0,ir0)_vs_t.pdf")
+			trysavefig(outfile)
+            @views plot(time, parallel_flow[iz0,ir0,is,:] .- parallel_flow[iz0,ir0,is,1], xlabel=L"t/ (L_{ref}/c_{ref})", ylabel=L"u_{i\|\|}(t) - u_{i\|\|}(0)", label = "")
+			outfile = string(run_name, "_delta_parallel_flow"*description*"(iz0,ir0)_vs_t.pdf")
+			trysavefig(outfile)
+        end
 		# the parallel pressure
 		parallel_pressuremin = minimum(parallel_pressure[:,:,is,:])
 		parallel_pressuremax = maximum(parallel_pressure)
@@ -3588,6 +3652,68 @@ function plot_charged_moments_2D(density, parallel_flow, parallel_pressure,
                     outfile = string(run_name, "_temperature"*description*"_vs_r_z.pdf")
                     trysavefig(outfile)
                 end
+        if pp.plot_ppar0_vs_t
+            @views plot(time, parallel_pressure[iz0,ir0,is,:], xlabel=L"t/ (L_{ref}/c_{ref})", ylabel=L"p_{i\|\|}(t)", label = "")
+			outfile = string(run_name, "_parallel_pressure"*description*"(iz0,ir0)_vs_t.pdf")
+			trysavefig(outfile)
+            @views plot(time, parallel_pressure[iz0,ir0,is,:] .- parallel_pressure[iz0,ir0,is,1], xlabel=L"t/ (L_{ref}/c_{ref})", ylabel=L"p_{i\|\|}(t) - p_{i\|\|}(0)", label = "")
+			outfile = string(run_name, "_delta_parallel_pressure"*description*"(iz0,ir0)_vs_t.pdf")
+			trysavefig(outfile)
+        end
+        # the perpendicular pressure
+        if pp.plot_pperp0_vs_t
+            @views plot(time, perpendicular_pressure[iz0,ir0,is,:], xlabel=L"t/ (L_{ref}/c_{ref})", ylabel=L"p_{i\perp}(t)", label = "")
+			outfile = string(run_name, "_perpendicular_pressure"*description*"(iz0,ir0)_vs_t.pdf")
+			trysavefig(outfile)
+            @views plot(time, perpendicular_pressure[iz0,ir0,is,:] .- perpendicular_pressure[iz0,ir0,is,1], xlabel=L"t/ (L_{ref}/c_{ref})", ylabel=L"p_{i\perp}(t) - p_{i\perp}(0)", label = "")
+			outfile = string(run_name, "_delta_perpendicular_pressure"*description*"(iz0,ir0)_vs_t.pdf")
+			trysavefig(outfile)
+        end
+        # the total pressure
+        if pp.plot_ppar0_vs_t && pp.plot_pperp0_vs_t
+            @views plot([time, time, time] , 
+            [parallel_pressure[iz0,ir0,is,:], perpendicular_pressure[iz0,ir0,is,:], 
+            (2.0/3.0).*perpendicular_pressure[iz0,ir0,is,:] .+ (1.0/3.0).*parallel_pressure[iz0,ir0,is,:]],
+            xlabel=L"t/ (L_{ref}/c_{ref})", ylabel="", label = [L"p_{i\|\|}(t)" L"p_{i\perp}(t)" L"p_{i}(t)"])
+			outfile = string(run_name, "_pressures"*description*"(iz0,ir0)_vs_t.pdf")
+			trysavefig(outfile)
+            @views plot([time, time, time] , 
+            [parallel_pressure[iz0,ir0,is,:] .- parallel_pressure[iz0,ir0,is,1], perpendicular_pressure[iz0,ir0,is,:] .- perpendicular_pressure[iz0,ir0,is,1], 
+            (2.0/3.0).*(perpendicular_pressure[iz0,ir0,is,:] .- perpendicular_pressure[iz0,ir0,is,1]).+
+            (1.0/3.0).*(parallel_pressure[iz0,ir0,is,:] .- parallel_pressure[iz0,ir0,is,1])],
+            xlabel=L"t/ (L_{ref}/c_{ref})", ylabel="", label = [L"p_{i\|\|}(t) - p_{i\|\|}(0)" L"p_{i\perp}(t) - p_{i\perp}(0)" L"p_{i}(t) - p_{i}(0)"])
+			outfile = string(run_name, "_delta_pressures"*description*"(iz0,ir0)_vs_t.pdf")
+			trysavefig(outfile)
+            @views plot([time] , 
+            [(2.0/3.0).*perpendicular_pressure[iz0,ir0,is,:] .+ (1.0/3.0).*parallel_pressure[iz0,ir0,is,:]],
+            xlabel=L"t/ (L_{ref}/c_{ref})", ylabel=L"p_{i}(t)", label = "")
+			outfile = string(run_name, "_pressure"*description*"(iz0,ir0)_vs_t.pdf")
+			trysavefig(outfile)
+            @views plot([time] , 
+            [(2.0/3.0).*(perpendicular_pressure[iz0,ir0,is,:] .- perpendicular_pressure[iz0,ir0,is,1]).+
+            (1.0/3.0).*(parallel_pressure[iz0,ir0,is,:] .- parallel_pressure[iz0,ir0,is,1])],
+            xlabel=L"t/ (L_{ref}/c_{ref})", ylabel=L"p_{i}(t) - p_{i}(0)", label = "")
+			outfile = string(run_name, "_delta_pressure"*description*"(iz0,ir0)_vs_t.pdf")
+			trysavefig(outfile)
+        end
+        # the thermal speed
+        if pp.plot_vth0_vs_t
+            @views plot(time, thermal_speed[iz0,ir0,is,:], xlabel=L"t / (L_{ref}/c_{ref})", ylabel=L"v_{i,th}(t)", label = "")
+			outfile = string(run_name, "_thermal_speed"*description*"(iz0,ir0)_vs_t.pdf")
+			trysavefig(outfile)
+            @views plot(time, thermal_speed[iz0,ir0,is,:] .- thermal_speed[iz0,ir0,is,1], xlabel=L"t/ (L_{ref}/c_{ref})", ylabel=L"v_{i,th}(t) - v_{i,th}(0)", label = "")
+			outfile = string(run_name, "_delta_thermal_speed"*description*"(iz0,ir0)_vs_t.pdf")
+			trysavefig(outfile)
+        end
+        # the entropy production
+        if pp.plot_dSdt0_vs_t
+            @views plot(time[2:ntime], entropy_production[iz0,ir0,is,2:ntime], xlabel=L"t/(L_{ref}/c_{ref})", ylabel=L"\dot{S}(t)", label = "")
+			outfile = string(run_name, "_entropy_production"*description*"(iz0,ir0)_vs_t.pdf")
+			trysavefig(outfile)
+            @views plot(time[2:ntime], entropy_production[iz0,ir0,is,2:ntime], xlabel=L"t/(L_{ref}/c_{ref})", ylabel=L"\dot{S}(t)", label = "", yscale=:log10)
+			outfile = string(run_name, "_entropy_production_log_scale"*description*"(iz0,ir0)_vs_t.pdf")
+			trysavefig(outfile)
+        end
         if pp.plot_chodura_integral
             # plot the Chodura condition integrals calculated at run time 
             @views plot(r, chodura_integral_lower[:,is,end], xlabel=L"r/L_r", ylabel="", label = "Chodura lower")
@@ -3605,10 +3731,65 @@ function plot_charged_moments_2D(density, parallel_flow, parallel_pressure,
             outfile = string(run_name, "_chodura_integral_upper"*description*"_vs_r_t.pdf")
             trysavefig(outfile)
         end
-	end
+        if pp.plot_dens0_vs_t && pp.plot_ppar0_vs_t && pp.plot_pperp0_vs_t && pp.plot_upar0_vs_t
+            xlist = [time,time,time]
+            ylist = [ density[iz0,ir0,is,:] .- density[iz0,ir0,is,1], 
+                      parallel_flow[iz0,ir0,is,:] .- parallel_flow[iz0,ir0,is,1],
+                      (2.0/3.0).*(perpendicular_pressure[iz0,ir0,is,:] .- perpendicular_pressure[iz0,ir0,is,1]).+
+                      (1.0/3.0).*(parallel_pressure[iz0,ir0,is,:] .- parallel_pressure[iz0,ir0,is,1])]
+            ylabels = [L"n_i(t) - n_i(0)" L"u_{i\|\|}(t) - u_{i\|\|}(0)" L"p_{i}(t) - p_{i}(0)"]          
+            @views plot(xlist,ylist, xlabel=L"t/ (L_{ref}/c_{ref})", ylabel="", label = ylabels)
+			outfile = string(run_name, "_delta_moments"*description*"(iz0,ir0)_vs_t.pdf")
+			trysavefig(outfile)
+        end
+    end
     println("done.")
 end
 
+"""
+    plot_Maxwellian_diagnostic(ff, density, parallel_flow, thermal_speed, vpa_local,
+        vpa_local_wgts, vperp_local, vperp_local_wgts, time, iz0, ir0, run_name,
+        n_ion_species)
+
+For each ion species, plot against `time` the norm `sqrt(∫(ff - f_M)^2 / ∫1)`, with both
+integrals taken by `integrate_over_vspace`. `f_M[ivpa,ivperp,is,it]` is
+`n/vth^p * exp(-((vpa - upar)^2 + vperp^2)/vth^2)` built from `density[is,it]`,
+`parallel_flow[is,it]` and `thermal_speed[is,it]`, with `p = 3` when `vperp_local` has more
+than one point and `p = 1` otherwise. Linear and log-scale plots are saved; returns `nothing`.
+"""
+function plot_Maxwellian_diagnostic(ff, density, parallel_flow, thermal_speed, vpa_local, vpa_local_wgts, 
+            vperp_local, vperp_local_wgts, time, iz0, ir0, run_name, n_ion_species)
+    nvpa = size(vpa_local,1)
+    nvperp = size(vperp_local,1)
+    ntime = size(time,1)
+    pvth = nvperp > 1 ? 3 : 1
+    ff_Maxwellian = copy(ff)
+    for it in 1:ntime, is in 1:n_ion_species, ivperp in 1:nvperp, ivpa in 1:nvpa
+        ff_Maxwellian[ivpa,ivperp,is,it] = (density[is,it]/thermal_speed[is,it]^pvth)*
+                                         exp(- (((vpa_local[ivpa] - parallel_flow[is,it])^2) +
+                                          (vperp_local[ivperp]^2) )/(thermal_speed[is,it]^2) )
+    end
+    # the integral of unity is the same for every species and time, so evaluate it once
+    # on a single velocity-space slice rather than filling a full-size copy of ff with ones
+    denom = integrate_over_vspace(ones(eltype(ff), size(ff,1), size(ff,2)), vpa_local, 0, vpa_local_wgts, vperp_local, 0, vperp_local_wgts)
+    iz0_string = string("_iz0", string(iz0))
+    ir0_string = string("_ir0", string(ir0))
+    ff_norm = copy(time)
+    for is in 1:n_ion_species
+        for it in 1:ntime
+            @views num = integrate_over_vspace( (ff[:,:,is,it] .- ff_Maxwellian[:,:,is,it]).^2 , vpa_local, 0, vpa_local_wgts, vperp_local, 0, vperp_local_wgts)
+            ff_norm[it] = sqrt(num/denom)
+        end
+        description = "_ion_spec"*string(is)*"_"*iz0_string*ir0_string
+        plot(time, ff_norm, xlabel=L"t/ (L_{ref}/c_{ref})", ylabel=L"L^2(f - f_M)(t)", label = "")
+        outfile = string(run_name, "_L2_Maxwellian_norm"*description*"_vs_t.pdf")
+        trysavefig(outfile)
+        plot(time, ff_norm, xlabel=L"t/ (L_{ref}/c_{ref})", ylabel=L"L^2(f - f_M)(t)", label = "", yscale=:log10)
+        outfile = string(run_name, "_L2_Maxwellian_norm_log_scale"*description*"_vs_t.pdf")
+        trysavefig(outfile)
+    end
+    return nothing
+end
 function plot_charged_pdf_2D_at_wall(run_name, run_name_label, r_global, z_global,
                                      nblocks, n_ion_species, r, z, vperp, vpa, ntime)
     print("Plotting charged pdf data at wall boundaries...")
diff --git a/src/post_processing_input.jl b/src/post_processing_input.jl
index b04fb5364..a5e660633 100644
--- a/src/post_processing_input.jl
+++ b/src/post_processing_input.jl
@@ -17,14 +17,18 @@ const plot_phi0_vs_t = true
 const plot_phi_vs_z_t = true
 # if animate_phi_vs_z = true, create animation of phi(z) at different time slices
 const animate_phi_vs_z = true
-# if plot_dens0_vs_t = true, create plots of species density(z0) vs time
+# if plot_dens0_vs_t = true, create plots of species density(z0,r0) vs time
 const plot_dens0_vs_t = true
-# if plot_upar0_vs_t = true, create plots of species upar(z0) vs time
-const plot_upar0_vs_t = false
-# if plot_ppar0_vs_t = true, create plots of species ppar(z0) vs time
-const plot_ppar0_vs_t = false
-# if plot_vth0_vs_t = true, create plots of species vth(z0) vs time
-const plot_vth0_vs_t = false
+# if plot_upar0_vs_t = true, create plots of species upar(z0,r0) vs time
+const plot_upar0_vs_t = true
+# if plot_ppar0_vs_t = true, create plots of species ppar(z0,r0) vs time
+const plot_ppar0_vs_t = true
+# if plot_pperp0_vs_t = true, create plots of species pperp(z0,r0) vs time
+const plot_pperp0_vs_t = true
+# if plot_vth0_vs_t = true, create plots of species vth(z0,r0) vs time
+const plot_vth0_vs_t = true
+# if plot_dSdt0_vs_t = true, create plots of species dSdt(z0,r0) vs time
+const plot_dSdt0_vs_t = true
 # if plot_qpar0_vs_t = true, create plots of species qpar(z0) vs time
 const plot_qpar0_vs_t = false
 # if plot_dens_vs_z_t = true, create heatmap of species density vs z and time
@@ -74,7 +78,7 @@ const animate_f_vs_vperp_z =  true
 # if animate_f_vs_vperp_r = true, create animation of f(vperp,r) at different time slices
 const animate_f_vs_vperp_r = false
 # if animate_f_vs_vperp_vpa = true, create animation of f(vperp,vpa) at different time slices
-const animate_f_vs_vperp_vpa = false
+const animate_f_vs_vperp_vpa = true
 # if animate_f_vs_r_z = true, create animation of f(r,z) at different time slices
 const animate_f_vs_r_z = true
 # if animate_f_vs_vz_z = true, create animation of f(vz,z) at different time slices
@@ -155,7 +159,8 @@ const diagnostics_chodura_t = false
 const diagnostics_chodura_r = false
 
 pp = pp_input(calculate_frequencies, plot_phi0_vs_t, plot_phi_vs_z_t, animate_phi_vs_z,
-    plot_dens0_vs_t, plot_upar0_vs_t, plot_ppar0_vs_t, plot_vth0_vs_t, plot_qpar0_vs_t,
+    plot_dens0_vs_t, plot_upar0_vs_t, plot_ppar0_vs_t, plot_pperp0_vs_t,
+    plot_vth0_vs_t, plot_dSdt0_vs_t, plot_qpar0_vs_t,
     plot_dens_vs_z_t, plot_upar_vs_z_t, plot_ppar_vs_z_t, plot_Tpar_vs_z_t,
     plot_qpar_vs_z_t, animate_dens_vs_z, animate_upar_vs_z, animate_ppar_vs_z,
     animate_Tpar_vs_z, animate_vth_vs_z, animate_qpar_vs_z, plot_f_unnormalized_vs_vpa_z,
diff --git a/src/time_advance.jl b/src/time_advance.jl
index 973f8c696..109e94ebb 100644
--- a/src/time_advance.jl
+++ b/src/time_advance.jl
@@ -4,6 +4,9 @@ module time_advance
 
 export setup_time_advance!
 export time_advance!
+export setup_dummy_and_buffer_arrays
+# functional testing
+export setup_runge_kutta_coefficients
 
 using MPI
 using ..type_definitions: mk_float
@@ -22,10 +25,8 @@ using ..velocity_moments: update_neutral_pzeta!, update_neutral_pz!, update_neut
 using ..velocity_moments: calculate_moment_derivatives!, calculate_moment_derivatives_neutral!
 using ..velocity_moments: update_chodura!
 using ..velocity_grid_transforms: vzvrvzeta_to_vpavperp!, vpavperp_to_vzvrvzeta!
-using ..initial_conditions: enforce_z_boundary_condition!, enforce_boundary_conditions!
-using ..initial_conditions: enforce_vpa_boundary_condition!, enforce_r_boundary_condition!
+using ..initial_conditions: enforce_boundary_conditions!
 using ..initial_conditions: enforce_neutral_boundary_conditions!
-using ..initial_conditions: enforce_neutral_z_boundary_condition!, enforce_neutral_r_boundary_condition!
 using ..input_structs: advance_info, time_input
 using ..makie_post_processing: plot_1d, plot_2d, positive_or_nan
 using ..moment_constraints: hard_force_moment_constraints!,
@@ -44,7 +45,7 @@ using ..krook_collisions: krook_collisions!
 using ..external_sources
 using ..numerical_dissipation: vpa_boundary_buffer_decay!,
                                vpa_boundary_buffer_diffusion!, vpa_dissipation!,
-                               z_dissipation!, r_dissipation!,
+                               z_dissipation!, r_dissipation!, vperp_dissipation!,
                                vz_dissipation_neutral!, z_dissipation_neutral!,
                                r_dissipation_neutral!,
                                vpa_boundary_force_decreasing!, force_minimum_pdf_value!,
@@ -54,6 +55,7 @@ using ..continuity: continuity_equation!, neutral_continuity_equation!
 using ..force_balance: force_balance!, neutral_force_balance!
 using ..energy_equation: energy_equation!, neutral_energy_equation!
 using ..em_fields: setup_em_fields, update_phi!
+using ..fokker_planck: init_fokker_planck_collisions_weak_form, explicit_fokker_planck_collisions_weak_form!
 using ..manufactured_solns: manufactured_sources
 using ..advection: advection_info
 using ..utils: to_minutes
@@ -65,12 +67,22 @@ using ..analysis: steady_state_residuals
 #using ..post_processing: draw_v_parallel_zero!
 
 struct scratch_dummy_arrays
+    dummy_s::Array{mk_float,1}
     dummy_sr::Array{mk_float,2}
     dummy_vpavperp::Array{mk_float,2}
     dummy_zrs::MPISharedArray{mk_float,3}
     dummy_zrsn::MPISharedArray{mk_float,3}
 
     #buffer arrays for MPI 
+    buffer_z_1::MPISharedArray{mk_float,1}
+    buffer_z_2::MPISharedArray{mk_float,1}
+    buffer_z_3::MPISharedArray{mk_float,1}
+    buffer_z_4::MPISharedArray{mk_float,1}
+    buffer_r_1::MPISharedArray{mk_float,1}
+    buffer_r_2::MPISharedArray{mk_float,1}
+    buffer_r_3::MPISharedArray{mk_float,1}
+    buffer_r_4::MPISharedArray{mk_float,1}
+    
     buffer_zs_1::MPISharedArray{mk_float,2}
     buffer_zs_2::MPISharedArray{mk_float,2}
     buffer_zs_3::MPISharedArray{mk_float,2}
@@ -93,6 +105,10 @@ struct scratch_dummy_arrays
     buffer_rsn_5::MPISharedArray{mk_float,2}
     buffer_rsn_6::MPISharedArray{mk_float,2}
 
+    buffer_zrs_1::MPISharedArray{mk_float,3}
+    buffer_zrs_2::MPISharedArray{mk_float,3}
+    buffer_zrs_3::MPISharedArray{mk_float,3}
+    
     buffer_vpavperpzs_1::MPISharedArray{mk_float,4}
     buffer_vpavperpzs_2::MPISharedArray{mk_float,4}
     buffer_vpavperpzs_3::MPISharedArray{mk_float,4}
@@ -111,7 +127,7 @@ struct scratch_dummy_arrays
     # needs to be shared memory
     buffer_vpavperpzrs_1::MPISharedArray{mk_float,5}
     buffer_vpavperpzrs_2::MPISharedArray{mk_float,5}
-
+    
     buffer_vzvrvzetazsn_1::MPISharedArray{mk_float,5}
     buffer_vzvrvzetazsn_2::MPISharedArray{mk_float,5}
     buffer_vzvrvzetazsn_3::MPISharedArray{mk_float,5}
@@ -130,6 +146,10 @@ struct scratch_dummy_arrays
     # needs to be shared memory
     buffer_vzvrvzetazrsn_1::MPISharedArray{mk_float,6}
     buffer_vzvrvzetazrsn_2::MPISharedArray{mk_float,6}
+    
+    buffer_vpavperp_1::MPISharedArray{mk_float,2}
+    buffer_vpavperp_2::MPISharedArray{mk_float,2}
+    buffer_vpavperp_3::MPISharedArray{mk_float,2}
 
 end 
 
@@ -180,7 +200,7 @@ function setup_time_advance!(pdf, vz, vr, vzeta, vpa, vperp, z, r, vz_spectral,
     # else, will advance one term at a time.
     advance = setup_advance_flags(moments, composition, t_input, collisions,
                                   external_source_settings, num_diss_params,
-                                  manufactured_solns_input, rk_coefs, r, vperp, vpa,
+                                  manufactured_solns_input, rk_coefs, r, z, vperp, vpa,
                                   vzeta, vr, vz)
 
     begin_serial_region()
@@ -192,6 +212,12 @@ function setup_time_advance!(pdf, vz, vr, vzeta, vpa, vperp, z, r, vz_spectral,
     n_neutral_species_alloc = max(1,composition.n_neutral_species)
     scratch_dummy = setup_dummy_and_buffer_arrays(r.n,z.n,vpa.n,vperp.n,vz.n,vr.n,vzeta.n,
                                    composition.n_ion_species,n_neutral_species_alloc)
+    # create arrays for Fokker-Planck collisions 
+    if advance.explicit_weakform_fp_collisions
+        fp_arrays = init_fokker_planck_collisions_weak_form(vpa,vperp,vpa_spectral,vperp_spectral; precompute_weights=true)
+    else
+        fp_arrays = nothing
+    end
     # create the "fields" structure that contains arrays
     # for the electrostatic potential phi and eventually the electromagnetic fields
     fields = setup_em_fields(z.n, r.n, drive_input.force_phi, drive_input.amplitude, drive_input.frequency, drive_input.force_Er_zero_at_wall)
@@ -220,20 +246,33 @@ function setup_time_advance!(pdf, vz, vr, vzeta, vpa, vperp, z, r, vz_spectral,
     # with advection in r
     begin_serial_region()
     r_advect = setup_advection(n_ion_species, r, vpa, vperp, z)
+    if r.n > 1
+        # initialise the r advection speed
+        begin_s_z_vperp_vpa_region()
+        @loop_s is begin
+            @views update_speed_r!(r_advect[is], moments.charged.upar[:,:,is],
+                                   moments.charged.vth[:,:,is], fields, moments.evolve_upar,
+                                   moments.evolve_ppar, vpa, vperp, z, r, geometry)
+        end
+        # NOTE: no r boundary condition is applied to f in this block; see the enforce_boundary_conditions! call later in setup_time_advance!
+    end
 
     # create structure z_advect whose members are the arrays needed to compute
     # the advection term(s) appearing in the split part of the GK equation dealing
     # with advection in z
     begin_serial_region()
     z_advect = setup_advection(n_ion_species, z, vpa, vperp, r)
-    # initialise the z advection speed
-    begin_s_r_vperp_vpa_region()
-    @loop_s is begin
-        @views update_speed_z!(z_advect[is], moments.charged.upar[:,:,is],
-                               moments.charged.vth[:,:,is], moments.evolve_upar,
-                               moments.evolve_ppar, fields, vpa, vperp, z, r, 0.0,
-                               geometry)
+    if z.n > 1
+        # initialise the z advection speed
+        begin_s_r_vperp_vpa_region()
+        @loop_s is begin
+            @views update_speed_z!(z_advect[is], moments.charged.upar[:,:,is],
+                                   moments.charged.vth[:,:,is], moments.evolve_upar,
+                                   moments.evolve_ppar, fields, vpa, vperp, z, r, 0.0,
+                                   geometry)
+        end
     end
+    begin_serial_region()
 
     # create structure vpa_advect whose members are the arrays needed to compute
     # the advection term(s) appearing in the split part of the GK equation dealing
@@ -251,13 +290,15 @@ function setup_time_advance!(pdf, vz, vr, vzeta, vpa, vperp, z, r, vz_spectral,
     begin_serial_region()
     vperp_advect = setup_advection(n_ion_species, vperp, vpa, z, r)
     # initialise the vperp advection speed
-    begin_serial_region()
-    @serial_region begin
-        for is ∈ 1:n_ion_species
-            @views update_speed_vperp!(vperp_advect[is], vpa, vperp, z, r)
+    if vperp.n > 1
+        begin_serial_region()
+        @serial_region begin
+            for is ∈ 1:n_ion_species
+                @views update_speed_vperp!(vperp_advect[is], vpa, vperp, z, r)
+            end
         end
     end
-
+    
     ##
     # Neutral particle advection
     ##
@@ -276,7 +317,7 @@ function setup_time_advance!(pdf, vz, vr, vzeta, vpa, vperp, z, r, vz_spectral,
     # create structure neutral_z_advect for neutral particle advection
     begin_serial_region()
     neutral_z_advect = setup_advection(n_neutral_species_alloc, z, vz, vr, vzeta, r)
-    if n_neutral_species > 0
+    if n_neutral_species > 0 && z.n > 1
         # initialise the z advection speed
         begin_sn_r_vzeta_vr_vz_region()
         @loop_sn isn begin
@@ -308,14 +349,10 @@ function setup_time_advance!(pdf, vz, vr, vzeta, vpa, vperp, z, r, vz_spectral,
     end
 
     ##
-    # construct named list of advect & spectral objects to compactify arguments
+    # construct advect & spectral objects to compactify arguments
     ##
 
-    #advect_objects = (vpa_advect = vpa_advect, vperp_advect = vperp_advect, z_advect = z_advect,
-    # r_advect = r_advect, neutral_z_advect = neutral_z_advect, neutral_r_advect = neutral_r_advect)
     advect_objects = advect_object_struct(vpa_advect, vperp_advect, z_advect, r_advect, neutral_z_advect, neutral_r_advect, neutral_vz_advect)
-    #spectral_objects = (vz_spectral = vz_spectral, vr_spectral = vr_spectral, vzeta_spectral = vzeta_spectral,
-    # vpa_spectral = vpa_spectral, vperp_spectral = vperp_spectral, z_spectral = z_spectral, r_spectral = r_spectral)
     spectral_objects = spectral_object_struct(vz_spectral, vr_spectral, vzeta_spectral, vpa_spectral, vperp_spectral, z_spectral, r_spectral)
     if(advance.manufactured_solns_test)
         manufactured_source_list = manufactured_sources(manufactured_solns_input, r, z,
@@ -336,7 +373,8 @@ function setup_time_advance!(pdf, vz, vr, vzeta, vpa, vperp, z, r, vz_spectral,
         enforce_boundary_conditions!(
             pdf.charged.norm, boundary_distributions.pdf_rboundary_charged,
             moments.charged.dens, moments.charged.upar, moments.charged.ppar, moments,
-            vpa.bc, z.bc, r.bc, vpa, vperp, z, r, vpa_advect, z_advect, r_advect,
+            vpa.bc, z.bc, r.bc, vpa, vperp, z, r, vpa_spectral, vperp_spectral,
+            vpa_advect, z_advect, r_advect,
             composition, scratch_dummy, advance.r_diffusion, advance.vpa_diffusion)
         # Ensure normalised pdf exactly obeys integral constraints if evolving moments
         begin_s_r_z_region()
@@ -358,10 +396,10 @@ function setup_time_advance!(pdf, vz, vr, vzeta, vpa, vperp, z, r, vz_spectral,
             enforce_neutral_boundary_conditions!(
                 pdf.neutral.norm, pdf.charged.norm, boundary_distributions,
                 moments.neutral.dens, moments.neutral.uz, moments.neutral.pz, moments,
-                moments.charged.dens, moments.charged.upar, fields.Er, neutral_r_advect,
-                neutral_z_advect, nothing, nothing, neutral_vz_advect, r, z, vzeta, vr,
-                vz, composition, geometry, scratch_dummy, advance.r_diffusion,
-                advance.vz_diffusion)
+                moments.charged.dens, moments.charged.upar, fields.Er, vzeta_spectral,
+                vr_spectral, vz_spectral, neutral_r_advect, neutral_z_advect, nothing,
+                nothing, neutral_vz_advect, r, z, vzeta, vr, vz, composition, geometry,
+                scratch_dummy, advance.r_diffusion, advance.vz_diffusion)
             begin_sn_r_z_region()
             @loop_sn_r_z isn ir iz begin
                 @views hard_force_moment_constraints_neutral!(
@@ -400,7 +438,7 @@ function setup_time_advance!(pdf, vz, vr, vzeta, vpa, vperp, z, r, vz_spectral,
     _block_synchronize()
 
     return moments, fields, spectral_objects, advect_objects,
-    scratch, advance, scratch_dummy, manufactured_source_list
+    scratch, advance, fp_arrays, scratch_dummy, manufactured_source_list
 end
 
 """
@@ -411,8 +449,8 @@ else, will advance one term at a time.
 """
 function setup_advance_flags(moments, composition, t_input, collisions,
                              external_source_settings, num_diss_params,
-                             manufactured_solns_input, rk_coefs, r, vperp, vpa, vzeta, vr,
-                             vz)
+                             manufactured_solns_input, rk_coefs, r, z, vperp, vpa, vzeta,
+                             vr, vz)
     # default is not to concurrently advance different operators
     advance_vpa_advection = false
     advance_z_advection = false
@@ -440,14 +478,18 @@ function setup_advance_flags(moments, composition, t_input, collisions,
     r_diffusion = false
     vpa_diffusion = false
     vz_diffusion = false
+    explicit_weakform_fp_collisions = false
     # all advance flags remain false if using operator-splitting
     # otherwise, check to see if the flags need to be set to true
     if !t_input.split_operators
         # default for non-split operators is to include both vpa and z advection together
-        advance_vpa_advection = true
-        advance_z_advection = true
-        if r.n > 1
-            advance_r_advection = true
+        advance_vpa_advection = vpa.n > 1 && z.n > 1
+        advance_z_advection = z.n > 1
+        advance_r_advection = r.n > 1
+        if collisions.nuii > 0.0 && vperp.n > 1
+            explicit_weakform_fp_collisions = true
+        else
+            explicit_weakform_fp_collisions = false
         end
         # if neutrals present, check to see if different ion-neutral
         # collisions are enabled
@@ -537,7 +579,7 @@ function setup_advance_flags(moments, composition, t_input, collisions,
         # flag to determine if a d^2/dr^2 operator is present
         r_diffusion = (advance_numerical_dissipation && num_diss_params.r_dissipation_coefficient > 0.0)
         # flag to determine if a d^2/dvpa^2 operator is present
-        vpa_diffusion = (advance_numerical_dissipation && num_diss_params.vpa_dissipation_coefficient > 0.0)
+        vpa_diffusion = ((advance_numerical_dissipation && num_diss_params.vpa_dissipation_coefficient > 0.0) || explicit_weakform_fp_collisions)
         vz_diffusion = (advance_numerical_dissipation && num_diss_params.vz_dissipation_coefficient > 0.0)
     end
 
@@ -548,6 +590,7 @@ function setup_advance_flags(moments, composition, t_input, collisions,
                         advance_neutral_vz_advection, advance_cx, advance_cx_1V,
                         advance_ionization, advance_ionization_1V,
                         advance_ionization_source, advance_krook_collisions,
+                        explicit_weakform_fp_collisions,
                         advance_external_source, advance_numerical_dissipation,
                         advance_sources, advance_continuity, advance_force_balance,
                         advance_energy, advance_neutral_external_source,
@@ -558,12 +601,22 @@ end
 
 function setup_dummy_and_buffer_arrays(nr,nz,nvpa,nvperp,nvz,nvr,nvzeta,nspecies_ion,nspecies_neutral)
 
+    dummy_s = allocate_float(nspecies_ion)
     dummy_sr = allocate_float(nr, nspecies_ion)
     dummy_zrs = allocate_shared_float(nz, nr, nspecies_ion)
     dummy_zrsn = allocate_shared_float(nz, nr, nspecies_neutral)
     dummy_vpavperp = allocate_float(nvpa, nvperp)
+    
+    buffer_z_1 = allocate_shared_float(nz)
+    buffer_z_2 = allocate_shared_float(nz)
+    buffer_z_3 = allocate_shared_float(nz)
+    buffer_z_4 = allocate_shared_float(nz)
+    
+    buffer_r_1 = allocate_shared_float(nr)
+    buffer_r_2 = allocate_shared_float(nr)
+    buffer_r_3 = allocate_shared_float(nr)
+    buffer_r_4 = allocate_shared_float(nr)
 
-    # should the arrays below be shared memory arrays? MRH
     buffer_zs_1 = allocate_shared_float(nz,nspecies_ion)
     buffer_zs_2 = allocate_shared_float(nz,nspecies_ion)
     buffer_zs_3 = allocate_shared_float(nz,nspecies_ion)
@@ -586,6 +639,10 @@ function setup_dummy_and_buffer_arrays(nr,nz,nvpa,nvperp,nvz,nvr,nvzeta,nspecies
     buffer_rsn_5 = allocate_shared_float(nr,nspecies_neutral)
     buffer_rsn_6 = allocate_shared_float(nr,nspecies_neutral)
 
+    buffer_zrs_1 = allocate_shared_float(nz,nr,nspecies_ion)
+    buffer_zrs_2 = allocate_shared_float(nz,nr,nspecies_ion)
+    buffer_zrs_3 = allocate_shared_float(nz,nr,nspecies_ion)
+    
     buffer_vpavperpzs_1 = allocate_shared_float(nvpa,nvperp,nz,nspecies_ion)
     buffer_vpavperpzs_2 = allocate_shared_float(nvpa,nvperp,nz,nspecies_ion)
     buffer_vpavperpzs_3 = allocate_shared_float(nvpa,nvperp,nz,nspecies_ion)
@@ -602,7 +659,7 @@ function setup_dummy_and_buffer_arrays(nr,nz,nvpa,nvperp,nvz,nvr,nvzeta,nspecies
 
     buffer_vpavperpzrs_1 = allocate_shared_float(nvpa,nvperp,nz,nr,nspecies_ion)
     buffer_vpavperpzrs_2 = allocate_shared_float(nvpa,nvperp,nz,nr,nspecies_ion)
-
+    
     buffer_vzvrvzetazsn_1 = allocate_shared_float(nvz,nvr,nvzeta,nz,nspecies_neutral)
     buffer_vzvrvzetazsn_2 = allocate_shared_float(nvz,nvr,nvzeta,nz,nspecies_neutral)
     buffer_vzvrvzetazsn_3 = allocate_shared_float(nvz,nvr,nvzeta,nz,nspecies_neutral)
@@ -619,18 +676,26 @@ function setup_dummy_and_buffer_arrays(nr,nz,nvpa,nvperp,nvz,nvr,nvzeta,nspecies
 
     buffer_vzvrvzetazrsn_1 = allocate_shared_float(nvz,nvr,nvzeta,nz,nr,nspecies_neutral)
     buffer_vzvrvzetazrsn_2 = allocate_shared_float(nvz,nvr,nvzeta,nz,nr,nspecies_neutral)
-
-    return scratch_dummy_arrays(dummy_sr,dummy_vpavperp,dummy_zrs,dummy_zrsn,
+    
+    buffer_vpavperp_1 = allocate_shared_float(nvpa,nvperp)
+    buffer_vpavperp_2 = allocate_shared_float(nvpa,nvperp)
+    buffer_vpavperp_3 = allocate_shared_float(nvpa,nvperp)
+    
+    return scratch_dummy_arrays(dummy_s,dummy_sr,dummy_vpavperp,dummy_zrs,dummy_zrsn,
+        buffer_z_1,buffer_z_2,buffer_z_3,buffer_z_4,
+        buffer_r_1,buffer_r_2,buffer_r_3,buffer_r_4,
         buffer_zs_1,buffer_zs_2,buffer_zs_3,buffer_zs_4,
         buffer_zsn_1,buffer_zsn_2,buffer_zsn_3,buffer_zsn_4,
         buffer_rs_1,buffer_rs_2,buffer_rs_3,buffer_rs_4,buffer_rs_5,buffer_rs_6,
         buffer_rsn_1,buffer_rsn_2,buffer_rsn_3,buffer_rsn_4,buffer_rsn_5,buffer_rsn_6,
+        buffer_zrs_1,buffer_zrs_2,buffer_zrs_3,
         buffer_vpavperpzs_1,buffer_vpavperpzs_2,buffer_vpavperpzs_3,buffer_vpavperpzs_4,buffer_vpavperpzs_5,buffer_vpavperpzs_6,
         buffer_vpavperprs_1,buffer_vpavperprs_2,buffer_vpavperprs_3,buffer_vpavperprs_4,buffer_vpavperprs_5,buffer_vpavperprs_6,
         buffer_vpavperpzrs_1,buffer_vpavperpzrs_2,
         buffer_vzvrvzetazsn_1,buffer_vzvrvzetazsn_2,buffer_vzvrvzetazsn_3,buffer_vzvrvzetazsn_4,buffer_vzvrvzetazsn_5,buffer_vzvrvzetazsn_6,
         buffer_vzvrvzetarsn_1,buffer_vzvrvzetarsn_2,buffer_vzvrvzetarsn_3,buffer_vzvrvzetarsn_4,buffer_vzvrvzetarsn_5,buffer_vzvrvzetarsn_6,
-        buffer_vzvrvzetazrsn_1, buffer_vzvrvzetazrsn_2)
+        buffer_vzvrvzetazrsn_1, buffer_vzvrvzetazrsn_2,
+        buffer_vpavperp_1,buffer_vpavperp_2,buffer_vpavperp_3)
 
 end
 
@@ -752,8 +817,8 @@ time integrator can be used without severe CFL condition
 """
 function time_advance!(pdf, scratch, t, t_input, vz, vr, vzeta, vpa, vperp, gyrophase, z, r,
            moments, fields, spectral_objects, advect_objects,
-           composition, collisions, geometry, boundary_distributions,
-           external_source_settings, num_diss_params, advance, scratch_dummy,
+           composition, collisions, geometry, boundary_distributions, 
+           external_source_settings, num_diss_params, advance, fp_arrays, scratch_dummy,
            manufactured_source_list, ascii_io, io_moments, io_dfns)
 
     @debug_detect_redundant_block_synchronize begin
@@ -796,7 +861,7 @@ function time_advance!(pdf, scratch, t, t_input, vz, vr, vzeta, vpa, vperp, gyro
             time_advance_no_splitting!(pdf, scratch, t, t_input, vz, vr, vzeta, vpa, vperp, gyrophase, z, r,
                 moments, fields, spectral_objects, advect_objects,
                 composition, collisions, geometry, boundary_distributions,
-                external_source_settings, num_diss_params, advance, scratch_dummy,
+                external_source_settings, num_diss_params, advance, fp_arrays,  scratch_dummy,
                 manufactured_source_list, i)
         end
         # update the time
@@ -1231,14 +1296,14 @@ end
 function time_advance_no_splitting!(pdf, scratch, t, t_input, vz, vr, vzeta, vpa, vperp, gyrophase, z, r,
            moments, fields, spectral_objects, advect_objects,
            composition, collisions, geometry, boundary_distributions,
-           external_source_settings, num_diss_params, advance, scratch_dummy,
+           external_source_settings, num_diss_params, advance, fp_arrays, scratch_dummy,
            manufactured_source_list, istep)
 
     if t_input.n_rk_stages > 1
         ssp_rk!(pdf, scratch, t, t_input, vz, vr, vzeta, vpa, vperp, gyrophase, z, r,
             moments, fields, spectral_objects, advect_objects, composition, collisions,
             geometry, boundary_distributions, external_source_settings, num_diss_params,
-            advance, scratch_dummy, manufactured_source_list, istep)
+            advance, fp_arrays, scratch_dummy, manufactured_source_list, istep)
     else
         euler_time_advance!(scratch, scratch, pdf, fields, moments,
             advect_objects, vz, vr, vzeta, vpa, vperp, gyrophase, z, r, t,
@@ -1260,23 +1325,23 @@ stages, if the quantities are evolved separately from the modified pdf;
 or update them by taking the appropriate velocity moment of the evolved pdf
 """
 function rk_update!(scratch, pdf, moments, fields, boundary_distributions, vz, vr, vzeta,
-                    vpa, vperp, z, r, advect_objects, rk_coefs, istage, composition,
-                    geometry, num_diss_params, z_spectral, r_spectral, advance,
-                    scratch_dummy)
+                    vpa, vperp, z, r, spectral_objects, advect_objects, rk_coefs, istage, composition,
+                    geometry, num_diss_params, advance, scratch_dummy)
     begin_s_r_z_region()
 
     new_scratch = scratch[istage+1]
     old_scratch = scratch[istage]
 
+    z_spectral, r_spectral, vpa_spectral, vperp_spectral = spectral_objects.z_spectral, spectral_objects.r_spectral, spectral_objects.vpa_spectral, spectral_objects.vperp_spectral
+    vzeta_spectral, vr_spectral, vz_spectral = spectral_objects.vzeta_spectral, spectral_objects.vr_spectral, spectral_objects.vz_spectral
     vpa_advect, r_advect, z_advect = advect_objects.vpa_advect, advect_objects.r_advect, advect_objects.z_advect
     neutral_z_advect, neutral_r_advect, neutral_vz_advect = advect_objects.neutral_z_advect, advect_objects.neutral_r_advect, advect_objects.neutral_vz_advect
 
     ##
     # update the charged particle distribution and moments
     ##
-    # MRH here we seem to have duplicate arrays for storing n, u||, p||, etc, but not for vth
-    # MRH 'scratch' is for the multiple stages of time advanced quantities, but 'moments' can be updated directly at each stage
-    # MRH in the standard drift-kinetic model. Consider taking moment quantities out of scratch for clarity.
+    # here we seem to have duplicate arrays for storing n, u||, p||, etc, but not for vth
+    # 'scratch' is for the multiple stages of time advanced quantities, but 'moments' can be updated directly at each stage
     @loop_s_r_z_vperp_vpa is ir iz ivperp ivpa begin
         new_scratch.pdf[ivpa,ivperp,iz,ir,is] = rk_coefs[1]*pdf.charged.norm[ivpa,ivperp,iz,ir,is] + rk_coefs[2]*old_scratch.pdf[ivpa,ivperp,iz,ir,is] + rk_coefs[3]*new_scratch.pdf[ivpa,ivperp,iz,ir,is]
     end
@@ -1297,7 +1362,8 @@ function rk_update!(scratch, pdf, moments, fields, boundary_distributions, vz, v
     # NB: probably need to do the same for the evolved moments
     enforce_boundary_conditions!(new_scratch, moments,
         boundary_distributions.pdf_rboundary_charged, vpa.bc, z.bc, r.bc, vpa, vperp, z,
-        r, vpa_advect, z_advect, r_advect, composition, scratch_dummy,
+        r, vpa_spectral, vperp_spectral, 
+        vpa_advect, z_advect, r_advect, composition, scratch_dummy,
         advance.r_diffusion, advance.vpa_diffusion)
 
     if moments.evolve_density && moments.enforce_conservation
@@ -1365,9 +1431,10 @@ function rk_update!(scratch, pdf, moments, fields, boundary_distributions, vz, v
         enforce_neutral_boundary_conditions!(new_scratch.pdf_neutral, new_scratch.pdf,
             boundary_distributions, new_scratch.density_neutral, new_scratch.uz_neutral,
             new_scratch.pz_neutral, moments, new_scratch.density, new_scratch.upar,
-            fields.Er, neutral_r_advect, neutral_z_advect, nothing, nothing,
-            neutral_vz_advect, r, z, vzeta, vr, vz, composition, geometry, scratch_dummy,
-            advance.r_diffusion, advance.vz_diffusion)
+            fields.Er, vzeta_spectral, vr_spectral, vz_spectral, neutral_r_advect,
+            neutral_z_advect, nothing, nothing, neutral_vz_advect, r, z, vzeta, vr, vz,
+            composition, geometry, scratch_dummy, advance.r_diffusion,
+            advance.vz_diffusion)
 
         if moments.evolve_density && moments.enforce_conservation
             begin_sn_r_z_region()
@@ -1510,7 +1577,7 @@ end
 function ssp_rk!(pdf, scratch, t, t_input, vz, vr, vzeta, vpa, vperp, gyrophase, z, r,
            moments, fields, spectral_objects, advect_objects, composition, collisions,
            geometry, boundary_distributions, external_source_settings, num_diss_params,
-           advance, scratch_dummy, manufactured_source_list, istep)
+           advance, fp_arrays, scratch_dummy, manufactured_source_list,  istep)
 
     begin_s_r_z_region()
 
@@ -1555,12 +1622,11 @@ function ssp_rk!(pdf, scratch, t, t_input, vz, vr, vzeta, vpa, vperp, gyrophase,
             advect_objects, vz, vr, vzeta, vpa, vperp, gyrophase, z, r, t,
             t_input, spectral_objects, composition,
             collisions, geometry, scratch_dummy, manufactured_source_list,
-            external_source_settings, num_diss_params, advance, istage)
+            external_source_settings, num_diss_params, advance, fp_arrays, istage)
         @views rk_update!(scratch, pdf, moments, fields, boundary_distributions, vz, vr,
-                          vzeta, vpa, vperp, z, r, advect_objects,
+                          vzeta, vpa, vperp, z, r, spectral_objects, advect_objects,
                           advance.rk_coefs[:,istage], istage, composition, geometry,
-                          num_diss_params, spectral_objects.z_spectral,
-                          spectral_objects.r_spectral, advance, scratch_dummy)
+                          num_diss_params, advance, scratch_dummy)
     end
 
     istage = n_rk_stages+1
@@ -1642,7 +1708,7 @@ with fvec_in an input and fvec_out the output
 function euler_time_advance!(fvec_out, fvec_in, pdf, fields, moments,
     advect_objects, vz, vr, vzeta, vpa, vperp, gyrophase, z, r, t, t_input,
     spectral_objects, composition, collisions, geometry, scratch_dummy,
-    manufactured_source_list, external_source_settings, num_diss_params, advance, istage)
+    manufactured_source_list, external_source_settings, num_diss_params, advance, fp_arrays, istage)
 
     # define some abbreviated variables for tidiness
     n_ion_species = composition.n_ion_species
@@ -1652,7 +1718,7 @@ function euler_time_advance!(fvec_out, fvec_in, pdf, fields, moments,
     # only charged species have a force accelerating them in vpa;
     # however, neutral species do have non-zero d(wpa)/dt, so there is advection in wpa
 
-    vpa_spectral, r_spectral, z_spectral = spectral_objects.vpa_spectral, spectral_objects.r_spectral, spectral_objects.z_spectral
+    vpa_spectral, vperp_spectral, r_spectral, z_spectral = spectral_objects.vpa_spectral, spectral_objects.vperp_spectral, spectral_objects.r_spectral, spectral_objects.z_spectral
     vz_spectral, vr_spectral, vzeta_spectral = spectral_objects.vz_spectral, spectral_objects.vr_spectral, spectral_objects.vzeta_spectral
     vpa_advect, r_advect, z_advect = advect_objects.vpa_advect, advect_objects.r_advect, advect_objects.z_advect
     neutral_z_advect, neutral_r_advect, neutral_vz_advect = advect_objects.neutral_z_advect, advect_objects.neutral_r_advect, advect_objects.neutral_vz_advect
@@ -1769,6 +1835,8 @@ function euler_time_advance!(fvec_out, fvec_in, pdf, fields, moments,
     if advance.numerical_dissipation
         vpa_dissipation!(fvec_out.pdf, fvec_in.pdf, vpa, vpa_spectral, dt,
                          num_diss_params)
+        vperp_dissipation!(fvec_out.pdf, fvec_in.pdf, vperp, vperp_spectral, dt,
+                         num_diss_params)
         z_dissipation!(fvec_out.pdf, fvec_in.pdf, z, z_spectral, dt,
                        num_diss_params, scratch_dummy)
         r_dissipation!(fvec_out.pdf, fvec_in.pdf, r, r_spectral, dt,
@@ -1780,6 +1848,14 @@ function euler_time_advance!(fvec_out, fvec_in, pdf, fields, moments,
         r_dissipation_neutral!(fvec_out.pdf_neutral, fvec_in.pdf_neutral, r, r_spectral,
                                dt, num_diss_params, scratch_dummy)
     end
+    # advance with the Fokker-Planck self-collision operator
+    if advance.explicit_weakform_fp_collisions
+        update_entropy_diagnostic = (istage == 1)
+        explicit_fokker_planck_collisions_weak_form!(fvec_out.pdf,fvec_in.pdf,moments.charged.dSdt,composition,collisions,dt,
+                                             fp_arrays,r,z,vperp,vpa,vperp_spectral,vpa_spectral,scratch_dummy,
+                                             diagnose_entropy_production = update_entropy_diagnostic)
+    end
+    
     # End of advance for distribution function
 
     # Start advancing moments
diff --git a/src/velocity_moments.jl b/src/velocity_moments.jl
index ee91adaa1..fc2f2d2ed 100644
--- a/src/velocity_moments.jl
+++ b/src/velocity_moments.jl
@@ -31,6 +31,8 @@ export get_upar
 export get_ppar
 export get_pperp
 export get_pressure
+export get_qpar
+export get_rmom
 
 using ..type_definitions: mk_float
 using ..array_allocation: allocate_shared_float, allocate_bool, allocate_float
@@ -110,6 +112,8 @@ struct moments_charged_substruct
     dqpar_dz::Union{MPISharedArray{mk_float,3},Nothing}
     # this is the z-derivative of the thermal speed based on the parallel temperature Tpar = ppar/dens: vth = sqrt(2*Tpar/m)
     dvth_dz::Union{MPISharedArray{mk_float,3},Nothing}
+    # this is the entropy production dS/dt = - int (ln f sum_s' C_ss' [f_s,f_s']) d^3 v
+    dSdt::MPISharedArray{mk_float,3}
     # Spatially varying amplitude of the external source term
     external_source_amplitude::MPISharedArray{mk_float,2}
     # Spatially varying amplitude of the density moment of the external source term
@@ -295,6 +299,8 @@ function create_moments_charged(nz, nr, n_species, evolve_density, evolve_upar,
         dvth_dz = nothing
     end
 
+    entropy_production = allocate_shared_float(nz, nr, n_species)
+
     if ion_source_settings.active
         external_source_amplitude = allocate_shared_float(nz, nr)
         if evolve_density
@@ -336,7 +342,7 @@ function create_moments_charged(nz, nr, n_species, evolve_density, evolve_upar,
         parallel_heat_flux, parallel_heat_flux_updated, thermal_speed, 
         chodura_integral_lower, chodura_integral_upper, v_norm_fac,
         ddens_dz, ddens_dz_upwind, d2dens_dz2, dupar_dz, dupar_dz_upwind, d2upar_dz2,
-        dppar_dz, dppar_dz_upwind, d2ppar_dz2, dqpar_dz, dvth_dz,
+        dppar_dz, dppar_dz_upwind, d2ppar_dz2, dqpar_dz, dvth_dz, entropy_production,
         external_source_amplitude, external_source_density_amplitude,
         external_source_momentum_amplitude, external_source_pressure_amplitude,
         external_source_controller_integral)
@@ -575,7 +581,6 @@ the incoming pdf is the un-normalized pdf that satisfies int dv pdf = density
 """
 function update_upar!(upar, upar_updated, density, ppar, pdf, vpa, vperp, z, r,
                       composition, evolve_density, evolve_ppar)
-
     begin_s_r_z_region()
 
     n_species = size(pdf,5)
@@ -595,10 +600,12 @@ calculate the updated parallel flow (upar) for a given species
 """
 function update_upar_species!(upar, density, ppar, ff, vpa, vperp, z, r, evolve_density,
                               evolve_ppar)
+    @boundscheck vpa.n == size(ff, 1) || throw(BoundsError(ff))
     @boundscheck vperp.n == size(ff, 2) || throw(BoundsError(ff))
     @boundscheck z.n == size(ff, 3) || throw(BoundsError(ff))
     @boundscheck r.n == size(ff, 4) || throw(BoundsError(ff))
     @boundscheck z.n == size(upar, 1) || throw(BoundsError(upar))
+    @boundscheck r.n == size(upar, 2) || throw(BoundsError(upar))
     if evolve_density && evolve_ppar
         # this is the case where the density and parallel pressure are evolved
         # separately from the normalized pdf, g_s = (√π f_s vth_s / n_s); the vpa
@@ -798,6 +805,7 @@ calculate the updated parallel heat flux (qpar) for a given species
 """
 function update_qpar_species!(qpar, density, upar, vth, ff, vpa, vperp, z, r, evolve_density,
                               evolve_upar, evolve_ppar)
+    @boundscheck r.n == size(ff, 4) || throw(BoundsError(ff))
     @boundscheck z.n == size(ff, 3) || throw(BoundsError(ff))
     @boundscheck vperp.n == size(ff, 2) || throw(BoundsError(ff))
     @boundscheck vpa.n == size(ff, 1) || throw(BoundsError(ff))
@@ -1078,6 +1086,32 @@ function update_moments_neutral!(moments, pdf, vz, vr, vzeta, z, r, composition)
     return nothing
 end
 
+function get_qpar_1V(ff, vpa, vperp, upar)
+    vpa.scratch .= vpa.grid .- upar # shifted grid (vpa - upar); NB: overwrites vpa.scratch
+    return integrate_over_vspace(view(ff, :, :), vpa.scratch, 3, vpa.wgts, vperp.grid, 0, vperp.wgts)
+end
+
+function get_qpar(ff, vpa, vperp, upar, dummy_vpavperp)
+    # Fill dummy_vpavperp (overwritten) with ff * w * (w^2 + vperp^2), w = vpa - upar, and
+    # integrate it; the literal 0 arguments are presumably grid exponents -- TODO confirm.
+    for jperp in 1:vperp.n, jpa in 1:vpa.n
+        w = vpa.grid[jpa] - upar
+        dummy_vpavperp[jpa,jperp] = ff[jpa,jperp] * w * (w^2 + vperp.grid[jperp]^2)
+    end
+    return integrate_over_vspace(view(dummy_vpavperp, :, :), vpa.grid, 0, vpa.wgts, vperp.grid, 0, vperp.wgts)
+end
+
+# Generalised moment of ff weighted by (w^2 + vperp^2)^2, with w = vpa - upar; useful for
+# computing numerical conserving terms in the collision operator. Overwrites dummy_vpavperp.
+function get_rmom(ff, vpa, vperp, upar, dummy_vpavperp)
+    for jperp in 1:vperp.n, jpa in 1:vpa.n
+        w = vpa.grid[jpa] - upar
+        speed2 = w^2 + vperp.grid[jperp]^2
+        dummy_vpavperp[jpa,jperp] = ff[jpa,jperp] * speed2^2
+    end
+    return integrate_over_vspace(view(dummy_vpavperp, :, :), vpa.grid, 0, vpa.wgts, vperp.grid, 0, vperp.wgts)
+end
+
 """
 calculate the neutral density from the neutral pdf
 """
diff --git a/test/Project.toml b/test/Project.toml
index 37165a783..fb1cb33d4 100644
--- a/test/Project.toml
+++ b/test/Project.toml
@@ -6,3 +6,4 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 TimerOutputs = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f"
+LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
diff --git a/test/calculus_tests.jl b/test/calculus_tests.jl
index 386f1ea3e..c46c93af7 100644
--- a/test/calculus_tests.jl
+++ b/test/calculus_tests.jl
@@ -14,7 +14,7 @@ function runtests()
         println("calculus tests")
         @testset "fundamental theorem of calculus" begin
             @testset "$discretization $ngrid $nelement" for
-                    (discretization, element_spacing_option, etol) ∈ (("finite_difference", "uniform", 1.0e-15), ("chebyshev_pseudospectral", "uniform", 1.0e-15), ("chebyshev_pseudospectral", "sqrt", 1.0e-2)),
+                    (discretization, element_spacing_option, etol, cheb_option) ∈ (("finite_difference", "uniform", 1.0e-15, ""), ("chebyshev_pseudospectral", "uniform", 1.0e-15, "FFT"), ("chebyshev_pseudospectral", "uniform", 1.0e-15, "matrix"), ("chebyshev_pseudospectral", "sqrt", 1.0e-2, "FFT"), ("gausslegendre_pseudospectral", "uniform", 1.0e-14, "")),
                     ngrid ∈ (5,6,7,8,9,10), nelement ∈ (1, 2, 3, 4, 5)
 
                 if discretization == "finite_difference" && (ngrid - 1) * nelement % 2 == 1
@@ -40,7 +40,7 @@ function runtests()
 				comm = MPI.COMM_NULL # dummy value 
 				input = grid_input("coord", ngrid, nelement,
                     nelement_local, nrank_per_block, irank, L,
-                    discretization, fd_option, bc, adv_input, comm,
+                    discretization, fd_option, cheb_option, bc, adv_input, comm,
                     element_spacing_option)
                 # create the coordinate struct 'x'
                 x, spectral = define_coordinate(input)
@@ -80,6 +80,7 @@ function runtests()
                 # fd_option and adv_input not actually used so given values unimportant
                 fd_option = ""
                 adv_input = advection_input("default", 1.0, 0.0, 0.0)
+                cheb_option = ""
                 # create the 'input' struct containing input info needed to create a
                 # coordinate
                 nelement_local = nelement
@@ -89,7 +90,7 @@ function runtests()
 				element_spacing_option = "uniform" # dummy value
                 input = grid_input("coord", ngrid, nelement,
                     nelement_local, nrank_per_block, irank, L,
-                    "finite_difference", fd_option, bc, adv_input, comm,
+                    "finite_difference", fd_option, cheb_option, bc, adv_input, comm,
                     element_spacing_option)
                 # create the coordinate struct 'x'
                 x, spectral = define_coordinate(input)
@@ -129,6 +130,7 @@ function runtests()
                 # fd_option and adv_input not actually used so given values unimportant
                 fd_option = "fourth_order_centered"
                 adv_input = advection_input("default", 1.0, 0.0, 0.0)
+                cheb_option = ""
                 # create the 'input' struct containing input info needed to create a
                 # coordinate
                 nelement_local = nelement
@@ -138,7 +140,7 @@ function runtests()
                 element_spacing_option = "uniform" # dummy value
 				input = grid_input("coord", ngrid, nelement,
                     nelement_local, nrank_per_block, irank, L,
-                    "finite_difference", fd_option, bc, adv_input, comm,
+                    "finite_difference", fd_option, cheb_option, bc, adv_input, comm,
                     element_spacing_option)
                 # create the coordinate struct 'x'
                 x, spectral = define_coordinate(input)
@@ -174,6 +176,7 @@ function runtests()
                 # fd_option and adv_input not actually used so given values unimportant
                 fd_option = ""
                 adv_input = advection_input("default", 1.0, 0.0, 0.0)
+                cheb_option = ""
                 # create the 'input' struct containing input info needed to create a
                 # coordinate
                 nelement_local = nelement
@@ -183,7 +186,7 @@ function runtests()
                 element_spacing_option = "uniform" # dummy value
 				input = grid_input("coord", ngrid, nelement,
                     nelement_local, nrank_per_block, irank, L,
-                    "finite_difference", fd_option, bc, adv_input, comm,
+                    "finite_difference", fd_option, cheb_option, bc, adv_input, comm,
                     element_spacing_option)
                 # create the coordinate struct 'x'
                 x, spectral = define_coordinate(input)
@@ -227,6 +230,7 @@ function runtests()
                 L = 6.0
                 # fd_option and adv_input not actually used so given values unimportant
                 adv_input = advection_input("default", 1.0, 0.0, 0.0)
+                cheb_option = ""
                 # create the 'input' struct containing input info needed to create a
                 # coordinate
                 nelement_local = nelement
@@ -236,7 +240,7 @@ function runtests()
                 element_spacing_option = "uniform" # dummy value
 				input = grid_input("coord", ngrid, nelement,
                     nelement_local, nrank_per_block, irank, L,
-                    "finite_difference", fd_option, bc, adv_input, comm,
+                    "finite_difference", fd_option, cheb_option, bc, adv_input, comm,
                     element_spacing_option)
                # create the coordinate struct 'x'
                 x, spectral = define_coordinate(input)
@@ -435,7 +439,7 @@ function runtests()
                      (5, 31, 8.e-13),
                      (5, 32, 8.e-13),
                      (5, 33, 8.e-13),
-                    )
+                    ), cheb_option in ("FFT","matrix")
 
                 # define inputs needed for the test
                 L = 6.0
@@ -452,7 +456,7 @@ function runtests()
                 element_spacing_option = "uniform"
 				input = grid_input("coord", ngrid, nelement,
                     nelement_local, nrank_per_block, irank, L,
-                    "chebyshev_pseudospectral", fd_option, bc, adv_input, comm,
+                    "chebyshev_pseudospectral", fd_option, cheb_option, bc, adv_input, comm,
                     element_spacing_option)
                 # create the coordinate struct 'x' and info for derivatives, etc.
                 x, spectral = define_coordinate(input)
@@ -631,7 +635,7 @@ function runtests()
                      (5, 31, 8.e-13),
                      (5, 32, 8.e-13),
                      (5, 33, 8.e-13),
-                    )
+                    ), cheb_option in ("FFT","matrix")
 
                 # define inputs needed for the test
                 L = 6.0
@@ -648,7 +652,7 @@ function runtests()
                 element_spacing_option = "uniform"
 				input = grid_input("coord", ngrid, nelement,
                     nelement_local, nrank_per_block, irank, L,
-                    "chebyshev_pseudospectral", fd_option, bc, adv_input, comm,
+                    "chebyshev_pseudospectral", fd_option, cheb_option, bc, adv_input, comm,
                     element_spacing_option)
                 # create the coordinate struct 'x' and info for derivatives, etc.
                 x, spectral = define_coordinate(input)
@@ -667,7 +671,7 @@ function runtests()
                     # differentiate f
                     derivative!(df, f, x, adv_fac, spectral)
 
-                    @test isapprox(df, expected_df, rtol=rtol, atol=1.e-14,
+                    @test isapprox(df, expected_df, rtol=rtol, atol=1.e-12,
                                    norm=maxabs_norm)
                 end
             end
@@ -675,7 +679,7 @@ function runtests()
 
         @testset "Chebyshev pseudospectral derivatives (4 argument), polynomials" verbose=false begin
             @testset "$nelement $ngrid" for bc ∈ ("constant", "zero"), element_spacing_option ∈ ("uniform", "sqrt"),
-                    nelement ∈ (1:5), ngrid ∈ (3:33)
+                    nelement ∈ (1:5), ngrid ∈ (3:33), cheb_option in ("FFT","matrix")
 
                 # define inputs needed for the test
                 L = 1.0
@@ -691,11 +695,10 @@ function runtests()
 				comm = MPI.COMM_NULL # dummy value
                 input = grid_input("coord", ngrid, nelement,
                     nelement_local, nrank_per_block, irank, L,
-                    "chebyshev_pseudospectral", fd_option, bc, adv_input, comm,
+                    "chebyshev_pseudospectral", fd_option, cheb_option, bc, adv_input, comm,
                     element_spacing_option)
                 # create the coordinate struct 'x' and info for derivatives, etc.
                 x, spectral = define_coordinate(input)
-
                 # test polynomials up to order ngrid-1
                 for n ∈ 0:ngrid-1
                     # create array for the function f(x) to be differentiated/integrated
@@ -718,7 +721,7 @@ function runtests()
                     # something like p*(round-off) for x^p (?) so error on expected_df would
                     # be p*p*(round-off), or plausibly 1024*(round-off), so tolerance of
                     # 2e-11 isn't unreasonable.
-                    @test isapprox(df, expected_df, rtol=2.0e-11, atol=2.0e-12,
+                    @test isapprox(df, expected_df, rtol=2.0e-11, atol=6.0e-12,
                                    norm=maxabs_norm)
                 end
             end
@@ -726,7 +729,7 @@ function runtests()
 
         @testset "Chebyshev pseudospectral derivatives upwinding (5 argument), polynomials" verbose=false begin
             @testset "$nelement $ngrid" for bc ∈ ("constant", "zero"), element_spacing_option ∈ ("uniform", "sqrt"),
-                    nelement ∈ (1:5), ngrid ∈ (3:33)
+                    nelement ∈ (1:5), ngrid ∈ (3:33), cheb_option in ("FFT","matrix")
 
                 # define inputs needed for the test
                 L = 1.0
@@ -742,11 +745,358 @@ function runtests()
 				comm = MPI.COMM_NULL # dummy value
                 input = grid_input("coord", ngrid, nelement,
                     nelement_local, nrank_per_block, irank, L,
-                    "chebyshev_pseudospectral", fd_option, bc, adv_input, comm,
+                    "chebyshev_pseudospectral", fd_option, cheb_option, bc, adv_input, comm,
                     element_spacing_option)
                 # create the coordinate struct 'x' and info for derivatives, etc.
                 x, spectral = define_coordinate(input)
+                # test polynomials up to order ngrid-1
+                for n ∈ 0:ngrid-1
+                    # create array for the function f(x) to be differentiated/integrated
+                    f = Array{Float64,1}(undef, x.n)
+                    # create array for the derivative df/dx and the expected result
+                    df = similar(f)
+                    expected_df = similar(f)
+                    # initialize f and expected df
+                    f[:] .= randn(rng)
+                    expected_df .= 0.0
+                    for p ∈ 1:n
+                        coefficient = randn(rng)
+                        @. f += coefficient * x.grid ^ p
+                        @. expected_df += coefficient * p * x.grid ^ (p - 1)
+                    end
 
+                    for advection ∈ (-1.0, 0.0, 1.0)
+                        adv_fac = similar(f)
+                        adv_fac .= advection
+
+                        # differentiate f
+                        derivative!(df, f, x, adv_fac, spectral)
+
+                        # Note the error we might expect for a p=32 polynomial is probably
+                        # something like p*(round-off) for x^p (?) so error on expected_df
+                        # would be p*p*(round-off), or plausibly 1024*(round-off), so
+                        # tolerance of 3e-11 isn't unreasonable.
+                        @test isapprox(df, expected_df, rtol=3.0e-11, atol=3.0e-11,
+                                       norm=maxabs_norm)
+                    end
+                end
+            end
+        end
+        
+        @testset "GaussLegendre pseudospectral derivatives (4 argument), testing periodic functions" verbose=false begin
+            @testset "$nelement $ngrid" for (nelement, ngrid, rtol) ∈
+                    (
+                     (1, 5, 8.e-1),
+                     (1, 6, 2.e-1),
+                     (1, 7, 1.e-1),
+                     (1, 8, 1.e-2),
+                     (1, 9, 5.e-3),
+                     (1, 10, 3.e-3),
+                     (1, 11, 5.e-4),
+                     (1, 12, 5.e-6),
+                     (1, 13, 3.e-6),
+                     (1, 14, 8.e-8),
+                     (1, 15, 4.e-8),
+                     (1, 16, 8.e-10),
+                     (1, 17, 8.e-10),
+                     
+
+                     (2, 4, 2.e-1),
+                     (2, 5, 4.e-2),
+                     (2, 6, 2.e-2),
+                     (2, 7, 4.e-4),
+                     (2, 8, 2.e-4),
+                     (2, 9, 4.e-6),
+                     (2, 10, 2.e-6),
+                     (2, 11, 2.e-8),
+                     (2, 12, 1.e-8),
+                     (2, 13, 1.e-10),
+                     (2, 14, 5.e-11),
+                     (2, 15, 4.e-13),
+                     (2, 16, 2.e-13),
+                     (2, 17, 2.e-13),
+                     
+                     (3, 3, 4.e-1),
+                     (3, 4, 1.e-1),
+                     (3, 5, 1.e-2),
+                     (3, 6, 2.e-3),
+                     (3, 7, 1.e-4),
+                     (3, 8, 1.e-5),
+                     (3, 9, 6.e-7),
+                     (3, 10, 5.e-8),
+                     (3, 11, 2.e-9),
+                     (3, 12, 1.e-10),
+                     (3, 13, 5.e-12),
+                     (3, 14, 3.e-13),
+                     (3, 15, 2.e-13),
+                     (3, 16, 2.e-13),
+                     (3, 17, 2.e-13),
+                     
+                     (4, 3, 3.e-1),
+                     (4, 4, 4.e-2),
+                     (4, 5, 4.e-3),
+                     (4, 6, 4.e-4),
+                     (4, 7, 4.e-5),
+                     (4, 8, 1.e-6),
+                     (4, 9, 8.e-8),
+                     (4, 10, 4.e-9),
+                     (4, 11, 4.e-10),
+                     (4, 12, 4.e-12),
+                     (4, 13, 2.e-13),
+                     (4, 14, 2.e-13),
+                     (4, 15, 2.e-13),
+                     (4, 16, 2.e-13),
+                     (4, 17, 2.e-13),
+                     
+                     (5, 3, 2.e-1),
+                     (5, 4, 2.e-2),
+                     (5, 5, 2.e-3),
+                     (5, 6, 1.e-4),
+                     (5, 7, 1.e-5),
+                     (5, 8, 2.e-7),
+                     (5, 9, 2.e-8),
+                     (5, 10, 3.e-10),
+                     (5, 11, 2.e-11),
+                     (5, 12, 3.e-13),
+                     (5, 13, 2.e-13),
+                     (5, 14, 2.e-13),
+                     (5, 15, 2.e-13),
+                     (5, 16, 2.e-13),
+                     (5, 17, 4.e-13),
+                    )
+                # each tuple above is (nelement, ngrid, rtol); rtol is passed to isapprox below
+                # define inputs needed for the test
+                L = 6.0
+                bc = "periodic"
+                # fd_option and adv_input not actually used so given values unimportant
+                fd_option = ""
+                adv_input = advection_input("default", 1.0, 0.0, 0.0)
+                cheb_option = "" # dummy value; presumably ignored for gausslegendre_pseudospectral
+                # create the 'input' struct containing input info needed to create a
+                # coordinate
+                nelement_local = nelement
+				nrank_per_block = 0 # dummy value
+				irank = 0 # dummy value
+				comm = MPI.COMM_NULL # dummy value
+                element_spacing_option = "uniform"
+				input = grid_input("coord", ngrid, nelement,
+                    nelement_local, nrank_per_block, irank, L,
+                    "gausslegendre_pseudospectral", fd_option, cheb_option, bc, adv_input, comm,
+                    element_spacing_option)
+                # create the coordinate struct 'x' and info for derivatives, etc.
+                x, spectral = define_coordinate(input,init_YY=false)
+                # test function: sin(2*pi*x/L) (period L) plus a random constant offset
+                offset = randn(rng)
+                f = @. sinpi(2.0 * x.grid / L) + offset
+                expected_df = @. 2.0 * π / L * cospi(2.0 * x.grid / L)
+
+                # create array for the derivative df/dx
+                df = similar(f)
+
+                # differentiate f
+                derivative!(df, f, x, spectral)
+
+                @test isapprox(df, expected_df, rtol=rtol, atol=1.e-14,
+                               norm=maxabs_norm)
+            end
+        end
+
+        @testset "GaussLegendre pseudospectral derivatives upwinding (5 argument), testing periodic functions" verbose=false begin
+            @testset "$nelement $ngrid" for (nelement, ngrid, rtol) ∈
+                    (
+                     (1, 5, 8.e-1),
+                     (1, 6, 2.e-1),
+                     (1, 7, 1.e-1),
+                     (1, 8, 1.e-2),
+                     (1, 9, 5.e-3),
+                     (1, 10, 3.e-3),
+                     (1, 11, 8.e-4),
+                     (1, 12, 5.e-6),
+                     (1, 13, 3.e-6),
+                     (1, 14, 8.e-8),
+                     (1, 15, 4.e-8),
+                     (1, 16, 8.e-10),
+                     (1, 17, 8.e-10),
+                     
+                     (2, 4, 2.e-1),
+                     (2, 5, 4.e-2),
+                     (2, 6, 2.e-2),
+                     (2, 7, 4.e-4),
+                     (2, 8, 2.e-4),
+                     (2, 9, 4.e-6),
+                     (2, 10, 2.e-6),
+                     (2, 11, 2.e-8),
+                     (2, 12, 1.e-8),
+                     (2, 13, 1.e-10),
+                     (2, 14, 5.e-11),
+                     (2, 15, 4.e-13),
+                     (2, 16, 2.e-13),
+                     (2, 17, 2.e-13),
+                     
+                     (3, 3, 4.e-1),
+                     (3, 4, 1.e-1),
+                     (3, 5, 3.e-2),
+                     (3, 6, 2.e-3),
+                     (3, 7, 5.e-4),
+                     (3, 8, 1.e-5),
+                     (3, 9, 1.e-6),
+                     (3, 10, 5.e-8),
+                     (3, 11, 2.e-8),
+                     (3, 12, 1.e-9),
+                     (3, 13, 5.e-11),
+                     (3, 14, 3.e-13),
+                     (3, 15, 2.e-13),
+                     (3, 16, 2.e-13),
+                     (3, 17, 2.e-13),
+                     
+                     (4, 3, 3.e-1),
+                     (4, 4, 4.e-2),
+                     (4, 5, 4.e-3),
+                     (4, 6, 4.e-4),
+                     (4, 7, 4.e-5),
+                     (4, 8, 4.e-6),
+                     (4, 9, 8.e-8),
+                     (4, 10, 4.e-9),
+                     (4, 11, 5.e-10),
+                     (4, 12, 4.e-12),
+                     (4, 13, 2.e-13),
+                     (4, 14, 2.e-13),
+                     (4, 15, 2.e-13),
+                     (4, 16, 2.e-13),
+                     (4, 17, 2.e-13),
+                     
+                     (5, 3, 2.e-1),
+                     (5, 4, 2.e-2),
+                     (5, 5, 2.e-3),
+                     (5, 6, 1.e-4),
+                     (5, 7, 1.e-5),
+                     (5, 8, 4.e-7),
+                     (5, 9, 2.e-8),
+                     (5, 10, 8.e-10),
+                     (5, 11, 2.e-11),
+                     (5, 12, 3.e-13),
+                     (5, 13, 2.e-13),
+                     (5, 14, 2.e-13),
+                     (5, 15, 2.e-13),
+                     (5, 16, 2.e-13),
+                     (5, 17, 4.e-13),
+                    )
+                # each tuple above is (nelement, ngrid, rtol); rtol is passed to isapprox below
+                # define inputs needed for the test
+                L = 6.0
+                bc = "periodic"
+                # fd_option and adv_input not actually used so given values unimportant
+                fd_option = ""
+                adv_input = advection_input("default", 1.0, 0.0, 0.0)
+                cheb_option = "" # dummy value; presumably ignored for gausslegendre_pseudospectral
+                # create the 'input' struct containing input info needed to create a
+                # coordinate
+                nelement_local = nelement
+				nrank_per_block = 0 # dummy value
+				irank = 0 # dummy value
+				comm = MPI.COMM_NULL # dummy value
+                element_spacing_option = "uniform"
+				input = grid_input("coord", ngrid, nelement,
+                    nelement_local, nrank_per_block, irank, L,
+                    "gausslegendre_pseudospectral", fd_option, cheb_option, bc, adv_input, comm,
+                    element_spacing_option)
+                # create the coordinate struct 'x' and info for derivatives, etc.
+                x, spectral = define_coordinate(input,init_YY=false)
+                # test function: sin(2*pi*x/L) (period L) plus a random constant offset
+                offset = randn(rng)
+                f = @. sinpi(2.0 * x.grid / L) + offset
+                expected_df = @. 2.0 * π / L * cospi(2.0 * x.grid / L)
+
+                # create array for the derivative df/dx
+                df = similar(f)
+                # the same expected_df is checked for negative, zero and positive advection factors
+                for advection ∈ (-1.0, 0.0, 1.0)
+                    adv_fac = similar(f)
+                    adv_fac .= advection
+
+                    # differentiate f
+                    derivative!(df, f, x, adv_fac, spectral)
+
+                    @test isapprox(df, expected_df, rtol=rtol, atol=1.e-12,
+                                   norm=maxabs_norm)
+                end
+            end
+        end
+        
+        @testset "GaussLegendre pseudospectral derivatives (4 argument), testing exact polynomials" verbose=false begin
+            @testset "$nelement $ngrid" for bc ∈ ("constant", "zero"), element_spacing_option ∈ ("uniform", "sqrt"),
+                    nelement ∈ (1:5), ngrid ∈ (3:17)
+                    
+                # define inputs needed for the test
+                L = 1.0
+                bc = "constant" # NOTE(review): overwrites the loop's bc, so "zero" is never used - confirm intended
+                # fd_option and adv_input not actually used so given values unimportant
+                fd_option = ""
+                adv_input = advection_input("default", 1.0, 0.0, 0.0)
+                cheb_option = "" #not used
+                # create the 'input' struct containing input info needed to create a
+                # coordinate
+                nelement_local = nelement
+				nrank_per_block = 0 # dummy value
+				irank = 0 # dummy value
+				comm = MPI.COMM_NULL # dummy value
+                input = grid_input("coord", ngrid, nelement,
+                    nelement_local, nrank_per_block, irank, L,
+                    "gausslegendre_pseudospectral", fd_option, cheb_option, bc, adv_input, comm,
+                    element_spacing_option)
+                # create the coordinate struct 'x' and info for derivatives, etc.
+                x, spectral = define_coordinate(input,init_YY=false)
+                # test polynomials up to order ngrid-1
+                for n ∈ 0:ngrid-1
+                    # create array for the function f(x) to be differentiated/integrated
+                    f = Array{Float64,1}(undef, x.n)
+                    # create array for the derivative df/dx and the expected result
+                    df = similar(f)
+                    expected_df = similar(f)
+                    # initialize f and expected df
+                    f[:] .= randn(rng) # random constant term; its derivative is zero
+                    expected_df .= 0.0
+                    for p ∈ 1:n
+                        coefficient = randn(rng)
+                        @. f += coefficient * x.grid ^ p
+                        @. expected_df += coefficient * p * x.grid ^ (p - 1)
+                    end
+                    # differentiate f
+                    derivative!(df, f, x, spectral)
+
+                    # Note the highest polynomial order here is p=16 (ngrid=17). The error is
+                    # probably something like p*(round-off) for x^p (?) so error on expected_df
+                    # would be p*p*(round-off), or plausibly 256*(round-off), so an rtol of
+                    # 2e-11 isn't unreasonable.
+                    @test isapprox(df, expected_df, rtol=2.0e-11, atol=6.0e-12,
+                                   norm=maxabs_norm)
+                end
+            end
+        end
+        
+        @testset "GaussLegendre pseudospectral derivatives upwinding (5 argument), testing exact polynomials" verbose=false begin
+            @testset "$nelement $ngrid" for bc ∈ ("constant", "zero"), element_spacing_option ∈ ("uniform", "sqrt"),
+                    nelement ∈ (1:5), ngrid ∈ (3:17)
+
+                # define inputs needed for the test
+                L = 1.0
+                bc = "constant"
+                # fd_option and adv_input not actually used so given values unimportant
+                fd_option = ""
+                adv_input = advection_input("default", 1.0, 0.0, 0.0)
+                cheb_option = "" # not used
+                # create the 'input' struct containing input info needed to create a
+                # coordinate
+                nelement_local = nelement
+				nrank_per_block = 0 # dummy value
+				irank = 0 # dummy value
+				comm = MPI.COMM_NULL # dummy value
+                input = grid_input("coord", ngrid, nelement,
+                    nelement_local, nrank_per_block, irank, L,
+                    "gausslegendre_pseudospectral", fd_option, cheb_option, bc, adv_input, comm,
+                    element_spacing_option)
+                # create the coordinate struct 'x' and info for derivatives, etc.
+                x, spectral = define_coordinate(input,init_YY=false)
                 # test polynomials up to order ngrid-1
                 for n ∈ 0:ngrid-1
                     # create array for the function f(x) to be differentiated/integrated
@@ -774,7 +1124,7 @@ function runtests()
                         # something like p*(round-off) for x^p (?) so error on expected_df
                         # would be p*p*(round-off), or plausibly 1024*(round-off), so
                         # tolerance of 3e-11 isn't unreasonable.
-                        @test isapprox(df, expected_df, rtol=3.0e-11, atol=3.0e-12,
+                        @test isapprox(df, expected_df, rtol=3.0e-11, atol=3.0e-11,
                                        norm=maxabs_norm)
                     end
                 end
@@ -940,7 +1290,7 @@ function runtests()
                      (5, 31, 8.e-13),
                      (5, 32, 8.e-13),
                      (5, 33, 8.e-13),
-                    )
+                    ), cheb_option in ("FFT","matrix")
 
                 # define inputs needed for the test
                 L = 6.0
@@ -957,7 +1307,7 @@ function runtests()
                 element_spacing_option = "uniform"
 				input = grid_input("coord", ngrid, nelement,
                     nelement_local, nrank_per_block, irank, L,
-                    "chebyshev_pseudospectral", fd_option, bc, adv_input, comm,
+                    "chebyshev_pseudospectral", fd_option, cheb_option, bc, adv_input, comm,
                     element_spacing_option)
                 # create the coordinate struct 'x' and info for derivatives, etc.
                 x, spectral = define_coordinate(input)
@@ -970,8 +1320,115 @@ function runtests()
                 d2f = similar(f)
 
                 # differentiate f
-                x.scratch2 .= 1.0 # placeholder for Q in d / d x ( Q d f / d x)
-                second_derivative!(d2f, f, x.scratch2, x, spectral)
+                second_derivative!(d2f, f, x, spectral)
+
+                @test isapprox(d2f, expected_d2f, rtol=rtol, atol=1.e-10,
+                               norm=maxabs_norm)
+            end
+        end
+        
+        @testset "GaussLegendre pseudospectral second derivatives (4 argument), periodic" verbose=false begin
+            @testset "$nelement $ngrid" for (nelement, ngrid, rtol) ∈
+                    (
+                     (1, 8, 2.e-2),
+                     (1, 9, 5.e-3),
+                     (1, 10, 3.e-3),
+                     (1, 11, 2.e-4),
+                     (1, 12, 2.e-5),
+                     (1, 13, 4.e-6),
+                     (1, 14, 4.e-7),
+                     (1, 15, 1.e-7),
+                     (1, 16, 5.e-9),
+                     (1, 17, 1.e-9),
+                     
+                     (2, 4, 2.e-1),
+                     (2, 5, 5.e-2),
+                     (2, 6, 2.e-2),
+                     (2, 7, 2.e-3),
+                     (2, 8, 2.e-4),
+                     (2, 9, 2.e-5),
+                     (2, 10, 4.e-6),
+                     (2, 11, 2.e-7),
+                     (2, 12, 4.e-8),
+                     (2, 13, 8.e-10),
+                     (2, 14, 2.e-10),
+                     (2, 15, 4.e-13),
+                     (2, 16, 2.e-13),
+                     (2, 17, 2.e-13),
+                     
+                     (3, 5, 1.e-1),
+                     (3, 6, 2.e-2),
+                     (3, 7, 2.e-3),
+                     (3, 8, 2.e-4),
+                     (3, 9, 1.e-4),
+                     (3, 10, 4.e-6),
+                     (3, 11, 1.e-7),
+                     (3, 12, 8.e-9),
+                     (3, 13, 8.e-10),
+                     (3, 14, 3.e-10),
+                     (3, 15, 2.e-10),
+                     (3, 16, 2.e-10),
+                     (3, 17, 2.e-10),
+                     
+                     (4, 5, 5.e-2),
+                     (4, 6, 2.e-2),
+                     (4, 7, 2.e-3),
+                     (4, 8, 2.e-4),
+                     (4, 9, 1.e-4),
+                     (4, 10, 1.e-6),
+                     (4, 11, 8.e-9),
+                     (4, 12, 8.e-10),
+                     (4, 13, 8.e-10),
+                     (4, 14, 8.e-10),
+                     (4, 15, 8.e-10),
+                     (4, 16, 8.e-10),
+                     (4, 17, 8.e-10),
+                     
+                     (5, 5, 4.e-2),
+                     (5, 6, 8.e-3),
+                     (5, 7, 5.e-4),
+                     (5, 8, 5.e-5),
+                     (5, 9, 8.e-7),
+                     (5, 10, 5.e-8),
+                     (5, 11, 8.e-10),
+                     (5, 12, 4.e-10),
+                     (5, 13, 2.e-10),
+                     (5, 14, 2.e-10),
+                     (5, 15, 8.e-10),
+                     (5, 16, 8.e-10),
+                     (5, 17, 8.e-10),
+                     )
+
+                # define inputs needed for the test
+                L = 6.0
+                bc = "periodic"
+                # fd_option and adv_input not actually used so given values unimportant
+                fd_option = ""
+                adv_input = advection_input("default", 1.0, 0.0, 0.0)
+                cheb_option = ""
+                # create the 'input' struct containing input info needed to create a
+                # coordinate
+                nelement_local = nelement
+				nrank_per_block = 0 # dummy value
+				irank = 0 # dummy value
+				comm = MPI.COMM_NULL # dummy value
+                element_spacing_option = "uniform"
+				input = grid_input("coord", ngrid, nelement,
+                    nelement_local, nrank_per_block, irank, L,
+                    "gausslegendre_pseudospectral", fd_option, cheb_option, bc, adv_input, comm,
+                    element_spacing_option)
+                # create the coordinate struct 'x' and info for derivatives, etc.
+                x, spectral = define_coordinate(input,init_YY=false)
+
+                offset = randn(rng)
+                f = @. sinpi(2.0 * x.grid / L) + offset
+                expected_d2f = @. -4.0 * π^2 / L^2 * sinpi(2.0 * x.grid / L)
+
+                # create array for the derivative d2f/dx2
+                d2f = similar(f)
+
+                # differentiate f
+                second_derivative!(d2f, f, x, spectral)
 
                 @test isapprox(d2f, expected_d2f, rtol=rtol, atol=1.e-10,
                                norm=maxabs_norm)
diff --git a/test/fokker_planck_tests.jl b/test/fokker_planck_tests.jl
new file mode 100644
index 000000000..feb930de2
--- /dev/null
+++ b/test/fokker_planck_tests.jl
@@ -0,0 +1,558 @@
+module FokkerPlanckTests
+
+include("setup.jl")
+
+
+using MPI
+using moment_kinetics.fokker_planck_calculus: ravel_c_to_vpavperp!, ravel_vpavperp_to_c!, ravel_c_to_vpavperp_parallel!
+using LinearAlgebra: mul!
+using moment_kinetics.communication
+using moment_kinetics.looping
+using moment_kinetics.array_allocation: allocate_float, allocate_shared_float
+using moment_kinetics.input_structs: grid_input, advection_input
+using moment_kinetics.coordinates: define_coordinate
+using moment_kinetics.type_definitions: mk_float, mk_int
+using moment_kinetics.velocity_moments: get_density, get_upar, get_ppar, get_pperp, get_pressure
+
+using moment_kinetics.fokker_planck: init_fokker_planck_collisions_weak_form, fokker_planck_collision_operator_weak_form!
+using moment_kinetics.fokker_planck: conserving_corrections!, init_fokker_planck_collisions_direct_integration
+using moment_kinetics.fokker_planck_test: print_test_data, plot_test_data, fkpl_error_data, allocate_error_data
+using moment_kinetics.fokker_planck_test: F_Maxwellian, G_Maxwellian, H_Maxwellian
+using moment_kinetics.fokker_planck_test: d2Gdvpa2_Maxwellian, d2Gdvperp2_Maxwellian, d2Gdvperpdvpa_Maxwellian, dGdvperp_Maxwellian
+using moment_kinetics.fokker_planck_test: dHdvperp_Maxwellian, dHdvpa_Maxwellian, Cssp_Maxwellian_inputs
+using moment_kinetics.fokker_planck_calculus: calculate_rosenbluth_potentials_via_elliptic_solve!, calculate_rosenbluth_potential_boundary_data_exact!
+using moment_kinetics.fokker_planck_calculus: test_rosenbluth_potential_boundary_data, allocate_rosenbluth_potential_boundary_data
+using moment_kinetics.fokker_planck_calculus: enforce_vpavperp_BCs!, calculate_rosenbluth_potentials_via_direct_integration!
+
+"""
+    create_grids(ngrid, nelement_vpa, nelement_vperp; Lvpa=12.0, Lvperp=6.0)
+
+Create the "vpa" and "vperp" coordinates and their spectral objects from `ngrid`, the
+element counts and the lengths `Lvpa`/`Lvperp`, then initialise communications and
+set up the loop ranges for the resulting grid sizes.
+
+Returns `(vpa, vpa_spectral, vperp, vperp_spectral)`.
+"""
+function create_grids(ngrid,nelement_vpa,nelement_vperp;
+                      Lvpa=12.0,Lvperp=6.0)
+    # settings shared by both coordinates; fd_option and adv_input are not actually
+    # used, so the values given to them are unimportant
+    discretization = "gausslegendre_pseudospectral"
+    fd_option = "fourth_order_centered"
+    cheb_option = "matrix"
+    bc = "zero" # used only in derivative! functions
+    element_spacing_option = "uniform"
+    adv_input = advection_input("default", 1.0, 0.0, 0.0)
+    # a single rank, so the global and per-rank element counts are the same number
+    nrank, irank, comm = 1, 0, MPI.COMM_NULL
+
+    # build the 'input' struct for one coordinate (global count, then local count)
+    make_input(name, nelement, L) = grid_input(name, ngrid, nelement, nelement,
+        nrank, irank, L, discretization, fd_option, cheb_option, bc, adv_input,
+        comm, element_spacing_option)
+
+    # create the coordinate structs, vpa first and then vperp
+    vpa, vpa_spectral = define_coordinate(make_input("vpa", nelement_vpa, Lvpa))
+    vperp, vperp_spectral = define_coordinate(make_input("vperp", nelement_vperp, Lvperp))
+
+    # Set up MPI
+    initialize_comms!()
+    setup_distributed_memory_MPI(1,1,1,1)
+    looping.setup_loop_ranges!(block_rank[], block_size[];
+        s=1, sn=1, r=1, z=1,
+        vperp=vperp.n, vpa=vpa.n,
+        vzeta=1, vr=1, vz=1)
+
+    return vpa, vpa_spectral, vperp, vperp_spectral
+end
+
+function runtests()
+    print_to_screen = false
+    @testset "Fokker Planck tests" verbose=use_verbose begin
+        println("Fokker Planck tests")
+        
+        @testset " - test weak-form 2D differentiation" begin
+        # tests the correct definition of mass and stiffness matrices in 2D
+            ngrid = 9
+            nelement_vpa = 8
+            nelement_vperp = 4
+            vpa, vpa_spectral, vperp, vperp_spectral = create_grids(ngrid,nelement_vpa,nelement_vperp,
+                                                                        Lvpa=2.0,Lvperp=1.0)
+            nc_global = vpa.n*vperp.n
+            begin_serial_region()
+            fkpl_arrays = init_fokker_planck_collisions_weak_form(vpa,vperp,vpa_spectral,vperp_spectral,
+                                                                  precompute_weights=false, print_to_screen=print_to_screen)
+            KKpar2D_with_BC_terms_sparse = fkpl_arrays.KKpar2D_with_BC_terms_sparse
+            KKperp2D_with_BC_terms_sparse = fkpl_arrays.KKperp2D_with_BC_terms_sparse
+            lu_obj_MM = fkpl_arrays.lu_obj_MM
+            
+            dummy_array = allocate_float(vpa.n,vperp.n)
+            fvpavperp = allocate_float(vpa.n,vperp.n)
+            fvpavperp_test = allocate_float(vpa.n,vperp.n)
+            fvpavperp_err = allocate_float(vpa.n,vperp.n)
+            d2fvpavperp_dvpa2_exact = allocate_float(vpa.n,vperp.n)
+            d2fvpavperp_dvpa2_err = allocate_float(vpa.n,vperp.n)
+            d2fvpavperp_dvpa2_num = allocate_float(vpa.n,vperp.n)
+            d2fvpavperp_dvperp2_exact = allocate_float(vpa.n,vperp.n)
+            d2fvpavperp_dvperp2_err = allocate_float(vpa.n,vperp.n)
+            d2fvpavperp_dvperp2_num = allocate_float(vpa.n,vperp.n)
+            fc = allocate_float(nc_global)
+            dfc = allocate_float(nc_global)
+            gc = allocate_float(nc_global)
+            dgc = allocate_float(nc_global)
+            for ivperp in 1:vperp.n
+                for ivpa in 1:vpa.n
+                    fvpavperp[ivpa,ivperp] = exp(-vpa.grid[ivpa]^2 - vperp.grid[ivperp]^2)
+                    d2fvpavperp_dvpa2_exact[ivpa,ivperp] = (4.0*vpa.grid[ivpa]^2 - 2.0)*exp(-vpa.grid[ivpa]^2 - vperp.grid[ivperp]^2)
+                    d2fvpavperp_dvperp2_exact[ivpa,ivperp] = (4.0*vperp.grid[ivperp]^2 - 2.0)*exp(-vpa.grid[ivpa]^2 - vperp.grid[ivperp]^2)
+                end
+            end
+            
+            # fill fc with fvpavperp
+            ravel_vpavperp_to_c!(fc,fvpavperp,vpa.n,vperp.n)
+            ravel_c_to_vpavperp!(fvpavperp_test,fc,nc_global,vpa.n)
+            @. fvpavperp_err = abs(fvpavperp - fvpavperp_test)
+            max_ravel_err = maximum(fvpavperp_err)
+            @serial_region begin
+                if print_to_screen 
+                    println("max(ravel_err)",max_ravel_err)
+                end
+                @test max_ravel_err < 1.0e-15
+            end
+            #print_vector(fc,"fc",nc_global)
+            # multiply by KKpar2D and fill dfc
+            mul!(dfc,KKpar2D_with_BC_terms_sparse,fc)
+            mul!(dgc,KKperp2D_with_BC_terms_sparse,fc)
+            # invert mass matrix and fill fc
+            fc = lu_obj_MM \ dfc
+            gc = lu_obj_MM \ dgc
+            #print_vector(fc,"fc",nc_global)
+            # unravel
+            ravel_c_to_vpavperp!(d2fvpavperp_dvpa2_num,fc,nc_global,vpa.n)
+            ravel_c_to_vpavperp!(d2fvpavperp_dvperp2_num,gc,nc_global,vpa.n)
+            @serial_region begin 
+                d2fvpavperp_dvpa2_max, d2fvpavperp_dvpa2_L2 = print_test_data(d2fvpavperp_dvpa2_exact,d2fvpavperp_dvpa2_num,d2fvpavperp_dvpa2_err,"d2fdvpa2",vpa,vperp,dummy_array,print_to_screen=print_to_screen)
+                @test d2fvpavperp_dvpa2_max < 1.0e-7
+                @test d2fvpavperp_dvpa2_L2 < 1.0e-8
+                d2fvpavperp_dvperp2_max, d2fvpavperp_dvperp2_L2 = print_test_data(d2fvpavperp_dvperp2_exact,d2fvpavperp_dvperp2_num,d2fvpavperp_dvperp2_err,"d2fdvperp2",vpa,vperp,dummy_array,print_to_screen=print_to_screen)
+                @test d2fvpavperp_dvperp2_max < 1.0e-7
+                @test d2fvpavperp_dvperp2_L2 < 1.0e-8
+                #if plot_test_output
+                #    plot_test_data(d2fvpavperp_dvpa2_exact,d2fvpavperp_dvpa2_num,d2fvpavperp_dvpa2_err,"d2fvpavperp_dvpa2",vpa,vperp)
+                #    plot_test_data(d2fvpavperp_dvperp2_exact,d2fvpavperp_dvperp2_num,d2fvpavperp_dvperp2_err,"d2fvpavperp_dvperp2",vpa,vperp)
+                #end
+            end
+            finalize_comms!()
+        end
+        
+        @testset " - test weak-form Rosenbluth potential calculation: elliptic solve" begin # potentials of a Maxwellian via the elliptic-solve routine vs. the *_Maxwellian reference functions
+            ngrid = 9
+            nelement_vpa = 8
+            nelement_vperp = 4
+            vpa, vpa_spectral, vperp, vperp_spectral = create_grids(ngrid,nelement_vpa,nelement_vperp,
+                                                                        Lvpa=12.0,Lvperp=6.0)
+            # set up the weak-form arrays; fkpl_arrays provides GG, HH, their derivatives and rpbd used below
+            begin_serial_region()
+            fkpl_arrays = init_fokker_planck_collisions_weak_form(vpa,vperp,vpa_spectral,vperp_spectral,
+                                                                  precompute_weights=true, print_to_screen=print_to_screen)
+            dummy_array = allocate_float(vpa.n,vperp.n) # scratch buffer handed to print_test_data
+            F_M = allocate_float(vpa.n,vperp.n) # input distribution
+            H_M_exact = allocate_float(vpa.n,vperp.n) # *_exact: reference values from the *_Maxwellian functions
+            H_M_num = allocate_shared_float(vpa.n,vperp.n) # *_num: numerical result, filled inside a parallel loop below
+            H_M_err = allocate_float(vpa.n,vperp.n) # *_err: error buffer passed to print_test_data
+            G_M_exact = allocate_float(vpa.n,vperp.n)
+            G_M_num = allocate_shared_float(vpa.n,vperp.n)
+            G_M_err = allocate_float(vpa.n,vperp.n)
+            d2Gdvpa2_M_exact = allocate_float(vpa.n,vperp.n)
+            d2Gdvpa2_M_num = allocate_shared_float(vpa.n,vperp.n)
+            d2Gdvpa2_M_err = allocate_float(vpa.n,vperp.n)
+            d2Gdvperp2_M_exact = allocate_float(vpa.n,vperp.n)
+            d2Gdvperp2_M_num = allocate_shared_float(vpa.n,vperp.n)
+            d2Gdvperp2_M_err = allocate_float(vpa.n,vperp.n)
+            dGdvperp_M_exact = allocate_float(vpa.n,vperp.n)
+            dGdvperp_M_num = allocate_shared_float(vpa.n,vperp.n)
+            dGdvperp_M_err = allocate_float(vpa.n,vperp.n)
+            d2Gdvperpdvpa_M_exact = allocate_float(vpa.n,vperp.n)
+            d2Gdvperpdvpa_M_num = allocate_shared_float(vpa.n,vperp.n)
+            d2Gdvperpdvpa_M_err = allocate_float(vpa.n,vperp.n)
+            dHdvpa_M_exact = allocate_float(vpa.n,vperp.n)
+            dHdvpa_M_num = allocate_shared_float(vpa.n,vperp.n)
+            dHdvpa_M_err = allocate_float(vpa.n,vperp.n)
+            dHdvperp_M_exact = allocate_float(vpa.n,vperp.n)
+            dHdvperp_M_num = allocate_shared_float(vpa.n,vperp.n)
+            dHdvperp_M_err = allocate_float(vpa.n,vperp.n)
+            # fill the input distribution and the reference potentials for one set of Maxwellian parameters
+            dens, upar, vth = 1.0, 1.0, 1.0
+            begin_serial_region()
+            for ivperp in 1:vperp.n
+                for ivpa in 1:vpa.n
+                    F_M[ivpa,ivperp] = F_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
+                    H_M_exact[ivpa,ivperp] = H_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
+                    G_M_exact[ivpa,ivperp] = G_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
+                    d2Gdvpa2_M_exact[ivpa,ivperp] = d2Gdvpa2_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
+                    d2Gdvperp2_M_exact[ivpa,ivperp] = d2Gdvperp2_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
+                    dGdvperp_M_exact[ivpa,ivperp] = dGdvperp_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
+                    d2Gdvperpdvpa_M_exact[ivpa,ivperp] = d2Gdvperpdvpa_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
+                    dHdvpa_M_exact[ivpa,ivperp] = dHdvpa_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
+                    dHdvperp_M_exact[ivpa,ivperp] = dHdvperp_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
+                end
+            end
+            rpbd_exact = allocate_rosenbluth_potential_boundary_data(vpa,vperp)
+            # build reference boundary data from the exact Maxwellian arrays filled above
+            calculate_rosenbluth_potential_boundary_data_exact!(rpbd_exact,
+                  H_M_exact,dHdvpa_M_exact,dHdvperp_M_exact,G_M_exact,
+                  dGdvperp_M_exact,d2Gdvperp2_M_exact,
+                  d2Gdvperpdvpa_M_exact,d2Gdvpa2_M_exact,vpa,vperp)
+            # calculate the potentials numerically
+            calculate_rosenbluth_potentials_via_elliptic_solve!(fkpl_arrays.GG,fkpl_arrays.HH,fkpl_arrays.dHdvpa,fkpl_arrays.dHdvperp,
+                 fkpl_arrays.d2Gdvpa2,fkpl_arrays.dGdvperp,fkpl_arrays.d2Gdvperpdvpa,fkpl_arrays.d2Gdvperp2,F_M,
+                 vpa,vperp,vpa_spectral,vperp_spectral,fkpl_arrays;
+                 algebraic_solve_for_d2Gdvperp2=false,calculate_GG=true,calculate_dGdvperp=true)
+            # copy the numerically computed Rosenbluth potentials and their derivatives
+            # out of fkpl_arrays into the shared *_num buffers for testing
+            begin_vperp_vpa_region()
+            @loop_vperp_vpa ivperp ivpa begin
+                G_M_num[ivpa,ivperp] = fkpl_arrays.GG[ivpa,ivperp]
+                H_M_num[ivpa,ivperp] = fkpl_arrays.HH[ivpa,ivperp]
+                dHdvpa_M_num[ivpa,ivperp] = fkpl_arrays.dHdvpa[ivpa,ivperp]
+                dHdvperp_M_num[ivpa,ivperp] = fkpl_arrays.dHdvperp[ivpa,ivperp]
+                dGdvperp_M_num[ivpa,ivperp] = fkpl_arrays.dGdvperp[ivpa,ivperp]
+                d2Gdvperp2_M_num[ivpa,ivperp] = fkpl_arrays.d2Gdvperp2[ivpa,ivperp]
+                d2Gdvpa2_M_num[ivpa,ivperp] = fkpl_arrays.d2Gdvpa2[ivpa,ivperp]
+                d2Gdvperpdvpa_M_num[ivpa,ivperp] = fkpl_arrays.d2Gdvperpdvpa[ivpa,ivperp]
+            end
+            begin_serial_region()
+            @serial_region begin
+                # test the boundary data held in fkpl_arrays.rpbd against rpbd_exact
+                max_H_boundary_data_err, max_dHdvpa_boundary_data_err,
+                max_dHdvperp_boundary_data_err, max_G_boundary_data_err,
+                max_dGdvperp_boundary_data_err, max_d2Gdvperp2_boundary_data_err,
+                max_d2Gdvperpdvpa_boundary_data_err, max_d2Gdvpa2_boundary_data_err = test_rosenbluth_potential_boundary_data(fkpl_arrays.rpbd,rpbd_exact,vpa,vperp,print_to_screen=print_to_screen)
+                atol_max = 2.0e-13 # atol_max is reassigned immediately before each @test that uses it
+                @test max_H_boundary_data_err < atol_max
+                atol_max = 2.0e-12
+                @test max_dHdvpa_boundary_data_err < atol_max
+                atol_max = 3.0e-9
+                @test max_dHdvperp_boundary_data_err < atol_max
+                atol_max = 7.0e-12
+                @test max_G_boundary_data_err < atol_max
+                atol_max = 2.0e-7
+                @test max_dGdvperp_boundary_data_err < atol_max
+                atol_max = 2.0e-8
+                @test max_d2Gdvperp2_boundary_data_err < atol_max
+                atol_max = 2.0e-8
+                @test max_d2Gdvperpdvpa_boundary_data_err < atol_max
+                atol_max = 7.0e-12
+                @test max_d2Gdvpa2_boundary_data_err < atol_max
+                # test the elliptic solvers: print_test_data returns two error measures, bound here to *_max and *_L2
+                H_M_max, H_M_L2 = print_test_data(H_M_exact,H_M_num,H_M_err,"H_M",vpa,vperp,dummy_array,print_to_screen=print_to_screen)
+                dHdvpa_M_max, dHdvpa_M_L2 = print_test_data(dHdvpa_M_exact,dHdvpa_M_num,dHdvpa_M_err,"dHdvpa_M",vpa,vperp,dummy_array,print_to_screen=print_to_screen)
+                dHdvperp_M_max, dHdvperp_M_L2 = print_test_data(dHdvperp_M_exact,dHdvperp_M_num,dHdvperp_M_err,"dHdvperp_M",vpa,vperp,dummy_array,print_to_screen=print_to_screen)
+                G_M_max, G_M_L2 = print_test_data(G_M_exact,G_M_num,G_M_err,"G_M",vpa,vperp,dummy_array,print_to_screen=print_to_screen)
+                d2Gdvpa2_M_max, d2Gdvpa2_M_L2 = print_test_data(d2Gdvpa2_M_exact,d2Gdvpa2_M_num,d2Gdvpa2_M_err,"d2Gdvpa2_M",vpa,vperp,dummy_array,print_to_screen=print_to_screen)
+                dGdvperp_M_max, dGdvperp_M_L2 = print_test_data(dGdvperp_M_exact,dGdvperp_M_num,dGdvperp_M_err,"dGdvperp_M",vpa,vperp,dummy_array,print_to_screen=print_to_screen)
+                d2Gdvperpdvpa_M_max, d2Gdvperpdvpa_M_L2 = print_test_data(d2Gdvperpdvpa_M_exact,d2Gdvperpdvpa_M_num,d2Gdvperpdvpa_M_err,"d2Gdvperpdvpa_M",vpa,vperp,dummy_array,print_to_screen=print_to_screen)
+                d2Gdvperp2_M_max, d2Gdvperp2_M_L2 = print_test_data(d2Gdvperp2_M_exact,d2Gdvperp2_M_num,d2Gdvperp2_M_err,"d2Gdvperp2_M",vpa,vperp,dummy_array,print_to_screen=print_to_screen)
+                atol_max = 2.0e-7 # per-quantity tolerances: each pair applies to the two @test lines that follow it
+                atol_L2 = 5.0e-9
+                @test H_M_max < atol_max
+                @test H_M_L2 < atol_L2
+                atol_max = 2.0e-6
+                atol_L2 = 5.0e-8
+                @test dHdvpa_M_max < atol_max
+                @test dHdvpa_M_L2 < atol_L2
+                atol_max = 2.0e-5
+                atol_L2 = 1.0e-7
+                @test dHdvperp_M_max < atol_max
+                @test dHdvperp_M_L2 < atol_L2
+                atol_max = 2.0e-8
+                atol_L2 = 7.0e-10
+                @test G_M_max < atol_max
+                @test G_M_L2 < atol_L2
+                atol_max = 2.0e-7
+                atol_L2 = 4.0e-9
+                @test d2Gdvpa2_M_max < atol_max
+                @test d2Gdvpa2_M_L2 < atol_L2
+                atol_max = 2.0e-6
+                atol_L2 = 2.0e-7
+                @test dGdvperp_M_max < atol_max
+                @test dGdvperp_M_L2 < atol_L2
+                atol_max = 2.0e-6
+                atol_L2 = 2.0e-8
+                @test d2Gdvperpdvpa_M_max < atol_max
+                @test d2Gdvperpdvpa_M_L2 < atol_L2
+                atol_max = 3.0e-7
+                atol_L2 = 2.0e-8
+                @test d2Gdvperp2_M_max < atol_max
+                @test d2Gdvperp2_M_L2 < atol_L2
+            end
+            finalize_comms!()
+        end
+        
+        @testset " - test weak-form collision operator calculation" begin # operator for Maxwellian inputs vs. Cssp_Maxwellian_inputs, plus checks on moments of the result
+            ngrid = 9
+            nelement_vpa = 8
+            nelement_vperp = 4
+            vpa, vpa_spectral, vperp, vperp_spectral = create_grids(ngrid,nelement_vpa,nelement_vperp,
+                                                                        Lvpa=12.0,Lvperp=6.0)
+            # fkpl_arrays is created once here and reused by every parameter combination below
+            begin_serial_region()
+            fkpl_arrays = init_fokker_planck_collisions_weak_form(vpa,vperp,vpa_spectral,vperp_spectral,
+                                                                  precompute_weights=true, print_to_screen=print_to_screen)
+            # NOTE(review): test_dense_construction only appears in the testset name; no call in this block receives it -- confirm whether it should be forwarded
+            @testset "test_self_operator=$test_self_operator test_numerical_conserving_terms=$test_numerical_conserving_terms test_parallelism = $test_parallelism test_dense_construction=$test_dense_construction use_Maxwellian_Rosenbluth_coefficients=$use_Maxwellian_Rosenbluth_coefficients use_Maxwellian_field_particle_distribution=$use_Maxwellian_field_particle_distribution algebraic_solve_for_d2Gdvperp2=$algebraic_solve_for_d2Gdvperp2" for
+                    (test_self_operator, test_numerical_conserving_terms, test_parallelism, test_dense_construction,
+                     use_Maxwellian_Rosenbluth_coefficients, use_Maxwellian_field_particle_distribution,
+                     algebraic_solve_for_d2Gdvperp2) in ((true,false,false,false,false,false,false),(false,false,false,false,false,false,false),
+                                                         (true,true,false,false,false,false,false),(true,false,true,false,false,false,false),
+                                                         (true,false,false,true,false,false,false),(true,false,false,false,true,false,false),
+                                                         (true,false,false,false,false,true,false),(true,false,false,false,false,false,true))
+                # per-case buffers: Fs_M and F_M are the two input Maxwellians, C_M_* hold operator values and errors
+                dummy_array = allocate_float(vpa.n,vperp.n)
+                Fs_M = allocate_float(vpa.n,vperp.n)
+                F_M = allocate_float(vpa.n,vperp.n)
+                C_M_num = allocate_shared_float(vpa.n,vperp.n)
+                C_M_exact = allocate_float(vpa.n,vperp.n)
+                C_M_err = allocate_float(vpa.n,vperp.n)
+                if test_self_operator # both inputs share one set of Maxwellian parameters
+                    dens, upar, vth = 1.0, 1.0, 1.0
+                    denss, upars, vths = dens, upar, vth
+                else # the two inputs use different flow and thermal speed
+                    denss, upars, vths = 1.0, -1.0, 2.0/3.0
+                    dens, upar, vth = 1.0, 1.0, 1.0
+                end
+                ms = 1.0
+                msp = 1.0
+                nussp = 1.0
+                begin_serial_region()
+                for ivperp in 1:vperp.n
+                    for ivpa in 1:vpa.n
+                        Fs_M[ivpa,ivperp] = F_Maxwellian(denss,upars,vths,vpa,vperp,ivpa,ivperp)
+                        F_M[ivpa,ivperp] = F_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
+                        C_M_exact[ivpa,ivperp] = Cssp_Maxwellian_inputs(denss,upars,vths,ms,
+                                                                        dens,upar,vth,msp,
+                                                                        nussp,vpa,vperp,ivpa,ivperp)
+                    end
+                end
+                fokker_planck_collision_operator_weak_form!(Fs_M,F_M,ms,msp,nussp,
+                                                 fkpl_arrays,
+                                                 vperp, vpa, vperp_spectral, vpa_spectral,
+                                                 test_assembly_serial=test_parallelism,
+                                                 use_Maxwellian_Rosenbluth_coefficients=use_Maxwellian_Rosenbluth_coefficients,
+                                                 use_Maxwellian_field_particle_distribution=use_Maxwellian_field_particle_distribution,
+                                                 algebraic_solve_for_d2Gdvperp2=algebraic_solve_for_d2Gdvperp2,
+                                                 calculate_GG = false, calculate_dGdvperp=false)
+                if test_numerical_conserving_terms && test_self_operator
+                    # enforce the boundary conditions on CC before it is used for timestepping
+                    enforce_vpavperp_BCs!(fkpl_arrays.CC,vpa,vperp,vpa_spectral,vperp_spectral)
+                    # make ad-hoc conserving corrections
+                    conserving_corrections!(fkpl_arrays.CC,Fs_M,vpa,vperp,dummy_array)
+                end
+                # extract C[Fs,Fs'] result from fkpl_arrays.CC into the shared buffer C_M_num
+                begin_vperp_vpa_region()
+                @loop_vperp_vpa ivperp ivpa begin
+                    C_M_num[ivpa,ivperp] = fkpl_arrays.CC[ivpa,ivperp]
+                end
+                begin_serial_region()
+                @serial_region begin
+                    C_M_max, C_M_L2 = print_test_data(C_M_exact,C_M_num,C_M_err,"C_M",vpa,vperp,dummy_array,print_to_screen=print_to_screen)
+                    if test_self_operator && !test_numerical_conserving_terms && !use_Maxwellian_Rosenbluth_coefficients && !use_Maxwellian_field_particle_distribution
+                        atol_max = 6.0e-4
+                        atol_L2 = 7.0e-6
+                    elseif test_self_operator && test_numerical_conserving_terms && !use_Maxwellian_Rosenbluth_coefficients && !use_Maxwellian_field_particle_distribution
+                        atol_max = 7.0e-4
+                        atol_L2 = 7.0e-6
+                    elseif test_self_operator && !test_numerical_conserving_terms && use_Maxwellian_Rosenbluth_coefficients && !use_Maxwellian_field_particle_distribution
+                        atol_max = 8.0e-4
+                        atol_L2 = 8.1e-6
+                    elseif test_self_operator && !test_numerical_conserving_terms && !use_Maxwellian_Rosenbluth_coefficients && use_Maxwellian_field_particle_distribution
+                        atol_max = 1.1e-3
+                        atol_L2 = 9.0e-6
+                    else # looser tolerances for any other combination (in the list above: test_self_operator=false)
+                        atol_max = 7.0e-2
+                        atol_L2 = 6.0e-4
+                    end
+                    @test C_M_max < atol_max
+                    @test C_M_L2 < atol_L2
+                    # entropy-production diagnostic. NOTE(review): the integrand is Fs_M*C_M_num, not log(Fs_M)*C_M_num as the name lnfC suggests -- confirm; the reference value 2.543... below depends on it
+                    lnfC = fkpl_arrays.rhsvpavperp
+                    @loop_vperp_vpa ivperp ivpa begin
+                        lnfC[ivpa,ivperp] = Fs_M[ivpa,ivperp]*C_M_num[ivpa,ivperp]
+                    end
+                    dSdt = - get_density(lnfC,vpa,vperp)
+                    if test_self_operator && !test_numerical_conserving_terms
+                        if algebraic_solve_for_d2Gdvperp2
+                            atol = 1.0e-7
+                        else
+                            atol = 1.0e-8
+                        end
+                        @test isapprox(dSdt, 0.0 ; atol=atol) # quantities in this branch are compared with zero using an absolute tolerance only
+                        delta_n = get_density(C_M_num, vpa, vperp)
+                        delta_upar = get_upar(C_M_num, vpa, vperp, dens)
+                        delta_ppar = msp*get_ppar(C_M_num, vpa, vperp, upar)
+                        delta_pperp = msp*get_pperp(C_M_num, vpa, vperp)
+                        delta_pressure = get_pressure(delta_ppar,delta_pperp)
+                        atol = 1.0e-12
+                        @test isapprox(delta_n, 0.0 ; atol=atol)
+                        atol = 1.0e-9
+                        @test isapprox(delta_upar, 0.0 ; atol=atol)
+                        if algebraic_solve_for_d2Gdvperp2
+                            atol = 1.0e-7
+                        else
+                            atol = 1.0e-8
+                        end
+                        @test isapprox(delta_pressure, 0.0 ; atol=atol)
+                        if print_to_screen
+                            println("dSdt: $dSdt should be >0.0")
+                            println("delta_n: ", delta_n)
+                            println("delta_upar: ", delta_upar)
+                            println("delta_pressure: ", delta_pressure)
+                        end
+                    elseif test_self_operator && test_numerical_conserving_terms
+                        atol = 6.0e-7
+                        @test isapprox(dSdt, 0.0 ; atol=atol)
+                        delta_n = get_density(C_M_num, vpa, vperp)
+                        delta_upar = get_upar(C_M_num, vpa, vperp, dens)
+                        delta_ppar = msp*get_ppar(C_M_num, vpa, vperp, upar)
+                        delta_pperp = msp*get_pperp(C_M_num, vpa, vperp)
+                        delta_pressure = get_pressure(delta_ppar,delta_pperp)
+                        atol = 1.0e-15 # tighter than the branch above: conserving_corrections! has been applied to CC
+                        @test isapprox(delta_n, 0.0 ; atol=atol)
+                        atol = 1.0e-15
+                        @test isapprox(delta_upar, 0.0 ; atol=atol)
+                        atol = 1.0e-15
+                        @test isapprox(delta_pressure, 0.0 ; atol=atol)
+                        if print_to_screen
+                            println("dSdt: $dSdt should be >0.0")
+                            println("delta_n: ", delta_n)
+                            println("delta_upar: ", delta_upar)
+                            println("delta_pressure: ", delta_pressure)
+                        end
+                    else # test_self_operator == false: dSdt is compared with a stored reference number
+                        atol = 1.0e-4
+                        @test isapprox(dSdt, 2.543251178128757 ; atol=atol)
+                        delta_n = get_density(C_M_num, vpa, vperp)
+                        atol = 1.0e-12
+                        @test isapprox(delta_n, 0.0 ; atol=atol)
+                        if print_to_screen
+                            println("dSdt: $dSdt")
+                            println("delta_n: ", delta_n)
+                        end
+                    end
+                end
+            end
+            finalize_comms!()
+        end
+        
+        @testset " - test weak-form Rosenbluth potential calculation: direct integration" begin # potentials of a Maxwellian via the direct-integration routine vs. the *_Maxwellian reference functions
+            ngrid = 5 # chosen for a quick test -- direct integration is slow!
+            nelement_vpa = 8
+            nelement_vperp = 4
+            vpa, vpa_spectral, vperp, vperp_spectral = create_grids(ngrid,nelement_vpa,nelement_vperp,
+                                                                        Lvpa=12.0,Lvperp=6.0)
+            begin_serial_region()
+            fkpl_arrays = init_fokker_planck_collisions_direct_integration(vperp,vpa,precompute_weights=true,print_to_screen=print_to_screen)
+            dummy_array = allocate_float(vpa.n,vperp.n) # scratch buffer handed to print_test_data
+            F_M = allocate_float(vpa.n,vperp.n) # input distribution
+            H_M_exact = allocate_float(vpa.n,vperp.n) # *_exact: reference values from the *_Maxwellian functions
+            H_M_num = allocate_shared_float(vpa.n,vperp.n) # *_num: passed to the numerical routine below
+            H_M_err = allocate_float(vpa.n,vperp.n) # *_err: error buffer passed to print_test_data
+            G_M_exact = allocate_float(vpa.n,vperp.n)
+            G_M_num = allocate_shared_float(vpa.n,vperp.n)
+            G_M_err = allocate_float(vpa.n,vperp.n)
+            d2Gdvpa2_M_exact = allocate_float(vpa.n,vperp.n)
+            d2Gdvpa2_M_num = allocate_shared_float(vpa.n,vperp.n)
+            d2Gdvpa2_M_err = allocate_float(vpa.n,vperp.n)
+            d2Gdvperp2_M_exact = allocate_float(vpa.n,vperp.n)
+            d2Gdvperp2_M_num = allocate_shared_float(vpa.n,vperp.n)
+            d2Gdvperp2_M_err = allocate_float(vpa.n,vperp.n)
+            dGdvperp_M_exact = allocate_float(vpa.n,vperp.n)
+            dGdvperp_M_num = allocate_shared_float(vpa.n,vperp.n)
+            dGdvperp_M_err = allocate_float(vpa.n,vperp.n)
+            d2Gdvperpdvpa_M_exact = allocate_float(vpa.n,vperp.n)
+            d2Gdvperpdvpa_M_num = allocate_shared_float(vpa.n,vperp.n)
+            d2Gdvperpdvpa_M_err = allocate_float(vpa.n,vperp.n)
+            dHdvpa_M_exact = allocate_float(vpa.n,vperp.n)
+            dHdvpa_M_num = allocate_shared_float(vpa.n,vperp.n)
+            dHdvpa_M_err = allocate_float(vpa.n,vperp.n)
+            dHdvperp_M_exact = allocate_float(vpa.n,vperp.n)
+            dHdvperp_M_num = allocate_shared_float(vpa.n,vperp.n)
+            dHdvperp_M_err = allocate_float(vpa.n,vperp.n)
+            # fill the input distribution and the reference potentials for one set of Maxwellian parameters
+            dens, upar, vth = 1.0, 1.0, 1.0
+            begin_serial_region()
+            for ivperp in 1:vperp.n
+                for ivpa in 1:vpa.n
+                    F_M[ivpa,ivperp] = F_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
+                    H_M_exact[ivpa,ivperp] = H_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
+                    G_M_exact[ivpa,ivperp] = G_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
+                    d2Gdvpa2_M_exact[ivpa,ivperp] = d2Gdvpa2_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
+                    d2Gdvperp2_M_exact[ivpa,ivperp] = d2Gdvperp2_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
+                    dGdvperp_M_exact[ivpa,ivperp] = dGdvperp_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
+                    d2Gdvperpdvpa_M_exact[ivpa,ivperp] = d2Gdvperpdvpa_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
+                    dHdvpa_M_exact[ivpa,ivperp] = dHdvpa_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
+                    dHdvperp_M_exact[ivpa,ivperp] = dHdvperp_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
+                end
+            end
+            # calculate the potentials numerically; the *_num arrays passed here are the ones compared with *_exact below
+            calculate_rosenbluth_potentials_via_direct_integration!(G_M_num,H_M_num,dHdvpa_M_num,dHdvperp_M_num,
+             d2Gdvpa2_M_num,dGdvperp_M_num,d2Gdvperpdvpa_M_num,d2Gdvperp2_M_num,F_M,
+             vpa,vperp,vpa_spectral,vperp_spectral,fkpl_arrays)
+            begin_serial_region()
+            @serial_region begin
+                # test the integration: print_test_data returns two error measures, bound here to *_max and *_L2
+                # to recalculate absolute tolerances atol, set print_to_screen = true
+                H_M_max, H_M_L2 = print_test_data(H_M_exact,H_M_num,H_M_err,"H_M",vpa,vperp,dummy_array,print_to_screen=print_to_screen)
+                dHdvpa_M_max, dHdvpa_M_L2 = print_test_data(dHdvpa_M_exact,dHdvpa_M_num,dHdvpa_M_err,"dHdvpa_M",vpa,vperp,dummy_array,print_to_screen=print_to_screen)
+                dHdvperp_M_max, dHdvperp_M_L2 = print_test_data(dHdvperp_M_exact,dHdvperp_M_num,dHdvperp_M_err,"dHdvperp_M",vpa,vperp,dummy_array,print_to_screen=print_to_screen)
+                G_M_max, G_M_L2 = print_test_data(G_M_exact,G_M_num,G_M_err,"G_M",vpa,vperp,dummy_array,print_to_screen=print_to_screen)
+                d2Gdvpa2_M_max, d2Gdvpa2_M_L2 = print_test_data(d2Gdvpa2_M_exact,d2Gdvpa2_M_num,d2Gdvpa2_M_err,"d2Gdvpa2_M",vpa,vperp,dummy_array,print_to_screen=print_to_screen)
+                dGdvperp_M_max, dGdvperp_M_L2 = print_test_data(dGdvperp_M_exact,dGdvperp_M_num,dGdvperp_M_err,"dGdvperp_M",vpa,vperp,dummy_array,print_to_screen=print_to_screen)
+                d2Gdvperpdvpa_M_max, d2Gdvperpdvpa_M_L2 = print_test_data(d2Gdvperpdvpa_M_exact,d2Gdvperpdvpa_M_num,d2Gdvperpdvpa_M_err,"d2Gdvperpdvpa_M",vpa,vperp,dummy_array,print_to_screen=print_to_screen)
+                d2Gdvperp2_M_max, d2Gdvperp2_M_L2 = print_test_data(d2Gdvperp2_M_exact,d2Gdvperp2_M_num,d2Gdvperp2_M_err,"d2Gdvperp2_M",vpa,vperp,dummy_array,print_to_screen=print_to_screen)
+                atol_max = 2.1e-4 # per-quantity tolerances: each pair applies to the two @test lines that follow it
+                atol_L2 = 6.5e-6
+                @test H_M_max < atol_max
+                @test H_M_L2 < atol_L2
+                atol_max = 1.5e-3
+                atol_L2 = 6.5e-5
+                @test dHdvpa_M_max < atol_max
+                @test dHdvpa_M_L2 < atol_L2
+                atol_max = 8.0e-4
+                atol_L2 = 4.0e-5
+                @test dHdvperp_M_max < atol_max
+                @test dHdvperp_M_L2 < atol_L2
+                atol_max = 1.1e-4
+                atol_L2 = 4.0e-5
+                @test G_M_max < atol_max
+                @test G_M_L2 < atol_L2
+                atol_max = 2.5e-4
+                atol_L2 = 1.2e-5
+                @test d2Gdvpa2_M_max < atol_max
+                @test d2Gdvpa2_M_L2 < atol_L2
+                atol_max = 9.0e-5
+                atol_L2 = 6.0e-5
+                @test dGdvperp_M_max < atol_max
+                @test dGdvperp_M_L2 < atol_L2
+                atol_max = 1.1e-4
+                atol_L2 = 9.0e-6
+                @test d2Gdvperpdvpa_M_max < atol_max
+                @test d2Gdvperpdvpa_M_L2 < atol_L2
+                atol_max = 2.0e-4
+                atol_L2 = 1.1e-5
+                @test d2Gdvperp2_M_max < atol_max
+                @test d2Gdvperp2_M_L2 < atol_L2
+            end
+            finalize_comms!()                                                                  
+        end
+        
+        
+    end
+end
+
+end #FokkerPlanckTests
+
+using .FokkerPlanckTests
+# Run the test module's entry point as soon as this file is loaded.
+FokkerPlanckTests.runtests()
+
diff --git a/test/fokker_planck_time_evolution_tests.jl b/test/fokker_planck_time_evolution_tests.jl
new file mode 100644
index 000000000..a022170cb
--- /dev/null
+++ b/test/fokker_planck_time_evolution_tests.jl
@@ -0,0 +1,339 @@
+module FokkerPlanckTimeEvolutionTests
+include("setup.jl")
+
+using Base.Filesystem: tempname
+using MPI
+
+using moment_kinetics.coordinates: define_coordinate
+using moment_kinetics.input_structs: grid_input, advection_input
+using moment_kinetics.load_data: open_readonly_output_file, load_coordinate_data,
+                                 load_species_data, load_fields_data,
+                                 load_charged_particle_moments_data, load_pdf_data,
+                                 load_time_data, load_species_data
+using moment_kinetics.type_definitions: mk_float
+
+const analytical_rtol = 3.e-2
+const regression_rtol = 2.e-8
+
+# Create a temporary directory for test output
+test_output_directory = tempname()
+mkpath(test_output_directory)
+
+# Container for the reference values that the simulation output is compared against
+struct expected_data
+    vpa::Vector{mk_float}            # grid points, compared with vpa.grid
+    vperp::Vector{mk_float}          # grid points, compared with vperp.grid
+    phi::Vector{mk_float}            # indexed by time
+    n_charged::Vector{mk_float}      # indexed by time
+    upar_charged::Vector{mk_float}   # indexed by time
+    ppar_charged::Vector{mk_float}   # indexed by time
+    pperp_charged::Vector{mk_float}  # indexed by time
+    qpar_charged::Vector{mk_float}   # indexed by time
+    v_t_charged::Vector{mk_float}    # indexed by time
+    dSdt::Vector{mk_float}           # indexed by time
+    f_charged::Array{mk_float, 3}    # indexed by (vpa, vperp, time)
+end
+
+const expected =
+  expected_data(
+   [-3.0, -2.5, -2.0, -1.5, -1.0, -0.5, 0.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0], # vpa grid points
+   [0.155051025721682, 0.644948974278318, 1.000000000000000, 1.500000000000000, 2.000000000000000, 2.500000000000000, 3.000000000000000], # vperp grid points
+   # Expected phi (first entry: initial values, second entry: final values; same for the moments below):
+   [-1.267505494648937, -1.275683298550937],
+   # Expected n_charged:
+   [0.2815330322340072, 0.2792400986636072],
+   # Expected upar_charged:
+   [0.0, 0.0],
+   # Expected ppar_charged:
+   [0.17982280248048935, 0.14891126175332367],
+   # Expected pperp_charged
+   [0.14340146667506784, 0.1581377822859991],
+   # Expected qpar_charged
+   [0.0, 0.0],
+   # Expected v_t_charged
+   [1.0511726083010418, 1.0538509291794658],
+   # Expected dSdt
+   [0.0, 1.1853081348031516e-5],
+   # Expected f_charged: each row (ended by ;) is one vpa point, each column one vperp point, and ;;; separates the two time slices
+   [0.0 0.0 0.0 0.0 0.0 0.0 0.0;
+    0.0006199600161806666 0.00047805300997075977 0.0002665817112117718 7.637693901737056e-5 1.3272321881722645e-5 1.3988924344690309e-6 0.0;
+    0.005882016862626724 0.0045356406743786385 0.002529256854781707 0.0007246442213864763 0.00012592428394890537 1.3272321881722645e-5 0.0;
+    0.03384866997225574 0.026100809957763767 0.01455486826237011 0.004170039574837177 0.000724644221263874 7.637693900444835e-5 0.0;
+    0.11813810317200342 0.09109664226661075 0.05079917556123135 0.01455420747483572 0.0025291420266981487 0.0002665696084208068 0.0;
+    0.22957946520936198 0.17702940755267918 0.0987187642706944 0.0282833995036643 0.004914917865936108 0.0005180285318549189 0.0;
+    0.0 0.0 0.0 0.0 0.0 0.0 0.0;
+    0.22957946520936204 0.1770294075526792 0.0987187642706944 0.0282833995036643 0.004914917865936108 0.0005180285318549189 0.0;
+    0.11813810317200349 0.0910966422666108 0.050799175561231376 0.01455420747483573 0.0025291420266981487 0.0002665696084208068 0.0;
+    0.03384866997225574 0.026100809957763767 0.01455486826237011 0.004170039574837177 0.000724644221263874 7.637693900444835e-5 0.0;
+    0.005882016862626724 0.0045356406743786385 0.002529256854781707 0.0007246442213864763 0.00012592428394890537 1.3272321881722645e-5 0.0;
+    0.0006199600161806666 0.00047805300997075977 0.0002665817112117718 7.637693901737056e-5 1.3272321881722645e-5 1.3988924344690309e-6 0.0;
+    0.0 0.0 0.0 0.0 0.0 0.0 0.0;;;
+    0.0 0.0 0.0 0.0 0.0 0.0 0.0;
+    0.0001712743622973216 7.105465094508053e-5 -7.829380680167827e-5 -0.00015364081956318698 -9.097098213067502e-5 -3.311284120491419e-5 0.0;
+    0.005883280697248667 0.004667594200766182 0.002855965521103658 0.0008138347136178689 2.44260649525292e-5 -9.753249634264602e-5 0.0;
+    0.02792209301450194 0.022385716644538384 0.01413535091105969 0.004677801530322722 0.0007105315221401102 -0.00022400635166536323 0.0;
+    0.08117458037332098 0.06563459159004267 0.04247673844050208 0.015087784332275832 0.0029056314178876035 -0.00023019804543218203 0.0;
+    0.15133793170654106 0.12313903060106579 0.08111673445361306 0.029975277983613262 0.00626735398468981 7.553501812465833e-6 0.0;
+    0.18493902160817713 0.15073513412904313 0.09976414473955808 0.037251581926306565 0.007941836186495122 0.00016196175024033304 0.0;
+    0.15133793170654092 0.12313903060106571 0.08111673445361306 0.02997527798361324 0.006267353984689816 7.553501812469816e-6 0.0;
+    0.081174580373321 0.06563459159004267 0.042476738440502065 0.015087784332275821 0.002905631417887614 -0.0002301980454321778 0.0;
+    0.027922093014501933 0.022385716644538384 0.014135350911059698 0.004677801530322729 0.0007105315221401184 -0.00022400635166536134 0.0;
+    0.005883280697248667 0.004667594200766184 0.002855965521103663 0.0008138347136178759 2.4426064952530956e-5 -9.753249634264635e-5 0.0;
+    0.0001712743622973275 7.105465094508572e-5 -7.829380680167411e-5 -0.00015364081956318568 -9.097098213067551e-5 -3.311284120491447e-5 0.0;
+    0.0 0.0 0.0 0.0 0.0 0.0 0.0])
+###########################################################################################
+# to modify the test, with a new expected f, print the new f using the following commands
+# in an interactive Julia REPL. The path is the path to the .dfns file.
+########################################################################################## 
+"""
+fid = open_readonly_output_file(path, "dfns")
+f_charged_vpavperpzrst = load_pdf_data(fid)
+f_charged = f_charged_vpavperpzrst[:,:,1,1,1,:]
+ntind = 2
+nvpa = 13  #subject to grid choices
+nvperp = 7 #subject to grid choices
+for k in 1:ntind
+  for j in 1:nvperp-1
+      for i in 1:nvpa-1
+         @printf("%.15f ", f_charged[i,j,k])
+         print("; ")
+      end
+      @printf("%.15f ", f_charged[nvpa,j,k])
+      print(";;\n")
+  end
+  for i in 1:nvpa-1
+    @printf("%.15f ", f_charged[i,nvperp,k])
+    print("; ")
+  end
+  @printf("%.15f ", f_charged[nvpa,nvperp,k])
+  if k < ntind
+      print(";;;\n")
+  end  
+end
+"""
+# Default inputs for the test run: one ion species, no neutrals, a single grid point in z and r, and Gauss-Legendre pseudospectral vpa and vperp grids
+test_input_gauss_legendre = Dict("run_name" => "gausslegendre_pseudospectral",
+                              "base_directory" => test_output_directory,
+                              "n_ion_species" => 1, # single ion species
+                              "n_neutral_species" => 0, # no neutral species
+                              "T_wall" => 1.0,
+                              "T_e" => 1.0,
+                              "initial_temperature2" => 1.0,
+                              "vpa_ngrid" => 3,
+                              "vpa_L" => 6.0,
+                              "vpa_nelement" => 6,
+                              "vpa_bc" => "zero",
+                              "vpa_discretization" => "gausslegendre_pseudospectral",
+                              "vperp_ngrid" => 3,
+                              "vperp_nelement" => 3,
+                              "vperp_L" => 3.0,
+                              "vperp_discretization" => "gausslegendre_pseudospectral",
+                              "n_rk_stages" => 4,
+                              "split_operators" => false,
+                              "ionization_frequency" => 0.0,
+                              "charge_exchange_frequency" => 0.0,
+                              "constant_ionization_rate" => false,
+                              "electron_physics" => "boltzmann_electron_response",
+                              "nuii" => 1.0, # NOTE(review): presumably the ion-ion collision frequency -- confirm
+                              "use_semi_lagrange" => false,
+                              "Bzed" => 1.0,
+                              "Bmag" => 1.0,
+                              "rhostar" => 1.0,
+                              "z_IC_upar_amplitude1" => 0.0,
+                              "z_IC_density_amplitude1" => 0.001,
+                              "z_IC_upar_amplitude2" => 0.0,
+                              "z_IC_temperature_phase1" => 0.0,
+                              "z_IC_temperature_amplitude1" => 0.0,
+                              "evolve_moments_parallel_pressure" => false,
+                              "evolve_moments_conservation" => false,
+                              "z_IC_option1" => "sinusoid",
+                              "evolve_moments_parallel_flow" => false,
+                              "z_IC_density_phase2" => 0.0,
+                              "z_discretization" => "chebyshev_pseudospectral",
+                              "z_IC_upar_phase2" => 0.0,
+                              "evolve_moments_density" => false,
+                              "z_IC_temperature_amplitude2" => 0.0,
+                              "initial_density1" => 0.5,
+                              "z_IC_upar_phase1" => 0.0,
+                              "initial_density2" => 0.5,
+                              "z_IC_density_phase1" => 0.0,
+                              "z_IC_option2" => "sinusoid",
+                              "z_IC_density_amplitude2" => 0.001,
+                              "initial_temperature1" => 1.0,
+                              "z_IC_temperature_phase2" => 0.0,
+                              "z_ngrid" => 1, # single grid point in z
+                              "z_nelement_local" => 1,
+                              "z_nelement" => 1,
+                              "z_bc" => "wall",
+                              "r_discretization" => "chebyshev_pseudospectral",
+                              "r_ngrid" => 1, # single grid point in r
+                              "r_nelement" => 1,
+                              "r_nelement_local" => 1,
+                              "r_bc" => "periodic",
+                              "dt" => 0.01,
+                              "nstep" => 5000,
+                              "nwrite" => 5000, # same as nstep; run_test checks two saved time points (tind = 1 and 2)
+                              "nwrite_dfns" => 5000 )
+
+
+"""
+    run_test(test_input, rtol, atol, upar_rtol=nothing; args...)
+
+Run the Fokker-Planck relaxation simulation described by `test_input`, updated with any
+keyword `args`, and on the process with `global_rank[] == 0` compare the output at both
+saved time points with the reference values stored in `expected`.
+
+`phi` is compared using `rtol`; the grids, moments and distribution function are compared
+using `atol`. `upar_rtol` is kept so that existing callers do not break, but no check
+uses it. The run name built from `test_input["run_name"]` and `args` should not be
+shared by any two tests in this file.
+"""
+function run_test(test_input, rtol, atol, upar_rtol=nothing; args...)
+    # keyword arguments passed to run_test are collected in args and are used to update
+    # the default inputs
+
+    # Convert keyword arguments to a unique name
+    name = test_input["run_name"]
+    if length(args) > 0
+        name = string(name, "_", (string(k, "-", v, "_") for (k, v) in args)...)
+
+        # Remove trailing "_"
+        name = chop(name)
+    end
+
+    # Provide some progress info
+    println("    - testing ", name)
+
+    # Convert dict from symbol keys to String keys
+    modified_inputs = Dict(String(k) => v for (k, v) in args)
+
+    # Update default inputs with values to be changed
+    input = merge(test_input, modified_inputs)
+
+    input["run_name"] = name
+
+    # Suppress console output while running
+    quietoutput() do
+        # run simulation
+        run_moment_kinetics(input)
+    end
+
+    phi = nothing
+    n_charged = nothing
+    upar_charged = nothing
+    ppar_charged = nothing
+    pperp_charged = nothing
+    qpar_charged = nothing
+    v_t_charged = nothing
+    dSdt = nothing
+    f_charged = nothing
+    vpa, vpa_spectral = nothing, nothing
+    vperp, vperp_spectral = nothing, nothing
+
+    if global_rank[] == 0
+        quietoutput() do
+
+            # Load and analyse output
+            #########################
+
+            path = joinpath(realpath(input["base_directory"]), name, name)
+
+            # open the output file containing moments data and give it the handle 'fid'
+            fid = open_readonly_output_file(path, "moments")
+
+            # load species, time coordinate data
+            n_ion_species, n_neutral_species = load_species_data(fid)
+            ntime, time = load_time_data(fid)
+
+            # load fields data
+            phi_zrt, Er_zrt, Ez_zrt = load_fields_data(fid)
+
+            # load velocity moments data
+            n_charged_zrst, upar_charged_zrst, ppar_charged_zrst,
+            pperp_charged_zrst, qpar_charged_zrst, v_t_charged_zrst, dSdt_zrst = load_charged_particle_moments_data(fid,extended_moments=true)
+
+            close(fid)
+
+            # open the output file containing pdf data
+            fid = open_readonly_output_file(path, "dfns")
+            # load coordinates
+            vpa, vpa_spectral = load_coordinate_data(fid, "vpa")
+            vperp, vperp_spectral = load_coordinate_data(fid, "vperp")
+
+            # load particle distribution function (pdf) data
+            f_charged_vpavperpzrst = load_pdf_data(fid)
+
+            close(fid)
+            # select the single z, r, s point
+            # keep the two time points in the arrays
+            phi = phi_zrt[1,1,:]
+            n_charged = n_charged_zrst[1,1,1,:]
+            upar_charged = upar_charged_zrst[1,1,1,:]
+            ppar_charged = ppar_charged_zrst[1,1,1,:]
+            pperp_charged = pperp_charged_zrst[1,1,1,:]
+            qpar_charged = qpar_charged_zrst[1,1,1,:]
+            v_t_charged = v_t_charged_zrst[1,1,1,:]
+            dSdt = dSdt_zrst[1,1,1,:]
+            f_charged = f_charged_vpavperpzrst[:,:,1,1,1,:]
+            # Unnormalize f
+            # NEED TO UPGRADE TO 2V MOMENT KINETICS HERE
+        end
+
+        function test_values(tind)
+            @testset "tind=$tind" begin
+                # Check grids
+                #############
+
+                @test isapprox(expected.vpa[:], vpa.grid[:], atol=atol)
+                @test isapprox(expected.vperp[:], vperp.grid[:], atol=atol)
+
+                # Check electrostatic potential
+                ###############################
+
+                @test isapprox(expected.phi[tind], phi[tind], rtol=rtol)
+
+                # Check charged particle moments and f
+                ######################################
+
+                @test isapprox(expected.n_charged[tind], n_charged[tind], atol=atol)
+                @test isapprox(expected.upar_charged[tind], upar_charged[tind], atol=atol)
+                @test isapprox(expected.ppar_charged[tind], ppar_charged[tind], atol=atol)
+                @test isapprox(expected.pperp_charged[tind], pperp_charged[tind], atol=atol)
+                @test isapprox(expected.qpar_charged[tind], qpar_charged[tind], atol=atol)
+                @test isapprox(expected.v_t_charged[tind], v_t_charged[tind], atol=atol)
+                @test isapprox(expected.dSdt[tind], dSdt[tind], atol=atol)
+                # largest pointwise error in f at this time index only
+                f_err = abs.(expected.f_charged[:,:,tind] .- f_charged[:,:,tind])
+                max_f_err = maximum(f_err)
+                @test isapprox(max_f_err, 0.0, atol=atol)
+                @test isapprox(expected.f_charged[:,:,tind], f_charged[:,:,tind], atol=atol)
+            end
+        end
+
+        # Test initial values
+        test_values(1)
+
+        # Test final values
+        test_values(2)
+    end
+end
+
+
+function runtests()
+    # Entry point of this file: run the relaxation test against the stored benchmark.
+    # Benchmark data is taken from this run (GaussLegendre pseudospectral)
+    rtol, atol = 1.e-14, 1.0e-14
+    @testset "Fokker Planck dFdt = C[F,F] relaxation test" verbose=use_verbose begin
+        println("Fokker Planck dFdt = C[F,F] relaxation test")
+        @testset "Gauss Legendre base" begin
+            run_test(test_input_gauss_legendre, rtol, atol)
+        end
+    end
+end
+
+end # FokkerPlanckTimeEvolutionTests
+
+
+using .FokkerPlanckTimeEvolutionTests
+
+FokkerPlanckTimeEvolutionTests.runtests()
diff --git a/test/interpolation_tests.jl b/test/interpolation_tests.jl
index 8885224cc..1951183a1 100644
--- a/test/interpolation_tests.jl
+++ b/test/interpolation_tests.jl
@@ -37,11 +37,12 @@ function runtests()
 			nrank_per_block = 0 # dummy value
 			irank = 0 # dummy value
 			comm = MPI.COMM_NULL # dummy value
+            cheb_option = "FFT"
             #element_spacing_option = "uniform"
-			input = grid_input("coord", ngrid, nelement,
-				nelement_local, nrank_per_block, irank, L,
-				discretization, fd_option, bc, adv_input, comm,
-                element_spacing_option)
+            input = grid_input("coord", ngrid, nelement,
+                                nelement_local, nrank_per_block, irank, L, 
+                                discretization, fd_option, cheb_option, bc, adv_input, comm,
+                                element_spacing_option)
             # create the coordinate struct 'z'
             z, spectral = define_coordinate(input)
 
diff --git a/test/loop_setup_tests.jl b/test/loop_setup_tests.jl
index c34ea677b..725714d96 100644
--- a/test/loop_setup_tests.jl
+++ b/test/loop_setup_tests.jl
@@ -197,26 +197,26 @@ function runtests()
             @test loop_ranges_store[()].z == 1:0
 
             @test loop_ranges_store[(:s,)].s == 1:0
-            @test loop_ranges_store[(:s,)].r == 1:0
-            @test loop_ranges_store[(:s,)].z == 1:0
+            @test loop_ranges_store[(:s,)].r == 1:3
+            @test loop_ranges_store[(:s,)].z == 1:4
 
-            @test loop_ranges_store[(:r,)].s == 1:0
+            @test loop_ranges_store[(:r,)].s == 1:2
             @test loop_ranges_store[(:r,)].r == 1:0
-            @test loop_ranges_store[(:r,)].z == 1:0
+            @test loop_ranges_store[(:r,)].z == 1:4
 
-            @test loop_ranges_store[(:z,)].s == 1:0
-            @test loop_ranges_store[(:z,)].r == 1:0
+            @test loop_ranges_store[(:z,)].s == 1:2
+            @test loop_ranges_store[(:z,)].r == 1:3
             @test loop_ranges_store[(:z,)].z == 1:0
 
             @test loop_ranges_store[(:s,:r)].s == 1:0
             @test loop_ranges_store[(:s,:r)].r == 1:0
-            @test loop_ranges_store[(:s,:r)].z == 1:0
+            @test loop_ranges_store[(:s,:r)].z == 1:4
 
             @test loop_ranges_store[(:s,:z)].s == 2:2
             @test loop_ranges_store[(:s,:z)].r == 1:3
             @test loop_ranges_store[(:s,:z)].z == 1:4
 
-            @test loop_ranges_store[(:r,:z)].s == 1:0
+            @test loop_ranges_store[(:r,:z)].s == 1:2
             @test loop_ranges_store[(:r,:z)].r == 1:0
             @test loop_ranges_store[(:r,:z)].z == 1:0
 
@@ -269,23 +269,23 @@ function runtests()
             @test loop_ranges_store[()].z == 1:0
 
             @test loop_ranges_store[(:s,)].s == 1:0
-            @test loop_ranges_store[(:s,)].r == 1:0
-            @test loop_ranges_store[(:s,)].z == 1:0
+            @test loop_ranges_store[(:s,)].r == 1:3
+            @test loop_ranges_store[(:s,)].z == 1:4
 
-            @test loop_ranges_store[(:r,)].s == 1:0
+            @test loop_ranges_store[(:r,)].s == 1:2
             @test loop_ranges_store[(:r,)].r == 1:0
-            @test loop_ranges_store[(:r,)].z == 1:0
+            @test loop_ranges_store[(:r,)].z == 1:4
 
-            @test loop_ranges_store[(:z,)].s == 1:0
-            @test loop_ranges_store[(:z,)].r == 1:0
+            @test loop_ranges_store[(:z,)].s == 1:2
+            @test loop_ranges_store[(:z,)].r == 1:3
             @test loop_ranges_store[(:z,)].z == 1:0
 
             @test loop_ranges_store[(:s,:r)].s == 1:0
             @test loop_ranges_store[(:s,:r)].r == 1:0
-            @test loop_ranges_store[(:s,:r)].z == 1:0
+            @test loop_ranges_store[(:s,:r)].z == 1:4
 
             @test loop_ranges_store[(:s,:z)].s == 1:0
-            @test loop_ranges_store[(:s,:z)].r == 1:0
+            @test loop_ranges_store[(:s,:z)].r == 1:3
             @test loop_ranges_store[(:s,:z)].z == 1:0
 
             @test loop_ranges_store[(:r,:z)].s == 1:2
@@ -305,23 +305,23 @@ function runtests()
             @test loop_ranges_store[()].z == 1:0
 
             @test loop_ranges_store[(:s,)].s == 1:0
-            @test loop_ranges_store[(:s,)].r == 1:0
-            @test loop_ranges_store[(:s,)].z == 1:0
+            @test loop_ranges_store[(:s,)].r == 1:3
+            @test loop_ranges_store[(:s,)].z == 1:4
 
-            @test loop_ranges_store[(:r,)].s == 1:0
+            @test loop_ranges_store[(:r,)].s == 1:2
             @test loop_ranges_store[(:r,)].r == 1:0
-            @test loop_ranges_store[(:r,)].z == 1:0
+            @test loop_ranges_store[(:r,)].z == 1:4
 
-            @test loop_ranges_store[(:z,)].s == 1:0
-            @test loop_ranges_store[(:z,)].r == 1:0
+            @test loop_ranges_store[(:z,)].s == 1:2
+            @test loop_ranges_store[(:z,)].r == 1:3
             @test loop_ranges_store[(:z,)].z == 1:0
 
             @test loop_ranges_store[(:s,:r)].s == 1:0
             @test loop_ranges_store[(:s,:r)].r == 1:0
-            @test loop_ranges_store[(:s,:r)].z == 1:0
+            @test loop_ranges_store[(:s,:r)].z == 1:4
 
             @test loop_ranges_store[(:s,:z)].s == 1:0
-            @test loop_ranges_store[(:s,:z)].r == 1:0
+            @test loop_ranges_store[(:s,:z)].r == 1:3
             @test loop_ranges_store[(:s,:z)].z == 1:0
 
             @test loop_ranges_store[(:r,:z)].s == 1:2
@@ -341,23 +341,23 @@ function runtests()
             @test loop_ranges_store[()].z == 1:0
 
             @test loop_ranges_store[(:s,)].s == 1:0
-            @test loop_ranges_store[(:s,)].r == 1:0
-            @test loop_ranges_store[(:s,)].z == 1:0
+            @test loop_ranges_store[(:s,)].r == 1:3
+            @test loop_ranges_store[(:s,)].z == 1:4
 
-            @test loop_ranges_store[(:r,)].s == 1:0
+            @test loop_ranges_store[(:r,)].s == 1:2
             @test loop_ranges_store[(:r,)].r == 1:0
-            @test loop_ranges_store[(:r,)].z == 1:0
+            @test loop_ranges_store[(:r,)].z == 1:4
 
-            @test loop_ranges_store[(:z,)].s == 1:0
-            @test loop_ranges_store[(:z,)].r == 1:0
+            @test loop_ranges_store[(:z,)].s == 1:2
+            @test loop_ranges_store[(:z,)].r == 1:3
             @test loop_ranges_store[(:z,)].z == 1:0
 
             @test loop_ranges_store[(:s,:r)].s == 1:0
             @test loop_ranges_store[(:s,:r)].r == 1:0
-            @test loop_ranges_store[(:s,:r)].z == 1:0
+            @test loop_ranges_store[(:s,:r)].z == 1:4
 
             @test loop_ranges_store[(:s,:z)].s == 1:0
-            @test loop_ranges_store[(:s,:z)].r == 1:0
+            @test loop_ranges_store[(:s,:z)].r == 1:3
             @test loop_ranges_store[(:s,:z)].z == 1:0
 
             @test loop_ranges_store[(:r,:z)].s == 1:2
@@ -377,26 +377,26 @@ function runtests()
             @test loop_ranges_store[()].z == 1:0
 
             @test loop_ranges_store[(:s,)].s == 1:0
-            @test loop_ranges_store[(:s,)].r == 1:0
-            @test loop_ranges_store[(:s,)].z == 1:0
+            @test loop_ranges_store[(:s,)].r == 1:3
+            @test loop_ranges_store[(:s,)].z == 1:4
 
-            @test loop_ranges_store[(:r,)].s == 1:0
+            @test loop_ranges_store[(:r,)].s == 1:2
             @test loop_ranges_store[(:r,)].r == 1:0
-            @test loop_ranges_store[(:r,)].z == 1:0
+            @test loop_ranges_store[(:r,)].z == 1:4
 
-            @test loop_ranges_store[(:z,)].s == 1:0
-            @test loop_ranges_store[(:z,)].r == 1:0
+            @test loop_ranges_store[(:z,)].s == 1:2
+            @test loop_ranges_store[(:z,)].r == 1:3
             @test loop_ranges_store[(:z,)].z == 1:0
 
             @test loop_ranges_store[(:s,:r)].s == 1:0
             @test loop_ranges_store[(:s,:r)].r == 1:0
-            @test loop_ranges_store[(:s,:r)].z == 1:0
+            @test loop_ranges_store[(:s,:r)].z == 1:4
 
             @test loop_ranges_store[(:s,:z)].s == 1:0
-            @test loop_ranges_store[(:s,:z)].r == 1:0
+            @test loop_ranges_store[(:s,:z)].r == 1:3
             @test loop_ranges_store[(:s,:z)].z == 1:0
 
-            @test loop_ranges_store[(:r,:z)].s == 1:0
+            @test loop_ranges_store[(:r,:z)].s == 1:2
             @test loop_ranges_store[(:r,:z)].r == 1:0
             @test loop_ranges_store[(:r,:z)].z == 1:0
 
diff --git a/test/runtests.jl b/test/runtests.jl
index 9d7d79cfa..26a8b4e67 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -15,6 +15,8 @@ function runtests()
         include(joinpath(@__DIR__, "harrisonthompson.jl"))
         include(joinpath(@__DIR__, "wall_bc_tests.jl"))
         include(joinpath(@__DIR__, "recycling_fraction_tests.jl"))
+        include(joinpath(@__DIR__, "fokker_planck_tests.jl"))
+        include(joinpath(@__DIR__, "fokker_planck_time_evolution_tests.jl"))
     end
 end
 
diff --git a/test/sound_wave_tests.jl b/test/sound_wave_tests.jl
index dc18220c1..21c187288 100644
--- a/test/sound_wave_tests.jl
+++ b/test/sound_wave_tests.jl
@@ -63,7 +63,6 @@ test_input_finite_difference = Dict("n_ion_species" => 1,
                                     "vperp_ngrid" => 1,
                                     "vperp_nelement" => 1,
                                     "vperp_L" => 1.0,
-                                    "vperp_bc" => "periodic",
                                     "vperp_discretization" => "finite_difference",
                                     "vpa_ngrid" => 180,
                                     "vpa_nelement" => 1,
diff --git a/test/velocity_integral_tests.jl b/test/velocity_integral_tests.jl
index 07d0c5027..2ceaa7e86 100644
--- a/test/velocity_integral_tests.jl
+++ b/test/velocity_integral_tests.jl
@@ -26,6 +26,7 @@ function runtests()
         # fd_option and adv_input not actually used so given values unimportant
         discretization = "chebyshev_pseudospectral"
         fd_option = "fourth_order_centered"
+        cheb_option = "FFT"
         adv_input = advection_input("default", 1.0, 0.0, 0.0)
         nrank = 1
         irank = 0
@@ -33,15 +34,15 @@ function runtests()
         # create the 'input' struct containing input info needed to create a
         # coordinate
         vr_input = grid_input("vperp", 1, 1, 1, nrank, irank, 1.0, discretization,
-                              fd_option, bc, adv_input, comm, "uniform")
+                              fd_option, cheb_option, bc, adv_input, comm, "uniform")
         vz_input = grid_input("vpa", ngrid, nelement_global, nelement_local, nrank, irank,
-                              Lvpa, discretization, fd_option, bc, adv_input, comm,
+                              Lvpa, discretization, fd_option, cheb_option, bc, adv_input, comm,
                               "uniform")
         vpa_input = grid_input("vpa", ngrid, nelement_global, nelement_local, nrank,
-                               irank, Lvpa, discretization, fd_option, bc, adv_input,
+                               irank, Lvpa, discretization, fd_option, cheb_option, bc, adv_input,
                                comm, "uniform")
         vperp_input = grid_input("vperp", ngrid, nelement_global, nelement_local, nrank,
-                                 irank, Lvperp, discretization, fd_option, bc, adv_input,
+                                 irank, Lvperp, discretization, fd_option, cheb_option, bc, adv_input,
                                  comm, "uniform")
         # create the coordinate struct 'x'
         vpa, vpa_spectral = define_coordinate(vpa_input)
diff --git a/test/wall_bc_tests.jl b/test/wall_bc_tests.jl
index 38de0b258..b87e26fd9 100644
--- a/test/wall_bc_tests.jl
+++ b/test/wall_bc_tests.jl
@@ -207,13 +207,14 @@ function run_test(test_input, expected_phi, tolerance; args...)
         # create the 'input' struct containing input info needed to create a coordinate
         # adv_input not actually used in this test so given values unimportant
         adv_input = advection_input("default", 1.0, 0.0, 0.0)
+        cheb_option = "FFT"
 		nrank_per_block = 0 # dummy value
 		irank = 0 # dummy value
 		comm = MPI.COMM_NULL # dummy value
         element_spacing_option = "uniform"
         input = grid_input("coord", test_input["z_ngrid"], test_input["z_nelement"], 
-						   test_input["z_nelement"], nrank_per_block, irank, 1.0,
-                           test_input["z_discretization"], "", test_input["z_bc"],
+                           test_input["z_nelement"], nrank_per_block, irank, 1.0,
+                           test_input["z_discretization"], "", cheb_option, test_input["z_bc"],
                            adv_input, comm, test_input["z_element_spacing_option"])
         z, z_spectral = define_coordinate(input)
 
diff --git a/test_scripts/2D_FEM_assembly_test.jl b/test_scripts/2D_FEM_assembly_test.jl
new file mode 100644
index 000000000..a86eed2c3
--- /dev/null
+++ b/test_scripts/2D_FEM_assembly_test.jl
@@ -0,0 +1,589 @@
+export run_assembly_test
+using Printf
+using Plots
+using LaTeXStrings
+using MPI
+using Measures
+using Dates
+import moment_kinetics
+using moment_kinetics.array_allocation: allocate_float, allocate_shared_float
+using moment_kinetics.input_structs: grid_input, advection_input
+using moment_kinetics.coordinates: define_coordinate
+using moment_kinetics.chebyshev: setup_chebyshev_pseudospectral
+using moment_kinetics.gauss_legendre: setup_gausslegendre_pseudospectral, get_QQ_local!
+using moment_kinetics.type_definitions: mk_float, mk_int
+using moment_kinetics.fokker_planck: init_fokker_planck_collisions_weak_form
+using moment_kinetics.fokker_planck: fokker_planck_collision_operator_weak_form!
+using moment_kinetics.fokker_planck: conserving_corrections!
+using moment_kinetics.calculus: derivative!
+using moment_kinetics.velocity_moments: get_density, get_upar, get_ppar, get_pperp, get_pressure
+using moment_kinetics.communication
+using moment_kinetics.communication: MPISharedArray
+using moment_kinetics.looping
+using SparseArrays: sparse
+using LinearAlgebra: mul!, lu, cholesky
+
+using moment_kinetics.fokker_planck_test: F_Maxwellian, G_Maxwellian, H_Maxwellian
+using moment_kinetics.fokker_planck_test: d2Gdvpa2_Maxwellian, d2Gdvperp2_Maxwellian, d2Gdvperpdvpa_Maxwellian, dGdvperp_Maxwellian
+using moment_kinetics.fokker_planck_test: dHdvperp_Maxwellian, dHdvpa_Maxwellian
+using moment_kinetics.fokker_planck_test: Cssp_Maxwellian_inputs
+using moment_kinetics.fokker_planck_test: print_test_data, plot_test_data, fkpl_error_data, allocate_error_data
+
+using moment_kinetics.fokker_planck_calculus: elliptic_solve!, ravel_c_to_vpavperp!, ravel_vpavperp_to_c!, ravel_c_to_vpavperp_parallel!
+using moment_kinetics.fokker_planck_calculus: enforce_zero_bc!, allocate_rosenbluth_potential_boundary_data
+using moment_kinetics.fokker_planck_calculus: calculate_rosenbluth_potential_boundary_data!, calculate_rosenbluth_potential_boundary_data_exact!
+using moment_kinetics.fokker_planck_calculus: test_rosenbluth_potential_boundary_data, enforce_vpavperp_BCs!
+using moment_kinetics.fokker_planck_calculus: calculate_rosenbluth_potentials_via_elliptic_solve!
+
+
+
+    
+    function print_matrix(matrix,name::String,n::mk_int,m::mk_int)
+        # heading `name` first, then matrix[1:n,1:m] with one matrix row per output line
+        println("\n ",name," \n")
+        foreach(1:n) do irow
+            foreach(icol -> @printf("%.2f ", matrix[irow,icol]), 1:m)
+            # terminate the current row
+            println("")
+        end
+        println("\n")
+    end
+    
+    function print_vector(vector,name::String,m::mk_int)
+        # heading `name` first, then vector[1:m] on a single line with three decimals each
+        println("\n ",name," \n")
+        foreach(idx -> @printf("%.3f ", vector[idx]), 1:m)
+        # terminate the row of entries, then leave blank lines before any later output
+        println("")
+        println("\n")
+    end
+
+    function test_weak_form_collisions(ngrid,nelement_vpa,nelement_vperp;
+        Lvpa=12.0,Lvperp=6.0,plot_test_output=false,
+        test_parallelism=false,test_self_operator=true,
+        test_dense_construction=false,standalone=false,
+        use_Maxwellian_Rosenbluth_coefficients=false,
+        use_Maxwellian_field_particle_distribution=false,
+        test_numerical_conserving_terms=false,
+        algebraic_solve_for_d2Gdvperp2=false)
+        # Run the weak-form Fokker-Planck collision operator test on a single (vpa,vperp)
+        # grid with ngrid points per element: check the assembled KKpar2D/KKperp2D operators
+        # on exp(-vpa^2-vperp^2), then compare the numerical collision operator and
+        # Rosenbluth potentials for Maxwellian inputs with the *_Maxwellian test functions.
+        # Returns (fkerr, calculate_time, init_time): the error data struct and two
+        # elapsed times obtained with Dates.value from differences of now() calls.
+        nelement_local_vpa = nelement_vpa # number of elements per rank
+        nelement_global_vpa = nelement_local_vpa # total number of elements 
+        nelement_local_vperp = nelement_vperp # number of elements per rank
+        nelement_global_vperp = nelement_local_vperp # total number of elements 
+        # Lvpa and Lvperp (keyword arguments) are the physical box sizes in reference units;
+        # standalone=true makes this function call initialize_comms!/finalize_comms! itself.
+        bc = "" #not required to take a particular value, not used 
+        # fd_option and adv_input not actually used so given values unimportant
+        # (alternative discretization option: "chebyshev_pseudospectral")
+        discretization = "gausslegendre_pseudospectral"
+        fd_option = "fourth_order_centered"
+        cheb_option = "matrix"
+        adv_input = advection_input("default", 1.0, 0.0, 0.0)
+        nrank = 1
+        irank = 0
+        comm = MPI.COMM_NULL
+        # create the 'input' struct containing input info needed to create a
+        # coordinate
+        element_spacing_option = "uniform"
+        vpa_input = grid_input("vpa", ngrid, nelement_global_vpa, nelement_local_vpa, 
+            nrank, irank, Lvpa, discretization, fd_option, cheb_option, bc, adv_input,comm,element_spacing_option)
+        vperp_input = grid_input("vperp", ngrid, nelement_global_vperp, nelement_local_vperp, 
+            nrank, irank, Lvperp, discretization, fd_option, cheb_option, bc, adv_input,comm,element_spacing_option)
+        # report the grid parameters, then create the vpa and vperp coordinate structs
+        println("made inputs")
+        println("vpa: ngrid: ",ngrid," nelement: ",nelement_local_vpa, " Lvpa: ",Lvpa)
+        println("vperp: ngrid: ",ngrid," nelement: ",nelement_local_vperp, " Lvperp: ",Lvperp)
+        vpa, vpa_spectral = define_coordinate(vpa_input)
+        vperp, vperp_spectral = define_coordinate(vperp_input)
+        
+        # Set up MPI (initialize_comms! is only called here when standalone is true)
+        if standalone
+            initialize_comms!()
+        end
+        setup_distributed_memory_MPI(1,1,1,1)
+        looping.setup_loop_ranges!(block_rank[], block_size[];
+                                       s=1, sn=1,
+                                       r=1, z=1, vperp=vperp.n, vpa=vpa.n,
+                                       vzeta=1, vr=1, vz=1)
+        nc_global = vpa.n*vperp.n
+        begin_serial_region()
+        start_init_time = now()
+        
+        fkpl_arrays = init_fokker_planck_collisions_weak_form(vpa,vperp,vpa_spectral,vperp_spectral; 
+                           precompute_weights=true, test_dense_matrix_construction=test_dense_construction)
+        KKpar2D_with_BC_terms_sparse = fkpl_arrays.KKpar2D_with_BC_terms_sparse
+        KKperp2D_with_BC_terms_sparse = fkpl_arrays.KKperp2D_with_BC_terms_sparse
+        lu_obj_MM = fkpl_arrays.lu_obj_MM
+        finish_init_time = now()
+        
+        fvpavperp = Array{mk_float,2}(undef,vpa.n,vperp.n)
+        fvpavperp_test = Array{mk_float,2}(undef,vpa.n,vperp.n)
+        fvpavperp_err = Array{mk_float,2}(undef,vpa.n,vperp.n)
+        d2fvpavperp_dvpa2_exact = Array{mk_float,2}(undef,vpa.n,vperp.n)
+        d2fvpavperp_dvpa2_err = Array{mk_float,2}(undef,vpa.n,vperp.n)
+        d2fvpavperp_dvpa2_num = Array{mk_float,2}(undef,vpa.n,vperp.n)
+        d2fvpavperp_dvperp2_exact = Array{mk_float,2}(undef,vpa.n,vperp.n)
+        d2fvpavperp_dvperp2_err = Array{mk_float,2}(undef,vpa.n,vperp.n)
+        d2fvpavperp_dvperp2_num = Array{mk_float,2}(undef,vpa.n,vperp.n)
+        fc = Array{mk_float,1}(undef,nc_global)
+        dfc = Array{mk_float,1}(undef,nc_global)
+        gc = Array{mk_float,1}(undef,nc_global)
+        dgc = Array{mk_float,1}(undef,nc_global)
+        for ivperp in 1:vperp.n
+            for ivpa in 1:vpa.n
+                fvpavperp[ivpa,ivperp] = exp(-vpa.grid[ivpa]^2 - vperp.grid[ivperp]^2)
+                d2fvpavperp_dvpa2_exact[ivpa,ivperp] = (4.0*vpa.grid[ivpa]^2 - 2.0)*exp(-vpa.grid[ivpa]^2 - vperp.grid[ivperp]^2)
+                d2fvpavperp_dvperp2_exact[ivpa,ivperp] = (4.0*vperp.grid[ivperp]^2 - 2.0)*exp(-vpa.grid[ivpa]^2 - vperp.grid[ivperp]^2)
+            end
+        end
+        
+        # fill fc with fvpavperp, then unravel again to check the round trip
+        ravel_vpavperp_to_c!(fc,fvpavperp,vpa.n,vperp.n)
+        ravel_c_to_vpavperp!(fvpavperp_test,fc,nc_global,vpa.n)
+        @. fvpavperp_err = abs(fvpavperp - fvpavperp_test)
+        @serial_region begin
+            println("max(ravel_err)",maximum(fvpavperp_err))
+        end
+        # optional debug output: print_vector(fc,"fc",nc_global)
+        # multiply by KKpar2D and KKperp2D to fill dfc and dgc
+        mul!(dfc,KKpar2D_with_BC_terms_sparse,fc)
+        mul!(dgc,KKperp2D_with_BC_terms_sparse,fc)
+        # solve with the mass matrix LU object; fc and gc are rebound to the solutions
+        fc = lu_obj_MM \ dfc
+        gc = lu_obj_MM \ dgc
+        # optional debug output: print_vector(fc,"fc",nc_global)
+        # unravel the 1D solution vectors back into (vpa,vperp) arrays
+        ravel_c_to_vpavperp!(d2fvpavperp_dvpa2_num,fc,nc_global,vpa.n)
+        ravel_c_to_vpavperp!(d2fvpavperp_dvperp2_num,gc,nc_global,vpa.n)
+        @serial_region begin 
+            if nc_global < 30
+                print_matrix(d2fvpavperp_dvpa2_num,"d2fvpavperp_dvpa2_num",vpa.n,vperp.n)
+            end
+            @. d2fvpavperp_dvpa2_err = abs(d2fvpavperp_dvpa2_num - d2fvpavperp_dvpa2_exact)
+            println("maximum(d2fvpavperp_dvpa2_err): ",maximum(d2fvpavperp_dvpa2_err))
+            @. d2fvpavperp_dvperp2_err = abs(d2fvpavperp_dvperp2_num - d2fvpavperp_dvperp2_exact)
+            println("maximum(d2fvpavperp_dvperp2_err): ",maximum(d2fvpavperp_dvperp2_err))
+            if nc_global < 30
+                print_matrix(d2fvpavperp_dvpa2_err,"d2fvpavperp_dvpa2_err",vpa.n,vperp.n)
+            end
+            if plot_test_output
+                plot_test_data(d2fvpavperp_dvpa2_exact,d2fvpavperp_dvpa2_num,d2fvpavperp_dvpa2_err,"d2fvpavperp_dvpa2",vpa,vperp)
+                plot_test_data(d2fvpavperp_dvperp2_exact,d2fvpavperp_dvperp2_num,d2fvpavperp_dvperp2_err,"d2fvpavperp_dvperp2",vpa,vperp)
+            end
+        end
+        # test the Laplacian solve with a standard F_Maxwellian -> H_Maxwellian test
+        dummy_vpavperp = Array{mk_float,2}(undef,vpa.n,vperp.n)
+        Fs_M = Array{mk_float,2}(undef,vpa.n,vperp.n)
+        F_M = Array{mk_float,2}(undef,vpa.n,vperp.n)
+        C_M_num = allocate_shared_float(vpa.n,vperp.n)
+        C_M_exact = Array{mk_float,2}(undef,vpa.n,vperp.n)
+        C_M_err = Array{mk_float,2}(undef,vpa.n,vperp.n)
+        # Naming used here: *_exact arrays are filled from the fokker_planck_test Maxwellian
+        # functions, *_num arrays (allocated with allocate_shared_float) receive the numerical
+        # results, and *_err arrays are handed to print_test_data/plot_test_data with them.
+        H_M_exact = Array{mk_float,2}(undef,vpa.n,vperp.n)
+        H_M_num = allocate_shared_float(vpa.n,vperp.n)
+        H_M_err = Array{mk_float,2}(undef,vpa.n,vperp.n)
+        G_M_exact = Array{mk_float,2}(undef,vpa.n,vperp.n)
+        G_M_num = allocate_shared_float(vpa.n,vperp.n)
+        G_M_err = Array{mk_float,2}(undef,vpa.n,vperp.n)
+        d2Gdvpa2_M_exact = Array{mk_float,2}(undef,vpa.n,vperp.n)
+        d2Gdvpa2_M_num = allocate_shared_float(vpa.n,vperp.n)
+        d2Gdvpa2_M_err = Array{mk_float,2}(undef,vpa.n,vperp.n)
+        d2Gdvperp2_M_exact = Array{mk_float,2}(undef,vpa.n,vperp.n)
+        d2Gdvperp2_M_num = allocate_shared_float(vpa.n,vperp.n)
+        d2Gdvperp2_M_err = Array{mk_float,2}(undef,vpa.n,vperp.n)
+        dGdvperp_M_exact = Array{mk_float,2}(undef,vpa.n,vperp.n)
+        dGdvperp_M_num = allocate_shared_float(vpa.n,vperp.n)
+        dGdvperp_M_err = Array{mk_float,2}(undef,vpa.n,vperp.n)
+        d2Gdvperpdvpa_M_exact = Array{mk_float,2}(undef,vpa.n,vperp.n)
+        d2Gdvperpdvpa_M_num = allocate_shared_float(vpa.n,vperp.n)
+        d2Gdvperpdvpa_M_err = Array{mk_float,2}(undef,vpa.n,vperp.n)
+        dHdvpa_M_exact = Array{mk_float,2}(undef,vpa.n,vperp.n)
+        dHdvpa_M_num = allocate_shared_float(vpa.n,vperp.n)
+        dHdvpa_M_err = Array{mk_float,2}(undef,vpa.n,vperp.n)
+        dHdvperp_M_exact = Array{mk_float,2}(undef,vpa.n,vperp.n)
+        dHdvperp_M_num = allocate_shared_float(vpa.n,vperp.n)
+        dHdvperp_M_err = Array{mk_float,2}(undef,vpa.n,vperp.n)
+
+        if test_self_operator
+            dens, upar, vth = 1.0, 1.0, 1.0
+            denss, upars, vths = dens, upar, vth
+        else
+            denss, upars, vths = 1.0, -1.0, 2.0/3.0
+            dens, upar, vth = 1.0, 1.0, 1.0
+        end
+        ms = 1.0
+        msp = 1.0
+        nussp = 1.0
+        begin_serial_region()
+        for ivperp in 1:vperp.n
+            for ivpa in 1:vpa.n
+                Fs_M[ivpa,ivperp] = F_Maxwellian(denss,upars,vths,vpa,vperp,ivpa,ivperp)
+                F_M[ivpa,ivperp] = F_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
+                H_M_exact[ivpa,ivperp] = H_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
+                G_M_exact[ivpa,ivperp] = G_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
+                d2Gdvpa2_M_exact[ivpa,ivperp] = d2Gdvpa2_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
+                d2Gdvperp2_M_exact[ivpa,ivperp] = d2Gdvperp2_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
+                dGdvperp_M_exact[ivpa,ivperp] = dGdvperp_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
+                d2Gdvperpdvpa_M_exact[ivpa,ivperp] = d2Gdvperpdvpa_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
+                dHdvpa_M_exact[ivpa,ivperp] = dHdvpa_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
+                dHdvperp_M_exact[ivpa,ivperp] = dHdvperp_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
+                C_M_exact[ivpa,ivperp] = Cssp_Maxwellian_inputs(denss,upars,vths,ms,
+                                                                dens,upar,vth,msp,
+                                                                nussp,vpa,vperp,ivpa,ivperp)
+            end
+        end
+        rpbd_exact = allocate_rosenbluth_potential_boundary_data(vpa,vperp)
+        # use known test function to provide exact data
+        calculate_rosenbluth_potential_boundary_data_exact!(rpbd_exact,
+              H_M_exact,dHdvpa_M_exact,dHdvperp_M_exact,G_M_exact,
+              dGdvperp_M_exact,d2Gdvperp2_M_exact,
+              d2Gdvperpdvpa_M_exact,d2Gdvpa2_M_exact,vpa,vperp)
+        @serial_region begin
+            println("begin C calculation   ", Dates.format(now(), dateformat"H:MM:SS"))
+        end
+
+        fokker_planck_collision_operator_weak_form!(Fs_M,F_M,ms,msp,nussp,
+                                             fkpl_arrays,
+                                             vperp, vpa, vperp_spectral, vpa_spectral,
+                                             test_assembly_serial=test_parallelism,
+                                             use_Maxwellian_Rosenbluth_coefficients=use_Maxwellian_Rosenbluth_coefficients,
+                                             use_Maxwellian_field_particle_distribution=use_Maxwellian_field_particle_distribution,
+                                             algebraic_solve_for_d2Gdvperp2=algebraic_solve_for_d2Gdvperp2,
+                                             calculate_GG = false, calculate_dGdvperp=false)
+        if test_numerical_conserving_terms && test_self_operator
+            # enforce the boundary conditions on CC before it is used for timestepping
+            enforce_vpavperp_BCs!(fkpl_arrays.CC,vpa,vperp,vpa_spectral,vperp_spectral)
+            # make ad-hoc conserving corrections
+            conserving_corrections!(fkpl_arrays.CC,Fs_M,vpa,vperp,dummy_vpavperp)            
+        end
+        # calculate the Rosenbluth potentials again, standalone, now including G and dGdvperp
+        calculate_rosenbluth_potentials_via_elliptic_solve!(fkpl_arrays.GG,fkpl_arrays.HH,fkpl_arrays.dHdvpa,fkpl_arrays.dHdvperp,
+             fkpl_arrays.d2Gdvpa2,fkpl_arrays.dGdvperp,fkpl_arrays.d2Gdvperpdvpa,fkpl_arrays.d2Gdvperp2,F_M,
+             vpa,vperp,vpa_spectral,vperp_spectral,fkpl_arrays;
+             algebraic_solve_for_d2Gdvperp2=false,calculate_GG=true,calculate_dGdvperp=true)
+        # copy the C[Fs,Fs'] result and the Rosenbluth potentials
+        # from fkpl_arrays into the *_num arrays for testing
+        begin_vperp_vpa_region()
+        @loop_vperp_vpa ivperp ivpa begin
+            C_M_num[ivpa,ivperp] = fkpl_arrays.CC[ivpa,ivperp]
+            G_M_num[ivpa,ivperp] = fkpl_arrays.GG[ivpa,ivperp]
+            H_M_num[ivpa,ivperp] = fkpl_arrays.HH[ivpa,ivperp]
+            dHdvpa_M_num[ivpa,ivperp] = fkpl_arrays.dHdvpa[ivpa,ivperp]
+            dHdvperp_M_num[ivpa,ivperp] = fkpl_arrays.dHdvperp[ivpa,ivperp]
+            dGdvperp_M_num[ivpa,ivperp] = fkpl_arrays.dGdvperp[ivpa,ivperp]
+            d2Gdvperp2_M_num[ivpa,ivperp] = fkpl_arrays.d2Gdvperp2[ivpa,ivperp]
+            d2Gdvpa2_M_num[ivpa,ivperp] = fkpl_arrays.d2Gdvpa2[ivpa,ivperp]
+            d2Gdvperpdvpa_M_num[ivpa,ivperp] = fkpl_arrays.d2Gdvperpdvpa[ivpa,ivperp]
+        end
+        
+        init_time = Dates.value(finish_init_time - start_init_time)
+        calculate_time = Dates.value(now() - finish_init_time)
+        begin_serial_region()
+        fkerr = allocate_error_data()
+        @serial_region begin
+            println("finished C calculation   ", Dates.format(now(), dateformat"H:MM:SS"))
+            
+            # test the boundary data calculation (the returned max errors are not used again here)
+            if !use_Maxwellian_Rosenbluth_coefficients
+                max_H_err, max_dHdvpa_err, max_dHdvperp_err, max_G_err, max_dGdvperp_err,
+                max_d2Gdvperp2_err, max_d2Gdvperpdvpa_err, max_d2Gdvpa2_err = test_rosenbluth_potential_boundary_data(fkpl_arrays.rpbd,rpbd_exact,vpa,vperp)
+            end
+            dummy_array = Array{mk_float,2}(undef,vpa.n,vperp.n)
+            fkerr.H_M.max, fkerr.H_M.L2 = print_test_data(H_M_exact,H_M_num,H_M_err,"H_M",vpa,vperp,dummy_array)
+            fkerr.dHdvpa_M.max, fkerr.dHdvpa_M.L2 = print_test_data(dHdvpa_M_exact,dHdvpa_M_num,dHdvpa_M_err,"dHdvpa_M",vpa,vperp,dummy_array)
+            fkerr.dHdvperp_M.max, fkerr.dHdvperp_M.L2 = print_test_data(dHdvperp_M_exact,dHdvperp_M_num,dHdvperp_M_err,"dHdvperp_M",vpa,vperp,dummy_array)
+            fkerr.G_M.max, fkerr.G_M.L2 = print_test_data(G_M_exact,G_M_num,G_M_err,"G_M",vpa,vperp,dummy_array)
+            fkerr.d2Gdvpa2_M.max, fkerr.d2Gdvpa2_M.L2 = print_test_data(d2Gdvpa2_M_exact,d2Gdvpa2_M_num,d2Gdvpa2_M_err,"d2Gdvpa2_M",vpa,vperp,dummy_array)
+            fkerr.dGdvperp_M.max, fkerr.dGdvperp_M.L2 = print_test_data(dGdvperp_M_exact,dGdvperp_M_num,dGdvperp_M_err,"dGdvperp_M",vpa,vperp,dummy_array)
+            fkerr.d2Gdvperpdvpa_M.max, fkerr.d2Gdvperpdvpa_M.L2 = print_test_data(d2Gdvperpdvpa_M_exact,d2Gdvperpdvpa_M_num,d2Gdvperpdvpa_M_err,"d2Gdvperpdvpa_M",vpa,vperp,dummy_array)
+            fkerr.d2Gdvperp2_M.max, fkerr.d2Gdvperp2_M.L2 = print_test_data(d2Gdvperp2_M_exact,d2Gdvperp2_M_num,d2Gdvperp2_M_err,"d2Gdvperp2_M",vpa,vperp,dummy_array)
+            fkerr.C_M.max, fkerr.C_M.L2 = print_test_data(C_M_exact,C_M_num,C_M_err,"C_M",vpa,vperp,dummy_array)
+            
+            # calculate the entropy production (NOTE(review): lnfC stores Fs_M*C_M_num, no log is taken - confirm intent)
+            lnfC = fkpl_arrays.rhsvpavperp
+            @loop_vperp_vpa ivperp ivpa begin
+                lnfC[ivpa,ivperp] = Fs_M[ivpa,ivperp]*C_M_num[ivpa,ivperp]
+            end
+            dSdt = - get_density(lnfC,vpa,vperp)
+            println("dSdt: $dSdt should be >0.0")
+            if plot_test_output
+                plot_test_data(C_M_exact,C_M_num,C_M_err,"C_M",vpa,vperp)
+                plot_test_data(H_M_exact,H_M_num,H_M_err,"H_M",vpa,vperp)
+                plot_test_data(dHdvpa_M_exact,dHdvpa_M_num,dHdvpa_M_err,"dHdvpa_M",vpa,vperp)
+                plot_test_data(dHdvperp_M_exact,dHdvperp_M_num,dHdvperp_M_err,"dHdvperp_M",vpa,vperp)
+                plot_test_data(G_M_exact,G_M_num,G_M_err,"G_M",vpa,vperp)
+                plot_test_data(dGdvperp_M_exact,dGdvperp_M_num,dGdvperp_M_err,"dGdvperp_M",vpa,vperp)
+                plot_test_data(d2Gdvperp2_M_exact,d2Gdvperp2_M_num,d2Gdvperp2_M_err,"d2Gdvperp2_M",vpa,vperp)
+                plot_test_data(d2Gdvperpdvpa_M_exact,d2Gdvperpdvpa_M_num,d2Gdvperpdvpa_M_err,"d2Gdvperpdvpa_M",vpa,vperp)
+                plot_test_data(d2Gdvpa2_M_exact,d2Gdvpa2_M_num,d2Gdvpa2_M_err,"d2Gdvpa2_M",vpa,vperp)
+            end
+        end
+        if test_self_operator
+            delta_n = get_density(C_M_num, vpa, vperp)
+            delta_upar = get_upar(C_M_num, vpa, vperp, dens)
+            delta_ppar = msp*get_ppar(C_M_num, vpa, vperp, upar)
+            delta_pperp = msp*get_pperp(C_M_num, vpa, vperp)
+            delta_pressure = get_pressure(delta_ppar,delta_pperp)
+            @serial_region begin
+                println("delta_n: ", delta_n)
+                println("delta_upar: ", delta_upar)
+                println("delta_pressure: ", delta_pressure)
+            end
+            fkerr.moments.delta_density = delta_n
+            fkerr.moments.delta_upar = delta_upar
+            fkerr.moments.delta_pressure = delta_pressure
+        else
+            delta_n = get_density(C_M_num, vpa, vperp)
+            @serial_region begin
+                println("delta_n: ", delta_n)
+            end
+            fkerr.moments.delta_density = delta_n
+        end
+        if standalone
+            finalize_comms!()
+        end
+        return fkerr, calculate_time, init_time
+    end
+
+    function expected_nelement_scaling!(expected,nelement_list,ngrid,nscan)
+        # fill expected[1:nscan] with the reference scaling (1/nelement)^(ngrid - 1)
+        order = ngrid - 1
+        foreach(i -> (expected[i] = (1.0/nelement_list[i])^order), 1:nscan)
+    end
+
+    function expected_nelement_integral_scaling!(expected,nelement_list,ngrid,nscan)
+        # fill expected[1:nscan] with the reference scaling (1/nelement)^(ngrid + 1)
+        order = ngrid+1
+        foreach(i -> (expected[i] = (1.0/nelement_list[i])^order), 1:nscan)
+    end
+
+    function expect_timing!(expected,nelement_list,nscan,power)
+        # fill expected[1:nscan] with the reference curve nelement^power
+        scan_indices = 1:nscan
+        foreach(i -> (expected[i] = nelement_list[i]^power), scan_indices)
+    end
+    
+    function run_assembly_test(; ngrid=5, nelement_list = [8],
+        plot_scan=true,
+        plot_test_output = false,
+        use_Maxwellian_Rosenbluth_coefficients=false,
+        use_Maxwellian_field_particle_distribution=false,
+        test_dense_construction=false,
+        test_parallelism=false,
+        test_numerical_conserving_terms=false,
+        algebraic_solve_for_d2Gdvperp2=false,
+        test_self_operator = true,
+        Lvpa = 12.0, Lvperp = 6.0)
+        initialize_comms!()
+        #ngrid = 5
+        #plot_scan = true
+        #plot_test_output = true#false
+        #test_parallelism = false
+        #test_self_operator = true
+        #test_dense_construction = false
+        #nelement_list = Int[8, 16, 32, 64, 128]
+        #nelement_list = Int[4, 8, 16, 32, 64]
+        #nelement_list = Int[2, 4, 8]
+        #nelement_list = Int[4, 8, 16, 32, 64]
+        #nelement_list = Int[2, 4, 8, 16, 32]
+        #nelement_list = Int[2, 4, 8, 16]
+        #nelement_list = Int[100]
+        #nelement_list = Int[8]
+        #nelement_list = Int[4]
+        nscan = size(nelement_list,1)
+        max_C_err = Array{mk_float,1}(undef,nscan)
+        max_H_err = Array{mk_float,1}(undef,nscan)
+        max_G_err = Array{mk_float,1}(undef,nscan)
+        max_dHdvpa_err = Array{mk_float,1}(undef,nscan)
+        max_dHdvperp_err = Array{mk_float,1}(undef,nscan)
+        max_d2Gdvperp2_err = Array{mk_float,1}(undef,nscan)
+        max_d2Gdvpa2_err = Array{mk_float,1}(undef,nscan)
+        max_d2Gdvperpdvpa_err = Array{mk_float,1}(undef,nscan)
+        max_dGdvperp_err = Array{mk_float,1}(undef,nscan)
+        L2_C_err = Array{mk_float,1}(undef,nscan)
+        L2_H_err = Array{mk_float,1}(undef,nscan)
+        L2_G_err = Array{mk_float,1}(undef,nscan)
+        L2_dHdvpa_err = Array{mk_float,1}(undef,nscan)
+        L2_dHdvperp_err = Array{mk_float,1}(undef,nscan)
+        L2_d2Gdvperp2_err = Array{mk_float,1}(undef,nscan)
+        L2_d2Gdvpa2_err = Array{mk_float,1}(undef,nscan)
+        L2_d2Gdvperpdvpa_err = Array{mk_float,1}(undef,nscan)
+        L2_dGdvperp_err = Array{mk_float,1}(undef,nscan)
+        #max_d2fsdvpa2_err = Array{mk_float,1}(undef,nscan)
+        #max_d2fsdvperp2_err = Array{mk_float,1}(undef,nscan)
+        n_err = Array{mk_float,1}(undef,nscan)
+        u_err = Array{mk_float,1}(undef,nscan)
+        p_err = Array{mk_float,1}(undef,nscan)
+        calculate_times = Array{mk_float,1}(undef,nscan)
+        init_times = Array{mk_float,1}(undef,nscan)
+        
+        expected = Array{mk_float,1}(undef,nscan)
+        expected_nelement_scaling!(expected,nelement_list,ngrid,nscan)
+        expected_integral = Array{mk_float,1}(undef,nscan)
+        expected_nelement_integral_scaling!(expected_integral,nelement_list,ngrid,nscan)
+        expected_label = L"(1/N_{el})^{n_g - 1}"
+        expected_integral_label = L"(1/N_{el})^{n_g +1}"
+        
+        expected_t_2 = Array{mk_float,1}(undef,nscan)
+        expected_t_3 = Array{mk_float,1}(undef,nscan)
+        expect_timing!(expected_t_2,nelement_list,nscan,2)
+        expect_timing!(expected_t_3,nelement_list,nscan,3)
+        expected_t_2_label = L"(N_{element})^2"
+        expected_t_3_label = L"(N_{element})^3"
+        
+        for iscan in 1:nscan
+            local nelement = nelement_list[iscan]
+            nelement_vpa = 2*nelement
+            nelement_vperp = nelement
+            fkerr, calculate_times[iscan], init_times[iscan] = test_weak_form_collisions(ngrid,nelement_vpa,nelement_vperp,
+            plot_test_output=plot_test_output,
+            test_parallelism=test_parallelism,
+            test_self_operator=test_self_operator,
+            test_dense_construction=test_dense_construction,
+            use_Maxwellian_Rosenbluth_coefficients=use_Maxwellian_Rosenbluth_coefficients,
+            use_Maxwellian_field_particle_distribution=use_Maxwellian_field_particle_distribution,
+            test_numerical_conserving_terms=test_numerical_conserving_terms,
+            algebraic_solve_for_d2Gdvperp2=algebraic_solve_for_d2Gdvperp2,
+            standalone=false, Lvpa=Lvpa, Lvperp=Lvperp)
+            max_C_err[iscan], L2_C_err[iscan] = fkerr.C_M.max ,fkerr.C_M.L2
+            max_H_err[iscan], L2_H_err[iscan] = fkerr.H_M.max ,fkerr.H_M.L2
+            max_dHdvpa_err[iscan], L2_dHdvpa_err[iscan] = fkerr.dHdvpa_M.max ,fkerr.dHdvpa_M.L2
+            max_dHdvperp_err[iscan], L2_dHdvperp_err[iscan] = fkerr.dHdvperp_M.max ,fkerr.dHdvperp_M.L2
+            max_G_err[iscan], L2_G_err[iscan] = fkerr.G_M.max ,fkerr.G_M.L2
+            max_dGdvperp_err[iscan], L2_dGdvperp_err[iscan] = fkerr.dGdvperp_M.max ,fkerr.dGdvperp_M.L2
+            max_d2Gdvpa2_err[iscan], L2_d2Gdvpa2_err[iscan] = fkerr.d2Gdvpa2_M.max ,fkerr.d2Gdvpa2_M.L2
+            max_d2Gdvperpdvpa_err[iscan], L2_d2Gdvperpdvpa_err[iscan] = fkerr.d2Gdvperpdvpa_M.max ,fkerr.d2Gdvperpdvpa_M.L2
+            max_d2Gdvperp2_err[iscan], L2_d2Gdvperp2_err[iscan] = fkerr.d2Gdvperp2_M.max ,fkerr.d2Gdvperp2_M.L2
+            n_err[iscan] = abs(fkerr.moments.delta_density)
+            u_err[iscan] = abs(fkerr.moments.delta_upar)
+            p_err[iscan] = abs(fkerr.moments.delta_pressure)
+        end
+        if global_rank[]==0 && plot_scan
+            fontsize = 8
+            #ytick_sequence = Array([1.0e-13,1.0e-12,1.0e-11,1.0e-10,1.0e-9,1.0e-8,1.0e-7,1.0e-6,1.0e-5,1.0e-4,1.0e-3,1.0e-2,1.0e-1,1.0e-0,1.0e1])
+            ytick_sequence = Array([1.0e-12,1.0e-11,1.0e-10,1.0e-9,1.0e-8,1.0e-7,1.0e-6,1.0e-5,1.0e-4,1.0e-3,1.0e-2,1.0e-1])
+            xlabel = L"N_{element}"
+            Clabel = L"\epsilon_{\infty}(C)"
+            Hlabel = L"\epsilon_{\infty}(H)"
+            Glabel = L"\epsilon_{\infty}(G)"
+            dHdvpalabel = L"\epsilon_{\infty}(dH/d v_{\|\|})"
+            dHdvperplabel = L"\epsilon_{\infty}(dH/d v_{\perp})"
+            d2Gdvperp2label = L"\epsilon_{\infty}(d^2G/d v_{\perp}^2)"
+            d2Gdvpa2label = L"\epsilon_{\infty}(d^2G/d v_{\|\|}^2)"
+            d2Gdvperpdvpalabel = L"\epsilon_{\infty}(d^2G/d v_{\perp} d v_{\|\|})"
+            dGdvperplabel = L"\epsilon_{\infty}(dG/d v_{\perp})"
+            
+            #println(max_G_err,max_H_err,max_dHdvpa_err,max_dHdvperp_err,max_d2Gdvperp2_err,max_d2Gdvpa2_err,max_d2Gdvperpdvpa_err,max_dGdvperp_err, expected, expected_integral)
+            plot(nelement_list, [max_C_err,max_H_err,max_G_err, expected, expected_integral],
+            xlabel=xlabel, label=[Clabel Hlabel Glabel expected_label expected_integral_label], ylabel="",
+             shape =:circle, xscale=:log10, yscale=:log10, xticks = (nelement_list, nelement_list), yticks = (ytick_sequence, ytick_sequence), markersize = 5, linewidth=2, 
+              xtickfontsize = fontsize, xguidefontsize = fontsize, ytickfontsize = fontsize, yguidefontsize = fontsize, legendfontsize = fontsize,
+              foreground_color_legend = nothing, background_color_legend = nothing, legend=:bottomleft)
+            outfile = "fkpl_C_G_H_max_test_ngrid_"*string(ngrid)*"_GLL.pdf"
+            savefig(outfile)
+            println(outfile)
+            println([max_C_err,max_H_err,max_G_err, expected, expected_integral])
+            
+            plot(nelement_list,  [max_dHdvpa_err, max_dHdvperp_err, max_d2Gdvperp2_err, max_d2Gdvpa2_err, max_d2Gdvperpdvpa_err, max_dGdvperp_err, expected,      expected_integral],
+            xlabel=xlabel, label=[dHdvpalabel     dHdvperplabel     d2Gdvperp2label     d2Gdvpa2label     d2Gdvperpdvpalabel     dGdvperplabel     expected_label expected_integral_label], ylabel="",
+             shape =:circle, xscale=:log10, yscale=:log10, xticks = (nelement_list, nelement_list), yticks = (ytick_sequence, ytick_sequence), markersize = 5, linewidth=2, 
+              xtickfontsize = fontsize, xguidefontsize = fontsize, ytickfontsize = fontsize, yguidefontsize = fontsize, legendfontsize = fontsize,
+              foreground_color_legend = nothing, background_color_legend = nothing, legend=:bottomleft)
+            outfile = "fkpl_coeffs_max_test_ngrid_"*string(ngrid)*"_GLL.pdf"
+            savefig(outfile)
+            println(outfile)
+            println([max_dHdvpa_err, max_dHdvperp_err, max_d2Gdvperp2_err, max_d2Gdvpa2_err, max_d2Gdvperpdvpa_err, max_dGdvperp_err, expected,      expected_integral])
+            
+            
+            ClabelL2 = L"\epsilon_{L2}(C)"
+            HlabelL2 = L"\epsilon_{L2}(H)"
+            GlabelL2 = L"\epsilon_{L2}(G)"
+            dHdvpalabelL2 = L"\epsilon_{L2}(dH/d v_{\|\|})"
+            dHdvperplabelL2 = L"\epsilon_{L2}(dH/d v_{\perp})"
+            d2Gdvperp2labelL2 = L"\epsilon_{L2}(d^2G/d v_{\perp}^2)"
+            d2Gdvpa2labelL2 = L"\epsilon_{L2}(d^2G/d v_{\|\|}^2)"
+            d2GdvperpdvpalabelL2 = L"\epsilon_{L2}(d^2G/d v_{\perp} d v_{\|\|})"
+            dGdvperplabelL2 = L"\epsilon_{L2}(dG/d v_{\perp})"
+            
+            
+            plot(nelement_list, [L2_C_err,L2_H_err,L2_G_err, expected, expected_integral],
+            xlabel=xlabel, label=[ClabelL2 HlabelL2 GlabelL2 expected_label expected_integral_label], ylabel="",
+             shape =:circle, xscale=:log10, yscale=:log10, xticks = (nelement_list, nelement_list), yticks = (ytick_sequence, ytick_sequence), markersize = 5, linewidth=2, 
+              xtickfontsize = fontsize, xguidefontsize = fontsize, ytickfontsize = fontsize, yguidefontsize = fontsize, legendfontsize = fontsize,
+              foreground_color_legend = nothing, background_color_legend = nothing, legend=:bottomleft)
+            outfile = "fkpl_C_G_H_L2_test_ngrid_"*string(ngrid)*"_GLL.pdf"
+            savefig(outfile)
+            println(outfile)
+            println([L2_C_err,L2_H_err,L2_G_err, expected, expected_integral])
+            
+            plot(nelement_list,  [L2_dHdvpa_err, L2_dHdvperp_err, L2_d2Gdvperp2_err, L2_d2Gdvpa2_err, L2_d2Gdvperpdvpa_err, L2_dGdvperp_err,  expected,      expected_integral],
+            xlabel=xlabel, label=[dHdvpalabelL2  dHdvperplabelL2  d2Gdvperp2labelL2  d2Gdvpa2labelL2  d2GdvperpdvpalabelL2  dGdvperplabelL2   expected_label expected_integral_label], ylabel="",
+             shape =:circle, xscale=:log10, yscale=:log10, xticks = (nelement_list, nelement_list), yticks = (ytick_sequence, ytick_sequence), markersize = 5, linewidth=2, 
+              xtickfontsize = fontsize, xguidefontsize = fontsize, ytickfontsize = fontsize, yguidefontsize = fontsize, legendfontsize = fontsize,
+              foreground_color_legend = nothing, background_color_legend = nothing, legend=:bottomleft)
+            outfile = "fkpl_coeffs_L2_test_ngrid_"*string(ngrid)*"_GLL.pdf"
+            savefig(outfile)
+            println(outfile)
+            println([L2_dHdvpa_err, L2_dHdvperp_err, L2_d2Gdvperp2_err, L2_d2Gdvpa2_err, L2_d2Gdvperpdvpa_err, L2_dGdvperp_err,  expected,      expected_integral])
+            
+            nlabel = L"|\Delta n|"
+            ulabel = L"|\Delta u_{\|\|}|"
+            plabel = L"|\Delta p|"
+            
+            if test_self_operator
+                plot(nelement_list, [max_C_err, L2_C_err, n_err, u_err, p_err, expected, expected_integral],
+                xlabel=xlabel, label=[Clabel ClabelL2 nlabel ulabel plabel expected_label expected_integral_label], ylabel="",
+                 shape =:circle, xscale=:log10, yscale=:log10, xticks = (nelement_list, nelement_list), yticks = (ytick_sequence, ytick_sequence), markersize = 5, linewidth=2, 
+                  xtickfontsize = fontsize, xguidefontsize = fontsize, ytickfontsize = fontsize, yguidefontsize = fontsize, legendfontsize = fontsize,
+                  foreground_color_legend = nothing, background_color_legend = nothing, legend=:bottomleft)
+                outfile = "fkpl_conservation_test_ngrid_"*string(ngrid)*"_GLL.pdf"
+                savefig(outfile)
+                println(outfile)
+                println([max_C_err, L2_C_err, n_err, u_err, p_err, expected, expected_integral])
+            else
+                plot(nelement_list, [max_C_err, L2_C_err, n_err, expected, expected_integral],
+                xlabel=xlabel, label=[Clabel ClabelL2 nlabel expected_label expected_integral_label], ylabel="",
+                 shape =:circle, xscale=:log10, yscale=:log10, xticks = (nelement_list, nelement_list), yticks = (ytick_sequence, ytick_sequence), markersize = 5, linewidth=2, 
+                  xtickfontsize = fontsize, xguidefontsize = fontsize, ytickfontsize = fontsize, yguidefontsize = fontsize, legendfontsize = fontsize,
+                  foreground_color_legend = nothing, background_color_legend = nothing, legend=:bottomleft)
+                outfile = "fkpl_conservation_test_ngrid_"*string(ngrid)*"_GLL.pdf"
+                savefig(outfile)
+                println(outfile)        
+                println([max_C_err, L2_C_err, n_err, expected, expected_integral])
+            end
+            
+            calculate_timeslabel = "time/step (ms)"
+            init_timeslabel = "time/init (ms)"
+            ytick_sequence_timing = Array([10^2,10^3,10^4,10^5,10^6])
+            plot(nelement_list, [calculate_times, init_times, expected_t_2, expected_t_3],
+            xlabel=xlabel, label=[calculate_timeslabel init_timeslabel expected_t_2_label expected_t_3_label], ylabel="",
+             shape =:circle, xscale=:log10, yscale=:log10, xticks = (nelement_list, nelement_list), markersize = 5, linewidth=2, 
+              xtickfontsize = fontsize, xguidefontsize = fontsize, ytickfontsize = fontsize, yguidefontsize = fontsize, legendfontsize = fontsize,
+              foreground_color_legend = nothing, background_color_legend = nothing, legend=:topleft)
+            outfile = "fkpl_timing_test_ngrid_"*string(ngrid)*"_GLL.pdf"
+            savefig(outfile)
+            println(outfile)
+            println([calculate_times, init_times, expected_t_2, expected_t_3])
+        end
+        finalize_comms!()
+    return nothing
+    end
+
+if abspath(PROGRAM_FILE) == @__FILE__ # true only when this file is the script being executed
+    using Pkg
+    Pkg.activate(".")
+    # one default warm-up run, then a resolution scan (plot_scan=true) for each ngrid
+    run_assembly_test() # to ensure routines are compiled before plots are made
+    run_assembly_test(ngrid=3,nelement_list=[8,16,32,64,128],plot_scan=true)
+    run_assembly_test(ngrid=5,nelement_list=[4,8,16,32,64],plot_scan=true)
+    run_assembly_test(ngrid=7,nelement_list=[2,4,8,16,32],plot_scan=true)
+    run_assembly_test(ngrid=9,nelement_list=[2,4,8,16],plot_scan=true)
+end
diff --git a/test_scripts/GaussLobattoLegendre_test.jl b/test_scripts/GaussLobattoLegendre_test.jl
new file mode 100644
index 000000000..80b865716
--- /dev/null
+++ b/test_scripts/GaussLobattoLegendre_test.jl
@@ -0,0 +1,224 @@
+export gausslegendre_test
+
+using FastGaussQuadrature
+using LegendrePolynomials: Pl
+using LinearAlgebra: mul!, lu, inv, cond
+using Printf
+using Plots
+using LaTeXStrings
+using MPI
+using Measures
+
+import moment_kinetics
+using moment_kinetics.gauss_legendre
+using moment_kinetics.input_structs: grid_input, advection_input
+using moment_kinetics.coordinates: define_coordinate
+using moment_kinetics.calculus: derivative!, second_derivative!, laplacian_derivative!
+using moment_kinetics.calculus: mass_matrix_solve!
+
+
+    # Print `name`, then the leading n-by-m block of `matrix` row by row (3 decimals).
+    function print_matrix(matrix,name,n,m)
+        println("\n ",name," \n")
+        for row in 1:n
+            foreach(col -> @printf("%.3f ", matrix[row,col]), 1:m)
+            println("")
+        end
+        println("\n")
+        return nothing
+    end
+    
+    # Print `name`, then the first m entries of `vector` on one line (3 decimals).
+    function print_vector(vector,name,m)
+        println("\n ",name," \n")
+        foreach(idx -> @printf("%.3f ", vector[idx]), 1:m)
+        println("")
+        println("\n")
+        return nothing
+    end
+
+    function gausslegendre_test(; ngrid=17, nelement=4, L_in=6.0)
+        # Builds a "vpa" and a "vperp" coordinate with the gausslegendre_pseudospectral
+        # discretization and prints quadrature / derivative errors for Gaussian test functions.
+        # ngrid: points per element; nelement: number of elements (single rank);
+        # L_in: box length used for vperp (the vpa box uses 2*L_in).
+        y_ngrid = ngrid #number of points per element 
+        y_nelement_local = nelement # number of elements per rank
+        y_nelement_global = y_nelement_local # total number of elements 
+        bc = "zero" 
+        discretization = "gausslegendre_pseudospectral"
+        # fd_option and adv_input not actually used so given values unimportant
+        fd_option = "fourth_order_centered"
+        cheb_option = "matrix"
+        adv_input = advection_input("default", 1.0, 0.0, 0.0)
+        nrank = 1
+        irank = 0#1
+        comm = MPI.COMM_NULL
+        element_spacing_option = "uniform"
+        # create the 'input' struct containing input info needed to create a
+        # coordinate
+        for y_name in ["vpa","vperp"]
+            println("")
+            println("$y_name test")
+            println("")
+            if y_name == "vperp"
+                y_L = L_in #physical box size in reference units 
+            else 
+                y_L = 2*L_in
+            end
+            y_input = grid_input(y_name, y_ngrid, y_nelement_global, y_nelement_local, 
+                nrank, irank, y_L, discretization, fd_option, cheb_option, bc, adv_input,comm,element_spacing_option)
+            
+            # create the coordinate structs
+            y, y_spectral = define_coordinate(y_input,init_YY=false)
+            #print_matrix(Mmat,"Mmat",y.n,y.n)
+            #print_matrix(y_spectral.radau.M0,"local radau mass matrix M0",y.ngrid,y.ngrid)
+            #print_matrix(y_spectral.radau.M1,"local radau mass matrix M1",y.ngrid,y.ngrid)
+            #print_matrix(y_spectral.lobatto.M0,"local mass matrix M0",y.ngrid,y.ngrid)
+            #print_matrix(y_spectral.lobatto.M1,"local mass matrix M1",y.ngrid,y.ngrid)
+            #print_matrix(y_spectral.mass_matrix,"global mass matrix",y.n,y.n)
+            #print_matrix(y_spectral.lobatto.S0,"local S0 matrix",y.ngrid,y.ngrid)
+            #print_matrix(y_spectral.lobatto.S1,"local S1 matrix",y.ngrid,y.ngrid)
+            #print_matrix(y_spectral.S_matrix,"global S matrix",y.n,y.n)
+            #print_matrix(y_spectral.radau.K0,"local radau K matrix K0",y.ngrid,y.ngrid)
+            #print_matrix(y_spectral.radau.K1,"local radau K matrix K1",y.ngrid,y.ngrid)
+            #print_matrix(y_spectral.lobatto.K0,"local K matrix K0",y.ngrid,y.ngrid)
+            #print_matrix(y_spectral.lobatto.K1,"local K matrix K1",y.ngrid,y.ngrid)
+            #print_matrix(y_spectral.radau.P0,"local radau P matrix P0",y.ngrid,y.ngrid)
+            #print_matrix(y_spectral.lobatto.P0,"local P matrix P0",y.ngrid,y.ngrid)
+            #print_matrix(y_spectral.K_matrix,"global K matrix",y.n,y.n)
+            #print_matrix(y_spectral.L_matrix,"global L matrix",y.n,y.n)
+            #@views y_spectral.K_matrix[1,:] *= (4.0/3.0)
+            #print_matrix(y_spectral.K_matrix,"global K matrix (hacked) ",y.n,y.n)
+            #print_matrix(y_spectral.radau.Dmat,"local radau D matrix Dmat",y.ngrid,y.ngrid)
+            #print_vector(y_spectral.radau.D0,"local radau D matrix D0",y.ngrid)
+            #print_matrix(y_spectral.lobatto.Dmat,"local lobatto D matrix Dmat",y.ngrid,y.ngrid)
+            #print_vector(y_spectral.lobatto.D0,"local lobatto D matrix D0",y.ngrid)
+            
+            f_exact = Array{Float64,1}(undef,y.n)
+            df_exact = Array{Float64,1}(undef,y.n)
+            df_num = Array{Float64,1}(undef,y.n)
+            df_err = Array{Float64,1}(undef,y.n)
+            g_exact = Array{Float64,1}(undef,y.n)
+            h_exact = Array{Float64,1}(undef,y.n)
+            divg_exact = Array{Float64,1}(undef,y.n)
+            divg_num = Array{Float64,1}(undef,y.n)
+            divg_err = Array{Float64,1}(undef,y.n)
+            laph_exact = Array{Float64,1}(undef,y.n)
+            laph_num = Array{Float64,1}(undef,y.n)
+            laph_err = Array{Float64,1}(undef,y.n)
+            d2f_exact = Array{Float64,1}(undef,y.n)
+            d2f_num = Array{Float64,1}(undef,y.n)
+            d2f_err = Array{Float64,1}(undef,y.n)
+            b = Array{Float64,1}(undef,y.n)
+            for iy in 1:y.n
+                f_exact[iy] = exp(-y.grid[iy]^2)
+                df_exact[iy] = -2.0*y.grid[iy]*exp(-y.grid[iy]^2)
+                d2f_exact[iy] = (4.0*y.grid[iy]^2 - 2.0)*exp(-y.grid[iy]^2)
+                g_exact[iy] = y.grid[iy]*exp(-y.grid[iy]^2)
+                divg_exact[iy] = 2.0*(1.0-y.grid[iy]^2)*exp(-y.grid[iy]^2)
+                h_exact[iy] = exp(-y.grid[iy]^2)
+                laph_exact[iy] = 4.0*(y.grid[iy]^2 - 1.0)*exp(-y.grid[iy]^2)
+                #h_exact[iy] = exp(-2.0*y.grid[iy]^2)
+                #laph_exact[iy] = 8.0*(2.0*y.grid[iy]^2 - 1.0)*exp(-2.0*y.grid[iy]^2)
+                #h_exact[iy] = exp(-y.grid[iy]^3)
+                #laph_exact[iy] = 9.0*y.grid[iy]*(y.grid[iy]^3 - 1.0)*exp(-y.grid[iy]^3)
+                #f_exact[iy] = -2.0*y.grid[iy]*exp(-y.grid[iy]^2)
+                
+            end
+            if y.name == "vpa" 
+                F_exact = sqrt(pi)
+            elseif y.name == "vperp"
+                F_exact = 1.0
+            end
+            # do a test integration
+            #println(f_exact)
+            F_num = sum(y.wgts.*f_exact)
+            F_err = abs(F_num - F_exact)
+            #for ix in 1:ngrid
+            #    F_num += w[ix]*df_exact[ix]
+            #end
+            println("F_err: ", F_err,  " F_exact: ",F_exact, " F_num: ", F_num)
+            
+            derivative!(df_num, f_exact, y, y_spectral)
+            @. df_err = abs(df_num - df_exact) # absolute error, so maximum() reports the worst-case magnitude
+            println("max(df_err) (interpolation): ",maximum(df_err))
+            derivative!(d2f_num, df_num, y, y_spectral)
+            @. d2f_err = abs(d2f_num - d2f_exact) # absolute error, as for the weak-form checks below
+            println("max(d2f_err) (double first derivative by interpolation): ",maximum(d2f_err))  
+            if y.name == "vpa"
+                mul!(b,y_spectral.S_matrix,f_exact)
+                mass_matrix_solve!(df_num,b,y_spectral)
+                @. df_err = abs(df_num - df_exact) # absolute error, consistent with d2f_err below
+                #println("df_num (weak form): ",df_num)
+                #println("df_exact (weak form): ",df_exact)
+                println("max(df_err) (weak form): ",maximum(df_err))
+                second_derivative!(d2f_num, f_exact, y, y_spectral)
+                #mul!(b,y_spectral.K_matrix,f_exact)
+                #mass_matrix_solve!(d2f_num,b,y_spectral)
+                @. d2f_err = abs(d2f_num - d2f_exact) #(0.5*y.L/y.nelement_global)*
+                #println(d2f_num)
+                #println(d2f_exact)
+                println("max(d2f_err) (weak form): ",maximum(d2f_err))
+                plot([y.grid, y.grid], [d2f_num, d2f_exact], xlabel="vpa", label=["num" "exact"], ylabel="")
+                outfile = "vpa_test.pdf"
+                savefig(outfile)
+                
+            elseif y.name == "vperp"
+                #println("condition: ",cond(y_spectral.mass_matrix)) 
+                mul!(b,y_spectral.S_matrix,g_exact)
+                mass_matrix_solve!(divg_num,b,y_spectral)
+                @. divg_err = abs(divg_num - divg_exact)
+                #println("divg_b (weak form): ",b)
+                #println("divg_num (weak form): ",divg_num)
+                #println("divg_exact (weak form): ",divg_exact)
+                println("max(divg_err) (weak form): ",maximum(divg_err))
+                
+                second_derivative!(d2f_num, f_exact, y, y_spectral)
+                #mul!(b,y_spectral.K_matrix,f_exact)
+                #mass_matrix_solve!(d2f_num,b,y_spectral)
+                @. d2f_err = abs(d2f_num - d2f_exact) #(0.5*y.L/y.nelement_global)*
+                #println(d2f_num)
+                #println(d2f_exact)
+                #println(d2f_err[1:10])
+                println("max(d2f_err) (weak form): ",maximum(d2f_err))
+                plot([y.grid, y.grid], [d2f_num, d2f_exact], xlabel="vperp", label=["num" "exact"], ylabel="")
+                outfile = "vperp_second_derivative_test.pdf"
+                savefig(outfile)
+                 
+                laplacian_derivative!(laph_num, h_exact, y, y_spectral)
+                #mul!(b,y_spectral.L_matrix,h_exact)
+                #mass_matrix_solve!(laph_num,b,y_spectral)
+                @. laph_err = abs(laph_num - laph_exact) #(0.5*y.L/y.nelement_global)*
+                #println(b[1:10])
+                #println(laph_num)
+                #println(laph_exact)
+                #println(laph_err[1:10])
+                println("max(laph_err) (weak form): ",maximum(laph_err))
+                plot([y.grid, y.grid], [laph_num, laph_exact], xlabel="vperp", label=["num" "exact"], ylabel="")
+                outfile = "vperp_laplacian_test.pdf"
+                savefig(outfile)
+                
+                @. y.scratch = y.grid*g_exact
+                derivative!(y.scratch2, y.scratch, y, y_spectral)
+                @. divg_num = y.scratch2/y.grid
+                @. divg_err = abs(divg_num - divg_exact)
+                println("max(divg_err) (interpolation): ",maximum(divg_err))
+                
+                derivative!(y.scratch, h_exact, y, y_spectral)
+                @. y.scratch2 = y.grid*y.scratch
+                derivative!(y.scratch, y.scratch2, y, y_spectral)
+                @. laph_num = y.scratch/y.grid
+                @. laph_err = abs(laph_num - laph_exact)
+                println("max(laph_err) (interpolation): ",maximum(laph_err))
+                
+            end
+        end
+    end
+
+if abspath(PROGRAM_FILE) == @__FILE__ # true only when this file is the script being executed
+    using Pkg
+    Pkg.activate(".") # NOTE(review): the `using` lines at the top of this file have already run by this point - confirm the activation order is intended
+    # run the test with its default keyword arguments
+    gausslegendre_test()
+end
diff --git a/test_scripts/cheb_matrix_test.jl b/test_scripts/cheb_matrix_test.jl
new file mode 100644
index 000000000..9c6324ecd
--- /dev/null
+++ b/test_scripts/cheb_matrix_test.jl
@@ -0,0 +1,1252 @@
+using Printf
+using Plots
+using LaTeXStrings
+using MPI
+using Measures
+
+if abspath(PROGRAM_FILE) == @__FILE__
+    using Pkg
+    Pkg.activate(".")
+
+    import moment_kinetics
+	using moment_kinetics.input_structs: grid_input, advection_input
+	using moment_kinetics.coordinates: define_coordinate
+	using moment_kinetics.chebyshev: setup_chebyshev_pseudospectral, chebyshev_radau_derivative_single_element!
+	using moment_kinetics.calculus: derivative!, integral
+    #import LinearAlgebra
+    #using IterativeSolvers: jacobi!, gauss_seidel!, idrs!
+    using LinearAlgebra: mul!, lu, cond, det
+    using SparseArrays: sparse
+    using SpecialFunctions: erf
+    zero = 1.0e-10
+    
+    # Print `name`, then the leading n-by-m block of `matrix` row by row (1 decimal).
+    function print_matrix(matrix,name,n,m)
+        println("\n ",name," \n")
+        for row in 1:n
+            foreach(col -> @printf("%.1f ", matrix[row,col]), 1:m)
+            println("")
+        end
+        println("\n")
+        return nothing
+    end
+    
+    # Print `name`, then the first m entries of `vector` on one line (3 decimals).
+    function print_vector(vector,name,m)
+        println("\n ",name," \n")
+        foreach(idx -> @printf("%.3f ", vector[idx]), 1:m)
+        println("")
+        println("\n")
+        return nothing
+    end
+    
+    # diagonal entry of the Chebyshev derivative matrix: -x[j]/(2*(1 - x[j]^2))
+    Djj(x::Array{Float64,1},j::Int64) =
+        (xj = x[j]; -0.5*xj/( 1.0 - xj^2))
+    # off-diagonal entry of the Chebyshev derivative matrix: (c_j/c_k)*(-1)^(j+k)/(x[j] - x[k])
+    Djk(x::Array{Float64,1},j::Int64,k::Int64,c_j::Float64,c_k::Float64) =
+        (c_j/c_k)*(isodd(k+j) ? -1 : 1)/(x[j] - x[k])
+    
+    """
+        cheb_derivative_matrix!(D, x, n)
+    
+    Fill `D` in place with the `n`-by-`n` Chebyshev differentiation matrix
+    for the nodes `x[1:n]`.
+    
+    The function below is based on the numerical method outlined in 
+    Chapter 8.2 from Trefethen 1994 
+    https://people.maths.ox.ac.uk/trefethen/8all.pdf
+    full list of Chapters may be obtained here 
+    https://people.maths.ox.ac.uk/trefethen/pdetext.html
+    
+    Entries:
+    - corners: D[1,1] = (2(n-1)^2 + 1)/6 and D[n,n] = -D[1,1]
+    - off-diagonal: D[j,k] = (c_j/c_k) (-1)^(j+k) / (x[j] - x[k]), via `Djk`,
+      with c = 2 for the endpoint nodes (index 1 or n) and c = 1 otherwise
+    - interior diagonal: D[j,j] = -x[j]/(2 (1 - x[j]^2)), via `Djj`
+    
+    All of `D` is overwritten; nothing is returned.
+    """
+    
+    function cheb_derivative_matrix!(D::Array{Float64,2},x::Array{Float64,1},n) 
+        # start from zero; every entry is then assigned region by region
+        fill!(D, 0.0)
+        interior = 2:n-1
+        c_end = 2.0 # weight of the two endpoint nodes
+        c_int = 1.0 # weight of every interior node
+        
+        # top left, bottom right
+        corner = (2.0*(n - 1.0)^2 + 1.0)/6.0
+        D[1,1] = corner
+        D[n,n] = -corner
+        
+        # top row: interior columns first, then the opposite corner
+        for col in interior
+            D[1,col] = Djk(x,1,col,c_end,c_int)
+        end
+        D[1,n] = Djk(x,1,n,c_end,c_end)
+        
+        # bottom row: interior columns first, then the opposite corner
+        for col in interior
+            D[n,col] = Djk(x,n,col,c_end,c_int)
+        end
+        D[n,1] = Djk(x,n,1,c_end,c_end)
+        
+        # left column
+        for row in interior
+            D[row,1] = Djk(x,row,1,c_int,c_end)
+        end
+        
+        # right column
+        for row in interior
+            D[row,n] = Djk(x,row,n,c_int,c_end)
+        end
+        
+        # interior rows and columns:
+        # diagonal from Djj, everything else from Djk with unit weights
+        for row in interior
+            D[row,row] = Djj(x,row)
+            for col in interior
+                if col != row
+                    D[row,col] = Djk(x,row,col,c_int,c_int)
+                end
+            end
+        end
+        
+        return nothing
+    end
+    
+    # Assemble the global matrix `D` from the single-element Chebyshev matrix (echoed to stdout if x.ngrid < 8).
+    function cheb_derivative_matrix_reversed!(D::Array{Float64,2},x) 
+        npts = x.ngrid
+        D_local = Array{Float64,2}(undef,npts,npts)
+        cheb_derivative_matrix_elementwise_reversed!(D_local,npts,x.L,x.nelement_global)
+        if npts < 8
+            println("\n D_elementwise \n")
+            for row in 1:npts
+                foreach(col -> @printf("%.1f ", D_local[row,col]), 1:npts)
+                println("")
+            end
+        end
+        return assign_cheb_derivative_matrix!(D,D_local,x)
+    end
+    
+    # Global second-derivative matrix: the single-element first-derivative matrix is squared, then assembled.
+    function cheb_second_derivative_matrix_reversed!(D::Array{Float64,2},x) 
+        npts = x.ngrid
+        D1_local = Array{Float64,2}(undef,npts,npts)
+        cheb_derivative_matrix_elementwise_reversed!(D1_local,npts,x.L,x.nelement_global)
+        D2_local = similar(D1_local)
+        mul!(D2_local,D1_local,D1_local)
+        npts < 8 && print_matrix(D2_local,"D2_elementwise",npts,npts)
+        return assign_cheb_derivative_matrix!(D,D2_local,x)
+    end
+    
+    function assign_cheb_derivative_matrix!(D::Array{Float64,2},D_elementwise::Array{Float64,2},x) 
+        # Assemble the global matrix `D` (zeroed first) from the single-element matrix `D_elementwise`.
+        # Rows shared by two neighbouring elements receive half of each element's contribution; with a
+        # zero boundary condition the end rows are treated as shared with a missing `zero' element.
+        D[:,:] .= 0.0
+        imin = x.imin
+        imax = x.imax
+        zero_bc_upper_boundary = x.bc == "zero" || x.bc == "zero_upper"
+        zero_bc_lower_boundary = x.bc == "zero" || x.bc == "zero_lower"
+        
+        # fill in first element 
+        j = 1
+        if zero_bc_lower_boundary #x.bc == "zero"
+            D[imin[j],imin[j]:imax[j]] .+= D_elementwise[1,:]./2.0 #contributions from this element/2
+            D[imin[j],imin[j]] += D_elementwise[x.ngrid,x.ngrid]/2.0 #contribution from missing `zero' element/2
+        else 
+            D[imin[j],imin[j]:imax[j]] .+= D_elementwise[1,:]
+        end
+        for k in 2:imax[j]-imin[j] 
+            D[k,imin[j]:imax[j]] .+= D_elementwise[k,:]
+        end
+        if zero_bc_upper_boundary && x.nelement_local == 1
+            D[imax[j],imin[j]:imax[j]] .+= D_elementwise[x.ngrid,:]./2.0 #contributions from this element/2 (first element: columns start at imin[j], no -1 offset)
+            D[imax[j],imax[j]] += D_elementwise[1,1]/2.0              #contribution from missing `zero' element/2
+        elseif x.nelement_local > 1 #x.bc == "zero"
+            D[imax[j],imin[j]:imax[j]] .+= D_elementwise[x.ngrid,:]./2.0
+        else
+            D[imax[j],imin[j]:imax[j]] .+= D_elementwise[x.ngrid,:]
+        end 
+        # remaining elements recalling definitions of imax and imin
+        for j in 2:x.nelement_local
+            #lower boundary condition on element
+            D[imin[j]-1,imin[j]-1:imax[j]] .+= D_elementwise[1,:]./2.0
+            for k in 2:imax[j]-imin[j]+1 
+                D[k+imin[j]-2,imin[j]-1:imax[j]] .+= D_elementwise[k,:]
+            end
+            # upper boundary condition on element 
+            if j == x.nelement_local && !(zero_bc_upper_boundary)
+                D[imax[j],imin[j]-1:imax[j]] .+= D_elementwise[x.ngrid,:]
+            elseif j == x.nelement_local && zero_bc_upper_boundary
+                D[imax[j],imin[j]-1:imax[j]] .+= D_elementwise[x.ngrid,:]./2.0 #contributions from this element/2
+                D[imax[j],imax[j]] += D_elementwise[1,1]/2.0 #contribution from missing `zero' element/2
+            else 
+                D[imax[j],imin[j]-1:imax[j]] .+= D_elementwise[x.ngrid,:]./2.0
+            end
+        end
+        return nothing
+    end
+    
+    """
+        cheb_derivative_matrix_elementwise_reversed!(D, n, L, nelement)
+    
+    Fill `D` in place with the `n`-by-`n` Chebyshev first-derivative matrix of a
+    single element and return `D`.
+    
+    The nodes are taken in reversed order, x_j = cospi((n-j)/(n-1)) for
+    j = 1..n, i.e. running from -1 to 1.
+    
+    Construction:
+    - off-diagonal entries come from `Djk`, with weight c = 2 for the two
+      endpoint nodes and c = 1 for interior nodes;
+    - each diagonal entry is set to minus the sum of the other entries of its
+      row, which guarantees D * (1, 1, ..., 1, 1) = (0, 0, ..., 0, 0);
+    - finally the matrix is multiplied by 2*nelement/L, the scale factor for
+      the element length.
+    
+    # Arguments
+    - `D`: output matrix, fully overwritten
+    - `n`: number of grid points in the element
+    - `L`: length entering the scale factor (callers in this file pass `x.L`)
+    - `nelement`: element count entering the scale factor (callers pass `x.nelement_global`)
+    """
+    function cheb_derivative_matrix_elementwise_reversed!(D::Array{Float64,2},n::Int64,L::Float64,nelement::Int64) 
+        #define Chebyshev points in reversed order x_j = { -1, ... , 1}
+        x = Float64[cospi((n-j)/(n-1)) for j in 1:n]
+        
+        # zero matrix before allocating values
+        fill!(D, 0.0)
+        interior = 2:n-1
+        # endpoint nodes carry weight 2, interior nodes weight 1
+        c_end = 2.0
+        c_int = 1.0
+        
+        # top row: interior columns first, then the opposite corner
+        for col in interior
+            D[1,col] = Djk(x,1,col,c_end,c_int)
+        end
+        D[1,n] = Djk(x,1,n,c_end,c_end)
+        
+        # bottom row: interior columns first, then the opposite corner
+        for col in interior
+            D[n,col] = Djk(x,n,col,c_end,c_int)
+        end
+        D[n,1] = Djk(x,n,1,c_end,c_end)
+        
+        #left column
+        for row in interior
+            D[row,1] = Djk(x,row,1,c_int,c_end)
+        end
+        
+        #right column
+        for row in interior
+            D[row,n] = Djk(x,row,n,c_int,c_end)
+        end
+        
+        # interior off-diagonal entries (the diagonal is filled in below)
+        for row in interior
+            for col in interior
+                if col != row
+                    D[row,col] = Djk(x,row,col,c_int,c_int)
+                end
+            end
+        end
+        
+        # calculate diagonal entries to guarantee that
+        # D * (1, 1, ..., 1, 1) = (0, 0, ..., 0, 0)
+        for row in 1:n
+            D[row,row] = -sum(D[row,:])
+        end
+        
+        #multiply by scale factor for element length
+        # (last expression, so the scaled D is also the return value)
+        D .= (2.0*float(nelement)/L).*D
+    end 
+    
+    """
+    derivative matrix for radau grid: built column by column by differentiating unit
+    vectors with chebyshev_radau_derivative_single_element!, then rescaled; returns `D`
+    """
+    function calculate_chebyshev_radau_D_matrix_via_FFT!(D::Array{Float64,2}, coord, spectral)
+        npts = coord.ngrid
+        radau = spectral.radau
+        unit_vec = Array{Float64,1}(undef,npts)
+        response = Array{Float64,1}(undef,npts)
+        # response matrix approach: column `col` of D is the derivative of the col-th unit vector
+        for col in 1:npts
+            fill!(unit_vec, 0.0)
+            unit_vec[col] = 1.0
+            @views chebyshev_radau_derivative_single_element!(response[:], unit_vec[:],
+                radau.f[:,1], radau.df, radau.fext, radau.forward, coord)
+            D[:,col] .= response
+        end
+        # correct diagonal elements so that D*[1.0, 1.0, ... 1.0] = [0.0, 0.0, ... 0.0]
+        for row in 1:npts
+            D[row,row] = 0.0
+            D[row,row] = -sum(D[row,:])
+        end
+        D .= (2.0*float(coord.nelement_global)/coord.L).*D # scale factor for element length
+    end
+    
+    # Build the Lobatto and Radau single-element first-derivative matrices and assemble them
+    # into the global matrix `D`; both element matrices are echoed to stdout when x.ngrid < 8.
+    function cheb_radau_derivative_matrix_reversed!(D::Array{Float64,2},x,x_spectral) 
+        npts = x.ngrid
+        D_lobatto = Array{Float64,2}(undef,npts,npts)
+        D_radau = Array{Float64,2}(undef,npts,npts)
+        cheb_derivative_matrix_elementwise_reversed!(D_lobatto,npts,x.L,x.nelement_global)
+        calculate_chebyshev_radau_D_matrix_via_FFT!(D_radau,x,x_spectral)
+        npts < 8 && print_matrix(D_lobatto,"D_lobotto_elementwise",npts,npts)
+        npts < 8 && print_matrix(D_radau,"D_radau_elementwise",npts,npts)
+        return assign_cheb_derivative_matrix!(D,D_lobatto,D_radau,x)
+    end
+
+    # Global second-derivative matrix: each single-element first-derivative matrix
+    # (Lobatto and Radau) is squared before being assembled into `D`.
+    function cheb_radau_second_derivative_matrix_reversed!(D::Array{Float64,2},x,x_spectral) 
+        npts = x.ngrid
+        # Lobatto element: first derivative, then its square
+        D_lobatto = Array{Float64,2}(undef,npts,npts)
+        cheb_derivative_matrix_elementwise_reversed!(D_lobatto,npts,x.L,x.nelement_global)
+        D2_lobatto = similar(D_lobatto)
+        mul!(D2_lobatto,D_lobatto,D_lobatto)
+        # Radau element: first derivative, then its square
+        D_radau = Array{Float64,2}(undef,npts,npts)
+        calculate_chebyshev_radau_D_matrix_via_FFT!(D_radau,x,x_spectral)
+        D2_radau = similar(D_radau)
+        mul!(D2_radau,D_radau,D_radau)
+        # echo the squared element matrices for small grids
+        npts < 8 && print_matrix(D2_lobatto,"D2_lobotto_elementwise",npts,npts)
+        npts < 8 && print_matrix(D2_radau,"D2_radau_elementwise",npts,npts)
+        return assign_cheb_derivative_matrix!(D,D2_lobatto,D2_radau,x)
+    end
+    
+    
+    function assign_cheb_derivative_matrix!(D::Array{Float64,2},D_lobotto_elementwise::Array{Float64,2},D_radau_elementwise::Array{Float64,2},x) 
+        # Assemble the global matrix `D` (zeroed first): the first element uses `D_radau_elementwise`,
+        # all later elements use `D_lobotto_elementwise`. Rows shared by two elements receive half of each
+        # contribution; with a zero bc the end rows are treated as shared with a missing `zero' element.
+        D[:,:] .= 0.0
+        imin = x.imin
+        imax = x.imax
+        zero_bc_upper_boundary = x.bc == "zero" || x.bc == "zero_upper"
+        zero_bc_lower_boundary = x.bc == "zero" || x.bc == "zero_lower"
+        
+        # fill in first element 
+        j = 1
+        if zero_bc_lower_boundary #x.bc == "zero"
+            D[imin[j],imin[j]:imax[j]] .+= D_radau_elementwise[1,:]./2.0 #contributions from this element/2
+            D[imin[j],imin[j]] += D_radau_elementwise[x.ngrid,x.ngrid]/2.0 #contribution from missing `zero' element/2
+        else 
+            D[imin[j],imin[j]:imax[j]] .+= D_radau_elementwise[1,:]
+        end
+        for k in 2:imax[j]-imin[j] 
+            D[k,imin[j]:imax[j]] .+= D_radau_elementwise[k,:]
+        end
+        if zero_bc_upper_boundary && x.nelement_local == 1
+            D[imax[j],imin[j]:imax[j]] .+= D_radau_elementwise[x.ngrid,:]./2.0 #contributions from this element/2 (first element: columns start at imin[j], no -1 offset)
+            D[imax[j],imax[j]] += D_lobotto_elementwise[1,1]/2.0              #contribution from missing `zero' element/2
+        elseif x.nelement_local > 1 #x.bc == "zero"
+            D[imax[j],imin[j]:imax[j]] .+= D_radau_elementwise[x.ngrid,:]./2.0
+        else
+            D[imax[j],imin[j]:imax[j]] .+= D_radau_elementwise[x.ngrid,:]
+        end 
+        # remaining elements recalling definitions of imax and imin
+        for j in 2:x.nelement_local
+            #lower boundary condition on element
+            D[imin[j]-1,imin[j]-1:imax[j]] .+= D_lobotto_elementwise[1,:]./2.0
+            for k in 2:imax[j]-imin[j]+1 
+                D[k+imin[j]-2,imin[j]-1:imax[j]] .+= D_lobotto_elementwise[k,:]
+            end
+            # upper boundary condition on element 
+            if j == x.nelement_local && !(zero_bc_upper_boundary)
+                D[imax[j],imin[j]-1:imax[j]] .+= D_lobotto_elementwise[x.ngrid,:]
+            elseif j == x.nelement_local && zero_bc_upper_boundary
+                D[imax[j],imin[j]-1:imax[j]] .+= D_lobotto_elementwise[x.ngrid,:]./2.0 #contributions from this element/2
+                D[imax[j],imax[j]] += D_lobotto_elementwise[1,1]/2.0 #contribution from missing `zero' element/2
+            else 
+                D[imax[j],imin[j]-1:imax[j]] .+= D_lobotto_elementwise[x.ngrid,:]./2.0
+            end
+        end
+        return nothing
+    end
+    
+    """
+    single forward Euler update for d y / d t = f(t): ynew[i] = yold[i] + dt*f[i] for i = 1..n
+    """
+    function forward_euler_step!(ynew,yold,f,dt,n)
+        idx = 1:n
+        @views @. ynew[idx] = yold[idx] + dt*f[idx]
+        return nothing
+    end
+    """
+        diffusion_matrix(D2, n, dt, nu; return_A=false)
+    
+    function creating lu object for A = I - dt*nu*D2
+    
+    Only the leading n-by-n block of `D2` is read. Returns the LU factorisation of A,
+    or the tuple (factorisation, A) when `return_A` is true.
+    """
+    function diffusion_matrix(D2,n,dt,nu;return_A=false)
+        # A starts as -dt*nu*D2; the identity is then added on the diagonal
+        A = Float64[- dt*nu*D2[i,j] for i in 1:n, j in 1:n]
+        for i in 1:n
+            A[i,i] += 1.0
+        end
+        # factorise once so that callers can reuse the factorisation for repeated solves
+        lu_obj = lu(A)
+        return return_A ? (lu_obj, A) : lu_obj
+    end
+    
+    """
+    functions for Rosenbluth potential tests
+    """
+    function dH_Maxwellian_dvpa(vpa,vperp,ivpa,ivperp)
+        # dH/dvpa at grid point (ivpa, ivperp), with speed variable eta = sqrt(vpa^2 + vperp^2);
+        # below the 1.0e-10 cutoff the small-eta expression is used to avoid dividing by eta
+        w = vpa.grid[ivpa]
+        eta = sqrt(w^2 + vperp.grid[ivperp]^2)
+        if eta < 1.0e-10
+            return -(4.0*w)/(3.0*sqrt(pi))
+        end
+        radial = (2.0/sqrt(pi))*(exp(-eta^2)/eta)  - (erf(eta)/(eta^2))
+        return (w/eta)*radial
+    end
+    function d2H_Maxwellian_dvpa2(vpa,vperp,ivpa,ivperp)
+        # d^2H/dvpa^2 at grid point (ivpa, ivperp)
+        # speed variable
+        eta = sqrt(vpa.grid[ivpa]^2 + vperp.grid[ivperp]^2)
+        if eta < 1.0e-10
+            # small-eta limit: (dH/deta)/eta and d^2H/deta^2 both tend to -4/(3 sqrt(pi)) and the
+            # weights vperp^2/eta^2 + vpa^2/eta^2 sum to one; return it directly (weights are 0/0 at eta = 0)
+            return -4.0/(3.0*sqrt(pi))
+        end
+        dHdeta_over_eta = (2.0/sqrt(pi))*(exp(-eta^2)/eta^2)  - (erf(eta)/(eta^3))
+        d2Hdeta2 = 2.0*(erf(eta)/(eta^3)) - (4.0/sqrt(pi))*(1.0 + (1.0/eta^2))*exp(-eta^2)
+        d2Hdvpa2 = ((vperp.grid[ivperp]^2)/(eta^2))*dHdeta_over_eta + ((vpa.grid[ivpa]^2)/(eta^2))*d2Hdeta2
+        return d2Hdvpa2
+    end
+    # d^2 G / d vpa^2 at grid point (ivpa,ivperp), from the radial factors (dG/d eta)/eta and d^2G/d eta^2
+    function d2G_Maxwellian_dvpa2(vpa,vperp,ivpa,ivperp)
+        eta = sqrt(vpa.grid[ivpa]^2 + vperp.grid[ivperp]^2)
+        small_eta = 1.0e-10
+        if eta < small_eta
+            # eta -> 0 limit: (dG/d eta)/eta and d^2G/d eta^2 both tend to 4/(3 sqrt(pi)) and the
+            # weights vperp^2/eta^2 + vpa^2/eta^2 sum to 1; return it directly to avoid 0/0 = NaN
+            return 4.0/(3.0*sqrt(pi))
+        end
+        dGdeta_over_eta = (1.0/sqrt(pi))*(exp(-eta^2)/(eta^2)) + (1.0 - (0.5/eta^2))*erf(eta)/eta
+        d2Gdeta2 = (erf(eta)/(eta^3)) - (2.0/sqrt(pi))*exp(-eta^2)/eta^2
+        d2Gdvpa2 = ((vperp.grid[ivperp]^2)/(eta^2))*dGdeta_over_eta + ((vpa.grid[ivpa]^2)/(eta^2))*d2Gdeta2
+        return d2Gdvpa2
+    end
+    
+    function dHdvpa_inf(vpa,vperp,ivpa,ivperp)
+        w_par = vpa.grid[ivpa]
+        speed = sqrt(w_par^2 + vperp.grid[ivperp]^2)
+        return -w_par/speed^3 # equals d/d vpa of 1/speed
+    end
+    function d2Gdvpa2_inf(vpa,vperp,ivpa,ivperp)
+        w_par2 = vpa.grid[ivpa]^2; w_perp2 = vperp.grid[ivperp]^2
+        speed = sqrt(w_par2 + w_perp2)
+        return (w_par2/speed^5) + (w_perp2/speed^3)*( 1.0 - (0.5/speed^2))
+    end
+    
+    
+    #using LinearAlgebra.mul
+    discretization = "chebyshev_pseudospectral"
+    #discretization = "finite_difference"
+	etol = 1.0e-15
+    outprefix = "derivative_test"
+	###################
+	## df/dx Nonperiodic (No) BC test
+	###################
+	
+	# define inputs needed for the test
+	ngrid = 17 #number of points per element 
+	nelement_local = 20 # number of elements per rank
+	nelement_global = nelement_local # total number of elements 
+	L = 1.0 #physical box size in reference units 
+	bc = "" #not required to take a particular value, not used 
+	# fd_option and adv_input not actually used so given values unimportant
+	fd_option = "fourth_order_centered"
+	adv_input = advection_input("default", 1.0, 0.0, 0.0)
+	nrank = 1
+    irank = 0
+    comm = MPI.COMM_NULL
+	# create the 'input' struct containing input info needed to create a
+	# coordinate
+    input = grid_input("coord", ngrid, nelement_global, nelement_local, 
+		nrank, irank, L, discretization, fd_option, bc, adv_input,comm)
+	# create the coordinate struct 'x'
+	println("made inputs")
+	x = define_coordinate(input)
+	println("made x")
+    Dx = Array{Float64,2}(undef, x.n, x.n)
+    xchebgrid = Array{Float64,1}(undef, x.n)
+    for i in 1:x.n
+        xchebgrid[i] = cos(pi*(i - 1)/(x.n - 1))
+    end
+    #println("x",xchebgrid[:])
+    cheb_derivative_matrix!(Dx,xchebgrid,x.n)
+    #println("")
+    #println("Dx \n")
+    #for i in 1:x.n
+    #    println(Dx[i,:])
+    #end
+    
+     # create array for the function f(x) to be differentiated/integrated
+	f = Array{Float64,1}(undef, x.n)
+	# create array for the derivative df/dx
+	df = Array{Float64,1}(undef, x.n)
+	df2 = Array{Float64,1}(undef, x.n)
+	df2cheb = Array{Float64,1}(undef, x.n)
+    df_exact = Array{Float64,1}(undef, x.n)
+    df2_exact = Array{Float64,1}(undef, x.n)
+    df_err = Array{Float64,1}(undef, x.n)
+    df2_err = Array{Float64,1}(undef, x.n)
+    df2cheb_err = Array{Float64,1}(undef, x.n)
+
+    for ix in 1:x.n
+        f[ix] = sin(pi*xchebgrid[ix])
+        df_exact[ix] = (pi)*cos(pi*xchebgrid[ix])
+    end
+    mul!(df,Dx,f)
+    for ix in 1:x.n
+        df_err[ix] = df[ix]-df_exact[ix]
+    end
+    # test standard cheb D f = df 
+    #println("df \n",df)
+    #println("df_exact \n",df_exact)
+    #println("df_err \n",df_err)
+    input = grid_input("coord", ngrid, nelement_global, nelement_local, 
+		nrank, irank, L, discretization, fd_option, "zero", adv_input,comm)
+	# create the coordinate struct 'x'
+	x = define_coordinate(input)
+   
+    Dxreverse = Array{Float64,2}(undef, x.n, x.n)
+    cheb_derivative_matrix_reversed!(Dxreverse,x)
+    Dxreverse2 = Array{Float64,2}(undef, x.n, x.n)
+    mul!(Dxreverse2,Dxreverse,Dxreverse)
+    D2xreverse = Array{Float64,2}(undef, x.n, x.n)
+    cheb_second_derivative_matrix_reversed!(D2xreverse,x)
+    
+    Dxreverse2[1,1] = 2.0*Dxreverse2[1,1]
+    Dxreverse2[end,end] = 2.0*Dxreverse2[end,end]
+    #println("x.grid \n",x.grid)
+    if x.n < 20
+        print_matrix(Dxreverse,"\n Dxreverse \n",x.n,x.n)
+        print_matrix(Dxreverse2,"\n Dxreverse*Dxreverse \n",x.n,x.n)
+        print_matrix(D2xreverse,"\n D2xreverse \n",x.n,x.n)
+        println("\n")
+    end
+
+    alpha = 512.0    
+    for ix in 1:x.n
+#        f[ix] = sin(2.0*pi*x.grid[ix]/x.L)
+#        df_exact[ix] = (2.0*pi/x.L)*cos(2.0*pi*x.grid[ix]/x.L)
+#        df2_exact[ix] = -(2.0*pi/x.L)*(2.0*pi/x.L)*sin(2.0*pi*x.grid[ix]/x.L)
+ 
+        f[ix] = exp(-alpha*(x.grid[ix])^2)
+        df_exact[ix] = -2.0*alpha*x.grid[ix]*exp(-alpha*(x.grid[ix])^2)
+        df2_exact[ix] = ((2.0*alpha*x.grid[ix])^2 - 2.0*alpha)*exp(-alpha*(x.grid[ix])^2)
+    end
+    #println("test f: \n",f)
+    # calculate d f / d x from matrix 
+    mul!(df,Dxreverse,f)
+    # calculate d^2 f / d x^2 from second application of Dx matrix
+    mul!(df2,Dxreverse2,f)
+    # calculate d^2 f / d x^2 from application of D2x matrix
+    mul!(df2cheb,D2xreverse,f)
+    for ix in 1:x.n
+        df_err[ix] = df[ix]-df_exact[ix]
+        df2_err[ix] = df2[ix]-df2_exact[ix]
+        df2cheb_err[ix] = df2cheb[ix]-df2_exact[ix]
+    end
+    println("Reversed - multiple elements")
+    #println("df \n",df)
+    #println("df_exact \n",df_exact)
+    #println("df_err \n",df_err)
+    #println("df2 \n",df2)
+    #println("df2_exact \n",df2_exact)
+    #println("df2_err \n",df2_err)
+    #println("df2cheb_err \n",df2cheb_err)
+    
+    println("max(df_err) \n",maximum(abs.(df_err)))
+    println("max(df2_err) \n",maximum(abs.(df2_err)))
+    println("max(df2cheb_err) \n",maximum(abs.(df2cheb_err)))
+    
+    ### attempt at matrix inversion via LU decomposition
+    Dt = 0.1
+    Nu = 1.0
+    lu_obj, AA = diffusion_matrix(Dxreverse2,x.n,Dt,Nu,return_A=true)
+    #AA = Array{Float64,2}(undef,x.n,x.n)
+    #for i in 1:x.n
+    #    for j in 1:x.n
+    #        AA[i,j] = - Dt*Nu*Dxreverse2[i,j]
+    #    end
+    #    AA[i,i] += 1.0
+    #end
+    #lu_obj = lu(AA)
+    if x.n < 20
+        println("L : \n",lu_obj.L)
+        println("U : \n",lu_obj.U)
+        println("p vector : \n",lu_obj.p)
+    end
+    LUtest = true
+    AA_test_lhs = lu_obj.L*lu_obj.U 
+    AA_test_rhs = AA[lu_obj.p,:]
+    for i in 1:x.n
+        for j in 1:x.n
+            if abs.(AA_test_lhs[i,j]-AA_test_rhs[i,j]) > zero
+                global LUtest = false
+            end
+        end
+    end
+    println("LU == AA : \n",LUtest)
+    
+    #bb = ones(x.n) try this for bc = "" rather than bc = "zero"
+    bb = Array{Float64,1}(undef,x.n)
+    yy = Array{Float64,1}(undef,x.n)
+    #for i in 1:x.n
+    #    bb[i] = f[i]#exp(-(4.0*x.grid[i]/x.L)^2)
+    #end
+    #yy = lu_obj \ bb # solution to AA yy = bb 
+    #println("result", yy)
+    #println("check result", AA*yy, bb)
+    MMS_test = false 
+    evolution_test = false#true 
+    elliptic_solve_test = false#true
+    elliptic_solve_1D_infinite_domain_test = false#true
+    elliptic_2Dsolve_test = true
+    if MMS_test
+        ntest = 5
+        MMS_errors = Array{Float64,1}(undef,ntest)
+        Dt_list = Array{Float64,1}(undef,ntest)
+        fac_list = Array{Int64,1}(undef,ntest)
+        fac_list .= [1, 10, 100, 1000, 10000]
+        # one run per refinement factor: dt = 0.001/fac with ntime = 1000*fac steps
+        for itest in 1:ntest
+            fac = fac_list[itest]
+            # ntime*dt is the same for every fac, only the step size is refined
+            ntime = 1000*fac
+            nwrite = 100*fac
+            dt = 0.001/fac
+            # nu: diffusion coefficient entering A = I - dt*nu*D2 and the source term
+            nu = 1.0
+            LU_obj = diffusion_matrix(Dxreverse2,x.n,dt,nu)
+            
+            time = Array{Float64,1}(undef,ntime)
+            ff = Array{Float64,2}(undef,x.n,ntime)
+            ss = Array{Float64,1}(undef,x.n) #source
+
+            time[1] = 0.0
+            ff[:,1] .= f[:] #initial condition
+            for i in 1:ntime-1
+                time[i+1] = i*dt # i steps of size dt have elapsed since time[1] = 0
+                bb .= ff[:,i]
+                yy .= LU_obj\bb # implicit backward euler diffusion step
+                @. ss = -nu*df2_exact # source term cancelling the exact diffusion of f
+                 # explicit forward_euler_step with source
+                @views forward_euler_step!(ff[:,i+1],yy,ss,dt,x.n)
+            end
+
+            ff_error = Array{Float64,1}(undef,x.n)
+            ff_error[:] .= abs.(ff[:,end] - ff[:,1])
+            maxfferr = maximum(ff_error)
+            # the source balances diffusion, so drift from the initial profile is the error
+            println("max(ff_error) \n",maxfferr)
+            #println("t[end]: ",time[end])
+            MMS_errors[itest] = maxfferr
+            Dt_list[itest] = dt
+        end 
+        @views plot(Dt_list, [MMS_errors, 100.0*Dt_list], label=[L"max(\epsilon(f))" L"100\Delta t"], 
+                     xlabel=L"\Delta t", ylabel="", xscale=:log10, yscale=:log10, shape =:circle)
+        outfile = string("ff_err_vs_dt.pdf")
+        savefig(outfile)
+    end
+    
+    if evolution_test
+        ntime = 100
+        nwrite = 1
+        dt = 0.001
+        nu = 1.0
+        LU_obj = diffusion_matrix(Dxreverse2,x.n,dt,nu)
+        
+        time = Array{Float64,1}(undef,ntime)
+        ff = Array{Float64,2}(undef,x.n,ntime)
+        ss = Array{Float64,1}(undef,x.n) #source
+
+        time[1] = 0.0
+        ff[:,1] .= f[:] #initial condition
+        for i in 1:ntime-1
+            time[i+1] = i*dt # i steps of size dt have elapsed since time[1] = 0
+            bb .= ff[:,i]
+            yy .= LU_obj\bb # implicit backward euler diffusion step
+            @. ss = 0.0 # source term
+             # explicit forward_euler_step with source
+            @views forward_euler_step!(ff[:,i+1],yy,ss,dt,x.n)
+        end
+
+        ffmin = minimum(ff)
+        ffmax = maximum(ff)
+        anim = @animate for i in 1:nwrite:ntime
+                @views plot(x.grid, ff[:,i], xlabel="x", ylabel="f", ylims = (ffmin,ffmax))
+            end
+        outfile = string("ff_vs_x.gif")
+        gif(anim, outfile, fps=5)
+    end
+    
+    if elliptic_solve_test
+        println("elliptic solve test")
+        ngrid = 25
+        nelement_local = 50
+        L = 8
+        nelement_global = nelement_local
+        radau = true #false
+        if radau        
+            input = grid_input("vperp", ngrid, nelement_global, nelement_local, 
+            nrank, irank, L, discretization, fd_option, "zero_upper", adv_input,comm)
+            y = define_coordinate(input)
+            y_spectral = setup_chebyshev_pseudospectral(y)
+            Dy = Array{Float64,2}(undef, y.n, y.n)
+            cheb_radau_derivative_matrix_reversed!(Dy,y,y_spectral)
+        else #lobotto
+            input = grid_input("vpa", ngrid, nelement_global, nelement_local, 
+            nrank, irank, L, discretization, fd_option, "zero_upper", adv_input,comm)
+            y = define_coordinate(input)
+            @. y.grid += y.L/2
+            Dy = Array{Float64,2}(undef, y.n, y.n)
+            cheb_derivative_matrix_reversed!(Dy,y)
+        end  
+        
+        yDy = Array{Float64,2}(undef, y.n, y.n)
+        for iy in 1:y.n
+            @. yDy[iy,:] = y.grid[iy]*Dy[iy,:]
+        end
+        
+        
+        Dy_yDy = Array{Float64,2}(undef, y.n, y.n)
+        mul!(Dy_yDy,Dy,yDy)
+        #Dy_yDy[1,1] = 2.0*Dy_yDy[1,1]
+        #Dy_yDy[end,end] = 2.0*Dy_yDy[end,end]
+        
+        D2y = Array{Float64,2}(undef, y.n, y.n)
+        mul!(D2y,Dy,Dy)
+        #Dy_yDy[1,1] = 2.0*Dy_yDy[1,1]
+        D2y[end,end] = 2.0*D2y[end,end]
+        yD2y = Array{Float64,2}(undef, y.n, y.n)
+        for iy in 1:y.n
+            @. yD2y[iy,:] = y.grid[iy]*D2y[iy,:]
+        end
+        
+        if y.n < 20
+            print_matrix(Dy,"Dy",y.n,y.n)
+            print_matrix(yDy,"yDy",y.n,y.n)
+            print_matrix(Dy_yDy,"Dy_yDy",y.n,y.n)
+            print_matrix(yD2y+Dy,"yD2y+Dy",y.n,y.n)
+        end 
+        Sy = Array{Float64,1}(undef, y.n)
+        Fy = Array{Float64,1}(undef, y.n)
+        Fy_exact = Array{Float64,1}(undef, y.n)
+        Fy_err = Array{Float64,1}(undef, y.n)
+        for iy in 1:y.n
+            #Sy[iy] = (y.grid[iy] - 1.0)*exp(-y.grid[iy])
+            #Fy_exact[iy] = exp(-y.grid[iy])
+            Sy[iy] = 4.0*y.grid[iy]*(y.grid[iy]^2 - 1.0)*exp(-y.grid[iy]^2)
+            Fy_exact[iy] = exp(-y.grid[iy]^2)
+        end
+        LL = Array{Float64,2}(undef, y.n, y.n)
+        #@. LL = yD2y + Dy
+        for iy in 1:y.n 
+            #@. LL[iy,:] = Dy_yDy[iy,:] #*(1.0/y.grid[iy])
+            @. LL[iy,:] = yD2y[iy,:] + Dy[iy,:] #*(1.0/y.grid[iy])
+        end
+        Dirichlet = true
+        if Dirichlet
+            # fixed value at origin -- doesn't work well
+            #@. LL[1,:] = 0.0
+            #Sy[1] = Fy_exact[1]
+            set_flux = false 
+            if set_flux
+                # set flux at origin 
+                @. LL[1,:] = 0.0
+                ilim = y.imax[1]  
+                @. LL[1,:] = yDy[ilim,:]
+                
+                print_vector(Sy,"Sy before",y.n)
+                integrand = Array{Float64,1}(undef,ilim)
+                @. integrand[1:ilim] = Sy[1:ilim]*y.wgts[1:ilim]/(2.0*y.grid[1:ilim])
+                
+                print_vector(integrand,"integrand",ilim)
+                print_vector(y.wgts,"wgts",y.n)
+                #@. integrand[1:ilim] = y.grid[1:ilim]*Sy[1:ilim]*y.wgts[1:ilim]
+                flux = sum(integrand)
+                Sy[1] = flux
+            end  
+            # zero at infinity  
+            @. LL[end,:] = 0.0
+            LL[end,end] = 1.0
+            #LL[1,1] = 1.0
+#            @. LL[1,:] = 2.0*D2y[1,:] 
+            Sy[end] = Fy_exact[end]
+            
+            #print_matrix(LL,"LL",y.n,y.n)
+            #print_vector(Sy,"Sy",y.n)
+        end
+        
+        #lu_solver = false
+        #gauss_seidel_solver = true
+        #if lu_solver
+            println("det: ", det(LL))
+            println("condition number: ", cond(LL))
+            LL_lu_obj = lu(sparse(LL))
+            
+            # do elliptic solve 
+            Fy = LL_lu_obj\Sy
+        #elseif gauss_seidel_solver
+        #    niter=100
+        #    @. Fy[:] = Fy_exact[:] # initial guess
+        #    gauss_seidel!(Fy,sparse(LL),Sy,maxiter=niter)
+        #else 
+        #    println("no solution method prescribed")
+        #end 
+        @. Fy_err = abs(Fy - Fy_exact)        
+        println("maximum(Fy_err)",maximum(Fy_err))
+        #println("Fy_err",Fy_err)
+        #println("Fy_exact",Fy_exact)
+        #println("Fy",Fy)
+        plot([y.grid,y.grid,y.grid], [Fy,Fy_exact,Fy_err], xlabel="y", ylabel="", label=["F" "F_exact" "F_err"],
+             shape =:circle, markersize = 5, linewidth=2)
+        outfile = "1D_elliptic_solve_test.pdf"
+        savefig(outfile)
+        plot([y.grid], [Fy_err], xlabel="x", ylabel="", label=["F_err"],
+             shape =:circle, markersize = 5, linewidth=2)
+        outfile = "1D_elliptic_solve_test_err.pdf"
+        savefig(outfile)
+
+    end
+    
+    if elliptic_solve_1D_infinite_domain_test
+        println("elliptic solve 1D infinite domain test")
+        ngrid = 17
+        nelement_local = 50
+        L = 25
+        nelement_global = nelement_local
+        input = grid_input("vpa", ngrid, nelement_global, nelement_local, 
+		nrank, irank, L, discretization, fd_option, "zero", adv_input,comm)
+        x = define_coordinate(input)
+        Dx = Array{Float64,2}(undef, x.n, x.n)
+        cheb_derivative_matrix_reversed!(Dx,x)
+        
+        D2x = Array{Float64,2}(undef, x.n, x.n)
+        mul!(D2x,Dx,Dx)
+        Dirichlet= true
+        if Dirichlet
+            # Dirichlet BC: replace the first and last rows by identity rows
+            @. D2x[1,:] = 0.0
+            @. D2x[end,:] = 0.0
+            D2x[1,1] = 1.0    
+            D2x[end,end] = 1.0    
+        else 
+            # FD-like zero - BC
+            D2x[1,1] = 2.0*D2x[1,1]
+            D2x[end,end] = 2.0*D2x[end,end]
+        end 
+        
+        if x.n < 20
+            print_matrix(Dx,"Dx",x.n,x.n)
+            print_matrix(D2x,"D2x",x.n,x.n)
+        end 
+        LLx = Array{Float64,2}(undef, x.n, x.n)
+        @. LLx = D2x
+        
+        Sx = Array{Float64,1}(undef, x.n)
+        Fx = Array{Float64,1}(undef, x.n)
+        Fx_exact = Array{Float64,1}(undef, x.n)
+        Fx_err = Array{Float64,1}(undef, x.n)
+        for ix in 1:x.n
+            Sx[ix] = (4.0*x.grid[ix]^2 - 2.0)*exp(-x.grid[ix]^2)
+            Fx_exact[ix] = exp(-x.grid[ix]^2)
+        end
+        # do elliptic solve 
+        if Dirichlet 
+            Sx[1] = 0.0; Sx[end] = 0.0 #Dirichlet BC values
+        end 
+        
+        println("condition number: ", cond(LLx))
+        LLx_lu_obj = lu(sparse(LLx)) 
+        lu_solver = true#false
+        iterative_solver = false # must be defined: read by the elseif of test 2 below
+        if lu_solver
+            Fx = LLx_lu_obj\Sx
+        #elseif iterative_solver
+        #    niter=1000
+        #    @. Fx[:] = 1.0/(x.grid[:]^8 + 1.0) # initial guess Fx_exact[:]
+        #    Fx[1] =0.0; Fx[end] =0.0
+        #    #gauss_seidel!(Fx,sparse(LLx),Sx,maxiter=niter)
+        #    #jacobi!(Fx,sparse(LLx),Sx,maxiter=niter)
+        #    #idrs!(Fx,sparse(LLx),Sx;abstol=10^(-10))
+        else 
+            println("no solution method prescribed")
+        end
+        @. Fx_err = abs(Fx - Fx_exact)
+        
+        println("test 1: maximum(Fx_err)",maximum(Fx_err))
+        #println("Fx_err",Fx_err)
+        #println("Fx_exact",Fx_exact)
+        #println("Fx",Fx)
+        plot([x.grid,x.grid,x.grid], [Fx,Fx_exact,Fx_err], xlabel="x", ylabel="", label=["F" "F_exact" "F_err"],
+             shape =:circle, markersize = 5, linewidth=2)
+        outfile = "1D_infinite_domain_elliptic_solve_test.pdf"
+        savefig(outfile)
+        plot([x.grid], [Fx_err], xlabel="x", ylabel="", label=["F_err"],
+             shape =:circle, markersize = 5, linewidth=2)
+        outfile = "1D_infinite_domain_elliptic_solve_test_err.pdf"
+        savefig(outfile)
+        
+        for ix in 1:x.n
+            Sx[ix] = exp(-x.grid[ix]^2)
+            Fx_exact[ix] = (sqrt(pi)/2.0)*x.grid[ix]*erf(x.grid[ix]) + exp(-x.grid[ix]^2)/2.0
+        end
+        if Dirichlet 
+            Sx[1] = 0.0; Sx[end] = 0.0 #Dirichlet BC values
+        end 
+        
+        if lu_solver
+            Fx = LLx_lu_obj\Sx
+        elseif iterative_solver
+            niter=1000
+            @. Fx[:] = 1.0/(x.grid[:]^8 + 1.0) # initial guess Fx_exact[:]
+            Fx[1] =0.0; Fx[end] =0.0
+            #gauss_seidel!(Fx,sparse(LLx),Sx,maxiter=niter)
+            #jacobi!(Fx,sparse(LLx),Sx,maxiter=niter)
+            idrs!(Fx,sparse(LLx),Sx) # NOTE(review): idrs! presumably comes from IterativeSolvers -- confirm it is imported
+        else 
+            println("no solution method prescribed")
+        end
+        
+        @. Fx += (sqrt(pi)/2.0)*x.grid[end] # the solve pins Fx to 0 at both ends; shift by the large-|x| value of Fx_exact
+        @. Fx_err = abs(Fx - Fx_exact)
+        println("test 2: maximum(Fx_err)",maximum(Fx_err))
+        plot([x.grid], [Fx], xlabel="x", ylabel="", label=["Fx"],
+             shape =:circle, markersize = 5, linewidth=2)
+        outfile = "1D_infinite_domain_elliptic_solve_gaussian_source.pdf"
+        savefig(outfile)
+        plot([x.grid], [Fx_err], xlabel="x", ylabel="", label=["F_err"],
+             shape =:circle, markersize = 5, linewidth=2)
+        outfile = "1D_infinite_domain_elliptic_solve_gaussian_source_err.pdf"
+        savefig(outfile)
+
+    end
+    
+    if elliptic_2Dsolve_test
+        println("elliptic 2D solve test")
+        x_ngrid = 17
+        x_nelement_local = 4
+        x_L = 12
+        y_L = 6
+        y_ngrid = 17
+        y_nelement_local = 2
+        
+        x_nelement_global = x_nelement_local
+        y_nelement_global = y_nelement_local
+        # bc option
+        dirichlet_zero = true#false#
+        dirichlet_fixed_value = false#true#
+        # test option
+        secular_decay_test = false#true
+        exponential_decay_test = true#false
+        dHdvpa_test = false 
+        d2Gdvpa2_test = false#true 
+        # second derivative option 
+        # default = false -> if true then use D2coord matrices based on D_elementwise^2
+        second_derivative_elementwise = false#true 
+        
+        input = grid_input("vpa", x_ngrid, x_nelement_global, x_nelement_local, 
+		nrank, irank, x_L, discretization, fd_option, "zero", adv_input, comm)
+        x = define_coordinate(input)
+        x_spectral = setup_chebyshev_pseudospectral(x)
+        
+        Dx = Array{Float64,2}(undef, x.n, x.n)
+        cheb_derivative_matrix_reversed!(Dx,x)
+        D2x = Array{Float64,2}(undef, x.n, x.n)
+        if second_derivative_elementwise
+            cheb_second_derivative_matrix_reversed!(D2x,x)
+        else
+            mul!(D2x,Dx,Dx)
+        end
+        # set x bc on D2x
+        if dirichlet_zero
+            D2x[1,1] = 2.0*D2x[1,1]
+            D2x[end,end] = 2.0*D2x[end,end]
+        elseif dirichlet_fixed_value
+            @. D2x[1,:] = 0.0 
+            @. D2x[end,:] = 0.0 
+            D2x[1,1] = 1.0
+            D2x[end,end] = 1.0
+        end 
+        
+        IIx = Array{Float64,2}(undef,x.n,x.n) 
+        @. IIx = 0.0
+        for ix in 1:x.n
+            IIx[ix,ix] = 1.0 
+        end 
+        
+        if x.n < 20
+            print_matrix(Dx,"Dx",x.n,x.n)
+            print_matrix(D2x,"D2x",x.n,x.n)
+            print_matrix(IIx,"IIx",x.n,x.n)
+        end 
+        
+        input = grid_input("vperp", y_ngrid, y_nelement_global, y_nelement_local, 
+		nrank, irank, y_L, discretization, fd_option, "zero_upper", adv_input, comm)
+        y = define_coordinate(input)
+        y_spectral = setup_chebyshev_pseudospectral(y)
+        Dy = Array{Float64,2}(undef, y.n, y.n)
+        cheb_radau_derivative_matrix_reversed!(Dy,y,y_spectral)
+        
+        D2y = Array{Float64,2}(undef, y.n, y.n)
+        if second_derivative_elementwise
+            cheb_radau_second_derivative_matrix_reversed!(D2y,y,y_spectral)
+        else
+            mul!(D2y,Dy,Dy)
+        end
+        if dirichlet_zero
+            D2y[end,end] = 2.0*D2y[end,end]
+        end         
+        # y derivative operator 
+        LLy = Array{Float64,2}(undef,y.n,y.n)
+        for iy in 1:y.n 
+            @. LLy[iy,:] = D2y[iy,:] + (1.0/y.grid[iy])*Dy[iy,:]
+        end
+        if dirichlet_fixed_value
+            @. LLy[end,:] = 0.0
+            LLy[end,end] = 1.0
+        end
+        IIy = Array{Float64,2}(undef,y.n,y.n) 
+        @. IIy = 0.0
+        for iy in 1:y.n
+            IIy[iy,iy] = 1.0 
+        end      
+        if y.n < 20
+            print_matrix(Dy,"Dy",y.n,y.n)
+            print_matrix(D2y,"D2y",y.n,y.n)
+            print_matrix(LLy,"LLy",y.n,y.n)
+            print_matrix(IIy,"IIy",y.n,y.n)
+        end 
+        println("Initialised 1D arrays")
+        ### now form 2D matrix to invert and corresponding sources 
+        
+        # Array in 2D form 
+        nx = x.n   
+        ny = y.n 
+        Sxy = Array{Float64,2}(undef, nx, ny)
+        Sxy_check = Array{Float64,2}(undef, nx, ny)
+        Sxy_check_err = Array{Float64,2}(undef, nx, ny)
+        Txy = Array{Float64,2}(undef, nx, ny)
+        Fxy = Array{Float64,2}(undef, nx, ny)
+        Fxy_exact = Array{Float64,2}(undef, nx, ny)
+        Fxy_err = Array{Float64,2}(undef, nx, ny)
+        #LLxy = Array{Float64,4}(undef, nx, ny, nx, ny)
+        # Array in compound 1D form 
+        # ic = (ix-1) + nx*(iy-1) + 1
+        # iy = floor((ic-1)/nx) + 1
+        # ix = ic - nx*(iy-1)
+        function icfunc(ix,iy,nx)
+            return nx*(iy-1) + ix # compound index, ix varying fastest
+        end
+        function iyfunc(ic,nx)
+            quotient = (ic-1)/nx # floating-point division, floored below
+            return 1 + floor(Int64,quotient)
+        end
+        function ixfunc(ic,nx)
+            # entries belonging to the iy-1 complete groups of nx that precede ic
+            preceding = nx*(iyfunc(ic,nx) - 1)
+            return ic - preceding
+        end
+        function kronecker_delta(i,j)
+            # 1.0 when the indices coincide, 0.0 otherwise (always a Float64)
+            if i == j
+                return 1.0
+            else
+                return 0.0
+            end
+        end
+        nc = nx*ny
+        Fc = Array{Float64,1}(undef, nc)
+        Sc = Array{Float64,1}(undef, nc)
+        LLc = Array{Float64,2}(undef, nc, nc)
+        
+        if exponential_decay_test
+            for iy in 1:ny
+                for ix in 1:nx
+                    # Exponential test inputs below 
+                    Sxy[ix,iy] = ((4.0*x.grid[ix]^2 - 2.0) + (4.0*y.grid[iy]^2 - 4.0))*exp(-y.grid[iy]^2-x.grid[ix]^2)
+                    Fxy_exact[ix,iy] = exp(-x.grid[ix]^2 - y.grid[iy]^2) 
+                end
+            end
+        elseif secular_decay_test
+            for iy in 1:ny
+                for ix in 1:nx
+                    # secular test inputs below
+                    eta2 = x.grid[ix]^2 + y.grid[iy]^2
+                    zero = 1.0e-10
+                    if eta2 < zero
+                        Sxy[ix,iy] = 0.0
+                        Fxy_exact[ix,iy] = 0.5
+                    else
+                        Sxy[ix,iy] = exp(-1.0/eta2)*(1.0/eta2^2 - 2.0/eta2^3)
+                        Fxy_exact[ix,iy] = 0.5 - 0.5*exp(-1.0/eta2)
+                    end
+                end
+            end
+        elseif dHdvpa_test
+            for iy in 1:ny
+                for ix in 1:nx
+                    # Rosenbluth dHdvpa test 
+                    Sxy[ix,iy] = -(4.0/sqrt(pi))*(-2.0*x.grid[ix]*exp(-y.grid[iy]^2-x.grid[ix]^2))
+                    Fxy_exact[ix,iy] = dH_Maxwellian_dvpa(x,y,ix,iy)
+                end
+            end
+        elseif d2Gdvpa2_test
+            for iy in 1:ny
+                for ix in 1:nx
+                    # Rosenbluth d2Gdvpa2 test 
+                    Sxy[ix,iy] = 2.0*d2H_Maxwellian_dvpa2(x,y,ix,iy)
+                    Fxy_exact[ix,iy] = d2G_Maxwellian_dvpa2(x,y,ix,iy)
+                    Txy[ix,iy] = 2.0*dH_Maxwellian_dvpa(x,y,ix,iy)
+                end
+                @views derivative!(Sxy_check[:,iy],Txy[:,iy],x,x_spectral)
+            end
+            @. Sxy_check_err = abs(Sxy - Sxy_check)
+            println("maximum(Sxy_check_err)",maximum(Sxy_check_err))
+            #@views heatmap(y.grid, x.grid, Sxy[:,:], xlabel=L"y", ylabel=L"x", c = :deep, interpolation = :cubic,
+            #    windowsize = (360,240), margin = 15pt)
+            #    outfile = string("Sxy_exact_2D_solve.pdf")
+            #    savefig(outfile)
+            #@views heatmap(y.grid, x.grid, Sxy_check[:,:], xlabel=L"y", ylabel=L"x", c = :deep, interpolation = :cubic,
+            #    windowsize = (360,240), margin = 15pt)
+            #    outfile = string("Sxy_num_2D_solve.pdf")
+            #    savefig(outfile)
+        else 
+            println("No Sxy or Fxy_exact specified")
+        end
+        
+        if dirichlet_fixed_value
+            # set boundary values 
+            if dHdvpa_test
+                for ix in 1:nx
+                    Sxy[ix,ny] = dHdvpa_inf(x,y,ix,ny)  
+                end
+                for iy in 1:ny
+                    Sxy[1,iy] = dHdvpa_inf(x,y,1,iy)  
+                    Sxy[nx,iy] = dHdvpa_inf(x,y,nx,iy)  
+                end
+            elseif d2Gdvpa2_test
+                for ix in 1:nx
+                    Sxy[ix,ny] = d2Gdvpa2_inf(x,y,ix,ny)  
+                end
+                for iy in 1:ny
+                    Sxy[1,iy] = d2Gdvpa2_inf(x,y,1,iy)  
+                    Sxy[nx,iy] = d2Gdvpa2_inf(x,y,nx,iy)  
+                end            
+            end
+            println("Check boundary specification")
+            #println(Sxy[:,ny])
+            #println(Fxy_exact[:,ny])
+            println(abs.(Sxy[:,ny] .- Fxy_exact[:,ny]))
+            println(abs.(Sxy[1,:] .- Fxy_exact[1,:]))
+            println(abs.(Sxy[nx,:] .- Fxy_exact[nx,:]))
+            #println(Sxy[1,:])
+            #println(Fxy_exact[1,:])
+            #println(Sxy[nx,:])
+            #println(Fxy_exact[nx,:])
+        end
+        # assign values to arrays in compound coordinates
+        @. LLc = 0.0
+        for ic in 1:nc
+            ix = ixfunc(ic,nx)
+            iy = iyfunc(ic,nx)
+            Sc[ic] = Sxy[ix,iy]
+            for icp in 1:nc
+                ixp = ixfunc(icp,nx)
+                iyp = iyfunc(icp,nx)
+                #println("ic: ",ic," ix: ", ix," iy: ",iy," icp: ",icp," ixp: ", ixp," iyp: ",iyp)
+                LLc[icp,ic] = D2x[ixp,ix]*IIy[iyp,iy] + LLy[iyp,iy]*IIx[ixp,ix]
+            end
+        end
+        #set fixed bc in LLc directly
+        if dirichlet_fixed_value
+            ix = 1; ixp = 1
+            for iyp in 1:ny
+                for iy in 1:ny
+                    ic = icfunc(ix,iy,nx) 
+                    icp = icfunc(ixp,iyp,nx)
+                    LLc[icp,ic] = IIy[iyp,iy]                
+                end            
+            end
+            ix = nx; ixp = nx
+            for iyp in 1:ny
+                for iy in 1:ny
+                    ic = icfunc(ix,iy,nx) 
+                    icp = icfunc(ixp,iyp,nx)
+                    LLc[icp,ic] = IIy[iyp,iy]                
+                end            
+            end
+            iy = ny; iyp = ny
+            for ixp in 1:nx
+                for ix in 1:nx
+                    ic = icfunc(ix,iy,nx) 
+                    icp = icfunc(ixp,iyp,nx)
+                    LLc[icp,ic] = IIx[ixp,ix]                
+                end            
+            end
+        end
+        println("Initialised 2D arrays")
+        if nc < 30
+            print_matrix(LLc,"LLc",nc,nc)
+        end 
+        println("condition number(LLc): ", cond(LLc))
+        println("determinant(LLc): ", det(LLc))
+        LLc_lu_obj = lu(LLc)
+        println("Initialised 2D solve") 
+        # do elliptic solve 
+        Fc = LLc_lu_obj\Sc
+        #reshape to 2D vector 
+        for ic in 1:nc
+            ix = ixfunc(ic,nx)
+            iy = iyfunc(ic,nx)
+            Fxy[ix,iy] = Fc[ic]
+        end
+        #if dHdvpa_test && dirichlet_zero
+        #    for iy in 1:ny
+        #        for ix in 1:nx
+        #            Fxy[ix,iy] += dHdvpa_inf(x,y,ix,iy)
+        #        end
+        #    end
+        #end
+        println("Finished 2D solve")
+        @. Fxy_err = abs(Fxy - Fxy_exact)
+        
+        println("maximum(Fxy_err)",maximum(Fxy_err))
+        #println("Fxy_err",Fxy_err[1,:])
+        #println("Fxy_exact",Fxy_exact[1,:])
+        #println("Fxy",Fxy[1,:])
+        @views heatmap(y.grid, x.grid, Fxy_exact[:,:], xlabel=L"y", ylabel=L"x", c = :deep, interpolation = :cubic,
+                windowsize = (360,240), margin = 15pt)
+                outfile = string("Fxy_exact_2D_solve.pdf")
+                savefig(outfile)
+        @views heatmap(y.grid, x.grid, Fxy[:,:], xlabel=L"y", ylabel=L"x", c = :deep, interpolation = :cubic,
+                windowsize = (360,240), margin = 15pt)
+                outfile = string("Fxy_num_2D_solve.pdf")
+                savefig(outfile)
+        @views heatmap(y.grid, x.grid, Fxy_err[:,:], xlabel=L"y", ylabel=L"x", c = :deep, interpolation = :cubic,
+                windowsize = (360,240), margin = 15pt)
+                outfile = string("Fxy_err_2D_solve.pdf")
+                savefig(outfile)
+    end
+end
diff --git a/test_scripts/chebyshev_radau_test.jl b/test_scripts/chebyshev_radau_test.jl
new file mode 100644
index 000000000..57f2a0782
--- /dev/null
+++ b/test_scripts/chebyshev_radau_test.jl
@@ -0,0 +1,107 @@
+export chebyshevradau_test
+
+using Printf
+using Plots
+using LaTeXStrings
+using MPI
+using Measures
+
+import moment_kinetics
+using moment_kinetics.chebyshev
+using moment_kinetics.input_structs: grid_input, advection_input
+using moment_kinetics.coordinates: define_coordinate
+using moment_kinetics.calculus: derivative!
+
+
+# Print the leading n-by-m entries of `matrix` to stdout, one row per line,
+# each entry in fixed-point notation with 3 decimals, under the heading `name`.
+function print_matrix(matrix,name,n,m)
+    println("\n ",name," \n")
+    for row in 1:n
+        foreach(col -> @printf("%.3f ", matrix[row,col]), 1:m)
+        println("")
+    end
+    println("\n")
+end
+
+# Print the first m entries of `vector` on one line of stdout, each in
+# fixed-point notation with 3 decimals, under the heading `name`.
+function print_vector(vector,name,m)
+    println("\n ",name," \n")
+    foreach(idx -> @printf("%.3f ", vector[idx]), 1:m)
+    println("")
+    println("\n")
+end
+"""
+    chebyshevradau_test(; ngrid=5, L_in=3.0)
+
+Check the derivative vector `D0` of the Radau element, which evaluates the
+numerical derivative at the lower endpoint of the domain (-1,1] in the
+normalised coordinate x. The shifted coordinate y is given the "vperp"
+label so that the grid runs over (0,L]. Results are printed to stdout.
+
+# Keywords
+- `ngrid`: number of grid points in the single element that is used
+- `L_in`: box length passed to the coordinate set-up
+"""
+function chebyshevradau_test(; ngrid=5, L_in=3.0)
+    # a single element on a single rank, so local and global counts agree
+    y_nelement_local = 1
+    y_nelement_global = y_nelement_local
+    # NOTE(review): this script is named for Chebyshev-Radau, but the
+    # discretization selected here is Gauss-Legendre -- confirm intended
+    discretization = "gausslegendre_pseudospectral"
+    bc = "zero"
+    element_spacing_option = "uniform"
+    # fd_option and adv_input not actually used so given values unimportant
+    fd_option = "fourth_order_centered"
+    cheb_option = "matrix"
+    adv_input = advection_input("default", 1.0, 0.0, 0.0)
+    nrank = 1
+    irank = 0
+    comm = MPI.COMM_NULL
+    # create the 'input' struct containing input info needed to create a
+    # coordinate; the "vperp" name is what selects the radau grid
+    y_input = grid_input("vperp", ngrid, y_nelement_global, y_nelement_local,
+            nrank, irank, L_in, discretization, fd_option, cheb_option, bc,
+            adv_input, comm, element_spacing_option)
+    y, y_spectral = define_coordinate(y_input)
+
+    radau = y_spectral.radau
+    print_matrix(radau.Dmat,"Radau Dmat",y.ngrid,y.ngrid)
+    D0 = radau.D0
+    print_vector(D0,"Radau D0",y.ngrid)
+
+    # scratch array holding the current test function on the grid
+    ff = Array{Float64,1}(undef,y.n)
+
+    # Sample `func` on the grid, apply D0 (rescaled by the scale of the first
+    # element) and print the result next to the exact derivative `df_exact`.
+    function check(title, func, df_exact)
+        for iy in 1:y.n
+            ff[iy] = func(y.grid[iy])
+        end
+        df_num = sum(D0.*ff)/y.element_scale[1]
+        df_err = abs(df_num - df_exact)
+        println(title)
+        println("exact df: ",df_exact," num df: ",df_num," abs(err): ",df_err)
+    end
+
+    # each case: heading, test function, exact derivative at y = 0
+    cases = (
+        ("f(y) = exp(-4 y) test", v -> exp(-4.0*v), -4.0),
+        ("f(y) = sin(y) test", v -> sin(v), 1.0),
+        ("f(y) = y + y^2 + y^3 test", v -> v + v^2 + v^3, 1.0),
+    )
+    for (title, func, df_exact) in cases
+        check(title, func, df_exact)
+    end
+    return nothing
+end
+
+if abspath(PROGRAM_FILE) == @__FILE__ # true only when this file is the script being run
+    using Pkg
+    Pkg.activate(".") # activate the project found in the current working directory
+
+    chebyshevradau_test() # run with the default keyword values (ngrid=5, L_in=3.0)
+end
\ No newline at end of file
diff --git a/test_scripts/fkpl_direct_integration_test.jl b/test_scripts/fkpl_direct_integration_test.jl
new file mode 100644
index 000000000..cf4fdaeb2
--- /dev/null
+++ b/test_scripts/fkpl_direct_integration_test.jl
@@ -0,0 +1,438 @@
+export test_rosenbluth_potentials_direct_integration
+
+using Printf
+using Plots
+using LaTeXStrings
+using Measures
+using MPI
+using Dates
+
+import moment_kinetics
+using moment_kinetics.input_structs: grid_input, advection_input
+using moment_kinetics.coordinates: define_coordinate
+using moment_kinetics.fokker_planck: init_fokker_planck_collisions_direct_integration
+using moment_kinetics.fokker_planck_calculus: calculate_rosenbluth_potentials_via_direct_integration!
+using moment_kinetics.fokker_planck_test: d2Gdvpa2_Maxwellian, dGdvperp_Maxwellian, d2Gdvperpdvpa_Maxwellian, d2Gdvperp2_Maxwellian
+using moment_kinetics.fokker_planck_test: dHdvpa_Maxwellian, dHdvperp_Maxwellian, H_Maxwellian, G_Maxwellian
+using moment_kinetics.fokker_planck_test: F_Maxwellian, dFdvpa_Maxwellian, dFdvperp_Maxwellian
+using moment_kinetics.fokker_planck_test: d2Fdvpa2_Maxwellian, d2Fdvperpdvpa_Maxwellian, d2Fdvperp2_Maxwellian
+using moment_kinetics.type_definitions: mk_float, mk_int
+using moment_kinetics.calculus: derivative!
+using moment_kinetics.velocity_moments: integrate_over_vspace, get_pressure
+using moment_kinetics.communication
+using moment_kinetics.looping
+using moment_kinetics.array_allocation: allocate_shared_float, allocate_float
+
+# Thermal speed sqrt(2 pres / (dens mass)) for the given pressure, density
+# and mass.
+get_vth(pres,dens,mass) = sqrt(2.0*pres/(dens*mass))
+
+# Fill expected[i] = (1/nelement_list[i])^(ngrid - 1) for i = 1,...,nscan.
+function expected_nelement_scaling!(expected,nelement_list,ngrid,nscan)
+    power = ngrid - 1
+    foreach(i -> (expected[i] = (1.0/nelement_list[i])^power), 1:nscan)
+end
+
+# Fill expected[i] = (1/nelement_list[i])^(ngrid + 1) for i = 1,...,nscan.
+function expected_nelement_integral_scaling!(expected,nelement_list,ngrid,nscan)
+    power = ngrid + 1
+    foreach(i -> (expected[i] = (1.0/nelement_list[i])^power), 1:nscan)
+end
+"""
+    L2norm_vspace(ff_err,vpa,vperp)
+
+L2 norm of an absolute error `ff_err = ff - ff_exact`, normalised by the
+finite volume of velocity space: sqrt( int (ff_err)^2 d^3 v / int d^3 v ).
+"""
+function L2norm_vspace(ff_err,vpa,vperp)
+    # numerator integrand: the pointwise squared error
+    err_squared = map!(e -> e^2, copy(ff_err), ff_err)
+    # denominator integrand: unity, so that the integral is the volume
+    unity = fill!(copy(ff_err), 1.0)
+    numerator = integrate_over_vspace(@view(err_squared[:,:]), vpa.grid, 0, vpa.wgts, vperp.grid, 0, vperp.wgts)
+    volume = integrate_over_vspace(@view(unity[:,:]), vpa.grid, 0, vpa.wgts, vperp.grid, 0, vperp.wgts)
+    # root of the volume-averaged squared error
+    return sqrt(numerator/volume)
+end
+
+
+# Build the vpa and vperp coordinates used by the tests in this script, with
+# `nelement` elements of `ngrid` points each in both dimensions. Returns
+# (vpa, vperp, vpa_spectral, vperp_spectral).
+function init_grids(nelement,ngrid)
+    discretization = "gausslegendre_pseudospectral"
+    #discretization = "chebyshev_pseudospectral"
+    #discretization = "finite_difference"
+
+    # settings shared by both coordinates
+    bc = "zero"
+    element_spacing_option = "uniform"
+    nelement_local = nelement # number of elements per rank
+    nelement_global = nelement_local # total number of elements
+    # physical box sizes in reference units
+    vpa_L = 12.0
+    vperp_L = 6.0
+    # fd_option and adv_input not actually used so given values unimportant
+    fd_option = "fourth_order_centered"
+    cheb_option = "matrix"
+    adv_input = advection_input("default", 1.0, 0.0, 0.0)
+    nrank = 1
+    irank = 0
+    comm = MPI.COMM_NULL
+
+    # create the 'input' struct containing input info needed to create a
+    # coordinate called `name` with box size `box_size`
+    function make_input(name, box_size)
+        return grid_input(name, ngrid, nelement_global, nelement_local,
+            nrank, irank, box_size, discretization, fd_option, cheb_option,
+            bc, adv_input, comm, element_spacing_option)
+    end
+    vpa_input = make_input("vpa", vpa_L)
+    vperp_input = make_input("vperp", vperp_L)
+    # create the coordinate structs
+    vpa, vpa_spectral = define_coordinate(vpa_input)
+    vperp, vperp_spectral = define_coordinate(vperp_input)
+    return vpa, vperp, vpa_spectral, vperp_spectral
+end
+
+test_Lagrange_integral = false #true  # script-level switch; not read anywhere in this file
+test_Lagrange_integral_scan = true # script-level switch; not read anywhere in this file
+
+# Compare the Rosenbluth potentials G and H of a Maxwellian and their
+# derivatives, computed by direct integration on a grid with `nelement`
+# elements of `ngrid` points in both vpa and vperp, with the analytical
+# Maxwellian results. With `standalone=true` this function initialises and
+# finalises the communications itself. Returns a tuple of 11 maximum
+# absolute errors, in the order (G, H, dHdvpa, dHdvperp, d2Gdvperp2,
+# d2Gdvpa2, d2Gdvperpdvpa, dGdvperp, dfsdvpa, dfsdvperp, d2fsdvperpdvpa).
+function test_Lagrange_Rosenbluth_potentials(ngrid,nelement; standalone=true)
+    # set up grids for input Maxwellian
+    vpa, vperp, vpa_spectral, vperp_spectral =  init_grids(nelement,ngrid)
+    # set up necessary inputs for collision operator functions 
+    nvperp = vperp.n
+    nvpa = vpa.n
+    # Set up MPI
+    if standalone
+        initialize_comms!()
+    end
+    setup_distributed_memory_MPI(1,1,1,1)
+    looping.setup_loop_ranges!(block_rank[], block_size[];
+                                   s=1, sn=1,
+                                   r=1, z=1, vperp=vperp.n, vpa=vpa.n,
+                                   vzeta=1, vr=1, vz=1)
+    @serial_region begin
+        println("beginning allocation   ", Dates.format(now(), dateformat"H:MM:SS"))
+    end
+    
+    fs_in = Array{mk_float,2}(undef,nvpa,nvperp)
+    # analytical derivatives of the input Maxwellian, and error arrays for them
+    dfsdvpa_Maxwell = Array{mk_float,2}(undef,nvpa,nvperp)
+    dfsdvperp_Maxwell = Array{mk_float,2}(undef,nvpa,nvperp)
+    d2fsdvperpdvpa_Maxwell = Array{mk_float,2}(undef,nvpa,nvperp)
+    dfsdvpa_err = Array{mk_float,2}(undef,nvpa,nvperp)
+    dfsdvperp_err = Array{mk_float,2}(undef,nvpa,nvperp)
+    d2fsdvperpdvpa_err = Array{mk_float,2}(undef,nvpa,nvperp)
+    # analytical G and its derivatives (*_Maxwell), and error arrays (*_err)
+    GG_Maxwell = Array{mk_float,2}(undef,nvpa,nvperp)
+    GG_err = allocate_shared_float(nvpa,nvperp)
+    d2Gdvpa2_Maxwell = Array{mk_float,2}(undef,nvpa,nvperp)
+    d2Gdvpa2_err = allocate_shared_float(nvpa,nvperp)
+    dGdvperp_Maxwell = Array{mk_float,2}(undef,nvpa,nvperp)
+    dGdvperp_err = allocate_shared_float(nvpa,nvperp)
+    d2Gdvperpdvpa_Maxwell = Array{mk_float,2}(undef,nvpa,nvperp)
+    d2Gdvperpdvpa_err = allocate_shared_float(nvpa,nvperp)
+    d2Gdvperp2_Maxwell = Array{mk_float,2}(undef,nvpa,nvperp)
+    d2Gdvperp2_err = allocate_shared_float(nvpa,nvperp)
+    # analytical H and its derivatives (*_Maxwell), and error arrays (*_err)
+    HH_Maxwell = Array{mk_float,2}(undef,nvpa,nvperp)
+    HH_err = allocate_shared_float(nvpa,nvperp)
+    dHdvpa_Maxwell = Array{mk_float,2}(undef,nvpa,nvperp)
+    dHdvpa_err = allocate_shared_float(nvpa,nvperp)
+    dHdvperp_Maxwell = Array{mk_float,2}(undef,nvpa,nvperp)
+    dHdvperp_err = allocate_shared_float(nvpa,nvperp)
+    
+    @serial_region begin
+        println("setting up input arrays   ", Dates.format(now(), dateformat"H:MM:SS"))
+    end
+    
+    # set up test Maxwellian
+    denss = 1.0 #3.0/4.0
+    upars = 0.0 #2.0/3.0
+    ppars = 1.0 #2.0/3.0
+    pperps = 1.0 #2.0/3.0
+    press = get_pressure(ppars,pperps) 
+    ms = 1.0
+    vths = get_vth(press,denss,ms)
+    
+    for ivperp in 1:nvperp
+        for ivpa in 1:nvpa
+            fs_in[ivpa,ivperp] = F_Maxwellian(denss,upars,vths,vpa,vperp,ivpa,ivperp) #(denss/vths^3)*exp( - ((vpa.grid[ivpa]-upar)^2 + vperp.grid[ivperp]^2)/vths^2 ) 
+            dfsdvpa_Maxwell[ivpa,ivperp] = dFdvpa_Maxwellian(denss,upars,vths,vpa,vperp,ivpa,ivperp)
+            dfsdvperp_Maxwell[ivpa,ivperp] = dFdvperp_Maxwellian(denss,upars,vths,vpa,vperp,ivpa,ivperp)
+            d2fsdvperpdvpa_Maxwell[ivpa,ivperp] = d2Fdvperpdvpa_Maxwellian(denss,upars,vths,vpa,vperp,ivpa,ivperp)
+            
+            GG_Maxwell[ivpa,ivperp] = G_Maxwellian(denss,upars,vths,vpa,vperp,ivpa,ivperp)
+            d2Gdvpa2_Maxwell[ivpa,ivperp] = d2Gdvpa2_Maxwellian(denss,upars,vths,vpa,vperp,ivpa,ivperp)
+            dGdvperp_Maxwell[ivpa,ivperp] = dGdvperp_Maxwellian(denss,upars,vths,vpa,vperp,ivpa,ivperp)
+            d2Gdvperpdvpa_Maxwell[ivpa,ivperp] = d2Gdvperpdvpa_Maxwellian(denss,upars,vths,vpa,vperp,ivpa,ivperp)
+            d2Gdvperp2_Maxwell[ivpa,ivperp] = d2Gdvperp2_Maxwellian(denss,upars,vths,vpa,vperp,ivpa,ivperp)
+            HH_Maxwell[ivpa,ivperp] = H_Maxwellian(denss,upars,vths,vpa,vperp,ivpa,ivperp)
+            dHdvperp_Maxwell[ivpa,ivperp] = dHdvperp_Maxwellian(denss,upars,vths,vpa,vperp,ivpa,ivperp)
+            dHdvpa_Maxwell[ivpa,ivperp] = dHdvpa_Maxwellian(denss,upars,vths,vpa,vperp,ivpa,ivperp)
+        end
+    end
+    
+    # initialise the weights
+    fokkerplanck_arrays = init_fokker_planck_collisions_direct_integration(vperp,vpa; precompute_weights=true)
+    fka = fokkerplanck_arrays
+    # calculate the potentials by direct integration
+    calculate_rosenbluth_potentials_via_direct_integration!(fka.GG,fka.HH,fka.dHdvpa,fka.dHdvperp,
+             fka.d2Gdvpa2,fka.dGdvperp,fka.d2Gdvperpdvpa,fka.d2Gdvperp2,fs_in,
+             vpa,vperp,vpa_spectral,vperp_spectral,fka)
+            
+    # error analysis of distribution function
+    begin_serial_region()
+    @serial_region begin
+        println("finished integration   ", Dates.format(now(), dateformat"H:MM:SS"))
+        @. dfsdvpa_err = abs(fka.dfdvpa - dfsdvpa_Maxwell)
+        max_dfsdvpa_err = maximum(dfsdvpa_err)
+        println("max_dfsdvpa_err: ",max_dfsdvpa_err)
+        @. dfsdvperp_err = abs(fka.dfdvperp - dfsdvperp_Maxwell)
+        max_dfsdvperp_err = maximum(dfsdvperp_err)
+        println("max_dfsdvperp_err: ",max_dfsdvperp_err)
+        @. d2fsdvperpdvpa_err = abs(fka.d2fdvperpdvpa - d2fsdvperpdvpa_Maxwell)
+        max_d2fsdvperpdvpa_err = maximum(d2fsdvperpdvpa_err)
+        println("max_d2fsdvperpdvpa_err: ",max_d2fsdvperpdvpa_err)
+    end
+    # switches: write heatmaps (pdf) of the numerical result, exact result and error
+    plot_dHdvpa = false #true
+    plot_dHdvperp = false #true
+    plot_d2Gdvperp2 = false #true
+    plot_d2Gdvperpdvpa = false #true
+    plot_dGdvperp = false #true
+    plot_d2Gdvpa2 = false #true
+    
+    @serial_region begin
+        @. GG_err = abs(fka.GG - GG_Maxwell)
+        max_GG_err, max_GG_index = findmax(GG_err)
+        println("max_GG_err: ",max_GG_err," ",max_GG_index)
+        println("spot check GG_err: ",GG_err[end,end], " GG: ",fka.GG[end,end])
+        
+        @. HH_err = abs(fka.HH - HH_Maxwell)
+        max_HH_err, max_HH_index = findmax(HH_err)
+        println("max_HH_err: ",max_HH_err," ",max_HH_index)
+        println("spot check HH_err: ",HH_err[end,end], " HH: ",fka.HH[end,end])
+        @. dHdvperp_err = abs(fka.dHdvperp - dHdvperp_Maxwell)
+        max_dHdvperp_err, max_dHdvperp_index = findmax(dHdvperp_err)
+        println("max_dHdvperp_err: ",max_dHdvperp_err," ",max_dHdvperp_index)
+        println("spot check dHdvperp_err: ",dHdvperp_err[end,end], " dHdvperp: ",fka.dHdvperp[end,end])
+        @. dHdvpa_err = abs(fka.dHdvpa - dHdvpa_Maxwell)
+        max_dHdvpa_err, max_dHdvpa_index = findmax(dHdvpa_err)
+        println("max_dHdvpa_err: ",max_dHdvpa_err," ",max_dHdvpa_index)
+        println("spot check dHdvpa_err: ",dHdvpa_err[end,end], " dHdvpa: ",fka.dHdvpa[end,end])
+        
+        if plot_dHdvpa
+            @views heatmap(vperp.grid, vpa.grid, fka.dHdvpa[:,:], xlabel=L"v_{\perp}", ylabel=L"v_{||}", c = :deep, interpolation = :cubic,
+                 windowsize = (360,240), margin = 15pt)
+                 outfile = string("fkpl_dHdvpa_lagrange.pdf")
+                 savefig(outfile)
+            @views heatmap(vperp.grid, vpa.grid, dHdvpa_Maxwell[:,:], xlabel=L"v_{\perp}", ylabel=L"v_{||}", c = :deep, interpolation = :cubic,
+                 windowsize = (360,240), margin = 15pt)
+                 outfile = string("fkpl_dHdvpa_Maxwell.pdf")
+                 savefig(outfile)
+             @views heatmap(vperp.grid, vpa.grid, dHdvpa_err[:,:], xlabel=L"v_{\perp}", ylabel=L"v_{||}", c = :deep, interpolation = :cubic,
+                 windowsize = (360,240), margin = 15pt)
+                 outfile = string("fkpl_dHdvpa_err.pdf")
+                 savefig(outfile)
+        end
+        if plot_dHdvperp
+            @views heatmap(vperp.grid, vpa.grid, fka.dHdvperp[:,:], xlabel=L"v_{\perp}", ylabel=L"v_{||}", c = :deep, interpolation = :cubic,
+                 windowsize = (360,240), margin = 15pt)
+                 outfile = string("fkpl_dHdvperp_lagrange.pdf")
+                 savefig(outfile)
+            @views heatmap(vperp.grid, vpa.grid, dHdvperp_Maxwell[:,:], xlabel=L"v_{\perp}", ylabel=L"v_{||}", c = :deep, interpolation = :cubic,
+                 windowsize = (360,240), margin = 15pt)
+                 outfile = string("fkpl_dHdvperp_Maxwell.pdf")
+                 savefig(outfile)
+             @views heatmap(vperp.grid, vpa.grid, dHdvperp_err[:,:], xlabel=L"v_{\perp}", ylabel=L"v_{||}", c = :deep, interpolation = :cubic,
+                 windowsize = (360,240), margin = 15pt)
+                 outfile = string("fkpl_dHdvperp_err.pdf")
+                 savefig(outfile)
+        end
+        @. d2Gdvperp2_err = abs(fka.d2Gdvperp2 - d2Gdvperp2_Maxwell)
+        max_d2Gdvperp2_err, max_d2Gdvperp2_index = findmax(d2Gdvperp2_err)
+        println("max_d2Gdvperp2_err: ",max_d2Gdvperp2_err," ",max_d2Gdvperp2_index)
+        println("spot check d2Gdvperp2_err: ",d2Gdvperp2_err[end,end], " d2Gdvperp2: ",fka.d2Gdvperp2[end,end])
+        if plot_d2Gdvperp2
+            @views heatmap(vperp.grid, vpa.grid, fka.d2Gdvperp2[:,:], xlabel=L"v_{\perp}", ylabel=L"v_{||}", c = :deep, interpolation = :cubic,
+                 windowsize = (360,240), margin = 15pt)
+                 outfile = string("fkpl_d2Gdvperp2_lagrange.pdf")
+                 savefig(outfile)
+            @views heatmap(vperp.grid, vpa.grid, d2Gdvperp2_Maxwell[:,:], xlabel=L"v_{\perp}", ylabel=L"v_{||}", c = :deep, interpolation = :cubic,
+                 windowsize = (360,240), margin = 15pt)
+                 outfile = string("fkpl_d2Gdvperp2_Maxwell.pdf")
+                 savefig(outfile)
+             @views heatmap(vperp.grid, vpa.grid, d2Gdvperp2_err[:,:], xlabel=L"v_{\perp}", ylabel=L"v_{||}", c = :deep, interpolation = :cubic,
+                 windowsize = (360,240), margin = 15pt)
+                 outfile = string("fkpl_d2Gdvperp2_err.pdf")
+                 savefig(outfile)
+        end
+        @. d2Gdvperpdvpa_err = abs(fka.d2Gdvperpdvpa - d2Gdvperpdvpa_Maxwell)
+        max_d2Gdvperpdvpa_err, max_d2Gdvperpdvpa_index = findmax(d2Gdvperpdvpa_err)
+        println("max_d2Gdvperpdvpa_err: ",max_d2Gdvperpdvpa_err," ",max_d2Gdvperpdvpa_index)
+        println("spot check d2Gdvperpdvpa_err: ",d2Gdvperpdvpa_err[end,end], " d2Gdvperpdvpa: ",fka.d2Gdvperpdvpa[end,end])
+        if plot_d2Gdvperpdvpa
+            @views heatmap(vperp.grid, vpa.grid, fka.d2Gdvperpdvpa[:,:], xlabel=L"v_{\perp}", ylabel=L"v_{||}", c = :deep, interpolation = :cubic,
+                 windowsize = (360,240), margin = 15pt)
+                 outfile = string("fkpl_d2Gdvperpdvpa_lagrange.pdf")
+                 savefig(outfile)
+            @views heatmap(vperp.grid, vpa.grid, d2Gdvperpdvpa_Maxwell[:,:], xlabel=L"v_{\perp}", ylabel=L"v_{||}", c = :deep, interpolation = :cubic,
+                 windowsize = (360,240), margin = 15pt)
+                 outfile = string("fkpl_d2Gdvperpdvpa_Maxwell.pdf")
+                 savefig(outfile)
+             @views heatmap(vperp.grid, vpa.grid, d2Gdvperpdvpa_err[:,:], xlabel=L"v_{\perp}", ylabel=L"v_{||}", c = :deep, interpolation = :cubic,
+                 windowsize = (360,240), margin = 15pt)
+                 outfile = string("fkpl_d2Gdvperpdvpa_err.pdf")
+                 savefig(outfile)
+        end
+        @. dGdvperp_err = abs(fka.dGdvperp - dGdvperp_Maxwell)
+        max_dGdvperp_err, max_dGdvperp_index = findmax(dGdvperp_err)
+        println("max_dGdvperp_err: ",max_dGdvperp_err," ",max_dGdvperp_index)
+        println("spot check dGdvperp_err: ",dGdvperp_err[end,end], " dGdvperp: ",fka.dGdvperp[end,end])
+        if plot_dGdvperp
+            @views heatmap(vperp.grid, vpa.grid, fka.dGdvperp[:,:], xlabel=L"v_{\perp}", ylabel=L"v_{||}", c = :deep, interpolation = :cubic,
+                 windowsize = (360,240), margin = 15pt)
+                 outfile = string("fkpl_dGdvperp_lagrange.pdf")
+                 savefig(outfile)
+            @views heatmap(vperp.grid, vpa.grid, dGdvperp_Maxwell[:,:], xlabel=L"v_{\perp}", ylabel=L"v_{||}", c = :deep, interpolation = :cubic,
+                 windowsize = (360,240), margin = 15pt)
+                 outfile = string("fkpl_dGdvperp_Maxwell.pdf")
+                 savefig(outfile)
+             @views heatmap(vperp.grid, vpa.grid, dGdvperp_err[:,:], xlabel=L"v_{\perp}", ylabel=L"v_{||}", c = :deep, interpolation = :cubic,
+                 windowsize = (360,240), margin = 15pt)
+                 outfile = string("fkpl_dGdvperp_err.pdf")
+                 savefig(outfile)
+        end
+        @. d2Gdvpa2_err = abs(fka.d2Gdvpa2 - d2Gdvpa2_Maxwell)
+        max_d2Gdvpa2_err, max_d2Gdvpa2_index = findmax(d2Gdvpa2_err)
+        println("max_d2Gdvpa2_err: ",max_d2Gdvpa2_err," ",max_d2Gdvpa2_index)
+        println("spot check d2Gdvpa2_err: ",d2Gdvpa2_err[end,end], " d2Gdvpa2: ",fka.d2Gdvpa2[end,end])
+        if plot_d2Gdvpa2
+            @views heatmap(vperp.grid, vpa.grid, fka.d2Gdvpa2[:,:], xlabel=L"v_{\perp}", ylabel=L"v_{||}", c = :deep, interpolation = :cubic,
+                 windowsize = (360,240), margin = 15pt)
+                 outfile = string("fkpl_d2Gdvpa2_lagrange.pdf")
+                 savefig(outfile)
+            @views heatmap(vperp.grid, vpa.grid, d2Gdvpa2_Maxwell[:,:], xlabel=L"v_{\perp}", ylabel=L"v_{||}", c = :deep, interpolation = :cubic,
+                 windowsize = (360,240), margin = 15pt)
+                 outfile = string("fkpl_d2Gdvpa2_Maxwell.pdf")
+                 savefig(outfile)
+             @views heatmap(vperp.grid, vpa.grid, d2Gdvpa2_err[:,:], xlabel=L"v_{\perp}", ylabel=L"v_{||}", c = :deep, interpolation = :cubic,
+                 windowsize = (360,240), margin = 15pt)
+                 outfile = string("fkpl_d2Gdvpa2_err.pdf")
+                 savefig(outfile)
+        end
+    end
+    _block_synchronize()
+    if standalone 
+        finalize_comms!()
+    end
+    results = (maximum(GG_err), maximum(HH_err), maximum(dHdvpa_err), maximum(dHdvperp_err), maximum(d2Gdvperp2_err), maximum(d2Gdvpa2_err), maximum(d2Gdvperpdvpa_err), maximum(dGdvperp_err),
+    maximum(dfsdvpa_err), maximum(dfsdvperp_err), maximum(d2fsdvperpdvpa_err))
+    return results 
+end
+
+# Driver for the direct-integration test of the Rosenbluth potentials.
+# With a single entry in `nelement_list` one standalone test is run and its
+# result returned. Otherwise the test is repeated for each entry and, when
+# `plot_scan` is true, global rank 0 saves three log-log plots (pdf) of the
+# maximum errors against the number of elements, together with the expected
+# scalings (1/N_el)^(ngrid-1) and (1/N_el)^(ngrid+1).
+function test_rosenbluth_potentials_direct_integration(;ngrid=5,nelement_list=[2],plot_scan=true)
+    nscan = size(nelement_list,1)
+    if nscan == 1
+        return test_Lagrange_Rosenbluth_potentials(ngrid,nelement_list[1],standalone=true)
+    end
+    initialize_comms!()
+    # one entry per scan point for every error returned by
+    # test_Lagrange_Rosenbluth_potentials, kept in the order it returns them
+    scan_array() = Array{mk_float,1}(undef,nscan)
+    max_G_err = scan_array()
+    max_H_err = scan_array()
+    max_dHdvpa_err = scan_array()
+    max_dHdvperp_err = scan_array()
+    max_d2Gdvperp2_err = scan_array()
+    max_d2Gdvpa2_err = scan_array()
+    max_d2Gdvperpdvpa_err = scan_array()
+    max_dGdvperp_err = scan_array()
+    max_dfsdvpa_err = scan_array()
+    max_dfsdvperp_err = scan_array()
+    max_d2fsdvperpdvpa_err = scan_array()
+    all_errs = (max_G_err, max_H_err, max_dHdvpa_err, max_dHdvperp_err,
+                max_d2Gdvperp2_err, max_d2Gdvpa2_err, max_d2Gdvperpdvpa_err,
+                max_dGdvperp_err, max_dfsdvpa_err, max_dfsdvperp_err,
+                max_d2fsdvperpdvpa_err)
+
+    # reference scalings to plot next to the measured errors
+    expected = scan_array()
+    expected_nelement_scaling!(expected,nelement_list,ngrid,nscan)
+    expected_integral = scan_array()
+    expected_nelement_integral_scaling!(expected_integral,nelement_list,ngrid,nscan)
+    expected_label = L"(1/N_{el})^{n_g - 1}"
+    expected_integral_label = L"(1/N_{el})^{n_g +1}"
+
+    for iscan in 1:nscan
+        scan_results = test_Lagrange_Rosenbluth_potentials(ngrid,nelement_list[iscan],standalone=false)
+        for (err_array, err_value) in zip(all_errs, scan_results)
+            err_array[iscan] = err_value
+        end
+    end
+    if global_rank[]==0 && plot_scan
+        fontsize = 8
+        ytick_sequence = Array([1.0e-13,1.0e-12,1.0e-11,1.0e-10,1.0e-9,1.0e-8,1.0e-7,1.0e-6,1.0e-5,1.0e-4,1.0e-3,1.0e-2,1.0e-1,1.0e-0,1.0e1])
+        # appearance settings common to the three plots
+        common = (xlabel = L"N_{element}", ylabel = "", shape = :circle,
+                  xscale = :log10, yscale = :log10,
+                  xticks = (nelement_list, nelement_list),
+                  yticks = (ytick_sequence, ytick_sequence),
+                  markersize = 5, linewidth = 2,
+                  xtickfontsize = fontsize, xguidefontsize = fontsize,
+                  ytickfontsize = fontsize, yguidefontsize = fontsize,
+                  legendfontsize = fontsize, foreground_color_legend = nothing,
+                  background_color_legend = nothing, legend = :bottomleft)
+        # plot `series` against nelement_list, save to `outfile`, print its name
+        function save_scan_plot(series, labels, outfile)
+            plot(nelement_list, series; label=labels, common...)
+            savefig(outfile)
+            println(outfile)
+        end
+
+        # errors in the derivatives of H and G
+        save_scan_plot(
+            [max_dHdvpa_err,max_dHdvperp_err,max_d2Gdvperp2_err,max_d2Gdvpa2_err,max_d2Gdvperpdvpa_err,max_dGdvperp_err, expected, expected_integral],
+            hcat(L"\epsilon(dH/d v_{\|\|})", L"\epsilon(dH/d v_{\perp})", L"\epsilon(d^2G/d v_{\perp}^2)",
+                 L"\epsilon(d^2G/d v_{\|\|}^2)", L"\epsilon(d^2G/d v_{\perp} d v_{\|\|})", L"\epsilon(dG/d v_{\perp})",
+                 expected_label, expected_integral_label),
+            "fkpl_essential_coeffs_numerical_lagrange_integration_test_ngrid_"*string(ngrid)*"_GLL.pdf")
+
+        # errors in the potentials H and G
+        save_scan_plot([max_H_err, max_G_err, expected, expected_integral],
+            hcat(L"\epsilon(H)", L"\epsilon(G)", expected_label, expected_integral_label),
+            "fkpl_potentials_numerical_lagrange_integration_test_ngrid_"*string(ngrid)*"_GLL.pdf")
+
+        # errors in the derivatives of the input distribution function
+        save_scan_plot([max_dfsdvpa_err,max_dfsdvperp_err,max_d2fsdvperpdvpa_err,expected],
+            hcat(L"\epsilon(d F_s / d v_{\|\|})", L"\epsilon(d F_s /d v_{\perp})",
+                 L"\epsilon(d^2 F_s /d v_{\perp}d v_{\|\|})", expected_label),
+            "fkpl_fs_numerical_test_ngrid_"*string(ngrid)*"_GLL.pdf")
+    end
+    return finalize_comms!()
+end
+
+if abspath(PROGRAM_FILE) == @__FILE__ # true only when this file is the script being run
+    using Pkg
+    Pkg.activate(".") # activate the project found in the current working directory
+    ngrid = 5
+    nelement_list = [2,4,8,16,32] # more than one entry, so the scan branch is taken
+    plot_scan = true
+    test_rosenbluth_potentials_direct_integration(ngrid=ngrid,nelement_list=nelement_list,plot_scan=plot_scan)
+end 
diff --git a/spline_derivatives_test.jl b/test_scripts/spline_derivatives_test.jl
similarity index 100%
rename from spline_derivatives_test.jl
rename to test_scripts/spline_derivatives_test.jl