From 23db005da728e58330cadb75fb24300a1a822762 Mon Sep 17 00:00:00 2001
From: Michael Hardman <29800382+mrhardman@users.noreply.github.com>
Date: Wed, 10 Jul 2024 18:12:44 +0100
Subject: [PATCH 01/41] A series of useful python scripts based on xarray for
 plotting data from moment_kinetics simulations.

---
 xarray_post_processing/plot_error_data.py     | 119 ++++++++
 .../plot_integration_error_data.py            |  62 ++++
 .../plot_many_collisions.py                   | 267 ++++++++++++++++++
 xarray_post_processing/plot_mk_utils.py       | 148 ++++++++++
 xarray_post_processing/plot_sd.py             | 176 ++++++++++++
 xarray_post_processing/plot_wall.py           | 150 ++++++++++
 xarray_post_processing/xarray_mk_utils.py     |  33 +++
 7 files changed, 955 insertions(+)
 create mode 100644 xarray_post_processing/plot_error_data.py
 create mode 100644 xarray_post_processing/plot_integration_error_data.py
 create mode 100644 xarray_post_processing/plot_many_collisions.py
 create mode 100644 xarray_post_processing/plot_mk_utils.py
 create mode 100644 xarray_post_processing/plot_sd.py
 create mode 100644 xarray_post_processing/plot_wall.py
 create mode 100644 xarray_post_processing/xarray_mk_utils.py

diff --git a/xarray_post_processing/plot_error_data.py b/xarray_post_processing/plot_error_data.py
new file mode 100644
index 000000000..1dfac40f9
--- /dev/null
+++ b/xarray_post_processing/plot_error_data.py
@@ -0,0 +1,119 @@
+# -*- coding: utf-8 -*-
+import matplotlib
+# Force matplotlib to not use any Xwindows backend.
+matplotlib.use('Agg') # this line allows plots to be made without using a display environment variable
+from matplotlib.backends.backend_pdf import PdfPages
+import numpy as np
+import h5py
+from plot_mk_utils import plot_1d_list_pdf, plot_1d_loglog_list_pdf
+from plot_mk_utils import plot_1d_semilog_list_pdf
+
+
+def get_fkpl_error_data(filename):
+    """Plot Fokker-Planck collision operator error data stored in an HDF5 file.
+
+    The error norms, moment errors, timings and expected scalings are read
+    from ``filename`` and four log-log plots against the number of elements
+    are written to ``filename + ".plots.pdf"``.
+
+    Only the datasets that are actually plotted are read from the file.
+
+    Parameters
+    ----------
+    filename : str
+        Path of the HDF5 file holding the error data.
+
+    Returns
+    -------
+    None
+    """
+    # names of the datasets that are plotted below
+    keys = ["max_C_err", "L2_C_err", "n_err", "u_err", "p_err",
+            "max_dHdvpa_err", "max_dHdvperp_err", "max_d2Gdvperpdvpa_err",
+            "max_d2Gdvpa2_err", "max_d2Gdvperp2_err",
+            "L2_dHdvpa_err", "L2_dHdvperp_err", "L2_d2Gdvperpdvpa_err",
+            "L2_d2Gdvpa2_err", "L2_d2Gdvperp2_err",
+            "expected_diff", "expected_integral",
+            "calculate_times", "init_times", "expected_t_2", "expected_t_3"]
+    # copy everything into memory inside the context manager, so that the
+    # HDF5 file is closed again even if a dataset turns out to be missing
+    with h5py.File(filename,'r') as f:
+        print(f.keys())
+        ngrid = np.copy(f['ngrid'][...])
+        print("ngrid: ",ngrid)
+        nelement_list = np.copy(f['nelement_list'][:])
+        print("nelement_list: ",nelement_list[:])
+        data = {key: np.copy(f[key][:]) for key in keys}
+    print(data["p_err"])
+
+    nelement_string = "N_{\\rm EL}"
+    ngrid_string = "N_{\\rm GR}"
+    xlab = "$"+nelement_string+"$"
+    # legend entries of the two expected convergence curves
+    diff_label = "$(1/"+nelement_string+")^{"+ngrid_string+"-1}$"
+    integral_label = "$(1/"+nelement_string+")^{"+ngrid_string+"+1}$"
+
+    def loglog_page(pdf, names, marker_list, ylab_list, loc_opt, anchor):
+        # append one log-log plot of the named datasets against nelement_list
+        ylist = [data[name] for name in names]
+        nelements = [nelement_list for item in ylist]
+        plot_1d_loglog_list_pdf (nelements,ylist,marker_list,xlab, pdf,
+          title='',ylab='',xlims=None,ylims=None,aspx=9,aspy=6,
+          xticks = nelement_list, yticks = None, markersize=10,
+          legend_title="", use_legend=True,loc_opt=loc_opt, ylab_list = ylab_list,
+          bbox_to_anchor_opt=anchor, legend_fontsize=15, ncol_opt=1)
+
+    pdffile = filename+".plots.pdf"
+    # the context manager finalises the PDF even if one of the plots raises
+    with PdfPages(pdffile) as pdf:
+        # errors of the collision operator and of the moments n, u and p
+        loglog_page(pdf,
+            ["max_C_err", "L2_C_err", "n_err", "u_err", "p_err", "expected_diff"],
+            ['r--o','b--s','g--.','m--x','c--v','k'],
+            ["$\\epsilon_{\\infty}(C[F,F])$", "$\\epsilon_{L_2}(C[F,F])$",
+             "$|\\Delta n|$",
+             "$|\\Delta u_{||}|$",
+             "$|\\Delta p |$",
+             diff_label],
+            'lower left', (0.05, 0.05))
+
+        # initialisation and step times with the cubic and quadratic scalings
+        loglog_page(pdf,
+            ["init_times", "calculate_times", "expected_t_3", "expected_t_2"],
+            ['r--o','b--s','m-o','k-s'],
+            ["time/init (ms)", "time/step (ms)",
+             "$"+nelement_string+"^3$",
+             "$"+nelement_string+"^2$"],
+            'upper left', (0.05, 0.95))
+
+        # infinity-norm errors of the derivatives of H and G
+        marker_list = ['r--o','b--^','g--.','m--x','c--v','k']
+        loglog_page(pdf,
+            ["max_dHdvpa_err", "max_dHdvperp_err", "max_d2Gdvperpdvpa_err",
+             "max_d2Gdvpa2_err", "max_d2Gdvperp2_err", "expected_integral"],
+            marker_list,
+            ["$\\epsilon_{\\infty}(d H / d v_{||})$","$\\epsilon_{\\infty}(d H / d v_{\\perp})$",
+             "$\\epsilon_{\\infty}(d^2 G / d v_{\\perp} d v_{||})$",
+             "$\\epsilon_{\\infty}(d^2 G / d v^2_{||})$",
+             "$\\epsilon_{\\infty}(d^2 G / d v^2_{\\perp})$",
+             integral_label],
+            'lower left', (0.05, 0.05))
+
+        # L2-norm errors of the same derivatives
+        loglog_page(pdf,
+            ["L2_dHdvpa_err", "L2_dHdvperp_err", "L2_d2Gdvperpdvpa_err",
+             "L2_d2Gdvpa2_err", "L2_d2Gdvperp2_err", "expected_integral"],
+            marker_list,
+            ["$\\epsilon_{L_2}(d H / d v_{||})$","$\\epsilon_{L_2}(d H / d v_{\\perp})$",
+             "$\\epsilon_{L_2}(d^2 G / d v_{\\perp} d v_{||})$",
+             "$\\epsilon_{L_2}(d^2 G / d v^2_{||})$",
+             "$\\epsilon_{L_2}(d^2 G / d v^2_{\\perp})$",
+             integral_label],
+            'upper right', (0.95, 0.95))
+
+    print(pdffile)
+    return None
+
+workdir = ""  # path prefix prepended to the data file name below
+filename = workdir + "moment_kinetics_collisions/fkpl_error_data_ngrid_9_ncore_2.h5"  # HDF5 input file
+get_fkpl_error_data(filename)  # writes the plots to filename + ".plots.pdf"
diff --git a/xarray_post_processing/plot_integration_error_data.py b/xarray_post_processing/plot_integration_error_data.py
new file mode 100644
index 000000000..0ddd04f42
--- /dev/null
+++ b/xarray_post_processing/plot_integration_error_data.py
@@ -0,0 +1,62 @@
+# -*- coding: utf-8 -*-
+import matplotlib
+# Force matplotlib to not use any Xwindows backend.
+matplotlib.use('Agg') # this line allows plots to be made without using a display environment variable
+from matplotlib.backends.backend_pdf import PdfPages
+import numpy as np
+import h5py
+from plot_mk_utils import plot_1d_list_pdf, plot_1d_loglog_list_pdf
+from plot_mk_utils import plot_1d_semilog_list_pdf
+
+
+def get_fkpl_integration_error_data(filename):
+    """Plot the infinity-norm integration error data stored in an HDF5 file.
+
+    A single log-log plot of the errors against the number of elements is
+    written to ``filename + ".plots.pdf"``.
+
+    Parameters
+    ----------
+    filename : str
+        Path of the HDF5 file holding the error data.
+    """
+    # datasets in the order in which they are plotted
+    keys = ["max_dHdvpa_err", "max_dHdvperp_err", "max_d2Gdvperpdvpa_err",
+            "max_d2Gdvpa2_err", "max_d2Gdvperp2_err",
+            "expected_diff", "expected_integral"]
+    # copy everything into memory inside the context manager, so that the
+    # HDF5 file is closed again even if a dataset turns out to be missing
+    with h5py.File(filename,'r') as f:
+        print(f.keys())
+        ngrid = np.copy(f['ngrid'][...])
+        print("ngrid: ",ngrid)
+        nelement_list = np.copy(f['nelement_list'][:])
+        print("nelement_list: ",nelement_list[:])
+        Infnorm_list = [np.copy(f[key][:]) for key in keys]
+
+    # LaTeX fragments used in the axis and legend labels
+    nelement_string = "N_{\\rm EL}"
+    ngrid_string = "N_{\\rm GR}"
+    # the last two curves are the expected scalings, drawn as plain lines
+    marker_list = ['r--o','b--^','g--.','m--x','c--v','b','k']
+    nelements = [nelement_list for item in Infnorm_list]
+    ylab_list = ["$\\epsilon_{\\infty}(d H / d v_{||})$","$\\epsilon_{\\infty}(d H / d v_{\\perp})$",
+                 "$\\epsilon_{\\infty}(d^2 G / d v_{\\perp} d v_{||})$",
+                 "$\\epsilon_{\\infty}(d^2 G / d v^2_{||})$",
+                 "$\\epsilon_{\\infty}(d^2 G / d v^2_{\\perp})$",
+                 "$(1/"+nelement_string+")^{"+ngrid_string+"-1}$",
+                 "$(1/"+nelement_string+")^{"+ngrid_string+"+1}$"]
+
+    pdffile = filename+".plots.pdf"
+    # the context manager finalises the PDF even if the plot call raises
+    with PdfPages(pdffile) as pdf:
+        plot_1d_loglog_list_pdf (nelements,Infnorm_list,marker_list,"$"+ nelement_string+"$", pdf,
+          title='',ylab='',xlims=None,ylims=None,aspx=9,aspy=6, xticks = nelement_list, yticks = None,
+          markersize=10, legend_title="", use_legend=True,loc_opt='lower left', ylab_list = ylab_list,
+          bbox_to_anchor_opt=(0.05, 0.05), legend_fontsize=15, ncol_opt=1)
+    print(pdffile)
+    return None
+
+workdir = ""  # path prefix prepended to the data file name below
+filename = workdir + "moment_kinetics_collisions/fkpl_integration_error_data_ngrid_5_ncore_1.h5"  # HDF5 input file
+get_fkpl_integration_error_data(filename)  # writes the plot to filename + ".plots.pdf"
diff --git a/xarray_post_processing/plot_many_collisions.py b/xarray_post_processing/plot_many_collisions.py
new file mode 100644
index 000000000..6a44cb83a
--- /dev/null
+++ b/xarray_post_processing/plot_many_collisions.py
@@ -0,0 +1,267 @@
+# -*- coding: utf-8 -*-
+import matplotlib
+# Force matplotlib to not use any Xwindows backend.
+matplotlib.use('Agg') # this line allows plots to be made without using a display environment variable
+from matplotlib.backends.backend_pdf import PdfPages
+import numpy as np
+import toml as tml
+import h5py
+from xarray_mk_utils import grid_data, wgts_data
+from xarray_mk_utils import dynamic_data
+from plot_mk_utils import plot_1d_list_pdf, plot_1d_loglog_list_pdf
+from plot_mk_utils import plot_1d_semilog_list_pdf, plot_2d_pdf
+
+
+def plot_ff_norms_with_vspace(filename,ff,ffm,vpagrid,vperpgrid):
+    """Write a contour plot of |F - F_M| on the (vpa, vperp) grid to ``<filename>.ffplots.pdf``."""
+    pdffile = filename+".ffplots.pdf"
+    # the context manager finalises the PDF even if the plot call raises
+    with PdfPages(pdffile) as pdf:
+        plot_2d_pdf(vpagrid,vperpgrid,np.abs(ff[:,:]-ffm[:,:]),pdf,
+                    title="$|F-F_M|$",ylab="$v_{\\perp}$",xlab="$v_{||}$")
+    print("Saving figures: "+pdffile)
+    return None
+
+def get_time_evolving_data(filename):
+    """Load a simulation output and measure how far F is from a Maxwellian.
+
+    A Maxwellian F_M is built at every time from the density, parallel flow
+    and thermal speed at index [it,0,0,0].  Returns time, dSdt, the L2 and
+    infinity norms of F - F_M, density, parallel flow, thermal speed,
+    pressure (None if a pressure component is absent), vpagrid, vperpgrid,
+    and F and F_M at the last time.
+    """
+    print(filename)
+    nz_global, nz_local, zgrid = grid_data(filename,"z")
+    nr_global, nr_local, rgrid = grid_data(filename,"r")
+    nvpa_global, nvpa_local, vpagrid = grid_data(filename,"vpa")
+    nvperp_global, nvperp_local, vperpgrid = grid_data(filename,"vperp")
+    vpawgts = wgts_data(filename,"vpa")
+    vperpwgts = wgts_data(filename,"vperp")
+    time, time_present = dynamic_data(filename,"time")
+    ff, ff_present = dynamic_data(filename,"f")
+    dsdt, dsdt_present = dynamic_data(filename,"entropy_production")
+    density, density_present = dynamic_data(filename,"density")
+    parallel_flow, parallel_flow_present = dynamic_data(filename,"parallel_flow")
+    parallel_pressure, parallel_pressure_present = dynamic_data(filename,"parallel_pressure")
+    perpendicular_pressure, perpendicular_pressure_present = dynamic_data(filename,"perpendicular_pressure")
+    # the total pressure needs both pressure components to be present
+    if parallel_pressure_present and perpendicular_pressure_present:
+        pressure = ((2.0*perpendicular_pressure + parallel_pressure)/3.0)[:,0,0,0]
+    else:
+        pressure = None
+    thermal_speed, thermal_speed_present = dynamic_data(filename,"thermal_speed")
+    ntime = time.size
+    nvperp = vperpgrid.size
+    nvpa = vpagrid.size
+    # Maxwellian with the density, flow and thermal speed of each time
+    ffm = np.copy(ff)
+    for it in range(0,ntime):
+        vth = thermal_speed[it,0,0,0]
+        for ivperp in range(0,nvperp):
+            for ivpa in range(0,nvpa):
+                v2 = ((vpagrid[ivpa]-parallel_flow[it,0,0,0])/vth)**2 + (vperpgrid[ivperp]/vth)**2
+                ffm[it,0,0,0,ivperp,ivpa] = density[it,0,0,0]*np.exp(-v2)/(vth**3)
+
+    # L2 norm of F - F_M, weighted with and normalised by the velocity weights
+    L2fm = np.copy(dsdt)
+    L2denom = np.sum(vperpwgts[:])*np.sum(vpawgts[:])
+    for it in range(0,ntime):
+        L2fm[it,0,0,0] = 0.0
+        for ivperp in range(0,nvperp):
+            for ivpa in range(0,nvpa):
+                L2fm[it,0,0,0] += vperpwgts[ivperp]*vpawgts[ivpa]*(ff[it,0,0,0,ivperp,ivpa]-ffm[it,0,0,0,ivperp,ivpa])**2
+        L2fm[it,0,0,0] = np.sqrt(L2fm[it,0,0,0]/L2denom)
+    # infinity norm of F - F_M
+    Inffm = np.copy(dsdt)
+    for it in range(0,ntime):
+        Inffm[it,0,0,0] = np.max(np.abs(ff[it,0,0,0,:,:]-ffm[it,0,0,0,:,:]))
+    print("delta n: ", density[-1,0,0,0]-density[0,0,0,0])
+    print("delta u: ", parallel_flow[-1,0,0,0]-parallel_flow[0,0,0,0])
+    print("delta vth: ", thermal_speed[-1,0,0,0]-thermal_speed[0,0,0,0])
+    print("L2fm: ", L2fm[0,0,0,0]," ",L2fm[-1,0,0,0])
+    print("Inffm: ", Inffm[0,0,0,0]," ",Inffm[-1,0,0,0])
+    return time, dsdt[:,0,0,0], L2fm[:,0,0,0], Inffm[:,0,0,0], density[:,0,0,0], parallel_flow[:,0,0,0], thermal_speed[:,0,0,0], pressure, vpagrid, vperpgrid, ff[-1,0,0,0,:,:], ffm[-1,0,0,0,:,:]
+
+def save_plot_data(filename, time, dSdt, L2norm, Infnorm, dens, upar, vth, pres,
+        vpagrid, vperpgrid, ff, ffm):
+    """Store the processed plot data in the HDF5 file ``filename + ".hdf5"``.
+
+    Every argument after filename is written to a dataset that carries the
+    name of the argument.  An existing file is overwritten.
+    """
+    datasets = {"time": time, "dSdt": dSdt, "L2norm": L2norm,
+                "Infnorm": Infnorm, "dens": dens, "upar": upar,
+                "vth": vth, "pres": pres, "vpagrid": vpagrid,
+                "vperpgrid": vperpgrid, "ff": ff, "ffm": ffm}
+    # the context manager closes the file even if writing a dataset fails,
+    # so no half-open handle is left behind
+    with h5py.File(filename+".hdf5", "w") as f:
+        for name, values in datasets.items():
+            f.create_dataset(name,data=values)
+    return None
+        
+def load_plot_data(filename):
+    """Read the arrays stored in the HDF5 file ``filename + ".hdf5"``.
+
+    Returns the tuple (time, dSdt, L2norm, Infnorm, dens, upar, vth, pres,
+    vpagrid, vperpgrid, ff, ffm) of in-memory copies of the datasets.
+    """
+    names = ["time", "dSdt", "L2norm", "Infnorm", "dens", "upar", "vth",
+             "pres", "vpagrid", "vperpgrid"]
+    # everything is copied into memory, so the file can be closed (also
+    # when a dataset is missing) before the arrays are returned
+    with h5py.File(filename+".hdf5", "r") as f:
+        arrays = [np.copy(f[name][:]) for name in names]
+        # ff and ffm are read with two indices, i.e. as 2D datasets
+        arrays.append(np.copy(f["ff"][:,:]))
+        arrays.append(np.copy(f["ffm"][:,:]))
+    return tuple(arrays)
+
+# accumulators that are filled by the loop over the simulations
+# time axes matching the curves collected further down
+time_list, Stime_list = [], []
+Mtime_list, Mnoupar_time_list = [], []
+# entropy production and the norms of F - F_M
+dSdt_list = []
+L2norm_list = []
+Infnorm_list = []
+# changes of the moments relative to their initial values
+dens_list, upar_list = [], []
+vth_list, p_list = [], []
+# absolute values of the changes of (n, u, vth) and of (n, vth)
+M_list, Mnoupar_list = [], []
+# absolute values of the changes of (n, u, p) and of (n, p)
+M2_list, M2noupar_list = [], []
+
+#input_raw_names = ["fokker-planck-relaxation-beam-init1",
+#                   "fokker-planck-relaxation-beam-init2",
+#                   "fokker-planck-relaxation-beam-init3"]
+#input_raw_names = ["fokker-planck-relaxation-beam-init1long",
+#                   "fokker-planck-relaxation-beam-init2long",
+#                   "fokker-planck-relaxation-beam-init3long"]
+#input_raw_names = ["fokker-planck-relaxation-no-dfdvperp1",
+#                   "fokker-planck-relaxation-no-dfdvperp2",
+#                   "fokker-planck-relaxation-no-dfdvperp3"]
+#input_raw_names = ["fokker-planck-relaxation-no-dfdvperp-no-conserve1",
+#                   "fokker-planck-relaxation-no-dfdvperp-no-conserve2",
+#                   "fokker-planck-relaxation-no-dfdvperp-no-conserve3"]
+workdir = ""
+# every run has an input file <name>.toml and an output file <name>/<name>.dfns.0.h5
+input_raw_names = ["fokker-planck-relaxation-flux-bc-only1", "fokker-planck-relaxation-flux-bc-only2",
+                   "fokker-planck-relaxation-flux-bc-only3"]
+inputname_list = [workdir+run_name+".toml" for run_name in input_raw_names]
+outfilename_list = [workdir+run_name+"/"+run_name+".dfns.0.h5" for run_name in input_raw_names]
+process_raw_data = True
+
+for outfilename in outfilename_list:
+    # outfilename[:-10] drops the 10-character ".dfns.0.h5" ending
+    savefilename = outfilename[:-10]+".processed.h5"
+    if not process_raw_data:
+        print("Loading pre-processed data: ",savefilename)
+        plot_data = load_plot_data(savefilename)
+    else:
+        plot_data = get_time_evolving_data(outfilename)
+        print("Saving processed data: ",savefilename)
+        save_plot_data(savefilename, *plot_data)
+    time, dSdt, L2norm, Infnorm, dens, upar, vth, pres, vpagrid, vperpgrid, ff, ffm = plot_data
+
+    plot_ff_norms_with_vspace(outfilename,ff,ffm,vpagrid,vperpgrid)
+    # changes of the moments relative to their initial values
+    delta_dens = dens-dens[0]
+    delta_upar = upar-upar[0]
+    delta_vth = vth-vth[0]
+    delta_pres = pres-pres[0]
+    time_list.append(time)
+    # the first time point is left out of the entropy production curves
+    Stime_list.append(time[1:])
+    dSdt_list.append(dSdt[1:])
+    L2norm_list.append(L2norm)
+    Infnorm_list.append(Infnorm)
+    dens_list.append(delta_dens)
+    upar_list.append(delta_upar)
+    vth_list.append(delta_vth)
+    p_list.append(delta_pres)
+    # one time axis per curve: three curves with u, two without
+    Mtime_list.extend([time[:], time[:], time[:]])
+    Mnoupar_time_list.extend([time[:], time[:]])
+    # absolute values, since the curves are drawn on a logarithmic axis
+    M_list.extend([np.abs(delta_dens), np.abs(delta_upar), np.abs(delta_vth)])
+    Mnoupar_list.extend([np.abs(delta_dens), np.abs(delta_vth)])
+    M2_list.extend([np.abs(delta_dens), np.abs(delta_upar), np.abs(delta_pres)])
+    M2noupar_list.extend([np.abs(delta_dens), np.abs(delta_pres)])
+
+file = workdir + "collisions_plots_many.pdf"
+pdf = PdfPages(file)
+tlabel = "$ \\nu_{ss} t $"
+marker_list = ['k','r-.','b--']
+ylab_list = ["#1", "#2", "#3"]
+# keyword arguments shared by the three one-curve-per-simulation plots
+norm_plot_opts = dict(ylab='',xlims=None,ylims=None,aspx=9,aspy=6, xticks = None, yticks = None,
+  markersize=5, legend_title="Resolutions", use_legend=True, ylab_list = ylab_list,
+  legend_fontsize=15, ncol_opt=1)
+# entropy production, then the L2 and infinity norms of F - F_M
+plot_1d_semilog_list_pdf (Stime_list,dSdt_list,marker_list,tlabel, pdf, title='$\\dot{S}$',
+  loc_opt='upper right', bbox_to_anchor_opt=(0.95, 0.95), **norm_plot_opts)
+plot_1d_semilog_list_pdf (time_list,L2norm_list,marker_list,tlabel, pdf, title='$L_2(F-F_M)$',
+  loc_opt='lower right', bbox_to_anchor_opt=(0.95, 0.05), **norm_plot_opts)
+# "\\infty" needs the doubled backslash: "\i" is not a valid escape sequence
+plot_1d_semilog_list_pdf (time_list,Infnorm_list,marker_list,tlabel, pdf, title='$L_{\\infty}(F-F_M)$',
+  loc_opt='lower right', bbox_to_anchor_opt=(0.95, 0.05), **norm_plot_opts)
+
+# |change| of n, u_par and v_th for all simulations on a single page
+Mylab_list = ["$|n(t)-n(0)|$ #1","$|u_{||}(t)- u_{||}(0)|$ #1","$|v_{\\rm th}(t) - v_{\\rm th}(0)|$ #1",
+              "$|n(t)-n(0)|$ #2","$|u_{||}(t)- u_{||}(0)|$ #2","$|v_{\\rm th}(t) - v_{\\rm th}(0)|$ #2",
+              "$|n(t)-n(0)|$ #3","$|u_{||}(t)- u_{||}(0)|$ #3","$|v_{\\rm th}(t) - v_{\\rm th}(0)|$ #3",]
+marker_list = ['k','k-.','k--','r','r-.','r--','b','b-.','b--',]
+# debug output: the four lengths are expected to match
+for plotted in (Mtime_list, M_list, marker_list, Mylab_list):
+    print(len(plotted))
+plot_1d_semilog_list_pdf (Mtime_list,M_list,marker_list,tlabel, pdf,
+  title='',ylab='',xlims=None,ylims=[None,10**(0)],aspx=9,aspy=6, xticks = None, yticks = None,
+  markersize=5, legend_title="", use_legend=True,loc_opt='upper right', ylab_list = Mylab_list,
+  bbox_to_anchor_opt=(0.975, 0.975), legend_fontsize=15, ncol_opt=3)
+
+# the same page without the u_par curves
+Mylab_list = ["$|n(t)-n(0)|$ #1","$|v_{\\rm th}(t) - v_{\\rm th}(0)|$ #1",
+              "$|n(t)-n(0)|$ #2","$|v_{\\rm th}(t) - v_{\\rm th}(0)|$ #2",
+              "$|n(t)-n(0)|$ #3","$|v_{\\rm th}(t) - v_{\\rm th}(0)|$ #3",]
+marker_list = ['k','k--','r','r--','b','b--',]
+# debug output: the four lengths are expected to match
+for plotted in (Mnoupar_time_list, Mnoupar_list, marker_list, Mylab_list):
+    print(len(plotted))
+plot_1d_semilog_list_pdf (Mnoupar_time_list,Mnoupar_list,marker_list,tlabel, pdf,
+  title='',ylab='',xlims=None,ylims=[None,10**(-1)],aspx=9,aspy=6, xticks = None, yticks = None,
+  markersize=5, legend_title="", use_legend=True,loc_opt='upper right', ylab_list = Mylab_list,
+  bbox_to_anchor_opt=(0.975, 0.975), legend_fontsize=15, ncol_opt=3)
+
+# |change| of n, u_par and p for all simulations on a single page
+Mylab_list = ["$|\\Delta n(t)|$ #1","$|\\Delta u_{||}(t)|$ #1","$|\\Delta p(t)|$ #1",
+              "$|\\Delta n(t)|$ #2","$|\\Delta u_{||}(t)|$ #2","$|\\Delta p(t)|$ #2",
+              "$|\\Delta n(t)|$ #3","$|\\Delta u_{||}(t)|$ #3","$|\\Delta p(t)|$ #3",]
+marker_list = ['k','k-.','k--','r','r-.','r--','b','b-.','b--',]
+for plotted in (Mtime_list, M2_list, marker_list, Mylab_list):
+    print(len(plotted))
+# limits of the y axis; the commented lines are alternative choices
+#ylims = [None,10**(-7)]
+#ylims = [10**(-14),10**(0)]
+ylims = [None,10**(0)]
+plot_1d_semilog_list_pdf (Mtime_list,M2_list,marker_list,tlabel, pdf,
+  title='',ylab='',xlims=None,ylims=ylims,aspx=9,aspy=6, xticks = None, yticks = None,
+  markersize=5, legend_title="", use_legend=True,loc_opt='upper right', ylab_list = Mylab_list,
+  bbox_to_anchor_opt=(0.825, 1.0), legend_fontsize=15, ncol_opt=3)
+
+# the same page without the u_par curves
+Mylab_list = ["$|\\Delta n(t)|$ #1","$|\\Delta p(t)|$ #1",
+              "$|\\Delta n(t)|$ #2","$|\\Delta p(t)|$ #2",
+              "$|\\Delta n(t)|$ #3","$|\\Delta p(t)|$ #3",]
+marker_list = ['k','k--','r','r--','b','b--',]
+for plotted in (Mnoupar_time_list, M2noupar_list, marker_list, Mylab_list):
+    print(len(plotted))
+plot_1d_semilog_list_pdf (Mnoupar_time_list,M2noupar_list,marker_list,tlabel, pdf,
+  title='',ylab='',xlims=None,ylims=[None,10**(-1)],aspx=9,aspy=6, xticks = None, yticks = None,
+  markersize=5, legend_title="", use_legend=True,loc_opt='upper right', ylab_list = Mylab_list,
+  bbox_to_anchor_opt=(0.8, 0.975), legend_fontsize=15, ncol_opt=3)
+# all pages have been appended: finalise the PDF
+pdf.close()
+print("Saving figure: "+file)
diff --git a/xarray_post_processing/plot_mk_utils.py b/xarray_post_processing/plot_mk_utils.py
new file mode 100644
index 000000000..01d52ecb9
--- /dev/null
+++ b/xarray_post_processing/plot_mk_utils.py
@@ -0,0 +1,148 @@
+import matplotlib.pyplot as plt
+from matplotlib import rcParams
+import numpy as np
+plt.rc('text', usetex=False)
+plt.rc('font', family='serif')
+plt.rc('font', size=20)
+rcParams.update({'text.latex.preamble' : r'\usepackage{bm}'})
+rcParams.update({'figure.autolayout': True})
+
+def plot_1d_list_pdf (xlist,ylist,marker_list,xlab, pdf,
+  title='',ylab='',xlims=None,ylims=None,aspx=12,aspy=8, xticks = None, xticks_labels=None, yticks = None,
+  markersize=5, legend_title="", use_legend=False,loc_opt='upper right', ylab_list = None,
+  bbox_to_anchor_opt=(0.95, 0.95), legend_fontsize=10, ncol_opt=1,
+  legend_shadow=False,legend_frame=False, vlines = None,hlines = None,marker_fill_style = None,
+  cartoon=False, linewidth=None, texts = None, slines=None):
+    """Append a figure of several 1D curves on linear axes to the open PdfPages pdf.
+
+    xlist, ylist, marker_list (and optionally ylab_list) hold one entry per curve.
+    vlines, hlines, texts and slines add annotations; cartoon=True hides ticks and frame.
+    """
+    fig=plt.figure(figsize=(aspx,aspy))
+    if ylab_list is None:
+        ylab_list = [None for curve in ylist]
+    for iy in range(0,len(ylist)):
+        plt.plot(xlist[iy],ylist[iy],marker_list[iy],markersize=markersize,label=ylab_list[iy],fillstyle = marker_fill_style, linewidth = linewidth)
+    plt.xlabel(xlab)
+    if len(ylab) > 0:
+        plt.ylabel(ylab)
+    if len(title) > 0:
+        plt.title(title)
+    if xlims is not None:
+        plt.xlim(xlims[0],xlims[1])
+    if ylims is not None:
+        plt.ylim(ylims[0],ylims[1])
+    if vlines is not None:
+        for xin,xlabel,xcolor,xlinestyle in vlines:
+            plt.axvline(x=xin, label=xlabel, color=xcolor,linestyle=xlinestyle,linewidth=linewidth)
+    if hlines is not None:
+        for yin,ylabel,ycolor,ylinestyle in hlines:
+            plt.axhline(y=yin, label=ylabel, color=ycolor,linestyle=ylinestyle,linewidth=linewidth)
+    if texts is not None:
+        for xin, yin, textin in texts:
+            print(xin,yin,textin)
+            plt.text(xin,yin,textin)
+    if slines is not None:
+        for m,c,marker,label in slines:
+            plt.plot(xlist[0],m*xlist[0]+c,marker,label=label)
+    if use_legend:
+        plt.legend(title=legend_title,loc=loc_opt, bbox_to_anchor=bbox_to_anchor_opt, fontsize=legend_fontsize,
+          frameon=legend_frame, handlelength=1, labelspacing=0.5, ncol=ncol_opt, columnspacing = 0.5 , handletextpad = 0.5, shadow=legend_shadow)
+    if xticks is not None:
+        plt.xticks(xticks)
+    if yticks is not None:
+        plt.yticks(yticks)
+    if cartoon:  # tick_params takes booleans: the string 'off' is truthy and would keep the ticks
+        plt.tick_params(top=False, bottom=False, left=False, right=False, labelleft=False, labelbottom=False)
+        plt.box(False)
+    pdf.savefig(fig)# pdf is the object of the current open PDF file to which the figures are appended
+    plt.close (fig)
+
+def plot_1d_semilog_list_pdf (xlist,ylist,marker_list,xlab, pdf,
+  title='',ylab='',xlims=None,ylims=None,aspx=12,aspy=8, xticks = None, yticks = None,
+  markersize=5, legend_title="", use_legend=False,loc_opt='upper right', ylab_list = None,
+  bbox_to_anchor_opt=(0.95, 0.95), legend_fontsize=10, ncol_opt=1,
+  legend_shadow=False,legend_frame=False):
+    """Append a figure of several 1D curves with a logarithmic y axis to the open PdfPages pdf.
+
+    xlist, ylist, marker_list (and optionally the legend labels ylab_list) hold one entry per curve.
+    """
+    fig = plt.figure(figsize=(aspx,aspy))
+    labels = [None]*len(ylist) if ylab_list is None else ylab_list
+    for icurve, ydata in enumerate(ylist):
+        plt.semilogy(xlist[icurve],ydata,marker_list[icurve],markersize=markersize,label=labels[icurve])
+    plt.xlabel(xlab)
+    if len(ylab) > 0:
+        plt.ylabel(ylab)
+    if len(title) > 0:
+        plt.title(title)
+    if xlims is not None:
+        plt.xlim(xlims[0],xlims[1])
+    if ylims is not None:
+        plt.ylim(ylims[0],ylims[1])
+    if use_legend:
+        plt.legend(title=legend_title,loc=loc_opt, bbox_to_anchor=bbox_to_anchor_opt,
+                   fontsize=legend_fontsize, frameon=legend_frame, handlelength=1, labelspacing=0.5,
+                   ncol=ncol_opt, columnspacing = 0.5 , handletextpad = 0.5, shadow=legend_shadow)
+    if xticks is not None:
+        plt.xticks(xticks)
+    if yticks is not None:
+        plt.yticks(yticks)
+    pdf.savefig(fig)  # appended as a new page of the open PDF
+    plt.close(fig)
+
+def plot_1d_loglog_list_pdf (xlist,ylist,marker_list,xlab, pdf,
+  title='',ylab='',xlims=None,ylims=None,aspx=12,aspy=8, xticks = None, yticks = None,
+  markersize=5, legend_title="", use_legend=False,loc_opt='upper right', ylab_list = None,
+  bbox_to_anchor_opt=(0.95, 0.95), legend_fontsize=10, ncol_opt=1,
+  legend_shadow=False,legend_frame=False):
+    """Append a figure of several 1D curves with logarithmic x and y axes to the open PdfPages pdf.
+
+    xlist, ylist, marker_list (and optionally ylab_list) hold one entry per curve.  If xticks
+    is given, the x axis is ticked at these values, labelled with str() of each value.
+    """
+    fig = plt.figure(figsize=(aspx,aspy))
+    labels = [None]*len(ylist) if ylab_list is None else ylab_list
+    for icurve, ydata in enumerate(ylist):
+        plt.loglog(xlist[icurve],ydata,marker_list[icurve],markersize=markersize,label=labels[icurve])
+    plt.xlabel(xlab)
+    if len(ylab) > 0:
+        plt.ylabel(ylab)
+    if len(title) > 0:
+        plt.title(title)
+    if xlims is not None:
+        plt.xlim(xlims[0],xlims[1])
+    if ylims is not None:
+        plt.ylim(ylims[0],ylims[1])
+    if use_legend:
+        plt.legend(title=legend_title,loc=loc_opt, bbox_to_anchor=bbox_to_anchor_opt,
+                   fontsize=legend_fontsize, frameon=legend_frame, handlelength=1, labelspacing=0.5,
+                   ncol=ncol_opt, columnspacing = 0.5 , handletextpad = 0.5, shadow=legend_shadow)
+    if xticks is not None:
+        # remove the automatic x ticks and the minor ticks before setting the requested ones
+        plt.xticks([])
+        plt.minorticks_off()
+        tick_labels = [str(tick) for tick in xticks]
+        plt.xticks(xticks,tick_labels)
+    if yticks is not None:
+        plt.yticks(yticks)
+    pdf.savefig(fig)  # appended as a new page of the open PDF
+    plt.close(fig)
+
+def plot_2d_pdf(x,y,z,pdf,title="",ylab="",xlab=""):
+    """Append a filled contour plot of z on the (x, y) mesh to the open PdfPages pdf.
+
+    Seven evenly spaced contour levels between z.min() and z.max() are used.
+    """
+    xmesh, ymesh = np.meshgrid(x, y)
+    zlevels = np.linspace(z.min(), z.max(), 7)
+
+    fig = plt.figure()
+    plt.contourf(xmesh, ymesh, z, levels=zlevels)
+    plt.colorbar()
+    plt.title(title)
+    plt.xlabel(xlab)
+    plt.ylabel(ylab)
+    pdf.savefig(fig)
+    plt.close(fig)
+    return None
diff --git a/xarray_post_processing/plot_sd.py b/xarray_post_processing/plot_sd.py
new file mode 100644
index 000000000..dfc9548a1
--- /dev/null
+++ b/xarray_post_processing/plot_sd.py
@@ -0,0 +1,176 @@
+# -*- coding: utf-8 -*-
+import matplotlib
+# Force matplotlib to not use any Xwindows backend.
+matplotlib.use('Agg') # this line allows plots to be made without using a display environment variable
+from matplotlib.backends.backend_pdf import PdfPages
+import numpy as np
+import toml as tml
+from xarray_mk_utils import grid_data
+from xarray_mk_utils import dynamic_data
+from plot_mk_utils import plot_1d_list_pdf, plot_1d_loglog_list_pdf
+
+def get_sd_plot_data(filename):
+    """Load 1D cuts of the distribution function "f" at the last stored time.
+
+    Returns (vperpgrid, f along vperp at a fixed vpa index, vpagrid,
+    f along vpa at vperp index 0), all for species 0, r index 0, z index 0.
+    Assumes there is a single output file, from a simulation using
+    parallel HDF5 or a single shared-memory region.
+    """
+    # the z and r coordinate groups are read but not otherwise used here
+    _nz, _nz_loc, _zgrid = grid_data(filename,"z")
+    _nr, _nr_loc, _rgrid = grid_data(filename,"r")
+    nvpa_total, _nvpa_loc, vpagrid = grid_data(filename,"vpa")
+    _nvperp, _nvperp_loc, vperpgrid = grid_data(filename,"vperp")
+
+    times, _has_time = dynamic_data(filename,"time")
+    pdf_data, _has_f = dynamic_data(filename,"f")
+
+    # fixed indices of the slices; the index order used below is
+    # [time, species, r, z, vperp, vpa]
+    last_it = times.size - 1
+    species = 0
+    r_index = 0
+    z_index = 0
+    vperp_index = 0
+    # NOTE(review): //2+1 may be a 1-based midpoint carried into 0-based
+    # indexing - confirm that this is the intended v_parallel point
+    vpa_index = nvpa_total//2+1
+
+    cut_vs_vperp = pdf_data[last_it,species,r_index,z_index,:,vpa_index]
+    return vperpgrid, cut_vs_vperp, vpagrid, pdf_data[last_it,species,r_index,z_index,vperp_index,:]
+
+def get_sd_input_data(filename):
+    # Read the TOML input file; return (v0, vc3, amplitude) for the analytical slowing-down curve.
+    print(filename)
+    with open(filename, 'r') as toml_stream:
+        inputdata = tml.load(toml_stream)
+    collisions = inputdata["fokker_planck_collisions"]
+    print(collisions)
+    ni = collisions["sd_density"]
+    Ti = collisions["sd_temp"]
+    Te = Ti
+    # only a single charge ion is evolved, hence species i of the input is interpreted as alpha here
+    Zalpha = collisions["Zi"]
+    # Zi must be absolute / relative to proton charge
+    # whereas the fixed Maxwellian ion charge number is given here
+    Zi = collisions["sd_q"]
+    # quasineutrality to determine electron density; initial alpha density must be unity
+    ne = Zi*ni + 1.0*Zalpha
+    mi = collisions["sd_mi"]
+    me = collisions["sd_me"]
+    nuref = collisions["nuii"]
+    # compute critical speed vc3/cref^3, cref = sqrt(2 Tref/mref) -> factor of 1/ 2 sqrt 2 
+    vc3 = (np.sqrt(2.0)/4.0)*3.0*np.sqrt(np.pi/2.0)*(Zi**2)*((Te)**1.5)*(ni/ne)/(np.sqrt(me)*mi)
+
+    source = inputdata["ion_source"]
+    print(source)
+    v0 = source["source_v0"]
+    Salpha = source["source_strength"]
+    # use that nuref = gamma_alphaalpha nref / mref^2 cref^3, with cref = sqrt(2Tref/mref) and alphas the reference species
+    # gamma_alphaalpha = 2 pi Zalpha^4 e^4 ln Lambda/ (4pi epsilon0)^2
+    # and nu_alphae = (4/(3sqrt(2pi))) gamma_alphae ne Te^(-3/2) sqrt(me)/m_alpha
+    nualphae = nuref*(8.0/3.0)*(1.0/np.sqrt(np.pi))*ne*np.sqrt(me)*(Te**(-1.5))*(Zalpha**(2))
+    amplitude = (np.sqrt(np.pi)/4.0)*Salpha/nualphae # pi^3/2 * (1/4 pi) factor had pi^3/2 due to normalisation of integration and pdf
+    return v0, vc3, amplitude
+
+workdir = "" # prefix prepended to every path used by this script
+input_filename_list = [workdir+"/excalibur/moment_kinetics_gyro/runs/fokker-planck-relaxation-example-4.toml"] # TOML input file of each run
+filename_dfns_list = [workdir+"/excalibur/moment_kinetics_gyro/runs/fokker-planck-relaxation-example-4/fokker-planck-relaxation-example-4.dfns.0.h5",] # distribution-function output of each run, in the same order
+identity = "example-4" # tag inserted into the name of the output PDF
+# NOTE(review): the run paths above start with "/" but the output PDF path is built as workdir + "excalibur/..." - confirm which form is intended
+vpagrid_list = [] # vpa grids, x data of the f(vpa) plots
+vperpgrid_list = [] # vperp grids, x data of the f(vperp) plots
+ff_list = [] # f along vperp
+logff_list = [] # ln(|f| + 1e-15) along vperp
+ffvpa_list = [] # f along vpa
+logffvpa_list = [] # ln(|f| + 1e-15) along vpa
+
+for ifile, filename_dfn in enumerate(filename_dfns_list):
+    # one pass per run: store the simulated cuts of f, then the analytical slowing-down curves
+    v0, vc3, amplitude = get_sd_input_data(input_filename_list[ifile])
+
+    vperpgrid, ff, vpagrid, ffvpa = get_sd_plot_data(filename_dfn)
+    vperpgrid_list.append(vperpgrid)
+    ff_list.append(ff)
+    logff_list.append(np.log(np.abs(ff)+1.0e-15))
+    vpagrid_list.append(vpagrid)
+    ffvpa_list.append(ffvpa)
+    logffvpa_list.append(np.log(np.abs(ffvpa)+1.0e-15))
+
+    # compute a slowing down distribution for comparison from an analytical formula:
+    # 1/(vc3 + v^3) for v < v0 and zero otherwise, scaled by `amplitude` from the input file
+    ff_sd = np.copy(vperpgrid)
+    nvperp = vperpgrid.size
+    vc3test = 3.0*np.sqrt(np.pi/2.0)*((0.01)**1.5)*(1.0/(np.sqrt(2.7e-4)*0.5))*(0.5*0.5*1.0/2.0)
+    #print(vc3test," ",vc3test**(1.0/3.0))
+    print("vc3: ", vc3," vc: ",vc3**(1.0/3.0))
+    for ivperp in range(0,nvperp):
+        vperp = vperpgrid[ivperp]
+        if vperp < v0:
+            ff_sd[ivperp] = 1.0/(vc3 + vperp**3.0)
+            #print(ivperp)
+        else:
+            ff_sd[ivperp] = 0.0
+    # diagnostic only: amplitude that would match the simulation at one hard-coded vperp point
+    ivperp = 32 #nvperp//3 + 1
+    amplitude_test=ff[ivperp]/ff_sd[ivperp]
+    print(amplitude_test," ",amplitude, " ", amplitude/amplitude_test)
+    ff_sd = ff_sd*amplitude
+#    ff_sd = ff_sd*amplitude_test
+
+    vperpgrid_list.append(vperpgrid)
+    ff_list.append(ff_sd)
+    logff_list.append(np.log(np.abs(ff_sd)+1.0e-15))
+
+    ff_sd_vpa = np.copy(vpagrid)
+    nvpa = vpagrid.size
+    for ivpa in range(0,nvpa):
+        vpa = vpagrid[ivpa]
+        if np.abs(vpa) < v0:
+            ff_sd_vpa[ivpa] = 1.0/(vc3 + np.abs(vpa)**3.0)
+            #print(ivpa)
+        else:
+            ff_sd_vpa[ivpa] = 0.0
+    # diagnostic only: amplitude that would match the simulation at one hard-coded vpa point
+    ivpa = 96# nvpa//2 + nvpa//6 + 1
+    amplitude_test=ffvpa[ivpa]/ff_sd_vpa[ivpa] # index with ivpa (not the vperp index chosen earlier)
+    print(amplitude_test," ",amplitude, " ", amplitude/amplitude_test)
+    ff_sd_vpa = ff_sd_vpa*amplitude
+#    ff_sd_vpa = ff_sd_vpa*amplitude_test
+
+    vpagrid_list.append(vpagrid)
+    ffvpa_list.append(ff_sd_vpa)
+    logffvpa_list.append(np.log(np.abs(ff_sd_vpa)+1.0e-15))
+
+marker_list = ['k','b','r','g','c','y'] # format string of each curve, in plotting order
+ylab_list = ["Num","SD"] # legend labels: simulation result, analytical slowing-down curve
+file = workdir + "excalibur/moment_kinetics_gyro/sd_scan_"+str(identity)+".pdf"
+pdf = PdfPages(file)
+
+# plot ff against vperp: simulation ("Num") and analytical slowing-down ("SD") curves
+plot_1d_list_pdf (vperpgrid_list,ff_list,marker_list,"$v_{\\perp}$", pdf,
+  title='$f(v_{\\|}=0,v_{\\perp})$',ylab='',xlims=None,ylims=None,aspx=9,aspy=6, xticks = None, yticks = None,
+  markersize=5, legend_title="", use_legend=True,loc_opt='upper right', ylab_list = ylab_list,
+  bbox_to_anchor_opt=(0.95, 0.95), legend_fontsize=15, ncol_opt=1)
+# plot logff against vperp; the y range is -10 up to 1.1 times the maximum of the first curve
+plot_1d_list_pdf (vperpgrid_list,logff_list,marker_list,"$v_{\\perp}$", pdf,
+  title='$\\ln|f(v_{\\|}=0,v_{\\perp})|$',ylab='',xlims=None,ylims=[-10.0,1.1*np.max(logff_list[0])],aspx=9,aspy=6, xticks = None, yticks = None,
+  markersize=5, legend_title="", use_legend=True,loc_opt='upper right', ylab_list = ylab_list,
+  bbox_to_anchor_opt=(0.95, 0.95), legend_fontsize=15, ncol_opt=1)
+
+# plot ffvpa against vpa: simulation ("Num") and analytical slowing-down ("SD") curves
+#print(vpagrid_list,ffvpa_list)
+plot_1d_list_pdf (vpagrid_list,ffvpa_list,marker_list,"$v_{\\|}$", pdf,
+  title='$f(v_{\\|},v_{\\perp}=0)$',ylab='',xlims=None,ylims=None,aspx=9,aspy=6, xticks = None, yticks = None,
+  markersize=5, legend_title="", use_legend=True,loc_opt='upper right', ylab_list = ylab_list,
+  bbox_to_anchor_opt=(0.95, 0.95), legend_fontsize=15, ncol_opt=1)
+# plot logffvpa against vpa; the y range is -10 up to 1.1 times the maximum of the first curve
+plot_1d_list_pdf (vpagrid_list,logffvpa_list,marker_list,"$v_{\\|}$", pdf,
+  title='$\\ln|f(v_{\\|},v_{\\perp}=0)|$',ylab='',xlims=None,ylims=[-10.0,1.1*np.max(logffvpa_list[0])],aspx=9,aspy=6, xticks = None, yticks = None,
+  markersize=5, legend_title="", use_legend=True,loc_opt='upper right', ylab_list = ylab_list,
+  bbox_to_anchor_opt=(0.95, 0.95), legend_fontsize=15, ncol_opt=1)
+
+
+pdf.close()
+print("Saving figure: "+file)
diff --git a/xarray_post_processing/plot_wall.py b/xarray_post_processing/plot_wall.py
new file mode 100644
index 000000000..5eb3c5865
--- /dev/null
+++ b/xarray_post_processing/plot_wall.py
@@ -0,0 +1,150 @@
+# -*- coding: utf-8 -*-
+import matplotlib
+# Force matplotlib to not use any Xwindows backend.
+matplotlib.use('Agg') # this line allows plots to be made without using a display environment variable
+from matplotlib.backends.backend_pdf import PdfPages
+import numpy as np
+
+from xarray_mk_utils import grid_data
+from xarray_mk_utils import dynamic_data
+from plot_mk_utils import plot_1d_list_pdf, plot_1d_loglog_list_pdf
+
+def get_wall_plot_data(filename):
+    """Load the wall diagnostics at the last stored time from one output file.
+
+    Returns (zgrid, vpagrid, f along vpa at z index 0, Ez along z,
+    phi along z, chodura_integral_lower), using species 0, r index 0 and
+    vperp index 0.  Assumes there is a single output file, from a
+    simulation using parallel HDF5 or a single shared-memory region.
+    """
+    _nz, _nz_loc, zgrid = grid_data(filename,"z")
+    # the r and vperp coordinate groups are read but not otherwise used here
+    _nr, _nr_loc, _rgrid = grid_data(filename,"r")
+    _nvpa, _nvpa_loc, vpagrid = grid_data(filename,"vpa")
+    _nvperp, _nvperp_loc, _vperpgrid = grid_data(filename,"vperp")
+
+    times, _has_time = dynamic_data(filename,"time")
+    pdf_data, _has_f = dynamic_data(filename,"f")
+    Ez_data, _has_Ez = dynamic_data(filename,"Ez")
+    phi_data, _has_phi = dynamic_data(filename,"phi")
+    chodura_lower, _has_cil = dynamic_data(filename,"chodura_integral_lower")
+
+    # fixed indices of the slices taken below
+    last_it = times.size - 1
+    species = 0 # single species
+    r_index = 0
+    z_index = 0 # lower wall plate
+    vperp_index = 0
+
+    f_wall = pdf_data[last_it,species,r_index,z_index,vperp_index,:]
+    Ez_of_z = Ez_data[last_it,r_index,:]
+    phi_of_z = phi_data[last_it,r_index,:]
+    return zgrid, vpagrid, f_wall, Ez_of_z, phi_of_z, chodura_lower[last_it,r_index]
+
+workdir = "" # prefix prepended to every path used by this script
+filename = workdir + "moment_kinetics_newgeo/runs/wall-bc_cheb/wall-bc_cheb.moments.0.h5" # NOTE(review): not referenced again in this script
+filename_dfns_list = ["moment_kinetics_newgeo/runs/wall-bc_cheb_epsz1/wall-bc_cheb_epsz1.dfns.0.h5",
+                      "moment_kinetics_newgeo/runs/wall-bc_cheb_epsz0.1/wall-bc_cheb_epsz0.1.dfns.0.h5",
+                      "moment_kinetics_newgeo/runs/wall-bc_cheb_epsz0.01/wall-bc_cheb_epsz0.01.dfns.0.h5",
+                      "moment_kinetics_newgeo/runs/wall-bc_cheb_epsz0.001/wall-bc_cheb_epsz0.001.dfns.0.h5",
+                      "moment_kinetics_newgeo/runs/wall-bc_cheb_epsz0/wall-bc_cheb_epsz0.dfns.0.h5"]
+
+zgrid_list = []
+vpagrid_list = []
+ff_list = []
+ff_over_vpa2_list = []
+Ez_list = []
+phi_list = []
+cil_list = []
+logphi_list = []
+logEz_list = []
+logz_list = []
+for filename_dfn in filename_dfns_list: # one entry is appended to every list above per run
+    zgrid, vpagrid, ff, Ez, phi, cil = get_wall_plot_data(workdir+filename_dfn)
+    nz = zgrid.size
+    zgrid_list.append(zgrid)
+    vpagrid_list.append(vpagrid)
+    ff_list.append(ff)
+    Ez_list.append(Ez)
+    phi_list.append(phi)
+    cil_list.append(cil)
+    nzlog = nz//12 # size of the window of z points that ends just before the last point
+    #print(zgrid[-1-nzlog:-1])
+    logz_list.append(np.log(0.5-zgrid[-1-nzlog:-1])) # ln(0.5 - z) on the window; the slice excludes the last point
+    logphi_list.append(np.log(phi[-1-nzlog:-1]-phi[-1])) # ln of phi relative to its value at the last z point
+    logEz_list.append(np.log(Ez[-1-nzlog:-1]))
+    nvpa = vpagrid.size
+    vpafunc = np.zeros(nvpa)
+    deltavpa = np.amin(vpagrid[1:nvpa]-vpagrid[:nvpa-1]) # smallest spacing between adjacent vpa points
+    #print(deltavpa)
+    for ivpa in range(0,nvpa):
+        if np.abs(vpagrid[ivpa]) > 0.5*deltavpa: # vpafunc stays 0 where |vpa| <= half the smallest spacing
+            vpafunc[ivpa] = 1.0/(vpagrid[ivpa]**2)    
+    ff_over_vpa2_list.append(ff*vpafunc)
+
+#print(logz_list)    
+#print(logEz_list)    
+#print(logphi_list)    
+#print(ff_over_vpa2_list)
+epsz_values = [1.0,0.1,0.01,0.001,0.0] # used as legend labels by position, so must follow the order of filename_dfns_list
+marker_list = ['k','b','r','g','c','y']
+ylab_list = [str(epsz) for epsz in epsz_values]
+file = workdir + "moment_kinetics_newgeo/wall_boundary_cutoff_scan.pdf"
+pdf = PdfPages(file)
+
+# plot ff at z index 0 against vpa, one curve per epsz value
+plot_1d_list_pdf (vpagrid_list,ff_list,marker_list,"$v_{\\|\\|}$", pdf,
+  title='$f(z_{\\rm wall-},v_{\\|\\|})$',ylab='',xlims=None,ylims=None,aspx=9,aspy=6, xticks = None, yticks = None,
+  markersize=5, legend_title="$\\epsilon_z$", use_legend=True,loc_opt='upper right', ylab_list = ylab_list,
+  bbox_to_anchor_opt=(0.95, 0.95), legend_fontsize=15, ncol_opt=1)
+# plot ff/vpa2 (ff multiplied by vpafunc), one curve per epsz value
+plot_1d_list_pdf (vpagrid_list,ff_over_vpa2_list,marker_list,"$v_{\\|\\|}$", pdf,
+  title='$f(z_{\\rm wall-},v_{\\|\\|})/v_{\\|\\|}^2$',ylab='',xlims=None,ylims=[-0.1,5.0],aspx=9,aspy=6, xticks = None, yticks = None,
+  markersize=5, legend_title="$\\epsilon_z$", use_legend=True,loc_opt='upper right', ylab_list = ylab_list,
+  bbox_to_anchor_opt=(0.95, 0.95), legend_fontsize=15, ncol_opt=1)
+# plot Bohm condition: chodura_integral_lower of each run against its epsz value
+ylist = [np.array(cil_list)]
+xlist = [np.array(epsz_values)]
+plot_1d_list_pdf (xlist,ylist,["kx--"],"$\\epsilon_z$", pdf,
+  title='$(T_{\\rm e}/2 n) \\int (f/v_{\\|\\|}^2) d v_{\\|\\|}/\\sqrt{\\pi} $',ylab='',
+  xlims=None,ylims=None,aspx=9,aspy=6, xticks = None, yticks = None,
+  markersize=15, legend_title="", use_legend=False,loc_opt='upper right', ylab_list = ylab_list,
+  bbox_to_anchor_opt=(0.25, 1.0), legend_fontsize=15, ncol_opt=1, hlines = [[1.0,"","r","--"]])
+# plot phi against z, one curve per epsz value
+plot_1d_list_pdf (zgrid_list,phi_list,marker_list,"$z/L_z$", pdf,
+  title='$\\phi(z)$',ylab='',xlims=None,ylims=None,aspx=9,aspy=6, xticks = None, yticks = None,
+  markersize=5, legend_title="$\\epsilon_z$", use_legend=True,loc_opt='upper right', ylab_list = ylab_list,
+  bbox_to_anchor_opt=(0.55, 0.85), legend_fontsize=15, ncol_opt=1)
+# plot Ez against z, one curve per epsz value
+plot_1d_list_pdf (zgrid_list,Ez_list,marker_list,"$z/L_z$", pdf,
+  title='$E_z(z)$',ylab='',xlims=None,ylims=None,aspx=9,aspy=6, xticks = None, yticks = None,
+  markersize=5, legend_title="$\\epsilon_z$", use_legend=True,loc_opt='upper right', ylab_list = ylab_list,
+  bbox_to_anchor_opt=(0.25, 1.0), legend_fontsize=15, ncol_opt=1)
+# plot log phi against ln(0.5 - z) on the window of points just before the last z point
+plot_1d_list_pdf (logz_list,logphi_list,marker_list,"$\\ln(0.5 - z/L_z)$", pdf,
+  title='$\\ln (\\phi(z)-\\phi_{\\rm wall})$',ylab='',xlims=None,ylims=None,aspx=12,aspy=8, xticks = None, yticks = None,
+  markersize=5, legend_title="", use_legend=True,loc_opt='upper right', ylab_list = ylab_list,
+  bbox_to_anchor_opt=(0.95, 0.95), legend_fontsize=15, ncol_opt=1,
+  legend_shadow=False,legend_frame=False,slines=[[0.5,0.0,'k--'," 0.5 log dz"],[0.6666, 1.5, 'b--',"2 log dz/3 + 1.5"]])
+# plot log Ez against ln(0.5 - z) on the same window
+plot_1d_list_pdf (logz_list,logEz_list,marker_list,"$\\ln(0.5 - z/L_z)$", pdf,
+  title='$\\ln E_z(z)$',ylab='',xlims=None,ylims=None,aspx=12,aspy=8, xticks = None, yticks = None,
+  markersize=5, legend_title="", use_legend=True,loc_opt='upper right', ylab_list = ylab_list,
+  bbox_to_anchor_opt=(0.95, 0.95), legend_fontsize=15, ncol_opt=1,
+  legend_shadow=False,legend_frame=False,slines=[[-0.5,0.0,'k--'," -0.5 log dz"],[-0.333, 0.9,'b--',"0.9 - log dz/3"]])
+
+# plot log phi
+#plot_1d_loglog_list_pdf (logz_list,logphi_list,marker_list,"$0.5 - z/L_z$", pdf,
+#  title='$\\phi(z)-\\phi_{\\rm wall}$',ylab='',xlims=None,ylims=None,aspx=12,aspy=8, xticks = None, yticks = None,
+#  markersize=5, legend_title="", use_legend=True,loc_opt='upper right', ylab_list = ylab_list,
+#  bbox_to_anchor_opt=(0.95, 0.95), legend_fontsize=15, ncol_opt=1,
+#  legend_shadow=False,legend_frame=False)
+# plot log Ez
+#plot_1d_loglog_list_pdf (logz_list,logEz_list,marker_list,"$0.5 - z/L_z$", pdf,
+#  title='$E_z(z)$',ylab='',xlims=None,ylims=None,aspx=12,aspy=8, xticks = None, yticks = None,
+#  markersize=5, legend_title="", use_legend=True,loc_opt='upper right', ylab_list = ylab_list,
+#  bbox_to_anchor_opt=(0.95, 0.95), legend_fontsize=15, ncol_opt=1,
+#  legend_shadow=False,legend_frame=False)
+
+pdf.close()
+print("Saving figure: "+file)
diff --git a/xarray_post_processing/xarray_mk_utils.py b/xarray_post_processing/xarray_mk_utils.py
new file mode 100644
index 000000000..c5106e197
--- /dev/null
+++ b/xarray_post_processing/xarray_mk_utils.py
@@ -0,0 +1,33 @@
+import xarray as xr
+
+
+def read_variable(dataset,varstring):
+    # Look up `varstring` in `dataset`: returns (array, True) on success, or
+    # (None, False) after printing an INFO line when the lookup raises KeyError.
+    try:
+        values = dataset[varstring].data
+    except KeyError:
+        print('INFO: '+varstring+' not found in data file')
+        return None, False
+    return values, True
+    
+def grid_data(filename,coord):
+    # Return (n_global, n_local, grid) from group "coords/<coord>"; `with` closes the file even if a read fails
+    with xr.open_dataset(filename,group ="coords/"+coord) as dataset:
+        n_global = dataset["n_global"].data
+        n_local = dataset["n_local"].data
+        grid = dataset["grid"].data
+    return n_global, n_local, grid
+
+def wgts_data(filename,coord):
+    # Return the "wgts" array from group "coords/<coord>"; `with` closes the file even if the read fails
+    with xr.open_dataset(filename,group ="coords/"+coord) as dataset:
+        wgts = dataset["wgts"].data
+    return wgts
+    
+def dynamic_data(filename,varstring):
+    # Return (array, True) for `varstring` in group "dynamic_data/", or (None, False) if absent; the file is closed on exit
+    with xr.open_dataset(filename,group ="dynamic_data/") as dataset:
+        var, var_present = read_variable(dataset,varstring)
+    return var, var_present
+    

From a4954879d5239ab1d357882804992ae427315ef3 Mon Sep 17 00:00:00 2001
From: Michael Hardman <29800382+mrhardman@users.noreply.github.com>
Date: Mon, 22 Jul 2024 10:12:31 +0100
Subject: [PATCH 02/41] First attempt to port Chebyshev documentation.

---
 docs/src/chebyshev.md | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 docs/src/chebyshev.md

diff --git a/docs/src/chebyshev.md b/docs/src/chebyshev.md
new file mode 100644
index 000000000..e69de29bb

From 65c36b0b1df804db89e366185c663e2ed420ba1a Mon Sep 17 00:00:00 2001
From: Michael Hardman <29800382+mrhardman@users.noreply.github.com>
Date: Mon, 22 Jul 2024 10:15:54 +0100
Subject: [PATCH 03/41] Add Chebyshev documentation to .md file.

---
 docs/src/chebyshev.md | 251 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 251 insertions(+)

diff --git a/docs/src/chebyshev.md b/docs/src/chebyshev.md
index e69de29bb..e5f3b17f2 100644
--- a/docs/src/chebyshev.md
+++ b/docs/src/chebyshev.md
@@ -0,0 +1,251 @@
+Chebyshev transform via Fourier transform
+===============================================
+
+```math
+\begin{equation}
+\end{equation}
+```
+We express a function $f$ as a sum of Chebyshev polynomials
+```math 
+\begin{equation} f(x) = \sum^N_{n=0} a_{n}T_n(x)\label{eq:cheb-expansion}\end{equation}
+```
+The Chebyshev polynomials are defined by 
+```math
+\begin{equation} T_n(\cos \theta) = \cos n \theta, {\rm~with~}x = \cos \theta. \end{equation}
+```
+We can see how to find $\{a_{n}\}$ given $\{f(x_j)\}$ via Fourier transform. 
+The Fourier series representation of $f$ on a uniform grid indexed by $j$ is defined by 
+```math
+\begin{equation} f_j = \sum_{k=0}^{M-1} b_{k}\exp\left[i \frac{2\pi k j}{M}\right].\label{eq:fourier-series}\end{equation}
+```
+
+Gauss-Chebyshev-Lobotto points
+===============================================
+
+We pick points 
+```math
+\begin{equation} x_j = \cos \theta_j, \quad \theta_j = \frac{j \pi}{N} \quad 0 \leq j \leq N.\end{equation}
+```
+Then 
+```math
+\begin{equation} T_n{x_j} = \cos \frac{n j \pi}{N}.\end{equation}
+```
+Assuming that $M = 2N$, with $N$ an integer, and $b_{k} = b_{M-k}$ for $k>0$, we have that 
+```math
+\begin{equation} f_j = b_{0} + b_{N}(-1)^j + \sum_{n=1}^{N-1}
+b_{n}\left(\exp\left[i \frac{\pi n j}{N}\right]+\exp\left[-i \frac{\pi n j}{N}\right]\right).\end{equation}
+```
+Comparing this to the expression for $f(x_j)$ in the Chebyshev representation, 
+using the form of $T_n(x_j)$, 
+```math
+\begin{equation} f_j = a_{0} + a_{N}(-1)^j + \frac{1}{2}\sum_{n=1}^{N-1}
+a_{n}\left(\exp\left[i \frac{\pi n j}{N}\right]+\exp\left[-i \frac{\pi n j}{N}\right]\right),\end{equation}
+```
+we find that the Chebyshev representation on the Chebyshev points is equivalent 
+to the Fourier representation on the uniform grid points, if we identify
+```math
+\begin{equation} b_{0} = a_{0}, \quad  b_{N} = a_{N}, \quad b_{j} = \frac{a_{j}}{2} {\rm~for~} 1 \leq j \leq N-1. \end{equation}
+```
+This fact allows us to carry out the Chebyshev transform by Fourier transforming the $\{f_j\}$ data
+and carrying out the correct normalisation of the resulting coefficients. 
+
+Gauss-Chebyshev-Radau points
+===============================================
+
+The last subsection dealt with grids which contain both endpoints on the $[-1,1]$ domain. 
+Certain problems require domains which contain a single endpoint, i.e., $x \in (-1,1]$. For 
+these cases we choose the points 
+```math
+\begin{equation} x_j = \cos \theta_j, \quad \theta_j = \frac{2 j \pi}{2 N + 1} \quad 0 \leq j \leq N.\end{equation}
+```
+Writing out the Chebyshev series \eq{eq:cheb-expansion}, 
+we have that 
+```math
+\begin{equation} \begin{split} f(x_j) = & \sum^N_{n=0} a_{n} \cos \frac{2 n j \pi}{2 N + 1} \\ & = a_{0} + \sum^N_{n=1} \frac{a_{n}}{2}\left(\exp\left[i \frac{2\pi n j}{2N +1}\right] + \exp\left[-i \frac{2\pi n j}{2N +1}\right]\right).\end{split} \label{eq:cheb-expansion-radau-points}\end{equation}
+```
+The form of the series \eq{eq:cheb-expansion-radau-points} is identical to the form of 
+a Fourier series on an odd number of points, i.e., taking $M = 2 N + 1$ in equation \eq{eq:fourier-series},
+and assuming $b_{k} = b_{M -k}$ for $k>0$,
+we have that 
+```math
+\begin{equation} f_j = b_{0} + \sum_{k=1}^{N} b_{k}\left(\exp\left[i \frac{2\pi k j}{2N+1}\right] + \exp\left[-i \frac{2\pi k j}{2N+1} \right]\right). \end{equation}
+```
+We can thus take a Chebyshev transform using a Fourier transform on Gauss-Chebyshev-Radau points if we identify 
+```math
+\begin{equation} b_{0} = a_{0}, \quad b_{j} = \frac{a_{j}}{2} {\rm~for~} 1 \leq j \leq N. \end{equation}
+```
+
+Chebyshev coefficients of derivatives of a function
+===============================================
+
+Starting from the expression of $f$ as a sum of Chebyshev polynomials, equation \eq{eq:cheb-expansion},
+we can obtain an expression for the derivative
+```math
+\begin{equation} \frac{d f}{d x} = \sum^N_{n=0} a_{n}\frac{d T_{n}}{d x}. \label{eq:derivative-def}\end{equation}
+```
+We note that we must be able to express ${d f}/{d x}$ as a sum 
+of Chebyshev polynomials of up to order $N-1$, i.e.,
+```math
+\begin{equation} \frac{d f}{d x} = \sum^{N-1}_{n=0} d_{n}T_{n}. \end{equation}
+```
+We must determine the set $\{d_{n}\}$ in terms of the set $\{a_{n}\}$.
+First, we equate the two expressions to find that 
+```math
+\begin{equation} \sum^N_{k=0} a_{k}\frac{d T_{k}}{d x} = \sum^{N-1}_{n=0} d_{n}T_{n}. \label{eq:dn-def}\end{equation}
+```
+We use the Chebyshev polynomials of the second kind $U_n(x)$ to aid us in the calculation of the set $\{d_{n}\}$. 
+These polynomials are defined by 
+```math
+\begin{equation} U_{0}(x) = 1, \quad U_{1}(x) = 2x, \quad U_{n+1} = 2 x U_{n}(x) - U_{n-1}(x).\end{equation}
+```
+Note the useful relations 
+```math
+\begin{equation} \frac{d T_{n}}{d x} = n U_{n-1}, {\rm~for~}n\geq 1, \quad \frac{d T_{0}}{d x} = 0,\end{equation}
+```
+```math
+\begin{equation} T_{n} = \frac{1}{2}\left(U_{n} - U_{n -2}\right), T_{0} = U_{0}\quad, {\rm ~and~} \quad 2 T_{1} = U_{1}. \end{equation}
+```
+Using these identities, which may be obtained from the trigonometric definition of $U_{n}(\cos \theta)$
+```math
+\begin{equation}  U_{n}(\cos \theta) \sin \theta = \sin \left((n+1)\theta\right),\end{equation}
+```
+we find that equation  \eq{eq:dn-def} becomes 
+```math
+\begin{equation} \begin{split}\sum^N_{n=1} a_{n} n U_{n-1}(x) =& \frac{d_{N-1}}{2}U_{N-1}+\frac{d_{N-2}}{2}U_{N-2} 
+\\ & + \sum^{N-3}_{k=1} \frac{d_{k}-d_{k+2}}{2}U_{k} + \left(d_{0} - \frac{d_{2}}{2}\right)U_{0}. \end{split}
+\label{eq:dn-def-U}\end{equation}
+```
+Using the orthogonality relation 
+```math
+\begin{equation} \int^1_{-1} U_{m}(x)U_{n}(x)\sqrt{1-x^2} \; d x = 
+\left\{\begin{tabular}{l} $0 {\rm ~if~} n\neq m $ \\ $\pi/2 {\rm ~if~} n=m$  \end{tabular}\right.,\end{equation}
+```
+we obtain the (uniquely-determined) relations 
+```math
+\begin{equation} \begin{split} &d_{N-1} = 2Na_{N},\quad d_{N-2} = 2(N-1)a_{N-1}, \\ 
+& d_{k} = 2(k+1) a_{k+1} + d_{k+2}, \quad d_{0} = \frac{d_{2}}{2} + a_{1}.\end{split} \label{eq:dn-result-U}\end{equation}
+```
+Note the lack of a second relation for $d_{0}$, but that otherwise the expressions \eq{eq:dn-result-U}
+agree with those of the last section.       
+
+Clenshaw-Curtis integration weights
+===============================================
+
+We require the integration weights for the set of points $\{x_j\}$ chosen 
+in our numerical scheme. The weights $w_{j}$ are defined implicitly by 
+```math
+\begin{equation} \int^{1}_{-1} f(x) \; d x = \sum_{j=0}^N f(x_j) w_{j}. \label{eq:w-sum}\end{equation}
+```
+In the Chebyshev scheme we use the change of variables $x = \cos \theta$
+to write 
+```math
+\begin{equation} \int^{1}_{-1} f(x) \; d x = \int^\pi_0 f(\cos\theta) \sin \theta \; d \theta . \label{eq:change-of-variables-integral} \end{equation}
+```
+ Using the series expansion \eq{eq:cheb-expansion} in equation \eq{eq:change-of-variables-integral}
+ we find that 
+ ```math
+ \begin{equation} \int^{1}_{-1} f(x) \; d x = \sum^N_{n=0} a_{n}\int^\pi_0 \cos (n \theta) \sin \theta \; d \theta
+ . \label{eq:series-integral} \end{equation}
+ ```
+ Note the integral identity
+ ```math
+\begin{equation} \int^\pi_0 \cos(n \theta) \sin \theta \; d \theta = \frac{\cos(n \pi) +1}{1 - n^2} {\rm~for~} n \geq 0.\end{equation}
+```
+ Also note that 
+ ```math
+ \begin{equation} \frac{\cos(n \pi) +1}{1 - n^2} = \left\{\begin{tabular}{l} $0 {\rm ~if~} n = 2 r + 1, ~r \in \mathbb{Z} $ \\ $2/(1 - n^2) {\rm ~if~} n=2r,~r. \in \mathbb{Z}$  \end{tabular}\right. \end{equation}
+ ```
+ We define ```math
+ \begin{equation} \J_{n} = \frac{\cos(n \pi) +1}{1 - n^2}. \end{equation}
+ ```
+ Using this definition, we can write the integral of $f(x)$ can be written 
+ in terms of a sum over of the Chebyshev coefficients:
+ ```math
+ \begin{equation} \int^{1}_{-1} f(x) \; d x = \sum_{n=0}^N \J_{n} a_{n}. \label{eq:Cheb-sum}\end{equation}
+ ```
+ 
+ To avoid computing the set of coefficients $\{a_{n}\}$ every time we wish to integrate $f(x_j)$,
+ we use the inverse transforms. This transform allows us to rewrite equation \eq{eq:Cheb-sum} in the form \eq{eq:w-sum}.
+ Since the inverse transform differs between the Gauss-Chebyshev-Lobotto and Gauss-Chebyshev-Radau cases, we treat each 
+ case separately below. 
+ 
+Weights on Gauss-Chebyshev-Lobotto points
+===============================================
+  We use the inverse transformation 
+ ```math
+ \begin{equation} a_{n} = \frac{\q_{n}}{2N}\sum^{2N-1}_{j=0} \hat{f}_j \exp\left[- i \frac{2\pi n j}{2N}\right], \label{eq:inverse-transform-GCL}\end{equation}
+ ```
+ where 
+ ```math
+ \begin{equation} \q_{n} = \left\{\begin{tabular}{l} $2 {\rm ~if~} n\neq0,N $ \\ $1 {\rm ~if~} n=0,N$  \end{tabular}\right.,\end{equation}
+ ```
+and $\hat{f}_j$ is $f(x_j)$ on the extended domain in FFT order, i.e.,
+```math
+\begin{equation} \hat{f}_j = f(x_{j}) {\rm~for~} 0 \leq j \leq N ,\quad \hat{f}_j = f(x_{2N-j}){\rm~for~} N+1 \leq j \leq 2N-1. \end{equation}
+```
+ With this inverse tranformation, we can write 
+```math
+\begin{equation} \begin{split}\sum_{n=0}^N \J_{n} a_{n} & =  \sum^{2N-1}_{n=0} \frac{a_{n}\J_{n}}{\q_{n}} \\
+ & = \sum^{2N-1}_{j=0}\sum^{2N-1}_{n=0} \frac{\hat{f}_j \J_{n}}{2N} \exp\left[-i \frac{2\pi n j}{2N}\right] \\ 
+ & = \sum^{2N-1}_{j=0} \hat{f}_j \v_{j} = \sum^{N}_{j=0} \hat{f}_j \q_{j}\v_{j},\end{split}\label{eq:weights-working}\end{equation}
+```
+ where in the first step we have extended the sum from $N$ to $2N-1$ and used FFT-order definitions of $\J_{n}$ and $a_{n}$
+```math
+\begin{equation} \J_{j} = \J_{2N-j}, {\rm~for~} N+1 \leq j \leq 2N-1,\end{equation}
+```
+```math
+\begin{equation} a_{j} = a_{2N-j}, {\rm~for~} N+1 \leq j \leq 2N-1.\end{equation}
+```
+In the second step we use the definition of the inverse transform \eq{eq:inverse-transform-GCR}, and 
+in the third step we define 
+```math
+\begin{equation} \v_{j} = \sum_{n=0}^{2N-1}\frac{\J_{n}}{2N}\exp\left[-i \frac{2\pi n j}{2N}\right].\end{equation}
+```
+Finally, we can compare equations \eq{eq:w-sum} and \eq{eq:weights-working} and deduce that 
+```math
+\begin{equation} w_{j} = \q_{j}\v_{j} {\rm~for~} 0 \leq j \leq N.  \end{equation}
+```
+We can write $\v_{j}$ in terms of a discrete cosine transform, i.e.,
+```math
+\begin{equation} \v_{j} = \frac{1}{2N}\left(\J_{0} + (-1)^j\J_{N} + 2\sum_{n=1}^{N-1}\J_{n}\cos\left(\frac{\pi n j}{N}\right)\right).\end{equation}
+```
+ 
+Weights on Gauss-Chebyshev-Radau points
+===============================================
+We use the inverse transformation 
+```math
+\begin{equation} a_{n} = \frac{\q_{n}}{2N+1}\sum^{2N}_{j=0} \hat{f}_j \exp\left[- i \frac{2\pi n j}{2N+1}\right], \label{eq:inverse-transform-GCR}\end{equation}
+```
+where 
+```math
+\begin{equation} \q_{n} = \left\{\begin{tabular}{l} $2 {\rm ~if~} n > 0 $ \\ $1 {\rm ~if~} n=0$  \end{tabular}\right.,\end{equation}
+```
+and $\hat{f}_j$ is $f(x_j)$ on the extended domain in FFT order, i.e.,
+```math
+\begin{equation} \hat{f}_j = f(x_{j}) {\rm~for~} 0 \leq j \leq N ,\quad \hat{f}_j = f(x_{2N-j+1}){\rm~for~} N+1 \leq j \leq 2N. \end{equation}
+```
+Note that the details of what is the appropriate FFT order depends on the order in which the points $x_j$ are stored.
+The key detail in the Chebyshev-Radau scheme is that (in the notation above)
+$x_0 = 1$ is not a repeated point, and must occupy $\hat{f}_0$. 
+With this inverse tranformation, we can write 
+```math
+\begin{equation} \begin{split}\sum_{n=0}^N \J_{n} a_{n} & =  \sum^{2N}_{n=0} \frac{a_{n}\J_{n}}{\q_{n}} \\
+& = \sum^{2N}_{j=0}\sum^{2N}_{n=0} \frac{\hat{f}_j \J_{n}}{2N+1} \exp\left[-i \frac{2\pi n j}{2N+1}\right] \\ 
+& = \sum^{2N}_{j=0} \hat{f}_j \v_{j} = \sum^{N}_{j=0} \hat{f}_j \q_{j}\v_{j},\end{split}\label{eq:weights-working}\end{equation}
+```
+where in the first step we have extended the sum from $N$ to $2N$ and used FFT-order definitions of $\J_{n}$ and $a_{n}$
+```math
+\begin{equation} \J_{j} = \J_{2N+1-j}, {\rm~for~} N+1 \leq j \leq 2N,\end{equation}
+```
+```math
+\begin{equation} a_{j} = a_{2N+1-j}, {\rm~for~} N+1 \leq j \leq 2N.\end{equation}
+```
+In the second step we use the definition of the inverse transform \eq{eq:inverse-transform-GCR}, and 
+in the third step we define 
+```math
+\begin{equation} \v_{j} = \sum_{n=0}^{2N}\frac{\J_{n}}{2N+1}\exp\left[-i \frac{2\pi n j}{2N+1}\right].\end{equation}
+```
+Finally, we can compare equations \eq{eq:w-sum} and \eq{eq:weights-working} and deduce that 
+```math
+\begin{equation} w_{j} = \q_{j}\v_{j} {\rm~for~} 0 \leq j \leq N.  \end{equation}
+```
\ No newline at end of file

From 957264f426fa92a6828e78bbbf11bcab2c0e2de1 Mon Sep 17 00:00:00 2001
From: mrhardman <29800382+mrhardman@users.noreply.github.com>
Date: Mon, 22 Jul 2024 10:21:15 +0100
Subject: [PATCH 04/41] Debug chebyshev.md

---
 docs/src/chebyshev.md | 54 +++++++++++++++++++++----------------------
 1 file changed, 26 insertions(+), 28 deletions(-)

diff --git a/docs/src/chebyshev.md b/docs/src/chebyshev.md
index e5f3b17f2..e70c5e339 100644
--- a/docs/src/chebyshev.md
+++ b/docs/src/chebyshev.md
@@ -28,7 +28,7 @@ We pick points
 ```
 Then 
 ```math
-\begin{equation} T_n{x_j} = \cos \frac{n j \pi}{N}.\end{equation}
+\begin{equation} T_n(x_j) = \cos \frac{n j \pi}{N}.\end{equation}
 ```
 Assuming that $M = 2N$, with $N$ an integer, and $b_{k} = b_{M-k}$ for $k>0$, we have that 
 ```math
@@ -124,9 +124,7 @@ we obtain the (unqiuely-determined) relations
 ```math
 \begin{equation} \begin{split} &d_{N-1} = 2Na_{N},\quad d_{N-2} = 2(N-1)a_{N-1}, \\ 
 & d_{k} = 2(k+1) a_{k+1} + d_{k+2}, \quad d_{0} = \frac{d_{2}}{2} + a_{1}.\end{split} \label{eq:dn-result-U}\end{equation}
-```
-Note the lack of a second relation for $d_{0}$, but that otherwise the expressions \eq{eq:dn-result-U}
-agree with those of the last section.       
+```       
 
 Clenshaw-Curtis integration weights
 ===============================================
@@ -156,12 +154,12 @@ to write
  \begin{equation} \frac{\cos(n \pi) +1}{1 - n^2} = \left\{\begin{tabular}{l} $0 {\rm ~if~} n = 2 r + 1, ~r \in \mathbb{Z} $ \\ $2/(1 - n^2) {\rm ~if~} n=2r,~r. \in \mathbb{Z}$  \end{tabular}\right. \end{equation}
  ```
  We define ```math
- \begin{equation} \J_{n} = \frac{\cos(n \pi) +1}{1 - n^2}. \end{equation}
+ \begin{equation} J_{n} = \frac{\cos(n \pi) +1}{1 - n^2}. \end{equation}
  ```
  Using this definition, we can write the integral of $f(x)$ can be written 
  in terms of a sum over of the Chebyshev coefficients:
  ```math
- \begin{equation} \int^{1}_{-1} f(x) \; d x = \sum_{n=0}^N \J_{n} a_{n}. \label{eq:Cheb-sum}\end{equation}
+ \begin{equation} \int^{1}_{-1} f(x) \; d x = \sum_{n=0}^N J_{n} a_{n}. \label{eq:Cheb-sum}\end{equation}
  ```
  
  To avoid computing the set of coefficients $\{a_{n}\}$ every time we wish to integrate $f(x_j)$,
@@ -173,25 +171,25 @@ Weights on Gauss-Chebyshev-Lobotto points
 ===============================================
   We use the inverse transformation 
  ```math
- \begin{equation} a_{n} = \frac{\q_{n}}{2N}\sum^{2N-1}_{j=0} \hat{f}_j \exp\left[- i \frac{2\pi n j}{2N}\right], \label{eq:inverse-transform-GCL}\end{equation}
+ \begin{equation} a_{n} = \frac{q_{n}}{2N}\sum^{2N-1}_{j=0} \hat{f}_j \exp\left[- i \frac{2\pi n j}{2N}\right], \label{eq:inverse-transform-GCL}\end{equation}
  ```
  where 
  ```math
- \begin{equation} \q_{n} = \left\{\begin{tabular}{l} $2 {\rm ~if~} n\neq0,N $ \\ $1 {\rm ~if~} n=0,N$  \end{tabular}\right.,\end{equation}
+ \begin{equation} q_{n} = \left\{\begin{tabular}{l} $2 {\rm ~if~} n\neq0,N $ \\ $1 {\rm ~if~} n=0,N$  \end{tabular}\right.,\end{equation}
  ```
 and $\hat{f}_j$ is $f(x_j)$ on the extended domain in FFT order, i.e.,
 ```math
-\begin{equation} \hat{f}_j = f(x_{j}) {\rm~for~} 0 \leq j \leq N ,\quad \hat{f}_j = f(x_{2N-j}){\rm~for~} N+1 \leq j \leq 2N-1. \end{equation}
+\begin{equation} \hat{f}_j = f(x_{j}) {\rm~for~} 0 \leq j \leq N ,quad \hat{f}_j = f(x_{2N-j}){\rm~for~} N+1 \leq j \leq 2N-1. \end{equation}
 ```
  With this inverse tranformation, we can write 
 ```math
-\begin{equation} \begin{split}\sum_{n=0}^N \J_{n} a_{n} & =  \sum^{2N-1}_{n=0} \frac{a_{n}\J_{n}}{\q_{n}} \\
- & = \sum^{2N-1}_{j=0}\sum^{2N-1}_{n=0} \frac{\hat{f}_j \J_{n}}{2N} \exp\left[-i \frac{2\pi n j}{2N}\right] \\ 
- & = \sum^{2N-1}_{j=0} \hat{f}_j \v_{j} = \sum^{N}_{j=0} \hat{f}_j \q_{j}\v_{j},\end{split}\label{eq:weights-working}\end{equation}
+\begin{equation} \begin{split}\sum_{n=0}^N J_{n} a_{n} & =  \sum^{2N-1}_{n=0} \frac{a_{n}J_{n}}{q_{n}} \\
+ & = \sum^{2N-1}_{j=0}\sum^{2N-1}_{n=0} \frac{\hat{f}_j J_{n}}{2N} \exp\left[-i \frac{2\pi n j}{2N}\right] \\ 
+ & = \sum^{2N-1}_{j=0} \hat{f}_j v_{j} = \sum^{N}_{j=0} \hat{f}_j q_{j}v_{j},\end{split}\label{eq:weights-working}\end{equation}
 ```
- where in the first step we have extended the sum from $N$ to $2N-1$ and used FFT-order definitions of $\J_{n}$ and $a_{n}$
+ where in the first step we have extended the sum from $N$ to $2N-1$ and used FFT-order definitions of $J_{n}$ and $a_{n}$
 ```math
-\begin{equation} \J_{j} = \J_{2N-j}, {\rm~for~} N+1 \leq j \leq 2N-1,\end{equation}
+\begin{equation} J_{j} = J_{2N-j}, {\rm~for~} N+1 \leq j \leq 2N-1,\end{equation}
 ```
 ```math
 \begin{equation} a_{j} = a_{2N-j}, {\rm~for~} N+1 \leq j \leq 2N-1.\end{equation}
@@ -199,26 +197,26 @@ and $\hat{f}_j$ is $f(x_j)$ on the extended domain in FFT order, i.e.,
 In the second step we use the definition of the inverse transform \eq{eq:inverse-transform-GCR}, and 
 in the third step we define 
 ```math
-\begin{equation} \v_{j} = \sum_{n=0}^{2N-1}\frac{\J_{n}}{2N}\exp\left[-i \frac{2\pi n j}{2N}\right].\end{equation}
+\begin{equation} v_{j} = \sum_{n=0}^{2N-1}\frac{J_{n}}{2N}\exp\left[-i \frac{2\pi n j}{2N}\right].\end{equation}
 ```
 Finally, we can compare equations \eq{eq:w-sum} and \eq{eq:weights-working} and deduce that 
 ```math
-\begin{equation} w_{j} = \q_{j}\v_{j} {\rm~for~} 0 \leq j \leq N.  \end{equation}
+\begin{equation} w_{j} = q_{j}v_{j} {\rm~for~} 0 \leq j \leq N.  \end{equation}
 ```
-We can write $\v_{j}$ in terms of a discrete cosine transform, i.e.,
+We can write $v_{j}$ in terms of a discrete cosine transform, i.e.,
 ```math
-\begin{equation} \v_{j} = \frac{1}{2N}\left(\J_{0} + (-1)^j\J_{N} + 2\sum_{n=1}^{N-1}\J_{n}\cos\left(\frac{\pi n j}{N}\right)\right).\end{equation}
+\begin{equation} v_{j} = \frac{1}{2N}\left(J_{0} + (-1)^jJ_{N} + 2\sum_{n=1}^{N-1}J_{n}\cos\left(\frac{\pi n j}{N}\right)\right).\end{equation}
 ```
  
 Weights on Gauss-Chebyshev-Radau points
 ===============================================
 We use the inverse transformation 
 ```math
-\begin{equation} a_{n} = \frac{\q_{n}}{2N+1}\sum^{2N}_{j=0} \hat{f}_j \exp\left[- i \frac{2\pi n j}{2N+1}\right], \label{eq:inverse-transform-GCR}\end{equation}
+\begin{equation} a_{n} = \frac{q_{n}}{2N+1}\sum^{2N}_{j=0} \hat{f}_j \exp\left[- i \frac{2\pi n j}{2N+1}\right], \label{eq:inverse-transform-GCR}\end{equation}
 ```
 where 
 ```math
-\begin{equation} \q_{n} = \left\{\begin{tabular}{l} $2 {\rm ~if~} n > 0 $ \\ $1 {\rm ~if~} n=0$  \end{tabular}\right.,\end{equation}
+\begin{equation} q_{n} = \left\{\begin{tabular}{l} $2 {\rm ~if~} n > 0 $ \\ $1 {\rm ~if~} n=0$  \end{tabular}\right.,\end{equation}
 ```
 and $\hat{f}_j$ is $f(x_j)$ on the extended domain in FFT order, i.e.,
 ```math
@@ -229,13 +227,13 @@ The key detail in the Chebyshev-Radau scheme is that (in the notation above)
 $x_0 = 1$ is not a repeated point, and must occupy $\hat{f}_0$. 
 With this inverse tranformation, we can write 
 ```math
-\begin{equation} \begin{split}\sum_{n=0}^N \J_{n} a_{n} & =  \sum^{2N}_{n=0} \frac{a_{n}\J_{n}}{\q_{n}} \\
-& = \sum^{2N}_{j=0}\sum^{2N}_{n=0} \frac{\hat{f}_j \J_{n}}{2N+1} \exp\left[-i \frac{2\pi n j}{2N+1}\right] \\ 
-& = \sum^{2N}_{j=0} \hat{f}_j \v_{j} = \sum^{N}_{j=0} \hat{f}_j \q_{j}\v_{j},\end{split}\label{eq:weights-working}\end{equation}
+\begin{equation} \begin{split}\sum_{n=0}^N J_{n} a_{n} & =  \sum^{2N}_{n=0} \frac{a_{n}J_{n}}{q_{n}} \\
+& = \sum^{2N}_{j=0}\sum^{2N}_{n=0} \frac{\hat{f}_j J_{n}}{2N+1} \exp\left[-i \frac{2\pi n j}{2N+1}\right] \\ 
+& = \sum^{2N}_{j=0} \hat{f}_j v_{j} = \sum^{N}_{j=0} \hat{f}_j q_{j}v_{j},\end{split}\label{eq:weights-working}\end{equation}
 ```
-where in the first step we have extended the sum from $N$ to $2N$ and used FFT-order definitions of $\J_{n}$ and $a_{n}$
+where in the first step we have extended the sum from $N$ to $2N$ and used FFT-order definitions of $J_{n}$ and $a_{n}$
 ```math
-\begin{equation} \J_{j} = \J_{2N+1-j}, {\rm~for~} N+1 \leq j \leq 2N,\end{equation}
+\begin{equation} J_{j} = J_{2N+1-j}, {\rm~for~} N+1 \leq j \leq 2N,\end{equation}
 ```
 ```math
 \begin{equation} a_{j} = a_{2N+1-j}, {\rm~for~} N+1 \leq j \leq 2N.\end{equation}
@@ -243,9 +241,9 @@ where in the first step we have extended the sum from $N$ to $2N$ and used FFT-o
 In the second step we use the definition of the inverse transform \eq{eq:inverse-transform-GCR}, and 
 in the third step we define 
 ```math
-\begin{equation} \v_{j} = \sum_{n=0}^{2N}\frac{\J_{n}}{2N+1}\exp\left[-i \frac{2\pi n j}{2N+1}\right].\end{equation}
+\begin{equation} v_{j} = \sum_{n=0}^{2N}\frac{J_{n}}{2N+1}\exp\left[-i \frac{2\pi n j}{2N+1}\right].\end{equation}
 ```
 Finally, we can compare equations \eq{eq:w-sum} and \eq{eq:weights-working} and deduce that 
 ```math
-\begin{equation} w_{j} = \q_{j}\v_{j} {\rm~for~} 0 \leq j \leq N.  \end{equation}
-```
\ No newline at end of file
+\begin{equation} w_{j} = q_{j}v_{j} {\rm~for~} 0 \leq j \leq N.  \end{equation}
+```

From 5e69ba5b66ffcaec6c1711d30499a48c85fdedf6 Mon Sep 17 00:00:00 2001
From: mrhardman <29800382+mrhardman@users.noreply.github.com>
Date: Mon, 22 Jul 2024 10:27:22 +0100
Subject: [PATCH 05/41] Debug chebyshev.md

---
 docs/src/chebyshev.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/src/chebyshev.md b/docs/src/chebyshev.md
index e70c5e339..61285204d 100644
--- a/docs/src/chebyshev.md
+++ b/docs/src/chebyshev.md
@@ -80,8 +80,8 @@ Chebyshev coefficients of derivatives of a function
 
 Starting from the expression of $f$ as a sum Chebyshev polynomials, equation \eq{eq:cheb-expansion},
 we can obtain an expression for the derivative
-```math \begin{equation}
-\frac{d f}{d x} = \sum^N_{n=0} a_{n}\frac{d T_{n}}{d x}. \label{eq:derivative-def}\end{equation}
+```math
+\begin{equation} \frac{d f}{d x} = \sum^N_{n=0} a_{n}\frac{d T_{n}}{d x}. \label{eq:derivative-def}\end{equation}
 ```
 We note that we must be able to express ${d f}/{d x}$ as a sum 
 of Chebyshev polynomials of up to order $N-1$, i.e.,

From 2cf21670ab257e457e4da052264c1f64a3af7073 Mon Sep 17 00:00:00 2001
From: mrhardman <29800382+mrhardman@users.noreply.github.com>
Date: Mon, 22 Jul 2024 10:29:32 +0100
Subject: [PATCH 06/41] Debug chebyshev.md

---
 docs/src/chebyshev.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/src/chebyshev.md b/docs/src/chebyshev.md
index 61285204d..54c19aea0 100644
--- a/docs/src/chebyshev.md
+++ b/docs/src/chebyshev.md
@@ -229,7 +229,7 @@ With this inverse tranformation, we can write
 ```math
 \begin{equation} \begin{split}\sum_{n=0}^N J_{n} a_{n} & =  \sum^{2N}_{n=0} \frac{a_{n}J_{n}}{q_{n}} \\
 & = \sum^{2N}_{j=0}\sum^{2N}_{n=0} \frac{\hat{f}_j J_{n}}{2N+1} \exp\left[-i \frac{2\pi n j}{2N+1}\right] \\ 
-& = \sum^{2N}_{j=0} \hat{f}_j v_{j} = \sum^{N}_{j=0} \hat{f}_j q_{j}v_{j},\end{split}\label{eq:weights-working}\end{equation}
+& = \sum^{2N}_{j=0} \hat{f}_j v_{j} = \sum^{N}_{j=0} \hat{f}_j q_{j}v_{j},\end{split}\label{eq:weights-working-radau}\end{equation}
 ```
 where in the first step we have extended the sum from $N$ to $2N$ and used FFT-order definitions of $J_{n}$ and $a_{n}$
 ```math
@@ -243,7 +243,7 @@ in the third step we define
 ```math
 \begin{equation} v_{j} = \sum_{n=0}^{2N}\frac{J_{n}}{2N+1}\exp\left[-i \frac{2\pi n j}{2N+1}\right].\end{equation}
 ```
-Finally, we can compare equations \eq{eq:w-sum} and \eq{eq:weights-working} and deduce that 
+Finally, we can compare equations \eq{eq:w-sum} and \eq{eq:weights-working-radau} and deduce that 
 ```math
 \begin{equation} w_{j} = q_{j}v_{j} {\rm~for~} 0 \leq j \leq N.  \end{equation}
 ```

From f12082c7e79a636694fe12f4637245836035b9f6 Mon Sep 17 00:00:00 2001
From: mrhardman <29800382+mrhardman@users.noreply.github.com>
Date: Mon, 22 Jul 2024 10:33:06 +0100
Subject: [PATCH 07/41] Debug chebyshev.md

---
 docs/src/chebyshev.md | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/docs/src/chebyshev.md b/docs/src/chebyshev.md
index 54c19aea0..d2f5caf66 100644
--- a/docs/src/chebyshev.md
+++ b/docs/src/chebyshev.md
@@ -150,10 +150,11 @@ to write
 \begin{equation} \int^\pi_0 \cos(n \theta) \sin \theta \; d \theta = \frac{\cos(n \pi) +1}{1 - n^2} {\rm~for~} n \geq 0.\end{equation}
 ```
  Also note that 
- ```math
- \begin{equation} \frac{\cos(n \pi) +1}{1 - n^2} = \left\{\begin{tabular}{l} $0 {\rm ~if~} n = 2 r + 1, ~r \in \mathbb{Z} $ \\ $2/(1 - n^2) {\rm ~if~} n=2r,~r. \in \mathbb{Z}$  \end{tabular}\right. \end{equation}
+```math
+\begin{equation} \frac{\cos(n \pi) +1}{1 - n^2} = \left\{\begin{tabular}{l} $0 {\rm ~if~} n = 2 r + 1, ~r \in \mathbb{Z} $ \\ $2/(1 - n^2) {\rm ~if~} n=2r,~r. \in \mathbb{Z}$  \end{tabular}\right. \end{equation}
  ```
- We define ```math
+ We define 
+ ```math
  \begin{equation} J_{n} = \frac{\cos(n \pi) +1}{1 - n^2}. \end{equation}
  ```
  Using this definition, we can write the integral of $f(x)$ can be written 

From 616be7a39f0fbcec46d66dc4c2f5b82ae399da1a Mon Sep 17 00:00:00 2001
From: mrhardman <29800382+mrhardman@users.noreply.github.com>
Date: Mon, 22 Jul 2024 10:33:58 +0100
Subject: [PATCH 08/41] Debug chebyshev.md

---
 docs/src/chebyshev.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/src/chebyshev.md b/docs/src/chebyshev.md
index d2f5caf66..f14e013b1 100644
--- a/docs/src/chebyshev.md
+++ b/docs/src/chebyshev.md
@@ -180,7 +180,7 @@ Weights on Gauss-Chebyshev-Lobotto points
  ```
 and $\hat{f}_j$ is $f(x_j)$ on the extended domain in FFT order, i.e.,
 ```math
-\begin{equation} \hat{f}_j = f(x_{j}) {\rm~for~} 0 \leq j \leq N ,quad \hat{f}_j = f(x_{2N-j}){\rm~for~} N+1 \leq j \leq 2N-1. \end{equation}
+\begin{equation} \hat{f}_j = f(x_{j}) {\rm~for~} 0 \leq j \leq N ,\quad \hat{f}_j = f(x_{2N-j}){\rm~for~} N+1 \leq j \leq 2N-1. \end{equation}
 ```
  With this inverse tranformation, we can write 
 ```math

From d379912cb9e627b9a1684be9d2138be7470addf8 Mon Sep 17 00:00:00 2001
From: mrhardman <29800382+mrhardman@users.noreply.github.com>
Date: Mon, 22 Jul 2024 10:46:42 +0100
Subject: [PATCH 09/41] Debug chebyshev.md

---
 docs/src/chebyshev.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/src/chebyshev.md b/docs/src/chebyshev.md
index f14e013b1..721960a89 100644
--- a/docs/src/chebyshev.md
+++ b/docs/src/chebyshev.md
@@ -118,7 +118,7 @@ we find that equation  \eq{eq:dn-def} becomes
 Using the orthogonality relation 
 ```math
 \begin{equation} \int^1_{-1} U_{m}(x)U_{n}(x)\sqrt{1-x^2} \; d x = 
-\left\{\begin{tabular}{l} $0 {\rm ~if~} n\neq m $ \\ $\pi/2 {\rm ~if~} n=m$  \end{tabular}\right.,\end{equation}
+\left\{\begin{array}{l} 0 {\rm ~if~} n\neq m  \\ \pi/2 {\rm ~if~} n=m \\ \end{array} \right.,\end{equation}
 ```
 we obtain the (unqiuely-determined) relations 
 ```math

From 1893d93ed437bd7fe2392e0e25756973846c1f11 Mon Sep 17 00:00:00 2001
From: mrhardman <29800382+mrhardman@users.noreply.github.com>
Date: Mon, 22 Jul 2024 10:48:43 +0100
Subject: [PATCH 10/41] Debug chebyshev.md

---
 docs/src/chebyshev.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/docs/src/chebyshev.md b/docs/src/chebyshev.md
index 721960a89..76c76cbf1 100644
--- a/docs/src/chebyshev.md
+++ b/docs/src/chebyshev.md
@@ -118,7 +118,7 @@ we find that equation  \eq{eq:dn-def} becomes
 Using the orthogonality relation 
 ```math
 \begin{equation} \int^1_{-1} U_{m}(x)U_{n}(x)\sqrt{1-x^2} \; d x = 
-\left\{\begin{array}{l} 0 {\rm ~if~} n\neq m  \\ \pi/2 {\rm ~if~} n=m \\ \end{array} \right.,\end{equation}
+\left\{\begin{array}{l} 0 {\rm ~if~} n\neq m  \\ \pi/2 {\rm ~if~} n=m \\ \end{array} \right.\end{equation}
 ```
 we obtain the (unqiuely-determined) relations 
 ```math
@@ -151,7 +151,7 @@ to write
 ```
  Also note that 
 ```math
-\begin{equation} \frac{\cos(n \pi) +1}{1 - n^2} = \left\{\begin{tabular}{l} $0 {\rm ~if~} n = 2 r + 1, ~r \in \mathbb{Z} $ \\ $2/(1 - n^2) {\rm ~if~} n=2r,~r. \in \mathbb{Z}$  \end{tabular}\right. \end{equation}
+\begin{equation} \frac{\cos(n \pi) +1}{1 - n^2} = \left\{\begin{array}{l} 0 {\rm ~if~} n = 2 r + 1, ~r \in \mathbb{Z}  \\ 2/(1 - n^2) {\rm ~if~} n=2r,~r \in \mathbb{Z}  \end{array}\right. \end{equation}
  ```
  We define 
  ```math
@@ -176,7 +176,7 @@ Weights on Gauss-Chebyshev-Lobotto points
  ```
  where 
  ```math
- \begin{equation} q_{n} = \left\{\begin{tabular}{l} $2 {\rm ~if~} n\neq0,N $ \\ $1 {\rm ~if~} n=0,N$  \end{tabular}\right.,\end{equation}
+ \begin{equation} q_{n} = \left\{\begin{array}{l} 2 {\rm ~if~} n\neq0,N  \\ 1 {\rm ~if~} n=0,N  \end{array}\right.,\end{equation}
  ```
 and $\hat{f}_j$ is $f(x_j)$ on the extended domain in FFT order, i.e.,
 ```math
@@ -217,7 +217,7 @@ We use the inverse transformation
 ```
 where 
 ```math
-\begin{equation} q_{n} = \left\{\begin{tabular}{l} $2 {\rm ~if~} n > 0 $ \\ $1 {\rm ~if~} n=0$  \end{tabular}\right.,\end{equation}
+\begin{equation} q_{n} = \left\{\begin{array}{l} 2 {\rm ~if~} n > 0  \\ 1 {\rm ~if~} n=0  \end{array}\right.,\end{equation}
 ```
 and $\hat{f}_j$ is $f(x_j)$ on the extended domain in FFT order, i.e.,
 ```math

From 9a0a65172db48da996ef7ce4c1256a9c1dbeb1eb Mon Sep 17 00:00:00 2001
From: mrhardman <29800382+mrhardman@users.noreply.github.com>
Date: Mon, 22 Jul 2024 10:49:54 +0100
Subject: [PATCH 11/41] Debug chebyshev.md

---
 docs/src/chebyshev.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/src/chebyshev.md b/docs/src/chebyshev.md
index 76c76cbf1..8ff0a5de0 100644
--- a/docs/src/chebyshev.md
+++ b/docs/src/chebyshev.md
@@ -176,7 +176,7 @@ Weights on Gauss-Chebyshev-Lobotto points
  ```
  where 
  ```math
- \begin{equation} q_{n} = \left\{\begin{array}{l} 2 {\rm ~if~} n\neq0,N  \\ 1 {\rm ~if~} n=0,N  \end{array}\right.,\end{equation}
+ \begin{equation} q_{n} = \left\{\begin{array}{l} 2 {\rm ~if~} n\neq0,N  \\ 1 {\rm ~if~} n=0,N  \end{array}\right.\end{equation}
  ```
 and $\hat{f}_j$ is $f(x_j)$ on the extended domain in FFT order, i.e.,
 ```math
@@ -217,7 +217,7 @@ We use the inverse transformation
 ```
 where 
 ```math
-\begin{equation} q_{n} = \left\{\begin{array}{l} 2 {\rm ~if~} n > 0  \\ 1 {\rm ~if~} n=0  \end{array}\right.,\end{equation}
+\begin{equation} q_{n} = \left\{\begin{array}{l} 2 {\rm ~if~} n > 0  \\ 1 {\rm ~if~} n=0  \end{array}\right.\end{equation}
 ```
 and $\hat{f}_j$ is $f(x_j)$ on the extended domain in FFT order, i.e.,
 ```math

From c4c17636a19258a345b1eb2b154275283638b2ee Mon Sep 17 00:00:00 2001
From: mrhardman <29800382+mrhardman@users.noreply.github.com>
Date: Mon, 22 Jul 2024 10:59:22 +0100
Subject: [PATCH 12/41] Update chebyshev.md to have a manual label.

---
 docs/src/chebyshev.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/src/chebyshev.md b/docs/src/chebyshev.md
index 8ff0a5de0..20f772448 100644
--- a/docs/src/chebyshev.md
+++ b/docs/src/chebyshev.md
@@ -7,7 +7,7 @@ Chebyshev tranform via Fourier transform
 ```
 We express a function $f$ as a sum of Chebyshev polynomials
 ```math 
-\begin{equation} f(x) = \sum^N_{n=0} a_{n}T_n(x)\label{eq:cheb-expansion}\end{equation}
+\begin{equation} f(x) = \sum^N_{n=0} a_{n}T_n(x)\label{eq:cheb-expansion} \tag{1}\end{equation}
 ```
 The Chebyshev polynomials are defined by 
 ```math

From 37e49bdca3bd97c4e990a2bfb15b158468946306 Mon Sep 17 00:00:00 2001
From: mrhardman <29800382+mrhardman@users.noreply.github.com>
Date: Mon, 22 Jul 2024 11:04:44 +0100
Subject: [PATCH 13/41] Update chebyshev.md to have manual labels.

---
 docs/src/chebyshev.md | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/docs/src/chebyshev.md b/docs/src/chebyshev.md
index 20f772448..8d9f42173 100644
--- a/docs/src/chebyshev.md
+++ b/docs/src/chebyshev.md
@@ -16,7 +16,7 @@ The Chebyshev polynomials are defined by
 We can see how to find $\{a_{n}\}$ given $\{f(x_j)\}$ via Fourier transform. 
 The Fourier series representation of $f$ on a uniform grid indexed by $j$ is defined by 
 ```math
-\begin{equation} f_j = \sum_{k=0}^{M-1} b_{k}\exp\left[i \frac{2\pi k j}{M}\right].\label{eq:fourier-series}\end{equation}
+\begin{equation} f_j = \sum_{k=0}^{M-1} b_{k}\exp\left[i \frac{2\pi k j}{M}\right].\label{eq:fourier-series}\tag{2}\end{equation}
 ```
 
 Gauss-Chebyshev-Lobotto points
@@ -61,7 +61,7 @@ these cases we choose the points
 Writing out the Chebyshev series \eq{eq:cheb-expansion}, 
 we have that 
 ```math
-\begin{equation} \begin{split} f(x_j) = & \sum^N_{n=0} a_{n} \cos \frac{2 n j \pi}{2 N + 1} \\ & = a_{0} + \sum^N_{n=1} \frac{a_{n}}{2}\left(\exp\left[i \frac{2\pi n j}{2N +1}\right] + \exp\left[-i \frac{2\pi n j}{2N +1}\right]\right).\end{split} \label{eq:cheb-expansion-radau-points}\end{equation}
+\begin{equation} \begin{split} f(x_j) = & \sum^N_{n=0} a_{n} \cos \frac{2 n j \pi}{2 N + 1} \\ & = a_{0} + \sum^N_{n=1} \frac{a_{n}}{2}\left(\exp\left[i \frac{2\pi n j}{2N +1}\right] + \exp\left[-i \frac{2\pi n j}{2N +1}\right]\right).\end{split} \label{eq:cheb-expansion-radau-points}\tag{3}\end{equation}
 ```
 The form of the series \eq{eq:cheb-expansion-radau-points} is identical to the form of 
 a Fourier series on an odd number of points, i.e., taking $M = 2 N + 1$ in equation \eq{eq:fourier-series},
@@ -81,7 +81,7 @@ Chebyshev coefficients of derivatives of a function
 Starting from the expression of $f$ as a sum Chebyshev polynomials, equation \eq{eq:cheb-expansion},
 we can obtain an expression for the derivative
 ```math
-\begin{equation} \frac{d f}{d x} = \sum^N_{n=0} a_{n}\frac{d T_{n}}{d x}. \label{eq:derivative-def}\end{equation}
+\begin{equation} \frac{d f}{d x} = \sum^N_{n=0} a_{n}\frac{d T_{n}}{d x}. \label{eq:derivative-def}\tag{4}\end{equation}
 ```
 We note that we must be able to express ${d f}/{d x}$ as a sum 
 of Chebyshev polynomials of up to order $N-1$, i.e.,
@@ -91,7 +91,7 @@ of Chebyshev polynomials of up to order $N-1$, i.e.,
 We must determine the set $\{d_{n}\}$ in terms of the set $\{a_{n}\}$.
 First, we equate the two expressions to find that 
 ```math
-\begin{equation} \sum^N_{k=0} a_{k}\frac{d T_{k}}{d x} = \sum^{N-1}_{n=0} d_{n}T_{n}. \label{eq:dn-def}\end{equation}
+\begin{equation} \sum^N_{k=0} a_{k}\frac{d T_{k}}{d x} = \sum^{N-1}_{n=0} d_{n}T_{n}. \label{eq:dn-def}\tag{5}\end{equation}
 ```
 We use the Chebyshev polynomials of the second kind $U_n{x}$ to aid us in the calculation of the set $\{d_{n}\}$. 
 These polynomials are defined by 
@@ -113,7 +113,7 @@ we find that equation  \eq{eq:dn-def} becomes
 ```math
 \begin{equation} \begin{split}\sum^N_{n=1} a_{n} n U_{n-1}(x) =& \frac{d_{N-1}}{2}U_{N-1}+\frac{d_{N-2}}{2}U_{N-2} 
 \\ & + \sum^{N-3}_{k=1} \frac{d_{k}-d_{k+2}}{2}U_{k} + \left(d_{0} - \frac{d_{2}}{2}\right)U_{0}. \end{split}
-\label{eq:dn-def-U}\end{equation}
+\label{eq:dn-def-U}\tag{6}\end{equation}
 ```
 Using the orthogonality relation 
 ```math
@@ -123,7 +123,7 @@ Using the orthogonality relation
 we obtain the (unqiuely-determined) relations 
 ```math
 \begin{equation} \begin{split} &d_{N-1} = 2Na_{N},\quad d_{N-2} = 2(N-1)a_{N-1}, \\ 
-& d_{k} = 2(k+1) a_{k+1} + d_{k+2}, \quad d_{0} = \frac{d_{2}}{2} + a_{1}.\end{split} \label{eq:dn-result-U}\end{equation}
+& d_{k} = 2(k+1) a_{k+1} + d_{k+2}, \quad d_{0} = \frac{d_{2}}{2} + a_{1}.\end{split} \label{eq:dn-result-U}\tag{7}\end{equation}
 ```       
 
 Clenshaw-Curtis integration weights
@@ -132,18 +132,18 @@ Clenshaw-Curtis integration weights
 We require the integration weights for the set of points $\{x_j\}$ chosen 
 in our numerical scheme. The weights $w_{j}$ are defined implicitly by 
 ```math
-\begin{equation} \int^{1}_{-1} f(x) \; d x = \sum_{j=0}^N f(x_j) w_{j}. \label{eq:w-sum}\end{equation}
+\begin{equation} \int^{1}_{-1} f(x) \; d x = \sum_{j=0}^N f(x_j) w_{j}. \label{eq:w-sum}\tag{8}\end{equation}
 ```
 In the Chebyshev scheme we use the change of variables $x = \cos \theta$
 to write 
 ```math
-\begin{equation} \int^{1}_{-1} f(x) \; d x = \int^\pi_0 f(\cos\theta) \sin \theta \; d \theta . \label{eq:change-of-variables-integral} \end{equation}
+\begin{equation} \int^{1}_{-1} f(x) \; d x = \int^\pi_0 f(\cos\theta) \sin \theta \; d \theta . \label{eq:change-of-variables-integral} \tag{9}\end{equation}
 ```
  Using the series expansion \eq{eq:cheb-expansion} in equation \eq{eq:change-of-variables-integral}
  we find that 
  ```math
  \begin{equation} \int^{1}_{-1} f(x) \; d x = \sum^N_{n=0} a_{n}\int^\pi_0 \cos (n \theta) \sin \theta \; d \theta
- . \label{eq:series-integral} \end{equation}
+ . \label{eq:series-integral}\tag{10} \end{equation}
  ```
  Note the integral identity
  ```math
@@ -160,7 +160,7 @@ to write
  Using this definition, we can write the integral of $f(x)$ can be written 
  in terms of a sum over of the Chebyshev coefficients:
  ```math
- \begin{equation} \int^{1}_{-1} f(x) \; d x = \sum_{n=0}^N J_{n} a_{n}. \label{eq:Cheb-sum}\end{equation}
+ \begin{equation} \int^{1}_{-1} f(x) \; d x = \sum_{n=0}^N J_{n} a_{n}. \label{eq:Cheb-sum}\tag{11}\end{equation}
  ```
  
  To avoid computing the set of coefficients $\{a_{n}\}$ every time we wish to integrate $f(x_j)$,
@@ -172,7 +172,7 @@ Weights on Gauss-Chebyshev-Lobotto points
 ===============================================
   We use the inverse transformation 
  ```math
- \begin{equation} a_{n} = \frac{q_{n}}{2N}\sum^{2N-1}_{j=0} \hat{f}_j \exp\left[- i \frac{2\pi n j}{2N}\right], \label{eq:inverse-transform-GCL}\end{equation}
+ \begin{equation} a_{n} = \frac{q_{n}}{2N}\sum^{2N-1}_{j=0} \hat{f}_j \exp\left[- i \frac{2\pi n j}{2N}\right], \label{eq:inverse-transform-GCL}\tag{12}\end{equation}
  ```
  where 
  ```math
@@ -186,7 +186,7 @@ and $\hat{f}_j$ is $f(x_j)$ on the extended domain in FFT order, i.e.,
 ```math
 \begin{equation} \begin{split}\sum_{n=0}^N J_{n} a_{n} & =  \sum^{2N-1}_{n=0} \frac{a_{n}J_{n}}{q_{n}} \\
  & = \sum^{2N-1}_{j=0}\sum^{2N-1}_{n=0} \frac{\hat{f}_j J_{n}}{2N} \exp\left[-i \frac{2\pi n j}{2N}\right] \\ 
- & = \sum^{2N-1}_{j=0} \hat{f}_j v_{j} = \sum^{N}_{j=0} \hat{f}_j q_{j}v_{j},\end{split}\label{eq:weights-working}\end{equation}
+ & = \sum^{2N-1}_{j=0} \hat{f}_j v_{j} = \sum^{N}_{j=0} \hat{f}_j q_{j}v_{j},\end{split}\label{eq:weights-working}\tag{13}\end{equation}
 ```
  where in the first step we have extended the sum from $N$ to $2N-1$ and used FFT-order definitions of $J_{n}$ and $a_{n}$
 ```math
@@ -213,7 +213,7 @@ Weights on Gauss-Chebyshev-Radau points
 ===============================================
 We use the inverse transformation 
 ```math
-\begin{equation} a_{n} = \frac{q_{n}}{2N+1}\sum^{2N}_{j=0} \hat{f}_j \exp\left[- i \frac{2\pi n j}{2N+1}\right], \label{eq:inverse-transform-GCR}\end{equation}
+\begin{equation} a_{n} = \frac{q_{n}}{2N+1}\sum^{2N}_{j=0} \hat{f}_j \exp\left[- i \frac{2\pi n j}{2N+1}\right], \label{eq:inverse-transform-GCR}\tag{14}\end{equation}
 ```
 where 
 ```math
@@ -230,7 +230,7 @@ With this inverse tranformation, we can write
 ```math
 \begin{equation} \begin{split}\sum_{n=0}^N J_{n} a_{n} & =  \sum^{2N}_{n=0} \frac{a_{n}J_{n}}{q_{n}} \\
 & = \sum^{2N}_{j=0}\sum^{2N}_{n=0} \frac{\hat{f}_j J_{n}}{2N+1} \exp\left[-i \frac{2\pi n j}{2N+1}\right] \\ 
-& = \sum^{2N}_{j=0} \hat{f}_j v_{j} = \sum^{N}_{j=0} \hat{f}_j q_{j}v_{j},\end{split}\label{eq:weights-working-radau}\end{equation}
+& = \sum^{2N}_{j=0} \hat{f}_j v_{j} = \sum^{N}_{j=0} \hat{f}_j q_{j}v_{j},\end{split}\label{eq:weights-working-radau}\tag{15}\end{equation}
 ```
 where in the first step we have extended the sum from $N$ to $2N$ and used FFT-order definitions of $J_{n}$ and $a_{n}$
 ```math

From a721ba7e7df33c5306dd336c2577c754915c602b Mon Sep 17 00:00:00 2001
From: mrhardman <29800382+mrhardman@users.noreply.github.com>
Date: Mon, 22 Jul 2024 11:13:05 +0100
Subject: [PATCH 14/41] Update chebyshev.md to reference manual labels.

---
 docs/src/chebyshev.md | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/docs/src/chebyshev.md b/docs/src/chebyshev.md
index 8d9f42173..d7da3da0a 100644
--- a/docs/src/chebyshev.md
+++ b/docs/src/chebyshev.md
@@ -58,13 +58,13 @@ these cases we choose the points
 ```math
 \begin{equation} x_j = \cos \theta_j, \quad \theta_j = \frac{2 j \pi}{2 N + 1} \quad 0 \leq j \leq N.\end{equation}
 ```
-Writing out the Chebyshev series \eq{eq:cheb-expansion}, 
+Writing out the Chebyshev series (1), <!-- \eq{eq:cheb-expansion}, -->
 we have that 
 ```math
 \begin{equation} \begin{split} f(x_j) = & \sum^N_{n=0} a_{n} \cos \frac{2 n j \pi}{2 N + 1} \\ & = a_{0} + \sum^N_{n=1} \frac{a_{n}}{2}\left(\exp\left[i \frac{2\pi n j}{2N +1}\right] + \exp\left[-i \frac{2\pi n j}{2N +1}\right]\right).\end{split} \label{eq:cheb-expansion-radau-points}\tag{3}\end{equation}
 ```
-The form of the series \eq{eq:cheb-expansion-radau-points} is identical to the form of 
-a Fourier series on an odd number of points, i.e., taking $M = 2 N + 1$ in equation \eq{eq:fourier-series},
+The form of the series (3) <!--\eq{eq:cheb-expansion-radau-points}--> is identical to the form of 
+a Fourier series on an odd number of points, i.e., taking $M = 2 N + 1$ in equation (2) <!--\eq{eq:fourier-series}-->,
 and assuming $b_{k} = b_{M -k}$ for $k>1$,
 we have that 
 ```math
@@ -78,7 +78,7 @@ We can thus take a Chebyshev transform using a Fourier transform on Gauss-Chebys
 Chebyshev coefficients of derivatives of a function
 ===============================================
 
-Starting from the expression of $f$ as a sum Chebyshev polynomials, equation \eq{eq:cheb-expansion},
+Starting from the expression of $f$ as a sum Chebyshev polynomials, equation (1) <!--\eq{eq:cheb-expansion}-->,
 we can obtain an expression for the derivative
 ```math
 \begin{equation} \frac{d f}{d x} = \sum^N_{n=0} a_{n}\frac{d T_{n}}{d x}. \label{eq:derivative-def}\tag{4}\end{equation}
@@ -109,7 +109,7 @@ Using these identities, which may be obtained from the trigonometric definition
 ```math
 \begin{equation}  U_{n}(\cos \theta) \sin \theta = \sin \left((n+1)\theta\right),\end{equation}
 ```
-we find that equation  \eq{eq:dn-def} becomes 
+we find that equation (5) <!--\eq{eq:dn-def}--> becomes 
 ```math
 \begin{equation} \begin{split}\sum^N_{n=1} a_{n} n U_{n-1}(x) =& \frac{d_{N-1}}{2}U_{N-1}+\frac{d_{N-2}}{2}U_{N-2} 
 \\ & + \sum^{N-3}_{k=1} \frac{d_{k}-d_{k+2}}{2}U_{k} + \left(d_{0} - \frac{d_{2}}{2}\right)U_{0}. \end{split}
@@ -139,7 +139,7 @@ to write
 ```math
 \begin{equation} \int^{1}_{-1} f(x) \; d x = \int^\pi_0 f(\cos\theta) \sin \theta \; d \theta . \label{eq:change-of-variables-integral} \tag{9}\end{equation}
 ```
- Using the series expansion \eq{eq:cheb-expansion} in equation \eq{eq:change-of-variables-integral}
+ Using the series expansion (1) <!--\eq{eq:cheb-expansion}--> in equation (9) <!--\eq{eq:change-of-variables-integral}-->
  we find that 
  ```math
  \begin{equation} \int^{1}_{-1} f(x) \; d x = \sum^N_{n=0} a_{n}\int^\pi_0 \cos (n \theta) \sin \theta \; d \theta
@@ -164,7 +164,7 @@ to write
  ```
  
  To avoid computing the set of coefficients $\{a_{n}\}$ every time we wish to integrate $f(x_j)$,
- we use the inverse transforms. This transform allows us to rewrite equation \eq{eq:Cheb-sum} in the form \eq{eq:w-sum}.
+ we use the inverse transforms. This transform allows us to rewrite equation (11) <!--\eq{eq:Cheb-sum}--> in the form (8) <!--\eq{eq:w-sum}-->.
  Since the inverse transform differs between the Gauss-Chebyshev-Lobotto and Gauss-Chebyshev-Radau cases, we treat each 
  case separately below. 
  
@@ -195,12 +195,12 @@ and $\hat{f}_j$ is $f(x_j)$ on the extended domain in FFT order, i.e.,
 ```math
 \begin{equation} a_{j} = a_{2N-j}, {\rm~for~} N+1 \leq j \leq 2N-1.\end{equation}
 ```
-In the second step we use the definition of the inverse transform \eq{eq:inverse-transform-GCR}, and 
+In the second step we use the definition of the inverse transform (12) <!--\eq{eq:inverse-transform-GCL}-->, and 
 in the third step we define 
 ```math
 \begin{equation} v_{j} = \sum_{n=0}^{2N-1}\frac{J_{n}}{2N}\exp\left[-i \frac{2\pi n j}{2N}\right].\end{equation}
 ```
-Finally, we can compare equations \eq{eq:w-sum} and \eq{eq:weights-working} and deduce that 
+Finally, we can compare equations (8) <!--\eq{eq:w-sum}--> and (13) <!--\eq{eq:weights-working}--> and deduce that 
 ```math
 \begin{equation} w_{j} = q_{j}v_{j} {\rm~for~} 0 \leq j \leq N.  \end{equation}
 ```
@@ -239,12 +239,12 @@ where in the first step we have extended the sum from $N$ to $2N$ and used FFT-o
 ```math
 \begin{equation} a_{j} = a_{2N+1-j}, {\rm~for~} N+1 \leq j \leq 2N.\end{equation}
 ```
-In the second step we use the definition of the inverse transform \eq{eq:inverse-transform-GCR}, and 
+In the second step we use the definition of the inverse transform (14) <!--\eq{eq:inverse-transform-GCR}-->, and 
 in the third step we define 
 ```math
 \begin{equation} v_{j} = \sum_{n=0}^{2N}\frac{J_{n}}{2N+1}\exp\left[-i \frac{2\pi n j}{2N+1}\right].\end{equation}
 ```
-Finally, we can compare equations \eq{eq:w-sum} and \eq{eq:weights-working-radau} and deduce that 
+Finally, we can compare equations (8) <!--\eq{eq:w-sum}--> and (15) <!--\eq{eq:weights-working-radau}--> and deduce that 
 ```math
 \begin{equation} w_{j} = q_{j}v_{j} {\rm~for~} 0 \leq j \leq N.  \end{equation}
 ```

From b2ccf62fc579a1b01108e8c1d056a0f74189e1d6 Mon Sep 17 00:00:00 2001
From: mrhardman <29800382+mrhardman@users.noreply.github.com>
Date: Mon, 22 Jul 2024 11:15:24 +0100
Subject: [PATCH 15/41] Update chebyshev.md

---
 docs/src/chebyshev.md | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/docs/src/chebyshev.md b/docs/src/chebyshev.md
index d7da3da0a..908daaf09 100644
--- a/docs/src/chebyshev.md
+++ b/docs/src/chebyshev.md
@@ -35,8 +35,7 @@ Assuming that $M = 2N$, with $N$ an integer, and $b_{k} = b_{M-k}$ for $k>0$, we
 \begin{equation} f_j = b_{0} + b_{N}(-1)^j + \sum_{n=1}^{N-1}
 b_{n}\left(\exp\left[i \frac{\pi n j}{N}\right]+\exp\left[-i \frac{\pi n j}{N}\right]\right).\end{equation}
 ```
-Comparing this to the expression for $f(x_j)$ in the Chebyshev representation, 
-using the form of $T_n(x_j)$, 
+Comparing this to the expression for $f(x_j)$ in the Chebyshev representation,
 ```math
 \begin{equation} f_j = a_{0} + a_{N}(-1)^j + \frac{1}{2}\sum_{n=1}^{N-1}
 a_{n}\left(\exp\left[i \frac{\pi n j}{N}\right]+\exp\left[-i \frac{\pi n j}{N}\right]\right),\end{equation}

From d40a160e5c83cbd088c3ece2bd15dd637f12ae84 Mon Sep 17 00:00:00 2001
From: Michael Hardman <29800382+mrhardman@users.noreply.github.com>
Date: Tue, 6 Aug 2024 13:03:39 +0100
Subject: [PATCH 16/41] Move xarray_post_processing files and add README.md and
 requirements.txt.

---
 .../xarray_post_processing/README.md          | 19 +++++++++++++++++++
 .../plot_error_data.py                        |  0
 .../plot_integration_error_data.py            |  0
 .../plot_many_collisions.py                   |  0
 .../xarray_post_processing}/plot_mk_utils.py  |  0
 .../xarray_post_processing}/plot_sd.py        |  0
 .../xarray_post_processing}/plot_wall.py      |  0
 .../xarray_post_processing/requirements.txt   |  5 +++++
 .../xarray_mk_utils.py                        |  0
 9 files changed, 24 insertions(+)
 create mode 100644 publication_inputs/xarray_post_processing/README.md
 rename {xarray_post_processing => publication_inputs/xarray_post_processing}/plot_error_data.py (100%)
 rename {xarray_post_processing => publication_inputs/xarray_post_processing}/plot_integration_error_data.py (100%)
 rename {xarray_post_processing => publication_inputs/xarray_post_processing}/plot_many_collisions.py (100%)
 rename {xarray_post_processing => publication_inputs/xarray_post_processing}/plot_mk_utils.py (100%)
 rename {xarray_post_processing => publication_inputs/xarray_post_processing}/plot_sd.py (100%)
 rename {xarray_post_processing => publication_inputs/xarray_post_processing}/plot_wall.py (100%)
 create mode 100644 publication_inputs/xarray_post_processing/requirements.txt
 rename {xarray_post_processing => publication_inputs/xarray_post_processing}/xarray_mk_utils.py (100%)

diff --git a/publication_inputs/xarray_post_processing/README.md b/publication_inputs/xarray_post_processing/README.md
new file mode 100644
index 000000000..3275baee0
--- /dev/null
+++ b/publication_inputs/xarray_post_processing/README.md
@@ -0,0 +1,19 @@
+
+## xarray & h5py plotting scripts for publication quality figures
+
+This directory contains python scripts for making publication quality figures.
+We briefly describe the contents of the files.
+
+* `plot_mk_utils.py`: A series of plotting functions using matplotlib and pyplot.
+
+* `xarray_mk_utils.py`: A series of utility functions for reading data from `moment_kinetics` output files.
+
+* `plot_wall.py`: A script for comparing sheath-boundary simulations.
+
+* `plot_sd.py`: A script for comparing the numerical solution from `moment_kinetics` to the analytical slowing-down solution.
+
+* `plot_many_collisions.py`: A script for comparing simulations of the relaxation to the Maxwellian distribution in the presence of self collisions.
+
+* `plot_error_data.py` and `plot_integration_error_data.py`: Scripts for plotting data produced by the evaluation tests of the Fokker-Planck collision operator.
+
+The `requirements.txt` file lists the required modules, pinned to the versions last used.
diff --git a/xarray_post_processing/plot_error_data.py b/publication_inputs/xarray_post_processing/plot_error_data.py
similarity index 100%
rename from xarray_post_processing/plot_error_data.py
rename to publication_inputs/xarray_post_processing/plot_error_data.py
diff --git a/xarray_post_processing/plot_integration_error_data.py b/publication_inputs/xarray_post_processing/plot_integration_error_data.py
similarity index 100%
rename from xarray_post_processing/plot_integration_error_data.py
rename to publication_inputs/xarray_post_processing/plot_integration_error_data.py
diff --git a/xarray_post_processing/plot_many_collisions.py b/publication_inputs/xarray_post_processing/plot_many_collisions.py
similarity index 100%
rename from xarray_post_processing/plot_many_collisions.py
rename to publication_inputs/xarray_post_processing/plot_many_collisions.py
diff --git a/xarray_post_processing/plot_mk_utils.py b/publication_inputs/xarray_post_processing/plot_mk_utils.py
similarity index 100%
rename from xarray_post_processing/plot_mk_utils.py
rename to publication_inputs/xarray_post_processing/plot_mk_utils.py
diff --git a/xarray_post_processing/plot_sd.py b/publication_inputs/xarray_post_processing/plot_sd.py
similarity index 100%
rename from xarray_post_processing/plot_sd.py
rename to publication_inputs/xarray_post_processing/plot_sd.py
diff --git a/xarray_post_processing/plot_wall.py b/publication_inputs/xarray_post_processing/plot_wall.py
similarity index 100%
rename from xarray_post_processing/plot_wall.py
rename to publication_inputs/xarray_post_processing/plot_wall.py
diff --git a/publication_inputs/xarray_post_processing/requirements.txt b/publication_inputs/xarray_post_processing/requirements.txt
new file mode 100644
index 000000000..bae625561
--- /dev/null
+++ b/publication_inputs/xarray_post_processing/requirements.txt
@@ -0,0 +1,5 @@
+h5py==3.11.0
+matplotlib==3.7.3
+numpy==1.24.4
+toml==0.10.2
+xarray==2023.1.0
diff --git a/xarray_post_processing/xarray_mk_utils.py b/publication_inputs/xarray_post_processing/xarray_mk_utils.py
similarity index 100%
rename from xarray_post_processing/xarray_mk_utils.py
rename to publication_inputs/xarray_post_processing/xarray_mk_utils.py

From 1d680ce4b75924da143c89fa22830e24469bcfc8 Mon Sep 17 00:00:00 2001
From: Michael Hardman <29800382+mrhardman@users.noreply.github.com>
Date: Mon, 2 Sep 2024 12:13:28 +0100
Subject: [PATCH 17/41] Change plot labels epsilon_L_2 -> epsilon_2.

---
 .../xarray_post_processing/plot_error_data.py          | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/publication_inputs/xarray_post_processing/plot_error_data.py b/publication_inputs/xarray_post_processing/plot_error_data.py
index 1dfac40f9..cc185de28 100644
--- a/publication_inputs/xarray_post_processing/plot_error_data.py
+++ b/publication_inputs/xarray_post_processing/plot_error_data.py
@@ -54,7 +54,7 @@ def get_fkpl_error_data(filename):
     marker_list = ['r--o','b--s','g--.','m--x','c--v','k']
     C_list = [max_C_err,L2_C_err,n_err,u_err,p_err,expected_diff]
     nelements = [nelement_list for item in C_list] 
-    ylab_list = ["$\\epsilon_{\\infty}(C[F,F])$", "$\\epsilon_{L_2}(C[F,F])$",
+    ylab_list = ["$\\epsilon_{\\infty}(C[F,F])$", "$\\epsilon_{2}(C[F,F])$",
                  "$|\\Delta n|$",
                  "$|\\Delta u_{||}|$",
                  "$|\\Delta p |$",
@@ -98,10 +98,10 @@ def get_fkpl_error_data(filename):
     #expected_diff, 
     expected_integral]
     nelements = [nelement_list for item in L2norm_list] 
-    ylab_list = ["$\\epsilon_{L_2}(d H / d v_{||})$","$\\epsilon_{L_2}(d H / d v_{\\perp})$",
-                 "$\\epsilon_{L_2}(d^2 G / d v_{\\perp} d v_{||})$", 
-                 "$\\epsilon_{L_2}(d^2 G / d v^2_{||})$", 
-                 "$\\epsilon_{L_2}(d^2 G / d v^2_{\\perp})$",
+    ylab_list = ["$\\epsilon_{2}(d H / d v_{||})$","$\\epsilon_{2}(d H / d v_{\\perp})$",
+                 "$\\epsilon_{2}(d^2 G / d v_{\\perp} d v_{||})$", 
+                 "$\\epsilon_{2}(d^2 G / d v^2_{||})$", 
+                 "$\\epsilon_{2}(d^2 G / d v^2_{\\perp})$",
                  #"$(1/"+nelement_string+")^{"+ngrid_string+"-1}$",
                  "$(1/"+nelement_string+")^{"+ngrid_string+"+1}$"]
     plot_1d_loglog_list_pdf (nelements,L2norm_list,marker_list,"$"+ nelement_string+"$", pdf,

From e44f36f8490a1ca1c05bf471710a44fd611b1b8e Mon Sep 17 00:00:00 2001
From: Michael Hardman <29800382+mrhardman@users.noreply.github.com>
Date: Tue, 15 Oct 2024 14:16:15 +0100
Subject: [PATCH 18/41] suggestions for online documentation of manual setup

---
 docs/src/manual_setup.md | 81 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 81 insertions(+)

diff --git a/docs/src/manual_setup.md b/docs/src/manual_setup.md
index e303674df..418022311 100644
--- a/docs/src/manual_setup.md
+++ b/docs/src/manual_setup.md
@@ -63,6 +63,36 @@ file is named something other than `libmpi.so`, you might have to pass some
 keyword arguments to `use_system_binary()` - see
 <https://juliaparallel.org/MPI.jl/stable/reference/mpipreferences/#MPIPreferences.use_system_binary>.
 
+## Using the native Julia MPI
+
+As an alternative, you can also use the MPI that is shipped with Julia.
+```julia
+julia> using MPI; MPI.install_mpiexecjl(force=true)
+julia> using MPIPreferences; MPIPreferences.use_jll_binary()
+```
+The executable for the Julia MPI can be called from the root folder of the project with
+```
+.julia/bin/mpiexecjl --project=./ -n N julia --project -O3 run_your_script.jl
+```
+where `N` is the number of cores used.
+
+## Miscellaneous required packages
+
+For full functionality, including precompilation with
+```
+$ julia --project -O3 precompile.jl
+```
+and running of tests by
+```
+$ julia --project -O3 -e 'include("moment_kinetics/test/runtests.jl")'
+```
+you need to install the following packages
+```
+$ julia --project -O3
+julia> ]
+pkg> add PackageCompiler StatsBase SpecialFunctions Test 
+```
+
 ## Link HDF5
 
 To enable parallel I/O, you need to get HDF5.jl to use the system HDF5 library
@@ -124,3 +154,54 @@ system-provided Python) - to do so:
   julia> ENV["PYTHON"]="/your/python/location"
   julia> using Pkg; Pkg.build("PyCall")
   ```
+## An example manual setup script
+
+We include an example manual setup script below to show the process
+of carrying out the above steps together, for a case where no post processing is required,
+and we only desire to verify the install with an MPI test. We use the 
+native Julia MPI, and do not link to HDF5.
+```
+#!/bin/bash
+# simple moment_kinetics install script
+# not supporting parallel HDF5
+# not supporting diagnostics
+
+# first time use: clone the repository and enter it
+# otherwise, comment out these two commands and run from the moment_kinetics_test_install (root) folder
+git clone https://github.com/mabarnes/moment_kinetics.git moment_kinetics_test_install
+cd moment_kinetics_test_install
+
+# set up modules and environment variables
+# need this every time you use Julia
+# e.g. module load julia/1.10.2
+# this will be specific to your system
+# JULIA_DEPOT_PATH must be set to be the same for each use of a specific install
+export JULIA_DEPOT_PATH=$(pwd)/.julia
+
+# develop moment_kinetics, no plots, no symbolic function tests
+touch Project.toml
+julia --project -O3 -e 'using Pkg; Pkg.develop(path="./moment_kinetics")'
+julia --project -O3 -e 'using Pkg; Pkg.add("MPIPreferences")'
+julia --project -O3 -e 'using Pkg; Pkg.add("MPI")'
+julia --project -O3 -e 'using Pkg; Pkg.add("Test")'
+julia --project -O3 -e 'using Pkg; Pkg.add("SpecialFunctions")'
+julia --project -O3 -e 'using Pkg; Pkg.add("PackageCompiler")'
+julia --project -O3 -e 'using Pkg; Pkg.add("StatsBase")'
+
+# setup MPI preferences and binary
+julia --project -O3 -e 'using Pkg; Pkg.instantiate()'
+julia --project -O3 -e 'using Pkg; Pkg.resolve()'
+julia --project -O3 -e 'using MPI; MPI.install_mpiexecjl(force=true)'
+julia --project -O3 -e 'using MPIPreferences; MPIPreferences.use_jll_binary()'
+julia --project -O3 -e 'using Pkg; Pkg.instantiate()'
+julia --project -O3 -e 'using Pkg; Pkg.resolve()'
+
+# generate moment_kinetics.so
+julia --project -O3 precompile.jl
+
+# check install with tests
+echo "MPI test with precompiled moment_kinetics.so"
+.julia/bin/mpiexecjl --project=./ -n 2 julia --project -O3 -Jmoment_kinetics.so -e 'include("moment_kinetics/test/runtests.jl")'
+```
+
+

From e99da092c20e850286e761d4a67a636e0970c4ec Mon Sep 17 00:00:00 2001
From: Michael Hardman <29800382+mrhardman@users.noreply.github.com>
Date: Wed, 23 Oct 2024 17:06:24 +0100
Subject: [PATCH 19/41] Infer number of field, gyro_field, moment dimensions
 from call to struct, rather than hardcode size.

---
 .../src/moment_kinetics_structs.jl            | 194 +++++++++---------
 1 file changed, 97 insertions(+), 97 deletions(-)

diff --git a/moment_kinetics/src/moment_kinetics_structs.jl b/moment_kinetics/src/moment_kinetics_structs.jl
index 18b5b0189..3f2fe08b4 100644
--- a/moment_kinetics/src/moment_kinetics_structs.jl
+++ b/moment_kinetics/src/moment_kinetics_structs.jl
@@ -41,277 +41,277 @@ end
 
 """
 """
-struct em_fields_struct
+struct em_fields_struct{n_field, n_gyrofield}
     # phi is the electrostatic potential
-    phi::MPISharedArray{mk_float,2}
+    phi::MPISharedArray{mk_float,n_field}
     # phi0 is the initial electrostatic potential
-    phi0::MPISharedArray{mk_float,2}
+    phi0::MPISharedArray{mk_float,n_field}
     # Er is the radial electric field
-    Er::MPISharedArray{mk_float,2}
+    Er::MPISharedArray{mk_float,n_field}
     # Ez is the parallel electric field
-    Ez::MPISharedArray{mk_float,2}
+    Ez::MPISharedArray{mk_float,n_field}
     # gphi is the gyroaveraged electrostatic potential
-    gphi::MPISharedArray{mk_float,4}
+    gphi::MPISharedArray{mk_float,n_gyrofield}
     # gEr is the gyroaveraged radial electric field
-    gEr::MPISharedArray{mk_float,4}
+    gEr::MPISharedArray{mk_float,n_gyrofield}
     # gEz is the gyroaveraged parallel electric field
-    gEz::MPISharedArray{mk_float,4}
+    gEz::MPISharedArray{mk_float,n_gyrofield}
     # if true, force Er = 0 at wall plates
     force_Er_zero_at_wall::Bool
 end
 
 """
 """
-struct moments_ion_substruct
+struct moments_ion_substruct{n_moment,n_moment_wall}
     # this is the particle density
-    dens::MPISharedArray{mk_float,3}
+    dens::MPISharedArray{mk_float,n_moment}
     # flag that keeps track of if the density needs updating before use
     # Note: may not be set for all species on this process, but this process only ever
     # sets/uses the value for the same subset of species. This means dens_update does
     # not need to be a shared memory array.
     dens_updated::Vector{Bool}
     # this is the parallel flow
-    upar::MPISharedArray{mk_float,3}
+    upar::MPISharedArray{mk_float,n_moment}
     # flag that keeps track of whether or not upar needs updating before use
     # Note: may not be set for all species on this process, but this process only ever
     # sets/uses the value for the same subset of species. This means upar_update does
     # not need to be a shared memory array.
     upar_updated::Vector{Bool}
     # this is the parallel pressure
-    ppar::MPISharedArray{mk_float,3}
+    ppar::MPISharedArray{mk_float,n_moment}
     # flag that keeps track of whether or not ppar needs updating before use
     # Note: may not be set for all species on this process, but this process only ever
     # sets/uses the value for the same subset of species. This means ppar_update does
     # not need to be a shared memory array.
     ppar_updated::Vector{Bool}
     # this is the perpendicular pressure
-    pperp::MPISharedArray{mk_float,3}
+    pperp::MPISharedArray{mk_float,n_moment}
     # this is the parallel heat flux
-    qpar::MPISharedArray{mk_float,3}
+    qpar::MPISharedArray{mk_float,n_moment}
     # flag that keeps track of whether or not qpar needs updating before use
     # Note: may not be set for all species on this process, but this process only ever
     # sets/uses the value for the same subset of species. This means qpar_update does
     # not need to be a shared memory array.
     qpar_updated::Vector{Bool}
     # this is the thermal speed based on the parallel temperature Tpar = ppar/dens: vth = sqrt(2*Tpar/m)
-    vth::MPISharedArray{mk_float,3}
+    vth::MPISharedArray{mk_float,n_moment}
     # generalised Chodura integrals for the lower and upper plates
-    chodura_integral_lower::MPISharedArray{mk_float,2}
-    chodura_integral_upper::MPISharedArray{mk_float,2}
+    chodura_integral_lower::MPISharedArray{mk_float,n_moment_wall}
+    chodura_integral_upper::MPISharedArray{mk_float,n_moment_wall}
     # if evolve_ppar = true, then the velocity variable is (vpa - upa)/vth, which introduces
     # a factor of vth for each power of wpa in velocity space integrals.
     # v_norm_fac accounts for this: it is vth if using the above definition for the parallel velocity,
     # and it is one otherwise
-    v_norm_fac::Union{MPISharedArray{mk_float,3},Nothing}
+    v_norm_fac::Union{MPISharedArray{mk_float,n_moment},Nothing}
     # this is the z-derivative of the particle density
-    ddens_dz::Union{MPISharedArray{mk_float,3},Nothing}
+    ddens_dz::Union{MPISharedArray{mk_float,n_moment},Nothing}
     # this is the upwinded z-derivative of the particle density
-    ddens_dz_upwind::Union{MPISharedArray{mk_float,3},Nothing}
+    ddens_dz_upwind::Union{MPISharedArray{mk_float,n_moment},Nothing}
     # this is the second-z-derivative of the particle density
-    d2dens_dz2::Union{MPISharedArray{mk_float,3},Nothing}
+    d2dens_dz2::Union{MPISharedArray{mk_float,n_moment},Nothing}
     # this is the z-derivative of the parallel flow
-    dupar_dz::Union{MPISharedArray{mk_float,3},Nothing}
+    dupar_dz::Union{MPISharedArray{mk_float,n_moment},Nothing}
     # this is the upwinded z-derivative of the parallel flow
-    dupar_dz_upwind::Union{MPISharedArray{mk_float,3},Nothing}
+    dupar_dz_upwind::Union{MPISharedArray{mk_float,n_moment},Nothing}
     # this is the second-z-derivative of the parallel flow
-    d2upar_dz2::Union{MPISharedArray{mk_float,3},Nothing}
+    d2upar_dz2::Union{MPISharedArray{mk_float,n_moment},Nothing}
     # this is the z-derivative of the parallel pressure
-    dppar_dz::Union{MPISharedArray{mk_float,3},Nothing}
+    dppar_dz::Union{MPISharedArray{mk_float,n_moment},Nothing}
     # this is the upwinded z-derivative of the parallel pressure
-    dppar_dz_upwind::Union{MPISharedArray{mk_float,3},Nothing}
+    dppar_dz_upwind::Union{MPISharedArray{mk_float,n_moment},Nothing}
     # this is the second-z-derivative of the parallel pressure
-    d2ppar_dz2::Union{MPISharedArray{mk_float,3},Nothing}
+    d2ppar_dz2::Union{MPISharedArray{mk_float,n_moment},Nothing}
     # this is the z-derivative of the parallel heat flux
-    dqpar_dz::Union{MPISharedArray{mk_float,3},Nothing}
+    dqpar_dz::Union{MPISharedArray{mk_float,n_moment},Nothing}
     # this is the z-derivative of the thermal speed based on the parallel temperature Tpar = ppar/dens: vth = sqrt(2*Tpar/m)
-    dvth_dz::Union{MPISharedArray{mk_float,3},Nothing}
-    # this is the entropy production dS/dt = - int (ln f sum_s' C_ss' [f_s,f_s']) d^3 v
-    dSdt::MPISharedArray{mk_float,3}
+    dvth_dz::Union{MPISharedArray{mk_float,n_moment},Nothing}
+    # this is the entropy production dS/dt = - int (ln f sum_s' C_ss' [f_s,f_s']) d^3 v
+    dSdt::MPISharedArray{mk_float,n_moment}
     # Spatially varying amplitude of the external source term (third index is for different sources)
-    external_source_amplitude::MPISharedArray{mk_float,3}
+    external_source_amplitude::MPISharedArray{mk_float,n_moment}
     # Spatially varying amplitude of the density moment of the external source term
-    external_source_density_amplitude::MPISharedArray{mk_float,3}
+    external_source_density_amplitude::MPISharedArray{mk_float,n_moment}
     # Spatially varying amplitude of the parallel momentum moment of the external source
     # term
-    external_source_momentum_amplitude::MPISharedArray{mk_float,3}
+    external_source_momentum_amplitude::MPISharedArray{mk_float,n_moment}
     # Spatially varying amplitude of the parallel pressure moment of the external source
     # term
-    external_source_pressure_amplitude::MPISharedArray{mk_float,3}
+    external_source_pressure_amplitude::MPISharedArray{mk_float,n_moment}
     # Integral term for the PID controller of the external source term
-    external_source_controller_integral::MPISharedArray{mk_float,3}
+    external_source_controller_integral::MPISharedArray{mk_float,n_moment}
     # Store coefficient 'A' from applying moment constraints so we can write it out as a
     # diagnostic
-    constraints_A_coefficient::Union{MPISharedArray{mk_float,3},Nothing}
+    constraints_A_coefficient::Union{MPISharedArray{mk_float,n_moment},Nothing}
     # Store coefficient 'B' from applying moment constraints so we can write it out as a
     # diagnostic
-    constraints_B_coefficient::Union{MPISharedArray{mk_float,3},Nothing}
+    constraints_B_coefficient::Union{MPISharedArray{mk_float,n_moment},Nothing}
     # Store coefficient 'C' from applying moment constraints so we can write it out as a
     # diagnostic
-    constraints_C_coefficient::Union{MPISharedArray{mk_float,3},Nothing}
+    constraints_C_coefficient::Union{MPISharedArray{mk_float,n_moment},Nothing}
 end
 
 """
 moments_electron_substruct is a struct that contains moment information for electrons
 """
-struct moments_electron_substruct
+struct moments_electron_substruct{n_moment_electron,n_moment_electron_source}
     # this is the particle density
-    dens::MPISharedArray{mk_float,2}
+    dens::MPISharedArray{mk_float,n_moment_electron}
     # flag that keeps track of if the density needs updating before use
     dens_updated::Base.RefValue{Bool}
     # this is the parallel flow
-    upar::MPISharedArray{mk_float,2}
+    upar::MPISharedArray{mk_float,n_moment_electron}
     # flag that keeps track of whether or not upar needs updating before use
     upar_updated::Base.RefValue{Bool}
     # this is the parallel pressure
-    ppar::MPISharedArray{mk_float,2}
+    ppar::MPISharedArray{mk_float,n_moment_electron}
     # flag that keeps track of whether or not ppar needs updating before use
     ppar_updated::Base.RefValue{Bool}
     # this is the temperature
-    temp::MPISharedArray{mk_float,2}
+    temp::MPISharedArray{mk_float,n_moment_electron}
     # flag that keeps track of whether or not temp needs updating before use
     temp_updated::Base.RefValue{Bool}
     # this is the parallel heat flux
-    qpar::MPISharedArray{mk_float,2}
+    qpar::MPISharedArray{mk_float,n_moment_electron}
     # flag that keeps track of whether or not qpar needs updating before use
     qpar_updated::Base.RefValue{Bool}
-    # this is the thermal speed based on the parallel temperature Tpar = ppar/dens: vth = sqrt(2*Tpar/m)
-    vth::MPISharedArray{mk_float,2}
+    # this is the thermal speed based on the parallel temperature Tpar = ppar/dens: vth = sqrt(2*Tpar/m)
+    vth::MPISharedArray{mk_float,n_moment_electron}
     # this is the parallel friction force between ions and electrons
-    parallel_friction::MPISharedArray{mk_float,2}
+    parallel_friction::MPISharedArray{mk_float,n_moment_electron}
     # Spatially varying amplitude of the external source term
-    external_source_amplitude::MPISharedArray{mk_float,3}
+    external_source_amplitude::MPISharedArray{mk_float,n_moment_electron_source}
     # Spatially varying amplitude of the density moment of the external source term
-    external_source_density_amplitude::MPISharedArray{mk_float,3}
+    external_source_density_amplitude::MPISharedArray{mk_float,n_moment_electron_source}
     # Spatially varying amplitude of the parallel momentum moment of the external source
     # term
-    external_source_momentum_amplitude::MPISharedArray{mk_float,3}
+    external_source_momentum_amplitude::MPISharedArray{mk_float,n_moment_electron_source}
     # Spatially varying amplitude of the parallel pressure moment of the external source
     # term
-    external_source_pressure_amplitude::MPISharedArray{mk_float,3}
+    external_source_pressure_amplitude::MPISharedArray{mk_float,n_moment_electron_source}
     # if evolve_ppar = true, then the velocity variable is (vpa - upa)/vth, which introduces
     # a factor of vth for each power of wpa in velocity space integrals.
     # v_norm_fac accounts for this: it is vth if using the above definition for the parallel velocity,
     # and it is one otherwise
-    v_norm_fac::Union{MPISharedArray{mk_float,2},Nothing}
+    v_norm_fac::Union{MPISharedArray{mk_float,n_moment_electron},Nothing}
     # this is the z-derivative of the particle density
-    ddens_dz::Union{MPISharedArray{mk_float,2},Nothing}
+    ddens_dz::Union{MPISharedArray{mk_float,n_moment_electron},Nothing}
     # this is the z-derivative of the parallel flow
-    dupar_dz::Union{MPISharedArray{mk_float,2},Nothing}
+    dupar_dz::Union{MPISharedArray{mk_float,n_moment_electron},Nothing}
     # this is the z-derivative of the parallel pressure
-    dppar_dz::Union{MPISharedArray{mk_float,2},Nothing}
+    dppar_dz::Union{MPISharedArray{mk_float,n_moment_electron},Nothing}
     # this is the upwinded z-derivative of the parallel pressure
-    dppar_dz_upwind::Union{MPISharedArray{mk_float,2},Nothing}
+    dppar_dz_upwind::Union{MPISharedArray{mk_float,n_moment_electron},Nothing}
     # this is the second-z-derivative of the parallel pressure
-    d2ppar_dz2::Union{MPISharedArray{mk_float,2},Nothing}
+    d2ppar_dz2::Union{MPISharedArray{mk_float,n_moment_electron},Nothing}
     # this is the z-derivative of the parallel heat flux
-    dqpar_dz::Union{MPISharedArray{mk_float,2},Nothing}
+    dqpar_dz::Union{MPISharedArray{mk_float,n_moment_electron},Nothing}
     # this is the z-derivative of the parallel temperature Tpar = ppar/dens
-    dT_dz::Union{MPISharedArray{mk_float,2},Nothing}
+    dT_dz::Union{MPISharedArray{mk_float,n_moment_electron},Nothing}
     # this is the upwinded z-derivative of the temperature Tpar = ppar/dens
-    dT_dz_upwind::Union{MPISharedArray{mk_float,2},Nothing}
-    # this is the z-derivative of the electron thermal speed vth = sqrt(2*Tpar/m)
-    dvth_dz::Union{MPISharedArray{mk_float,2},Nothing}
+    dT_dz_upwind::Union{MPISharedArray{mk_float,n_moment_electron},Nothing}
+    # this is the z-derivative of the electron thermal speed vth = sqrt(2*Tpar/m)
+    dvth_dz::Union{MPISharedArray{mk_float,n_moment_electron},Nothing}
     # Store coefficient 'A' from applying moment constraints so we can write it out as a
     # diagnostic
-    constraints_A_coefficient::Union{MPISharedArray{mk_float,2},Nothing}
+    constraints_A_coefficient::Union{MPISharedArray{mk_float,n_moment_electron},Nothing}
     # Store coefficient 'B' from applying moment constraints so we can write it out as a
     # diagnostic
-    constraints_B_coefficient::Union{MPISharedArray{mk_float,2},Nothing}
+    constraints_B_coefficient::Union{MPISharedArray{mk_float,n_moment_electron},Nothing}
     # Store coefficient 'C' from applying moment constraints so we can write it out as a
     # diagnostic
-    constraints_C_coefficient::Union{MPISharedArray{mk_float,2},Nothing}
+    constraints_C_coefficient::Union{MPISharedArray{mk_float,n_moment_electron},Nothing}
 end
 
 """
 """
-struct moments_neutral_substruct
+struct moments_neutral_substruct{n_moment_neutral}
     # this is the particle density
-    dens::MPISharedArray{mk_float,3}
+    dens::MPISharedArray{mk_float,n_moment_neutral}
     # flag that keeps track of if the density needs updating before use
     # Note: may not be set for all species on this process, but this process only ever
     # sets/uses the value for the same subset of species. This means dens_update does
     # not need to be a shared memory array.
     dens_updated::Vector{Bool}
     # this is the particle mean velocity in z
-    uz::MPISharedArray{mk_float,3}
+    uz::MPISharedArray{mk_float,n_moment_neutral}
     # flag that keeps track of if uz needs updating before use
     uz_updated::Vector{Bool}
     # this is the particle mean velocity in r
-    ur::MPISharedArray{mk_float,3}
+    ur::MPISharedArray{mk_float,n_moment_neutral}
     # flag that keeps track of if ur needs updating before use
     ur_updated::Vector{Bool}
     # this is the particle mean velocity in zeta
-    uzeta::MPISharedArray{mk_float,3}
+    uzeta::MPISharedArray{mk_float,n_moment_neutral}
     # flag that keeps track of if uzeta needs updating before use
     uzeta_updated::Vector{Bool}
     # this is the zz particle pressure tensor component
-    pz::MPISharedArray{mk_float,3}
+    pz::MPISharedArray{mk_float,n_moment_neutral}
     # flag that keeps track of if pz needs updating before use
     pz_updated::Vector{Bool}
     # this is the rr particle pressure tensor component
-    pr::MPISharedArray{mk_float,3}
+    pr::MPISharedArray{mk_float,n_moment_neutral}
     # flag that keeps track of if pr needs updating before use
     pr_updated::Vector{Bool}
     # this is the zetazeta particle pressure tensor component
-    pzeta::MPISharedArray{mk_float,3}
+    pzeta::MPISharedArray{mk_float,n_moment_neutral}
     # flag that keeps track of if pzeta needs updating before use
     pzeta_updated::Vector{Bool}
     # this is the total (isotropic) particle pressure
-    ptot::MPISharedArray{mk_float,3}
+    ptot::MPISharedArray{mk_float,n_moment_neutral}
     # this is the heat flux along z
-    qz::MPISharedArray{mk_float,3}
+    qz::MPISharedArray{mk_float,n_moment_neutral}
     # flag that keeps track of if qz needs updating before use
     qz_updated::Vector{Bool}
     # this is the thermal speed based on the temperature T = ptot/dens: vth = sqrt(2*T/m)
-    vth::MPISharedArray{mk_float,3}
+    vth::MPISharedArray{mk_float,n_moment_neutral}
     # if evolve_ppar = true, then the velocity variable is (vz - uz)/vth, which introduces
     # a factor of vth for each power of wz in velocity space integrals.
     # v_norm_fac accounts for this: it is vth if using the above definition for the parallel velocity,
     # and it is one otherwise
-    v_norm_fac::MPISharedArray{mk_float,3}
+    v_norm_fac::MPISharedArray{mk_float,n_moment_neutral}
     # this is the z-derivative of the particle density
-    ddens_dz::Union{MPISharedArray{mk_float,3},Nothing}
+    ddens_dz::Union{MPISharedArray{mk_float,n_moment_neutral},Nothing}
     # this is the z-derivative of the particle density
-    ddens_dz_upwind::Union{MPISharedArray{mk_float,3},Nothing}
+    ddens_dz_upwind::Union{MPISharedArray{mk_float,n_moment_neutral},Nothing}
     # this is the second-z-derivative of the particle density
-    d2dens_dz2::Union{MPISharedArray{mk_float,3},Nothing}
+    d2dens_dz2::Union{MPISharedArray{mk_float,n_moment_neutral},Nothing}
     # this is the z-derivative of the particle mean velocity in z
-    duz_dz::Union{MPISharedArray{mk_float,3},Nothing}
+    duz_dz::Union{MPISharedArray{mk_float,n_moment_neutral},Nothing}
     # this is the upwinded z-derivative of the particle mean velocity in z
-    duz_dz_upwind::Union{MPISharedArray{mk_float,3},Nothing}
+    duz_dz_upwind::Union{MPISharedArray{mk_float,n_moment_neutral},Nothing}
     # this is the second-z-derivative of the particle mean velocity in z
-    d2uz_dz2::Union{MPISharedArray{mk_float,3},Nothing}
+    d2uz_dz2::Union{MPISharedArray{mk_float,n_moment_neutral},Nothing}
     # this is the z-derivative of the zz particle pressure tensor component
-    dpz_dz::Union{MPISharedArray{mk_float,3},Nothing}
+    dpz_dz::Union{MPISharedArray{mk_float,n_moment_neutral},Nothing}
     # this is the upwinded z-derivative of the zz particle pressure tensor component
-    dpz_dz_upwind::Union{MPISharedArray{mk_float,3},Nothing}
+    dpz_dz_upwind::Union{MPISharedArray{mk_float,n_moment_neutral},Nothing}
     # this is the second-z-derivative of the zz particle pressure tensor component
-    d2pz_dz2::Union{MPISharedArray{mk_float,3},Nothing}
+    d2pz_dz2::Union{MPISharedArray{mk_float,n_moment_neutral},Nothing}
     # this is the z-derivative of the thermal speed based on the temperature T = ptot/dens: vth = sqrt(2*T/m)
-    dvth_dz::Union{MPISharedArray{mk_float,3},Nothing}
+    dvth_dz::Union{MPISharedArray{mk_float,n_moment_neutral},Nothing}
     # this is the z-derivative of the heat flux along z
-    dqz_dz::Union{MPISharedArray{mk_float,3},Nothing}
+    dqz_dz::Union{MPISharedArray{mk_float,n_moment_neutral},Nothing}
     # Spatially varying amplitude of the external source term
-    external_source_amplitude::MPISharedArray{mk_float,3}
+    external_source_amplitude::MPISharedArray{mk_float,n_moment_neutral}
     # Spatially varying amplitude of the density moment of the external source term
-    external_source_density_amplitude::MPISharedArray{mk_float,3}
+    external_source_density_amplitude::MPISharedArray{mk_float,n_moment_neutral}
     # Spatially varying amplitude of the parallel momentum moment of the external source
     # term
-    external_source_momentum_amplitude::MPISharedArray{mk_float,3}
+    external_source_momentum_amplitude::MPISharedArray{mk_float,n_moment_neutral}
     # Spatially varying amplitude of the parallel pressure moment of the external source
     # term
-    external_source_pressure_amplitude::MPISharedArray{mk_float,3}
+    external_source_pressure_amplitude::MPISharedArray{mk_float,n_moment_neutral}
     # Integral term for the PID controller of the external source term
-    external_source_controller_integral::MPISharedArray{mk_float,3}
+    external_source_controller_integral::MPISharedArray{mk_float,n_moment_neutral}
     # Store coefficient 'A' from applying moment constraints so we can write it out as a
     # diagnostic
-    constraints_A_coefficient::Union{MPISharedArray{mk_float,3},Nothing}
+    constraints_A_coefficient::Union{MPISharedArray{mk_float,n_moment_neutral},Nothing}
     # Store coefficient 'B' from applying moment constraints so we can write it out as a
     # diagnostic
-    constraints_B_coefficient::Union{MPISharedArray{mk_float,3},Nothing}
+    constraints_B_coefficient::Union{MPISharedArray{mk_float,n_moment_neutral},Nothing}
     # Store coefficient 'C' from applying moment constraints so we can write it out as a
     # diagnostic
-    constraints_C_coefficient::Union{MPISharedArray{mk_float,3},Nothing}
+    constraints_C_coefficient::Union{MPISharedArray{mk_float,n_moment_neutral},Nothing}
 end
 
 """

From e19cdea7ac44bda4d0331323145c8dc28c389560 Mon Sep 17 00:00:00 2001
From: Michael Hardman <29800382+mrhardman@users.noreply.github.com>
Date: Wed, 23 Oct 2024 17:41:37 +0100
Subject: [PATCH 20/41] Fix bugs introduced by search and replace.

---
 moment_kinetics/src/moment_kinetics_structs.jl | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/moment_kinetics/src/moment_kinetics_structs.jl b/moment_kinetics/src/moment_kinetics_structs.jl
index 3f2fe08b4..700ae89c3 100644
--- a/moment_kinetics/src/moment_kinetics_structs.jl
+++ b/moment_kinetics/src/moment_kinetics_structs.jl
@@ -125,7 +125,7 @@ struct moments_ion_substruct{n_moment,n_moment_wall}
     dqpar_dz::Union{MPISharedArray{mk_float,n_moment},Nothing}
     # this is the z-derivative of the thermal speed based on the parallel temperature Tpar = ppar/dens: vth = sqrt(2*Tpar/m)
     dvth_dz::Union{MPISharedArray{mk_float,n_moment},Nothing}
-    # this is the entropy production dS/dt = - int (ln f sum_s' C_ss' [f_s,f_s']) d^n_moment v
+    # this is the entropy production dS/dt = - int (ln f sum_s' C_ss' [f_s,f_s']) d^3 v
     dSdt::MPISharedArray{mk_float,n_moment}
     # Spatially varying amplitude of the external source term (third index is for different sources)
     external_source_amplitude::MPISharedArray{mk_float,n_moment}
@@ -174,7 +174,7 @@ struct moments_electron_substruct{n_moment_electron,n_moment_electron_source}
     qpar::MPISharedArray{mk_float,n_moment_electron}
     # flag that keeps track of whether or not qpar needs updating before use
     qpar_updated::Base.RefValue{Bool}
-    # this is the thermal speed based on the parallel temperature Tpar = ppar/dens: vth = sqrt(n_moment_electron*Tpar/m)
+    # this is the thermal speed based on the parallel temperature Tpar = ppar/dens: vth = sqrt(2*Tpar/m)
     vth::MPISharedArray{mk_float,n_moment_electron}
     # this is the parallel friction force between ions and electrons
     parallel_friction::MPISharedArray{mk_float,n_moment_electron}
@@ -202,14 +202,14 @@ struct moments_electron_substruct{n_moment_electron,n_moment_electron_source}
     # this is the upwinded z-derivative of the parallel pressure
     dppar_dz_upwind::Union{MPISharedArray{mk_float,n_moment_electron},Nothing}
     # this is the second-z-derivative of the parallel pressure
-    dn_moment_electronppar_dzn_moment_electron::Union{MPISharedArray{mk_float,n_moment_electron},Nothing}
+    d2ppar_dz2::Union{MPISharedArray{mk_float,n_moment_electron},Nothing}
     # this is the z-derivative of the parallel heat flux
     dqpar_dz::Union{MPISharedArray{mk_float,n_moment_electron},Nothing}
     # this is the z-derivative of the parallel temperature Tpar = ppar/dens
     dT_dz::Union{MPISharedArray{mk_float,n_moment_electron},Nothing}
     # this is the upwinded z-derivative of the temperature Tpar = ppar/dens
     dT_dz_upwind::Union{MPISharedArray{mk_float,n_moment_electron},Nothing}
-    # this is the z-derivative of the electron thermal speed vth = sqrt(n_moment_electron*Tpar/m)
+    # this is the z-derivative of the electron thermal speed vth = sqrt(2*Tpar/m)
     dvth_dz::Union{MPISharedArray{mk_float,n_moment_electron},Nothing}
     # Store coefficient 'A' from applying moment constraints so we can write it out as a
     # diagnostic

From 578a0c794c46d785f9f325db85123d7a70998b2f Mon Sep 17 00:00:00 2001
From: Michael Hardman <29800382+mrhardman@users.noreply.github.com>
Date: Thu, 24 Oct 2024 09:36:59 +0100
Subject: [PATCH 21/41] Rename array-rank type parameters from n_* to ndim_*,
 following JOmotani's review comment.

---
 .../src/moment_kinetics_structs.jl            | 242 +++++++++---------
 1 file changed, 121 insertions(+), 121 deletions(-)

diff --git a/moment_kinetics/src/moment_kinetics_structs.jl b/moment_kinetics/src/moment_kinetics_structs.jl
index 700ae89c3..299955cb2 100644
--- a/moment_kinetics/src/moment_kinetics_structs.jl
+++ b/moment_kinetics/src/moment_kinetics_structs.jl
@@ -9,324 +9,324 @@ using ..type_definitions: mk_float
 
 """
 """
-struct scratch_pdf{n_distribution_ion, n_moment, n_moment_electron,
-                   n_distribution_neutral, n_moment_neutral}
+struct scratch_pdf{ndim_distribution_ion, ndim_moment, ndim_moment_electron,
+                   ndim_distribution_neutral, ndim_moment_neutral}
     # ions
-    pdf::MPISharedArray{mk_float, n_distribution_ion}
-    density::MPISharedArray{mk_float, n_moment}
-    upar::MPISharedArray{mk_float, n_moment}
-    ppar::MPISharedArray{mk_float, n_moment}
-    pperp::MPISharedArray{mk_float, n_moment}
-    temp_z_s::MPISharedArray{mk_float, n_moment}
+    pdf::MPISharedArray{mk_float, ndim_distribution_ion}
+    density::MPISharedArray{mk_float, ndim_moment}
+    upar::MPISharedArray{mk_float, ndim_moment}
+    ppar::MPISharedArray{mk_float, ndim_moment}
+    pperp::MPISharedArray{mk_float, ndim_moment}
+    temp_z_s::MPISharedArray{mk_float, ndim_moment}
     # electrons
-    electron_density::MPISharedArray{mk_float, n_moment_electron}
-    electron_upar::MPISharedArray{mk_float, n_moment_electron}
-    electron_ppar::MPISharedArray{mk_float, n_moment_electron}
-    electron_pperp::MPISharedArray{mk_float, n_moment_electron}
-    electron_temp::MPISharedArray{mk_float, n_moment_electron}
+    electron_density::MPISharedArray{mk_float, ndim_moment_electron}
+    electron_upar::MPISharedArray{mk_float, ndim_moment_electron}
+    electron_ppar::MPISharedArray{mk_float, ndim_moment_electron}
+    electron_pperp::MPISharedArray{mk_float, ndim_moment_electron}
+    electron_temp::MPISharedArray{mk_float, ndim_moment_electron}
     # neutral particles 
-    pdf_neutral::MPISharedArray{mk_float, n_distribution_neutral}
-    density_neutral::MPISharedArray{mk_float, n_moment_neutral}
-    uz_neutral::MPISharedArray{mk_float, n_moment_neutral}
-    pz_neutral::MPISharedArray{mk_float, n_moment_neutral}
+    pdf_neutral::MPISharedArray{mk_float, ndim_distribution_neutral}
+    density_neutral::MPISharedArray{mk_float, ndim_moment_neutral}
+    uz_neutral::MPISharedArray{mk_float, ndim_moment_neutral}
+    pz_neutral::MPISharedArray{mk_float, ndim_moment_neutral}
 end
 
 """
 """
-struct scratch_electron_pdf{n_distribution_electron, n_moment_electron}
+struct scratch_electron_pdf{ndim_distribution_electron, ndim_moment_electron}
     # electrons
-    pdf_electron::MPISharedArray{mk_float, n_distribution_electron}
-    electron_ppar::MPISharedArray{mk_float, n_moment_electron}
+    pdf_electron::MPISharedArray{mk_float, ndim_distribution_electron}
+    electron_ppar::MPISharedArray{mk_float, ndim_moment_electron}
 end
 
 """
 """
-struct em_fields_struct{n_field, n_gyrofield}
+struct em_fields_struct{ndim_field, ndim_gyrofield}
     # phi is the electrostatic potential
-    phi::MPISharedArray{mk_float,n_field}
+    phi::MPISharedArray{mk_float,ndim_field}
     # phi0 is the initial electrostatic potential
-    phi0::MPISharedArray{mk_float,n_field}
+    phi0::MPISharedArray{mk_float,ndim_field}
     # Er is the radial electric field
-    Er::MPISharedArray{mk_float,n_field}
+    Er::MPISharedArray{mk_float,ndim_field}
     # Ez is the parallel electric field
-    Ez::MPISharedArray{mk_float,n_field}
+    Ez::MPISharedArray{mk_float,ndim_field}
     # gphi is the gyroaveraged electrostatic potential
-    gphi::MPISharedArray{mk_float,n_gyrofield}
+    gphi::MPISharedArray{mk_float,ndim_gyrofield}
     # gEr is the gyroaveraged radial electric field
-    gEr::MPISharedArray{mk_float,n_gyrofield}
+    gEr::MPISharedArray{mk_float,ndim_gyrofield}
     # gEz is the gyroaveraged parallel electric field
-    gEz::MPISharedArray{mk_float,n_gyrofield}
+    gEz::MPISharedArray{mk_float,ndim_gyrofield}
     # if true, force Er = 0 at wall plates
     force_Er_zero_at_wall::Bool
 end
 
 """
 """
-struct moments_ion_substruct{n_moment,n_moment_wall}
+struct moments_ion_substruct{ndim_moment,ndim_moment_wall}
     # this is the particle density
-    dens::MPISharedArray{mk_float,n_moment}
+    dens::MPISharedArray{mk_float,ndim_moment}
     # flag that keeps track of if the density needs updating before use
     # Note: may not be set for all species on this process, but this process only ever
     # sets/uses the value for the same subset of species. This means dens_update does
     # not need to be a shared memory array.
     dens_updated::Vector{Bool}
     # this is the parallel flow
-    upar::MPISharedArray{mk_float,n_moment}
+    upar::MPISharedArray{mk_float,ndim_moment}
     # flag that keeps track of whether or not upar needs updating before use
     # Note: may not be set for all species on this process, but this process only ever
     # sets/uses the value for the same subset of species. This means upar_update does
     # not need to be a shared memory array.
     upar_updated::Vector{Bool}
     # this is the parallel pressure
-    ppar::MPISharedArray{mk_float,n_moment}
+    ppar::MPISharedArray{mk_float,ndim_moment}
     # flag that keeps track of whether or not ppar needs updating before use
     # Note: may not be set for all species on this process, but this process only ever
     # sets/uses the value for the same subset of species. This means ppar_update does
     # not need to be a shared memory array.
     ppar_updated::Vector{Bool}
     # this is the perpendicular pressure
-    pperp::MPISharedArray{mk_float,n_moment}
+    pperp::MPISharedArray{mk_float,ndim_moment}
     # this is the parallel heat flux
-    qpar::MPISharedArray{mk_float,n_moment}
+    qpar::MPISharedArray{mk_float,ndim_moment}
     # flag that keeps track of whether or not qpar needs updating before use
     # Note: may not be set for all species on this process, but this process only ever
     # sets/uses the value for the same subset of species. This means qpar_update does
     # not need to be a shared memory array.
     qpar_updated::Vector{Bool}
     # this is the thermal speed based on the parallel temperature Tpar = ppar/dens: vth = sqrt(2*Tpar/m)
-    vth::MPISharedArray{mk_float,n_moment}
+    vth::MPISharedArray{mk_float,ndim_moment}
     # generalised Chodura integrals for the lower and upper plates
-    chodura_integral_lower::MPISharedArray{mk_float,n_moment_wall}
-    chodura_integral_upper::MPISharedArray{mk_float,n_moment_wall}
+    chodura_integral_lower::MPISharedArray{mk_float,ndim_moment_wall}
+    chodura_integral_upper::MPISharedArray{mk_float,ndim_moment_wall}
     # if evolve_ppar = true, then the velocity variable is (vpa - upa)/vth, which introduces
     # a factor of vth for each power of wpa in velocity space integrals.
     # v_norm_fac accounts for this: it is vth if using the above definition for the parallel velocity,
     # and it is one otherwise
-    v_norm_fac::Union{MPISharedArray{mk_float,n_moment},Nothing}
+    v_norm_fac::Union{MPISharedArray{mk_float,ndim_moment},Nothing}
     # this is the z-derivative of the particle density
-    ddens_dz::Union{MPISharedArray{mk_float,n_moment},Nothing}
+    ddens_dz::Union{MPISharedArray{mk_float,ndim_moment},Nothing}
     # this is the upwinded z-derivative of the particle density
-    ddens_dz_upwind::Union{MPISharedArray{mk_float,n_moment},Nothing}
+    ddens_dz_upwind::Union{MPISharedArray{mk_float,ndim_moment},Nothing}
     # this is the second-z-derivative of the particle density
-    d2dens_dz2::Union{MPISharedArray{mk_float,n_moment},Nothing}
+    d2dens_dz2::Union{MPISharedArray{mk_float,ndim_moment},Nothing}
     # this is the z-derivative of the parallel flow
-    dupar_dz::Union{MPISharedArray{mk_float,n_moment},Nothing}
+    dupar_dz::Union{MPISharedArray{mk_float,ndim_moment},Nothing}
     # this is the upwinded z-derivative of the parallel flow
-    dupar_dz_upwind::Union{MPISharedArray{mk_float,n_moment},Nothing}
+    dupar_dz_upwind::Union{MPISharedArray{mk_float,ndim_moment},Nothing}
     # this is the second-z-derivative of the parallel flow
-    d2upar_dz2::Union{MPISharedArray{mk_float,n_moment},Nothing}
+    d2upar_dz2::Union{MPISharedArray{mk_float,ndim_moment},Nothing}
     # this is the z-derivative of the parallel pressure
-    dppar_dz::Union{MPISharedArray{mk_float,n_moment},Nothing}
+    dppar_dz::Union{MPISharedArray{mk_float,ndim_moment},Nothing}
     # this is the upwinded z-derivative of the parallel pressure
-    dppar_dz_upwind::Union{MPISharedArray{mk_float,n_moment},Nothing}
+    dppar_dz_upwind::Union{MPISharedArray{mk_float,ndim_moment},Nothing}
     # this is the second-z-derivative of the parallel pressure
-    d2ppar_dz2::Union{MPISharedArray{mk_float,n_moment},Nothing}
+    d2ppar_dz2::Union{MPISharedArray{mk_float,ndim_moment},Nothing}
     # this is the z-derivative of the parallel heat flux
-    dqpar_dz::Union{MPISharedArray{mk_float,n_moment},Nothing}
+    dqpar_dz::Union{MPISharedArray{mk_float,ndim_moment},Nothing}
     # this is the z-derivative of the thermal speed based on the parallel temperature Tpar = ppar/dens: vth = sqrt(2*Tpar/m)
-    dvth_dz::Union{MPISharedArray{mk_float,n_moment},Nothing}
+    dvth_dz::Union{MPISharedArray{mk_float,ndim_moment},Nothing}
     # this is the entropy production dS/dt = - int (ln f sum_s' C_ss' [f_s,f_s']) d^3 v
-    dSdt::MPISharedArray{mk_float,n_moment}
+    dSdt::MPISharedArray{mk_float,ndim_moment}
     # Spatially varying amplitude of the external source term (third index is for different sources)
-    external_source_amplitude::MPISharedArray{mk_float,n_moment}
+    external_source_amplitude::MPISharedArray{mk_float,ndim_moment}
     # Spatially varying amplitude of the density moment of the external source term
-    external_source_density_amplitude::MPISharedArray{mk_float,n_moment}
+    external_source_density_amplitude::MPISharedArray{mk_float,ndim_moment}
     # Spatially varying amplitude of the parallel momentum moment of the external source
     # term
-    external_source_momentum_amplitude::MPISharedArray{mk_float,n_moment}
+    external_source_momentum_amplitude::MPISharedArray{mk_float,ndim_moment}
     # Spatially varying amplitude of the parallel pressure moment of the external source
     # term
-    external_source_pressure_amplitude::MPISharedArray{mk_float,n_moment}
+    external_source_pressure_amplitude::MPISharedArray{mk_float,ndim_moment}
     # Integral term for the PID controller of the external source term
-    external_source_controller_integral::MPISharedArray{mk_float,n_moment}
+    external_source_controller_integral::MPISharedArray{mk_float,ndim_moment}
     # Store coefficient 'A' from applying moment constraints so we can write it out as a
     # diagnostic
-    constraints_A_coefficient::Union{MPISharedArray{mk_float,n_moment},Nothing}
+    constraints_A_coefficient::Union{MPISharedArray{mk_float,ndim_moment},Nothing}
     # Store coefficient 'B' from applying moment constraints so we can write it out as a
     # diagnostic
-    constraints_B_coefficient::Union{MPISharedArray{mk_float,n_moment},Nothing}
+    constraints_B_coefficient::Union{MPISharedArray{mk_float,ndim_moment},Nothing}
     # Store coefficient 'C' from applying moment constraints so we can write it out as a
     # diagnostic
-    constraints_C_coefficient::Union{MPISharedArray{mk_float,n_moment},Nothing}
+    constraints_C_coefficient::Union{MPISharedArray{mk_float,ndim_moment},Nothing}
 end
 
 """
 moments_electron_substruct is a struct that contains moment information for electrons
 """
-struct moments_electron_substruct{n_moment_electron,n_moment_electron_source}
+struct moments_electron_substruct{ndim_moment_electron,ndim_moment_electron_source}
     # this is the particle density
-    dens::MPISharedArray{mk_float,n_moment_electron}
+    dens::MPISharedArray{mk_float,ndim_moment_electron}
     # flag that keeps track of if the density needs updating before use
     dens_updated::Base.RefValue{Bool}
     # this is the parallel flow
-    upar::MPISharedArray{mk_float,n_moment_electron}
+    upar::MPISharedArray{mk_float,ndim_moment_electron}
     # flag that keeps track of whether or not upar needs updating before use
     upar_updated::Base.RefValue{Bool}
     # this is the parallel pressure
-    ppar::MPISharedArray{mk_float,n_moment_electron}
+    ppar::MPISharedArray{mk_float,ndim_moment_electron}
     # flag that keeps track of whether or not ppar needs updating before use
     ppar_updated::Base.RefValue{Bool}
     # this is the temperature
-    temp::MPISharedArray{mk_float,n_moment_electron}
+    temp::MPISharedArray{mk_float,ndim_moment_electron}
     # flag that keeps track of whether or not temp needs updating before use
     temp_updated::Base.RefValue{Bool}
     # this is the parallel heat flux
-    qpar::MPISharedArray{mk_float,n_moment_electron}
+    qpar::MPISharedArray{mk_float,ndim_moment_electron}
     # flag that keeps track of whether or not qpar needs updating before use
     qpar_updated::Base.RefValue{Bool}
     # this is the thermal speed based on the parallel temperature Tpar = ppar/dens: vth = sqrt(2*Tpar/m)
-    vth::MPISharedArray{mk_float,n_moment_electron}
+    vth::MPISharedArray{mk_float,ndim_moment_electron}
     # this is the parallel friction force between ions and electrons
-    parallel_friction::MPISharedArray{mk_float,n_moment_electron}
+    parallel_friction::MPISharedArray{mk_float,ndim_moment_electron}
     # Spatially varying amplitude of the external source term
-    external_source_amplitude::MPISharedArray{mk_float,n_moment_electron_source}
+    external_source_amplitude::MPISharedArray{mk_float,ndim_moment_electron_source}
     # Spatially varying amplitude of the density moment of the external source term
-    external_source_density_amplitude::MPISharedArray{mk_float,n_moment_electron_source}
+    external_source_density_amplitude::MPISharedArray{mk_float,ndim_moment_electron_source}
     # Spatially varying amplitude of the parallel momentum moment of the external source
     # term
-    external_source_momentum_amplitude::MPISharedArray{mk_float,n_moment_electron_source}
+    external_source_momentum_amplitude::MPISharedArray{mk_float,ndim_moment_electron_source}
     # Spatially varying amplitude of the parallel pressure moment of the external source
     # term
-    external_source_pressure_amplitude::MPISharedArray{mk_float,n_moment_electron_source}
+    external_source_pressure_amplitude::MPISharedArray{mk_float,ndim_moment_electron_source}
     # if evolve_ppar = true, then the velocity variable is (vpa - upa)/vth, which introduces
     # a factor of vth for each power of wpa in velocity space integrals.
     # v_norm_fac accounts for this: it is vth if using the above definition for the parallel velocity,
     # and it is one otherwise
-    v_norm_fac::Union{MPISharedArray{mk_float,n_moment_electron},Nothing}
+    v_norm_fac::Union{MPISharedArray{mk_float,ndim_moment_electron},Nothing}
     # this is the z-derivative of the particle density
-    ddens_dz::Union{MPISharedArray{mk_float,n_moment_electron},Nothing}
+    ddens_dz::Union{MPISharedArray{mk_float,ndim_moment_electron},Nothing}
     # this is the z-derivative of the parallel flow
-    dupar_dz::Union{MPISharedArray{mk_float,n_moment_electron},Nothing}
+    dupar_dz::Union{MPISharedArray{mk_float,ndim_moment_electron},Nothing}
     # this is the z-derivative of the parallel pressure
-    dppar_dz::Union{MPISharedArray{mk_float,n_moment_electron},Nothing}
+    dppar_dz::Union{MPISharedArray{mk_float,ndim_moment_electron},Nothing}
     # this is the upwinded z-derivative of the parallel pressure
-    dppar_dz_upwind::Union{MPISharedArray{mk_float,n_moment_electron},Nothing}
+    dppar_dz_upwind::Union{MPISharedArray{mk_float,ndim_moment_electron},Nothing}
     # this is the second-z-derivative of the parallel pressure
-    d2ppar_dz2::Union{MPISharedArray{mk_float,n_moment_electron},Nothing}
+    d2ppar_dz2::Union{MPISharedArray{mk_float,ndim_moment_electron},Nothing}
     # this is the z-derivative of the parallel heat flux
-    dqpar_dz::Union{MPISharedArray{mk_float,n_moment_electron},Nothing}
+    dqpar_dz::Union{MPISharedArray{mk_float,ndim_moment_electron},Nothing}
     # this is the z-derivative of the parallel temperature Tpar = ppar/dens
-    dT_dz::Union{MPISharedArray{mk_float,n_moment_electron},Nothing}
+    dT_dz::Union{MPISharedArray{mk_float,ndim_moment_electron},Nothing}
     # this is the upwinded z-derivative of the temperature Tpar = ppar/dens
-    dT_dz_upwind::Union{MPISharedArray{mk_float,n_moment_electron},Nothing}
+    dT_dz_upwind::Union{MPISharedArray{mk_float,ndim_moment_electron},Nothing}
     # this is the z-derivative of the electron thermal speed vth = sqrt(2*Tpar/m)
-    dvth_dz::Union{MPISharedArray{mk_float,n_moment_electron},Nothing}
+    dvth_dz::Union{MPISharedArray{mk_float,ndim_moment_electron},Nothing}
     # Store coefficient 'A' from applying moment constraints so we can write it out as a
     # diagnostic
-    constraints_A_coefficient::Union{MPISharedArray{mk_float,n_moment_electron},Nothing}
+    constraints_A_coefficient::Union{MPISharedArray{mk_float,ndim_moment_electron},Nothing}
     # Store coefficient 'B' from applying moment constraints so we can write it out as a
     # diagnostic
-    constraints_B_coefficient::Union{MPISharedArray{mk_float,n_moment_electron},Nothing}
+    constraints_B_coefficient::Union{MPISharedArray{mk_float,ndim_moment_electron},Nothing}
     # Store coefficient 'C' from applying moment constraints so we can write it out as a
     # diagnostic
-    constraints_C_coefficient::Union{MPISharedArray{mk_float,n_moment_electron},Nothing}
+    constraints_C_coefficient::Union{MPISharedArray{mk_float,ndim_moment_electron},Nothing}
 end
 
 """
 """
-struct moments_neutral_substruct{n_moment_neutral}
+struct moments_neutral_substruct{ndim_moment_neutral}
     # this is the particle density
-    dens::MPISharedArray{mk_float,n_moment_neutral}
+    dens::MPISharedArray{mk_float,ndim_moment_neutral}
     # flag that keeps track of if the density needs updating before use
     # Note: may not be set for all species on this process, but this process only ever
     # sets/uses the value for the same subset of species. This means dens_update does
     # not need to be a shared memory array.
     dens_updated::Vector{Bool}
     # this is the particle mean velocity in z
-    uz::MPISharedArray{mk_float,n_moment_neutral}
+    uz::MPISharedArray{mk_float,ndim_moment_neutral}
     # flag that keeps track of if uz needs updating before use
     uz_updated::Vector{Bool}
     # this is the particle mean velocity in r
-    ur::MPISharedArray{mk_float,n_moment_neutral}
+    ur::MPISharedArray{mk_float,ndim_moment_neutral}
     # flag that keeps track of if ur needs updating before use
     ur_updated::Vector{Bool}
     # this is the particle mean velocity in zeta
-    uzeta::MPISharedArray{mk_float,n_moment_neutral}
+    uzeta::MPISharedArray{mk_float,ndim_moment_neutral}
     # flag that keeps track of if uzeta needs updating before use
     uzeta_updated::Vector{Bool}
     # this is the zz particle pressure tensor component
-    pz::MPISharedArray{mk_float,n_moment_neutral}
+    pz::MPISharedArray{mk_float,ndim_moment_neutral}
     # flag that keeps track of if pz needs updating before use
     pz_updated::Vector{Bool}
     # this is the rr particle pressure tensor component
-    pr::MPISharedArray{mk_float,n_moment_neutral}
+    pr::MPISharedArray{mk_float,ndim_moment_neutral}
     # flag that keeps track of if pr needs updating before use
     pr_updated::Vector{Bool}
     # this is the zetazeta particle pressure tensor component
-    pzeta::MPISharedArray{mk_float,n_moment_neutral}
+    pzeta::MPISharedArray{mk_float,ndim_moment_neutral}
     # flag that keeps track of if pzeta needs updating before use
     pzeta_updated::Vector{Bool}
     # this is the total (isotropic) particle pressure
-    ptot::MPISharedArray{mk_float,n_moment_neutral}
+    ptot::MPISharedArray{mk_float,ndim_moment_neutral}
     # this is the heat flux along z
-    qz::MPISharedArray{mk_float,n_moment_neutral}
+    qz::MPISharedArray{mk_float,ndim_moment_neutral}
     # flag that keeps track of if qz needs updating before use
     qz_updated::Vector{Bool}
     # this is the thermal speed based on the temperature T = ptot/dens: vth = sqrt(2*T/m)
-    vth::MPISharedArray{mk_float,n_moment_neutral}
+    vth::MPISharedArray{mk_float,ndim_moment_neutral}
     # if evolve_ppar = true, then the velocity variable is (vz - uz)/vth, which introduces
     # a factor of vth for each power of wz in velocity space integrals.
     # v_norm_fac accounts for this: it is vth if using the above definition for the parallel velocity,
     # and it is one otherwise
-    v_norm_fac::MPISharedArray{mk_float,n_moment_neutral}
+    v_norm_fac::MPISharedArray{mk_float,ndim_moment_neutral}
     # this is the z-derivative of the particle density
-    ddens_dz::Union{MPISharedArray{mk_float,n_moment_neutral},Nothing}
+    ddens_dz::Union{MPISharedArray{mk_float,ndim_moment_neutral},Nothing}
     # this is the z-derivative of the particle density
-    ddens_dz_upwind::Union{MPISharedArray{mk_float,n_moment_neutral},Nothing}
+    ddens_dz_upwind::Union{MPISharedArray{mk_float,ndim_moment_neutral},Nothing}
     # this is the second-z-derivative of the particle density
-    d2dens_dz2::Union{MPISharedArray{mk_float,n_moment_neutral},Nothing}
+    d2dens_dz2::Union{MPISharedArray{mk_float,ndim_moment_neutral},Nothing}
     # this is the z-derivative of the particle mean velocity in z
-    duz_dz::Union{MPISharedArray{mk_float,n_moment_neutral},Nothing}
+    duz_dz::Union{MPISharedArray{mk_float,ndim_moment_neutral},Nothing}
     # this is the upwinded z-derivative of the particle mean velocity in z
-    duz_dz_upwind::Union{MPISharedArray{mk_float,n_moment_neutral},Nothing}
+    duz_dz_upwind::Union{MPISharedArray{mk_float,ndim_moment_neutral},Nothing}
     # this is the second-z-derivative of the particle mean velocity in z
-    d2uz_dz2::Union{MPISharedArray{mk_float,n_moment_neutral},Nothing}
+    d2uz_dz2::Union{MPISharedArray{mk_float,ndim_moment_neutral},Nothing}
     # this is the z-derivative of the zz particle pressure tensor component
-    dpz_dz::Union{MPISharedArray{mk_float,n_moment_neutral},Nothing}
+    dpz_dz::Union{MPISharedArray{mk_float,ndim_moment_neutral},Nothing}
     # this is the upwinded z-derivative of the zz particle pressure tensor component
-    dpz_dz_upwind::Union{MPISharedArray{mk_float,n_moment_neutral},Nothing}
+    dpz_dz_upwind::Union{MPISharedArray{mk_float,ndim_moment_neutral},Nothing}
     # this is the second-z-derivative of the zz particle pressure tensor component
-    d2pz_dz2::Union{MPISharedArray{mk_float,n_moment_neutral},Nothing}
+    d2pz_dz2::Union{MPISharedArray{mk_float,ndim_moment_neutral},Nothing}
     # this is the z-derivative of the thermal speed based on the temperature T = ptot/dens: vth = sqrt(2*T/m)
-    dvth_dz::Union{MPISharedArray{mk_float,n_moment_neutral},Nothing}
+    dvth_dz::Union{MPISharedArray{mk_float,ndim_moment_neutral},Nothing}
     # this is the z-derivative of the heat flux along z
-    dqz_dz::Union{MPISharedArray{mk_float,n_moment_neutral},Nothing}
+    dqz_dz::Union{MPISharedArray{mk_float,ndim_moment_neutral},Nothing}
     # Spatially varying amplitude of the external source term
-    external_source_amplitude::MPISharedArray{mk_float,n_moment_neutral}
+    external_source_amplitude::MPISharedArray{mk_float,ndim_moment_neutral}
     # Spatially varying amplitude of the density moment of the external source term
-    external_source_density_amplitude::MPISharedArray{mk_float,n_moment_neutral}
+    external_source_density_amplitude::MPISharedArray{mk_float,ndim_moment_neutral}
     # Spatially varying amplitude of the parallel momentum moment of the external source
     # term
-    external_source_momentum_amplitude::MPISharedArray{mk_float,n_moment_neutral}
+    external_source_momentum_amplitude::MPISharedArray{mk_float,ndim_moment_neutral}
     # Spatially varying amplitude of the parallel pressure moment of the external source
     # term
-    external_source_pressure_amplitude::MPISharedArray{mk_float,n_moment_neutral}
+    external_source_pressure_amplitude::MPISharedArray{mk_float,ndim_moment_neutral}
     # Integral term for the PID controller of the external source term
-    external_source_controller_integral::MPISharedArray{mk_float,n_moment_neutral}
+    external_source_controller_integral::MPISharedArray{mk_float,ndim_moment_neutral}
     # Store coefficient 'A' from applying moment constraints so we can write it out as a
     # diagnostic
-    constraints_A_coefficient::Union{MPISharedArray{mk_float,n_moment_neutral},Nothing}
+    constraints_A_coefficient::Union{MPISharedArray{mk_float,ndim_moment_neutral},Nothing}
     # Store coefficient 'B' from applying moment constraints so we can write it out as a
     # diagnostic
-    constraints_B_coefficient::Union{MPISharedArray{mk_float,n_moment_neutral},Nothing}
+    constraints_B_coefficient::Union{MPISharedArray{mk_float,ndim_moment_neutral},Nothing}
     # Store coefficient 'C' from applying moment constraints so we can write it out as a
     # diagnostic
-    constraints_C_coefficient::Union{MPISharedArray{mk_float,n_moment_neutral},Nothing}
+    constraints_C_coefficient::Union{MPISharedArray{mk_float,ndim_moment_neutral},Nothing}
 end
 
 """
 """
-struct pdf_substruct{n_distribution}
-    norm::MPISharedArray{mk_float,n_distribution}
-    buffer::MPISharedArray{mk_float,n_distribution} # for collision operator terms when pdfs must be interpolated onto different velocity space grids, and for gyroaveraging
+struct pdf_substruct{ndim_distribution}
+    norm::MPISharedArray{mk_float,ndim_distribution}
+    buffer::MPISharedArray{mk_float,ndim_distribution} # for collision operator terms when pdfs must be interpolated onto different velocity space grids, and for gyroaveraging
 end
 
 """
 """
-struct electron_pdf_substruct{n_distribution}
-    norm::MPISharedArray{mk_float,n_distribution}
-    buffer::MPISharedArray{mk_float,n_distribution} # for collision operator terms when pdfs must be interpolated onto different velocity space grids
-    pdf_before_ion_timestep::MPISharedArray{mk_float,n_distribution}
+struct electron_pdf_substruct{ndim_distribution}
+    norm::MPISharedArray{mk_float,ndim_distribution}
+    buffer::MPISharedArray{mk_float,ndim_distribution} # for collision operator terms when pdfs must be interpolated onto different velocity space grids
+    pdf_before_ion_timestep::MPISharedArray{mk_float,ndim_distribution}
 end
 
 # struct of structs neatly contains i+n info?

From 6cb6058c7ae66fa7d2c0f0f1ebbe954f048cb40a Mon Sep 17 00:00:00 2001
From: Michael Hardman <29800382+mrhardman@users.noreply.github.com>
Date: Thu, 24 Oct 2024 09:51:11 +0100
Subject: [PATCH 22/41] Introduce constants for dimensions of arrays - comments
 on naming conventions appreciated.

---
 .../src/moment_kinetics_structs.jl            | 27 ++++++++++++++-----
 1 file changed, 21 insertions(+), 6 deletions(-)

diff --git a/moment_kinetics/src/moment_kinetics_structs.jl b/moment_kinetics/src/moment_kinetics_structs.jl
index 299955cb2..344aba8d7 100644
--- a/moment_kinetics/src/moment_kinetics_structs.jl
+++ b/moment_kinetics/src/moment_kinetics_structs.jl
@@ -7,6 +7,21 @@ module moment_kinetics_structs
 using ..communication
 using ..type_definitions: mk_float
 
+export ndim_pdf_ion, ndim_pdf_neutral, ndim_pdf_electron
+export ndim_field, ndim_moment, ndim_moment_electron
+export ndim_v, ndim_v_neutral
+
+# variables to define the number of dimensions in arrays
+const ndim_pdf_ion = 5 #(vpa + vperp + z + r + s)
+const ndim_pdf_neutral = 6 #(vz + vr + vzeta + z + r + s)
+const ndim_pdf_electron = 4 #(vpa + vperp + z + r)
+const ndim_field = 2 #(z + r)
+const ndim_moment = 3 #(z + r + s)
+const ndim_moment_electron = 2 #(z + r)
+const ndim_v = 2 #(vpa + vperp)
+const ndim_v_neutral = 3 #(vz + vr + vzeta)
+
+
 """
 """
 struct scratch_pdf{ndim_distribution_ion, ndim_moment, ndim_moment_electron,
@@ -334,11 +349,11 @@ end
 """
 struct pdf_struct
     #ion particles: s + r + z + vperp + vpa
-    ion::pdf_substruct{5}
+    ion::pdf_substruct{ndim_pdf_ion}
     # electron particles: r + z + vperp + vpa
-    electron::Union{electron_pdf_substruct{4},Nothing}
+    electron::Union{electron_pdf_substruct{ndim_pdf_electron},Nothing}
     #neutral particles: s + r + z + vzeta + vr + vz
-    neutral::pdf_substruct{6}
+    neutral::pdf_substruct{ndim_pdf_neutral}
 end
 
 """
@@ -365,11 +380,11 @@ end
 """
 struct boundary_distributions_struct
     # knudsen cosine distribution for imposing the neutral wall boundary condition
-    knudsen::MPISharedArray{mk_float,3}
+    knudsen::MPISharedArray{mk_float,ndim_v_neutral}
     # ion particle r boundary values (vpa,vperp,z,r,s)
-    pdf_rboundary_ion::MPISharedArray{mk_float,5}
+    pdf_rboundary_ion::MPISharedArray{mk_float,ndim_pdf_ion}
     # neutral particle r boundary values (vz,vr,vzeta,z,r,s)
-    pdf_rboundary_neutral::MPISharedArray{mk_float,6}
+    pdf_rboundary_neutral::MPISharedArray{mk_float,ndim_pdf_neutral}
 end
 
 """

From e680894bb4b70a5eded0b69dace16228778ccfa7 Mon Sep 17 00:00:00 2001
From: Michael Hardman <29800382+mrhardman@users.noreply.github.com>
Date: Thu, 24 Oct 2024 15:18:03 +0100
Subject: [PATCH 23/41] Implement JOmotani comments regarding removing template
 variables.

---
 .../src/moment_kinetics_structs.jl            | 96 +++++++++----------
 moment_kinetics/src/time_advance.jl           |  4 +-
 2 files changed, 50 insertions(+), 50 deletions(-)

diff --git a/moment_kinetics/src/moment_kinetics_structs.jl b/moment_kinetics/src/moment_kinetics_structs.jl
index 344aba8d7..de28014e8 100644
--- a/moment_kinetics/src/moment_kinetics_structs.jl
+++ b/moment_kinetics/src/moment_kinetics_structs.jl
@@ -16,6 +16,7 @@ const ndim_pdf_ion = 5 #(vpa + vperp + z + r + s)
 const ndim_pdf_neutral = 6 #(vz + vr + vzeta + z + r + s)
 const ndim_pdf_electron = 4 #(vpa + vperp + z + r)
 const ndim_field = 2 #(z + r)
+const ndim_gyrofield = 4 #(vperp + z + r + s)
 const ndim_moment = 3 #(z + r + s)
 const ndim_moment_electron = 2 #(z + r)
 const ndim_v = 2 #(vpa + vperp)
@@ -24,10 +25,9 @@ const ndim_v_neutral = 3 #(vz + vr + vzeta)
 
 """
 """
-struct scratch_pdf{ndim_distribution_ion, ndim_moment, ndim_moment_electron,
-                   ndim_distribution_neutral, ndim_moment_neutral}
+struct scratch_pdf
     # ions
-    pdf::MPISharedArray{mk_float, ndim_distribution_ion}
+    pdf::MPISharedArray{mk_float, ndim_pdf_ion}
     density::MPISharedArray{mk_float, ndim_moment}
     upar::MPISharedArray{mk_float, ndim_moment}
     ppar::MPISharedArray{mk_float, ndim_moment}
@@ -40,23 +40,23 @@ struct scratch_pdf{ndim_distribution_ion, ndim_moment, ndim_moment_electron,
     electron_pperp::MPISharedArray{mk_float, ndim_moment_electron}
     electron_temp::MPISharedArray{mk_float, ndim_moment_electron}
     # neutral particles 
-    pdf_neutral::MPISharedArray{mk_float, ndim_distribution_neutral}
-    density_neutral::MPISharedArray{mk_float, ndim_moment_neutral}
-    uz_neutral::MPISharedArray{mk_float, ndim_moment_neutral}
-    pz_neutral::MPISharedArray{mk_float, ndim_moment_neutral}
+    pdf_neutral::MPISharedArray{mk_float, ndim_pdf_neutral}
+    density_neutral::MPISharedArray{mk_float, ndim_moment}
+    uz_neutral::MPISharedArray{mk_float, ndim_moment}
+    pz_neutral::MPISharedArray{mk_float, ndim_moment}
 end
 
 """
 """
-struct scratch_electron_pdf{ndim_distribution_electron, ndim_moment_electron}
+struct scratch_electron_pdf
     # electrons
-    pdf_electron::MPISharedArray{mk_float, ndim_distribution_electron}
+    pdf_electron::MPISharedArray{mk_float, ndim_pdf_electron}
     electron_ppar::MPISharedArray{mk_float, ndim_moment_electron}
 end
 
 """
 """
-struct em_fields_struct{ndim_field, ndim_gyrofield}
+struct em_fields_struct
     # phi is the electrostatic potential
     phi::MPISharedArray{mk_float,ndim_field}
     # phi0 is the initial electrostatic potential
@@ -77,7 +77,7 @@ end
 
 """
 """
-struct moments_ion_substruct{ndim_moment,ndim_moment_wall}
+struct moments_ion_substruct{ndim_moment_wall}
     # this is the particle density
     dens::MPISharedArray{mk_float,ndim_moment}
     # flag that keeps track of if the density needs updating before use
@@ -168,7 +168,7 @@ end
 """
 moments_electron_substruct is a struct that contains moment information for electrons
 """
-struct moments_electron_substruct{ndim_moment_electron,ndim_moment_electron_source}
+struct moments_electron_substruct{ndim_moment_electron_source}
     # this is the particle density
     dens::MPISharedArray{mk_float,ndim_moment_electron}
     # flag that keeps track of if the density needs updating before use
@@ -239,94 +239,94 @@ end
 
 """
 """
-struct moments_neutral_substruct{ndim_moment_neutral}
+struct moments_neutral_substruct
     # this is the particle density
-    dens::MPISharedArray{mk_float,ndim_moment_neutral}
+    dens::MPISharedArray{mk_float,ndim_moment}
     # flag that keeps track of if the density needs updating before use
     # Note: may not be set for all species on this process, but this process only ever
     # sets/uses the value for the same subset of species. This means dens_update does
     # not need to be a shared memory array.
     dens_updated::Vector{Bool}
     # this is the particle mean velocity in z
-    uz::MPISharedArray{mk_float,ndim_moment_neutral}
+    uz::MPISharedArray{mk_float,ndim_moment}
     # flag that keeps track of if uz needs updating before use
     uz_updated::Vector{Bool}
     # this is the particle mean velocity in r
-    ur::MPISharedArray{mk_float,ndim_moment_neutral}
+    ur::MPISharedArray{mk_float,ndim_moment}
     # flag that keeps track of if ur needs updating before use
     ur_updated::Vector{Bool}
     # this is the particle mean velocity in zeta
-    uzeta::MPISharedArray{mk_float,ndim_moment_neutral}
+    uzeta::MPISharedArray{mk_float,ndim_moment}
     # flag that keeps track of if uzeta needs updating before use
     uzeta_updated::Vector{Bool}
     # this is the zz particle pressure tensor component
-    pz::MPISharedArray{mk_float,ndim_moment_neutral}
+    pz::MPISharedArray{mk_float,ndim_moment}
     # flag that keeps track of if pz needs updating before use
     pz_updated::Vector{Bool}
     # this is the rr particle pressure tensor component
-    pr::MPISharedArray{mk_float,ndim_moment_neutral}
+    pr::MPISharedArray{mk_float,ndim_moment}
     # flag that keeps track of if pr needs updating before use
     pr_updated::Vector{Bool}
     # this is the zetazeta particle pressure tensor component
-    pzeta::MPISharedArray{mk_float,ndim_moment_neutral}
+    pzeta::MPISharedArray{mk_float,ndim_moment}
     # flag that keeps track of if pzeta needs updating before use
     pzeta_updated::Vector{Bool}
     # this is the total (isotropic) particle pressure
-    ptot::MPISharedArray{mk_float,ndim_moment_neutral}
+    ptot::MPISharedArray{mk_float,ndim_moment}
     # this is the heat flux along z
-    qz::MPISharedArray{mk_float,ndim_moment_neutral}
+    qz::MPISharedArray{mk_float,ndim_moment}
     # flag that keeps track of if qz needs updating before use
     qz_updated::Vector{Bool}
     # this is the thermal speed based on the temperature T = ptot/dens: vth = sqrt(2*T/m)
-    vth::MPISharedArray{mk_float,ndim_moment_neutral}
+    vth::MPISharedArray{mk_float,ndim_moment}
     # if evolve_ppar = true, then the velocity variable is (vz - uz)/vth, which introduces
     # a factor of vth for each power of wz in velocity space integrals.
     # v_norm_fac accounts for this: it is vth if using the above definition for the parallel velocity,
     # and it is one otherwise
-    v_norm_fac::MPISharedArray{mk_float,ndim_moment_neutral}
+    v_norm_fac::MPISharedArray{mk_float,ndim_moment}
     # this is the z-derivative of the particle density
-    ddens_dz::Union{MPISharedArray{mk_float,ndim_moment_neutral},Nothing}
+    ddens_dz::Union{MPISharedArray{mk_float,ndim_moment},Nothing}
     # this is the z-derivative of the particle density
-    ddens_dz_upwind::Union{MPISharedArray{mk_float,ndim_moment_neutral},Nothing}
+    ddens_dz_upwind::Union{MPISharedArray{mk_float,ndim_moment},Nothing}
     # this is the second-z-derivative of the particle density
-    d2dens_dz2::Union{MPISharedArray{mk_float,ndim_moment_neutral},Nothing}
+    d2dens_dz2::Union{MPISharedArray{mk_float,ndim_moment},Nothing}
     # this is the z-derivative of the particle mean velocity in z
-    duz_dz::Union{MPISharedArray{mk_float,ndim_moment_neutral},Nothing}
+    duz_dz::Union{MPISharedArray{mk_float,ndim_moment},Nothing}
     # this is the upwinded z-derivative of the particle mean velocity in z
-    duz_dz_upwind::Union{MPISharedArray{mk_float,ndim_moment_neutral},Nothing}
+    duz_dz_upwind::Union{MPISharedArray{mk_float,ndim_moment},Nothing}
     # this is the second-z-derivative of the particle mean velocity in z
-    d2uz_dz2::Union{MPISharedArray{mk_float,ndim_moment_neutral},Nothing}
+    d2uz_dz2::Union{MPISharedArray{mk_float,ndim_moment},Nothing}
     # this is the z-derivative of the zz particle pressure tensor component
-    dpz_dz::Union{MPISharedArray{mk_float,ndim_moment_neutral},Nothing}
+    dpz_dz::Union{MPISharedArray{mk_float,ndim_moment},Nothing}
     # this is the upwinded z-derivative of the zz particle pressure tensor component
-    dpz_dz_upwind::Union{MPISharedArray{mk_float,ndim_moment_neutral},Nothing}
+    dpz_dz_upwind::Union{MPISharedArray{mk_float,ndim_moment},Nothing}
     # this is the second-z-derivative of the zz particle pressure tensor component
-    d2pz_dz2::Union{MPISharedArray{mk_float,ndim_moment_neutral},Nothing}
+    d2pz_dz2::Union{MPISharedArray{mk_float,ndim_moment},Nothing}
     # this is the z-derivative of the thermal speed based on the temperature T = ptot/dens: vth = sqrt(2*T/m)
-    dvth_dz::Union{MPISharedArray{mk_float,ndim_moment_neutral},Nothing}
+    dvth_dz::Union{MPISharedArray{mk_float,ndim_moment},Nothing}
     # this is the z-derivative of the heat flux along z
-    dqz_dz::Union{MPISharedArray{mk_float,ndim_moment_neutral},Nothing}
+    dqz_dz::Union{MPISharedArray{mk_float,ndim_moment},Nothing}
     # Spatially varying amplitude of the external source term
-    external_source_amplitude::MPISharedArray{mk_float,ndim_moment_neutral}
+    external_source_amplitude::MPISharedArray{mk_float,ndim_moment}
     # Spatially varying amplitude of the density moment of the external source term
-    external_source_density_amplitude::MPISharedArray{mk_float,ndim_moment_neutral}
+    external_source_density_amplitude::MPISharedArray{mk_float,ndim_moment}
     # Spatially varying amplitude of the parallel momentum moment of the external source
     # term
-    external_source_momentum_amplitude::MPISharedArray{mk_float,ndim_moment_neutral}
+    external_source_momentum_amplitude::MPISharedArray{mk_float,ndim_moment}
     # Spatially varying amplitude of the parallel pressure moment of the external source
     # term
-    external_source_pressure_amplitude::MPISharedArray{mk_float,ndim_moment_neutral}
+    external_source_pressure_amplitude::MPISharedArray{mk_float,ndim_moment}
     # Integral term for the PID controller of the external source term
-    external_source_controller_integral::MPISharedArray{mk_float,ndim_moment_neutral}
+    external_source_controller_integral::MPISharedArray{mk_float,ndim_moment}
     # Store coefficient 'A' from applying moment constraints so we can write it out as a
     # diagnostic
-    constraints_A_coefficient::Union{MPISharedArray{mk_float,ndim_moment_neutral},Nothing}
+    constraints_A_coefficient::Union{MPISharedArray{mk_float,ndim_moment},Nothing}
     # Store coefficient 'B' from applying moment constraints so we can write it out as a
     # diagnostic
-    constraints_B_coefficient::Union{MPISharedArray{mk_float,ndim_moment_neutral},Nothing}
+    constraints_B_coefficient::Union{MPISharedArray{mk_float,ndim_moment},Nothing}
     # Store coefficient 'C' from applying moment constraints so we can write it out as a
     # diagnostic
-    constraints_C_coefficient::Union{MPISharedArray{mk_float,ndim_moment_neutral},Nothing}
+    constraints_C_coefficient::Union{MPISharedArray{mk_float,ndim_moment},Nothing}
 end
 
 """
@@ -338,10 +338,10 @@ end
 
 """
 """
-struct electron_pdf_substruct{ndim_distribution}
-    norm::MPISharedArray{mk_float,ndim_distribution}
-    buffer::MPISharedArray{mk_float,ndim_distribution} # for collision operator terms when pdfs must be interpolated onto different velocity space grids
-    pdf_before_ion_timestep::MPISharedArray{mk_float,ndim_distribution}
+struct electron_pdf_substruct
+    norm::MPISharedArray{mk_float,ndim_pdf_electron}
+    buffer::MPISharedArray{mk_float,ndim_pdf_electron} # for collision operator terms when pdfs must be interpolated onto different velocity space grids
+    pdf_before_ion_timestep::MPISharedArray{mk_float,ndim_pdf_electron}
 end
 
 # struct of structs neatly contains i+n info?
@@ -351,7 +351,7 @@ struct pdf_struct
     #ion particles: s + r + z + vperp + vpa
     ion::pdf_substruct{ndim_pdf_ion}
     # electron particles: r + z + vperp + vpa
-    electron::Union{electron_pdf_substruct{ndim_pdf_electron},Nothing}
+    electron::Union{electron_pdf_substruct,Nothing}
     #neutral particles: s + r + z + vzeta + vr + vz
     neutral::pdf_substruct{ndim_pdf_neutral}
 end
diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl
index b9d85420d..65663efef 100644
--- a/moment_kinetics/src/time_advance.jl
+++ b/moment_kinetics/src/time_advance.jl
@@ -1624,7 +1624,7 @@ function setup_scratch_arrays(moments, pdf, n)
     # (possibly) the same for electrons, and the same for neutrals. The actual array will
     # be created at the end of the first step of the loop below, once we have a
     # `scratch_pdf` object of the correct type.
-    scratch = Vector{scratch_pdf{5,3,2,6,3}}(undef, n)
+    scratch = Vector{scratch_pdf}(undef, n)
     pdf_dims = size(pdf.ion.norm)
     moment_dims = size(moments.ion.dens)
     moment_electron_dims = size(moments.electron.dens)
@@ -1687,7 +1687,7 @@ function setup_electron_scratch_arrays(moments, pdf, n)
     # array for electrons.
     # The actual array will be created at the end of the first step of the loop below,
     # once we have a `scratch_electron_pdf` object of the correct type.
-    scratch = Vector{scratch_electron_pdf{4,2}}(undef, n)
+    scratch = Vector{scratch_electron_pdf}(undef, n)
     pdf_dims = size(pdf.electron.norm)
     moment_dims = size(moments.electron.dens)
 

From 8d27207f9dc21204799c3a4584166b43e5dd6d39 Mon Sep 17 00:00:00 2001
From: Michael Hardman <29800382+mrhardman@users.noreply.github.com>
Date: Thu, 24 Oct 2024 15:36:22 +0100
Subject: [PATCH 24/41] Initial improvement of autodocs for fokker_planck.jl

---
 moment_kinetics/src/fokker_planck.jl | 84 +++++++++++++++++++---------
 1 file changed, 59 insertions(+), 25 deletions(-)

diff --git a/moment_kinetics/src/fokker_planck.jl b/moment_kinetics/src/fokker_planck.jl
index fb5ac876b..521dca83d 100644
--- a/moment_kinetics/src/fokker_planck.jl
+++ b/moment_kinetics/src/fokker_planck.jl
@@ -1,5 +1,5 @@
 """
-module for including the Full-F Fokker-Planck Collision Operator
+Module for including the Full-F Fokker-Planck Collision Operator.
 
 The functions in this module are split into two groups. 
 
@@ -77,10 +77,10 @@ using ..reference_parameters: setup_reference_parameters
 Function for reading Fokker Planck collision operator input parameters. 
 Structure the namelist as follows.
 
-[fokker_planck_collisions]
-use_fokker_planck = true
-nuii = 1.0
-frequency_option = "manual"
+    [fokker_planck_collisions]
+    use_fokker_planck = true
+    nuii = 1.0
+    frequency_option = "manual"
 """
 function setup_fkpl_collisions_input(toml_input::Dict)
     reference_params = setup_reference_parameters(toml_input)
@@ -152,7 +152,7 @@ end
 ########################################################
 
 """
-function that initialises the arrays needed for Fokker Planck collisions
+Function that initialises the arrays needed for Fokker Planck collisions
 using numerical integration to compute the Rosenbluth potentials only
 at the boundary and using an elliptic solve to obtain the potentials 
 in the rest of the velocity space domain.
@@ -238,7 +238,8 @@ end
 """
 Function for advancing with the explicit, weak-form, self-collision operator
 using the existing method for computing the Rosenbluth potentials, with
-the addition of cross-species collisions against fixed Maxwellian distribution functions
+the addition of cross-species collisions against fixed Maxwellian distribution functions
+where the Rosenbluth potentials are specified using analytical results.
 """
 @timeit global_timer explicit_fp_collisions_weak_form_Maxwellian_cross_species!(
                          pdf_out, pdf_in, dSdt, composition, collisions, dt,
@@ -305,7 +306,7 @@ end
 
 
 """
-Function for advancing with the explicit, weak-form, self-collision operator
+Function for advancing with the explicit, weak-form, self-collision operator.
 """
 @timeit global_timer explicit_fokker_planck_collisions_weak_form!(
                          pdf_out, pdf_in, dSdt, composition, collisions, dt,
@@ -503,11 +504,10 @@ Function for computing the collision operator
 ```math
 \\sum_{s^\\prime} C[F_{s},F_{s^\\prime}]
 ```
-when 
-```math
-F_{s^\\prime}
-```
-is an analytically specified Maxwellian distribution
+when \$F_{s^\\prime}\$
+is an analytically specified Maxwellian distribution and
+the corresponding Rosenbluth potentials
+are specified using analytical results.
 """
 @timeit global_timer fokker_planck_collision_operator_weak_form_Maxwellian_Fsp!(
                          ffs_in, nuref::mk_float, ms::mk_float, Zs::mk_float,
@@ -590,11 +590,18 @@ is an analytically specified Maxwellian distribution
     return nothing
 end
 
-# solves A x = b for a matrix of the form
-# A00  0    A02
-# 0    A11  A12
-# A02  A12  A22
-# appropriate for the moment numerical conserving terms
+"""
+Function that solves `A x = b` for a matrix of the form
+```math
+\\begin{array}{ccc}
+A_{00} & 0 & A_{02} \\\\
+0 & A_{11} & A_{12} \\\\
+A_{02} & A_{12} & A_{22} \\\\
+\\end{array}
+```
+appropriate for the moment numerical conserving terms used in
+the Fokker-Planck collision operator.
+"""
 function symmetric_matrix_inverse(A00,A02,A11,A12,A22,b0,b1,b2)
     # matrix determinant
     detA = A00*(A11*A22 - A12^2) - A11*A02^2
@@ -615,11 +622,17 @@ function symmetric_matrix_inverse(A00,A02,A11,A12,A22,b0,b1,b2)
     return x0, x1, x2
 end
 
-# solves A x = b for a matrix of the form
-# A00  A01  A02
-# A01  A11  A12
-# A02  A12  A22
-# appropriate for the moment numerical conserving terms
+"""
+Function that solves `A x = b` for a matrix of the form
+```math
+\\begin{array}{ccc}
+A_{00} & A_{01} & A_{02} \\\\
+A_{01} & A_{11} & A_{12} \\\\
+A_{02} & A_{12} & A_{22} \\\\
+\\end{array}
+```
+appropriate for moment numerical conserving terms. 
+"""
 function symmetric_matrix_inverse(A00,A01,A02,A11,A12,A22,b0,b1,b2)
     # matrix determinant
     detA = A00*(A11*A22 - A12^2) - A01*(A01*A22 - A12*A02) + A02*(A01*A12 - A11*A02)
@@ -640,6 +653,18 @@ function symmetric_matrix_inverse(A00,A01,A02,A11,A12,A22,b0,b1,b2)
     return x0, x1, x2
 end
 
+"""
+Function that applies numerical-error correcting terms to ensure
+numerical conservation of the moments `density, upar, pressure` in the self-collision operator.
+Modifies the collision operator such that the operator becomes
+```math
+C_{ss} = C^\\ast_{ss}[F_s,F_{s}] - \\left(x_0 + x_1(v_{\\|}-u_{\\|})+ x_2(v_\\perp^2 +(v_{\\|}-u_{\\|})^2)\\right)F_s
+```
+where \$C^\\ast_{ss}[F_s,F_{s}]\$ is the weak-form self-collision operator computed using 
+the finite-element implementation, \$u_{\\|}\$ is the parallel velocity of \$F_s\$,
+and \$x_0,x_1,x_2\$ are parameters that are chosen so that \$C_{ss}\$
+conserves density, parallel velocity and pressure of \$F_s\$.
+"""
 function conserving_corrections!(CC,pdf_in,vpa,vperp,dummy_vpavperp)
     begin_anyv_region()
     x0, x1, x2, upar = 0.0, 0.0, 0.0, 0.0
@@ -687,6 +712,15 @@ function conserving_corrections!(CC,pdf_in,vpa,vperp,dummy_vpavperp)
     end
 end
 
+"""
+Function that applies a numerical-error correcting term to ensure
+numerical conservation of the `density` in the collision operator.
+```math
+C_{ss^\\prime} = C^\\ast_{ss}[F_s,F_{s^\\prime}] - x_0 F_s
+```
+where \$C^\\ast_{ss}[F_s,F_{s^\\prime}]\$ is the weak-form collision operator computed using 
+the finite-element implementation.
+"""
 function density_conserving_correction!(CC,pdf_in,vpa,vperp,dummy_vpavperp)
     begin_anyv_region()
     x0 = 0.0
@@ -734,7 +768,7 @@ end
 
 
 """
-allocate the required ancilliary arrays 
+Function that allocates the required ancillary arrays for direct integration routines.
 """
 function allocate_fokkerplanck_arrays_direct_integration(vperp,vpa)
     nvpa = vpa.n
@@ -784,7 +818,7 @@ function allocate_fokkerplanck_arrays_direct_integration(vperp,vpa)
 end
 
 """
-function that initialises the arrays needed to calculate the Rosenbluth potentials
+Function that initialises the arrays needed to calculate the Rosenbluth potentials
 by direct integration. As this function is only supported to keep the testing
 of the direct integration method, the struct 'fka' created here does not contain
 all of the arrays necessary to compute the weak-form operator. This functionality

From c8e7c9b717b05683d5f2d9080a32edc8878f5de4 Mon Sep 17 00:00:00 2001
From: Michael Hardman <29800382+mrhardman@users.noreply.github.com>
Date: Fri, 25 Oct 2024 12:59:12 +0100
Subject: [PATCH 25/41] Catch bug where scale and shift factors were calculated
 with functions rather than using coord.element_scale and coord.element_shift
 for get_KJ_local!() matrix. Introduce initial explanatory documentation.

---
 moment_kinetics/src/gauss_legendre.jl | 165 ++++++++++++++++++++------
 1 file changed, 132 insertions(+), 33 deletions(-)

diff --git a/moment_kinetics/src/gauss_legendre.jl b/moment_kinetics/src/gauss_legendre.jl
index 97c31d54e..34548845a 100644
--- a/moment_kinetics/src/gauss_legendre.jl
+++ b/moment_kinetics/src/gauss_legendre.jl
@@ -34,9 +34,11 @@ using ..lagrange_polynomials: lagrange_poly_optimised
 using ..moment_kinetics_structs: weak_discretization_info
 
 
+#structs for passing around matrices for taking
+#the derivatives on Gauss-Legendre points in 1D
 """
-structs for passing around matrices for taking
-the derivatives on Gauss-Legendre points in 1D
+A struct for passing around elemental matrices
+on Gauss-Legendre points in 1D
 """
 struct gausslegendre_base_info
     # elementwise differentiation matrix (ngrid*ngrid)
@@ -83,6 +85,11 @@ struct gausslegendre_base_info
     Y31::Array{mk_float,3}
 end
 
+"""
+A struct for Gauss-Legendre arrays needed for global operations in 1D,
+contains the struct of elemental matrices for Lobatto and Radau points,
+as well as some assembled 1D global matrices.
+"""
 struct gausslegendre_info{TSparse, TSparseCSR, TLU, TLmat, TLmatLU} <: weak_discretization_info
     lobatto::gausslegendre_base_info
     radau::gausslegendre_base_info
@@ -114,6 +121,9 @@ struct gausslegendre_info{TSparse, TSparseCSR, TLU, TLmat, TLmatLU} <: weak_disc
     Qmat::Array{mk_float,2}
 end
 
+"""
+Function to create `gausslegendre_info` struct.
+"""
 function setup_gausslegendre_pseudospectral(coord; collision_operator_dim=true)
     lobatto = setup_gausslegendre_pseudospectral_lobatto(coord,collision_operator_dim=collision_operator_dim)
     radau = setup_gausslegendre_pseudospectral_radau(coord,collision_operator_dim=collision_operator_dim)
@@ -153,6 +163,9 @@ function setup_gausslegendre_pseudospectral(coord; collision_operator_dim=true)
                               mass_matrix_lu,L_matrix_lu,Qmat)
 end
 
+"""
+Function that fills an `n` x `n` array with the values of the identity matrix `I`.
+"""
 function identity_matrix!(I,n)
     @. I[:,:] = 0.0
     for i in 1:n
@@ -161,6 +174,11 @@ function identity_matrix!(I,n)
     return nothing    
 end
 
+"""
+Function that creates the `gausslegendre_base_info` struct for Lobatto points.
+If `collision_operator_dim = true`, assign the elemental matrices used to
+implement the Fokker-Planck collision operator.
+"""
 function setup_gausslegendre_pseudospectral_lobatto(coord; collision_operator_dim=true)
     x, w = gausslobatto(coord.ngrid)
     Dmat = allocate_float(coord.ngrid, coord.ngrid)
@@ -231,6 +249,11 @@ function setup_gausslegendre_pseudospectral_lobatto(coord; collision_operator_di
             K0,K1,K2,P0,P1,P2,D0,Y00,Y01,Y10,Y11,Y20,Y21,Y30,Y31)
 end
 
+"""
+Function that creates the `gausslegendre_base_info` struct for Radau points.
+If `collision_operator_dim = true`, assign the elemental matrices used to
+implement the Fokker-Planck collision operator.
+"""
 function setup_gausslegendre_pseudospectral_radau(coord; collision_operator_dim=true)
     # Gauss-Radau points on [-1,1)
     x, w = gaussradau(coord.ngrid)
@@ -303,6 +326,10 @@ function setup_gausslegendre_pseudospectral_radau(coord; collision_operator_dim=
             K0,K1,K2,P0,P1,P2,D0,Y00,Y01,Y10,Y11,Y20,Y21,Y30,Y31)
 end 
 
+"""
+A function that takes the first derivative in each element of `coord.grid`,
+leaving the result (element-wise) in `coord.scratch_2d`.
+"""
 function elementwise_derivative!(coord, ff, gausslegendre::gausslegendre_info)
     df = coord.scratch_2d
     # define local variable nelement for convenience
@@ -341,11 +368,19 @@ function elementwise_derivative!(coord, ff, gausslegendre::gausslegendre_info)
     return nothing
 end
 
+"""
+Wrapper function for element-wise derivatives with advection.
+Note that the Gauss-Legendre spectral element method implemented here
+does not use upwinding within an element.
+"""
 # Spectral element method does not use upwinding within an element
 function elementwise_derivative!(coord, ff, adv_fac, spectral::gausslegendre_info)
     return elementwise_derivative!(coord, ff, spectral)
 end
 
+"""
+Function to perform interpolation on a single element.
+"""
 function single_element_interpolate!(result, newgrid, f, imin, imax, ielement, coord,
                                      gausslegendre::gausslegendre_base_info)
     n_new = length(newgrid)
@@ -369,6 +404,9 @@ function single_element_interpolate!(result, newgrid, f, imin, imax, ielement, c
     return nothing
 end
 
+"""
+Function to carry out a 1D (global) mass matrix solve.
+"""
 function mass_matrix_solve!(f, b, spectral::gausslegendre_info)
     # invert mass matrix system
     y = spectral.mass_matrix_lu \ b
@@ -377,13 +415,13 @@ function mass_matrix_solve!(f, b, spectral::gausslegendre_info)
 end
 
 """
-Formula for differentiation matrix taken from p196 of Chpt `The Spectral Elemtent Method' of 
+Formula for Gauss-Legendre-Lobatto differentiation matrix taken from p196 of Chpt `The Spectral Element Method' of 
 `Computational Seismology'. Heiner Igel First Edition. Published in 2017 by Oxford University Press.
 Or https://doc.nektar.info/tutorials/latest/fundamentals/differentiation/fundamentals-differentiationch2.html
 
-D -- differentiation matrix 
-x -- Gauss-Legendre-Lobatto points in [-1,1]
-ngrid -- number of points per element (incl. boundary points)
+    D -- differentiation matrix 
+    x -- Gauss-Legendre-Lobatto points in [-1,1]
+    ngrid -- number of points per element (incl. boundary points)
 
 Note that D has does not include a scaling factor
 """
@@ -408,13 +446,14 @@ function gausslobattolegendre_differentiation_matrix!(D::Array{Float64,2},x::Arr
     end 
     return nothing
 end
+
 """
-From 
+Formula for Gauss-Legendre-Radau differentiation matrix taken from
 https://doc.nektar.info/tutorials/latest/fundamentals/differentiation/fundamentals-differentiationch2.html
 
-D -- differentiation matrix 
-x -- Gauss-Legendre-Radau points in [-1,1)
-ngrid -- number of points per element (incl. boundary points)
+    D -- differentiation matrix 
+    x -- Gauss-Legendre-Radau points in [-1,1)
+    ngrid -- number of points per element (incl. boundary points)
 
 Note that D has does not include a scaling factor
 """
@@ -449,12 +488,12 @@ function gaussradaulegendre_differentiation_matrix!(D::Array{Float64,2},x::Array
 end
 
 """
-Gauss-Legendre derivative at arbitrary x values, for boundary condition on radau points
-D0 -- the vector
-xj -- the x location where the derivative is evaluated 
-ngrid -- number of points in x
-x -- the grid from -1, 1
-Note that D0 is not scaled to the physical grid
+Gauss-Legendre derivative at arbitrary x values, for boundary condition on Radau points.
+    D0 -- the vector
+    xj -- the x location where the derivative is evaluated 
+    ngrid -- number of points in x
+    x -- the grid from -1, 1
+Note that D0 is not scaled to the physical grid with a scaling factor.
 """
 function GaussLegendre_derivative_vector!(D0,xj,ngrid,x,wgts;radau=false)
     # coefficient in expansion of 
@@ -482,7 +521,7 @@ function GaussLegendre_derivative_vector!(D0,xj,ngrid,x,wgts;radau=false)
 end
 
 """
-result of the inner product of Legendre polys of order k
+Result of the inner product of Legendre polynomials of order k.
 """
 function Legendre_h_n(k)
     h_n = 2.0/(2.0*k + 1)
@@ -491,8 +530,42 @@ end
 
 
 """
-assign abitrary weak inner product matrix Q on a 1D line with Jacobian = 1
-matrix Q acts on a single vector x such that y = Q * x is also a vector
+Assign arbitrary weak inner product matrix `Q` on a 1D line with Jacobian equal to 1.
+The matrix `Q` acts on a single vector `x` such that `y = Q * x` is also a vector.
+
+We use a projection onto Gauss-Legendre polynomials to carry out the calculation
+in two steps (see, e.g, S. A. Teukolsky, Short note on the mass matrix for Gauss–Lobatto grid
+points, J. Comput. Phys. 283 (2015) 408–413. https://doi.org/10.1016/j.jcp.2014.12.012).
+First, we write the desired matrix elements in terms of Legendre polynomials
+```math
+   l_i(x) = \\sum_j \\frac{P_j(x)P_j(x_i)w_i}{\\gamma_j}
+```
+with \$w_i\$ the weights from an integration on the Gauss-Legendre-Lobatto (or Radau) points \$x_i\$,
+i.e., 
+```math 
+ \\int^1_{-1} f(x) d x = \\sum_{i} f(x_i)w_i,
+```
+and \$\\gamma_j = \\sum_k w_k P_j(x_k)P_j(x_k)\$ the numerical inner-product.
+Then, a matrix element can be expressed in integrals over Legendre polynomials
+rather than Lagrange polynomials, i.e.,
+```math
+   M_{ij} = \\int^1_{-1} l_i(x)l_j(x) d x = \\sum_{mn} \\frac{w_i P_m(x_i) w_j P_n(x_j)}{\\gamma_m\\gamma_n} \\int^{1}_{-1} P_m(x)P_n(x) d x. 
+```
+Defining 
+```math
+  A_{mn} = \\int^{1}_{-1} P_m(x)P_n(x) d x, 
+```
+we can thus write
+```math
+   M_{ij} = \\sum_{mn} \\frac{w_i P_m(x_i) w_j P_n(x_j)}{\\gamma_m\\gamma_n} A_{mn}. 
+```
+We can use a quadrature which yields exact results (to machine precision)
+to evaluate \$A_{mn}\$ using fast library functions for the Legendre polynomials,
+and then carry out the sum \$\\sum_{mn}\$ to obtain exact results (to machine-precision).
+Here we use a Gauss-Legendre integration quadrature with exact results up to 
+polynomials with order \$k_{max} = 4N +1\$, with \$N=\$`ngrid` and the highest order polynomial product
+that we integrate is \$P_{N-1}(x)P_{N-1}(x)x^2\$, which has order \$k=2N < k_{max}\$.
+
 """
 function GaussLegendre_weak_product_matrix!(QQ::Array{mk_float,2},ngrid,x,wgts,option;radau=false)
     # coefficient in expansion of 
@@ -627,9 +700,10 @@ function GaussLegendre_weak_product_matrix!(QQ::Array{mk_float,2},ngrid,x,wgts,o
 end
 
 """
-assign abitrary weak inner product matrix Q on a 1D line with Jacobian = 1
-matrix Q acts on two vectors x1 and x2 such that the quadratic form 
-y = x1 * Q * x2 is also a vector
+Assign arbitrary weak (nonlinear) inner product matrix `Q` on a 1D line with Jacobian equal to 1.
+The matrix `Q` acts on two vectors `x1` and `x2` such that the quadratic form 
+`y = x1 * Q * x2` is also a vector. See documentation of corresponding function
+for linear inner product matrices.
 """
 function GaussLegendre_weak_product_matrix!(QQ::Array{mk_float,3},ngrid,x,wgts,option;radau=false)
     # coefficient in expansion of 
@@ -756,10 +830,18 @@ function GaussLegendre_weak_product_matrix!(QQ::Array{mk_float,3},ngrid,x,wgts,o
     return nothing
 end
 
+"""
+Function for computing the scale factor on a grid with uniformly spaced element boundaries.
+Unused.
+"""
 function scale_factor_func(L,nelement_global)
     return 0.5*L/float(nelement_global)
 end
 
+"""
+Function for computing the shift factor on a grid with uniformly spaced element boundaries.
+Unused.
+"""
 function shift_factor_func(L,nelement_global,nelement_local,irank,ielement_local)
     #ielement_global = ielement_local # for testing + irank*nelement_local
     ielement_global = ielement_local + irank*nelement_local # proper line for future distributed memory MPI use
@@ -767,13 +849,17 @@ function shift_factor_func(L,nelement_global,nelement_local,irank,ielement_local
     return shift
 end
 
+"""
+Function for finding the elemental index in the global distributed-memory grid.
+Distributed-memory for global finite-element operators is not yet supported.
+"""
 function ielement_global_func(nelement_local,irank,ielement_local)
     return ielement_global = ielement_local + irank*nelement_local
 end
 
 """
-function for setting up the full Gauss-Legendre-Lobatto
-grid and collocation point weights
+Function for setting up the full Gauss-Legendre-Lobatto
+grid and collocation point weights.
 """
 function scaled_gauss_legendre_lobatto_grid(ngrid, nelement_local, n_local, element_scale, element_shift, imin, imax)
     # get Gauss-Legendre-Lobatto points and weights on [-1,1]
@@ -802,9 +888,8 @@ function scaled_gauss_legendre_lobatto_grid(ngrid, nelement_local, n_local, elem
 end
 
 """
-function for setting up the full Gauss-Legendre-Radau
-grid and collocation point weights
-see comments of Gauss-Legendre-Lobatto routine above
+Function for setting up the full Gauss-Legendre-Radau
+grid and collocation point weights.
 """
 function scaled_gauss_legendre_radau_grid(ngrid, nelement_local, n_local, element_scale, element_shift, imin, imax, irank)
     # get Gauss-Legendre-Lobatto points and weights on [-1,1]
@@ -885,7 +970,7 @@ or
 `dirichlet_bc = true`, `b[1] = f[1]` (except for cylindrical coordinates), `b[end] = f[end]`
 
 in the function call, and create new matrices for this purpose
-in the gausslegendre_info struct. Currently the Laplacian matrix
+in the `gausslegendre_info` struct. Currently the Laplacian matrix
 is supported with boundary conditions.
 """
 function setup_global_weak_form_matrix!(QQ_global::Array{mk_float,2},
@@ -964,7 +1049,7 @@ the operators constructed from this function can only be used
 for differentiation, and not solving 1D ODEs.
 The shared points in the element assembly are
 averaged (instead of simply added) to be consistent with the
-derivative_elements_to_full_grid!() function in calculus.jl.
+`derivative_elements_to_full_grid!()` function in `calculus.jl`.
 """
 function setup_global_strong_form_matrix!(QQ_global::Array{mk_float,2},
                                           lobatto::gausslegendre_base_info,
@@ -1024,6 +1109,10 @@ function setup_global_strong_form_matrix!(QQ_global::Array{mk_float,2},
     return nothing
 end
 
+"""
+Construction function to provide the appropriate elemental 
+matrix `Q` to the global matrix assembly functions.
+"""
 function get_QQ_local!(QQ::Array{mk_float,2},ielement,
         lobatto::gausslegendre_base_info,
         radau::gausslegendre_base_info, 
@@ -1057,6 +1146,17 @@ function get_QQ_local!(QQ::Array{mk_float,2},ielement,
         return nothing
 end
 
+"""
+If called for `coord.name = vperp` elemental matrix `MM` on the \$i^{th}\$ element is
+```math
+ M_{jk} = \\int^{v_\\perp^U}_{v_\\perp^L}  \\varphi_j(v_\\perp)\\varphi_k(v_\\perp) v_\\perp d v_\\perp = \\int^1_{-1} (c_i + x s_i)l_j(x)l_k(x) s_i d x 
+```
+with \$c_i\$ and \$s_i\$ the appropriate shift and scale factors, respectively. 
+Otherwise, if called for any other coordinate elemental matrix `MM` is 
+```math
+ M_{jk} = \\int^{v_\\|^U}_{v_\\|^L}  \\varphi_j(v_\\|)\\varphi_k(v_\\|) d v_\\| = \\int^1_{-1} l_j(x)l_k(x) s_i d x.
+```
+"""
 function get_MM_local!(QQ,ielement,
         lobatto::gausslegendre_base_info,
         radau::gausslegendre_base_info, 
@@ -1159,8 +1259,8 @@ function get_KJ_local!(QQ,ielement,
         radau::gausslegendre_base_info, 
         coord)
         
-        scale_factor = scale_factor_func(coord.L,coord.nelement_global)
-        shift_factor = shift_factor_func(coord.L,coord.nelement_global,coord.nelement_local,coord.irank,ielement) + 0.5*coord.L
+        scale_factor = coord.element_scale[ielement]
+        shift_factor = coord.element_shift[ielement]
         if coord.name == "vperp" # assume integrals of form int^infty_0 (.) vperp d vperp
             # extra scale and shift factors required because of vperp^2 in integral
             if ielement > 1 || coord.irank > 0 # lobatto points
@@ -1331,10 +1431,9 @@ function get_PU_local!(QQ,ielement,
 end
 
 """
-construction function for nonlinear diffusion matrices, only
+Construction function for nonlinear diffusion matrices, only
 used in the assembly of the collision operator
 """
-
 function get_QQ_local!(QQ::AbstractArray{mk_float,3},
         ielement,lobatto::gausslegendre_base_info,
         radau::gausslegendre_base_info, 

From 884b1c7eadbd56a8e2aa5b31df52dddd875449bc Mon Sep 17 00:00:00 2001
From: Michael Hardman <29800382+mrhardman@users.noreply.github.com>
Date: Fri, 25 Oct 2024 15:48:51 +0100
Subject: [PATCH 26/41] Complete autodocs for important functions in
 gauss_legendre.jl

---
 moment_kinetics/src/gauss_legendre.jl | 134 +++++++++++++++++++++++---
 1 file changed, 123 insertions(+), 11 deletions(-)

diff --git a/moment_kinetics/src/gauss_legendre.jl b/moment_kinetics/src/gauss_legendre.jl
index 34548845a..c5e5ab677 100644
--- a/moment_kinetics/src/gauss_legendre.jl
+++ b/moment_kinetics/src/gauss_legendre.jl
@@ -1208,6 +1208,17 @@ function get_SS_local!(QQ,ielement,
         return nothing
 end
 
+"""
+If called for `coord.name = vperp` elemental matrix `KK` on the \$i^{th}\$ element is
+```math
+ K_{jk} = -\\int^{v_\\perp^U}_{v_\\perp^L} \\left(v_\\perp \\frac{\\partial\\varphi_j(v_\\perp)}{\\partial v_\\perp} + \\varphi_j(v_\\perp) \\right)
+ \\frac{\\partial\\varphi_k(v_\\perp)}{\\partial v_\\perp} d v_\\perp
+ = -\\int^1_{-1} \\left((c_i + x s_i)l_j^\\prime(x)/s_i + l_j(x)\\right)l_k^\\prime(x) d x
+```
+with \$c_i\$ and \$s_i\$ the appropriate shift and scale factors, respectively. 
+Otherwise, if called for any other coordinate elemental matrix `KK` is the same as `LL` (see `get_LL_local!()`).
+If `explicit_BC_terms = true`, boundary terms arising from integration by parts are included at the extreme boundary points.
+"""
 function get_KK_local!(QQ,ielement,
         lobatto::gausslegendre_base_info,
         radau::gausslegendre_base_info, 
@@ -1252,8 +1263,15 @@ function get_KK_local!(QQ,ielement,
         return nothing
 end
 
-# second derivative matrix with vperp^2 Jacobian factor if 
-# coord is vperp. Not useful for the vpa coordinate
+"""
+If called for `coord.name = vperp` elemental matrix `KJ` on the \$i^{th}\$ element is
+```math
+ (KJ)_{jk} = -\\int^{v_\\perp^U}_{v_\\perp^L} \\frac{\\partial\\varphi_j(v_\\perp)}{\\partial v_\\perp}\\frac{\\partial\\varphi_k(v_\\perp)}{\\partial v_\\perp} v_\\perp^2 d v_\\perp
+ = -\\int^1_{-1} (c_i + x s_i)^2l_j^\\prime(x)l_k^\\prime(x) d x /s_i
+```
+with \$c_i\$ and \$s_i\$ the appropriate shift and scale factors, respectively. 
+Otherwise, if called for any other coordinate elemental matrix `KJ` is the same as `LL` (see `get_LL_local!()`).
+"""
 function get_KJ_local!(QQ,ielement,
         lobatto::gausslegendre_base_info,
         radau::gausslegendre_base_info, 
@@ -1278,6 +1296,20 @@ function get_KJ_local!(QQ,ielement,
         return nothing
 end
 
+"""
+If called for `coord.name = vperp` elemental matrix `LL` on the \$i^{th}\$ element is
+```math
+ L_{jk} = -\\int^{v_\\perp^U}_{v_\\perp^L} \\frac{\\partial\\varphi_j(v_\\perp)}{\\partial v_\\perp}\\frac{\\partial\\varphi_k(v_\\perp)}{\\partial v_\\perp} v_\\perp d v_\\perp
+ = -\\int^1_{-1} (c_i + x s_i)l_j^\\prime(x)l_k^\\prime(x) d x /s_i
+```
+with \$c_i\$ and \$s_i\$ the appropriate shift and scale factors, respectively. 
+Otherwise, if called for any other coordinate elemental matrix `LL` is 
+```math
+ L_{jk} = -\\int^{v_\\|^U}_{v_\\|^L}  \\frac{\\partial\\varphi_j(v_\\|)}{\\partial v_\\|}\\frac{\\partial\\varphi_k(v_\\|)}{\\partial v_\\|} d v_\\| =
+ -\\int^1_{-1} l_j^\\prime(x)l_k^\\prime(x) d x /s_i.
+```
+If `explicit_BC_terms = true`, boundary terms arising from integration by parts are included at the extreme boundary points.
+"""
 function get_LL_local!(QQ,ielement,
         lobatto::gausslegendre_base_info,
         radau::gausslegendre_base_info, 
@@ -1334,8 +1366,14 @@ function get_DD_local!(QQ, ielement, lobatto::gausslegendre_base_info,
     return nothing
 end
 
-# mass matrix without vperp factor (matrix N)
-# only useful for the vperp coordinate
+"""
+If called for `coord.name = vperp` elemental matrix `MN` on the \$i^{th}\$ element is
+```math
+ (MN)_{jk} = \\int^{v_\\perp^U}_{v_\\perp^L}  \\varphi_j(v_\\perp)\\varphi_k(v_\\perp) d v_\\perp = \\int^1_{-1} l_j(x)l_k(x) s_i d x 
+```
+with \$c_i\$ and \$s_i\$ the appropriate shift and scale factors, respectively. 
+Otherwise, if called for any other coordinate elemental matrix `MN` is the same as `MM` (see `get_MM_local!()`).
+"""
 function get_MN_local!(QQ,ielement,
         lobatto::gausslegendre_base_info,
         radau::gausslegendre_base_info, 
@@ -1355,8 +1393,14 @@ function get_MN_local!(QQ,ielement,
         return nothing
 end
 
-# mass matrix with vperp^2 factor (matrix R)
-# only useful for the vperp coordinate
+"""
+If called for `coord.name = vperp` elemental matrix `MR` on the \$i^{th}\$ element is
+```math
+ (MR)_{jk} = \\int^{v_\\perp^U}_{v_\\perp^L}  \\varphi_j(v_\\perp)\\varphi_k(v_\\perp) v_\\perp^2 d v_\\perp = \\int^1_{-1} (c_i + s_i x)^2 l_j(x)l_k(x) s_i d x 
+```
+with \$c_i\$ and \$s_i\$ the appropriate shift and scale factors, respectively. 
+Otherwise, if called for any other coordinate elemental matrix `MR` is the same as `MM` (see `get_MM_local!()`).
+"""
 function get_MR_local!(QQ,ielement,
         lobatto::gausslegendre_base_info,
         radau::gausslegendre_base_info, 
@@ -1381,8 +1425,18 @@ function get_MR_local!(QQ,ielement,
         return nothing
 end
 
-# derivative matrix (matrix P, no integration by parts)
-# with vperp Jacobian factor if coord is vperp (matrix P)
+"""
+If called for `coord.name = vperp` elemental matrix `PP` on the \$i^{th}\$ element is
+```math
+ P_{jk} = \\int^{v_\\perp^U}_{v_\\perp^L}  \\varphi_j(v_\\perp)\\frac{\\partial\\varphi_k(v_\\perp)}{\\partial v_\\perp} v_\\perp d v_\\perp
+ = \\int^1_{-1} (c_i + x s_i)l_j(x)l_k^\\prime(x) d x 
+```
+with \$c_i\$ and \$s_i\$ the appropriate shift and scale factors, respectively. 
+Otherwise, if called for any other coordinate elemental matrix `PP` is 
+```math
+ P_{jk} = \\int^{v_\\|^U}_{v_\\|^L}  \\varphi_j(v_\\|)\\frac{\\partial\\varphi_k(v_\\|)}{\\partial v_\\|} d v_\\| = \\int^1_{-1} l_j(x)l_k^\\prime(x) d x.
+```
+"""
 function get_PP_local!(QQ,ielement,
         lobatto::gausslegendre_base_info,
         radau::gausslegendre_base_info, 
@@ -1403,9 +1457,15 @@ function get_PP_local!(QQ,ielement,
         return nothing
 end
 
-# derivative matrix (matrix P, no integration by parts)
-# with vperp^2 Jacobian factor if coord is vperp (matrix U)
-# not useful for vpa coordinate
+"""
+If called for `coord.name = vperp` elemental matrix `PU` on the \$i^{th}\$ element is
+```math
+ (PU)_{jk} = \\int^{v_\\perp^U}_{v_\\perp^L}  \\varphi_j(v_\\perp)\\frac{\\partial\\varphi_k(v_\\perp)}{\\partial v_\\perp} v_\\perp^2 d v_\\perp
+ = \\int^1_{-1} (c_i + x s_i)^2l_j(x)l_k^\\prime(x) d x 
+```
+with \$c_i\$ and \$s_i\$ the appropriate shift and scale factors, respectively. 
+Otherwise, if called for any other coordinate elemental matrix `PU` is the same as `PP` (see `get_PP_local!()`).
+"""
 function get_PU_local!(QQ,ielement,
         lobatto::gausslegendre_base_info,
         radau::gausslegendre_base_info, 
@@ -1451,6 +1511,19 @@ function get_QQ_local!(QQ::AbstractArray{mk_float,3},
         return nothing
 end
 
+"""
+If called for `coord.name = vperp` elemental matrix `YY0` on the \$i^{th}\$ element is
+```math
+ (YY0)_{jkm} = \\int^{v_\\perp^U}_{v_\\perp^L}  \\varphi_j(v_\\perp)\\varphi_k(v_\\perp)\\varphi_m(v_\\perp) v_\\perp d v_\\perp
+ = \\int^1_{-1} (c_i + x s_i)l_j(x)l_k(x)l_m(x) s_i d x 
+```
+with \$c_i\$ and \$s_i\$ the appropriate shift and scale factors, respectively. 
+Otherwise, if called for any other coordinate elemental matrix `YY0` is 
+```math
+ (YY0)_{jkm} = \\int^{v_\\|^U}_{v_\\|^L}  \\varphi_j(v_\\|)\\varphi_k(v_\\|)\\varphi_m(v_\\|) d v_\\|
+ = \\int^1_{-1} l_j(x)l_k(x)l_m(x) s_i d x.
+```
+"""
 function get_YY0_local!(QQ,ielement,
         lobatto::gausslegendre_base_info,
         radau::gausslegendre_base_info, 
@@ -1471,6 +1544,19 @@ function get_YY0_local!(QQ,ielement,
         return nothing
 end
 
+"""
+If called for `coord.name = vperp` elemental matrix `YY1` on the \$i^{th}\$ element is
+```math
+ (YY1)_{jkm} = \\int^{v_\\perp^U}_{v_\\perp^L}  \\varphi_j(v_\\perp)\\varphi_k(v_\\perp)\\frac{\\partial\\varphi_m(v_\\perp)}{\\partial v_\\perp} v_\\perp d v_\\perp
+ = \\int^1_{-1} (c_i + x s_i)l_j(x)l_k(x)l_m^\\prime(x) d x 
+```
+with \$c_i\$ and \$s_i\$ the appropriate shift and scale factors, respectively. 
+Otherwise, if called for any other coordinate elemental matrix `YY1` is 
+```math
+ (YY1)_{jkm} = \\int^{v_\\|^U}_{v_\\|^L}  \\varphi_j(v_\\|)\\varphi_k(v_\\|)\\frac{\\partial\\varphi_m(v_\\|)}{\\partial v_\\|} d v_\\|
+ = \\int^1_{-1} l_j(x)l_k(x)l_m^\\prime(x) d x.
+```
+"""
 function get_YY1_local!(QQ,ielement,
         lobatto::gausslegendre_base_info,
         radau::gausslegendre_base_info, 
@@ -1491,6 +1577,19 @@ function get_YY1_local!(QQ,ielement,
         return nothing
 end
 
+"""
+If called for `coord.name = vperp` elemental matrix `YY2` on the \$i^{th}\$ element is
+```math
+ (YY2)_{jkm} = \\int^{v_\\perp^U}_{v_\\perp^L}  \\varphi_j(v_\\perp)\\frac{\\partial\\varphi_k(v_\\perp)}{\\partial v_\\perp}\\frac{\\partial\\varphi_m(v_\\perp)}{\\partial v_\\perp} v_\\perp d v_\\perp
+ = \\int^1_{-1} (c_i + x s_i)l_j(x)l_k^\\prime(x)l_m^\\prime(x) d x/s_i 
+```
+with \$c_i\$ and \$s_i\$ the appropriate shift and scale factors, respectively. 
+Otherwise, if called for any other coordinate elemental matrix `YY2` is 
+```math
+ (YY2)_{jkm} = \\int^{v_\\|^U}_{v_\\|^L}  \\varphi_j(v_\\|)\\frac{\\partial\\varphi_k(v_\\|)}{\\partial v_\\|}\\frac{\\partial\\varphi_m(v_\\|)}{\\partial v_\\|} d v_\\|
+ = \\int^1_{-1} l_j(x)l_k^\\prime(x)l_m^\\prime(x) d x /s_i.
+```
+"""
 function get_YY2_local!(QQ,ielement,
         lobatto::gausslegendre_base_info,
         radau::gausslegendre_base_info, 
@@ -1511,6 +1610,19 @@ function get_YY2_local!(QQ,ielement,
         return nothing
 end
 
+"""
+If called for `coord.name = vperp` elemental matrix `YY3` on the \$i^{th}\$ element is
+```math
+ (YY3)_{jkm} = \\int^{v_\\perp^U}_{v_\\perp^L}  \\varphi_j(v_\\perp)\\frac{\\partial\\varphi_k(v_\\perp)}{\\partial v_\\perp}\\varphi_m(v_\\perp) v_\\perp d v_\\perp
+ = \\int^1_{-1} (c_i + x s_i)l_j(x)l_k^\\prime(x)l_m(x) d x 
+```
+with \$c_i\$ and \$s_i\$ the appropriate shift and scale factors, respectively. 
+Otherwise, if called for any other coordinate elemental matrix `YY3` is 
+```math
+ (YY3)_{jkm} = \\int^{v_\\|^U}_{v_\\|^L}  \\varphi_j(v_\\|)\\frac{\\partial\\varphi_k(v_\\|)}{\\partial v_\\|}\\varphi_m(v_\\|) d v_\\|
+ = \\int^1_{-1} l_j(x)l_k^\\prime(x)l_m(x) d x.
+```
+"""
 function get_YY3_local!(QQ,ielement,
         lobatto::gausslegendre_base_info,
         radau::gausslegendre_base_info, 

From 8164762a690ffc804450271a19d1983ecc223c6e Mon Sep 17 00:00:00 2001
From: Michael Hardman <29800382+mrhardman@users.noreply.github.com>
Date: Sat, 26 Oct 2024 10:59:14 +0100
Subject: [PATCH 27/41] Improve autodocs for fokker_planck_test.jl.

---
 moment_kinetics/src/fokker_planck_test.jl | 158 ++++++++++++++++++++--
 1 file changed, 145 insertions(+), 13 deletions(-)

diff --git a/moment_kinetics/src/fokker_planck_test.jl b/moment_kinetics/src/fokker_planck_test.jl
index 103acdc86..9b122a868 100644
--- a/moment_kinetics/src/fokker_planck_test.jl
+++ b/moment_kinetics/src/fokker_planck_test.jl
@@ -1,7 +1,7 @@
 """
-module for including functions used 
+Module for including functions used 
 in testing the implementation of the 
-the Full-F Fokker-Planck Collision Operator
+full-F Fokker-Planck collision operator.
 """
 module fokker_planck_test
 
@@ -27,9 +27,18 @@ using ..velocity_moments: get_density
 # of the Rosenbluth potentials for a shifted Maxwellian
 # or provide an estimate for collisional coefficients 
 
-# G (defined by Del^4 G = -(8/sqrt(pi))*F 
-# with F = cref^3 pi^(3/2) F_Maxwellian / nref 
-# the normalised Maxwellian
+"""
+Function computing G, defined by 
+```math 
+\\nabla^4 G = -\\frac{8}{\\sqrt{\\pi}} F 
+```
+with 
+```math
+F = c_{\\rm ref}^3 \\pi^{3/2} F_{\\rm Maxwellian} / n_{\\rm ref} 
+```
+the normalised Maxwellian. 
+See Plasma Confinement, R. D. Hazeltine & J. D. Meiss, 2003, Dover Publications, pg 184, Chpt 5.2, Eqn (5.49).
+"""
 function G_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
     # speed variable
     eta = eta_func(upar,vth,vpa,vperp,ivpa,ivperp)
@@ -43,9 +52,18 @@ function G_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
     return G*dens*vth
 end
 
-# H (defined by Del^2 H = -(4/sqrt(pi))*F 
-# with F = cref^3 pi^(3/2) F_Maxwellian / nref 
-# the normalised Maxwellian
+"""
+Function computing H, defined by 
+```math 
+\\nabla^2 H = -\\frac{4}{\\sqrt{\\pi}} F 
+```
+with 
+```math
+F = c_{\\rm ref}^3 \\pi^{3/2} F_{\\rm Maxwellian} / n_{\\rm ref} 
+```
+the normalised Maxwellian. 
+See Plasma Confinement, R. D. Hazeltine & J. D. Meiss, 2003, Dover Publications, pg 184, Chpt 5.2, Eqn (5.49).
+"""
 function H_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
     # speed variable
     eta = eta_func(upar,vth,vpa,vperp,ivpa,ivperp)
@@ -85,13 +103,27 @@ function dHdeta(eta::mk_float)
     return dHdeta_fac
 end
 
-# functions of vpa & vperp 
+"""
+Function computing the normalised speed variable 
+```math
+\\eta = \\frac{\\sqrt{(v_\\| - u_\\|)^2 + v_\\perp^2}}{v_{\\rm th}}
+```
+with \$v_{\\rm th} = \\sqrt{2 p / n m}\$ the thermal speed, and \$p\$ the pressure,
+ \$n\$ the density and \$m\$ the mass.
+"""
 function eta_func(upar::mk_float,vth::mk_float,
              vpa,vperp,ivpa,ivperp)
     speed = sqrt( (vpa.grid[ivpa] - upar)^2 + vperp.grid[ivperp]^2)/vth
     return speed
 end
 
+"""
+Function computing 
+```math 
+\\frac{\\partial^2 G }{ \\partial v_\\|^2}
+```
+ for Maxwellian input. See `G_Maxwellian()`.
+"""
 function d2Gdvpa2_Maxwellian(dens::mk_float,upar::mk_float,vth::mk_float,
                             vpa,vperp,ivpa,ivperp)
     eta = eta_func(upar,vth,vpa,vperp,ivpa,ivperp)
@@ -100,6 +132,13 @@ function d2Gdvpa2_Maxwellian(dens::mk_float,upar::mk_float,vth::mk_float,
     return d2Gdvpa2_fac
 end
 
+"""
+Function computing
+```math
+\\frac{\\partial^2 G}{\\partial v_\\perp \\partial v_\\|}
+```
+for Maxwellian input. See `G_Maxwellian()`.
+"""
 function d2Gdvperpdvpa_Maxwellian(dens::mk_float,upar::mk_float,vth::mk_float,
                             vpa,vperp,ivpa,ivperp)
     eta = eta_func(upar,vth,vpa,vperp,ivpa,ivperp)
@@ -108,6 +147,13 @@ function d2Gdvperpdvpa_Maxwellian(dens::mk_float,upar::mk_float,vth::mk_float,
     return d2Gdvperpdvpa_fac
 end
 
+"""
+Function computing
+```math
+\\frac{\\partial^2 G}{\\partial v_\\perp^2}
+```
+for Maxwellian input. See `G_Maxwellian()`.
+"""
 function d2Gdvperp2_Maxwellian(dens::mk_float,upar::mk_float,vth::mk_float,
                             vpa,vperp,ivpa,ivperp)
     eta = eta_func(upar,vth,vpa,vperp,ivpa,ivperp)
@@ -116,6 +162,13 @@ function d2Gdvperp2_Maxwellian(dens::mk_float,upar::mk_float,vth::mk_float,
     return d2Gdvperp2_fac
 end
 
+"""
+Function computing
+```math
+\\frac{\\partial G}{\\partial v_\\perp}
+```
+for Maxwellian input. See `G_Maxwellian()`.
+"""
 function dGdvperp_Maxwellian(dens::mk_float,upar::mk_float,vth::mk_float,
                             vpa,vperp,ivpa,ivperp)
     eta = eta_func(upar,vth,vpa,vperp,ivpa,ivperp)
@@ -123,6 +176,13 @@ function dGdvperp_Maxwellian(dens::mk_float,upar::mk_float,vth::mk_float,
     return fac 
 end
 
+"""
+Function computing
+```math
+\\frac{\\partial H}{\\partial v_\\perp}
+```
+for Maxwellian input. See `H_Maxwellian()`.
+"""
 function dHdvperp_Maxwellian(dens::mk_float,upar::mk_float,vth::mk_float,
                             vpa,vperp,ivpa,ivperp)
     eta = eta_func(upar,vth,vpa,vperp,ivpa,ivperp)
@@ -130,6 +190,13 @@ function dHdvperp_Maxwellian(dens::mk_float,upar::mk_float,vth::mk_float,
     return fac 
 end
 
+"""
+Function computing
+```math
+\\frac{\\partial H}{\\partial v_\\|}
+```
+for Maxwellian input. See `H_Maxwellian()`.
+"""
 function dHdvpa_Maxwellian(dens::mk_float,upar::mk_float,vth::mk_float,
                             vpa,vperp,ivpa,ivperp)
     eta = eta_func(upar,vth,vpa,vperp,ivpa,ivperp)
@@ -137,6 +204,9 @@ function dHdvpa_Maxwellian(dens::mk_float,upar::mk_float,vth::mk_float,
     return fac 
 end
 
+"""
+Function computing \$ F_{\\rm Maxwellian} \$.
+"""
 function F_Maxwellian(dens::mk_float,upar::mk_float,vth::mk_float,
                         vpa,vperp,ivpa,ivperp)
     eta = eta_func(upar,vth,vpa,vperp,ivpa,ivperp)
@@ -144,6 +214,13 @@ function F_Maxwellian(dens::mk_float,upar::mk_float,vth::mk_float,
     return fac
 end
 
+"""
+Function computing 
+```math
+\\frac{\\partial F}{\\partial v_\\|}
+```
+for \$ F = F_{\\rm Maxwellian}\$.
+"""
 function dFdvpa_Maxwellian(dens::mk_float,upar::mk_float,vth::mk_float,
                         vpa,vperp,ivpa,ivperp)
     eta = eta_func(upar,vth,vpa,vperp,ivpa,ivperp)
@@ -151,6 +228,13 @@ function dFdvpa_Maxwellian(dens::mk_float,upar::mk_float,vth::mk_float,
     return fac
 end
 
+"""
+Function computing
+```math
+\\frac{\\partial F}{\\partial v_\\perp}
+```
+for \$ F = F_{\\rm Maxwellian}\$.
+"""
 function dFdvperp_Maxwellian(dens::mk_float,upar::mk_float,vth::mk_float,
                         vpa,vperp,ivpa,ivperp)
     eta = eta_func(upar,vth,vpa,vperp,ivpa,ivperp)
@@ -158,6 +242,13 @@ function dFdvperp_Maxwellian(dens::mk_float,upar::mk_float,vth::mk_float,
     return fac
 end
 
+"""
+Function computing
+```math
+\\frac{\\partial^2 F}{\\partial v_\\perp \\partial v_\\|}
+```
+for \$ F = F_{\\rm Maxwellian}\$.
+"""
 function d2Fdvperpdvpa_Maxwellian(dens::mk_float,upar::mk_float,vth::mk_float,
                         vpa,vperp,ivpa,ivperp)
     eta = eta_func(upar,vth,vpa,vperp,ivpa,ivperp)
@@ -165,6 +256,13 @@ function d2Fdvperpdvpa_Maxwellian(dens::mk_float,upar::mk_float,vth::mk_float,
     return fac
 end
 
+"""
+Function computing
+```math
+\\frac{\\partial^2 F}{\\partial v_\\|^2}
+```
+for \$ F = F_{\\rm Maxwellian}\$.
+"""
 function d2Fdvpa2_Maxwellian(dens::mk_float,upar::mk_float,vth::mk_float,
                         vpa,vperp,ivpa,ivperp)
     eta = eta_func(upar,vth,vpa,vperp,ivpa,ivperp)
@@ -172,6 +270,13 @@ function d2Fdvpa2_Maxwellian(dens::mk_float,upar::mk_float,vth::mk_float,
     return fac
 end
 
+"""
+Function computing
+```math
+\\frac{\\partial^2 F}{\\partial v_\\perp^2}
+```
+for \$ F = F_{\\rm Maxwellian}\$.
+"""
 function d2Fdvperp2_Maxwellian(dens::mk_float,upar::mk_float,vth::mk_float,
                         vpa,vperp,ivpa,ivperp)
     eta = eta_func(upar,vth,vpa,vperp,ivpa,ivperp)
@@ -179,6 +284,10 @@ function d2Fdvperp2_Maxwellian(dens::mk_float,upar::mk_float,vth::mk_float,
     return fac
 end
 
+"""
+Calculates the fully expanded form of the collision operator \$C_{s s^\\prime}[F_s,F_{s^\\prime}]\$ given Maxwellian input \$F_s\$ and \$F_{s^\\prime}\$.
+The input Maxwellians are specified through their moments.
+"""
 function Cssp_Maxwellian_inputs(denss::mk_float,upars::mk_float,vths::mk_float,ms::mk_float,
                                 denssp::mk_float,uparsp::mk_float,vthsp::mk_float,msp::mk_float,
                                 nussp::mk_float,vpa,vperp,ivpa,ivperp)
@@ -210,6 +319,10 @@ function Cssp_Maxwellian_inputs(denss::mk_float,upars::mk_float,vths::mk_float,m
     return Cssp_Maxwellian
 end
 
+"""
+Calculates the collisional flux \$\\Gamma_\\|\$ given Maxwellian input \$F_s\$ and \$F_{s^\\prime}\$.
+The input Maxwellians are specified through their moments.
+"""
 function Cflux_vpa_Maxwellian_inputs(ms::mk_float,denss::mk_float,upars::mk_float,vths::mk_float,
                                      msp::mk_float,denssp::mk_float,uparsp::mk_float,vthsp::mk_float,
                                      vpa,vperp,ivpa,ivperp)
@@ -224,6 +337,10 @@ function Cflux_vpa_Maxwellian_inputs(ms::mk_float,denss::mk_float,upars::mk_floa
     return Cflux
 end
 
+"""
+Calculates the collisional flux \$\\Gamma_\\perp\$ given Maxwellian input \$F_s\$ and \$F_{s^\\prime}\$.
+The input Maxwellians are specified through their moments.
+"""
 function Cflux_vperp_Maxwellian_inputs(ms::mk_float,denss::mk_float,upars::mk_float,vths::mk_float,
                                      msp::mk_float,denssp::mk_float,uparsp::mk_float,vthsp::mk_float,
                                      vpa,vperp,ivpa,ivperp)
@@ -240,7 +357,8 @@ end
 
 """
 Function calculating the fully expanded form of the collision operator
-taking floats as arguments. This function is designed to be used at the 
+taking as arguments the derivatives of \$F_s\$, \$G_{s^\\prime}\$ and \$H_{s^\\prime}\$.
+This function is designed to be used at the 
 lowest level of a coordinate loop, with derivatives and integrals
 all previously calculated.
 """
@@ -259,7 +377,7 @@ end
 
 
 """
-calculates the collisional fluxes given input F_s and G_sp, H_sp
+Calculates the collisional fluxes given input \$F_s\$ and \$G_{s^\\prime}\$, \$H_{s^\\prime}\$.
 """
 function calculate_collisional_fluxes(F,dFdvpa,dFdvperp,
                             d2Gdvpa2,d2Gdvperpdvpa,d2Gdvperp2,dHdvpa,dHdvperp,
@@ -273,10 +391,11 @@ function calculate_collisional_fluxes(F,dFdvpa,dFdvperp,
 end
 
 
+# Below are functions which are used for storing and printing data from the tests 
+
 """
-Below are functions which are used for storing and printing data from the tests 
+Function to print the maximum error \${\\rm MAX}(|f_{\\rm numerical}-f_{\\rm exact}|)\$.
 """
-
 function print_test_data(func_exact,func_num,func_err,func_name)
     @. func_err = abs(func_num - func_exact)
     max_err = maximum(func_err)
@@ -284,6 +403,13 @@ function print_test_data(func_exact,func_num,func_err,func_name)
     return max_err
 end
 
+"""
+Function to print the maximum error \${\\rm MAX}(|f_{\\rm numerical}-f_{\\rm exact}|)\$ and the
+\$L_2\$ norm of the error 
+```math
+\\sqrt{\\int (f - f_{\\rm exact})^2 v_\\perp d v_\\perp d v_\\|/\\int v_\\perp d v_\\perp d v_\\|}.
+```
+"""
 function print_test_data(func_exact,func_num,func_err,func_name,vpa,vperp,dummy;print_to_screen=true)
     @. func_err = abs(func_num - func_exact)
     max_err = maximum(func_err)
@@ -340,6 +466,9 @@ function allocate_error_data()
         moments)
 end
 
+"""
+Utility function that saves error data to an HDF5 file for later use.
+"""
 function save_fkpl_error_data(outdir,ncore,ngrid,nelement_list,
     max_C_err, max_H_err, max_G_err, max_dHdvpa_err, max_dHdvperp_err,
     max_d2Gdvperp2_err, max_d2Gdvpa2_err, max_d2Gdvperpdvpa_err, max_dGdvperp_err, 
@@ -384,6 +513,9 @@ function save_fkpl_error_data(outdir,ncore,ngrid,nelement_list,
     return nothing
 end
 
+"""
+Utility function that saves error data to an HDF5 file for later use.
+"""
 function save_fkpl_integration_error_data(outdir,ncore,ngrid,nelement_list,
     max_dfsdvpa_err, max_dfsdvperp_err, max_d2fsdvperpdvpa_err,
     max_H_err, max_G_err, max_dHdvpa_err, max_dHdvperp_err,

From aa365d55f3155b5e21c358b7cf26f2ed26f4c913 Mon Sep 17 00:00:00 2001
From: Michael Hardman <29800382+mrhardman@users.noreply.github.com>
Date: Sat, 26 Oct 2024 20:12:35 +0100
Subject: [PATCH 28/41] Correct formatting of chebyshev.md so that
 Documenter.jl can correctly render the document.

---
 docs/src/chebyshev.md | 285 +++++++++++++++++++++++++++---------------
 1 file changed, 186 insertions(+), 99 deletions(-)

diff --git a/docs/src/chebyshev.md b/docs/src/chebyshev.md
index 908daaf09..c4d38f8ae 100644
--- a/docs/src/chebyshev.md
+++ b/docs/src/chebyshev.md
@@ -1,22 +1,24 @@
 Chebyshev tranform via Fourier transform
 ===============================================
 
-```math
-\begin{equation}
-\end{equation}
-```
 We express a function $f$ as a sum of Chebyshev polynomials
 ```math 
-\begin{equation} f(x) = \sum^N_{n=0} a_{n}T_n(x)\label{eq:cheb-expansion} \tag{1}\end{equation}
+\begin{equation}
+f(x) = \sum^N_{n=0} a_{n}T_n(x) \tag{1}
+\end{equation}
 ```
 The Chebyshev polynomials are defined by 
 ```math
-\begin{equation} T_n(\cos \theta) = \cos n \theta, {\rm~with~}x = \cos \theta. \end{equation}
+\begin{equation}
+T_n(\cos \theta) = \cos n \theta, {\rm~with~}x = \cos \theta.
+\end{equation}
 ```
 We can see how to find $\{a_{n}\}$ given $\{f(x_j)\}$ via Fourier transform. 
 The Fourier series representation of $f$ on a uniform grid indexed by $j$ is defined by 
 ```math
-\begin{equation} f_j = \sum_{k=0}^{M-1} b_{k}\exp\left[i \frac{2\pi k j}{M}\right].\label{eq:fourier-series}\tag{2}\end{equation}
+\begin{equation}
+f_j = \sum_{k=0}^{M-1} b_{k}\exp\left[i \frac{2\pi k j}{M}\right].\tag{2}
+\end{equation}
 ```
 
 Gauss-Chebyshev-Lobotto points
@@ -24,26 +26,36 @@ Gauss-Chebyshev-Lobotto points
 
 We pick points 
 ```math
-\begin{equation} x_j = \cos \theta_j, \quad \theta_j = \frac{j \pi}{N} \quad 0 \leq j \leq N.\end{equation}
+\begin{equation}
+x_j = \cos \theta_j, \quad \theta_j = \frac{j \pi}{N} \quad 0 \leq j \leq N.
+\end{equation}
 ```
 Then 
 ```math
-\begin{equation} T_n(x_j) = \cos \frac{n j \pi}{N}.\end{equation}
+\begin{equation}
+T_n(x_j) = \cos \frac{n j \pi}{N}.
+\end{equation}
 ```
 Assuming that $M = 2N$, with $N$ an integer, and $b_{k} = b_{M-k}$ for $k>0$, we have that 
 ```math
-\begin{equation} f_j = b_{0} + b_{N}(-1)^j + \sum_{n=1}^{N-1}
-b_{n}\left(\exp\left[i \frac{\pi n j}{N}\right]+\exp\left[-i \frac{\pi n j}{N}\right]\right).\end{equation}
+\begin{equation}
+f_j = b_{0} + b_{N}(-1)^j + \sum_{n=1}^{N-1}
+b_{n}\left(\exp\left[i \frac{\pi n j}{N}\right]+\exp\left[-i \frac{\pi n j}{N}\right]\right).
+\end{equation}
 ```
 Comparing this to the expression for $f(x_j)$ in the Chebyshev representation,
 ```math
-\begin{equation} f_j = a_{0} + a_{N}(-1)^j + \frac{1}{2}\sum_{n=1}^{N-1}
-a_{n}\left(\exp\left[i \frac{\pi n j}{N}\right]+\exp\left[-i \frac{\pi n j}{N}\right]\right),\end{equation}
+\begin{equation}
+f_j = a_{0} + a_{N}(-1)^j + \frac{1}{2}\sum_{n=1}^{N-1}
+a_{n}\left(\exp\left[i \frac{\pi n j}{N}\right]+\exp\left[-i \frac{\pi n j}{N}\right]\right),
+\end{equation}
 ```
 we find that the Chebyshev representation on the Chebyshev points is equivalent 
 to the Fourier representation on the uniform grid points, if we identify
 ```math
-\begin{equation} b_{0} = a_{0}, \quad  b_{N} = a_{N}, \quad b_{j} = \frac{a_{j}}{2} {\rm~for~} 1 \leq j \leq N-1. \end{equation}
+\begin{equation}
+b_{0} = a_{0}, \quad  b_{N} = a_{N}, \quad b_{j} = \frac{a_{j}}{2} {\rm~for~} 1 \leq j \leq N-1.
+\end{equation}
 ```
 This fact allows us to carry out the Chebyshev tranform by Fourier transforming the $\{f_j\}$ data
 and carrying out the correct normalisation of the resulting coefficients. 
@@ -55,74 +67,103 @@ The last subsection dealt with grids which contain both endpoints on the $[-1,1]
 Certain problems require domains which contain a single endpoint, i.e., $x \in (-1,1]$. For 
 these cases we choose the points 
 ```math
-\begin{equation} x_j = \cos \theta_j, \quad \theta_j = \frac{2 j \pi}{2 N + 1} \quad 0 \leq j \leq N.\end{equation}
+\begin{equation}
+x_j = \cos \theta_j, \quad \theta_j = \frac{2 j \pi}{2 N + 1} \quad 0 \leq j \leq N.
+\end{equation}
 ```
-Writing out the Chebyshev series (1), <!-- \eq{eq:cheb-expansion}, -->
+Writing out the Chebyshev series (1),
 we have that 
 ```math
-\begin{equation} \begin{split} f(x_j) = & \sum^N_{n=0} a_{n} \cos \frac{2 n j \pi}{2 N + 1} \\ & = a_{0} + \sum^N_{n=1} \frac{a_{n}}{2}\left(\exp\left[i \frac{2\pi n j}{2N +1}\right] + \exp\left[-i \frac{2\pi n j}{2N +1}\right]\right).\end{split} \label{eq:cheb-expansion-radau-points}\tag{3}\end{equation}
+\begin{equation}
+\begin{split}f(x_j) = & \sum^N_{n=0} a_{n} \cos \frac{2 n j \pi}{2 N + 1} \\ &
+= a_{0} + \sum^N_{n=1} \frac{a_{n}}{2}\left(\exp\left[i \frac{2\pi n j}{2N +1}\right] + \exp\left[-i \frac{2\pi n j}{2N +1}\right]\right).\end{split}
+\tag{3}
+\end{equation}
 ```
-The form of the series (3) <!--\eq{eq:cheb-expansion-radau-points}--> is identical to the form of 
-a Fourier series on an odd number of points, i.e., taking $M = 2 N + 1$ in equation (2) <--\eq{eq:fourier-series}-->,
+The form of the series (3) is identical to the form of 
+a Fourier series on an odd number of points, i.e., taking $M = 2 N + 1$ in equation (2),
 and assuming $b_{k} = b_{M -k}$ for $k>1$,
 we have that 
 ```math
-\begin{equation} f_j = b_{0} + \sum_{k=1}^{N} b_{k}\left(\exp\left[i \frac{2\pi k j}{2N+1}\right] + \exp\left[-i \frac{2\pi k j}{2N+1} \right]\right). \end{equation}
+\begin{equation}
+f_j = b_{0} + \sum_{k=1}^{N} b_{k}\left(\exp\left[i \frac{2\pi k j}{2N+1}\right] + \exp\left[-i \frac{2\pi k j}{2N+1} \right]\right).
+\end{equation}
 ```
 We can thus take a Chebyshev transform using a Fourier transform on Gauss-Chebyshev-Radau points if we identify 
 ```math
-\begin{equation} b_{0} = a_{0}, \quad b_{j} = \frac{a_{j}}{2} {\rm~for~} 1 \leq j \leq N. \end{equation}
+\begin{equation}
+b_{0} = a_{0}, \quad b_{j} = \frac{a_{j}}{2} {\rm~for~} 1 \leq j \leq N.
+\end{equation}
 ```
 
 Chebyshev coefficients of derivatives of a function
 ===============================================
 
-Starting from the expression of $f$ as a sum Chebyshev polynomials, equation (1) <!--\eq{eq:cheb-expansion}-->,
+Starting from the expression of $f$ as a sum Chebyshev polynomials, equation (1),
 we can obtain an expression for the derivative
 ```math
-\begin{equation} \frac{d f}{d x} = \sum^N_{n=0} a_{n}\frac{d T_{n}}{d x}. \label{eq:derivative-def}\tag{4}\end{equation}
+\begin{equation}
+\frac{d f}{d x} = \sum^N_{n=0} a_{n}\frac{d T_{n}}{d x}.\tag{4}
+\end{equation}
 ```
 We note that we must be able to express ${d f}/{d x}$ as a sum 
 of Chebyshev polynomials of up to order $N-1$, i.e.,
 ```math
-\begin{equation} \frac{d f}{d x} = \sum^{N-1}_{n=0} d_{n}T_{n}. \end{equation}
+\begin{equation}
+\frac{d f}{d x} = \sum^{N-1}_{n=0} d_{n}T_{n}.
+\end{equation}
 ```
 We must determine the set $\{d_{n}\}$ in terms of the set $\{a_{n}\}$.
 First, we equate the two expressions to find that 
 ```math
-\begin{equation} \sum^N_{k=0} a_{k}\frac{d T_{k}}{d x} = \sum^{N-1}_{n=0} d_{n}T_{n}. \label{eq:dn-def}\tag{5}\end{equation}
+\begin{equation}
+\sum^N_{k=0} a_{k}\frac{d T_{k}}{d x} = \sum^{N-1}_{n=0} d_{n}T_{n}.\tag{5}
+\end{equation}
 ```
 We use the Chebyshev polynomials of the second kind $U_n{x}$ to aid us in the calculation of the set $\{d_{n}\}$. 
 These polynomials are defined by 
 ```math
-\begin{equation} U_{0}(x) = 1, \quad U_{1}(x) = 2x, \quad U_{n+1} = 2 x U_{n}(x) - U_{n-1}(x).\end{equation}
+\begin{equation}
+U_{0}(x) = 1, \quad U_{1}(x) = 2x, \quad U_{n+1} = 2 x U_{n}(x) - U_{n-1}(x).
+\end{equation}
 ```
 Note the useful relations 
 ```math
-\begin{equation} \frac{d T_{n}}{d x} = n U_{n-1}, {\rm~for~}n\geq 1, \quad \frac{d T_{0}}{d x} = 0,\end{equation}
+\begin{equation}
+\frac{d T_{n}}{d x} = n U_{n-1}, {\rm~for~}n\geq 1, \quad \frac{d T_{0}}{d x} = 0,
+\end{equation}
 ```
 ```math
-\begin{equation} T_{n} = \frac{1}{2}\left(U_{n} - U_{n -2}\right), T_{0} = U_{0}\quad, {\rm ~and~} \quad 2 T_{1} = U_{1}. \end{equation}
+\begin{equation}
+T_{n} = \frac{1}{2}\left(U_{n} - U_{n -2}\right), T_{0} = U_{0}\quad, {\rm ~and~} \quad 2 T_{1} = U_{1}.
+\end{equation}
 ```
 Using these identities, which may be obtained from the trigonometric definition of $U_{n}(\cos \theta)$
 ```math
-\begin{equation}  U_{n}(\cos \theta) \sin \theta = \sin \left((n+1)\theta\right),\end{equation}
+\begin{equation}
+U_{n}(\cos \theta) \sin \theta = \sin \left((n+1)\theta\right),
+\end{equation}
 ```
-we find that equation (5) <!--\eq{eq:dn-def}--> becomes 
+we find that equation (5) becomes 
 ```math
-\begin{equation} \begin{split}\sum^N_{n=1} a_{n} n U_{n-1}(x) =& \frac{d_{N-1}}{2}U_{N-1}+\frac{d_{N-2}}{2}U_{N-2} 
-\\ & + \sum^{N-3}_{k=1} \frac{d_{k}-d_{k+2}}{2}U_{k} + \left(d_{0} - \frac{d_{2}}{2}\right)U_{0}. \end{split}
-\label{eq:dn-def-U}\tag{6}\end{equation}
+\begin{equation}
+\begin{split}\sum^N_{n=1} a_{n} n U_{n-1}(x) =& \frac{d_{N-1}}{2}U_{N-1}+\frac{d_{N-2}}{2}U_{N-2} 
+\\ & + \sum^{N-3}_{k=1} \frac{d_{k}-d_{k+2}}{2}U_{k} + \left(d_{0} - \frac{d_{2}}{2}\right)U_{0}. \end{split}\tag{6}
+\end{equation}
 ```
 Using the orthogonality relation 
 ```math
-\begin{equation} \int^1_{-1} U_{m}(x)U_{n}(x)\sqrt{1-x^2} \; d x = 
-\left\{\begin{array}{l} 0 {\rm ~if~} n\neq m  \\ \pi/2 {\rm ~if~} n=m \\ \end{array} \right.\end{equation}
+\begin{equation}
+\int^1_{-1} U_{m}(x)U_{n}(x)\sqrt{1-x^2} \; d x = 
+\left\{\begin{array}{l} 0 {\rm ~if~} n\neq m  \\ \pi/2 {\rm ~if~} n=m \\ \end{array} \right.
+\end{equation}
 ```
 we obtain the (unqiuely-determined) relations 
 ```math
-\begin{equation} \begin{split} &d_{N-1} = 2Na_{N},\quad d_{N-2} = 2(N-1)a_{N-1}, \\ 
-& d_{k} = 2(k+1) a_{k+1} + d_{k+2}, \quad d_{0} = \frac{d_{2}}{2} + a_{1}.\end{split} \label{eq:dn-result-U}\tag{7}\end{equation}
+\begin{equation}
+\begin{split} &d_{N-1} = 2Na_{N},\quad d_{N-2} = 2(N-1)a_{N-1}, \\ 
+& d_{k} = 2(k+1) a_{k+1} + d_{k+2}, \quad d_{0} = \frac{d_{2}}{2} + a_{1}.\end{split}\tag{7}
+\end{equation}
 ```       
 
 Clenshaw-Curtis integration weights
@@ -131,119 +172,165 @@ Clenshaw-Curtis integration weights
 We require the integration weights for the set of points $\{x_j\}$ chosen 
 in our numerical scheme. The weights $w_{j}$ are defined implicitly by 
 ```math
-\begin{equation} \int^{1}_{-1} f(x) \; d x = \sum_{j=0}^N f(x_j) w_{j}. \label{eq:w-sum}\tag{8}\end{equation}
+\begin{equation}
+\int^{1}_{-1} f(x) \; d x = \sum_{j=0}^N f(x_j) w_{j}.\tag{8}
+\end{equation}
 ```
 In the Chebyshev scheme we use the change of variables $x = \cos \theta$
 to write 
 ```math
-\begin{equation} \int^{1}_{-1} f(x) \; d x = \int^\pi_0 f(\cos\theta) \sin \theta \; d \theta . \label{eq:change-of-variables-integral} \tag{9}\end{equation}
+\begin{equation}
+\int^{1}_{-1} f(x) \; d x = \int^\pi_0 f(\cos\theta) \sin \theta \; d \theta .\tag{9}
+\end{equation}
 ```
- Using the series expansion (1) <!--\eq{eq:cheb-expansion}--> in equation (9) <!--\eq{eq:change-of-variables-integral}-->
+ Using the series expansion (1) in equation (9)
  we find that 
- ```math
- \begin{equation} \int^{1}_{-1} f(x) \; d x = \sum^N_{n=0} a_{n}\int^\pi_0 \cos (n \theta) \sin \theta \; d \theta
- . \label{eq:series-integral}\tag{10} \end{equation}
- ```
- Note the integral identity
- ```math
-\begin{equation} \int^\pi_0 \cos(n \theta) \sin \theta \; d \theta = \frac{\cos(n \pi) +1}{1 - n^2} {\rm~for~} n \geq 0.\end{equation}
-```
- Also note that 
-```math
-\begin{equation} \frac{\cos(n \pi) +1}{1 - n^2} = \left\{\begin{array}{l} 0 {\rm ~if~} n = 2 r + 1, ~r \in \mathbb{Z}  \\ 2/(1 - n^2) {\rm ~if~} n=2r,~r. \in \mathbb{Z}  \end{array}\right. \end{equation}
- ```
- We define 
- ```math
- \begin{equation} J_{n} = \frac{\cos(n \pi) +1}{1 - n^2}. \end{equation}
- ```
- Using this definition, we can write the integral of $f(x)$ can be written 
- in terms of a sum over of the Chebyshev coefficients:
- ```math
- \begin{equation} \int^{1}_{-1} f(x) \; d x = \sum_{n=0}^N J_{n} a_{n}. \label{eq:Cheb-sum}\tag{11}\end{equation}
- ```
- 
- To avoid computing the set of coefficients $\{a_{n}\}$ every time we wish to integrate $f(x_j)$,
- we use the inverse transforms. This transform allows us to rewrite equation (11) <!--\eq{eq:Cheb-sum}--> in the form (8) <!--\eq{eq:w-sum}-->.
- Since the inverse transform differs between the Gauss-Chebyshev-Lobotto and Gauss-Chebyshev-Radau cases, we treat each 
- case separately below. 
- 
+```math
+\begin{equation}
+\int^{1}_{-1} f(x) \; d x = \sum^N_{n=0} a_{n}\int^\pi_0 \cos (n \theta) \sin \theta \; d \theta
+.\tag{10}
+\end{equation}
+```
+Note the integral identity
+```math
+\begin{equation}
+\int^\pi_0 \cos(n \theta) \sin \theta \; d \theta = \frac{\cos(n \pi) +1}{1 - n^2} {\rm~for~} n \geq 0.
+\end{equation}
+```
+Also note that 
+```math
+\begin{equation}
+\frac{\cos(n \pi) +1}{1 - n^2} = \left\{\begin{array}{l} 0 {\rm ~if~} n = 2 r + 1, ~r \in \mathbb{Z}  \\ 2/(1 - n^2) {\rm ~if~} n=2r,~r \in \mathbb{Z}  \end{array}\right.
+\end{equation}
+```
+We define 
+```math
+\begin{equation}
+J_{n} = \frac{\cos(n \pi) +1}{1 - n^2}.
+\end{equation}
+```
+Using this definition, the integral of $f(x)$ can be written
+in terms of a sum over the Chebyshev coefficients:
+```math
+\begin{equation}
+\int^{1}_{-1} f(x) \; d x = \sum_{n=0}^N J_{n} a_{n}.\tag{11}
+\end{equation}
+```
+To avoid computing the set of coefficients $\{a_{n}\}$ every time we wish to integrate $f(x_j)$,
+we use the inverse transforms. This transform allows us to rewrite equation (11) in the form (8).
+Since the inverse transform differs between the Gauss-Chebyshev-Lobotto and Gauss-Chebyshev-Radau cases, we treat each 
+case separately below. 
+
 Weights on Gauss-Chebyshev-Lobotto points
 ===============================================
-  We use the inverse transformation 
- ```math
- \begin{equation} a_{n} = \frac{q_{n}}{2N}\sum^{2N-1}_{j=0} \hat{f}_j \exp\left[- i \frac{2\pi n j}{2N}\right], \label{eq:inverse-transform-GCL}\tag{12}\end{equation}
- ```
- where 
- ```math
- \begin{equation} q_{n} = \left\{\begin{array}{l} 2 {\rm ~if~} n\neq0,N  \\ 1 {\rm ~if~} n=0,N  \end{array}\right.\end{equation}
- ```
+We use the inverse transformation 
+```math
+\begin{equation}
+a_{n} = \frac{q_{n}}{2N}\sum^{2N-1}_{j=0} \hat{f}_j \exp\left[- i \frac{2\pi n j}{2N}\right], \tag{12}
+\end{equation}
+```
+where 
+```math
+\begin{equation}
+q_{n} = \left\{\begin{array}{l} 2 {\rm ~if~} n\neq0,N  \\ 1 {\rm ~if~} n=0,N  \end{array}\right.
+\end{equation}
+```
 and $\hat{f}_j$ is $f(x_j)$ on the extended domain in FFT order, i.e.,
 ```math
-\begin{equation} \hat{f}_j = f(x_{j}) {\rm~for~} 0 \leq j \leq N ,\quad \hat{f}_j = f(x_{2N-j}){\rm~for~} N+1 \leq j \leq 2N-1. \end{equation}
+\begin{equation}
+\hat{f}_j = f(x_{j}) {\rm~for~} 0 \leq j \leq N ,\quad \hat{f}_j = f(x_{2N-j}){\rm~for~} N+1 \leq j \leq 2N-1.
+\end{equation}
 ```
- With this inverse tranformation, we can write 
+With this inverse transformation, we can write 
 ```math
-\begin{equation} \begin{split}\sum_{n=0}^N J_{n} a_{n} & =  \sum^{2N-1}_{n=0} \frac{a_{n}J_{n}}{q_{n}} \\
- & = \sum^{2N-1}_{j=0}\sum^{2N-1}_{n=0} \frac{\hat{f}_j J_{n}}{2N} \exp\left[-i \frac{2\pi n j}{2N}\right] \\ 
- & = \sum^{2N-1}_{j=0} \hat{f}_j v_{j} = \sum^{N}_{j=0} \hat{f}_j q_{j}v_{j},\end{split}\label{eq:weights-working}\tag{13}\end{equation}
+\begin{equation}
+\begin{split}\sum_{n=0}^N J_{n} a_{n} & =  \sum^{2N-1}_{n=0} \frac{a_{n}J_{n}}{q_{n}} \\
+& = \sum^{2N-1}_{j=0}\sum^{2N-1}_{n=0} \frac{\hat{f}_j J_{n}}{2N} \exp\left[-i \frac{2\pi n j}{2N}\right] \\ 
+& = \sum^{2N-1}_{j=0} \hat{f}_j v_{j} = \sum^{N}_{j=0} \hat{f}_j q_{j}v_{j},\end{split}\tag{13}
+\end{equation}
 ```
- where in the first step we have extended the sum from $N$ to $2N-1$ and used FFT-order definitions of $J_{n}$ and $a_{n}$
+where in the first step we have extended the sum from $N$ to $2N-1$ and used FFT-order definitions of $J_{n}$ and $a_{n}$
 ```math
-\begin{equation} J_{j} = J_{2N-j}, {\rm~for~} N+1 \leq j \leq 2N-1,\end{equation}
+\begin{equation}
+J_{j} = J_{2N-j}, {\rm~for~} N+1 \leq j \leq 2N-1,
+\end{equation}
 ```
 ```math
-\begin{equation} a_{j} = a_{2N-j}, {\rm~for~} N+1 \leq j \leq 2N-1.\end{equation}
+\begin{equation}
+a_{j} = a_{2N-j}, {\rm~for~} N+1 \leq j \leq 2N-1.
+\end{equation}
 ```
-In the second step we use the definition of the inverse transform (14) <!--\eq{eq:inverse-transform-GCR}-->, and 
+In the second step we use the definition of the inverse transform (12), and 
 in the third step we define 
 ```math
-\begin{equation} v_{j} = \sum_{n=0}^{2N-1}\frac{J_{n}}{2N}\exp\left[-i \frac{2\pi n j}{2N}\right].\end{equation}
+\begin{equation}
+v_{j} = \sum_{n=0}^{2N-1}\frac{J_{n}}{2N}\exp\left[-i \frac{2\pi n j}{2N}\right].
+\end{equation}
 ```
-Finally, we can compare equations (8) <!--\eq{eq:w-sum}--> and (13) <!--\eq{eq:weights-working}--> and deduce that 
+Finally, we can compare equations (8) and (13) and deduce that 
 ```math
-\begin{equation} w_{j} = q_{j}v_{j} {\rm~for~} 0 \leq j \leq N.  \end{equation}
+\begin{equation}
+w_{j} = q_{j}v_{j} {\rm~for~} 0 \leq j \leq N.
+\end{equation}
 ```
 We can write $v_{j}$ in terms of a discrete cosine transform, i.e.,
 ```math
-\begin{equation} v_{j} = \frac{1}{2N}\left(J_{0} + (-1)^jJ_{N} + 2\sum_{n=1}^{N-1}J_{n}\cos\left(\frac{\pi n j}{N}\right)\right).\end{equation}
+\begin{equation}
+v_{j} = \frac{1}{2N}\left(J_{0} + (-1)^jJ_{N} + 2\sum_{n=1}^{N-1}J_{n}\cos\left(\frac{\pi n j}{N}\right)\right).
+\end{equation}
 ```
  
 Weights on Gauss-Chebyshev-Radau points
 ===============================================
 We use the inverse transformation 
 ```math
-\begin{equation} a_{n} = \frac{q_{n}}{2N+1}\sum^{2N}_{j=0} \hat{f}_j \exp\left[- i \frac{2\pi n j}{2N+1}\right], \label{eq:inverse-transform-GCR}\tag{14}\end{equation}
+\begin{equation}
+a_{n} = \frac{q_{n}}{2N+1}\sum^{2N}_{j=0} \hat{f}_j \exp\left[- i \frac{2\pi n j}{2N+1}\right],\tag{14}
+\end{equation}
 ```
 where 
 ```math
-\begin{equation} q_{n} = \left\{\begin{array}{l} 2 {\rm ~if~} n > 0  \\ 1 {\rm ~if~} n=0  \end{array}\right.\end{equation}
+\begin{equation}
+q_{n} = \left\{\begin{array}{l} 2 {\rm ~if~} n > 0  \\ 1 {\rm ~if~} n=0  \end{array}\right.
+\end{equation}
 ```
 and $\hat{f}_j$ is $f(x_j)$ on the extended domain in FFT order, i.e.,
 ```math
-\begin{equation} \hat{f}_j = f(x_{j}) {\rm~for~} 0 \leq j \leq N ,\quad \hat{f}_j = f(x_{2N-j+1}){\rm~for~} N+1 \leq j \leq 2N. \end{equation}
+\begin{equation}
+\hat{f}_j = f(x_{j}) {\rm~for~} 0 \leq j \leq N ,\quad \hat{f}_j = f(x_{2N-j+1}){\rm~for~} N+1 \leq j \leq 2N.
+\end{equation}
 ```
 Note that the details of what is the appropriate FFT order depends on the order in which the points $x_j$ are stored.
 The key detail in the Chebyshev-Radau scheme is that (in the notation above)
 $x_0 = 1$ is not a repeated point, and must occupy $\hat{f}_0$. 
 With this inverse tranformation, we can write 
 ```math
-\begin{equation} \begin{split}\sum_{n=0}^N J_{n} a_{n} & =  \sum^{2N}_{n=0} \frac{a_{n}J_{n}}{q_{n}} \\
+\begin{equation}
+\begin{split}\sum_{n=0}^N J_{n} a_{n} & =  \sum^{2N}_{n=0} \frac{a_{n}J_{n}}{q_{n}} \\
 & = \sum^{2N}_{j=0}\sum^{2N}_{n=0} \frac{\hat{f}_j J_{n}}{2N+1} \exp\left[-i \frac{2\pi n j}{2N+1}\right] \\ 
-& = \sum^{2N}_{j=0} \hat{f}_j v_{j} = \sum^{N}_{j=0} \hat{f}_j q_{j}v_{j},\end{split}\label{eq:weights-working-radau}\tag{15}\end{equation}
+& = \sum^{2N}_{j=0} \hat{f}_j v_{j} = \sum^{N}_{j=0} \hat{f}_j q_{j}v_{j},\end{split}\tag{15}\end{equation}
 ```
 where in the first step we have extended the sum from $N$ to $2N$ and used FFT-order definitions of $J_{n}$ and $a_{n}$
 ```math
-\begin{equation} J_{j} = J_{2N+1-j}, {\rm~for~} N+1 \leq j \leq 2N,\end{equation}
+\begin{equation}
+J_{j} = J_{2N+1-j}, {\rm~for~} N+1 \leq j \leq 2N,
+\end{equation}
 ```
 ```math
-\begin{equation} a_{j} = a_{2N+1-j}, {\rm~for~} N+1 \leq j \leq 2N.\end{equation}
+\begin{equation}
+a_{j} = a_{2N+1-j}, {\rm~for~} N+1 \leq j \leq 2N.
+\end{equation}
 ```
-In the second step we use the definition of the inverse transform (14) <!--\eq{eq:inverse-transform-GCR}-->, and 
+In the second step we use the definition of the inverse transform (14), and 
 in the third step we define 
 ```math
-\begin{equation} v_{j} = \sum_{n=0}^{2N}\frac{J_{n}}{2N+1}\exp\left[-i \frac{2\pi n j}{2N+1}\right].\end{equation}
+\begin{equation}
+v_{j} = \sum_{n=0}^{2N}\frac{J_{n}}{2N+1}\exp\left[-i \frac{2\pi n j}{2N+1}\right].
+\end{equation}
 ```
-Finally, we can compare equations (8) <!--\eq{eq:w-sum}--> and (15) <!--\eq{eq:weights-working-radau}--> and deduce that 
+Finally, we can compare equations (8) and (15) and deduce that 
 ```math
-\begin{equation} w_{j} = q_{j}v_{j} {\rm~for~} 0 \leq j \leq N.  \end{equation}
+\begin{equation}
+w_{j} = q_{j}v_{j} {\rm~for~} 0 \leq j \leq N.
+\end{equation}
 ```

From 0538cf08332fde236f9c273a249ac9e4d1e30bc5 Mon Sep 17 00:00:00 2001
From: Michael Hardman <29800382+mrhardman@users.noreply.github.com>
Date: Sun, 27 Oct 2024 09:04:04 +0000
Subject: [PATCH 29/41] Improve autodocs for fokker_planck_calculus, remove
 export statement for unused function.

---
 moment_kinetics/src/fokker_planck.jl          |   2 +-
 moment_kinetics/src/fokker_planck_calculus.jl | 372 ++++++++++++++----
 test_scripts/2D_FEM_assembly_test.jl          |   2 +-
 3 files changed, 291 insertions(+), 85 deletions(-)

diff --git a/moment_kinetics/src/fokker_planck.jl b/moment_kinetics/src/fokker_planck.jl
index 521dca83d..9a19affb9 100644
--- a/moment_kinetics/src/fokker_planck.jl
+++ b/moment_kinetics/src/fokker_planck.jl
@@ -66,7 +66,7 @@ using ..fokker_planck_calculus: assemble_explicit_collision_operator_rhs_paralle
 using ..fokker_planck_calculus: assemble_explicit_collision_operator_rhs_parallel_analytical_inputs!
 using ..fokker_planck_calculus: calculate_YY_arrays, enforce_vpavperp_BCs!
 using ..fokker_planck_calculus: calculate_rosenbluth_potential_boundary_data!
-using ..fokker_planck_calculus: enforce_zero_bc!, elliptic_solve!, algebraic_solve!
+using ..fokker_planck_calculus: elliptic_solve!, algebraic_solve!
 using ..fokker_planck_calculus: calculate_rosenbluth_potentials_via_elliptic_solve!
 using ..fokker_planck_test: Cssp_fully_expanded_form, calculate_collisional_fluxes, H_Maxwellian, dGdvperp_Maxwellian
 using ..fokker_planck_test: d2Gdvpa2_Maxwellian, d2Gdvperpdvpa_Maxwellian, d2Gdvperp2_Maxwellian, dHdvpa_Maxwellian, dHdvperp_Maxwellian
diff --git a/moment_kinetics/src/fokker_planck_calculus.jl b/moment_kinetics/src/fokker_planck_calculus.jl
index f11dcf43c..85939a5fa 100644
--- a/moment_kinetics/src/fokker_planck_calculus.jl
+++ b/moment_kinetics/src/fokker_planck_calculus.jl
@@ -1,9 +1,9 @@
 """
-module for functions used 
+Module for functions used 
 in calculating the integrals and doing 
 the numerical differentiation for 
 the implementation of the 
-the Full-F Fokker-Planck Collision Operator [`moment_kinetics.fokker_planck`](@ref).
+full-F Fokker-Planck collision operator [`moment_kinetics.fokker_planck`](@ref).
 
 Parallelisation of the collision operator uses a special 'anyv' region type, see
 [Collision operator and `anyv` region](@ref).
@@ -25,7 +25,6 @@ export calculate_rosenbluth_potentials_via_elliptic_solve!
 
 # testing
 export calculate_rosenbluth_potential_boundary_data_exact!
-export enforce_zero_bc!
 export allocate_rosenbluth_potential_boundary_data
 export calculate_rosenbluth_potential_boundary_data_exact!
 export test_rosenbluth_potential_boundary_data
@@ -71,10 +70,11 @@ function print_vector(vector,name::String,m::mk_int)
 end
 
 """
-a struct of dummy arrays and precalculated coefficients
-for the strong-form Fokker-Planck collision operator 
+Struct of dummy arrays and precalculated coefficients
+for the Fokker-Planck collision operator when the
+Rosenbluth potentials are computed everywhere in `(vpa,vperp)`
+by direct integration. Used for testing.
 """
-
 struct fokkerplanck_arrays_direct_integration_struct
     G0_weights::MPISharedArray{mk_float,4}
     G1_weights::MPISharedArray{mk_float,4}
@@ -103,8 +103,8 @@ struct fokkerplanck_arrays_direct_integration_struct
 end
 
 """
-a struct to contain the integration weights for the boundary points
-in the (vpa,vperp) domain
+Struct to contain the integration weights for the boundary points
+in the `(vpa,vperp)` domain.
 """
 struct boundary_integration_weights_struct
     lower_vpa_boundary::MPISharedArray{mk_float,3}
@@ -113,8 +113,8 @@ struct boundary_integration_weights_struct
 end
 
 """
-a struct used for calculating the integration weights for the 
-boundary of the velocity space domain in (vpa,vperp) coordinates
+Struct used for storing the integration weights for the 
+boundary of the velocity space domain in `(vpa,vperp)` coordinates.
 """
 struct fokkerplanck_boundary_data_arrays_struct
     G0_weights::boundary_integration_weights_struct
@@ -128,12 +128,20 @@ struct fokkerplanck_boundary_data_arrays_struct
     dfdvperp::MPISharedArray{mk_float,2}    
 end
 
+"""
+Struct to store the `(vpa,vperp)` boundary data for an
+individual Rosenbluth potential.
+"""
 struct vpa_vperp_boundary_data
     lower_boundary_vpa::MPISharedArray{mk_float,1}
     upper_boundary_vpa::MPISharedArray{mk_float,1}
     upper_boundary_vperp::MPISharedArray{mk_float,1}
 end
 
+"""
+Struct to store the boundary data for all of the
+Rosenbluth potentials required for the calculation.
+"""
 struct rosenbluth_potential_boundary_data
     H_data::vpa_vperp_boundary_data
     dHdvpa_data::vpa_vperp_boundary_data
@@ -145,6 +153,15 @@ struct rosenbluth_potential_boundary_data
     d2Gdvpa2_data::vpa_vperp_boundary_data
 end
 
+"""
+Struct to store the elemental nonlinear stiffness matrices used
+to express the finite-element weak form of the collision
+operator. The arrays are indexed so that the contraction
+in the assembly step is carried out over the fastest
+accessed indices, i.e., for `YY0perp[i,j,k,iel]`, we contract
+over `i` and `j` to give data for the field position index `k`,
+all for the 1D element indexed by `iel`.
+"""
 struct YY_collision_operator_arrays
     # let phi_j(vperp) be the jth Lagrange basis function, 
     # and phi'_j(vperp) the first derivative of the Lagrange basis function
@@ -168,8 +185,8 @@ struct YY_collision_operator_arrays
 end
 
 """
-a struct of dummy arrays and precalculated coefficients
-for the weak-form Fokker-Planck collision operator 
+Struct of dummy arrays and precalculated coefficients
+for the finite-element weak-form Fokker-Planck collision operator.
 """
 struct fokkerplanck_weakform_arrays_struct{M <: AbstractSparseArray{mk_float,mk_int,N} where N}
     # boundary weights (Green's function) data
@@ -220,6 +237,9 @@ struct fokkerplanck_weakform_arrays_struct{M <: AbstractSparseArray{mk_float,mk_
     dFdvperp::MPISharedArray{mk_float,2}
 end
 
+"""
+Function to allocate a `boundary_integration_weights_struct`.
+"""
 function allocate_boundary_integration_weight(vpa,vperp)
     nvpa = vpa.n
     nvperp = vperp.n
@@ -230,6 +250,9 @@ function allocate_boundary_integration_weight(vpa,vperp)
             upper_vpa_boundary, upper_vperp_boundary)
 end
 
+"""
+Function to allocate a `fokkerplanck_boundary_data_arrays_struct`.
+"""
 function allocate_boundary_integration_weights(vpa,vperp)
     G0_weights = allocate_boundary_integration_weight(vpa,vperp)
     G1_weights = allocate_boundary_integration_weight(vpa,vperp)
@@ -260,7 +283,8 @@ end
 
 
 """
-function that precomputes the required integration weights
+Function that precomputes the required integration weights in the whole of
+`(vpa,vperp)` for the direct integration method of computing the Rosenbluth potentials.
 """
 function init_Rosenbluth_potential_integration_weights!(G0_weights,G1_weights,H0_weights,H1_weights,H2_weights,H3_weights,vperp,vpa;print_to_screen=true)
     
@@ -314,9 +338,9 @@ function init_Rosenbluth_potential_integration_weights!(G0_weights,G1_weights,H0
 end
 
 """
-function for getting the basic quadratures used for the 
+Function for getting the basic quadratures used for the 
 numerical integration of the Lagrange polynomials and the 
-Green's function.
+integration kernels.
 """
 function setup_basic_quadratures(vpa,vperp;print_to_screen=true)
     @serial_region begin
@@ -341,8 +365,7 @@ end
 
 
 """
-function for getting the indices used to choose the integration
-quadrature 
+Function for getting the indices used to choose the integration quadrature.
 """
 function get_element_limit_indices(ivpa,ivperp,vpa,vperp)
     nelement_vpa, ngrid_vpa = vpa.nelement_local, vpa.ngrid
@@ -359,9 +382,11 @@ function get_element_limit_indices(ivpa,ivperp,vpa,vperp)
     return igrid_vpa, ielement_vpa, ielement_vpa_low, ielement_vpa_hi, 
             igrid_vperp, ielement_vperp, ielement_vperp_low, ielement_vperp_hi
 end
+
 """
-function that precomputes the required integration weights
-only along the velocity space boundaries
+Function that precomputes the required integration weights only along the velocity space boundaries.
+Used as the default option as part of the strategy to compute the Rosenbluth potentials
+at the boundaries with direct integration and in the rest of `(vpa,vperp)` by solving elliptic PDEs.
 """
 function init_Rosenbluth_potential_boundary_integration_weights!(G0_weights,
       G1_weights,H0_weights,H1_weights,H2_weights,H3_weights,vpa,vperp;print_to_screen=true)
@@ -508,12 +533,14 @@ function get_nodes(coord,iel)
     return nodes
 end
 
-# Function to get the local integration grid and quadrature weights
-# to integrate a 1D element in the 2D representation of the 
-# velocity space distribution functions. This function assumes that
-# there is a divergence at the point coord_val, and splits the grid 
-# and integration weights appropriately, using Gauss-Laguerre points
-# near the divergence and Gauss-Legendre points away from the divergence. 
+"""
+Function to get the local integration grid and quadrature weights
+to integrate a 1D element in the 2D representation of the 
+velocity space distribution functions. This function assumes that
+there is a divergence at the point `coord_val`, and splits the grid 
+and integration weights appropriately, using Gauss-Laguerre points
+near the divergence and Gauss-Legendre points away from the divergence. 
+"""
 function get_scaled_x_w_with_divergences!(x_scaled, w_scaled, x_legendre, w_legendre, x_laguerre, w_laguerre, node_min, node_max, nodes, igrid_coord, coord_val)
     #println("nodes ",nodes)
     zero = 1.0e-10 
@@ -604,9 +631,12 @@ function get_scaled_x_w_with_divergences!(x_scaled, w_scaled, x_legendre, w_lege
     #println("w_scaled",w_scaled)
     return nquad_coord
 end
-# Function to get the local grid and integration weights assuming 
-# no divergences of the function on the 1D element. Gauss-Legendre
-# quadrature is used for the entire element.
+
+"""
+Function to get the local grid and integration weights assuming 
+no divergences of the function on the 1D element. Gauss-Legendre
+quadrature is used for the entire element.
+"""
 function get_scaled_x_w_no_divergences!(x_scaled, w_scaled, x_legendre, w_legendre, node_min, node_max)
     @. x_scaled = 0.0
     @. w_scaled = 0.0
@@ -621,30 +651,46 @@ function get_scaled_x_w_no_divergences!(x_scaled, w_scaled, x_legendre, w_legend
     return nquad
 end
 
-# function returns 1 if igrid = 1 or 0 if 1 < igrid <= ngrid
+"""
+Function returns `1` if `igrid = 1` or `0` if `1 < igrid <= ngrid`.
+"""
 function ng_low(igrid,ngrid)
     return floor(mk_int, (ngrid - igrid)/(ngrid - 1))
 end
-# function returns 1 if igrid = ngrid or 0 if 1 =< igrid < ngrid
+
+"""
+Function returns `1` if `igrid = ngrid` or `0` if `1 <= igrid < ngrid`.
+"""
 function ng_hi(igrid,ngrid)
     return floor(mk_int, igrid/ngrid)
 end
-# function returns 1 for nelement >= ielement > 1, 0 for ielement =1 
+
+"""
+Function returns `1` for `nelement >= ielement > 1`, `0` for `ielement = 1`.
+"""
 function nel_low(ielement,nelement)
     return floor(mk_int, (ielement - 2 + nelement)/nelement)
 end
-# function returns 1 for nelement > ielement >= 1, 0 for ielement =nelement 
+
+"""
+Function returns `1` for `nelement > ielement >= 1`, `0` for `ielement = nelement`.
+"""
 function nel_hi(ielement,nelement)
     return 1- floor(mk_int, ielement/nelement)
 end
 
-# base level function for computing the Green's function weights
-# note the definitions of ellipe & ellipk
-# `https://specialfunctions.juliamath.org/stable/functions_list/#SpecialFunctions.ellipe`
-# `https://specialfunctions.juliamath.org/stable/functions_list/#SpecialFunctions.ellipk`
-# `ellipe(m) = \int^{\pi/2}\_0 \sqrt{ 1 - m \sin^2(\theta)} d \theta`
-# `ellipe(k) = \int^{\pi/2}\_0 \frac{1}{\sqrt{ 1 - m \sin^2(\theta)}} d \theta`
-
+"""
+Base level function for computing the integration kernels for the Rosenbluth potential integration.
+Note the definitions of `ellipe(m)` (\$E(m)\$) and `ellipk(m)` (\$K(m)\$).
+`https://specialfunctions.juliamath.org/stable/functions_list/#SpecialFunctions.ellipe`
+`https://specialfunctions.juliamath.org/stable/functions_list/#SpecialFunctions.ellipk`
+```math
+E(m) = \\int^{\\pi/2}_0 \\sqrt{ 1 - m \\sin^2(\\theta)} d \\theta
+```
+```math
+K(m) = \\int^{\\pi/2}_0 \\frac{1}{\\sqrt{ 1 - m \\sin^2(\\theta)}} d \\theta
+```
+"""
 function local_element_integration!(G0_weights,G1_weights,H0_weights,H1_weights,H2_weights,H3_weights,
                             nquad_vpa,ielement_vpa,vpa, # info about primed vpa grids
                             nquad_vperp,ielement_vperp,vperp, # info about primed vperp grids
@@ -733,6 +779,13 @@ function local_element_integration!(G0_weights,G1_weights,H0_weights,H1_weights,
     return nothing
 end
 
+"""
+Function for computing the quadratures and carrying out the loop over the 
+primed `vpa` coordinate in doing the numerical integration. Splits the integrand
+into three pieces -- two which use Gauss-Legendre quadrature assuming no divergences
+in the integrand, and one which assumes a logarithmic divergence and uses a
+Gauss-Laguerre quadrature with an (exponential) change of variables to mitigate this divergence.
+"""
 function loop_over_vpa_elements!(G0_weights,G1_weights,H0_weights,H1_weights,H2_weights,H3_weights,
                             vpa,ielement_vpa_low,ielement_vpa_hi, # info about primed vperp grids
                             vperp,ielement_vperpp, # info about primed vperp grids
@@ -784,6 +837,11 @@ function loop_over_vpa_elements!(G0_weights,G1_weights,H0_weights,H1_weights,H2_
     return nothing
 end
 
+"""
+Function for computing the quadratures and carrying out the loop over the 
+primed `vpa` coordinate in doing the numerical integration. 
+Uses a Gauss-Legendre quadrature assuming no divergences in the integrand.
+"""
 function loop_over_vpa_elements_no_divergences!(G0_weights,G1_weights,H0_weights,H1_weights,H2_weights,H3_weights,
                             vpa,ielement_vpa_low,ielement_vpa_hi, # info about primed vperp grids
                             nquad_vperp,ielement_vperpp,vperp_nodes,vperp, # info about primed vperp grids
@@ -805,6 +863,15 @@ function loop_over_vpa_elements_no_divergences!(G0_weights,G1_weights,H0_weights
     return nothing
 end
 
+"""
+Function for computing the quadratures and carrying out the loop over the 
+primed `vperp` coordinate in doing the numerical integration. Splits the integrand
+into three pieces -- two which use Gauss-Legendre quadrature assuming no divergences
+in the integrand, and one which assumes a logarithmic divergence and uses a
+Gauss-Laguerre quadrature with an (exponential) change of variables to mitigate this divergence.
+This function calls `loop_over_vpa_elements_no_divergences!()` and `loop_over_vpa_elements!()`
+to carry out the primed `vpa` loop within the primed `vperp` loop.
+"""
 function loop_over_vperp_vpa_elements!(G0_weights,G1_weights,H0_weights,H1_weights,H2_weights,H3_weights,
                 vpa,ielement_vpa_low,ielement_vpa_hi, # info about primed vpa grids
                 vperp,ielement_vperp_low,ielement_vperp_hi, # info about primed vperp grids
@@ -854,12 +921,14 @@ function loop_over_vperp_vpa_elements!(G0_weights,G1_weights,H0_weights,H1_weigh
     return nothing
 end
 
-# The function loop_over_vperp_vpa_elements_no_divergences!() was for debugging.
-# By changing the source where loop_over_vperp_vpa_elements!() is called to
-# instead call this function we can verify that the Gauss-Legendre quadrature
-# is adequate for integrating a divergence-free integrand. This function should be 
-# kept until the problems with the pure integration method of computing the
-# Rosenbluth potentials are understood.
+"""
+The function `loop_over_vperp_vpa_elements_no_divergences!()` was used for debugging.
+By changing the source where `loop_over_vperp_vpa_elements!()` is called to
+instead call this function we can verify that the Gauss-Legendre quadrature
+is adequate for integrating a divergence-free integrand. This function should be 
+kept until we understand the problems preventing machine-precision accuracy in the pure integration method of computing the
+Rosenbluth potentials.
+"""
 function loop_over_vperp_vpa_elements_no_divergences!(G0_weights,G1_weights,H0_weights,H1_weights,H2_weights,H3_weights,
                 vpa,ielement_vpa_low,ielement_vpa_hi, # info about primed vpa grids
                 vperp,ielement_vperp_low,ielement_vperp_hi, # info about primed vperp grids
@@ -926,9 +995,11 @@ function ivpa_func(ic::mk_int,nvpa::mk_int)
     return ivpa
 end
 
-# function that returns the sparse matrix index
-# used to directly construct the nonzero entries
-# of a 2D assembled sparse matrix
+"""
+Function that returns the sparse matrix index
+used to directly construct the nonzero entries
+of a 2D assembled sparse matrix.
+"""
 function icsc_func(ivpa_local::mk_int,ivpap_local::mk_int,
                    ielement_vpa::mk_int,
                    ngrid_vpa::mk_int,nelement_vpa::mk_int,
@@ -946,6 +1017,9 @@ function icsc_func(ivpa_local::mk_int,ivpap_local::mk_int,
     return icsc
 end
 
+"""
+Struct to contain data needed to create a sparse matrix.
+"""
 struct sparse_matrix_constructor
     # the Ith row
     II::Array{mk_float,1}
@@ -955,6 +1029,9 @@ struct sparse_matrix_constructor
     SS::Array{mk_float,1}
 end
 
+"""
+Function to allocate an instance of `sparse_matrix_constructor`.
+"""
 function allocate_sparse_matrix_constructor(nsparse::mk_int)
     II = Array{mk_int,1}(undef,nsparse)
     @. II = 0
@@ -965,12 +1042,20 @@ function allocate_sparse_matrix_constructor(nsparse::mk_int)
     return sparse_matrix_constructor(II,JJ,SS)
 end
 
+"""
+Function to assign data to an instance of `sparse_matrix_constructor`.
+"""
 function assign_constructor_data!(data::sparse_matrix_constructor,icsc::mk_int,ii::mk_int,jj::mk_int,ss::mk_float)
     data.II[icsc] = ii
     data.JJ[icsc] = jj
     data.SS[icsc] = ss
     return nothing
 end
+
+"""
+Function to assemble data in an instance of `sparse_matrix_constructor`. Instead of
+writing `data.SS[icsc] = ss`, as in `assign_constructor_data!()`, we write `data.SS[icsc] += ss`.
+"""
 function assemble_constructor_data!(data::sparse_matrix_constructor,icsc::mk_int,ii::mk_int,jj::mk_int,ss::mk_float)
     data.II[icsc] = ii
     data.JJ[icsc] = jj
@@ -978,10 +1063,17 @@ function assemble_constructor_data!(data::sparse_matrix_constructor,icsc::mk_int
     return nothing
 end
 
+"""
+Wrapper function to create a sparse matrix with an instance of `sparse_matrix_constructor`
+and `sparse()`.
+"""
 function create_sparse_matrix(data::sparse_matrix_constructor)
     return sparse(data.II,data.JJ,data.SS)
 end
 
+"""
+Function to allocate an instance of `vpa_vperp_boundary_data`.
+"""
 function allocate_boundary_data(vpa,vperp)
     # The following velocity-space-sized buffer arrays are used to evaluate the
     # collision operator for a single species at a single spatial point. They are
@@ -997,7 +1089,10 @@ function allocate_boundary_data(vpa,vperp)
             upper_boundary_vpa,upper_boundary_vperp)
 end
 
-
+"""
+Function to assign precomputed (exact) data to an instance
+of `vpa_vperp_boundary_data`. Used in testing.
+"""
 function assign_exact_boundary_data!(func_data::vpa_vperp_boundary_data,
                                         func_exact,vpa,vperp)
     begin_anyv_region()
@@ -1014,7 +1109,10 @@ function assign_exact_boundary_data!(func_data::vpa_vperp_boundary_data,
     end
     return nothing
 end
-    
+
+"""
+Function to allocate an instance of `rosenbluth_potential_boundary_data`.
+"""    
 function allocate_rosenbluth_potential_boundary_data(vpa,vperp)
     H_data = allocate_boundary_data(vpa,vperp)
     dHdvpa_data = allocate_boundary_data(vpa,vperp)
@@ -1029,6 +1127,10 @@ function allocate_rosenbluth_potential_boundary_data(vpa,vperp)
         d2Gdvperpdvpa_data,d2Gdvpa2_data)
 end
 
+"""
+Function to assign data to an instance of `rosenbluth_potential_boundary_data`, in place,
+without allocation. Used in testing.
+"""
 function calculate_rosenbluth_potential_boundary_data_exact!(rpbd::rosenbluth_potential_boundary_data,
   H_exact,dHdvpa_exact,dHdvperp_exact,G_exact,dGdvperp_exact,
   d2Gdvperp2_exact,d2Gdvperpdvpa_exact,d2Gdvpa2_exact,
@@ -1044,7 +1146,13 @@ function calculate_rosenbluth_potential_boundary_data_exact!(rpbd::rosenbluth_po
     return nothing
 end
 
-
+"""
+Function to carry out the direct integration of a formal definition of one
+of the Rosenbluth potentials, on the boundaries of the `(vpa,vperp)` domain, 
+using the precomputed integration weights with dimension 4.
+The result is stored in an instance of `vpa_vperp_boundary_data`.
+Used in testing.
+"""
 function calculate_boundary_data!(func_data::vpa_vperp_boundary_data,
                                   weight::MPISharedArray{mk_float,4},func_input,vpa,vperp)
     nvpa = vpa.n
@@ -1073,6 +1181,12 @@ function calculate_boundary_data!(func_data::vpa_vperp_boundary_data,
     return nothing
 end
 
+"""
+Function to carry out the direct integration of a formal definition of one
+of the Rosenbluth potentials, on the boundaries of the `(vpa,vperp)` domain, 
+using the precomputed integration weights with dimension 3.
+The result is stored in an instance of `vpa_vperp_boundary_data`.
+"""
 function calculate_boundary_data!(func_data::vpa_vperp_boundary_data,
                                   weight::boundary_integration_weights_struct,
                                   func_input,vpa,vperp)
@@ -1103,6 +1217,11 @@ function calculate_boundary_data!(func_data::vpa_vperp_boundary_data,
     return nothing
 end
 
+"""
+Function to call direct integration function `calculate_boundary_data!()` and 
+assign data to an instance of `rosenbluth_potential_boundary_data`, in place,
+without allocation.
+"""
 function calculate_rosenbluth_potential_boundary_data!(rpbd::rosenbluth_potential_boundary_data,
     fkpl::Union{fokkerplanck_arrays_direct_integration_struct,fokkerplanck_boundary_data_arrays_struct},pdf,vpa,vperp,vpa_spectral,vperp_spectral;
     calculate_GG=false,calculate_dGdvperp=false)
@@ -1140,6 +1259,11 @@ function calculate_rosenbluth_potential_boundary_data!(rpbd::rosenbluth_potentia
     return nothing
 end
 
+"""
+Function to compare two instances of `rosenbluth_potential_boundary_data` --
+one assumed to contain exact results, and the other numerically computed results -- and compute
+the maximum value of the error. Calls `test_boundary_data()`.
+"""
 function test_rosenbluth_potential_boundary_data(rpbd::rosenbluth_potential_boundary_data,
     rpbd_exact::rosenbluth_potential_boundary_data,vpa,vperp;print_to_screen=true)
     
@@ -1158,6 +1282,10 @@ function test_rosenbluth_potential_boundary_data(rpbd::rosenbluth_potential_boun
     return max_H_err, max_dHdvpa_err, max_dHdvperp_err, max_G_err, max_dGdvperp_err, max_d2Gdvperp2_err, max_d2Gdvperpdvpa_err, max_d2Gdvpa2_err
 end
 
+"""
+Function to compute the maximum error \${\\rm MAX}|f_{\\rm numerical}-f_{\\rm exact}|\$ for
+instances of `vpa_vperp_boundary_data`.
+"""
 function test_boundary_data(func,func_exact,func_name,vpa,vperp,buffer_vpa,buffer_vperp_1,buffer_vperp_2,print_to_screen)
     nvpa = vpa.n
     nvperp = vperp.n
@@ -1197,6 +1325,10 @@ function get_global_compound_index(vpa,vperp,ielement_vpa,ielement_vperp,ivpa_lo
     return ic_global
 end
 
+"""
+Unused function. Sets `f(vpa,vperp)` to zero at the boundaries
+in `(vpa,vperp)`.
+"""
 function enforce_zero_bc!(fvpavperp,vpa,vperp;impose_BC_at_zero_vperp=false)
     # lower vpa boundary
     @loop_vperp ivperp begin
@@ -1221,6 +1353,11 @@ function enforce_zero_bc!(fvpavperp,vpa,vperp;impose_BC_at_zero_vperp=false)
     end
 end
 
+"""
+Sets `f(vpa,vperp)` to a specified value `f_bc` at the boundaries
+in `(vpa,vperp)`. `f_bc` is a 2D array of `(vpa,vperp)` where
+only boundary data is used. Used for testing.
+"""
 function enforce_dirichlet_bc!(fvpavperp,vpa,vperp,f_bc;dirichlet_vperp_lower_boundary=false)
     # lower vpa boundary
     for ivperp ∈ 1:vperp.n
@@ -1245,6 +1382,10 @@ function enforce_dirichlet_bc!(fvpavperp,vpa,vperp,f_bc;dirichlet_vperp_lower_bo
     end
 end
 
+"""
+Sets `f(vpa,vperp)` to a specified value `f_bc` at the boundaries
+in `(vpa,vperp)`. `f_bc` is an instance of `vpa_vperp_boundary_data`.
+"""
 function enforce_dirichlet_bc!(fvpavperp,vpa,vperp,f_bc::vpa_vperp_boundary_data)
     # lower vpa boundary
     for ivperp ∈ 1:vperp.n
@@ -1263,6 +1404,16 @@ function enforce_dirichlet_bc!(fvpavperp,vpa,vperp,f_bc::vpa_vperp_boundary_data
     return nothing
 end
 
+"""
+Function to construct the global sparse matrices used to solve
+the elliptic PDEs for the Rosenbluth potentials. Uses a dense matrix
+construction method. The matrices are 2D in the compound index `ic` 
+which indexes the velocity space labelled by `ivpa,ivperp`.
+Dirichlet boundary conditions are imposed in the appropriate stiffness
+matrices by setting the boundary row to be the Kronecker delta 
+(0 except where `ivpa = ivpap` and `ivperp = ivperpp`). 
+Used for testing.
+"""
 function assemble_matrix_operators_dirichlet_bc(vpa,vperp,vpa_spectral,vperp_spectral;print_to_screen=true)
     nc_global = vpa.n*vperp.n
     # Assemble a 2D mass matrix in the global compound coordinate
@@ -1486,6 +1637,16 @@ function assemble_matrix_operators_dirichlet_bc(vpa,vperp,vpa_spectral,vperp_spe
            PPpar2D_sparse, MMparMNperp2D_sparse
 end
 
+"""
+Function to construct the global sparse matrices used to solve
+the elliptic PDEs for the Rosenbluth potentials. Uses a sparse matrix
+construction method. The matrices are 2D in the compound index `ic` 
+which indexes the velocity space labelled by `ivpa,ivperp`.
+Dirichlet boundary conditions are imposed in the appropriate stiffness
+matrices by setting the boundary row to be the Kronecker delta 
+(0 except where `ivpa = ivpap` and `ivperp = ivperpp`).
+See also `assemble_matrix_operators_dirichlet_bc()`.
+"""
 function assemble_matrix_operators_dirichlet_bc_sparse(vpa,vperp,vpa_spectral,vperp_spectral;print_to_screen=true)
     # Assemble a 2D mass matrix in the global compound coordinate
     nc_global = vpa.n*vperp.n
@@ -1725,6 +1886,11 @@ function assemble_matrix_operators_dirichlet_bc_sparse(vpa,vperp,vpa_spectral,vp
            PPpar2D_sparse, MMparMNperp2D_sparse
 end
 
+"""
+Function to allocate an instance of `YY_collision_operator_arrays`.
+Calls `get_QQ_local!()` from `gauss_legendre`. Definitions of these
+nonlinear stiffness matrices can be found in the docs for `get_QQ_local!()`.
+"""
 function calculate_YY_arrays(vpa,vperp,vpa_spectral,vperp_spectral)
     YY0perp = Array{mk_float,4}(undef,vperp.ngrid,vperp.ngrid,vperp.ngrid,vperp.nelement_local)
     YY1perp = Array{mk_float,4}(undef,vperp.ngrid,vperp.ngrid,vperp.ngrid,vperp.nelement_local)
@@ -1752,6 +1918,12 @@ function calculate_YY_arrays(vpa,vperp,vpa_spectral,vperp_spectral)
                                         YY0par,YY1par,YY2par,YY3par)
 end
 
+"""
+Function to assemble the RHS of the kinetic equation due to the collision operator,
+in weak form. Once the array `rhsvpavperp` contains the assembled weak-form collision operator,
+a mass matrix solve still must be carried out to find the time derivative of the distribution function
+due to collisions. This function uses a purely serial algorithm for testing purposes.
+"""
 function assemble_explicit_collision_operator_rhs_serial!(rhsvpavperp,pdfs,d2Gspdvpa2,d2Gspdvperpdvpa,
     d2Gspdvperp2,dHspdvpa,dHspdvperp,ms,msp,nussp,
     vpa,vperp,YY_arrays::YY_collision_operator_arrays)
@@ -1812,6 +1984,14 @@ function assemble_explicit_collision_operator_rhs_serial!(rhsvpavperp,pdfs,d2Gsp
     return nothing
 end
 
+"""
+Function to assemble the RHS of the kinetic equation due to the collision operator,
+in weak form. Once the array `rhsvpavperp` contains the assembled weak-form collision operator,
+a mass matrix solve still must be carried out to find the time derivative of the distribution function
+due to collisions. This function uses a purely parallel algorithm and may be tested by comparing to 
+`assemble_explicit_collision_operator_rhs_serial!()`. The inner-most loop of the function is 
+in `assemble_explicit_collision_operator_rhs_parallel_inner_loop()`.
+"""
 function assemble_explicit_collision_operator_rhs_parallel!(rhsvpavperp,pdfs,d2Gspdvpa2,d2Gspdvperpdvpa,
     d2Gspdvperp2,dHspdvpa,dHspdvperp,ms,msp,nussp,
     vpa,vperp,YY_arrays::YY_collision_operator_arrays)
@@ -1859,6 +2039,9 @@ function assemble_explicit_collision_operator_rhs_parallel!(rhsvpavperp,pdfs,d2G
     return nothing
 end
 
+"""
+The inner-most loop of the parallel collision operator assembly. Called in `assemble_explicit_collision_operator_rhs_parallel!()`.
+"""
 function assemble_explicit_collision_operator_rhs_parallel_inner_loop(
         nussp, ms, msp, YY0perp, YY0par, YY1perp, YY1par, YY2perp, YY2par, YY3perp,
         YY3par, pdfs, d2Gspdvpa2, d2Gspdvperpdvpa, d2Gspdvperp2, dHspdvpa, dHspdvperp,
@@ -1898,6 +2081,12 @@ function assemble_explicit_collision_operator_rhs_parallel_inner_loop(
     return result
 end
 
+"""
+Function to assemble the RHS of the kinetic equation due to the collision operator,
+in weak form, when the distribution function appearing in the derivatives is known analytically.
+The inner-most loop of the function is 
+in `assemble_explicit_collision_operator_rhs_parallel_analytical_inputs_inner_loop()`.
+"""
 function assemble_explicit_collision_operator_rhs_parallel_analytical_inputs!(rhsvpavperp,pdfs,dpdfsdvpa,dpdfsdvperp,d2Gspdvpa2,d2Gspdvperpdvpa,
     d2Gspdvperp2,dHspdvpa,dHspdvperp,ms,msp,nussp,
     vpa,vperp,YY_arrays::YY_collision_operator_arrays)
@@ -1945,6 +2134,9 @@ function assemble_explicit_collision_operator_rhs_parallel_analytical_inputs!(rh
     return nothing
 end
 
+"""
+The inner-most loop of `assemble_explicit_collision_operator_rhs_parallel_analytical_inputs!()`.
+"""
 # Separate function for inner loop, possible optimization??
 function assemble_explicit_collision_operator_rhs_parallel_analytical_inputs_inner_loop(
         nussp, ms, msp, pdfs, dpdfsdvpa, dpdfsdvperp, d2Gspdvperp2,
@@ -1987,18 +2179,20 @@ function assemble_explicit_collision_operator_rhs_parallel_analytical_inputs_inn
     return result
 end
 
-
-# Elliptic solve function. 
-# field: the solution
-# source: the source function on the RHS
-# boundary data: the known values of field at infinity
-# lu_object_lhs: the object for the differential operator that defines field
-# matrix_rhs: the weak matrix acting on the source vector
-# vpa, vperp: coordinate structs
-#
-# Note: all variants of `elliptic_solve!()` run only in serial. They do not handle
-# shared-memory parallelism themselves. The calling site must ensure that
-# `elliptic_solve!()` is only called by one process in a shared-memory block.
+"""
+Elliptic solve function. 
+
+    field: the solution
+    source: the source function on the RHS
+    boundary data: the known values of field at infinity
+    lu_object_lhs: the object for the differential operator that defines field
+    matrix_rhs: the weak matrix acting on the source vector
+    vpa, vperp: coordinate structs
+
+Note: all variants of `elliptic_solve!()` run only in serial. They do not handle
+shared-memory parallelism themselves. The calling site must ensure that
+`elliptic_solve!()` is only called by one process in a shared-memory block.
+"""
 function elliptic_solve!(field,source,boundary_data::vpa_vperp_boundary_data,
             lu_object_lhs,matrix_rhs,rhsvpavperp,vpa,vperp)
     # assemble the rhs of the weak system
@@ -2042,14 +2236,16 @@ function elliptic_solve!(field,source_1,source_2,boundary_data::vpa_vperp_bounda
     return nothing
 end
 
-# Same as elliptic_solve!() above but no Dirichlet boundary conditions are imposed,
-# because the function is only used where the lu_object_lhs is derived from a mass matrix.
-# The source is made of two different terms with different weak matrices
-# because of the form of the only algebraic equation that we consider.
-#
-# Note: `algebraic_solve!()` run only in serial. They do not handle shared-memory
-# parallelism themselves. The calling site must ensure that `algebraic_solve!()` is only
-# called by one process in a shared-memory block.
+"""
+Same as `elliptic_solve!()` above but no Dirichlet boundary conditions are imposed,
+because the function is only used where the `lu_object_lhs` is derived from a mass matrix.
+The source is made of two different terms with different weak matrices
+because of the form of the only algebraic equation that we consider.
+
+Note: `algebraic_solve!()` runs only in serial. It does not handle shared-memory
+parallelism itself. The calling site must ensure that `algebraic_solve!()` is only
+called by one process in a shared-memory block.
+"""
 function algebraic_solve!(field,source_1,source_2,boundary_data::vpa_vperp_boundary_data,
             lu_object_lhs,matrix_rhs_1,matrix_rhs_2,rhs,vpa,vperp)
     
@@ -2073,6 +2269,15 @@ function algebraic_solve!(field,source_1,source_2,boundary_data::vpa_vperp_bound
     return nothing
 end
 
+"""
+Function to solve the appropriate elliptic PDEs to find the
+Rosenbluth potentials. First, we calculate the Rosenbluth potentials
+at the boundary with the direct integration method. Then, we use this
+data to solve the elliptic PDEs with the boundary data providing an
+accurate Dirichlet boundary condition on the maximum `vpa` and `vperp`
+of the domain. We use the sparse LU decomposition from the LinearAlgebra package
+to solve the PDE matrix equations.
+"""
 function calculate_rosenbluth_potentials_via_elliptic_solve!(GG,HH,dHdvpa,dHdvperp,
              d2Gdvpa2,dGdvperp,d2Gdvperpdvpa,d2Gdvperp2,ffsp_in,
              vpa,vperp,vpa_spectral,vperp_spectral,fkpl_arrays::fokkerplanck_weakform_arrays_struct;
@@ -2199,7 +2404,8 @@ function calculate_rosenbluth_potentials_via_elliptic_solve!(GG,HH,dHdvpa,dHdvpe
 end
 
 """
-function to calculate Rosenbluth potentials by direct integration
+Function to calculate Rosenbluth potentials in the entire
+domain of `(vpa,vperp)` by direct integration.
 """
 
 function calculate_rosenbluth_potentials_via_direct_integration!(GG,HH,dHdvpa,dHdvperp,
@@ -2277,8 +2483,9 @@ function calculate_rosenbluth_integrals!(GG,d2Gspdvpa2,dGspdvperp,d2Gspdvperpdvp
 end
 
 """
-function to enforce boundary conditions on the collision operator
-result to be consistent with the boundary conditions imposed on the the pdf
+Function to enforce boundary conditions on the collision operator
+result to be consistent with the boundary conditions imposed on the
+distribution function.
 """
 function enforce_vpavperp_BCs!(pdf,vpa,vperp,vpa_spectral,vperp_spectral)
     nvpa = vpa.n
@@ -2314,20 +2521,19 @@ function enforce_vpavperp_BCs!(pdf,vpa,vperp,vpa_spectral,vperp_spectral)
 end
 
 """
-function to interpolate f(vpa,vperp) from one 
+Function to interpolate `f(vpa,vperp)` from one 
 velocity grid to another, assuming that both 
-grids are represented by vpa, vperp in normalised units,
+grids are represented by `(vpa,vperp)` in normalised units,
 but have different normalisation factors 
-defining the meaning of these grids in physical units.
+defining the meaning of these grids in physical units. E.g.,
 
-E.g. vpai, vperpi = ci * vpa, ci * vperp
+     vpai, vperpi = ci * vpa, ci * vperp
      vpae, vperpe = ce * vpa, ce * vperp
      
-with ci = sqrt(Ti/mi), ce = sqrt(Te/mi)
-
-scalefac = ci / ce is the ratio of the
-two reference speeds
+with `ci = sqrt(Ti/mi)`, `ce = sqrt(Te/mi)`
 
+`scalefac = ci / ce` is the ratio of the
+two reference speeds.
 """
 function interpolate_2D_vspace!(pdf_out,pdf_in,vpa,vperp,scalefac)
     
@@ -2403,7 +2609,7 @@ end
 #end
 
 """
-function to find the element in which x sits
+Function to find the element in which x sits.
 """
 function ielement_loopup(x,coord)
     xebs = coord.element_boundaries
diff --git a/test_scripts/2D_FEM_assembly_test.jl b/test_scripts/2D_FEM_assembly_test.jl
index bdb499490..34a19c3cd 100644
--- a/test_scripts/2D_FEM_assembly_test.jl
+++ b/test_scripts/2D_FEM_assembly_test.jl
@@ -30,7 +30,7 @@ using moment_kinetics.fokker_planck_test: print_test_data, fkpl_error_data, allo
 using moment_kinetics.fokker_planck_test: save_fkpl_error_data
 
 using moment_kinetics.fokker_planck_calculus: elliptic_solve!
-using moment_kinetics.fokker_planck_calculus: enforce_zero_bc!, allocate_rosenbluth_potential_boundary_data
+using moment_kinetics.fokker_planck_calculus: allocate_rosenbluth_potential_boundary_data
 using moment_kinetics.fokker_planck_calculus: calculate_rosenbluth_potential_boundary_data!, calculate_rosenbluth_potential_boundary_data_exact!
 using moment_kinetics.fokker_planck_calculus: test_rosenbluth_potential_boundary_data, enforce_vpavperp_BCs!
 using moment_kinetics.fokker_planck_calculus: calculate_rosenbluth_potentials_via_elliptic_solve!

From 3801743d7e6883c0dd020d35605e29a55f4db378 Mon Sep 17 00:00:00 2001
From: Michael Hardman <29800382+mrhardman@users.noreply.github.com>
Date: Sun, 27 Oct 2024 09:39:01 +0000
Subject: [PATCH 30/41] Add notes on the Fokker-Planck collision operator,
 linking to the ExCALIBUR report and giving input parameter information.
 Modify the index to feature this note and the note on magnetic geometry.

---
 docs/src/fokker_planck_notes.md | 37 +++++++++++++++++++++++++++++++++
 docs/src/index.md               |  2 ++
 2 files changed, 39 insertions(+)
 create mode 100644 docs/src/fokker_planck_notes.md

diff --git a/docs/src/fokker_planck_notes.md b/docs/src/fokker_planck_notes.md
new file mode 100644
index 000000000..2be75d955
--- /dev/null
+++ b/docs/src/fokker_planck_notes.md
@@ -0,0 +1,37 @@
+Fokker-Planck collision operator
+===============================================
+
+We implement the nonlinear Fokker-Planck collision operator for self collisions 
+using the weak-form finite-element method. This is documented in the 
+[ExCALIBUR/NEPTUNE report 2070839-TN-07](https://excalibur-neptune.github.io/Documents/TN-07_AHigherOrderFiniteElementImplementationFullFlandauFokkerPlanckCollisionOperatorC.html).
+A publication based on this report is in progress. Full online documentation will follow.
+
+Input parameters
+===============================================
+
+A series of 0D2V Fokker-Planck input files can be found in
+
+    examples/fokker-planck/
+
+and examples of 1D2V pre-sheath simulations with the Fokker-Planck collision operator
+can be found in
+
+    examples/fokker-planck-1D2V/
+    
+noting that the timestepping or resolution parameters may require modification to find
+a converged simulation.
+
+The basic input namelist is structured as follows
+```
+[fokker_planck_collisions]
+use_fokker_planck = true
+# nuii sets the normalised input C[F,F] Fokker-Planck collision frequency
+# for frequency_option = "manual"
+nuii = 1.0
+frequency_option = "manual"
+```
+Set `use_fokker_planck=false` to turn off Fokker-Planck collisions 
+without commenting out the namelist.
+The default option is `frequency_option = "reference_parameters"`, where `nuii` is set
+by the reference parameter inputs. Further specialised input parameters can be
+seen in the source at `setup_fkpl_collisions_input()` in `moment_kinetics/src/fokker_planck.jl`.
\ No newline at end of file
diff --git a/docs/src/index.md b/docs/src/index.md
index 39410aaaf..581c879cc 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -13,6 +13,8 @@ Pages = ["getting_started.md",
          "moment_constraints_notes.md",
          "boundary_conditions_notes.md",
          "external_sources_notes.md",
+         "fokker_planck_notes.md",
+         "geometry.md",
          "debugging-hints.md",
          "developing.md",
          "manual_setup.md",

From d6751ff74a6fef8d07ab82b3d2e73a8bd4c45554 Mon Sep 17 00:00:00 2001
From: Michael Hardman <29800382+mrhardman@users.noreply.github.com>
Date: Thu, 7 Nov 2024 18:04:38 +0000
Subject: [PATCH 31/41] Addition of multipole expansion option to specify
 Rosenbluth potential boundary data.

---
 moment_kinetics/src/fokker_planck_calculus.jl | 644 +++++++++++++++++-
 test_scripts/2D_FEM_assembly_test.jl          |  10 +-
 2 files changed, 647 insertions(+), 7 deletions(-)

diff --git a/moment_kinetics/src/fokker_planck_calculus.jl b/moment_kinetics/src/fokker_planck_calculus.jl
index 85939a5fa..b76d553fa 100644
--- a/moment_kinetics/src/fokker_planck_calculus.jl
+++ b/moment_kinetics/src/fokker_planck_calculus.jl
@@ -17,6 +17,7 @@ export assemble_explicit_collision_operator_rhs_parallel!
 export assemble_explicit_collision_operator_rhs_parallel_analytical_inputs!
 export YY_collision_operator_arrays, calculate_YY_arrays
 export calculate_rosenbluth_potential_boundary_data!
+export calculate_rosenbluth_potential_boundary_data_multipole!
 export elliptic_solve!, algebraic_solve!
 export fokkerplanck_arrays_direct_integration_struct
 export fokkerplanck_weakform_arrays_struct
@@ -40,6 +41,7 @@ using ..communication
 using ..communication: MPISharedArray, global_rank
 using ..lagrange_polynomials: lagrange_poly, lagrange_poly_optimised
 using ..looping
+using ..velocity_moments: integrate_over_vspace
 using moment_kinetics.gauss_legendre: get_QQ_local!
 using Dates
 using SpecialFunctions: ellipk, ellipe
@@ -48,6 +50,7 @@ using SuiteSparse
 using LinearAlgebra: ldiv!, mul!, LU
 using FastGaussQuadrature
 using Printf
+using MPI
 
 function print_matrix(matrix,name::String,n::mk_int,m::mk_int)
     println("\n ",name," \n")
@@ -1259,6 +1262,635 @@ function calculate_rosenbluth_potential_boundary_data!(rpbd::rosenbluth_potentia
     return nothing
 end
 
+function multipole_H(vpa::mk_float,vperp::mk_float,
+                   I00::mk_float, I10::mk_float, I20::mk_float, I30::mk_float, I40::mk_float, I50::mk_float, I60::mk_float, I70::mk_float, I80::mk_float, 
+                   I02::mk_float, I12::mk_float, I22::mk_float, I32::mk_float, I42::mk_float, I52::mk_float, I62::mk_float,
+                   I04::mk_float, I14::mk_float, I24::mk_float, I34::mk_float, I44::mk_float, I06::mk_float, I16::mk_float, I26::mk_float, I08::mk_float)
+   # Multipole-series value of the potential H at the point (vpa, vperp): every coefficient Imn (m + n <= 8, n even) multiplies a rational function of vpa and vperp, and I00 is the leading (order-zero) term. NOTE(review): the Imn are presumably velocity-space moments supplied by the caller -- confirm.
+   H_series = (I80*((128*vpa^8 - 1792*vpa^6*vperp^2 + 3360*vpa^4*vperp^4 - 1120*vpa^2*vperp^6 + 35*vperp^8)/(128*(vpa^2 + vperp^2)^8))
+             +I70*((vpa*(16*vpa^6 - 168*vpa^4*vperp^2 + 210*vpa^2*vperp^4 - 35*vperp^6))/(16*(vpa^2 + vperp^2)^7))
+             +I62*((-7*(128*vpa^8 - 1792*vpa^6*vperp^2 + 3360*vpa^4*vperp^4 - 1120*vpa^2*vperp^6 + 35*vperp^8))/(64*(vpa^2 + vperp^2)^8))
+             +I60*((16*vpa^6 - 120*vpa^4*vperp^2 + 90*vpa^2*vperp^4 - 5*vperp^6)/(16*(vpa^2 + vperp^2)^6))
+             +I52*((21*vpa*(-16*vpa^6 + 168*vpa^4*vperp^2 - 210*vpa^2*vperp^4 + 35*vperp^6))/(32*(vpa^2 + vperp^2)^7))
+             +I50*((8*vpa^5 - 40*vpa^3*vperp^2 + 15*vpa*vperp^4)/(8*(vpa^2 + vperp^2)^5))
+             +I44*((105*(128*vpa^8 - 1792*vpa^6*vperp^2 + 3360*vpa^4*vperp^4 - 1120*vpa^2*vperp^6 + 35*vperp^8))/(512*(vpa^2 + vperp^2)^8))
+             +I42*((-15*(16*vpa^6 - 120*vpa^4*vperp^2 + 90*vpa^2*vperp^4 - 5*vperp^6))/(32*(vpa^2 + vperp^2)^6))
+             +I40*((8*vpa^4 - 24*vpa^2*vperp^2 + 3*vperp^4)/(8*(vpa^2 + vperp^2)^4))
+             +I34*((105*vpa*(16*vpa^6 - 168*vpa^4*vperp^2 + 210*vpa^2*vperp^4 - 35*vperp^6))/(128*(vpa^2 + vperp^2)^7))
+             +I32*((-5*vpa*(8*vpa^4 - 40*vpa^2*vperp^2 + 15*vperp^4))/(8*(vpa^2 + vperp^2)^5))
+             +I30*((vpa*(2*vpa^2 - 3*vperp^2))/(2*(vpa^2 + vperp^2)^3))
+             +I26*((-35*(128*vpa^8 - 1792*vpa^6*vperp^2 + 3360*vpa^4*vperp^4 - 1120*vpa^2*vperp^6 + 35*vperp^8))/(512*(vpa^2 + vperp^2)^8))
+             +I24*((45*(16*vpa^6 - 120*vpa^4*vperp^2 + 90*vpa^2*vperp^4 - 5*vperp^6))/(128*(vpa^2 + vperp^2)^6))
+             +I22*((-3*(8*vpa^4 - 24*vpa^2*vperp^2 + 3*vperp^4))/(8*(vpa^2 + vperp^2)^4))
+             +I20*(-1/2*(-2*vpa^2 + vperp^2)/(vpa^2 + vperp^2)^2)
+             +I16*((-35*vpa*(16*vpa^6 - 168*vpa^4*vperp^2 + 210*vpa^2*vperp^4 - 35*vperp^6))/(256*(vpa^2 + vperp^2)^7))
+             +I14*((15*vpa*(8*vpa^4 - 40*vpa^2*vperp^2 + 15*vperp^4))/(64*(vpa^2 + vperp^2)^5))
+             +I12*((-6*vpa^3 + 9*vpa*vperp^2)/(4*(vpa^2 + vperp^2)^3))
+             +I10*(vpa/(vpa^2 + vperp^2))
+             +I08*((35*(128*vpa^8 - 1792*vpa^6*vperp^2 + 3360*vpa^4*vperp^4 - 1120*vpa^2*vperp^6 + 35*vperp^8))/(16384*(vpa^2 + vperp^2)^8))
+             +I06*((-5*(16*vpa^6 - 120*vpa^4*vperp^2 + 90*vpa^2*vperp^4 - 5*vperp^6))/(256*(vpa^2 + vperp^2)^6))
+             +I04*((3*(8*vpa^4 - 24*vpa^2*vperp^2 + 3*vperp^4))/(64*(vpa^2 + vperp^2)^4))
+             +I02*((-2*vpa^2 + vperp^2)/(4*(vpa^2 + vperp^2)^2))
+             +I00*(1))
+   # common factor (vpa^2 + vperp^2)^(-1/2) shared by every term above, so the I00 term alone gives I00/sqrt(vpa^2 + vperp^2); nothing here guards against vpa = vperp = 0, where the divisions blow up
+   H_series *= ((vpa^2 + vperp^2)^(-1/2))
+   return H_series
+end
+
+function multipole_dHdvpa(vpa::mk_float,vperp::mk_float,
+                   I00::mk_float, I10::mk_float, I20::mk_float, I30::mk_float, I40::mk_float, I50::mk_float, I60::mk_float, I70::mk_float, I80::mk_float, 
+                   I02::mk_float, I12::mk_float, I22::mk_float, I32::mk_float, I42::mk_float, I52::mk_float, I62::mk_float,
+                   I04::mk_float, I14::mk_float, I24::mk_float, I34::mk_float, I44::mk_float, I06::mk_float, I16::mk_float, I26::mk_float, I08::mk_float)
+   # Multipole-series value of dH/dvpa at the point (vpa, vperp), built from the same coefficient set Imn (m + n <= 8, n even) that multipole_H takes; each Imn multiplies a rational function of vpa and vperp. NOTE(review): the Imn are presumably velocity-space moments supplied by the caller -- confirm.
+   dHdvpa_series = (I80*((9*vpa*(128*vpa^8 - 2304*vpa^6*vperp^2 + 6048*vpa^4*vperp^4 - 3360*vpa^2*vperp^6 + 315*vperp^8))/(128*(vpa^2 + vperp^2)^8))
+                +I70*((128*vpa^8 - 1792*vpa^6*vperp^2 + 3360*vpa^4*vperp^4 - 1120*vpa^2*vperp^6 + 35*vperp^8)/(16*(vpa^2 + vperp^2)^7))
+                +I62*((-63*(128*vpa^9 - 2304*vpa^7*vperp^2 + 6048*vpa^5*vperp^4 - 3360*vpa^3*vperp^6 + 315*vpa*vperp^8))/(64*(vpa^2 + vperp^2)^8))
+                +I60*((7*vpa*(16*vpa^6 - 168*vpa^4*vperp^2 + 210*vpa^2*vperp^4 - 35*vperp^6))/(16*(vpa^2 + vperp^2)^6))
+                +I52*((-21*(128*vpa^8 - 1792*vpa^6*vperp^2 + 3360*vpa^4*vperp^4 - 1120*vpa^2*vperp^6 + 35*vperp^8))/(32*(vpa^2 + vperp^2)^7))
+                +I50*((3*(16*vpa^6 - 120*vpa^4*vperp^2 + 90*vpa^2*vperp^4 - 5*vperp^6))/(8*(vpa^2 + vperp^2)^5))
+                +I44*((945*vpa*(128*vpa^8 - 2304*vpa^6*vperp^2 + 6048*vpa^4*vperp^4 - 3360*vpa^2*vperp^6 + 315*vperp^8))/(512*(vpa^2 + vperp^2)^8))
+                +I42*((-105*vpa*(16*vpa^6 - 168*vpa^4*vperp^2 + 210*vpa^2*vperp^4 - 35*vperp^6))/(32*(vpa^2 + vperp^2)^6))
+                +I40*((5*vpa*(8*vpa^4 - 40*vpa^2*vperp^2 + 15*vperp^4))/(8*(vpa^2 + vperp^2)^4))
+                +I34*((105*(128*vpa^8 - 1792*vpa^6*vperp^2 + 3360*vpa^4*vperp^4 - 1120*vpa^2*vperp^6 + 35*vperp^8))/(128*(vpa^2 + vperp^2)^7))
+                +I32*((-15*(16*vpa^6 - 120*vpa^4*vperp^2 + 90*vpa^2*vperp^4 - 5*vperp^6))/(8*(vpa^2 + vperp^2)^5))
+                +I30*((8*vpa^4 - 24*vpa^2*vperp^2 + 3*vperp^4)/(2*(vpa^2 + vperp^2)^3))
+                +I26*((-315*vpa*(128*vpa^8 - 2304*vpa^6*vperp^2 + 6048*vpa^4*vperp^4 - 3360*vpa^2*vperp^6 + 315*vperp^8))/(512*(vpa^2 + vperp^2)^8))
+                +I24*((315*vpa*(16*vpa^6 - 168*vpa^4*vperp^2 + 210*vpa^2*vperp^4 - 35*vperp^6))/(128*(vpa^2 + vperp^2)^6))
+                +I22*((-15*vpa*(8*vpa^4 - 40*vpa^2*vperp^2 + 15*vperp^4))/(8*(vpa^2 + vperp^2)^4))
+                +I20*((3*vpa*(2*vpa^2 - 3*vperp^2))/(2*(vpa^2 + vperp^2)^2))
+                +I16*((-35*(128*vpa^8 - 1792*vpa^6*vperp^2 + 3360*vpa^4*vperp^4 - 1120*vpa^2*vperp^6 + 35*vperp^8))/(256*(vpa^2 + vperp^2)^7))
+                +I14*((45*(16*vpa^6 - 120*vpa^4*vperp^2 + 90*vpa^2*vperp^4 - 5*vperp^6))/(64*(vpa^2 + vperp^2)^5))
+                +I12*((-3*(8*vpa^4 - 24*vpa^2*vperp^2 + 3*vperp^4))/(4*(vpa^2 + vperp^2)^3))
+                +I10*(-1 + (3*vpa^2)/(vpa^2 + vperp^2))
+                +I08*((315*vpa*(128*vpa^8 - 2304*vpa^6*vperp^2 + 6048*vpa^4*vperp^4 - 3360*vpa^2*vperp^6 + 315*vperp^8))/(16384*(vpa^2 + vperp^2)^8))
+                +I06*((-35*vpa*(16*vpa^6 - 168*vpa^4*vperp^2 + 210*vpa^2*vperp^4 - 35*vperp^6))/(256*(vpa^2 + vperp^2)^6))
+                +I04*((15*vpa*(8*vpa^4 - 40*vpa^2*vperp^2 + 15*vperp^4))/(64*(vpa^2 + vperp^2)^4))
+                +I02*((-6*vpa^3 + 9*vpa*vperp^2)/(4*(vpa^2 + vperp^2)^2))
+                +I00*(vpa))
+   # common factor -(vpa^2 + vperp^2)^(-3/2) (note the minus sign), so the I00 term alone gives -I00*vpa/(vpa^2 + vperp^2)^(3/2), the vpa-derivative of I00/sqrt(vpa^2 + vperp^2); nothing here guards against vpa = vperp = 0
+   dHdvpa_series *= -((vpa^2 + vperp^2)^(-3/2))   
+   return dHdvpa_series
+end
+
+function multipole_dHdvperp(vpa::mk_float,vperp::mk_float,
+                   I00::mk_float, I10::mk_float, I20::mk_float, I30::mk_float, I40::mk_float, I50::mk_float, I60::mk_float, I70::mk_float, I80::mk_float, 
+                   I02::mk_float, I12::mk_float, I22::mk_float, I32::mk_float, I42::mk_float, I52::mk_float, I62::mk_float,
+                   I04::mk_float, I14::mk_float, I24::mk_float, I34::mk_float, I44::mk_float, I06::mk_float, I16::mk_float, I26::mk_float, I08::mk_float)
+   # Multipole-series value of dH/dvperp at the point (vpa, vperp), built from the same coefficient set Imn (m + n <= 8, n even) that multipole_H takes; every term carries an explicit factor of vperp, so the result is zero at vperp = 0 (vpa != 0). NOTE(review): the Imn are presumably velocity-space moments supplied by the caller -- confirm.
+   dHdvperp_series = (I80*((45*vperp*(128*vpa^8 - 896*vpa^6*vperp^2 + 1120*vpa^4*vperp^4 - 280*vpa^2*vperp^6 + 7*vperp^8))/(128*(vpa^2 + vperp^2)^8))
+                +I70*((9*vpa*vperp*(64*vpa^6 - 336*vpa^4*vperp^2 + 280*vpa^2*vperp^4 - 35*vperp^6))/(16*(vpa^2 + vperp^2)^7))
+                +I62*((-315*(128*vpa^8*vperp - 896*vpa^6*vperp^3 + 1120*vpa^4*vperp^5 - 280*vpa^2*vperp^7 + 7*vperp^9))/(64*(vpa^2 + vperp^2)^8))
+                +I60*((7*vperp*(64*vpa^6 - 240*vpa^4*vperp^2 + 120*vpa^2*vperp^4 - 5*vperp^6))/(16*(vpa^2 + vperp^2)^6))
+                +I52*((-189*vpa*vperp*(64*vpa^6 - 336*vpa^4*vperp^2 + 280*vpa^2*vperp^4 - 35*vperp^6))/(32*(vpa^2 + vperp^2)^7))
+                +I50*((21*vpa*vperp*(8*vpa^4 - 20*vpa^2*vperp^2 + 5*vperp^4))/(8*(vpa^2 + vperp^2)^5))
+                +I44*((4725*vperp*(128*vpa^8 - 896*vpa^6*vperp^2 + 1120*vpa^4*vperp^4 - 280*vpa^2*vperp^6 + 7*vperp^8))/(512*(vpa^2 + vperp^2)^8))
+                +I42*((105*vperp*(-64*vpa^6 + 240*vpa^4*vperp^2 - 120*vpa^2*vperp^4 + 5*vperp^6))/(32*(vpa^2 + vperp^2)^6))
+                +I40*((15*vperp*(8*vpa^4 - 12*vpa^2*vperp^2 + vperp^4))/(8*(vpa^2 + vperp^2)^4))
+                +I34*((945*vpa*vperp*(64*vpa^6 - 336*vpa^4*vperp^2 + 280*vpa^2*vperp^4 - 35*vperp^6))/(128*(vpa^2 + vperp^2)^7))
+                +I32*((-105*vpa*vperp*(8*vpa^4 - 20*vpa^2*vperp^2 + 5*vperp^4))/(8*(vpa^2 + vperp^2)^5))
+                +I30*((5*vpa*vperp*(4*vpa^2 - 3*vperp^2))/(2*(vpa^2 + vperp^2)^3))
+                +I26*((-1575*vperp*(128*vpa^8 - 896*vpa^6*vperp^2 + 1120*vpa^4*vperp^4 - 280*vpa^2*vperp^6 + 7*vperp^8))/(512*(vpa^2 + vperp^2)^8))
+                +I24*((315*vperp*(64*vpa^6 - 240*vpa^4*vperp^2 + 120*vpa^2*vperp^4 - 5*vperp^6))/(128*(vpa^2 + vperp^2)^6))
+                +I22*((-45*vperp*(8*vpa^4 - 12*vpa^2*vperp^2 + vperp^4))/(8*(vpa^2 + vperp^2)^4))
+                +I20*((-3*vperp*(-4*vpa^2 + vperp^2))/(2*(vpa^2 + vperp^2)^2))
+                +I16*((-315*vpa*vperp*(64*vpa^6 - 336*vpa^4*vperp^2 + 280*vpa^2*vperp^4 - 35*vperp^6))/(256*(vpa^2 + vperp^2)^7))
+                +I14*((315*vpa*vperp*(8*vpa^4 - 20*vpa^2*vperp^2 + 5*vperp^4))/(64*(vpa^2 + vperp^2)^5))
+                +I12*((-15*vpa*vperp*(4*vpa^2 - 3*vperp^2))/(4*(vpa^2 + vperp^2)^3))
+                +I10*((3*vpa*vperp)/(vpa^2 + vperp^2))
+                +I08*((1575*(128*vpa^8*vperp - 896*vpa^6*vperp^3 + 1120*vpa^4*vperp^5 - 280*vpa^2*vperp^7 + 7*vperp^9))/(16384*(vpa^2 + vperp^2)^8))
+                +I06*((-35*(64*vpa^6*vperp - 240*vpa^4*vperp^3 + 120*vpa^2*vperp^5 - 5*vperp^7))/(256*(vpa^2 + vperp^2)^6))
+                +I04*((45*(8*vpa^4*vperp - 12*vpa^2*vperp^3 + vperp^5))/(64*(vpa^2 + vperp^2)^4))
+                +I02*((3*vperp*(-4*vpa^2 + vperp^2))/(4*(vpa^2 + vperp^2)^2))
+                +I00*(vperp))
+   # common factor -(vpa^2 + vperp^2)^(-3/2) (note the minus sign), so the I00 term alone gives -I00*vperp/(vpa^2 + vperp^2)^(3/2), the vperp-derivative of I00/sqrt(vpa^2 + vperp^2); nothing here guards against vpa = vperp = 0
+   dHdvperp_series *= -((vpa^2 + vperp^2)^(-3/2))
+   return dHdvperp_series
+end
+
+function multipole_G(vpa::mk_float,vperp::mk_float,
+                   I00::mk_float, I10::mk_float, I20::mk_float, I30::mk_float, I40::mk_float, I50::mk_float, I60::mk_float, I70::mk_float, I80::mk_float, 
+                   I02::mk_float, I12::mk_float, I22::mk_float, I32::mk_float, I42::mk_float, I52::mk_float, I62::mk_float,
+                   I04::mk_float, I14::mk_float, I24::mk_float, I34::mk_float, I44::mk_float, I06::mk_float, I16::mk_float, I26::mk_float, I08::mk_float)
+   # Multipole-series value of the potential G at the point (vpa, vperp): every coefficient Imn (m + n <= 8, n even) multiplies a rational function of vpa and vperp, and I00 is the leading (order-zero) term. NOTE(review): the Imn are presumably velocity-space moments supplied by the caller -- confirm.
+   G_series = (I80*((64*vpa^6*vperp^2 - 240*vpa^4*vperp^4 + 120*vpa^2*vperp^6 - 5*vperp^8)/(128*(vpa^2 + vperp^2)^8))
+             +I70*((vpa*vperp^2*(8*vpa^4 - 20*vpa^2*vperp^2 + 5*vperp^4))/(16*(vpa^2 + vperp^2)^7))
+             +I62*((32*vpa^8 - 656*vpa^6*vperp^2 + 1620*vpa^4*vperp^4 - 670*vpa^2*vperp^6 + 25*vperp^8)/(64*(vpa^2 + vperp^2)^8))
+             +I60*((vperp^2*(8*vpa^4 - 12*vpa^2*vperp^2 + vperp^4))/(16*(vpa^2 + vperp^2)^6))
+             +I52*((vpa*(16*vpa^6 - 232*vpa^4*vperp^2 + 370*vpa^2*vperp^4 - 75*vperp^6))/(32*(vpa^2 + vperp^2)^7))
+             +I50*((vpa*vperp^2*(4*vpa^2 - 3*vperp^2))/(8*(vpa^2 + vperp^2)^5))
+             +I44*((-15*(64*vpa^8 - 864*vpa^6*vperp^2 + 1560*vpa^4*vperp^4 - 500*vpa^2*vperp^6 + 15*vperp^8))/(512*(vpa^2 + vperp^2)^8))
+             +I42*((16*vpa^6 - 152*vpa^4*vperp^2 + 138*vpa^2*vperp^4 - 9*vperp^6)/(32*(vpa^2 + vperp^2)^6))
+             +I40*(-1/8*(vperp^2*(-4*vpa^2 + vperp^2))/(vpa^2 + vperp^2)^4)
+             +I34*((5*vpa*(-32*vpa^6 + 296*vpa^4*vperp^2 - 320*vpa^2*vperp^4 + 45*vperp^6))/(128*(vpa^2 + vperp^2)^7))
+             +I32*((vpa*(4*vpa^4 - 22*vpa^2*vperp^2 + 9*vperp^4))/(8*(vpa^2 + vperp^2)^5))
+             +I30*((vpa*vperp^2)/(2*(vpa^2 + vperp^2)^3))
+             +I26*((5*(96*vpa^8 - 1072*vpa^6*vperp^2 + 1500*vpa^4*vperp^4 - 330*vpa^2*vperp^6 + 5*vperp^8))/(512*(vpa^2 + vperp^2)^8))
+             +I24*((3*(-32*vpa^6 + 184*vpa^4*vperp^2 - 96*vpa^2*vperp^4 + 3*vperp^6))/(128*(vpa^2 + vperp^2)^6))
+             +I22*((4*vpa^4 - 10*vpa^2*vperp^2 + vperp^4)/(8*(vpa^2 + vperp^2)^4))
+             +I20*(vperp^2/(2*(vpa^2 + vperp^2)^2))
+             +I16*((5*vpa*(16*vpa^6 - 120*vpa^4*vperp^2 + 90*vpa^2*vperp^4 - 5*vperp^6))/(256*(vpa^2 + vperp^2)^7))
+             +I14*((-3*vpa*(8*vpa^4 - 24*vpa^2*vperp^2 + 3*vperp^4))/(64*(vpa^2 + vperp^2)^5))
+             +I12*((vpa*(2*vpa^2 - vperp^2))/(4*(vpa^2 + vperp^2)^3))
+             +I10*(-(vpa/(vpa^2 + vperp^2)))
+             +I08*((5*(-128*vpa^8 + 1280*vpa^6*vperp^2 - 1440*vpa^4*vperp^4 + 160*vpa^2*vperp^6 + 5*vperp^8))/(16384*(vpa^2 + vperp^2)^8))
+             +I06*((16*vpa^6 - 72*vpa^4*vperp^2 + 18*vpa^2*vperp^4 + vperp^6)/(256*(vpa^2 + vperp^2)^6))
+             +I04*((-8*vpa^4 + 8*vpa^2*vperp^2 + vperp^4)/(64*(vpa^2 + vperp^2)^4))
+             +I02*((2*vpa^2 + vperp^2)/(4*(vpa^2 + vperp^2)^2))
+             +I00*(1))
+   # common factor (vpa^2 + vperp^2)^(1/2) shared by every term above, so the I00 term alone gives I00*sqrt(vpa^2 + vperp^2); the higher terms still divide by powers of (vpa^2 + vperp^2) and nothing here guards against vpa = vperp = 0
+   G_series *= ((vpa^2 + vperp^2)^(1/2))   
+   return G_series
+end
+
+function multipole_dGdvperp(vpa::mk_float,vperp::mk_float,
+                   I00::mk_float, I10::mk_float, I20::mk_float, I30::mk_float, I40::mk_float, I50::mk_float, I60::mk_float, I70::mk_float, I80::mk_float, 
+                   I02::mk_float, I12::mk_float, I22::mk_float, I32::mk_float, I42::mk_float, I52::mk_float, I62::mk_float,
+                   I04::mk_float, I14::mk_float, I24::mk_float, I34::mk_float, I44::mk_float, I06::mk_float, I16::mk_float, I26::mk_float, I08::mk_float)
+   # Multipole-series value of dG/dvperp at the point (vpa, vperp), built from the same coefficient set Imn (m + n <= 8, n even) that multipole_G takes; every term carries an explicit factor of vperp, so the result is zero at vperp = 0 (vpa != 0). NOTE(review): the Imn are presumably velocity-space moments supplied by the caller -- confirm.
+   dGdvperp_series = (I80*((vperp*(128*vpa^8 - 1792*vpa^6*vperp^2 + 3360*vpa^4*vperp^4 - 1120*vpa^2*vperp^6 + 35*vperp^8))/(128*(vpa^2 + vperp^2)^8))
+                   +I70*((vpa*vperp*(16*vpa^6 - 168*vpa^4*vperp^2 + 210*vpa^2*vperp^4 - 35*vperp^6))/(16*(vpa^2 + vperp^2)^7))
+                   +I62*((-7*(256*vpa^8*vperp - 2144*vpa^6*vperp^3 + 3120*vpa^4*vperp^5 - 890*vpa^2*vperp^7 + 25*vperp^9))/(64*(vpa^2 + vperp^2)^8))
+                   +I60*((vperp*(16*vpa^6 - 120*vpa^4*vperp^2 + 90*vpa^2*vperp^4 - 5*vperp^6))/(16*(vpa^2 + vperp^2)^6))
+                   +I52*((21*vpa*vperp*(-32*vpa^6 + 192*vpa^4*vperp^2 - 180*vpa^2*vperp^4 + 25*vperp^6))/(32*(vpa^2 + vperp^2)^7))
+                   +I50*((8*vpa^5*vperp - 40*vpa^3*vperp^3 + 15*vpa*vperp^5)/(8*(vpa^2 + vperp^2)^5))
+                   +I44*((315*vperp*(128*vpa^8 - 832*vpa^6*vperp^2 + 960*vpa^4*vperp^4 - 220*vpa^2*vperp^6 + 5*vperp^8))/(512*(vpa^2 + vperp^2)^8))
+                   +I42*((15*vperp*(-32*vpa^6 + 128*vpa^4*vperp^2 - 68*vpa^2*vperp^4 + 3*vperp^6))/(32*(vpa^2 + vperp^2)^6))
+                   +I40*((vperp*(8*vpa^4 - 24*vpa^2*vperp^2 + 3*vperp^4))/(8*(vpa^2 + vperp^2)^4))
+                   +I34*((315*vpa*vperp*(16*vpa^6 - 72*vpa^4*vperp^2 + 50*vpa^2*vperp^4 - 5*vperp^6))/(128*(vpa^2 + vperp^2)^7))
+                   +I32*((-5*vpa*vperp*(16*vpa^4 - 38*vpa^2*vperp^2 + 9*vperp^4))/(8*(vpa^2 + vperp^2)^5))
+                   +I30*((vpa*vperp*(2*vpa^2 - 3*vperp^2))/(2*(vpa^2 + vperp^2)^3))
+                   +I26*((-35*vperp*(512*vpa^8 - 2848*vpa^6*vperp^2 + 2640*vpa^4*vperp^4 - 430*vpa^2*vperp^6 + 5*vperp^8))/(512*(vpa^2 + vperp^2)^8))
+                   +I24*((-45*vperp*(-48*vpa^6 + 136*vpa^4*vperp^2 - 46*vpa^2*vperp^4 + vperp^6))/(128*(vpa^2 + vperp^2)^6))
+                   +I22*((-3*vperp*(16*vpa^4 - 18*vpa^2*vperp^2 + vperp^4))/(8*(vpa^2 + vperp^2)^4))
+                   +I20*(-1/2*(vperp*(-2*vpa^2 + vperp^2))/(vpa^2 + vperp^2)^2)
+                   +I16*((-35*vpa*vperp*(64*vpa^6 - 240*vpa^4*vperp^2 + 120*vpa^2*vperp^4 - 5*vperp^6))/(256*(vpa^2 + vperp^2)^7))
+                   +I14*((45*vpa*vperp*(8*vpa^4 - 12*vpa^2*vperp^2 + vperp^4))/(64*(vpa^2 + vperp^2)^5))
+                   +I12*((3*vpa*vperp*(-4*vpa^2 + vperp^2))/(4*(vpa^2 + vperp^2)^3))
+                   +I10*((vpa*vperp)/(vpa^2 + vperp^2))
+                   +I08*((175*(128*vpa^8*vperp - 640*vpa^6*vperp^3 + 480*vpa^4*vperp^5 - 40*vpa^2*vperp^7 - vperp^9))/(16384*(vpa^2 + vperp^2)^8))
+                   +I06*((-5*(64*vpa^6*vperp - 144*vpa^4*vperp^3 + 24*vpa^2*vperp^5 + vperp^7))/(256*(vpa^2 + vperp^2)^6))
+                   +I04*((3*(24*vpa^4*vperp - 12*vpa^2*vperp^3 - vperp^5))/(64*(vpa^2 + vperp^2)^4))
+                   +I02*(-1/4*(vperp*(4*vpa^2 + vperp^2))/(vpa^2 + vperp^2)^2)
+                   +I00*(vperp))
+   # common factor (vpa^2 + vperp^2)^(-1/2) shared by every term above, so the I00 term alone gives I00*vperp/sqrt(vpa^2 + vperp^2), the vperp-derivative of I00*sqrt(vpa^2 + vperp^2); nothing here guards against vpa = vperp = 0
+   dGdvperp_series *= ((vpa^2 + vperp^2)^(-1/2))
+   return dGdvperp_series
+end
+
+function multipole_d2Gdvperp2(vpa::mk_float,vperp::mk_float,
+                   I00::mk_float, I10::mk_float, I20::mk_float, I30::mk_float, I40::mk_float, I50::mk_float, I60::mk_float, I70::mk_float, I80::mk_float, 
+                   I02::mk_float, I12::mk_float, I22::mk_float, I32::mk_float, I42::mk_float, I52::mk_float, I62::mk_float,
+                   I04::mk_float, I14::mk_float, I24::mk_float, I34::mk_float, I44::mk_float, I06::mk_float, I16::mk_float, I26::mk_float, I08::mk_float)
+   # Multipole-series value of d2G/dvperp2 at the point (vpa, vperp), built from the same coefficient set Imn (m + n <= 8, n even) that multipole_G takes; each Imn multiplies a rational function of vpa and vperp. NOTE(review): the Imn are presumably velocity-space moments supplied by the caller -- confirm.
+   d2Gdvperp2_series = (I80*((128*vpa^10 - 7424*vpa^8*vperp^2 + 41888*vpa^6*vperp^4 - 48160*vpa^4*vperp^6 + 11515*vpa^2*vperp^8 - 280*vperp^10)/(128*(vpa^2 + vperp^2)^8))
+                   +I70*((16*vpa^9 - 728*vpa^7*vperp^2 + 3066*vpa^5*vperp^4 - 2345*vpa^3*vperp^6 + 280*vpa*vperp^8)/(16*(vpa^2 + vperp^2)^7))
+                   +I62*((-7*(256*vpa^10 - 10528*vpa^8*vperp^2 + 45616*vpa^6*vperp^4 - 43670*vpa^4*vperp^6 + 9125*vpa^2*vperp^8 - 200*vperp^10))/(64*(vpa^2 + vperp^2)^8))
+                   +I60*((16*vpa^8 - 552*vpa^6*vperp^2 + 1650*vpa^4*vperp^4 - 755*vpa^2*vperp^6 + 30*vperp^8)/(16*(vpa^2 + vperp^2)^6))
+                   +I52*((-21*(32*vpa^9 - 1024*vpa^7*vperp^2 + 3204*vpa^5*vperp^4 - 1975*vpa^3*vperp^6 + 200*vpa*vperp^8))/(32*(vpa^2 + vperp^2)^7))
+                   +I50*((8*vpa^7 - 200*vpa^5*vperp^2 + 395*vpa^3*vperp^4 - 90*vpa*vperp^6)/(8*(vpa^2 + vperp^2)^5))
+                   +I44*((315*(128*vpa^10 - 4544*vpa^8*vperp^2 + 16448*vpa^6*vperp^4 - 13060*vpa^4*vperp^6 + 2245*vpa^2*vperp^8 - 40*vperp^10))/(512*(vpa^2 + vperp^2)^8))
+                   +I42*((-15*(32*vpa^8 - 768*vpa^6*vperp^2 + 1620*vpa^4*vperp^4 - 565*vpa^2*vperp^6 + 18*vperp^8))/(32*(vpa^2 + vperp^2)^6))
+                   +I40*((8*vpa^6 - 136*vpa^4*vperp^2 + 159*vpa^2*vperp^4 - 12*vperp^6)/(8*(vpa^2 + vperp^2)^4))
+                   +I34*((315*vpa*(16*vpa^8 - 440*vpa^6*vperp^2 + 1114*vpa^4*vperp^4 - 535*vpa^2*vperp^6 + 40*vperp^8))/(128*(vpa^2 + vperp^2)^7))
+                   +I32*((5*vpa*(-16*vpa^6 + 274*vpa^4*vperp^2 - 349*vpa^2*vperp^4 + 54*vperp^6))/(8*(vpa^2 + vperp^2)^5))
+                   +I30*((vpa*(2*vpa^4 - 21*vpa^2*vperp^2 + 12*vperp^4))/(2*(vpa^2 + vperp^2)^3))
+                   +I26*((-35*(512*vpa^10 - 16736*vpa^8*vperp^2 + 53072*vpa^6*vperp^4 - 34690*vpa^4*vperp^6 + 4345*vpa^2*vperp^8 - 40*vperp^10))/(512*(vpa^2 + vperp^2)^8))
+                   +I24*((135*(16*vpa^8 - 328*vpa^6*vperp^2 + 530*vpa^4*vperp^4 - 125*vpa^2*vperp^6 + 2*vperp^8))/(128*(vpa^2 + vperp^2)^6))
+                   +I22*((-3*(16*vpa^6 - 182*vpa^4*vperp^2 + 113*vpa^2*vperp^4 - 4*vperp^6))/(8*(vpa^2 + vperp^2)^4))
+                   +I20*((2*vpa^4 - 11*vpa^2*vperp^2 + 2*vperp^4)/(2*(vpa^2 + vperp^2)^2))
+                   +I16*((-35*vpa*(64*vpa^8 - 1616*vpa^6*vperp^2 + 3480*vpa^4*vperp^4 - 1235*vpa^2*vperp^6 + 40*vperp^8))/(256*(vpa^2 + vperp^2)^7))
+                   +I14*((45*vpa*(8*vpa^6 - 116*vpa^4*vperp^2 + 101*vpa^2*vperp^4 - 6*vperp^6))/(64*(vpa^2 + vperp^2)^5))
+                   +I12*((-3*vpa*(4*vpa^4 - 27*vpa^2*vperp^2 + 4*vperp^4))/(4*(vpa^2 + vperp^2)^3))
+                   +I10*(-2*vpa + (3*vpa^3)/(vpa^2 + vperp^2))
+                   +I08*((175*(128*vpa^10 - 3968*vpa^8*vperp^2 + 11360*vpa^6*vperp^4 - 6040*vpa^4*vperp^6 + 391*vpa^2*vperp^8 + 8*vperp^10))/(16384*(vpa^2 + vperp^2)^8))
+                   +I06*((-5*(64*vpa^8 - 1200*vpa^6*vperp^2 + 1560*vpa^4*vperp^4 - 185*vpa^2*vperp^6 - 6*vperp^8))/(256*(vpa^2 + vperp^2)^6))
+                   +I04*((3*(24*vpa^6 - 228*vpa^4*vperp^2 + 67*vpa^2*vperp^4 + 4*vperp^6))/(64*(vpa^2 + vperp^2)^4))
+                   +I02*((-4*vpa^4 + 13*vpa^2*vperp^2 + 2*vperp^4)/(4*(vpa^2 + vperp^2)^2))
+                   +I00*(vpa^2))
+   # common factor (vpa^2 + vperp^2)^(-3/2) shared by every term above, so the I00 term alone gives I00*vpa^2/(vpa^2 + vperp^2)^(3/2), the second vperp-derivative of I00*sqrt(vpa^2 + vperp^2); nothing here guards against vpa = vperp = 0
+   d2Gdvperp2_series *= ((vpa^2 + vperp^2)^(-3/2))   
+   return d2Gdvperp2_series
+end
+
+function multipole_d2Gdvperpdvpa(vpa::mk_float,vperp::mk_float,
+                   I00::mk_float, I10::mk_float, I20::mk_float, I30::mk_float, I40::mk_float, I50::mk_float, I60::mk_float, I70::mk_float, I80::mk_float, 
+                   I02::mk_float, I12::mk_float, I22::mk_float, I32::mk_float, I42::mk_float, I52::mk_float, I62::mk_float,
+                   I04::mk_float, I14::mk_float, I24::mk_float, I34::mk_float, I44::mk_float, I06::mk_float, I16::mk_float, I26::mk_float, I08::mk_float)
+   # Multipole-series value of the mixed derivative d2G/(dvperp dvpa) at the point (vpa, vperp), built from the same coefficient set Imn (m + n <= 8, n even) that multipole_G takes; every term carries an explicit factor of vperp, so the result is zero at vperp = 0 (vpa != 0). NOTE(review): the Imn are presumably velocity-space moments supplied by the caller -- confirm.
+   d2Gdvperpdvpa_series = (I80*((9*vpa*vperp*(128*vpa^8 - 2304*vpa^6*vperp^2 + 6048*vpa^4*vperp^4 - 3360*vpa^2*vperp^6 + 315*vperp^8))/(128*(vpa^2 + vperp^2)^8))
+                      +I70*((vperp*(128*vpa^8 - 1792*vpa^6*vperp^2 + 3360*vpa^4*vperp^4 - 1120*vpa^2*vperp^6 + 35*vperp^8))/(16*(vpa^2 + vperp^2)^7))
+                      +I62*((-63*(256*vpa^9*vperp - 2848*vpa^7*vperp^3 + 5936*vpa^5*vperp^5 - 2870*vpa^3*vperp^7 + 245*vpa*vperp^9))/(64*(vpa^2 + vperp^2)^8))
+                      +I60*((7*vpa*vperp*(16*vpa^6 - 168*vpa^4*vperp^2 + 210*vpa^2*vperp^4 - 35*vperp^6))/(16*(vpa^2 + vperp^2)^6))
+                      +I52*((-21*(256*vpa^8*vperp - 2144*vpa^6*vperp^3 + 3120*vpa^4*vperp^5 - 890*vpa^2*vperp^7 + 25*vperp^9))/(32*(vpa^2 + vperp^2)^7))
+                      +I50*((3*vperp*(16*vpa^6 - 120*vpa^4*vperp^2 + 90*vpa^2*vperp^4 - 5*vperp^6))/(8*(vpa^2 + vperp^2)^5))
+                      +I44*((945*vpa*vperp*(384*vpa^8 - 3392*vpa^6*vperp^2 + 5824*vpa^4*vperp^4 - 2380*vpa^2*vperp^6 + 175*vperp^8))/(512*(vpa^2 + vperp^2)^8))
+                      +I42*((-105*vpa*vperp*(32*vpa^6 - 192*vpa^4*vperp^2 + 180*vpa^2*vperp^4 - 25*vperp^6))/(32*(vpa^2 + vperp^2)^6))
+                      +I40*((5*vpa*vperp*(8*vpa^4 - 40*vpa^2*vperp^2 + 15*vperp^4))/(8*(vpa^2 + vperp^2)^4))
+                      +I34*((315*vperp*(128*vpa^8 - 832*vpa^6*vperp^2 + 960*vpa^4*vperp^4 - 220*vpa^2*vperp^6 + 5*vperp^8))/(128*(vpa^2 + vperp^2)^7))
+                      +I32*((15*vperp*(-32*vpa^6 + 128*vpa^4*vperp^2 - 68*vpa^2*vperp^4 + 3*vperp^6))/(8*(vpa^2 + vperp^2)^5))
+                      +I30*((vperp*(8*vpa^4 - 24*vpa^2*vperp^2 + 3*vperp^4))/(2*(vpa^2 + vperp^2)^3))
+                      +I26*((-315*vpa*vperp*(512*vpa^8 - 3936*vpa^6*vperp^2 + 5712*vpa^4*vperp^4 - 1890*vpa^2*vperp^6 + 105*vperp^8))/(512*(vpa^2 + vperp^2)^8))
+                      +I24*((945*vpa*vperp*(16*vpa^6 - 72*vpa^4*vperp^2 + 50*vpa^2*vperp^4 - 5*vperp^6))/(128*(vpa^2 + vperp^2)^6))
+                      +I22*((-15*vpa*vperp*(16*vpa^4 - 38*vpa^2*vperp^2 + 9*vperp^4))/(8*(vpa^2 + vperp^2)^4))
+                      +I20*((3*vpa*vperp*(2*vpa^2 - 3*vperp^2))/(2*(vpa^2 + vperp^2)^2))
+                      +I16*((-35*vperp*(512*vpa^8 - 2848*vpa^6*vperp^2 + 2640*vpa^4*vperp^4 - 430*vpa^2*vperp^6 + 5*vperp^8))/(256*(vpa^2 + vperp^2)^7))
+                      +I14*((-45*vperp*(-48*vpa^6 + 136*vpa^4*vperp^2 - 46*vpa^2*vperp^4 + vperp^6))/(64*(vpa^2 + vperp^2)^5))
+                      +I12*((-3*vperp*(16*vpa^4 - 18*vpa^2*vperp^2 + vperp^4))/(4*(vpa^2 + vperp^2)^3))
+                      +I10*(vperp*(-1 + (3*vpa^2)/(vpa^2 + vperp^2)))
+                      +I08*((1575*vpa*(128*vpa^8*vperp - 896*vpa^6*vperp^3 + 1120*vpa^4*vperp^5 - 280*vpa^2*vperp^7 + 7*vperp^9))/(16384*(vpa^2 + vperp^2)^8))
+                      +I06*((-35*vpa*(64*vpa^6*vperp - 240*vpa^4*vperp^3 + 120*vpa^2*vperp^5 - 5*vperp^7))/(256*(vpa^2 + vperp^2)^6))
+                      +I04*((45*vpa*(8*vpa^4*vperp - 12*vpa^2*vperp^3 + vperp^5))/(64*(vpa^2 + vperp^2)^4))
+                      +I02*((3*vpa*vperp*(-4*vpa^2 + vperp^2))/(4*(vpa^2 + vperp^2)^2))
+                      +I00*(vpa*vperp))
+   # common factor -(vpa^2 + vperp^2)^(-3/2) (note the minus sign), so the I00 term alone gives -I00*vpa*vperp/(vpa^2 + vperp^2)^(3/2), the mixed second derivative of I00*sqrt(vpa^2 + vperp^2); nothing here guards against vpa = vperp = 0
+   d2Gdvperpdvpa_series *= -((vpa^2 + vperp^2)^(-3/2))   
+   return d2Gdvperpdvpa_series
+end
+
+function multipole_d2Gdvpa2(vpa::mk_float,vperp::mk_float,
+                   I00::mk_float, I10::mk_float, I20::mk_float, I30::mk_float, I40::mk_float, I50::mk_float, I60::mk_float, I70::mk_float, I80::mk_float, 
+                   I02::mk_float, I12::mk_float, I22::mk_float, I32::mk_float, I42::mk_float, I52::mk_float, I62::mk_float,
+                   I04::mk_float, I14::mk_float, I24::mk_float, I34::mk_float, I44::mk_float, I06::mk_float, I16::mk_float, I26::mk_float, I08::mk_float)
+   # Multipole-series value of d2G/dvpa2 at the point (vpa, vperp), built from the same coefficient set Imn (m + n <= 8, n even) that multipole_G takes; each Imn multiplies a rational function of vpa and vperp. NOTE(review): the Imn are presumably velocity-space moments supplied by the caller -- confirm.
+   d2Gdvpa2_series = (I80*((45*vperp^2*(128*vpa^8 - 896*vpa^6*vperp^2 + 1120*vpa^4*vperp^4 - 280*vpa^2*vperp^6 + 7*vperp^8))/(128*(vpa^2 + vperp^2)^8))
+                   +I70*((9*vpa*vperp^2*(64*vpa^6 - 336*vpa^4*vperp^2 + 280*vpa^2*vperp^4 - 35*vperp^6))/(16*(vpa^2 + vperp^2)^7))
+                   +I62*((7*(256*vpa^10 - 9088*vpa^8*vperp^2 + 43456*vpa^6*vperp^4 - 45920*vpa^4*vperp^6 + 10430*vpa^2*vperp^8 - 245*vperp^10))/(64*(vpa^2 + vperp^2)^8))
+                   +I60*((7*vperp^2*(64*vpa^6 - 240*vpa^4*vperp^2 + 120*vpa^2*vperp^4 - 5*vperp^6))/(16*(vpa^2 + vperp^2)^6))
+                   +I52*((21*vpa*(32*vpa^8 - 880*vpa^6*vperp^2 + 3108*vpa^4*vperp^4 - 2170*vpa^2*vperp^6 + 245*vperp^8))/(32*(vpa^2 + vperp^2)^7))
+                   +I50*((21*vpa*vperp^2*(8*vpa^4 - 20*vpa^2*vperp^2 + 5*vperp^4))/(8*(vpa^2 + vperp^2)^5))
+                   +I44*((105*(-512*vpa^10 + 12416*vpa^8*vperp^2 - 46592*vpa^6*vperp^4 + 41440*vpa^4*vperp^6 - 8260*vpa^2*vperp^8 + 175*vperp^10))/(512*(vpa^2 + vperp^2)^8))
+                   +I42*((15*(32*vpa^8 - 656*vpa^6*vperp^2 + 1620*vpa^4*vperp^4 - 670*vpa^2*vperp^6 + 25*vperp^8))/(32*(vpa^2 + vperp^2)^6))
+                   +I40*((15*vperp^2*(8*vpa^4 - 12*vpa^2*vperp^2 + vperp^4))/(8*(vpa^2 + vperp^2)^4))
+                   +I34*((-105*vpa*(64*vpa^8 - 1184*vpa^6*vperp^2 + 3192*vpa^4*vperp^4 - 1820*vpa^2*vperp^6 + 175*vperp^8))/(128*(vpa^2 + vperp^2)^7))
+                   +I32*((5*vpa*(16*vpa^6 - 232*vpa^4*vperp^2 + 370*vpa^2*vperp^4 - 75*vperp^6))/(8*(vpa^2 + vperp^2)^5))
+                   +I30*((5*vpa*vperp^2*(4*vpa^2 - 3*vperp^2))/(2*(vpa^2 + vperp^2)^3))
+                   +I26*((105*(256*vpa^10 - 5248*vpa^8*vperp^2 + 16576*vpa^6*vperp^4 - 12320*vpa^4*vperp^6 + 2030*vpa^2*vperp^8 - 35*vperp^10))/(512*(vpa^2 + vperp^2)^8))
+                   +I24*((-45*(64*vpa^8 - 864*vpa^6*vperp^2 + 1560*vpa^4*vperp^4 - 500*vpa^2*vperp^6 + 15*vperp^8))/(128*(vpa^2 + vperp^2)^6))
+                   +I22*((3*(16*vpa^6 - 152*vpa^4*vperp^2 + 138*vpa^2*vperp^4 - 9*vperp^6))/(8*(vpa^2 + vperp^2)^4))
+                   +I20*((-3*vperp^2*(-4*vpa^2 + vperp^2))/(2*(vpa^2 + vperp^2)^2))
+                   +I16*((105*vpa*(32*vpa^8 - 496*vpa^6*vperp^2 + 1092*vpa^4*vperp^4 - 490*vpa^2*vperp^6 + 35*vperp^8))/(256*(vpa^2 + vperp^2)^7))
+                   +I14*((15*vpa*(-32*vpa^6 + 296*vpa^4*vperp^2 - 320*vpa^2*vperp^4 + 45*vperp^6))/(64*(vpa^2 + vperp^2)^5))
+                   +I12*((3*vpa*(4*vpa^4 - 22*vpa^2*vperp^2 + 9*vperp^4))/(4*(vpa^2 + vperp^2)^3))
+                   +I10*((3*vpa*vperp^2)/(vpa^2 + vperp^2))
+                   +I08*((-35*(1024*vpa^10 - 19072*vpa^8*vperp^2 + 52864*vpa^6*vperp^4 - 32480*vpa^4*vperp^6 + 3920*vpa^2*vperp^8 - 35*vperp^10))/(16384*(vpa^2 + vperp^2)^8))
+                   +I06*((5*(96*vpa^8 - 1072*vpa^6*vperp^2 + 1500*vpa^4*vperp^4 - 330*vpa^2*vperp^6 + 5*vperp^8))/(256*(vpa^2 + vperp^2)^6))
+                   +I04*((-3*(32*vpa^6 - 184*vpa^4*vperp^2 + 96*vpa^2*vperp^4 - 3*vperp^6))/(64*(vpa^2 + vperp^2)^4))
+                   +I02*((4*vpa^4 - 10*vpa^2*vperp^2 + vperp^4)/(4*(vpa^2 + vperp^2)^2))
+                   +I00*(vperp^2))
+   # common factor (vpa^2 + vperp^2)^(-3/2) shared by every term above, so the I00 term alone gives I00*vperp^2/(vpa^2 + vperp^2)^(3/2), the second vpa-derivative of I00*sqrt(vpa^2 + vperp^2); nothing here guards against vpa = vperp = 0
+   d2Gdvpa2_series *= ((vpa^2 + vperp^2)^(-3/2))
+   return d2Gdvpa2_series
+end
+
+"""Fill `func_data` with `multipole_H` evaluated on the velocity-grid boundaries: `lower_boundary_vpa` and `upper_boundary_vpa` (at `vpa.grid[1]` and `vpa.grid[vpa.n]`, one entry per `vperp` point) and `upper_boundary_vperp` (at `vperp.grid[vperp.n]`, one entry per `vpa` point).
+`I00` ... `I08` are the multipole coefficients, passed through unchanged to `multipole_H`. No lower-`vperp` boundary entry is written. Returns `nothing`."""
+function calculate_boundary_data_multipole_H!(func_data::vpa_vperp_boundary_data,vpa,vperp,
+                                     I00, I10, I20, I30, I40, I50, I60, I70, I80, 
+                                     I02, I12, I22, I32, I42, I52, I62,
+                                     I04, I14, I24, I34, I44, I06, I16, I26, I08)
+    nvpa = vpa.n # used below as the index of the upper vpa boundary point in vpa.grid
+    nvperp = vperp.n # used below as the index of the upper vperp boundary point in vperp.grid
+    begin_anyv_vperp_region(no_synchronize=true)
+    @loop_vperp ivperp begin
+                func_data.lower_boundary_vpa[ivperp] = multipole_H(vpa.grid[1],vperp.grid[ivperp],
+                                                       I00, I10, I20, I30, I40, I50, I60, I70, I80, 
+                                                       I02, I12, I22, I32, I42, I52, I62,
+                                                       I04, I14, I24, I34, I44, I06, I16, I26, I08)
+                func_data.upper_boundary_vpa[ivperp] = multipole_H(vpa.grid[nvpa],vperp.grid[ivperp],
+                                                       I00, I10, I20, I30, I40, I50, I60, I70, I80, 
+                                                       I02, I12, I22, I32, I42, I52, I62,
+                                                       I04, I14, I24, I34, I44, I06, I16, I26, I08)
+    end
+    begin_anyv_vpa_region(no_synchronize=true)
+    @loop_vpa ivpa begin
+                func_data.upper_boundary_vperp[ivpa] = multipole_H(vpa.grid[ivpa],vperp.grid[nvperp],
+                                                       I00, I10, I20, I30, I40, I50, I60, I70, I80, 
+                                                       I02, I12, I22, I32, I42, I52, I62,
+                                                       I04, I14, I24, I34, I44, I06, I16, I26, I08)
+    end
+    # NOTE(review): no region switch or synchronisation is performed before returning (both regions above were begun with no_synchronize=true); presumably the caller synchronises before reading func_data -- confirm
+    return nothing
+end
+
+"""Fill `func_data` with `multipole_dHdvpa` evaluated on the velocity-grid boundaries: `lower_boundary_vpa` and `upper_boundary_vpa` (at `vpa.grid[1]` and `vpa.grid[vpa.n]`, one entry per `vperp` point) and `upper_boundary_vperp` (at `vperp.grid[vperp.n]`, one entry per `vpa` point).
+`I00` ... `I08` are the multipole coefficients, passed through unchanged to `multipole_dHdvpa`. No lower-`vperp` boundary entry is written. Returns `nothing`."""
+function calculate_boundary_data_multipole_dHdvpa!(func_data::vpa_vperp_boundary_data,vpa,vperp,
+                                     I00, I10, I20, I30, I40, I50, I60, I70, I80, 
+                                     I02, I12, I22, I32, I42, I52, I62,
+                                     I04, I14, I24, I34, I44, I06, I16, I26, I08)
+    nvpa = vpa.n # used below as the index of the upper vpa boundary point in vpa.grid
+    nvperp = vperp.n # used below as the index of the upper vperp boundary point in vperp.grid
+    begin_anyv_vperp_region(no_synchronize=true)
+    @loop_vperp ivperp begin
+                func_data.lower_boundary_vpa[ivperp] = multipole_dHdvpa(vpa.grid[1],vperp.grid[ivperp],
+                                                       I00, I10, I20, I30, I40, I50, I60, I70, I80, 
+                                                       I02, I12, I22, I32, I42, I52, I62,
+                                                       I04, I14, I24, I34, I44, I06, I16, I26, I08)
+                func_data.upper_boundary_vpa[ivperp] = multipole_dHdvpa(vpa.grid[nvpa],vperp.grid[ivperp],
+                                                       I00, I10, I20, I30, I40, I50, I60, I70, I80, 
+                                                       I02, I12, I22, I32, I42, I52, I62,
+                                                       I04, I14, I24, I34, I44, I06, I16, I26, I08)
+    end
+    begin_anyv_vpa_region(no_synchronize=true)
+    @loop_vpa ivpa begin
+                func_data.upper_boundary_vperp[ivpa] = multipole_dHdvpa(vpa.grid[ivpa],vperp.grid[nvperp],
+                                                       I00, I10, I20, I30, I40, I50, I60, I70, I80, 
+                                                       I02, I12, I22, I32, I42, I52, I62,
+                                                       I04, I14, I24, I34, I44, I06, I16, I26, I08)
+    end
+    # NOTE(review): no region switch or synchronisation is performed before returning (both regions above were begun with no_synchronize=true); presumably the caller synchronises before reading func_data -- confirm
+    return nothing
+end
+
+"""Write `multipole_dHdvperp` at `vpa.grid[1]`/`vpa.grid[nvpa]` (over `@loop_vperp`) and at `vperp.grid[nvperp]`
+(over `@loop_vpa`) into `func_data.lower_boundary_vpa`, `upper_boundary_vpa` and `upper_boundary_vperp`."""
+function calculate_boundary_data_multipole_dHdvperp!(func_data::vpa_vperp_boundary_data,vpa,vperp,
+                                     I00, I10, I20, I30, I40, I50, I60, I70, I80, 
+                                     I02, I12, I22, I32, I42, I52, I62,
+                                     I04, I14, I24, I34, I44, I06, I16, I26, I08)
+    nvpa = vpa.n
+    nvperp = vperp.n
+    begin_anyv_vperp_region(no_synchronize=true)
+    @loop_vperp ivperp begin
+                func_data.lower_boundary_vpa[ivperp] = multipole_dHdvperp(vpa.grid[1],vperp.grid[ivperp],
+                                                       I00, I10, I20, I30, I40, I50, I60, I70, I80, 
+                                                       I02, I12, I22, I32, I42, I52, I62,
+                                                       I04, I14, I24, I34, I44, I06, I16, I26, I08)
+                func_data.upper_boundary_vpa[ivperp] = multipole_dHdvperp(vpa.grid[nvpa],vperp.grid[ivperp],
+                                                       I00, I10, I20, I30, I40, I50, I60, I70, I80, 
+                                                       I02, I12, I22, I32, I42, I52, I62,
+                                                       I04, I14, I24, I34, I44, I06, I16, I26, I08)
+    end
+    begin_anyv_vpa_region(no_synchronize=true)
+    @loop_vpa ivpa begin
+                func_data.upper_boundary_vperp[ivpa] = multipole_dHdvperp(vpa.grid[ivpa],vperp.grid[nvperp],
+                                                       I00, I10, I20, I30, I40, I50, I60, I70, I80, 
+                                                       I02, I12, I22, I32, I42, I52, I62,
+                                                       I04, I14, I24, I34, I44, I06, I16, I26, I08)
+    end
+    # NOTE(review): no begin_*_region call follows, so nothing here actually "returns to serial" -- confirm intent
+    return nothing
+end
+
+"""Write `multipole_G` at `vpa.grid[1]`/`vpa.grid[nvpa]` (over `@loop_vperp`) and at `vperp.grid[nvperp]`
+(over `@loop_vpa`) into `func_data.lower_boundary_vpa`, `upper_boundary_vpa` and `upper_boundary_vperp`."""
+function calculate_boundary_data_multipole_G!(func_data::vpa_vperp_boundary_data,vpa,vperp,
+                                     I00, I10, I20, I30, I40, I50, I60, I70, I80, 
+                                     I02, I12, I22, I32, I42, I52, I62,
+                                     I04, I14, I24, I34, I44, I06, I16, I26, I08)
+    nvpa = vpa.n
+    nvperp = vperp.n
+    begin_anyv_vperp_region(no_synchronize=true)
+    @loop_vperp ivperp begin
+                func_data.lower_boundary_vpa[ivperp] = multipole_G(vpa.grid[1],vperp.grid[ivperp],
+                                                       I00, I10, I20, I30, I40, I50, I60, I70, I80, 
+                                                       I02, I12, I22, I32, I42, I52, I62,
+                                                       I04, I14, I24, I34, I44, I06, I16, I26, I08)
+                func_data.upper_boundary_vpa[ivperp] = multipole_G(vpa.grid[nvpa],vperp.grid[ivperp],
+                                                       I00, I10, I20, I30, I40, I50, I60, I70, I80, 
+                                                       I02, I12, I22, I32, I42, I52, I62,
+                                                       I04, I14, I24, I34, I44, I06, I16, I26, I08)
+    end
+    begin_anyv_vpa_region(no_synchronize=true)
+    @loop_vpa ivpa begin
+                func_data.upper_boundary_vperp[ivpa] = multipole_G(vpa.grid[ivpa],vperp.grid[nvperp],
+                                                       I00, I10, I20, I30, I40, I50, I60, I70, I80, 
+                                                       I02, I12, I22, I32, I42, I52, I62,
+                                                       I04, I14, I24, I34, I44, I06, I16, I26, I08)
+    end
+    # NOTE(review): no begin_*_region call follows, so nothing here actually "returns to serial" -- confirm intent
+    return nothing
+end
+
+"""Write `multipole_dGdvperp` at `vpa.grid[1]`/`vpa.grid[nvpa]` (over `@loop_vperp`) and at `vperp.grid[nvperp]`
+(over `@loop_vpa`) into `func_data.lower_boundary_vpa`, `upper_boundary_vpa` and `upper_boundary_vperp`."""
+function calculate_boundary_data_multipole_dGdvperp!(func_data::vpa_vperp_boundary_data,vpa,vperp,
+                                     I00, I10, I20, I30, I40, I50, I60, I70, I80, 
+                                     I02, I12, I22, I32, I42, I52, I62,
+                                     I04, I14, I24, I34, I44, I06, I16, I26, I08)
+    nvpa = vpa.n
+    nvperp = vperp.n
+    begin_anyv_vperp_region(no_synchronize=true)
+    @loop_vperp ivperp begin
+                func_data.lower_boundary_vpa[ivperp] = multipole_dGdvperp(vpa.grid[1],vperp.grid[ivperp],
+                                                       I00, I10, I20, I30, I40, I50, I60, I70, I80, 
+                                                       I02, I12, I22, I32, I42, I52, I62,
+                                                       I04, I14, I24, I34, I44, I06, I16, I26, I08)
+                func_data.upper_boundary_vpa[ivperp] = multipole_dGdvperp(vpa.grid[nvpa],vperp.grid[ivperp],
+                                                       I00, I10, I20, I30, I40, I50, I60, I70, I80, 
+                                                       I02, I12, I22, I32, I42, I52, I62,
+                                                       I04, I14, I24, I34, I44, I06, I16, I26, I08)
+    end
+    begin_anyv_vpa_region(no_synchronize=true)
+    @loop_vpa ivpa begin
+                func_data.upper_boundary_vperp[ivpa] = multipole_dGdvperp(vpa.grid[ivpa],vperp.grid[nvperp],
+                                                       I00, I10, I20, I30, I40, I50, I60, I70, I80, 
+                                                       I02, I12, I22, I32, I42, I52, I62,
+                                                       I04, I14, I24, I34, I44, I06, I16, I26, I08)
+    end
+    # NOTE(review): no begin_*_region call follows, so nothing here actually "returns to serial" -- confirm intent
+    return nothing
+end
+
+"""Write `multipole_d2Gdvperp2` at `vpa.grid[1]`/`vpa.grid[nvpa]` (over `@loop_vperp`) and at `vperp.grid[nvperp]`
+(over `@loop_vpa`) into `func_data.lower_boundary_vpa`, `upper_boundary_vpa` and `upper_boundary_vperp`."""
+function calculate_boundary_data_multipole_d2Gdvperp2!(func_data::vpa_vperp_boundary_data,vpa,vperp,
+                                     I00, I10, I20, I30, I40, I50, I60, I70, I80, 
+                                     I02, I12, I22, I32, I42, I52, I62,
+                                     I04, I14, I24, I34, I44, I06, I16, I26, I08)
+    nvpa = vpa.n
+    nvperp = vperp.n
+    begin_anyv_vperp_region(no_synchronize=true)
+    @loop_vperp ivperp begin
+                func_data.lower_boundary_vpa[ivperp] = multipole_d2Gdvperp2(vpa.grid[1],vperp.grid[ivperp],
+                                                       I00, I10, I20, I30, I40, I50, I60, I70, I80, 
+                                                       I02, I12, I22, I32, I42, I52, I62,
+                                                       I04, I14, I24, I34, I44, I06, I16, I26, I08)
+                func_data.upper_boundary_vpa[ivperp] = multipole_d2Gdvperp2(vpa.grid[nvpa],vperp.grid[ivperp],
+                                                       I00, I10, I20, I30, I40, I50, I60, I70, I80, 
+                                                       I02, I12, I22, I32, I42, I52, I62,
+                                                       I04, I14, I24, I34, I44, I06, I16, I26, I08)
+    end
+    begin_anyv_vpa_region(no_synchronize=true)
+    @loop_vpa ivpa begin
+                func_data.upper_boundary_vperp[ivpa] = multipole_d2Gdvperp2(vpa.grid[ivpa],vperp.grid[nvperp],
+                                                       I00, I10, I20, I30, I40, I50, I60, I70, I80, 
+                                                       I02, I12, I22, I32, I42, I52, I62,
+                                                       I04, I14, I24, I34, I44, I06, I16, I26, I08)
+    end
+    # NOTE(review): no begin_*_region call follows, so nothing here actually "returns to serial" -- confirm intent
+    return nothing
+end
+
+"""Write `multipole_d2Gdvperpdvpa` at `vpa.grid[1]`/`vpa.grid[nvpa]` (over `@loop_vperp`) and at `vperp.grid[nvperp]`
+(over `@loop_vpa`) into `func_data.lower_boundary_vpa`, `upper_boundary_vpa` and `upper_boundary_vperp`."""
+function calculate_boundary_data_multipole_d2Gdvperpdvpa!(func_data::vpa_vperp_boundary_data,vpa,vperp,
+                                     I00, I10, I20, I30, I40, I50, I60, I70, I80, 
+                                     I02, I12, I22, I32, I42, I52, I62,
+                                     I04, I14, I24, I34, I44, I06, I16, I26, I08)
+    nvpa = vpa.n
+    nvperp = vperp.n
+    begin_anyv_vperp_region(no_synchronize=true)
+    @loop_vperp ivperp begin
+                func_data.lower_boundary_vpa[ivperp] = multipole_d2Gdvperpdvpa(vpa.grid[1],vperp.grid[ivperp],
+                                                       I00, I10, I20, I30, I40, I50, I60, I70, I80, 
+                                                       I02, I12, I22, I32, I42, I52, I62,
+                                                       I04, I14, I24, I34, I44, I06, I16, I26, I08)
+                func_data.upper_boundary_vpa[ivperp] = multipole_d2Gdvperpdvpa(vpa.grid[nvpa],vperp.grid[ivperp],
+                                                       I00, I10, I20, I30, I40, I50, I60, I70, I80, 
+                                                       I02, I12, I22, I32, I42, I52, I62,
+                                                       I04, I14, I24, I34, I44, I06, I16, I26, I08)
+    end
+    begin_anyv_vpa_region(no_synchronize=true)
+    @loop_vpa ivpa begin
+                func_data.upper_boundary_vperp[ivpa] = multipole_d2Gdvperpdvpa(vpa.grid[ivpa],vperp.grid[nvperp],
+                                                       I00, I10, I20, I30, I40, I50, I60, I70, I80, 
+                                                       I02, I12, I22, I32, I42, I52, I62,
+                                                       I04, I14, I24, I34, I44, I06, I16, I26, I08)
+    end
+    # NOTE(review): no begin_*_region call follows, so nothing here actually "returns to serial" -- confirm intent
+    return nothing
+end
+
+"""Write `multipole_d2Gdvpa2` at `vpa.grid[1]`/`vpa.grid[nvpa]` (over `@loop_vperp`) and at `vperp.grid[nvperp]`
+(over `@loop_vpa`) into `func_data.lower_boundary_vpa`, `upper_boundary_vpa` and `upper_boundary_vperp`."""
+function calculate_boundary_data_multipole_d2Gdvpa2!(func_data::vpa_vperp_boundary_data,vpa,vperp,
+                                     I00, I10, I20, I30, I40, I50, I60, I70, I80, 
+                                     I02, I12, I22, I32, I42, I52, I62,
+                                     I04, I14, I24, I34, I44, I06, I16, I26, I08)
+    nvpa = vpa.n
+    nvperp = vperp.n
+    begin_anyv_vperp_region(no_synchronize=true)
+    @loop_vperp ivperp begin
+                func_data.lower_boundary_vpa[ivperp] = multipole_d2Gdvpa2(vpa.grid[1],vperp.grid[ivperp],
+                                                       I00, I10, I20, I30, I40, I50, I60, I70, I80, 
+                                                       I02, I12, I22, I32, I42, I52, I62,
+                                                       I04, I14, I24, I34, I44, I06, I16, I26, I08)
+                func_data.upper_boundary_vpa[ivperp] = multipole_d2Gdvpa2(vpa.grid[nvpa],vperp.grid[ivperp],
+                                                       I00, I10, I20, I30, I40, I50, I60, I70, I80, 
+                                                       I02, I12, I22, I32, I42, I52, I62,
+                                                       I04, I14, I24, I34, I44, I06, I16, I26, I08)
+    end
+    begin_anyv_vpa_region(no_synchronize=true)
+    @loop_vpa ivpa begin
+                func_data.upper_boundary_vperp[ivpa] = multipole_d2Gdvpa2(vpa.grid[ivpa],vperp.grid[nvperp],
+                                                       I00, I10, I20, I30, I40, I50, I60, I70, I80, 
+                                                       I02, I12, I22, I32, I42, I52, I62,
+                                                       I04, I14, I24, I34, I44, I06, I16, I26, I08)
+    end
+    # NOTE(review): no begin_*_region call follows, so nothing here actually "returns to serial" -- confirm intent
+    return nothing
+end
+
+"""
+Use the multipole expansion of the Rosenbluth potentials to fill `rpbd` in place from 25 moments of `pdf`.
+`H`, `dHdvpa`, `dHdvperp`, `d2Gdvperp2`, `d2Gdvperpdvpa`, `d2Gdvpa2` data are always set; `G_data` and `dGdvperp_data`
+only if `calculate_GG` / `calculate_dGdvperp`. `vpa_spectral`, `vperp_spectral` are unused; `param_vec` is allocated per call.
+"""
+function calculate_rosenbluth_potential_boundary_data_multipole!(rpbd::rosenbluth_potential_boundary_data,
+    pdf,vpa,vperp,vpa_spectral,vperp_spectral;
+    calculate_GG=false,calculate_dGdvperp=false)
+    # moments of pdf, zero-initialised: Ijk passes integer j after vpa.grid and k after vperp.grid (presumably the powers)
+    I00, I10, I20, I30, I40, I50, I60, I70, I80 = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
+    I02, I12, I22, I32, I42, I52, I62 = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
+    I04, I14, I24, I34, I44 = 0.0, 0.0, 0.0, 0.0, 0.0
+    I06, I16, I26 = 0.0, 0.0, 0.0
+    I08 = 0.0
+    
+    begin_anyv_region()
+    @anyv_serial_region begin
+       I00 = integrate_over_vspace(@view(pdf[:,:]), vpa.grid, 0, vpa.wgts, vperp.grid, 0, vperp.wgts)
+       I10 = integrate_over_vspace(@view(pdf[:,:]), vpa.grid, 1, vpa.wgts, vperp.grid, 0, vperp.wgts)
+       I20 = integrate_over_vspace(@view(pdf[:,:]), vpa.grid, 2, vpa.wgts, vperp.grid, 0, vperp.wgts)
+       I30 = integrate_over_vspace(@view(pdf[:,:]), vpa.grid, 3, vpa.wgts, vperp.grid, 0, vperp.wgts)
+       I40 = integrate_over_vspace(@view(pdf[:,:]), vpa.grid, 4, vpa.wgts, vperp.grid, 0, vperp.wgts)
+       I50 = integrate_over_vspace(@view(pdf[:,:]), vpa.grid, 5, vpa.wgts, vperp.grid, 0, vperp.wgts)
+       I60 = integrate_over_vspace(@view(pdf[:,:]), vpa.grid, 6, vpa.wgts, vperp.grid, 0, vperp.wgts)
+       I70 = integrate_over_vspace(@view(pdf[:,:]), vpa.grid, 7, vpa.wgts, vperp.grid, 0, vperp.wgts)
+       I80 = integrate_over_vspace(@view(pdf[:,:]), vpa.grid, 8, vpa.wgts, vperp.grid, 0, vperp.wgts)
+       
+       I02 = integrate_over_vspace(@view(pdf[:,:]), vpa.grid, 0, vpa.wgts, vperp.grid, 2, vperp.wgts)
+       I12 = integrate_over_vspace(@view(pdf[:,:]), vpa.grid, 1, vpa.wgts, vperp.grid, 2, vperp.wgts)
+       I22 = integrate_over_vspace(@view(pdf[:,:]), vpa.grid, 2, vpa.wgts, vperp.grid, 2, vperp.wgts)
+       I32 = integrate_over_vspace(@view(pdf[:,:]), vpa.grid, 3, vpa.wgts, vperp.grid, 2, vperp.wgts)
+       I42 = integrate_over_vspace(@view(pdf[:,:]), vpa.grid, 4, vpa.wgts, vperp.grid, 2, vperp.wgts)
+       I52 = integrate_over_vspace(@view(pdf[:,:]), vpa.grid, 5, vpa.wgts, vperp.grid, 2, vperp.wgts)
+       I62 = integrate_over_vspace(@view(pdf[:,:]), vpa.grid, 6, vpa.wgts, vperp.grid, 2, vperp.wgts)
+       
+       I04 = integrate_over_vspace(@view(pdf[:,:]), vpa.grid, 0, vpa.wgts, vperp.grid, 4, vperp.wgts)
+       I14 = integrate_over_vspace(@view(pdf[:,:]), vpa.grid, 1, vpa.wgts, vperp.grid, 4, vperp.wgts)
+       I24 = integrate_over_vspace(@view(pdf[:,:]), vpa.grid, 2, vpa.wgts, vperp.grid, 4, vperp.wgts)
+       I34 = integrate_over_vspace(@view(pdf[:,:]), vpa.grid, 3, vpa.wgts, vperp.grid, 4, vperp.wgts)
+       I44 = integrate_over_vspace(@view(pdf[:,:]), vpa.grid, 4, vpa.wgts, vperp.grid, 4, vperp.wgts)
+       
+       I06 = integrate_over_vspace(@view(pdf[:,:]), vpa.grid, 0, vpa.wgts, vperp.grid, 6, vperp.wgts)
+       I16 = integrate_over_vspace(@view(pdf[:,:]), vpa.grid, 1, vpa.wgts, vperp.grid, 6, vperp.wgts)
+       I26 = integrate_over_vspace(@view(pdf[:,:]), vpa.grid, 2, vpa.wgts, vperp.grid, 6, vperp.wgts)
+       
+       I08 = integrate_over_vspace(@view(pdf[:,:]), vpa.grid, 0, vpa.wgts, vperp.grid, 8, vperp.wgts)    
+    end
+    # Pack the moments into a newly built vector and broadcast it from rank 0 of comm_anyv_subblock (skipped if COMM_NULL)
+    param_vec = [I00, I10, I20, I30, I40, I50, I60, I70, I80, 
+                I02, I12, I22, I32, I42, I52, I62,
+                I04, I14, I24, I34, I44,
+                I06, I16, I26,
+                I08]
+    if comm_anyv_subblock[] != MPI.COMM_NULL
+        MPI.Bcast!(param_vec, 0, comm_anyv_subblock[])
+    end
+    (I00, I10, I20, I30, I40, I50, I60, I70, I80, 
+     I02, I12, I22, I32, I42, I52, I62,
+     I04, I14, I24, I34, I44,
+     I06, I16, I26,
+     I08) = param_vec
+   # println(I00, " ", I10, " ", I20, " ", I30, " ", I40, " ", I50, " ", I60, " ", I70, " ", I80, " ", 
+   #        I02, " ", I12, " ", I22, " ", I32, " ", I42, " ", I52, " ", I62, " ",
+   #        I04, " ", I14, " ", I24, " ", I34, " ", I44, " ",
+   #        I06, " ", I16, " ", I26, " ",
+   #        I08)
+    # ensure data is synchronized
+    _anyv_subblock_synchronize()
+    # evaluate the multipole formulae on the boundaries; the G and dGdvperp evaluations are gated by the keyword flags
+    calculate_boundary_data_multipole_H!(rpbd.H_data,vpa,vperp, 
+                                     I00, I10, I20, I30, I40, I50, I60, I70, I80, 
+                                     I02, I12, I22, I32, I42, I52, I62,
+                                     I04, I14, I24, I34, I44, I06, I16, I26, I08)
+    calculate_boundary_data_multipole_dHdvpa!(rpbd.dHdvpa_data,vpa,vperp,
+                                     I00, I10, I20, I30, I40, I50, I60, I70, I80, 
+                                     I02, I12, I22, I32, I42, I52, I62,
+                                     I04, I14, I24, I34, I44, I06, I16, I26, I08)
+    calculate_boundary_data_multipole_dHdvperp!(rpbd.dHdvperp_data,vpa,vperp,
+                                     I00, I10, I20, I30, I40, I50, I60, I70, I80, 
+                                     I02, I12, I22, I32, I42, I52, I62,
+                                     I04, I14, I24, I34, I44, I06, I16, I26, I08)
+    if calculate_GG
+        calculate_boundary_data_multipole_G!(rpbd.G_data,vpa,vperp,
+                                     I00, I10, I20, I30, I40, I50, I60, I70, I80, 
+                                     I02, I12, I22, I32, I42, I52, I62,
+                                     I04, I14, I24, I34, I44, I06, I16, I26, I08)
+    end
+    if calculate_dGdvperp
+        calculate_boundary_data_multipole_dGdvperp!(rpbd.dGdvperp_data,vpa,vperp,
+                                     I00, I10, I20, I30, I40, I50, I60, I70, I80, 
+                                     I02, I12, I22, I32, I42, I52, I62,
+                                     I04, I14, I24, I34, I44, I06, I16, I26, I08)
+    end
+    calculate_boundary_data_multipole_d2Gdvperp2!(rpbd.d2Gdvperp2_data,vpa,vperp,
+                                     I00, I10, I20, I30, I40, I50, I60, I70, I80, 
+                                     I02, I12, I22, I32, I42, I52, I62,
+                                     I04, I14, I24, I34, I44, I06, I16, I26, I08)
+    calculate_boundary_data_multipole_d2Gdvperpdvpa!(rpbd.d2Gdvperpdvpa_data,vpa,vperp,
+                                     I00, I10, I20, I30, I40, I50, I60, I70, I80, 
+                                     I02, I12, I22, I32, I42, I52, I62,
+                                     I04, I14, I24, I34, I44, I06, I16, I26, I08)
+    calculate_boundary_data_multipole_d2Gdvpa2!(rpbd.d2Gdvpa2_data,vpa,vperp,
+                                     I00, I10, I20, I30, I40, I50, I60, I70, I80, 
+                                     I02, I12, I22, I32, I42, I52, I62,
+                                     I04, I14, I24, I34, I44, I06, I16, I26, I08)
+    
+    return nothing
+end
+
 """
 Function to compare two instances of `rosenbluth_potential_boundary_data` --
 one assumed to contain exact results, and the other numerically computed results -- and compute
@@ -2281,7 +2913,8 @@ to solve the PDE matrix equations.
 function calculate_rosenbluth_potentials_via_elliptic_solve!(GG,HH,dHdvpa,dHdvperp,
              d2Gdvpa2,dGdvperp,d2Gdvperpdvpa,d2Gdvperp2,ffsp_in,
              vpa,vperp,vpa_spectral,vperp_spectral,fkpl_arrays::fokkerplanck_weakform_arrays_struct;
-             algebraic_solve_for_d2Gdvperp2=false,calculate_GG=false,calculate_dGdvperp=false)
+             algebraic_solve_for_d2Gdvperp2=false,calculate_GG=false,calculate_dGdvperp=false,
+             multipole=false)
     
     # extract the necessary precalculated and buffer arrays from fokkerplanck_arrays
     MM2D_sparse = fkpl_arrays.MM2D_sparse
@@ -2310,8 +2943,13 @@ function calculate_rosenbluth_potentials_via_elliptic_solve!(GG,HH,dHdvpa,dHdvpe
     rhsvpavperp_copy3 = fkpl_arrays.rhsvpavperp_copy3
     
     # calculate the boundary data
-    calculate_rosenbluth_potential_boundary_data!(rpbd,bwgt,ffsp_in,vpa,vperp,vpa_spectral,vperp_spectral,
-      calculate_GG=calculate_GG,calculate_dGdvperp=(calculate_dGdvperp||algebraic_solve_for_d2Gdvperp2))
+    if multipole
+        calculate_rosenbluth_potential_boundary_data_multipole!(rpbd,ffsp_in,vpa,vperp,vpa_spectral,vperp_spectral,
+          calculate_GG=calculate_GG,calculate_dGdvperp=(calculate_dGdvperp||algebraic_solve_for_d2Gdvperp2))
+    else # use direct integration on the boundary
+        calculate_rosenbluth_potential_boundary_data!(rpbd,bwgt,ffsp_in,vpa,vperp,vpa_spectral,vperp_spectral,
+         calculate_GG=calculate_GG,calculate_dGdvperp=(calculate_dGdvperp||algebraic_solve_for_d2Gdvperp2))
+    end
     # carry out the elliptic solves required
     begin_anyv_vperp_vpa_region()
     @loop_vperp_vpa ivperp ivpa begin
diff --git a/test_scripts/2D_FEM_assembly_test.jl b/test_scripts/2D_FEM_assembly_test.jl
index 34a19c3cd..53c7f9f8e 100644
--- a/test_scripts/2D_FEM_assembly_test.jl
+++ b/test_scripts/2D_FEM_assembly_test.jl
@@ -78,7 +78,8 @@ end
         use_Maxwellian_Rosenbluth_coefficients=false,
         use_Maxwellian_field_particle_distribution=false,
         test_numerical_conserving_terms=false,
-        algebraic_solve_for_d2Gdvperp2=false)
+        algebraic_solve_for_d2Gdvperp2=false,
+        use_multipole=false)
         # define inputs needed for the test
         #plot_test_output = false#true
         #test_parallelism = false#true
@@ -274,7 +275,7 @@ end
         calculate_rosenbluth_potentials_via_elliptic_solve!(fkpl_arrays.GG,fkpl_arrays.HH,fkpl_arrays.dHdvpa,fkpl_arrays.dHdvperp,
              fkpl_arrays.d2Gdvpa2,fkpl_arrays.dGdvperp,fkpl_arrays.d2Gdvperpdvpa,fkpl_arrays.d2Gdvperp2,F_M,
              vpa,vperp,vpa_spectral,vperp_spectral,fkpl_arrays;
-             algebraic_solve_for_d2Gdvperp2=false,calculate_GG=true,calculate_dGdvperp=true)
+             algebraic_solve_for_d2Gdvperp2=false,calculate_GG=true,calculate_dGdvperp=true,multipole=use_multipole)
         # extract C[Fs,Fs'] result
         # and Rosenbluth potentials for testing
         begin_s_r_z_anyv_region()
@@ -389,7 +390,8 @@ end
         test_numerical_conserving_terms=false,
         algebraic_solve_for_d2Gdvperp2=false,
         test_self_operator = true,
-        Lvpa = 12.0, Lvperp = 6.0)
+        Lvpa = 12.0, Lvperp = 6.0,
+        use_multipole = false)
         initialize_comms!()
         #ngrid = 5
         #plot_scan = true
@@ -460,7 +462,7 @@ end
             use_Maxwellian_field_particle_distribution=use_Maxwellian_field_particle_distribution,
             test_numerical_conserving_terms=test_numerical_conserving_terms,
             algebraic_solve_for_d2Gdvperp2=algebraic_solve_for_d2Gdvperp2,
-            standalone=false, Lvpa=Lvpa, Lvperp=Lvperp)
+            standalone=false, Lvpa=Lvpa, Lvperp=Lvperp, use_multipole=use_multipole)
             max_C_err[iscan], L2_C_err[iscan] = fkerr.C_M.max ,fkerr.C_M.L2
             max_H_err[iscan], L2_H_err[iscan] = fkerr.H_M.max ,fkerr.H_M.L2
             max_dHdvpa_err[iscan], L2_dHdvpa_err[iscan] = fkerr.dHdvpa_M.max ,fkerr.dHdvpa_M.L2

From 36a60313dc70d434fdf1212d1320637b5704b19e Mon Sep 17 00:00:00 2001
From: Michael Hardman <29800382+mrhardman@users.noreply.github.com>
Date: Thu, 7 Nov 2024 18:35:24 +0000
Subject: [PATCH 32/41] Propagate multipole option so it can be used in time
 evolving code.

---
 moment_kinetics/src/fokker_planck.jl          | 11 ++++++++---
 moment_kinetics/src/fokker_planck_calculus.jl |  4 ++--
 moment_kinetics/src/input_structs.jl          |  2 ++
 moment_kinetics/src/time_advance.jl           |  4 +++-
 test_scripts/2D_FEM_assembly_test.jl          |  9 +++++----
 5 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/moment_kinetics/src/fokker_planck.jl b/moment_kinetics/src/fokker_planck.jl
index 9a19affb9..a31d3f78e 100644
--- a/moment_kinetics/src/fokker_planck.jl
+++ b/moment_kinetics/src/fokker_planck.jl
@@ -94,6 +94,7 @@ function setup_fkpl_collisions_input(toml_input::Dict)
        frequency_option = "reference_parameters",
        self_collisions = true,
        use_conserving_corrections = true,
+       multipole_boundary_data = false,
        slowing_down_test = false,
        sd_density = 1.0,
        sd_temp = 0.01,
@@ -337,13 +338,15 @@ Function for advancing with the explicit, weak-form, self-collision operator.
     Zi = collisions.fkpl.Zi # generalise!
     nussp = nuref*(Zi^4) # include charge number factor for self collisions
     use_conserving_corrections = collisions.fkpl.use_conserving_corrections
+    multipole_boundary_data = collisions.fkpl.multipole_boundary_data
     # N.B. parallelisation using special 'anyv' region
     begin_s_r_z_anyv_region()
     @loop_s_r_z is ir iz begin
         # first argument is Fs, and second argument is Fs' in C[Fs,Fs'] 
         @views fokker_planck_collision_operator_weak_form!(
             pdf_in[:,:,iz,ir,is], pdf_in[:,:,iz,ir,is], ms, msp, nussp, fkpl_arrays,
-            vperp, vpa, vperp_spectral, vpa_spectral)
+            vperp, vpa, vperp_spectral, vpa_spectral, 
+            multipole_boundary_data = multipole_boundary_data)
         # enforce the boundary conditions on CC before it is used for timestepping
         enforce_vpavperp_BCs!(fkpl_arrays.CC,vpa,vperp,vpa_spectral,vperp_spectral)
         # make ad-hoc conserving corrections
@@ -398,7 +401,8 @@ with \$\\gamma_\\mathrm{ref} = 2 \\pi e^4 \\ln \\Lambda_{ii} / (4 \\pi
                          use_Maxwellian_Rosenbluth_coefficients=false,
                          use_Maxwellian_field_particle_distribution=false,
                          algebraic_solve_for_d2Gdvperp2 = false, calculate_GG=false,
-                         calculate_dGdvperp=false) = begin
+                         calculate_dGdvperp=false,
+                         multipole_boundary_data=false) = begin
     @boundscheck vpa.n == size(ffsp_in,1) || throw(BoundsError(ffsp_in))
     @boundscheck vperp.n == size(ffsp_in,2) || throw(BoundsError(ffsp_in))
     @boundscheck vpa.n == size(ffs_in,1) || throw(BoundsError(ffs_in))
@@ -448,7 +452,8 @@ with \$\\gamma_\\mathrm{ref} = 2 \\pi e^4 \\ln \\Lambda_{ii} / (4 \\pi
              d2Gdvpa2,dGdvperp,d2Gdvperpdvpa,d2Gdvperp2,ffsp_in,
              vpa,vperp,vpa_spectral,vperp_spectral,fkpl_arrays,
              algebraic_solve_for_d2Gdvperp2=algebraic_solve_for_d2Gdvperp2,
-             calculate_GG=calculate_GG,calculate_dGdvperp=calculate_dGdvperp)
+             calculate_GG=calculate_GG,calculate_dGdvperp=calculate_dGdvperp,
+             multipole_boundary_data=multipole_boundary_data)
     end
     # assemble the RHS of the collision operator matrix eq
     if use_Maxwellian_field_particle_distribution
diff --git a/moment_kinetics/src/fokker_planck_calculus.jl b/moment_kinetics/src/fokker_planck_calculus.jl
index b76d553fa..79f6e2ede 100644
--- a/moment_kinetics/src/fokker_planck_calculus.jl
+++ b/moment_kinetics/src/fokker_planck_calculus.jl
@@ -2914,7 +2914,7 @@ function calculate_rosenbluth_potentials_via_elliptic_solve!(GG,HH,dHdvpa,dHdvpe
              d2Gdvpa2,dGdvperp,d2Gdvperpdvpa,d2Gdvperp2,ffsp_in,
              vpa,vperp,vpa_spectral,vperp_spectral,fkpl_arrays::fokkerplanck_weakform_arrays_struct;
              algebraic_solve_for_d2Gdvperp2=false,calculate_GG=false,calculate_dGdvperp=false,
-             multipole=false)
+             multipole_boundary_data=false)
     
     # extract the necessary precalculated and buffer arrays from fokkerplanck_arrays
     MM2D_sparse = fkpl_arrays.MM2D_sparse
@@ -2943,7 +2943,7 @@ function calculate_rosenbluth_potentials_via_elliptic_solve!(GG,HH,dHdvpa,dHdvpe
     rhsvpavperp_copy3 = fkpl_arrays.rhsvpavperp_copy3
     
     # calculate the boundary data
-    if multipole
+    if multipole_boundary_data
         calculate_rosenbluth_potential_boundary_data_multipole!(rpbd,ffsp_in,vpa,vperp,vpa_spectral,vperp_spectral,
           calculate_GG=calculate_GG,calculate_dGdvperp=(calculate_dGdvperp||algebraic_solve_for_d2Gdvperp2))
     else # use direct integration on the boundary
diff --git a/moment_kinetics/src/input_structs.jl b/moment_kinetics/src/input_structs.jl
index f427c3a83..8d421fa48 100644
--- a/moment_kinetics/src/input_structs.jl
+++ b/moment_kinetics/src/input_structs.jl
@@ -489,6 +489,8 @@ Base.@kwdef struct fkpl_collisions_input
     self_collisions::Bool
     # option to determine if ad-hoc moment_kinetics-style conserving corrections are used
     use_conserving_corrections::Bool
+    # option to determine if multipole expansion is used to provide boundary data for Rosenbluth potential calculations.
+    multipole_boundary_data::Bool
     # option to determine if cross-collisions against fixed Maxwellians are used
     slowing_down_test::Bool
     # Setting to switch between different options for Fokker-Planck collision frequency input
diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl
index b9d85420d..31a4777a4 100644
--- a/moment_kinetics/src/time_advance.jl
+++ b/moment_kinetics/src/time_advance.jl
@@ -734,7 +734,9 @@ function setup_time_advance!(pdf, fields, vz, vr, vzeta, vpa, vperp, z, r, gyrop
                                                   n_neutral_species_alloc, t_params)
     # create arrays for Fokker-Planck collisions 
     if advance.explicit_weakform_fp_collisions
-        fp_arrays = init_fokker_planck_collisions_weak_form(vpa,vperp,vpa_spectral,vperp_spectral; precompute_weights=true)
+        precompute_weights = true && !(collisions.fkpl.multipole_boundary_data)
+        fp_arrays = init_fokker_planck_collisions_weak_form(vpa,vperp,vpa_spectral,vperp_spectral;
+                      precompute_weights=precompute_weights)
     else
         fp_arrays = nothing
     end
diff --git a/test_scripts/2D_FEM_assembly_test.jl b/test_scripts/2D_FEM_assembly_test.jl
index 53c7f9f8e..b6f8408fb 100644
--- a/test_scripts/2D_FEM_assembly_test.jl
+++ b/test_scripts/2D_FEM_assembly_test.jl
@@ -126,9 +126,9 @@ end
         nc_global = vpa.n*vperp.n
         begin_serial_region()
         start_init_time = now()
-        
+        precompute_weights = true && !(use_multipole)
         fkpl_arrays = init_fokker_planck_collisions_weak_form(vpa,vperp,vpa_spectral,vperp_spectral; 
-                           precompute_weights=true, test_dense_matrix_construction=test_dense_construction)
+                           precompute_weights=precompute_weights, test_dense_matrix_construction=test_dense_construction)
         KKpar2D_with_BC_terms_sparse = fkpl_arrays.KKpar2D_with_BC_terms_sparse
         KKperp2D_with_BC_terms_sparse = fkpl_arrays.KKperp2D_with_BC_terms_sparse
         lu_obj_MM = fkpl_arrays.lu_obj_MM
@@ -264,7 +264,8 @@ end
                                              use_Maxwellian_Rosenbluth_coefficients=use_Maxwellian_Rosenbluth_coefficients,
                                              use_Maxwellian_field_particle_distribution=use_Maxwellian_field_particle_distribution,
                                              algebraic_solve_for_d2Gdvperp2=algebraic_solve_for_d2Gdvperp2,
-                                             calculate_GG = false, calculate_dGdvperp=false)
+                                             calculate_GG = false, calculate_dGdvperp=false,
+                                             multipole_boundary_data=use_multipole)
         if test_numerical_conserving_terms && test_self_operator
             # enforce the boundary conditions on CC before it is used for timestepping
             enforce_vpavperp_BCs!(fkpl_arrays.CC,vpa,vperp,vpa_spectral,vperp_spectral)
@@ -275,7 +276,7 @@ end
         calculate_rosenbluth_potentials_via_elliptic_solve!(fkpl_arrays.GG,fkpl_arrays.HH,fkpl_arrays.dHdvpa,fkpl_arrays.dHdvperp,
              fkpl_arrays.d2Gdvpa2,fkpl_arrays.dGdvperp,fkpl_arrays.d2Gdvperpdvpa,fkpl_arrays.d2Gdvperp2,F_M,
              vpa,vperp,vpa_spectral,vperp_spectral,fkpl_arrays;
-             algebraic_solve_for_d2Gdvperp2=false,calculate_GG=true,calculate_dGdvperp=true,multipole=use_multipole)
+             algebraic_solve_for_d2Gdvperp2=false,calculate_GG=true,calculate_dGdvperp=true,multipole_boundary_data=use_multipole)
         # extract C[Fs,Fs'] result
         # and Rosenbluth potentials for testing
         begin_s_r_z_anyv_region()

From 89891ae2842b232d9efe6153683fd7fbaec79712 Mon Sep 17 00:00:00 2001
From: Michael Hardman <29800382+mrhardman@users.noreply.github.com>
Date: Fri, 8 Nov 2024 17:15:09 +0000
Subject: [PATCH 33/41] Add multipole boundary data method to test of elliptic
 solvers.

---
 moment_kinetics/test/fokker_planck_tests.jl | 1481 ++++++++++---------
 1 file changed, 758 insertions(+), 723 deletions(-)

diff --git a/moment_kinetics/test/fokker_planck_tests.jl b/moment_kinetics/test/fokker_planck_tests.jl
index cd29e94ca..16fad7bb0 100644
--- a/moment_kinetics/test/fokker_planck_tests.jl
+++ b/moment_kinetics/test/fokker_planck_tests.jl
@@ -1,723 +1,758 @@
-module FokkerPlanckTests
-
-include("setup.jl")
-
-
-using MPI
-using LinearAlgebra: mul!, ldiv!
-using moment_kinetics.communication
-using moment_kinetics.looping
-using moment_kinetics.array_allocation: allocate_float, allocate_shared_float
-using moment_kinetics.coordinates: define_coordinate
-using moment_kinetics.type_definitions: mk_float, mk_int
-using moment_kinetics.velocity_moments: get_density, get_upar, get_ppar, get_pperp, get_pressure
-
-using moment_kinetics.fokker_planck: init_fokker_planck_collisions_weak_form, fokker_planck_collision_operator_weak_form!
-using moment_kinetics.fokker_planck: conserving_corrections!, init_fokker_planck_collisions_direct_integration
-using moment_kinetics.fokker_planck: density_conserving_correction!, fokker_planck_collision_operator_weak_form_Maxwellian_Fsp!
-using moment_kinetics.fokker_planck_test: print_test_data, fkpl_error_data, allocate_error_data #, plot_test_data
-using moment_kinetics.fokker_planck_test: F_Maxwellian, G_Maxwellian, H_Maxwellian
-using moment_kinetics.fokker_planck_test: d2Gdvpa2_Maxwellian, d2Gdvperp2_Maxwellian, d2Gdvperpdvpa_Maxwellian, dGdvperp_Maxwellian
-using moment_kinetics.fokker_planck_test: dHdvperp_Maxwellian, dHdvpa_Maxwellian, Cssp_Maxwellian_inputs
-using moment_kinetics.fokker_planck_calculus: calculate_rosenbluth_potentials_via_elliptic_solve!, calculate_rosenbluth_potential_boundary_data_exact!
-using moment_kinetics.fokker_planck_calculus: test_rosenbluth_potential_boundary_data, allocate_rosenbluth_potential_boundary_data
-using moment_kinetics.fokker_planck_calculus: enforce_vpavperp_BCs!, calculate_rosenbluth_potentials_via_direct_integration!
-using moment_kinetics.fokker_planck_calculus: interpolate_2D_vspace!
-
-function create_grids(ngrid,nelement_vpa,nelement_vperp;
-                      Lvpa=12.0,Lvperp=6.0)
-
-        nelement_local_vpa = nelement_vpa # number of elements per rank
-        nelement_global_vpa = nelement_local_vpa # total number of elements 
-        nelement_local_vperp = nelement_vperp # number of elements per rank
-        nelement_global_vperp = nelement_local_vperp # total number of elements 
-        bc = "zero" # used only in derivative! functions 
-        #discretization = "chebyshev_pseudospectral"
-        discretization = "gausslegendre_pseudospectral"
-        # create the 'input' struct containing input info needed to create a
-        # coordinate
-        element_spacing_option = "uniform"
-        coords_input = OptionsDict(
-            "vperp"=>OptionsDict("ngrid"=>ngrid, "nelement"=>nelement_global_vperp,
-                                 "nelement_local"=>nelement_local_vperp, "L"=>Lvperp,
-                                 "discretization"=>discretization, "bc"=>bc,
-                                 "element_spacing_option"=>element_spacing_option),
-            "vpa"=>OptionsDict("ngrid"=>ngrid, "nelement"=>nelement_global_vpa,
-                               "nelement_local"=>nelement_local_vpa, "L"=>Lvpa,
-                               "discretization"=>discretization, "bc"=>bc,
-                               "element_spacing_option"=>element_spacing_option),
-        )
-        
-        # Set up MPI
-        initialize_comms!()
-        setup_distributed_memory_MPI(1,1,1,1)
-        vperp, vperp_spectral = define_coordinate(coords_input, "vperp")
-        vpa, vpa_spectral = define_coordinate(coords_input, "vpa")
-        looping.setup_loop_ranges!(block_rank[], block_size[];
-                                       s=1, sn=1,
-                                       r=1, z=1, vperp=vperp.n, vpa=vpa.n,
-                                       vzeta=1, vr=1, vz=1)
-        
-        return vpa, vpa_spectral, vperp, vperp_spectral
-end
-
-function runtests()
-    print_to_screen = false
-    @testset "Fokker Planck tests" verbose=use_verbose begin
-        println("Fokker Planck tests")
-        
-        @testset "Lagrange-polynomial 2D interpolation" begin
-            println("    - test Lagrange-polynomial 2D interpolation")
-            ngrid = 9
-            nelement_vpa = 16
-            nelement_vperp = 8
-            vpa, vpa_spectral, vperp, vperp_spectral = create_grids(ngrid,nelement_vpa,nelement_vperp,
-                                                                        Lvpa=8.0,Lvperp=4.0)
-            
-            # electron pdf on electron grids
-            Fe = allocate_shared_float(vpa.n,vperp.n)
-            # electron pdf on ion normalised grids
-            Fe_interp_ion_units = allocate_shared_float(vpa.n,vperp.n)
-            # exact value for comparison
-            Fe_exact_ion_units = allocate_shared_float(vpa.n,vperp.n)
-            # ion pdf on ion grids
-            Fi = allocate_shared_float(vpa.n,vperp.n)
-            # ion pdf on electron normalised grids
-            Fi_interp_electron_units = allocate_shared_float(vpa.n,vperp.n)
-            # exact value for comparison
-            Fi_exact_electron_units = allocate_shared_float(vpa.n,vperp.n)
-            # test array
-            F_err = allocate_float(vpa.n,vperp.n)
-            
-            dense = 1.0
-            upare = 0.0 # upare in electron reference units
-            vthe = 1.0 # vthe in electron reference units
-            densi = 1.0
-            upari = 0.0 # upari in ion reference units
-            vthi = 1.0 # vthi in ion reference units
-            # reference speeds for electrons and ions
-            cref_electron = 60.0 
-            cref_ion = 1.0
-            # scale factor for change of reference speed
-            scalefac = cref_ion/cref_electron
-            
-            begin_serial_region()
-            @serial_region begin
-                @loop_vperp_vpa ivperp ivpa begin
-                    Fe[ivpa,ivperp] = F_Maxwellian(dense,upare,vthe,vpa,vperp,ivpa,ivperp)
-                    Fe_exact_ion_units[ivpa,ivperp] = F_Maxwellian(dense,upare/scalefac,vthe/scalefac,vpa,vperp,ivpa,ivperp)/(scalefac^3)
-                    Fi[ivpa,ivperp] = F_Maxwellian(densi,upari,vthi,vpa,vperp,ivpa,ivperp)
-                    Fi_exact_electron_units[ivpa,ivperp] = (scalefac^3)*F_Maxwellian(densi,upari*scalefac,vthi*scalefac,vpa,vperp,ivpa,ivperp)
-                end
-            end
-            
-            begin_s_r_z_anyv_region()
-            interpolate_2D_vspace!(Fe_interp_ion_units,Fe,vpa,vperp,scalefac)
-            #println("Fe",Fe)
-            #println("Fe interp",Fe_interp_ion_units)
-            #println("Fe exact",Fe_exact_ion_units)
-            interpolate_2D_vspace!(Fi_interp_electron_units,Fi,vpa,vperp,1.0/scalefac)
-            #println("Fi",Fi)
-            #println("Fi interp", Fi_interp_electron_units)
-            #println("Fi exact",Fi_exact_electron_units)
-            
-            begin_serial_region()            
-            # check the result
-            @serial_region begin
-                # for electron data on ion grids
-                @. F_err = abs(Fe_interp_ion_units - Fe_exact_ion_units)
-                max_F_err = maximum(F_err)
-                max_F = maximum(Fe_exact_ion_units)
-                #println(max_F)
-                @test max_F_err < 3.0e-8 * max_F
-                # for ion data on electron grids
-                @. F_err = abs(Fi_interp_electron_units - Fi_exact_electron_units)
-                max_F_err = maximum(F_err)
-                max_F = maximum(Fi_exact_electron_units)
-                #println(max_F)
-                @test max_F_err < 3.0e-8 * max_F
-            end
-            
-        end
-        
-        @testset "weak-form 2D differentiation" begin
-        # tests the correct definition of mass and stiffness matrices in 2D
-            println("    - test weak-form 2D differentiation")
-
-            ngrid = 9
-            nelement_vpa = 8
-            nelement_vperp = 4
-            vpa, vpa_spectral, vperp, vperp_spectral = create_grids(ngrid,nelement_vpa,nelement_vperp,
-                                                                        Lvpa=2.0,Lvperp=1.0)
-            nc_global = vpa.n*vperp.n
-            begin_serial_region()
-            fkpl_arrays = init_fokker_planck_collisions_weak_form(vpa,vperp,vpa_spectral,vperp_spectral,
-                                                                  precompute_weights=false, print_to_screen=print_to_screen)
-            KKpar2D_with_BC_terms_sparse = fkpl_arrays.KKpar2D_with_BC_terms_sparse
-            KKperp2D_with_BC_terms_sparse = fkpl_arrays.KKperp2D_with_BC_terms_sparse
-            lu_obj_MM = fkpl_arrays.lu_obj_MM
-            
-            dummy_array = allocate_float(vpa.n,vperp.n)
-            fvpavperp = allocate_float(vpa.n,vperp.n)
-            fvpavperp_test = allocate_float(vpa.n,vperp.n)
-            fvpavperp_err = allocate_float(vpa.n,vperp.n)
-            d2fvpavperp_dvpa2_exact = allocate_float(vpa.n,vperp.n)
-            d2fvpavperp_dvpa2_err = allocate_float(vpa.n,vperp.n)
-            d2fvpavperp_dvpa2_num = allocate_float(vpa.n,vperp.n)
-            d2fvpavperp_dvperp2_exact = allocate_float(vpa.n,vperp.n)
-            d2fvpavperp_dvperp2_err = allocate_float(vpa.n,vperp.n)
-            d2fvpavperp_dvperp2_num = allocate_float(vpa.n,vperp.n)
-            dfc = allocate_float(nc_global)
-            dgc = allocate_float(nc_global)
-            for ivperp in 1:vperp.n
-                for ivpa in 1:vpa.n
-                    fvpavperp[ivpa,ivperp] = exp(-vpa.grid[ivpa]^2 - vperp.grid[ivperp]^2)
-                    d2fvpavperp_dvpa2_exact[ivpa,ivperp] = (4.0*vpa.grid[ivpa]^2 - 2.0)*exp(-vpa.grid[ivpa]^2 - vperp.grid[ivperp]^2)
-                    d2fvpavperp_dvperp2_exact[ivpa,ivperp] = (4.0*vperp.grid[ivperp]^2 - 2.0)*exp(-vpa.grid[ivpa]^2 - vperp.grid[ivperp]^2)
-                end
-            end
-            
-            # Make 1d views
-            fc = vec(fvpavperp)
-            d2fc_dvpa2 = vec(d2fvpavperp_dvpa2_num)
-            d2fc_dvperp2 = vec(d2fvpavperp_dvperp2_num)
-
-            #print_vector(fc,"fc",nc_global)
-            # multiply by KKpar2D and fill dfc
-            mul!(dfc,KKpar2D_with_BC_terms_sparse,fc)
-            mul!(dgc,KKperp2D_with_BC_terms_sparse,fc)
-            # invert mass matrix
-            ldiv!(d2fc_dvpa2, lu_obj_MM, dfc)
-            ldiv!(d2fc_dvperp2, lu_obj_MM, dgc)
-            #print_vector(fc,"fc",nc_global)
-            @serial_region begin 
-                d2fvpavperp_dvpa2_max, d2fvpavperp_dvpa2_L2 = print_test_data(d2fvpavperp_dvpa2_exact,d2fvpavperp_dvpa2_num,d2fvpavperp_dvpa2_err,"d2fdvpa2",vpa,vperp,dummy_array,print_to_screen=print_to_screen)
-                @test d2fvpavperp_dvpa2_max < 1.0e-7
-                @test d2fvpavperp_dvpa2_L2 < 1.0e-8
-                d2fvpavperp_dvperp2_max, d2fvpavperp_dvperp2_L2 = print_test_data(d2fvpavperp_dvperp2_exact,d2fvpavperp_dvperp2_num,d2fvpavperp_dvperp2_err,"d2fdvperp2",vpa,vperp,dummy_array,print_to_screen=print_to_screen)
-                @test d2fvpavperp_dvperp2_max < 1.0e-7
-                @test d2fvpavperp_dvperp2_L2 < 1.0e-8
-                #if plot_test_output
-                #    plot_test_data(d2fvpavperp_dvpa2_exact,d2fvpavperp_dvpa2_num,d2fvpavperp_dvpa2_err,"d2fvpavperp_dvpa2",vpa,vperp)
-                #    plot_test_data(d2fvpavperp_dvperp2_exact,d2fvpavperp_dvperp2_num,d2fvpavperp_dvperp2_err,"d2fvpavperp_dvperp2",vpa,vperp)
-                #end
-            end
-            finalize_comms!()
-        end
-        
-        @testset "weak-form Rosenbluth potential calculation: elliptic solve" begin
-            println("    - test weak-form Rosenbluth potential calculation: elliptic solve")
-            ngrid = 9
-            nelement_vpa = 8
-            nelement_vperp = 4
-            vpa, vpa_spectral, vperp, vperp_spectral = create_grids(ngrid,nelement_vpa,nelement_vperp,
-                                                                        Lvpa=12.0,Lvperp=6.0)
-            begin_serial_region()
-            fkpl_arrays = init_fokker_planck_collisions_weak_form(vpa,vperp,vpa_spectral,vperp_spectral,
-                                                                  precompute_weights=true, print_to_screen=print_to_screen)
-            dummy_array = allocate_float(vpa.n,vperp.n)
-            F_M = allocate_float(vpa.n,vperp.n)
-            H_M_exact = allocate_float(vpa.n,vperp.n)
-            H_M_num = allocate_shared_float(vpa.n,vperp.n)
-            H_M_err = allocate_float(vpa.n,vperp.n)
-            G_M_exact = allocate_float(vpa.n,vperp.n)
-            G_M_num = allocate_shared_float(vpa.n,vperp.n)
-            G_M_err = allocate_float(vpa.n,vperp.n)
-            d2Gdvpa2_M_exact = allocate_float(vpa.n,vperp.n)
-            d2Gdvpa2_M_num = allocate_shared_float(vpa.n,vperp.n)
-            d2Gdvpa2_M_err = allocate_float(vpa.n,vperp.n)
-            d2Gdvperp2_M_exact = allocate_float(vpa.n,vperp.n)
-            d2Gdvperp2_M_num = allocate_shared_float(vpa.n,vperp.n)
-            d2Gdvperp2_M_err = allocate_float(vpa.n,vperp.n)
-            dGdvperp_M_exact = allocate_float(vpa.n,vperp.n)
-            dGdvperp_M_num = allocate_shared_float(vpa.n,vperp.n)
-            dGdvperp_M_err = allocate_float(vpa.n,vperp.n)
-            d2Gdvperpdvpa_M_exact = allocate_float(vpa.n,vperp.n)
-            d2Gdvperpdvpa_M_num = allocate_shared_float(vpa.n,vperp.n)
-            d2Gdvperpdvpa_M_err = allocate_float(vpa.n,vperp.n)
-            dHdvpa_M_exact = allocate_float(vpa.n,vperp.n)
-            dHdvpa_M_num = allocate_shared_float(vpa.n,vperp.n)
-            dHdvpa_M_err = allocate_float(vpa.n,vperp.n)
-            dHdvperp_M_exact = allocate_float(vpa.n,vperp.n)
-            dHdvperp_M_num = allocate_shared_float(vpa.n,vperp.n)
-            dHdvperp_M_err = allocate_float(vpa.n,vperp.n)
-
-            dens, upar, vth = 1.0, 1.0, 1.0
-            begin_serial_region()
-            for ivperp in 1:vperp.n
-                for ivpa in 1:vpa.n
-                    F_M[ivpa,ivperp] = F_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
-                    H_M_exact[ivpa,ivperp] = H_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
-                    G_M_exact[ivpa,ivperp] = G_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
-                    d2Gdvpa2_M_exact[ivpa,ivperp] = d2Gdvpa2_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
-                    d2Gdvperp2_M_exact[ivpa,ivperp] = d2Gdvperp2_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
-                    dGdvperp_M_exact[ivpa,ivperp] = dGdvperp_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
-                    d2Gdvperpdvpa_M_exact[ivpa,ivperp] = d2Gdvperpdvpa_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
-                    dHdvpa_M_exact[ivpa,ivperp] = dHdvpa_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
-                    dHdvperp_M_exact[ivpa,ivperp] = dHdvperp_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
-                end
-            end
-            rpbd_exact = allocate_rosenbluth_potential_boundary_data(vpa,vperp)
-            # use known test function to provide exact data
-            begin_s_r_z_anyv_region()
-            calculate_rosenbluth_potential_boundary_data_exact!(rpbd_exact,
-                  H_M_exact,dHdvpa_M_exact,dHdvperp_M_exact,G_M_exact,
-                  dGdvperp_M_exact,d2Gdvperp2_M_exact,
-                  d2Gdvperpdvpa_M_exact,d2Gdvpa2_M_exact,vpa,vperp)
-            # calculate the potentials numerically
-            calculate_rosenbluth_potentials_via_elliptic_solve!(
-                 fkpl_arrays.GG, fkpl_arrays.HH, fkpl_arrays.dHdvpa, fkpl_arrays.dHdvperp,
-                 fkpl_arrays.d2Gdvpa2, fkpl_arrays.dGdvperp, fkpl_arrays.d2Gdvperpdvpa,
-                 fkpl_arrays.d2Gdvperp2, F_M, vpa, vperp, vpa_spectral, vperp_spectral,
-                 fkpl_arrays; algebraic_solve_for_d2Gdvperp2=false,
-                 calculate_GG=true, calculate_dGdvperp=true)
-            # extract C[Fs,Fs'] result
-            # and Rosenbluth potentials for testing
-            begin_s_r_z_anyv_region()
-            begin_anyv_vperp_vpa_region()
-            @loop_vperp_vpa ivperp ivpa begin
-                G_M_num[ivpa,ivperp] = fkpl_arrays.GG[ivpa,ivperp]
-                H_M_num[ivpa,ivperp] = fkpl_arrays.HH[ivpa,ivperp]
-                dHdvpa_M_num[ivpa,ivperp] = fkpl_arrays.dHdvpa[ivpa,ivperp]
-                dHdvperp_M_num[ivpa,ivperp] = fkpl_arrays.dHdvperp[ivpa,ivperp]
-                dGdvperp_M_num[ivpa,ivperp] = fkpl_arrays.dGdvperp[ivpa,ivperp]
-                d2Gdvperp2_M_num[ivpa,ivperp] = fkpl_arrays.d2Gdvperp2[ivpa,ivperp]
-                d2Gdvpa2_M_num[ivpa,ivperp] = fkpl_arrays.d2Gdvpa2[ivpa,ivperp]
-                d2Gdvperpdvpa_M_num[ivpa,ivperp] = fkpl_arrays.d2Gdvperpdvpa[ivpa,ivperp]
-            end
-            begin_serial_region()
-            @serial_region begin
-                # test the boundary data
-                max_H_boundary_data_err, max_dHdvpa_boundary_data_err, 
-                max_dHdvperp_boundary_data_err, max_G_boundary_data_err,
-                max_dGdvperp_boundary_data_err, max_d2Gdvperp2_boundary_data_err, 
-                max_d2Gdvperpdvpa_boundary_data_err, max_d2Gdvpa2_boundary_data_err = test_rosenbluth_potential_boundary_data(fkpl_arrays.rpbd,rpbd_exact,vpa,vperp,print_to_screen=print_to_screen)
-                atol_max = 2.0e-12
-                @test max_H_boundary_data_err < atol_max
-                atol_max = 2.0e-11
-                @test max_dHdvpa_boundary_data_err < atol_max
-                atol_max = 6.0e-9
-                @test max_dHdvperp_boundary_data_err < atol_max
-                atol_max = 1.0e-11
-                @test max_G_boundary_data_err < atol_max
-                atol_max = 2.0e-7
-                @test max_dGdvperp_boundary_data_err < atol_max
-                atol_max = 5.0e-8
-                @test max_d2Gdvperp2_boundary_data_err < atol_max
-                atol_max = 2.0e-8
-                @test max_d2Gdvperpdvpa_boundary_data_err < atol_max
-                atol_max = 1.0e-11
-                @test max_d2Gdvpa2_boundary_data_err < atol_max
-                # test the elliptic solvers
-                H_M_max, H_M_L2 = print_test_data(H_M_exact,H_M_num,H_M_err,"H_M",vpa,vperp,dummy_array,print_to_screen=print_to_screen)
-                dHdvpa_M_max, dHdvpa_M_L2 = print_test_data(dHdvpa_M_exact,dHdvpa_M_num,dHdvpa_M_err,"dHdvpa_M",vpa,vperp,dummy_array,print_to_screen=print_to_screen)
-                dHdvperp_M_max, dHdvperp_M_L2 = print_test_data(dHdvperp_M_exact,dHdvperp_M_num,dHdvperp_M_err,"dHdvperp_M",vpa,vperp,dummy_array,print_to_screen=print_to_screen)
-                G_M_max, G_M_L2 = print_test_data(G_M_exact,G_M_num,G_M_err,"G_M",vpa,vperp,dummy_array,print_to_screen=print_to_screen)
-                d2Gdvpa2_M_max, d2Gdvpa2_M_L2 = print_test_data(d2Gdvpa2_M_exact,d2Gdvpa2_M_num,d2Gdvpa2_M_err,"d2Gdvpa2_M",vpa,vperp,dummy_array,print_to_screen=print_to_screen)
-                dGdvperp_M_max, dGdvperp_M_L2 = print_test_data(dGdvperp_M_exact,dGdvperp_M_num,dGdvperp_M_err,"dGdvperp_M",vpa,vperp,dummy_array,print_to_screen=print_to_screen)
-                d2Gdvperpdvpa_M_max, d2Gdvperpdvpa_M_L2 = print_test_data(d2Gdvperpdvpa_M_exact,d2Gdvperpdvpa_M_num,d2Gdvperpdvpa_M_err,"d2Gdvperpdvpa_M",vpa,vperp,dummy_array,print_to_screen=print_to_screen)
-                d2Gdvperp2_M_max, d2Gdvperp2_M_L2 = print_test_data(d2Gdvperp2_M_exact,d2Gdvperp2_M_num,d2Gdvperp2_M_err,"d2Gdvperp2_M",vpa,vperp,dummy_array,print_to_screen=print_to_screen)
-                atol_max = 2.0e-7
-                atol_L2 = 5.0e-9
-                @test H_M_max < atol_max
-                @test H_M_L2 < atol_L2
-                atol_max = 2.0e-6
-                atol_L2 = 5.0e-8
-                @test dHdvpa_M_max < atol_max
-                @test dHdvpa_M_L2 < atol_L2
-                atol_max = 2.0e-5
-                atol_L2 = 1.0e-7
-                @test dHdvperp_M_max < atol_max
-                @test dHdvperp_M_L2 < atol_L2
-                atol_max = 2.0e-8
-                atol_L2 = 7.0e-10
-                @test G_M_max < atol_max
-                @test G_M_L2 < atol_L2
-                atol_max = 2.0e-7
-                atol_L2 = 4.0e-9
-                @test d2Gdvpa2_M_max < atol_max
-                @test d2Gdvpa2_M_L2 < atol_L2
-                atol_max = 2.0e-6
-                atol_L2 = 2.0e-7
-                @test dGdvperp_M_max < atol_max
-                @test dGdvperp_M_L2 < atol_L2
-                atol_max = 2.0e-6
-                atol_L2 = 2.0e-8
-                @test d2Gdvperpdvpa_M_max < atol_max
-                @test d2Gdvperpdvpa_M_L2 < atol_L2
-                atol_max = 3.0e-7
-                atol_L2 = 2.0e-8
-                @test d2Gdvperp2_M_max < atol_max
-                @test d2Gdvperp2_M_L2 < atol_L2
-            end
-            finalize_comms!()                                                                  
-        end
-        
-        @testset "weak-form collision operator calculation" begin
-            println("    - test weak-form collision operator calculation")
-            ngrid = 9
-            nelement_vpa = 8
-            nelement_vperp = 4
-            vpa, vpa_spectral, vperp, vperp_spectral = create_grids(ngrid,nelement_vpa,nelement_vperp,
-                                                                        Lvpa=12.0,Lvperp=6.0)
-            begin_serial_region()
-            fkpl_arrays = init_fokker_planck_collisions_weak_form(vpa,vperp,vpa_spectral,vperp_spectral,
-                                                                  precompute_weights=true, print_to_screen=print_to_screen)
-            
-            @testset "test_self_operator=$test_self_operator test_numerical_conserving_terms=$test_numerical_conserving_terms test_parallelism = $test_parallelism test_dense_construction=$test_dense_construction use_Maxwellian_Rosenbluth_coefficients=$use_Maxwellian_Rosenbluth_coefficients use_Maxwellian_field_particle_distribution=$use_Maxwellian_field_particle_distribution algebraic_solve_for_d2Gdvperp2=$algebraic_solve_for_d2Gdvperp2" for
-                    (test_self_operator, test_numerical_conserving_terms, test_parallelism, test_dense_construction, 
-                     use_Maxwellian_Rosenbluth_coefficients, use_Maxwellian_field_particle_distribution,
-                     algebraic_solve_for_d2Gdvperp2) in ((true,false,false,false,false,false,false),(false,false,false,false,false,false,false),
-                                                         (true,true,false,false,false,false,false),(true,false,true,false,false,false,false),
-                                                         (true,false,false,true,false,false,false),(true,false,false,false,true,false,false),
-                                                         (true,false,false,false,false,true,false),(true,false,false,false,false,false,true))
-                        
-                dummy_array = allocate_float(vpa.n,vperp.n)
-                Fs_M = allocate_float(vpa.n,vperp.n)
-                F_M = allocate_float(vpa.n,vperp.n)
-                C_M_num = allocate_shared_float(vpa.n,vperp.n)
-                C_M_exact = allocate_float(vpa.n,vperp.n)
-                C_M_err = allocate_float(vpa.n,vperp.n)
-                if test_self_operator
-                    dens, upar, vth = 1.0, 1.0, 1.0
-                    denss, upars, vths = dens, upar, vth
-                else
-                    denss, upars, vths = 1.0, -1.0, 2.0/3.0
-                    dens, upar, vth = 1.0, 1.0, 1.0
-                end
-                ms = 1.0
-                msp = 1.0
-                nussp = 1.0
-                begin_serial_region()
-                for ivperp in 1:vperp.n
-                    for ivpa in 1:vpa.n
-                        Fs_M[ivpa,ivperp] = F_Maxwellian(denss,upars,vths,vpa,vperp,ivpa,ivperp)
-                        F_M[ivpa,ivperp] = F_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
-                        C_M_exact[ivpa,ivperp] = Cssp_Maxwellian_inputs(denss,upars,vths,ms,
-                                                                        dens,upar,vth,msp,
-                                                                        nussp,vpa,vperp,ivpa,ivperp)
-                    end
-                end
-                begin_s_r_z_anyv_region()
-                fokker_planck_collision_operator_weak_form!(Fs_M,F_M,ms,msp,nussp,
-                                                 fkpl_arrays,
-                                                 vperp, vpa, vperp_spectral, vpa_spectral,
-                                                 test_assembly_serial=test_parallelism,
-                                                 use_Maxwellian_Rosenbluth_coefficients=use_Maxwellian_Rosenbluth_coefficients,
-                                                 use_Maxwellian_field_particle_distribution=use_Maxwellian_field_particle_distribution,
-                                                 algebraic_solve_for_d2Gdvperp2=algebraic_solve_for_d2Gdvperp2,
-                                                 calculate_GG = false, calculate_dGdvperp=false)
-                if test_numerical_conserving_terms && test_self_operator
-                    # enforce the boundary conditions on CC before it is used for timestepping
-                    enforce_vpavperp_BCs!(fkpl_arrays.CC,vpa,vperp,vpa_spectral,vperp_spectral)
-                    # make ad-hoc conserving corrections
-                    conserving_corrections!(fkpl_arrays.CC,Fs_M,vpa,vperp,dummy_array)
-                end
-                # extract C[Fs,Fs'] result
-                begin_s_r_z_anyv_region()
-                begin_anyv_vperp_vpa_region()
-                @loop_vperp_vpa ivperp ivpa begin
-                    C_M_num[ivpa,ivperp] = fkpl_arrays.CC[ivpa,ivperp]
-                end
-                begin_serial_region()
-                @serial_region begin
-                    C_M_max, C_M_L2 = print_test_data(C_M_exact,C_M_num,C_M_err,"C_M",vpa,vperp,dummy_array,print_to_screen=print_to_screen)
-                    if test_self_operator && !test_numerical_conserving_terms && !use_Maxwellian_Rosenbluth_coefficients && !use_Maxwellian_field_particle_distribution
-                        atol_max = 6.0e-4
-                        atol_L2 = 7.0e-6
-                    elseif test_self_operator && test_numerical_conserving_terms && !use_Maxwellian_Rosenbluth_coefficients && !use_Maxwellian_field_particle_distribution
-                        atol_max = 7.0e-4
-                        atol_L2 = 7.0e-6
-                    elseif test_self_operator && !test_numerical_conserving_terms && use_Maxwellian_Rosenbluth_coefficients && !use_Maxwellian_field_particle_distribution
-                        atol_max = 8.0e-4
-                        atol_L2 = 8.1e-6
-                    elseif test_self_operator && !test_numerical_conserving_terms && !use_Maxwellian_Rosenbluth_coefficients && use_Maxwellian_field_particle_distribution
-                        atol_max = 1.1e-3
-                        atol_L2 = 9.0e-6
-                    else
-                        atol_max = 7.0e-2
-                        atol_L2 = 6.0e-4
-                    end
-                    @test C_M_max < atol_max
-                    @test C_M_L2 < atol_L2
-                    # calculate the entropy production
-                    lnfC = fkpl_arrays.rhsvpavperp
-                    @loop_vperp_vpa ivperp ivpa begin
-                        lnfC[ivpa,ivperp] = Fs_M[ivpa,ivperp]*C_M_num[ivpa,ivperp]
-                    end
-                    dSdt = - get_density(lnfC,vpa,vperp)
-                    if test_self_operator && !test_numerical_conserving_terms
-                        if algebraic_solve_for_d2Gdvperp2
-                            rtol, atol = 0.0, 1.0e-7
-                        else
-                            rtol, atol = 0.0, 1.0e-8
-                        end
-                        @test isapprox(dSdt, rtol ; atol=atol)
-                        delta_n = get_density(C_M_num, vpa, vperp)
-                        delta_upar = get_upar(C_M_num, vpa, vperp, dens)
-                        delta_ppar = msp*get_ppar(C_M_num, vpa, vperp, upar)
-                        delta_pperp = msp*get_pperp(C_M_num, vpa, vperp)
-                        delta_pressure = get_pressure(delta_ppar,delta_pperp)
-                        rtol, atol = 0.0, 1.0e-12
-                        @test isapprox(delta_n, rtol ; atol=atol)
-                        rtol, atol = 0.0, 1.0e-9
-                        @test isapprox(delta_upar, rtol ; atol=atol)
-                        if algebraic_solve_for_d2Gdvperp2
-                            rtol, atol = 0.0, 1.0e-7
-                        else
-                            rtol, atol = 0.0, 1.0e-8
-                        end
-                        @test isapprox(delta_pressure, rtol ; atol=atol)
-                        if print_to_screen
-                            println("dSdt: $dSdt should be >0.0")
-                            println("delta_n: ", delta_n)
-                            println("delta_upar: ", delta_upar)
-                            println("delta_pressure: ", delta_pressure)
-                        end
-                    elseif test_self_operator && test_numerical_conserving_terms
-                        rtol, atol = 0.0, 6.0e-7
-                        @test isapprox(dSdt, rtol ; atol=atol)
-                        delta_n = get_density(C_M_num, vpa, vperp)
-                        delta_upar = get_upar(C_M_num, vpa, vperp, dens)
-                        delta_ppar = msp*get_ppar(C_M_num, vpa, vperp, upar)
-                        delta_pperp = msp*get_pperp(C_M_num, vpa, vperp)
-                        delta_pressure = get_pressure(delta_ppar,delta_pperp)
-                        rtol, atol = 0.0, 1.0e-15
-                        @test isapprox(delta_n, rtol ; atol=atol)
-                        rtol, atol = 0.0, 1.0e-15
-                        @test isapprox(delta_upar, rtol ; atol=atol)
-                        rtol, atol = 0.0, 1.0e-15
-                        @test isapprox(delta_pressure, rtol ; atol=atol)
-                        if print_to_screen
-                            println("dSdt: $dSdt should be >0.0")
-                            println("delta_n: ", delta_n)
-                            println("delta_upar: ", delta_upar)
-                            println("delta_pressure: ", delta_pressure)
-                        end
-                    else
-                        atol = 1.0e-4
-                        @test isapprox(dSdt, 2.543251178128757 ; atol=atol)
-                        delta_n = get_density(C_M_num, vpa, vperp)
-                        rtol, atol = 0.0, 1.0e-12
-                        @test isapprox(delta_n, rtol ; atol=atol)
-                        if print_to_screen
-                            println("dSdt: $dSdt")
-                            println("delta_n: ", delta_n)
-                        end
-                    end
-                end
-            end
-            finalize_comms!()                                                                  
-        end
-        
-        @testset "weak-form (slowing-down) collision operator calculation" begin
-            println("    - test weak-form (slowing-down) collision operator calculation")
-            ngrid = 9
-            nelement_vpa = 16
-            nelement_vperp = 8
-            vpa, vpa_spectral, vperp, vperp_spectral = create_grids(ngrid,nelement_vpa,nelement_vperp,
-                                                                        Lvpa=12.0,Lvperp=6.0)
-            begin_serial_region()
-            fkpl_arrays = init_fokker_planck_collisions_weak_form(vpa,vperp,vpa_spectral,vperp_spectral,
-                                                                  precompute_weights=true, print_to_screen=print_to_screen)
-            
-            @testset "slowing_down_test=true test_numerical_conserving_terms=$test_numerical_conserving_terms" for test_numerical_conserving_terms in (true,false)
-                        
-                dummy_array = allocate_float(vpa.n,vperp.n)
-                Fs_M = allocate_float(vpa.n,vperp.n)
-                F_M = allocate_float(vpa.n,vperp.n)
-                C_M_num = allocate_shared_float(vpa.n,vperp.n)
-                C_M_exact = allocate_float(vpa.n,vperp.n)
-                C_M_err = allocate_float(vpa.n,vperp.n)
-                
-                # pick a set of parameters that represent slowing down
-                # on slow ions and faster electrons, but which are close
-                # enough to 1 for errors comparable to the self-collision operator
-                # increasing or reducing vth, mass increases the errors
-                dens, upar, vth = 1.0, 1.0, 1.0
-                mref = 1.0
-                Zref = 1.0
-                msp = [1.0,0.2]#[0.25, 0.25/1836.0]
-                Zsp = [0.5,0.5]#[0.5, 0.5]
-                denssp = [1.0,1.0]#[1.0, 1.0]
-                uparsp = [0.0,0.0]#[0.0, 0.0]
-                vthsp = [sqrt(0.5/msp[1]), sqrt(0.5/msp[2])]#[sqrt(0.01/msp[1]), sqrt(0.01/msp[2])]
-                nsprime = size(msp,1)
-                nuref = 1.0
-                
-                begin_serial_region()
-                for ivperp in 1:vperp.n
-                    for ivpa in 1:vpa.n
-                        Fs_M[ivpa,ivperp] = F_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
-                        C_M_exact[ivpa,ivperp] = 0.0
-                    end
-                end
-                # sum up contributions to cross-collision operator
-                for isp in 1:nsprime
-                    zfac = (Zsp[isp]/Zref)^2
-                    nussp = nuref*zfac
-                    for ivperp in 1:vperp.n
-                        for ivpa in 1:vpa.n
-                            C_M_exact[ivpa,ivperp] += Cssp_Maxwellian_inputs(dens,upar,vth,mref,
-                                                                            denssp[isp],uparsp[isp],vthsp[isp],msp[isp],
-                                                                            nussp,vpa,vperp,ivpa,ivperp)
-                        end
-                    end
-                end
-                begin_s_r_z_anyv_region()
-                @views fokker_planck_collision_operator_weak_form_Maxwellian_Fsp!(Fs_M[:,:],
-                                     nuref,mref,Zref,msp,Zsp,denssp,uparsp,vthsp,
-                                     fkpl_arrays,vperp,vpa,vperp_spectral,vpa_spectral)
-                if test_numerical_conserving_terms
-                    # enforce the boundary conditions on CC before it is used for timestepping
-                    enforce_vpavperp_BCs!(fkpl_arrays.CC,vpa,vperp,vpa_spectral,vperp_spectral)
-                    # make ad-hoc conserving corrections
-                    density_conserving_correction!(fkpl_arrays.CC,Fs_M,vpa,vperp,dummy_array)
-                end
-                # extract C[Fs,Fs'] result
-                begin_s_r_z_anyv_region()
-                begin_anyv_vperp_vpa_region()
-                @loop_vperp_vpa ivperp ivpa begin
-                    C_M_num[ivpa,ivperp] = fkpl_arrays.CC[ivpa,ivperp]
-                end
-                begin_serial_region()
-                @serial_region begin
-                    C_M_max, C_M_L2 = print_test_data(C_M_exact,C_M_num,C_M_err,"C_M",vpa,vperp,dummy_array,print_to_screen=print_to_screen)                    
-                    atol_max = 7.0e-2
-                    atol_L2 = 6.0e-4
-                    @test C_M_max < atol_max
-                    @test C_M_L2 < atol_L2
-                    if !test_numerical_conserving_terms
-                        delta_n = get_density(C_M_num, vpa, vperp)
-                        rtol, atol = 0.0, 1.0e-12
-                        @test isapprox(delta_n, rtol ; atol=atol)
-                        if print_to_screen
-                            println("delta_n: ", delta_n)                           
-                        end
-                    elseif test_numerical_conserving_terms
-                        delta_n = get_density(C_M_num, vpa, vperp)
-                        rtol, atol = 0.0, 1.0e-15
-                        @test isapprox(delta_n, rtol ; atol=atol)
-                        if print_to_screen
-                            println("delta_n: ", delta_n)
-                        end
-                    end
-                end
-            end
-            finalize_comms!()                                                                  
-        end
-        
-        @testset "weak-form Rosenbluth potential calculation: direct integration" begin
-            println("    - test weak-form Rosenbluth potential calculation: direct integration")
-            ngrid = 5 # chosen for a quick test -- direct integration is slow!
-            nelement_vpa = 8
-            nelement_vperp = 4
-            vpa, vpa_spectral, vperp, vperp_spectral = create_grids(ngrid,nelement_vpa,nelement_vperp,
-                                                                        Lvpa=12.0,Lvperp=6.0)
-            begin_serial_region()
-            fkpl_arrays = init_fokker_planck_collisions_direct_integration(vperp,vpa,precompute_weights=true,print_to_screen=print_to_screen)
-            dummy_array = allocate_float(vpa.n,vperp.n)
-            F_M = allocate_float(vpa.n,vperp.n)
-            H_M_exact = allocate_float(vpa.n,vperp.n)
-            H_M_num = allocate_shared_float(vpa.n,vperp.n)
-            H_M_err = allocate_float(vpa.n,vperp.n)
-            G_M_exact = allocate_float(vpa.n,vperp.n)
-            G_M_num = allocate_shared_float(vpa.n,vperp.n)
-            G_M_err = allocate_float(vpa.n,vperp.n)
-            d2Gdvpa2_M_exact = allocate_float(vpa.n,vperp.n)
-            d2Gdvpa2_M_num = allocate_shared_float(vpa.n,vperp.n)
-            d2Gdvpa2_M_err = allocate_float(vpa.n,vperp.n)
-            d2Gdvperp2_M_exact = allocate_float(vpa.n,vperp.n)
-            d2Gdvperp2_M_num = allocate_shared_float(vpa.n,vperp.n)
-            d2Gdvperp2_M_err = allocate_float(vpa.n,vperp.n)
-            dGdvperp_M_exact = allocate_float(vpa.n,vperp.n)
-            dGdvperp_M_num = allocate_shared_float(vpa.n,vperp.n)
-            dGdvperp_M_err = allocate_float(vpa.n,vperp.n)
-            d2Gdvperpdvpa_M_exact = allocate_float(vpa.n,vperp.n)
-            d2Gdvperpdvpa_M_num = allocate_shared_float(vpa.n,vperp.n)
-            d2Gdvperpdvpa_M_err = allocate_float(vpa.n,vperp.n)
-            dHdvpa_M_exact = allocate_float(vpa.n,vperp.n)
-            dHdvpa_M_num = allocate_shared_float(vpa.n,vperp.n)
-            dHdvpa_M_err = allocate_float(vpa.n,vperp.n)
-            dHdvperp_M_exact = allocate_float(vpa.n,vperp.n)
-            dHdvperp_M_num = allocate_shared_float(vpa.n,vperp.n)
-            dHdvperp_M_err = allocate_float(vpa.n,vperp.n)
-
-            dens, upar, vth = 1.0, 1.0, 1.0
-            begin_serial_region()
-            for ivperp in 1:vperp.n
-                for ivpa in 1:vpa.n
-                    F_M[ivpa,ivperp] = F_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
-                    H_M_exact[ivpa,ivperp] = H_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
-                    G_M_exact[ivpa,ivperp] = G_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
-                    d2Gdvpa2_M_exact[ivpa,ivperp] = d2Gdvpa2_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
-                    d2Gdvperp2_M_exact[ivpa,ivperp] = d2Gdvperp2_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
-                    dGdvperp_M_exact[ivpa,ivperp] = dGdvperp_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
-                    d2Gdvperpdvpa_M_exact[ivpa,ivperp] = d2Gdvperpdvpa_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
-                    dHdvpa_M_exact[ivpa,ivperp] = dHdvpa_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
-                    dHdvperp_M_exact[ivpa,ivperp] = dHdvperp_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
-                end
-            end
-            # calculate the potentials numerically
-            begin_s_r_z_anyv_region()
-            calculate_rosenbluth_potentials_via_direct_integration!(G_M_num,H_M_num,dHdvpa_M_num,dHdvperp_M_num,
-             d2Gdvpa2_M_num,dGdvperp_M_num,d2Gdvperpdvpa_M_num,d2Gdvperp2_M_num,F_M,
-             vpa,vperp,vpa_spectral,vperp_spectral,fkpl_arrays)
-            begin_serial_region()
-            @serial_region begin
-                # test the integration
-                # to recalculate absolute tolerances atol, set print_to_screen = true
-                H_M_max, H_M_L2 = print_test_data(H_M_exact,H_M_num,H_M_err,"H_M",vpa,vperp,dummy_array,print_to_screen=print_to_screen)
-                dHdvpa_M_max, dHdvpa_M_L2 = print_test_data(dHdvpa_M_exact,dHdvpa_M_num,dHdvpa_M_err,"dHdvpa_M",vpa,vperp,dummy_array,print_to_screen=print_to_screen)
-                dHdvperp_M_max, dHdvperp_M_L2 = print_test_data(dHdvperp_M_exact,dHdvperp_M_num,dHdvperp_M_err,"dHdvperp_M",vpa,vperp,dummy_array,print_to_screen=print_to_screen)
-                G_M_max, G_M_L2 = print_test_data(G_M_exact,G_M_num,G_M_err,"G_M",vpa,vperp,dummy_array,print_to_screen=print_to_screen)
-                d2Gdvpa2_M_max, d2Gdvpa2_M_L2 = print_test_data(d2Gdvpa2_M_exact,d2Gdvpa2_M_num,d2Gdvpa2_M_err,"d2Gdvpa2_M",vpa,vperp,dummy_array,print_to_screen=print_to_screen)
-                dGdvperp_M_max, dGdvperp_M_L2 = print_test_data(dGdvperp_M_exact,dGdvperp_M_num,dGdvperp_M_err,"dGdvperp_M",vpa,vperp,dummy_array,print_to_screen=print_to_screen)
-                d2Gdvperpdvpa_M_max, d2Gdvperpdvpa_M_L2 = print_test_data(d2Gdvperpdvpa_M_exact,d2Gdvperpdvpa_M_num,d2Gdvperpdvpa_M_err,"d2Gdvperpdvpa_M",vpa,vperp,dummy_array,print_to_screen=print_to_screen)
-                d2Gdvperp2_M_max, d2Gdvperp2_M_L2 = print_test_data(d2Gdvperp2_M_exact,d2Gdvperp2_M_num,d2Gdvperp2_M_err,"d2Gdvperp2_M",vpa,vperp,dummy_array,print_to_screen=print_to_screen)
-                atol_max = 2.1e-4
-                atol_L2 = 6.5e-6
-                @test H_M_max < atol_max
-                @test H_M_L2 < atol_L2
-                atol_max = 1.5e-3
-                atol_L2 = 6.5e-5
-                @test dHdvpa_M_max < atol_max
-                @test dHdvpa_M_L2 < atol_L2
-                atol_max = 8.0e-4
-                atol_L2 = 4.0e-5
-                @test dHdvperp_M_max < atol_max
-                @test dHdvperp_M_L2 < atol_L2
-                atol_max = 1.1e-4
-                atol_L2 = 4.0e-5
-                @test G_M_max < atol_max
-                @test G_M_L2 < atol_L2
-                atol_max = 2.5e-4
-                atol_L2 = 1.2e-5
-                @test d2Gdvpa2_M_max < atol_max
-                @test d2Gdvpa2_M_L2 < atol_L2
-                atol_max = 9.0e-5
-                atol_L2 = 6.0e-5
-                @test dGdvperp_M_max < atol_max
-                @test dGdvperp_M_L2 < atol_L2
-                atol_max = 1.1e-4
-                atol_L2 = 9.0e-6
-                @test d2Gdvperpdvpa_M_max < atol_max
-                @test d2Gdvperpdvpa_M_L2 < atol_L2
-                atol_max = 2.0e-4
-                atol_L2 = 1.1e-5
-                @test d2Gdvperp2_M_max < atol_max
-                @test d2Gdvperp2_M_L2 < atol_L2
-            end
-            finalize_comms!()                                                                  
-        end
-        
-        
-    end
-end
-
-end #FokkerPlanckTests
-
-using .FokkerPlanckTests
-
-FokkerPlanckTests.runtests()
-
+module FokkerPlanckTests
+
+include("setup.jl")
+
+
+using MPI
+using LinearAlgebra: mul!, ldiv!
+using moment_kinetics.communication
+using moment_kinetics.looping
+using moment_kinetics.array_allocation: allocate_float, allocate_shared_float
+using moment_kinetics.coordinates: define_coordinate
+using moment_kinetics.type_definitions: mk_float, mk_int
+using moment_kinetics.velocity_moments: get_density, get_upar, get_ppar, get_pperp, get_pressure
+
+using moment_kinetics.fokker_planck: init_fokker_planck_collisions_weak_form, fokker_planck_collision_operator_weak_form!
+using moment_kinetics.fokker_planck: conserving_corrections!, init_fokker_planck_collisions_direct_integration
+using moment_kinetics.fokker_planck: density_conserving_correction!, fokker_planck_collision_operator_weak_form_Maxwellian_Fsp!
+using moment_kinetics.fokker_planck_test: print_test_data, fkpl_error_data, allocate_error_data #, plot_test_data
+using moment_kinetics.fokker_planck_test: F_Maxwellian, G_Maxwellian, H_Maxwellian
+using moment_kinetics.fokker_planck_test: d2Gdvpa2_Maxwellian, d2Gdvperp2_Maxwellian, d2Gdvperpdvpa_Maxwellian, dGdvperp_Maxwellian
+using moment_kinetics.fokker_planck_test: dHdvperp_Maxwellian, dHdvpa_Maxwellian, Cssp_Maxwellian_inputs
+using moment_kinetics.fokker_planck_calculus: calculate_rosenbluth_potentials_via_elliptic_solve!, calculate_rosenbluth_potential_boundary_data_exact!
+using moment_kinetics.fokker_planck_calculus: test_rosenbluth_potential_boundary_data, allocate_rosenbluth_potential_boundary_data
+using moment_kinetics.fokker_planck_calculus: enforce_vpavperp_BCs!, calculate_rosenbluth_potentials_via_direct_integration!
+using moment_kinetics.fokker_planck_calculus: interpolate_2D_vspace!
+
+function create_grids(ngrid,nelement_vpa,nelement_vperp;
+                      Lvpa=12.0,Lvperp=6.0)
+    # Build the vpa and vperp coordinates (and their spectral objects) for a
+    # test, and initialise the communication and loop-range state the tests use.
+    # ngrid is shared by both coordinates; Lvpa/Lvperp are passed as their "L".
+
+    # Options common to both velocity coordinates.
+    shared_options = ("discretization" => "gausslegendre_pseudospectral",
+                      "bc" => "zero", # used only in derivative! functions
+                      "element_spacing_option" => "uniform")
+
+    # Options for a single coordinate. The local (per-rank) and global element
+    # counts are the same here.
+    coordinate_options(nelement, L) = OptionsDict("ngrid" => ngrid,
+                                                  "nelement" => nelement,
+                                                  "nelement_local" => nelement,
+                                                  "L" => L,
+                                                  shared_options...)
+
+    # 'input' dictionary holding the info needed to create each coordinate
+    coords_input = OptionsDict("vperp" => coordinate_options(nelement_vperp, Lvperp),
+                               "vpa" => coordinate_options(nelement_vpa, Lvpa))
+
+    # Set up MPI
+    initialize_comms!()
+    setup_distributed_memory_MPI(1,1,1,1)
+
+    vperp, vperp_spectral = define_coordinate(coords_input, "vperp")
+    vpa, vpa_spectral = define_coordinate(coords_input, "vpa")
+
+    loop_extents = (s=1, sn=1, r=1, z=1, vperp=vperp.n, vpa=vpa.n,
+                    vzeta=1, vr=1, vz=1)
+    looping.setup_loop_ranges!(block_rank[], block_size[]; loop_extents...)
+
+    return vpa, vpa_spectral, vperp, vperp_spectral
+end
+
+function runtests()
+    print_to_screen = false
+    @testset "Fokker Planck tests" verbose=use_verbose begin
+        println("Fokker Planck tests")
+
+        @testset "Lagrange-polynomial 2D interpolation" begin
+            println("    - test Lagrange-polynomial 2D interpolation")
+            # Interpolates Maxwellian electron and ion pdfs onto the other species'
+            # normalised grid and compares with the analytically rescaled Maxwellian.
+            ngrid = 9
+            nelement_vpa = 16
+            nelement_vperp = 8
+            vpa, vpa_spectral, vperp, vperp_spectral = create_grids(ngrid,nelement_vpa,nelement_vperp,
+                                                                        Lvpa=8.0,Lvperp=4.0)
+
+            # electron pdf on electron grids
+            Fe = allocate_shared_float(vpa.n,vperp.n)
+            # electron pdf on ion normalised grids
+            Fe_interp_ion_units = allocate_shared_float(vpa.n,vperp.n)
+            # exact value for comparison
+            Fe_exact_ion_units = allocate_shared_float(vpa.n,vperp.n)
+            # ion pdf on ion grids
+            Fi = allocate_shared_float(vpa.n,vperp.n)
+            # ion pdf on electron normalised grids
+            Fi_interp_electron_units = allocate_shared_float(vpa.n,vperp.n)
+            # exact value for comparison
+            Fi_exact_electron_units = allocate_shared_float(vpa.n,vperp.n)
+            # test array
+            F_err = allocate_float(vpa.n,vperp.n)
+
+            dense = 1.0
+            upare = 0.0 # upare in electron reference units
+            vthe = 1.0 # vthe in electron reference units
+            densi = 1.0
+            upari = 0.0 # upari in ion reference units
+            vthi = 1.0 # vthi in ion reference units
+            # reference speeds for electrons and ions
+            cref_electron = 60.0
+            cref_ion = 1.0
+            # scale factor for change of reference speed
+            scalefac = cref_ion/cref_electron
+
+            begin_serial_region()
+            @serial_region begin
+                @loop_vperp_vpa ivperp ivpa begin
+                    Fe[ivpa,ivperp] = F_Maxwellian(dense,upare,vthe,vpa,vperp,ivpa,ivperp)
+                    Fe_exact_ion_units[ivpa,ivperp] = F_Maxwellian(dense,upare/scalefac,vthe/scalefac,vpa,vperp,ivpa,ivperp)/(scalefac^3)
+                    Fi[ivpa,ivperp] = F_Maxwellian(densi,upari,vthi,vpa,vperp,ivpa,ivperp)
+                    Fi_exact_electron_units[ivpa,ivperp] = (scalefac^3)*F_Maxwellian(densi,upari*scalefac,vthi*scalefac,vpa,vperp,ivpa,ivperp)
+                end
+            end
+
+            begin_s_r_z_anyv_region()
+            # electrons -> ion units: the exact result has upar and vth divided by
+            # scalefac and the amplitude divided by scalefac^3
+            interpolate_2D_vspace!(Fe_interp_ion_units,Fe,vpa,vperp,scalefac)
+            # ions -> electron units: the exact result has upar and vth multiplied by
+            # scalefac and the amplitude multiplied by scalefac^3
+            interpolate_2D_vspace!(Fi_interp_electron_units,Fi,vpa,vperp,1.0/scalefac)
+
+            begin_serial_region()
+            # check the result
+            @serial_region begin
+                # for electron data on ion grids
+                @. F_err = abs(Fe_interp_ion_units - Fe_exact_ion_units)
+                max_F_err = maximum(F_err)
+                max_F = maximum(Fe_exact_ion_units)
+                # tolerance is relative to the peak of the exact distribution
+                @test max_F_err < 3.0e-8 * max_F
+                # for ion data on electron grids
+                @. F_err = abs(Fi_interp_electron_units - Fi_exact_electron_units)
+                max_F_err = maximum(F_err)
+                max_F = maximum(Fi_exact_electron_units)
+                # tolerance is relative to the peak of the exact distribution
+                @test max_F_err < 3.0e-8 * max_F
+            end
+            finalize_comms!() # clean up before the next create_grids(), as the other test sets do
+        end
+
+        @testset "weak-form 2D differentiation" begin
+            # Checks the 2D mass and stiffness matrices: the weak-form second
+            # derivatives of f = exp(-vpa^2 - vperp^2) in vpa and in vperp are
+            # compared with the analytical expressions.
+            println("    - test weak-form 2D differentiation")
+
+            ngrid = 9
+            nelement_vpa = 8
+            nelement_vperp = 4
+            vpa, vpa_spectral, vperp, vperp_spectral = create_grids(ngrid,nelement_vpa,nelement_vperp,
+                                                                        Lvpa=2.0,Lvperp=1.0)
+            nc_global = vpa.n*vperp.n
+            begin_serial_region()
+            fkpl_arrays = init_fokker_planck_collisions_weak_form(vpa,vperp,vpa_spectral,vperp_spectral,
+                                                                  precompute_weights=false, print_to_screen=print_to_screen)
+            KKpar2D_with_BC_terms_sparse = fkpl_arrays.KKpar2D_with_BC_terms_sparse
+            KKperp2D_with_BC_terms_sparse = fkpl_arrays.KKperp2D_with_BC_terms_sparse
+            lu_obj_MM = fkpl_arrays.lu_obj_MM
+
+            dummy_array = allocate_float(vpa.n,vperp.n)
+            fvpavperp = allocate_float(vpa.n,vperp.n)
+            d2fvpavperp_dvpa2_exact = allocate_float(vpa.n,vperp.n)
+            d2fvpavperp_dvpa2_err = allocate_float(vpa.n,vperp.n)
+            d2fvpavperp_dvpa2_num = allocate_float(vpa.n,vperp.n)
+            d2fvpavperp_dvperp2_exact = allocate_float(vpa.n,vperp.n)
+            d2fvpavperp_dvperp2_err = allocate_float(vpa.n,vperp.n)
+            d2fvpavperp_dvperp2_num = allocate_float(vpa.n,vperp.n)
+            dfc = allocate_float(nc_global)
+            dgc = allocate_float(nc_global)
+            for ivperp in 1:vperp.n
+                for ivpa in 1:vpa.n
+                    fvpavperp[ivpa,ivperp] = exp(-vpa.grid[ivpa]^2 - vperp.grid[ivperp]^2)
+                    d2fvpavperp_dvpa2_exact[ivpa,ivperp] = (4.0*vpa.grid[ivpa]^2 - 2.0)*exp(-vpa.grid[ivpa]^2 - vperp.grid[ivperp]^2)
+                    d2fvpavperp_dvperp2_exact[ivpa,ivperp] = (4.0*vperp.grid[ivperp]^2 - 2.0)*exp(-vpa.grid[ivpa]^2 - vperp.grid[ivperp]^2)
+                end
+            end
+
+            # Make 1d views; the ldiv! calls below fill the *_num arrays through them
+            fc = vec(fvpavperp)
+            d2fc_dvpa2 = vec(d2fvpavperp_dvpa2_num)
+            d2fc_dvperp2 = vec(d2fvpavperp_dvperp2_num)
+
+            #print_vector(fc,"fc",nc_global)
+            # multiply by KKpar2D and KKperp2D, filling dfc and dgc respectively
+            mul!(dfc,KKpar2D_with_BC_terms_sparse,fc)
+            mul!(dgc,KKperp2D_with_BC_terms_sparse,fc)
+            # invert mass matrix
+            ldiv!(d2fc_dvpa2, lu_obj_MM, dfc)
+            ldiv!(d2fc_dvperp2, lu_obj_MM, dgc)
+            #print_vector(fc,"fc",nc_global)
+            @serial_region begin
+                d2fvpavperp_dvpa2_max, d2fvpavperp_dvpa2_L2 = print_test_data(d2fvpavperp_dvpa2_exact,d2fvpavperp_dvpa2_num,d2fvpavperp_dvpa2_err,"d2fdvpa2",vpa,vperp,dummy_array,print_to_screen=print_to_screen)
+                @test d2fvpavperp_dvpa2_max < 1.0e-7
+                @test d2fvpavperp_dvpa2_L2 < 1.0e-8
+                d2fvpavperp_dvperp2_max, d2fvpavperp_dvperp2_L2 = print_test_data(d2fvpavperp_dvperp2_exact,d2fvpavperp_dvperp2_num,d2fvpavperp_dvperp2_err,"d2fdvperp2",vpa,vperp,dummy_array,print_to_screen=print_to_screen)
+                @test d2fvpavperp_dvperp2_max < 1.0e-7
+                @test d2fvpavperp_dvperp2_L2 < 1.0e-8
+                #if plot_test_output
+                #    plot_test_data(d2fvpavperp_dvpa2_exact,d2fvpavperp_dvpa2_num,d2fvpavperp_dvpa2_err,"d2fvpavperp_dvpa2",vpa,vperp)
+                #    plot_test_data(d2fvpavperp_dvperp2_exact,d2fvpavperp_dvperp2_num,d2fvpavperp_dvperp2_err,"d2fvpavperp_dvperp2",vpa,vperp)
+                #end
+            end
+            finalize_comms!()
+        end
+
+        @testset "weak-form Rosenbluth potential calculation: elliptic solve" begin
+            println("    - test weak-form Rosenbluth potential calculation: elliptic solve")
+            @testset "$multipole_boundary_data" for multipole_boundary_data in (true,false)
+                println("        -  multipole_boundary_data=$multipole_boundary_data")
+                ngrid = 9
+                nelement_vpa = 8
+                nelement_vperp = 4
+                vpa, vpa_spectral, vperp, vperp_spectral = create_grids(ngrid,nelement_vpa,nelement_vperp,
+                                                                            Lvpa=12.0,Lvperp=6.0)
+                begin_serial_region()
+                fkpl_arrays = init_fokker_planck_collisions_weak_form(vpa,vperp,vpa_spectral,vperp_spectral,
+                                                                      precompute_weights=(true &&!(multipole_boundary_data)),
+                                                                      print_to_screen=print_to_screen)
+                dummy_array = allocate_float(vpa.n,vperp.n)
+                F_M = allocate_float(vpa.n,vperp.n)
+                H_M_exact = allocate_float(vpa.n,vperp.n)
+                H_M_num = allocate_shared_float(vpa.n,vperp.n)
+                H_M_err = allocate_float(vpa.n,vperp.n)
+                G_M_exact = allocate_float(vpa.n,vperp.n)
+                G_M_num = allocate_shared_float(vpa.n,vperp.n)
+                G_M_err = allocate_float(vpa.n,vperp.n)
+                d2Gdvpa2_M_exact = allocate_float(vpa.n,vperp.n)
+                d2Gdvpa2_M_num = allocate_shared_float(vpa.n,vperp.n)
+                d2Gdvpa2_M_err = allocate_float(vpa.n,vperp.n)
+                d2Gdvperp2_M_exact = allocate_float(vpa.n,vperp.n)
+                d2Gdvperp2_M_num = allocate_shared_float(vpa.n,vperp.n)
+                d2Gdvperp2_M_err = allocate_float(vpa.n,vperp.n)
+                dGdvperp_M_exact = allocate_float(vpa.n,vperp.n)
+                dGdvperp_M_num = allocate_shared_float(vpa.n,vperp.n)
+                dGdvperp_M_err = allocate_float(vpa.n,vperp.n)
+                d2Gdvperpdvpa_M_exact = allocate_float(vpa.n,vperp.n)
+                d2Gdvperpdvpa_M_num = allocate_shared_float(vpa.n,vperp.n)
+                d2Gdvperpdvpa_M_err = allocate_float(vpa.n,vperp.n)
+                dHdvpa_M_exact = allocate_float(vpa.n,vperp.n)
+                dHdvpa_M_num = allocate_shared_float(vpa.n,vperp.n)
+                dHdvpa_M_err = allocate_float(vpa.n,vperp.n)
+                dHdvperp_M_exact = allocate_float(vpa.n,vperp.n)
+                dHdvperp_M_num = allocate_shared_float(vpa.n,vperp.n)
+                dHdvperp_M_err = allocate_float(vpa.n,vperp.n)
+
+                dens, upar, vth = 1.0, 1.0, 1.0
+                begin_serial_region()
+                for ivperp in 1:vperp.n
+                    for ivpa in 1:vpa.n
+                        F_M[ivpa,ivperp] = F_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
+                        H_M_exact[ivpa,ivperp] = H_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
+                        G_M_exact[ivpa,ivperp] = G_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
+                        d2Gdvpa2_M_exact[ivpa,ivperp] = d2Gdvpa2_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
+                        d2Gdvperp2_M_exact[ivpa,ivperp] = d2Gdvperp2_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
+                        dGdvperp_M_exact[ivpa,ivperp] = dGdvperp_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
+                        d2Gdvperpdvpa_M_exact[ivpa,ivperp] = d2Gdvperpdvpa_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
+                        dHdvpa_M_exact[ivpa,ivperp] = dHdvpa_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
+                        dHdvperp_M_exact[ivpa,ivperp] = dHdvperp_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
+                    end
+                end
+                rpbd_exact = allocate_rosenbluth_potential_boundary_data(vpa,vperp)
+                # use known test function to provide exact data
+                begin_s_r_z_anyv_region()
+                calculate_rosenbluth_potential_boundary_data_exact!(rpbd_exact,
+                      H_M_exact,dHdvpa_M_exact,dHdvperp_M_exact,G_M_exact,
+                      dGdvperp_M_exact,d2Gdvperp2_M_exact,
+                      d2Gdvperpdvpa_M_exact,d2Gdvpa2_M_exact,vpa,vperp)
+                # calculate the potentials numerically
+                calculate_rosenbluth_potentials_via_elliptic_solve!(
+                     fkpl_arrays.GG, fkpl_arrays.HH, fkpl_arrays.dHdvpa, fkpl_arrays.dHdvperp,
+                     fkpl_arrays.d2Gdvpa2, fkpl_arrays.dGdvperp, fkpl_arrays.d2Gdvperpdvpa,
+                     fkpl_arrays.d2Gdvperp2, F_M, vpa, vperp, vpa_spectral, vperp_spectral,
+                     fkpl_arrays; algebraic_solve_for_d2Gdvperp2=false,
+                     calculate_GG=true, calculate_dGdvperp=true,
+                     multipole_boundary_data=multipole_boundary_data)
+                # extract C[Fs,Fs'] result
+                # and Rosenbluth potentials for testing
+                begin_s_r_z_anyv_region()
+                begin_anyv_vperp_vpa_region()
+                @loop_vperp_vpa ivperp ivpa begin
+                    G_M_num[ivpa,ivperp] = fkpl_arrays.GG[ivpa,ivperp]
+                    H_M_num[ivpa,ivperp] = fkpl_arrays.HH[ivpa,ivperp]
+                    dHdvpa_M_num[ivpa,ivperp] = fkpl_arrays.dHdvpa[ivpa,ivperp]
+                    dHdvperp_M_num[ivpa,ivperp] = fkpl_arrays.dHdvperp[ivpa,ivperp]
+                    dGdvperp_M_num[ivpa,ivperp] = fkpl_arrays.dGdvperp[ivpa,ivperp]
+                    d2Gdvperp2_M_num[ivpa,ivperp] = fkpl_arrays.d2Gdvperp2[ivpa,ivperp]
+                    d2Gdvpa2_M_num[ivpa,ivperp] = fkpl_arrays.d2Gdvpa2[ivpa,ivperp]
+                    d2Gdvperpdvpa_M_num[ivpa,ivperp] = fkpl_arrays.d2Gdvperpdvpa[ivpa,ivperp]
+                end
+                begin_serial_region()
+                @serial_region begin
+                    # test the boundary data
+                    max_H_boundary_data_err, max_dHdvpa_boundary_data_err,
+                    max_dHdvperp_boundary_data_err, max_G_boundary_data_err,
+                    max_dGdvperp_boundary_data_err, max_d2Gdvperp2_boundary_data_err,
+                    max_d2Gdvperpdvpa_boundary_data_err, max_d2Gdvpa2_boundary_data_err = test_rosenbluth_potential_boundary_data(fkpl_arrays.rpbd,rpbd_exact,vpa,vperp,print_to_screen=print_to_screen)
+                    if multipole_boundary_data
+                        atol_max_H = 5.0e-8
+                        atol_max_dHdvpa = 5.0e-8
+                        atol_max_dHdvperp = 5.0e-8
+                        atol_max_G = 5.0e-7
+                        atol_max_dGdvperp = 5.0e-7
+                        atol_max_d2Gdvperp2 = 5.0e-8
+                        atol_max_d2Gdvperpdvpa = 5.0e-7
+                        atol_max_d2Gdvpap2 = 1.0e-6
+                    else
+                        atol_max_H = 2.0e-12
+                        atol_max_dHdvpa = 2.0e-11
+                        atol_max_dHdvperp = 6.0e-9
+                        atol_max_G = 1.0e-11
+                        atol_max_dGdvperp = 2.0e-7
+                        atol_max_d2Gdvperp2 = 5.0e-8
+                        atol_max_d2Gdvperpdvpa = 2.0e-8
+                        atol_max_d2Gdvpap2 = 1.0e-11
+                    end
+                    @test max_H_boundary_data_err < atol_max_H # one check per boundary-data error returned above
+                    @test max_dHdvpa_boundary_data_err < atol_max_dHdvpa
+                    @test max_dHdvperp_boundary_data_err < atol_max_dHdvperp
+                    @test max_G_boundary_data_err < atol_max_G
+                    @test max_dGdvperp_boundary_data_err < atol_max_dGdvperp
+                    @test max_d2Gdvperp2_boundary_data_err < atol_max_d2Gdvperp2
+                    @test max_d2Gdvperpdvpa_boundary_data_err < atol_max_d2Gdvperpdvpa
+                    @test max_d2Gdvpa2_boundary_data_err < atol_max_d2Gdvpap2
+                    # test the elliptic solvers
+                    H_M_max, H_M_L2 = print_test_data(H_M_exact,H_M_num,H_M_err,"H_M",vpa,vperp,dummy_array,print_to_screen=print_to_screen)
+                    dHdvpa_M_max, dHdvpa_M_L2 = print_test_data(dHdvpa_M_exact,dHdvpa_M_num,dHdvpa_M_err,"dHdvpa_M",vpa,vperp,dummy_array,print_to_screen=print_to_screen)
+                    dHdvperp_M_max, dHdvperp_M_L2 = print_test_data(dHdvperp_M_exact,dHdvperp_M_num,dHdvperp_M_err,"dHdvperp_M",vpa,vperp,dummy_array,print_to_screen=print_to_screen)
+                    G_M_max, G_M_L2 = print_test_data(G_M_exact,G_M_num,G_M_err,"G_M",vpa,vperp,dummy_array,print_to_screen=print_to_screen)
+                    d2Gdvpa2_M_max, d2Gdvpa2_M_L2 = print_test_data(d2Gdvpa2_M_exact,d2Gdvpa2_M_num,d2Gdvpa2_M_err,"d2Gdvpa2_M",vpa,vperp,dummy_array,print_to_screen=print_to_screen)
+                    dGdvperp_M_max, dGdvperp_M_L2 = print_test_data(dGdvperp_M_exact,dGdvperp_M_num,dGdvperp_M_err,"dGdvperp_M",vpa,vperp,dummy_array,print_to_screen=print_to_screen)
+                    d2Gdvperpdvpa_M_max, d2Gdvperpdvpa_M_L2 = print_test_data(d2Gdvperpdvpa_M_exact,d2Gdvperpdvpa_M_num,d2Gdvperpdvpa_M_err,"d2Gdvperpdvpa_M",vpa,vperp,dummy_array,print_to_screen=print_to_screen)
+                    d2Gdvperp2_M_max, d2Gdvperp2_M_L2 = print_test_data(d2Gdvperp2_M_exact,d2Gdvperp2_M_num,d2Gdvperp2_M_err,"d2Gdvperp2_M",vpa,vperp,dummy_array,print_to_screen=print_to_screen)
+                    if multipole_boundary_data
+                        atol_max_H = 2.0e-7
+                        atol_L2_H = 5.0e-9
+                        atol_max_dHdvpa = 2.0e-6
+                        atol_L2_dHdvpa = 5.0e-8
+                        atol_max_dHdvperp = 2.0e-5
+                        atol_L2_dHdvperp = 1.0e-7
+                        atol_max_G = 5.0e-7
+                        atol_L2_G = 5.0e-8
+                        atol_max_d2Gdvpap2 = 1.0e-6
+                        atol_L2_d2Gdvpa2 = 5.0e-8
+                        atol_max_dGdvperp = 2.0e-6
+                        atol_L2_dGdvperp = 2.0e-7
+                        atol_max_d2Gdvperpdvpa = 2.0e-6
+                        atol_L2_d2Gdvperpdvpa = 5.0e-8
+                        atol_max_d2Gdvperp2 = 5.0e-7
+                        atol_L2_d2Gdvperp2 = 5.0e-8
+                    else
+                        atol_max_H = 2.0e-7
+                        atol_L2_H = 5.0e-9
+                        atol_max_dHdvpa = 2.0e-6
+                        atol_L2_dHdvpa = 5.0e-8
+                        atol_max_dHdvperp = 2.0e-5
+                        atol_L2_dHdvperp = 1.0e-7
+                        atol_max_G = 2.0e-8
+                        atol_L2_G = 7.0e-10
+                        atol_max_d2Gdvpap2 = 2.0e-7
+                        atol_L2_d2Gdvpa2 = 4.0e-9
+                        atol_max_dGdvperp = 2.0e-6
+                        atol_L2_dGdvperp = 2.0e-7
+                        atol_max_d2Gdvperpdvpa = 2.0e-6
+                        atol_L2_d2Gdvperpdvpa = 2.0e-8
+                        atol_max_d2Gdvperp2 = 3.0e-7
+                        atol_L2_d2Gdvperp2 = 2.0e-8
+                    end
+                    @test H_M_max < atol_max_H
+                    @test H_M_L2 < atol_L2_H
+                    @test dHdvpa_M_max < atol_max_dHdvpa
+                    @test dHdvpa_M_L2 < atol_L2_dHdvpa
+                    @test dHdvperp_M_max < atol_max_dHdvperp
+                    @test dHdvperp_M_L2 < atol_L2_dHdvperp
+                    @test G_M_max < atol_max_G
+                    @test G_M_L2 < atol_L2_G
+                    @test d2Gdvpa2_M_max < atol_max_d2Gdvpap2
+                    @test d2Gdvpa2_M_L2 < atol_L2_d2Gdvpa2
+                    @test dGdvperp_M_max < atol_max_dGdvperp
+                    @test dGdvperp_M_L2 < atol_L2_dGdvperp
+                    @test d2Gdvperpdvpa_M_max < atol_max_d2Gdvperpdvpa
+                    @test d2Gdvperpdvpa_M_L2 < atol_L2_d2Gdvperpdvpa
+                    @test d2Gdvperp2_M_max < atol_max_d2Gdvperp2
+                    @test d2Gdvperp2_M_L2 < atol_L2_d2Gdvperp2
+                end
+                finalize_comms!()
+            end
+        end
+
+        @testset "weak-form collision operator calculation" begin
+            println("    - test weak-form collision operator calculation")
+            ngrid = 9
+            nelement_vpa = 8
+            nelement_vperp = 4
+            vpa, vpa_spectral, vperp, vperp_spectral = create_grids(ngrid,nelement_vpa,nelement_vperp,
+                                                                        Lvpa=12.0,Lvperp=6.0)
+            begin_serial_region()
+            fkpl_arrays = init_fokker_planck_collisions_weak_form(vpa,vperp,vpa_spectral,vperp_spectral,
+                                                                  precompute_weights=true, print_to_screen=print_to_screen)
+
+            @testset "test_self_operator=$test_self_operator test_numerical_conserving_terms=$test_numerical_conserving_terms test_parallelism = $test_parallelism test_dense_construction=$test_dense_construction use_Maxwellian_Rosenbluth_coefficients=$use_Maxwellian_Rosenbluth_coefficients use_Maxwellian_field_particle_distribution=$use_Maxwellian_field_particle_distribution algebraic_solve_for_d2Gdvperp2=$algebraic_solve_for_d2Gdvperp2" for
+                    (test_self_operator, test_numerical_conserving_terms, test_parallelism, test_dense_construction,
+                     use_Maxwellian_Rosenbluth_coefficients, use_Maxwellian_field_particle_distribution,
+                     algebraic_solve_for_d2Gdvperp2) in ((true,false,false,false,false,false,false),(false,false,false,false,false,false,false),
+                                                         (true,true,false,false,false,false,false),(true,false,true,false,false,false,false),
+                                                         (true,false,false,true,false,false,false),(true,false,false,false,true,false,false),
+                                                         (true,false,false,false,false,true,false),(true,false,false,false,false,false,true))
+
+                dummy_array = allocate_float(vpa.n,vperp.n)
+                Fs_M = allocate_float(vpa.n,vperp.n)
+                F_M = allocate_float(vpa.n,vperp.n)
+                C_M_num = allocate_shared_float(vpa.n,vperp.n)
+                C_M_exact = allocate_float(vpa.n,vperp.n)
+                C_M_err = allocate_float(vpa.n,vperp.n)
+                if test_self_operator
+                    dens, upar, vth = 1.0, 1.0, 1.0
+                    denss, upars, vths = dens, upar, vth
+                else
+                    denss, upars, vths = 1.0, -1.0, 2.0/3.0
+                    dens, upar, vth = 1.0, 1.0, 1.0
+                end
+                ms = 1.0
+                msp = 1.0
+                nussp = 1.0
+                begin_serial_region()
+                for ivperp in 1:vperp.n
+                    for ivpa in 1:vpa.n
+                        Fs_M[ivpa,ivperp] = F_Maxwellian(denss,upars,vths,vpa,vperp,ivpa,ivperp)
+                        F_M[ivpa,ivperp] = F_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
+                        C_M_exact[ivpa,ivperp] = Cssp_Maxwellian_inputs(denss,upars,vths,ms,
+                                                                        dens,upar,vth,msp,
+                                                                        nussp,vpa,vperp,ivpa,ivperp)
+                    end
+                end
+                begin_s_r_z_anyv_region()
+                fokker_planck_collision_operator_weak_form!(Fs_M,F_M,ms,msp,nussp,
+                                                 fkpl_arrays,
+                                                 vperp, vpa, vperp_spectral, vpa_spectral,
+                                                 test_assembly_serial=test_parallelism,
+                                                 use_Maxwellian_Rosenbluth_coefficients=use_Maxwellian_Rosenbluth_coefficients,
+                                                 use_Maxwellian_field_particle_distribution=use_Maxwellian_field_particle_distribution,
+                                                 algebraic_solve_for_d2Gdvperp2=algebraic_solve_for_d2Gdvperp2,
+                                                 calculate_GG = false, calculate_dGdvperp=false)
+                if test_numerical_conserving_terms && test_self_operator
+                    # enforce the boundary conditions on CC before it is used for timestepping
+                    enforce_vpavperp_BCs!(fkpl_arrays.CC,vpa,vperp,vpa_spectral,vperp_spectral)
+                    # make ad-hoc conserving corrections
+                    conserving_corrections!(fkpl_arrays.CC,Fs_M,vpa,vperp,dummy_array)
+                end
+                # extract C[Fs,Fs'] result
+                begin_s_r_z_anyv_region()
+                begin_anyv_vperp_vpa_region()
+                @loop_vperp_vpa ivperp ivpa begin
+                    C_M_num[ivpa,ivperp] = fkpl_arrays.CC[ivpa,ivperp]
+                end
+                begin_serial_region()
+                @serial_region begin
+                    C_M_max, C_M_L2 = print_test_data(C_M_exact,C_M_num,C_M_err,"C_M",vpa,vperp,dummy_array,print_to_screen=print_to_screen)
+                    if test_self_operator && !test_numerical_conserving_terms && !use_Maxwellian_Rosenbluth_coefficients && !use_Maxwellian_field_particle_distribution
+                        atol_max = 6.0e-4
+                        atol_L2 = 7.0e-6
+                    elseif test_self_operator && test_numerical_conserving_terms && !use_Maxwellian_Rosenbluth_coefficients && !use_Maxwellian_field_particle_distribution
+                        atol_max = 7.0e-4
+                        atol_L2 = 7.0e-6
+                    elseif test_self_operator && !test_numerical_conserving_terms && use_Maxwellian_Rosenbluth_coefficients && !use_Maxwellian_field_particle_distribution
+                        atol_max = 8.0e-4
+                        atol_L2 = 8.1e-6
+                    elseif test_self_operator && !test_numerical_conserving_terms && !use_Maxwellian_Rosenbluth_coefficients && use_Maxwellian_field_particle_distribution
+                        atol_max = 1.1e-3
+                        atol_L2 = 9.0e-6
+                    else
+                        atol_max = 7.0e-2
+                        atol_L2 = 6.0e-4
+                    end
+                    @test C_M_max < atol_max
+                    @test C_M_L2 < atol_L2
+                    # calculate the entropy production
+                    lnfC = fkpl_arrays.rhsvpavperp
+                    @loop_vperp_vpa ivperp ivpa begin
+                        lnfC[ivpa,ivperp] = Fs_M[ivpa,ivperp]*C_M_num[ivpa,ivperp]
+                    end
+                    dSdt = - get_density(lnfC,vpa,vperp)
+                    if test_self_operator && !test_numerical_conserving_terms
+                        if algebraic_solve_for_d2Gdvperp2
+                            rtol, atol = 0.0, 1.0e-7
+                        else
+                            rtol, atol = 0.0, 1.0e-8
+                        end
+                        @test isapprox(dSdt, rtol ; atol=atol)
+                        delta_n = get_density(C_M_num, vpa, vperp)
+                        delta_upar = get_upar(C_M_num, vpa, vperp, dens)
+                        delta_ppar = msp*get_ppar(C_M_num, vpa, vperp, upar)
+                        delta_pperp = msp*get_pperp(C_M_num, vpa, vperp)
+                        delta_pressure = get_pressure(delta_ppar,delta_pperp)
+                        rtol, atol = 0.0, 1.0e-12
+                        @test isapprox(delta_n, rtol ; atol=atol)
+                        rtol, atol = 0.0, 1.0e-9
+                        @test isapprox(delta_upar, rtol ; atol=atol)
+                        if algebraic_solve_for_d2Gdvperp2
+                            rtol, atol = 0.0, 1.0e-7
+                        else
+                            rtol, atol = 0.0, 1.0e-8
+                        end
+                        @test isapprox(delta_pressure, rtol ; atol=atol)
+                        if print_to_screen
+                            println("dSdt: $dSdt should be >0.0")
+                            println("delta_n: ", delta_n)
+                            println("delta_upar: ", delta_upar)
+                            println("delta_pressure: ", delta_pressure)
+                        end
+                    elseif test_self_operator && test_numerical_conserving_terms
+                        rtol, atol = 0.0, 6.0e-7
+                        @test isapprox(dSdt, rtol ; atol=atol)
+                        delta_n = get_density(C_M_num, vpa, vperp)
+                        delta_upar = get_upar(C_M_num, vpa, vperp, dens)
+                        delta_ppar = msp*get_ppar(C_M_num, vpa, vperp, upar)
+                        delta_pperp = msp*get_pperp(C_M_num, vpa, vperp)
+                        delta_pressure = get_pressure(delta_ppar,delta_pperp)
+                        rtol, atol = 0.0, 1.0e-15
+                        @test isapprox(delta_n, rtol ; atol=atol)
+                        rtol, atol = 0.0, 1.0e-15
+                        @test isapprox(delta_upar, rtol ; atol=atol)
+                        rtol, atol = 0.0, 1.0e-15
+                        @test isapprox(delta_pressure, rtol ; atol=atol)
+                        if print_to_screen
+                            println("dSdt: $dSdt should be >0.0")
+                            println("delta_n: ", delta_n)
+                            println("delta_upar: ", delta_upar)
+                            println("delta_pressure: ", delta_pressure)
+                        end
+                    else
+                        atol = 1.0e-4
+                        @test isapprox(dSdt, 2.543251178128757 ; atol=atol)
+                        delta_n = get_density(C_M_num, vpa, vperp)
+                        rtol, atol = 0.0, 1.0e-12
+                        @test isapprox(delta_n, rtol ; atol=atol)
+                        if print_to_screen
+                            println("dSdt: $dSdt")
+                            println("delta_n: ", delta_n)
+                        end
+                    end
+                end
+            end
+            finalize_comms!()
+        end
+
+        @testset "weak-form (slowing-down) collision operator calculation" begin
+            println("    - test weak-form (slowing-down) collision operator calculation")
+            ngrid = 9
+            nelement_vpa = 16
+            nelement_vperp = 8
+            vpa, vpa_spectral, vperp, vperp_spectral = create_grids(ngrid,nelement_vpa,nelement_vperp,
+                                                                        Lvpa=12.0,Lvperp=6.0)
+            begin_serial_region()
+            fkpl_arrays = init_fokker_planck_collisions_weak_form(vpa,vperp,vpa_spectral,vperp_spectral,
+                                                                  precompute_weights=true, print_to_screen=print_to_screen)
+
+            @testset "slowing_down_test=true test_numerical_conserving_terms=$test_numerical_conserving_terms" for test_numerical_conserving_terms in (true,false)
+
+                dummy_array = allocate_float(vpa.n,vperp.n)
+                Fs_M = allocate_float(vpa.n,vperp.n)
+                F_M = allocate_float(vpa.n,vperp.n)
+                C_M_num = allocate_shared_float(vpa.n,vperp.n)
+                C_M_exact = allocate_float(vpa.n,vperp.n)
+                C_M_err = allocate_float(vpa.n,vperp.n)
+
+                # pick a set of parameters that represent slowing down
+                # on slow ions and faster electrons, but which are close
+                # enough to 1 for errors comparable to the self-collision operator;
+                # increasing or reducing vth or the masses increases the errors
+                dens, upar, vth = 1.0, 1.0, 1.0
+                mref = 1.0
+                Zref = 1.0
+                msp = [1.0,0.2]#[0.25, 0.25/1836.0]
+                Zsp = [0.5,0.5]#[0.5, 0.5]
+                denssp = [1.0,1.0]#[1.0, 1.0]
+                uparsp = [0.0,0.0]#[0.0, 0.0]
+                vthsp = [sqrt(0.5/msp[1]), sqrt(0.5/msp[2])]#[sqrt(0.01/msp[1]), sqrt(0.01/msp[2])]
+                nsprime = size(msp,1)
+                nuref = 1.0
+
+                begin_serial_region()
+                for ivperp in 1:vperp.n
+                    for ivpa in 1:vpa.n
+                        Fs_M[ivpa,ivperp] = F_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
+                        C_M_exact[ivpa,ivperp] = 0.0
+                    end
+                end
+                # sum up contributions to cross-collision operator
+                for isp in 1:nsprime
+                    zfac = (Zsp[isp]/Zref)^2
+                    nussp = nuref*zfac
+                    for ivperp in 1:vperp.n
+                        for ivpa in 1:vpa.n
+                            C_M_exact[ivpa,ivperp] += Cssp_Maxwellian_inputs(dens,upar,vth,mref,
+                                                                            denssp[isp],uparsp[isp],vthsp[isp],msp[isp],
+                                                                            nussp,vpa,vperp,ivpa,ivperp)
+                        end
+                    end
+                end
+                begin_s_r_z_anyv_region()
+                @views fokker_planck_collision_operator_weak_form_Maxwellian_Fsp!(Fs_M[:,:],
+                                     nuref,mref,Zref,msp,Zsp,denssp,uparsp,vthsp,
+                                     fkpl_arrays,vperp,vpa,vperp_spectral,vpa_spectral)
+                if test_numerical_conserving_terms
+                    # enforce the boundary conditions on CC before it is used for timestepping
+                    enforce_vpavperp_BCs!(fkpl_arrays.CC,vpa,vperp,vpa_spectral,vperp_spectral)
+                    # make ad-hoc conserving corrections
+                    density_conserving_correction!(fkpl_arrays.CC,Fs_M,vpa,vperp,dummy_array)
+                end
+                # extract C[Fs,Fs'] result
+                begin_s_r_z_anyv_region()
+                begin_anyv_vperp_vpa_region()
+                @loop_vperp_vpa ivperp ivpa begin
+                    C_M_num[ivpa,ivperp] = fkpl_arrays.CC[ivpa,ivperp]
+                end
+                begin_serial_region()
+                @serial_region begin
+                    C_M_max, C_M_L2 = print_test_data(C_M_exact,C_M_num,C_M_err,"C_M",vpa,vperp,dummy_array,print_to_screen=print_to_screen)
+                    atol_max = 7.0e-2
+                    atol_L2 = 6.0e-4
+                    @test C_M_max < atol_max
+                    @test C_M_L2 < atol_L2
+                    if !test_numerical_conserving_terms
+                        delta_n = get_density(C_M_num, vpa, vperp)
+                        rtol, atol = 0.0, 1.0e-12
+                        @test isapprox(delta_n, rtol ; atol=atol)
+                        if print_to_screen
+                            println("delta_n: ", delta_n)
+                        end
+                    elseif test_numerical_conserving_terms
+                        delta_n = get_density(C_M_num, vpa, vperp)
+                        rtol, atol = 0.0, 1.0e-15
+                        @test isapprox(delta_n, rtol ; atol=atol)
+                        if print_to_screen
+                            println("delta_n: ", delta_n)
+                        end
+                    end
+                end
+            end
+            finalize_comms!()
+        end
+
+        @testset "weak-form Rosenbluth potential calculation: direct integration" begin
+            println("    - test weak-form Rosenbluth potential calculation: direct integration")
+            ngrid = 5 # chosen for a quick test -- direct integration is slow!
+            nelement_vpa = 8
+            nelement_vperp = 4
+            vpa, vpa_spectral, vperp, vperp_spectral = create_grids(ngrid,nelement_vpa,nelement_vperp,
+                                                                        Lvpa=12.0,Lvperp=6.0)
+            begin_serial_region()
+            fkpl_arrays = init_fokker_planck_collisions_direct_integration(vperp,vpa,precompute_weights=true,print_to_screen=print_to_screen)
+            dummy_array = allocate_float(vpa.n,vperp.n)
+            F_M = allocate_float(vpa.n,vperp.n)
+            H_M_exact = allocate_float(vpa.n,vperp.n)
+            H_M_num = allocate_shared_float(vpa.n,vperp.n)
+            H_M_err = allocate_float(vpa.n,vperp.n)
+            G_M_exact = allocate_float(vpa.n,vperp.n)
+            G_M_num = allocate_shared_float(vpa.n,vperp.n)
+            G_M_err = allocate_float(vpa.n,vperp.n)
+            d2Gdvpa2_M_exact = allocate_float(vpa.n,vperp.n)
+            d2Gdvpa2_M_num = allocate_shared_float(vpa.n,vperp.n)
+            d2Gdvpa2_M_err = allocate_float(vpa.n,vperp.n)
+            d2Gdvperp2_M_exact = allocate_float(vpa.n,vperp.n)
+            d2Gdvperp2_M_num = allocate_shared_float(vpa.n,vperp.n)
+            d2Gdvperp2_M_err = allocate_float(vpa.n,vperp.n)
+            dGdvperp_M_exact = allocate_float(vpa.n,vperp.n)
+            dGdvperp_M_num = allocate_shared_float(vpa.n,vperp.n)
+            dGdvperp_M_err = allocate_float(vpa.n,vperp.n)
+            d2Gdvperpdvpa_M_exact = allocate_float(vpa.n,vperp.n)
+            d2Gdvperpdvpa_M_num = allocate_shared_float(vpa.n,vperp.n)
+            d2Gdvperpdvpa_M_err = allocate_float(vpa.n,vperp.n)
+            dHdvpa_M_exact = allocate_float(vpa.n,vperp.n)
+            dHdvpa_M_num = allocate_shared_float(vpa.n,vperp.n)
+            dHdvpa_M_err = allocate_float(vpa.n,vperp.n)
+            dHdvperp_M_exact = allocate_float(vpa.n,vperp.n)
+            dHdvperp_M_num = allocate_shared_float(vpa.n,vperp.n)
+            dHdvperp_M_err = allocate_float(vpa.n,vperp.n)
+
+            dens, upar, vth = 1.0, 1.0, 1.0
+            begin_serial_region()
+            for ivperp in 1:vperp.n
+                for ivpa in 1:vpa.n
+                    F_M[ivpa,ivperp] = F_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
+                    H_M_exact[ivpa,ivperp] = H_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
+                    G_M_exact[ivpa,ivperp] = G_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
+                    d2Gdvpa2_M_exact[ivpa,ivperp] = d2Gdvpa2_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
+                    d2Gdvperp2_M_exact[ivpa,ivperp] = d2Gdvperp2_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
+                    dGdvperp_M_exact[ivpa,ivperp] = dGdvperp_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
+                    d2Gdvperpdvpa_M_exact[ivpa,ivperp] = d2Gdvperpdvpa_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
+                    dHdvpa_M_exact[ivpa,ivperp] = dHdvpa_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
+                    dHdvperp_M_exact[ivpa,ivperp] = dHdvperp_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
+                end
+            end
+            # calculate the potentials numerically
+            begin_s_r_z_anyv_region()
+            calculate_rosenbluth_potentials_via_direct_integration!(G_M_num,H_M_num,dHdvpa_M_num,dHdvperp_M_num,
+             d2Gdvpa2_M_num,dGdvperp_M_num,d2Gdvperpdvpa_M_num,d2Gdvperp2_M_num,F_M,
+             vpa,vperp,vpa_spectral,vperp_spectral,fkpl_arrays)
+            begin_serial_region()
+            @serial_region begin
+                # test the integration
+                # to recalculate absolute tolerances atol, set print_to_screen = true
+                H_M_max, H_M_L2 = print_test_data(H_M_exact,H_M_num,H_M_err,"H_M",vpa,vperp,dummy_array,print_to_screen=print_to_screen)
+                dHdvpa_M_max, dHdvpa_M_L2 = print_test_data(dHdvpa_M_exact,dHdvpa_M_num,dHdvpa_M_err,"dHdvpa_M",vpa,vperp,dummy_array,print_to_screen=print_to_screen)
+                dHdvperp_M_max, dHdvperp_M_L2 = print_test_data(dHdvperp_M_exact,dHdvperp_M_num,dHdvperp_M_err,"dHdvperp_M",vpa,vperp,dummy_array,print_to_screen=print_to_screen)
+                G_M_max, G_M_L2 = print_test_data(G_M_exact,G_M_num,G_M_err,"G_M",vpa,vperp,dummy_array,print_to_screen=print_to_screen)
+                d2Gdvpa2_M_max, d2Gdvpa2_M_L2 = print_test_data(d2Gdvpa2_M_exact,d2Gdvpa2_M_num,d2Gdvpa2_M_err,"d2Gdvpa2_M",vpa,vperp,dummy_array,print_to_screen=print_to_screen)
+                dGdvperp_M_max, dGdvperp_M_L2 = print_test_data(dGdvperp_M_exact,dGdvperp_M_num,dGdvperp_M_err,"dGdvperp_M",vpa,vperp,dummy_array,print_to_screen=print_to_screen)
+                d2Gdvperpdvpa_M_max, d2Gdvperpdvpa_M_L2 = print_test_data(d2Gdvperpdvpa_M_exact,d2Gdvperpdvpa_M_num,d2Gdvperpdvpa_M_err,"d2Gdvperpdvpa_M",vpa,vperp,dummy_array,print_to_screen=print_to_screen)
+                d2Gdvperp2_M_max, d2Gdvperp2_M_L2 = print_test_data(d2Gdvperp2_M_exact,d2Gdvperp2_M_num,d2Gdvperp2_M_err,"d2Gdvperp2_M",vpa,vperp,dummy_array,print_to_screen=print_to_screen)
+                atol_max = 2.1e-4
+                atol_L2 = 6.5e-6
+                @test H_M_max < atol_max
+                @test H_M_L2 < atol_L2
+                atol_max = 1.5e-3
+                atol_L2 = 6.5e-5
+                @test dHdvpa_M_max < atol_max
+                @test dHdvpa_M_L2 < atol_L2
+                atol_max = 8.0e-4
+                atol_L2 = 4.0e-5
+                @test dHdvperp_M_max < atol_max
+                @test dHdvperp_M_L2 < atol_L2
+                atol_max = 1.1e-4
+                atol_L2 = 4.0e-5
+                @test G_M_max < atol_max
+                @test G_M_L2 < atol_L2
+                atol_max = 2.5e-4
+                atol_L2 = 1.2e-5
+                @test d2Gdvpa2_M_max < atol_max
+                @test d2Gdvpa2_M_L2 < atol_L2
+                atol_max = 9.0e-5
+                atol_L2 = 6.0e-5
+                @test dGdvperp_M_max < atol_max
+                @test dGdvperp_M_L2 < atol_L2
+                atol_max = 1.1e-4
+                atol_L2 = 9.0e-6
+                @test d2Gdvperpdvpa_M_max < atol_max
+                @test d2Gdvperpdvpa_M_L2 < atol_L2
+                atol_max = 2.0e-4
+                atol_L2 = 1.1e-5
+                @test d2Gdvperp2_M_max < atol_max
+                @test d2Gdvperp2_M_L2 < atol_L2
+            end
+            finalize_comms!()
+        end
+
+
+    end
+end
+
+end #FokkerPlanckTests
+
+using .FokkerPlanckTests
+
+FokkerPlanckTests.runtests()
+

From 59fd6949279388e3f34f48097d00ac5eec0e1e10 Mon Sep 17 00:00:00 2001
From: Michael Hardman <29800382+mrhardman@users.noreply.github.com>
Date: Sat, 9 Nov 2024 20:40:34 +0000
Subject: [PATCH 34/41] Add a more descriptive enum option to switch between
 boundary data options for the calculation of the Rosenbluth potentials.

---
 moment_kinetics/src/fokker_planck.jl          | 11 ++++++-----
 moment_kinetics/src/fokker_planck_calculus.jl |  5 +++--
 moment_kinetics/src/input_structs.jl          | 14 ++++++++++++--
 moment_kinetics/src/time_advance.jl           |  6 +++++-
 moment_kinetics/test/fokker_planck_tests.jl   | 18 ++++++++++++------
 test_scripts/2D_FEM_assembly_test.jl          | 17 +++++++++++------
 6 files changed, 49 insertions(+), 22 deletions(-)

diff --git a/moment_kinetics/src/fokker_planck.jl b/moment_kinetics/src/fokker_planck.jl
index a31d3f78e..168a222b3 100644
--- a/moment_kinetics/src/fokker_planck.jl
+++ b/moment_kinetics/src/fokker_planck.jl
@@ -53,6 +53,7 @@ using ..velocity_moments: get_density, get_upar, get_ppar, get_pperp, get_qpar,
 using ..looping
 using ..timer_utils
 using ..input_structs: fkpl_collisions_input, set_defaults_and_check_section!
+using ..input_structs: multipole_expansion, direct_integration
 using ..reference_parameters: get_reference_collision_frequency_ii
 using ..fokker_planck_calculus: init_Rosenbluth_potential_integration_weights!
 using ..fokker_planck_calculus: init_Rosenbluth_potential_boundary_integration_weights!
@@ -94,7 +95,7 @@ function setup_fkpl_collisions_input(toml_input::Dict)
        frequency_option = "reference_parameters",
        self_collisions = true,
        use_conserving_corrections = true,
-       multipole_boundary_data = false,
+       boundary_data = direct_integration,
        slowing_down_test = false,
        sd_density = 1.0,
        sd_temp = 0.01,
@@ -338,7 +339,7 @@ Function for advancing with the explicit, weak-form, self-collision operator.
     Zi = collisions.fkpl.Zi # generalise!
     nussp = nuref*(Zi^4) # include charge number factor for self collisions
     use_conserving_corrections = collisions.fkpl.use_conserving_corrections
-    multipole_boundary_data = collisions.fkpl.multipole_boundary_data
+    boundary_data_option = collisions.fkpl.boundary_data
     # N.B. parallelisation using special 'anyv' region
     begin_s_r_z_anyv_region()
     @loop_s_r_z is ir iz begin
@@ -346,7 +347,7 @@ Function for advancing with the explicit, weak-form, self-collision operator.
         @views fokker_planck_collision_operator_weak_form!(
             pdf_in[:,:,iz,ir,is], pdf_in[:,:,iz,ir,is], ms, msp, nussp, fkpl_arrays,
             vperp, vpa, vperp_spectral, vpa_spectral, 
-            multipole_boundary_data = multipole_boundary_data)
+            boundary_data_option = boundary_data_option)
         # enforce the boundary conditions on CC before it is used for timestepping
         enforce_vpavperp_BCs!(fkpl_arrays.CC,vpa,vperp,vpa_spectral,vperp_spectral)
         # make ad-hoc conserving corrections
@@ -402,7 +403,7 @@ with \$\\gamma_\\mathrm{ref} = 2 \\pi e^4 \\ln \\Lambda_{ii} / (4 \\pi
                          use_Maxwellian_field_particle_distribution=false,
                          algebraic_solve_for_d2Gdvperp2 = false, calculate_GG=false,
                          calculate_dGdvperp=false,
-                         multipole_boundary_data=false) = begin
+                         boundary_data_option=direct_integration) = begin
     @boundscheck vpa.n == size(ffsp_in,1) || throw(BoundsError(ffsp_in))
     @boundscheck vperp.n == size(ffsp_in,2) || throw(BoundsError(ffsp_in))
     @boundscheck vpa.n == size(ffs_in,1) || throw(BoundsError(ffs_in))
@@ -453,7 +454,7 @@ with \$\\gamma_\\mathrm{ref} = 2 \\pi e^4 \\ln \\Lambda_{ii} / (4 \\pi
              vpa,vperp,vpa_spectral,vperp_spectral,fkpl_arrays,
              algebraic_solve_for_d2Gdvperp2=algebraic_solve_for_d2Gdvperp2,
              calculate_GG=calculate_GG,calculate_dGdvperp=calculate_dGdvperp,
-             multipole_boundary_data=multipole_boundary_data)
+             boundary_data_option=boundary_data_option)
     end
     # assemble the RHS of the collision operator matrix eq
     if use_Maxwellian_field_particle_distribution
diff --git a/moment_kinetics/src/fokker_planck_calculus.jl b/moment_kinetics/src/fokker_planck_calculus.jl
index 79f6e2ede..9831927f4 100644
--- a/moment_kinetics/src/fokker_planck_calculus.jl
+++ b/moment_kinetics/src/fokker_planck_calculus.jl
@@ -42,6 +42,7 @@ using ..communication: MPISharedArray, global_rank
 using ..lagrange_polynomials: lagrange_poly, lagrange_poly_optimised
 using ..looping
 using ..velocity_moments: integrate_over_vspace
+using ..input_structs: direct_integration, multipole_expansion
 using moment_kinetics.gauss_legendre: get_QQ_local!
 using Dates
 using SpecialFunctions: ellipk, ellipe
@@ -2914,7 +2915,7 @@ function calculate_rosenbluth_potentials_via_elliptic_solve!(GG,HH,dHdvpa,dHdvpe
              d2Gdvpa2,dGdvperp,d2Gdvperpdvpa,d2Gdvperp2,ffsp_in,
              vpa,vperp,vpa_spectral,vperp_spectral,fkpl_arrays::fokkerplanck_weakform_arrays_struct;
              algebraic_solve_for_d2Gdvperp2=false,calculate_GG=false,calculate_dGdvperp=false,
-             multipole_boundary_data=false)
+             boundary_data_option=direct_integration)
     
     # extract the necessary precalculated and buffer arrays from fokkerplanck_arrays
     MM2D_sparse = fkpl_arrays.MM2D_sparse
@@ -2943,7 +2944,7 @@ function calculate_rosenbluth_potentials_via_elliptic_solve!(GG,HH,dHdvpa,dHdvpe
     rhsvpavperp_copy3 = fkpl_arrays.rhsvpavperp_copy3
     
     # calculate the boundary data
-    if multipole_boundary_data
+    if boundary_data_option == multipole_expansion
         calculate_rosenbluth_potential_boundary_data_multipole!(rpbd,ffsp_in,vpa,vperp,vpa_spectral,vperp_spectral,
           calculate_GG=calculate_GG,calculate_dGdvperp=(calculate_dGdvperp||algebraic_solve_for_d2Gdvperp2))
     else # use direct integration on the boundary
diff --git a/moment_kinetics/src/input_structs.jl b/moment_kinetics/src/input_structs.jl
index 8d421fa48..8a0fe2388 100644
--- a/moment_kinetics/src/input_structs.jl
+++ b/moment_kinetics/src/input_structs.jl
@@ -477,6 +477,16 @@ Base.@kwdef struct krook_collisions_input
     frequency_option::String # "reference_parameters" # "manual", 
 end
 
+"""
+"""
+@enum boundary_data_type begin # method used to provide boundary data for the Rosenbluth potential calculations
+    direct_integration # boundary data computed by direct integration on the boundary
+    multipole_expansion # boundary data computed by the multipole routine (calculate_rosenbluth_potential_boundary_data_multipole!)
+end
+export boundary_data_type
+export direct_integration
+export multipole_expansion
+
 Base.@kwdef struct fkpl_collisions_input
     # option to check if fokker planck frequency should be > 0
     use_fokker_planck::Bool
@@ -489,8 +499,8 @@ Base.@kwdef struct fkpl_collisions_input
     self_collisions::Bool
     # option to determine if ad-hoc moment_kinetics-style conserving corrections are used
     use_conserving_corrections::Bool
-    # option to determine if multipole expansion is used to provide boundary data for Rosenbluth potential calculations.
-    multipole_boundary_data::Bool
+    # enum option to determine which method is used to provide boundary data for Rosenbluth potential calculations.
+    boundary_data::boundary_data_type
     # option to determine if cross-collisions against fixed Maxwellians are used
     slowing_down_test::Bool
     # Setting to switch between different options for Fokker-Planck collision frequency input
diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl
index 31a4777a4..3d4ec26e8 100644
--- a/moment_kinetics/src/time_advance.jl
+++ b/moment_kinetics/src/time_advance.jl
@@ -734,7 +734,11 @@ function setup_time_advance!(pdf, fields, vz, vr, vzeta, vpa, vperp, z, r, gyrop
                                                   n_neutral_species_alloc, t_params)
     # create arrays for Fokker-Planck collisions 
     if advance.explicit_weakform_fp_collisions
-        precompute_weights = true && !(collisions.fkpl.multipole_boundary_data)
+        if collisions.fkpl.boundary_data == direct_integration
+            precompute_weights = true
+        else
+            precompute_weights = false
+        end
         fp_arrays = init_fokker_planck_collisions_weak_form(vpa,vperp,vpa_spectral,vperp_spectral;
                       precompute_weights=precompute_weights)
     else
diff --git a/moment_kinetics/test/fokker_planck_tests.jl b/moment_kinetics/test/fokker_planck_tests.jl
index 16fad7bb0..93b6d687e 100644
--- a/moment_kinetics/test/fokker_planck_tests.jl
+++ b/moment_kinetics/test/fokker_planck_tests.jl
@@ -11,6 +11,7 @@ using moment_kinetics.array_allocation: allocate_float, allocate_shared_float
 using moment_kinetics.coordinates: define_coordinate
 using moment_kinetics.type_definitions: mk_float, mk_int
 using moment_kinetics.velocity_moments: get_density, get_upar, get_ppar, get_pperp, get_pressure
+using moment_kinetics.input_structs: direct_integration, multipole_expansion
 
 using moment_kinetics.fokker_planck: init_fokker_planck_collisions_weak_form, fokker_planck_collision_operator_weak_form!
 using moment_kinetics.fokker_planck: conserving_corrections!, init_fokker_planck_collisions_direct_integration
@@ -207,16 +208,21 @@ function runtests()
 
         @testset "weak-form Rosenbluth potential calculation: elliptic solve" begin
             println("    - test weak-form Rosenbluth potential calculation: elliptic solve")
-            @testset "$multipole_boundary_data" for multipole_boundary_data in (true,false)
-                println("        -  multipole_boundary_data=$multipole_boundary_data")
+            @testset "$boundary_data_option" for boundary_data_option in (direct_integration,multipole_expansion)
+                println("        -  boundary_data_option=$boundary_data_option")
                 ngrid = 9
                 nelement_vpa = 8
                 nelement_vperp = 4
                 vpa, vpa_spectral, vperp, vperp_spectral = create_grids(ngrid,nelement_vpa,nelement_vperp,
                                                                             Lvpa=12.0,Lvperp=6.0)
                 begin_serial_region()
+                if boundary_data_option == direct_integration
+                    precompute_weights = true
+                else
+                    precompute_weights = false
+                end
                 fkpl_arrays = init_fokker_planck_collisions_weak_form(vpa,vperp,vpa_spectral,vperp_spectral,
-                                                                      precompute_weights=(true &&!(multipole_boundary_data)),
+                                                                      precompute_weights=precompute_weights,
                                                                       print_to_screen=print_to_screen)
                 dummy_array = allocate_float(vpa.n,vperp.n)
                 F_M = allocate_float(vpa.n,vperp.n)
@@ -274,7 +280,7 @@ function runtests()
                      fkpl_arrays.d2Gdvperp2, F_M, vpa, vperp, vpa_spectral, vperp_spectral,
                      fkpl_arrays; algebraic_solve_for_d2Gdvperp2=false,
                      calculate_GG=true, calculate_dGdvperp=true,
-                     multipole_boundary_data=multipole_boundary_data)
+                     boundary_data_option=boundary_data_option)
                 # extract C[Fs,Fs'] result
                 # and Rosenbluth potentials for testing
                 begin_s_r_z_anyv_region()
@@ -296,7 +302,7 @@ function runtests()
                     max_dHdvperp_boundary_data_err, max_G_boundary_data_err,
                     max_dGdvperp_boundary_data_err, max_d2Gdvperp2_boundary_data_err,
                     max_d2Gdvperpdvpa_boundary_data_err, max_d2Gdvpa2_boundary_data_err = test_rosenbluth_potential_boundary_data(fkpl_arrays.rpbd,rpbd_exact,vpa,vperp,print_to_screen=print_to_screen)
-                    if multipole_boundary_data
+                    if boundary_data_option==multipole_expansion
                         atol_max_H = 5.0e-8
                         atol_max_dHdvpa = 5.0e-8
                         atol_max_dHdvperp = 5.0e-8
@@ -332,7 +338,7 @@ function runtests()
                     dGdvperp_M_max, dGdvperp_M_L2 = print_test_data(dGdvperp_M_exact,dGdvperp_M_num,dGdvperp_M_err,"dGdvperp_M",vpa,vperp,dummy_array,print_to_screen=print_to_screen)
                     d2Gdvperpdvpa_M_max, d2Gdvperpdvpa_M_L2 = print_test_data(d2Gdvperpdvpa_M_exact,d2Gdvperpdvpa_M_num,d2Gdvperpdvpa_M_err,"d2Gdvperpdvpa_M",vpa,vperp,dummy_array,print_to_screen=print_to_screen)
                     d2Gdvperp2_M_max, d2Gdvperp2_M_L2 = print_test_data(d2Gdvperp2_M_exact,d2Gdvperp2_M_num,d2Gdvperp2_M_err,"d2Gdvperp2_M",vpa,vperp,dummy_array,print_to_screen=print_to_screen)
-                    if multipole_boundary_data
+                    if boundary_data_option==multipole_expansion
                         atol_max_H = 2.0e-7
                         atol_L2_H = 5.0e-9
                         atol_max_dHdvpa = 2.0e-6
diff --git a/test_scripts/2D_FEM_assembly_test.jl b/test_scripts/2D_FEM_assembly_test.jl
index b6f8408fb..69742fb09 100644
--- a/test_scripts/2D_FEM_assembly_test.jl
+++ b/test_scripts/2D_FEM_assembly_test.jl
@@ -19,6 +19,7 @@ using moment_kinetics.velocity_moments: get_density, get_upar, get_ppar, get_ppe
 using moment_kinetics.communication
 using moment_kinetics.communication: MPISharedArray
 using moment_kinetics.looping
+using moment_kinetics.input_structs: direct_integration, multipole_expansion
 using SparseArrays: sparse
 using LinearAlgebra: mul!, lu, cholesky
 
@@ -79,7 +80,7 @@ end
         use_Maxwellian_field_particle_distribution=false,
         test_numerical_conserving_terms=false,
         algebraic_solve_for_d2Gdvperp2=false,
-        use_multipole=false)
+        boundary_data_option=direct_integration)
         # define inputs needed for the test
         #plot_test_output = false#true
         #test_parallelism = false#true
@@ -126,7 +127,11 @@ end
         nc_global = vpa.n*vperp.n
         begin_serial_region()
         start_init_time = now()
-        precompute_weights = true && !(use_multipole)
+        if boundary_data_option == direct_integration
+            precompute_weights = true
+        else
+            precompute_weights = false
+        end
         fkpl_arrays = init_fokker_planck_collisions_weak_form(vpa,vperp,vpa_spectral,vperp_spectral; 
                            precompute_weights=precompute_weights, test_dense_matrix_construction=test_dense_construction)
         KKpar2D_with_BC_terms_sparse = fkpl_arrays.KKpar2D_with_BC_terms_sparse
@@ -265,7 +270,7 @@ end
                                              use_Maxwellian_field_particle_distribution=use_Maxwellian_field_particle_distribution,
                                              algebraic_solve_for_d2Gdvperp2=algebraic_solve_for_d2Gdvperp2,
                                              calculate_GG = false, calculate_dGdvperp=false,
-                                             multipole_boundary_data=use_multipole)
+                                             boundary_data_option=boundary_data_option)
         if test_numerical_conserving_terms && test_self_operator
             # enforce the boundary conditions on CC before it is used for timestepping
             enforce_vpavperp_BCs!(fkpl_arrays.CC,vpa,vperp,vpa_spectral,vperp_spectral)
@@ -276,7 +281,7 @@ end
         calculate_rosenbluth_potentials_via_elliptic_solve!(fkpl_arrays.GG,fkpl_arrays.HH,fkpl_arrays.dHdvpa,fkpl_arrays.dHdvperp,
              fkpl_arrays.d2Gdvpa2,fkpl_arrays.dGdvperp,fkpl_arrays.d2Gdvperpdvpa,fkpl_arrays.d2Gdvperp2,F_M,
              vpa,vperp,vpa_spectral,vperp_spectral,fkpl_arrays;
-             algebraic_solve_for_d2Gdvperp2=false,calculate_GG=true,calculate_dGdvperp=true,multipole_boundary_data=use_multipole)
+             algebraic_solve_for_d2Gdvperp2=false,calculate_GG=true,calculate_dGdvperp=true,boundary_data_option=boundary_data_option)
         # extract C[Fs,Fs'] result
         # and Rosenbluth potentials for testing
         begin_s_r_z_anyv_region()
@@ -392,7 +397,7 @@ end
         algebraic_solve_for_d2Gdvperp2=false,
         test_self_operator = true,
         Lvpa = 12.0, Lvperp = 6.0,
-        use_multipole = false)
+        boundary_data_option = direct_integration)
         initialize_comms!()
         #ngrid = 5
         #plot_scan = true
@@ -463,7 +468,7 @@ end
             use_Maxwellian_field_particle_distribution=use_Maxwellian_field_particle_distribution,
             test_numerical_conserving_terms=test_numerical_conserving_terms,
             algebraic_solve_for_d2Gdvperp2=algebraic_solve_for_d2Gdvperp2,
-            standalone=false, Lvpa=Lvpa, Lvperp=Lvperp, use_multipole=use_multipole)
+            standalone=false, Lvpa=Lvpa, Lvperp=Lvperp, boundary_data_option=boundary_data_option)
             max_C_err[iscan], L2_C_err[iscan] = fkerr.C_M.max ,fkerr.C_M.L2
             max_H_err[iscan], L2_H_err[iscan] = fkerr.H_M.max ,fkerr.H_M.L2
             max_dHdvpa_err[iscan], L2_dHdvpa_err[iscan] = fkerr.dHdvpa_M.max ,fkerr.dHdvpa_M.L2

From 08beebf137b983e061c5d44fc72967775ca0151e Mon Sep 17 00:00:00 2001
From: Michael Hardman <29800382+mrhardman@users.noreply.github.com>
Date: Mon, 11 Nov 2024 15:26:23 +0000
Subject: [PATCH 35/41] Change arguments for multipole functions to reduce
 number of variables passed.

---
 moment_kinetics/src/fokker_planck_calculus.jl | 6433 ++++++++---------
 1 file changed, 3160 insertions(+), 3273 deletions(-)

diff --git a/moment_kinetics/src/fokker_planck_calculus.jl b/moment_kinetics/src/fokker_planck_calculus.jl
index 9831927f4..ef78ef293 100644
--- a/moment_kinetics/src/fokker_planck_calculus.jl
+++ b/moment_kinetics/src/fokker_planck_calculus.jl
@@ -1,3273 +1,3160 @@
-"""
-Module for functions used 
-in calculating the integrals and doing 
-the numerical differentiation for 
-the implementation of the 
-the full-F Fokker-Planck collision operator [`moment_kinetics.fokker_planck`](@ref).
-
-Parallelisation of the collision operator uses a special 'anyv' region type, see
-[Collision operator and `anyv` region](@ref).
-"""
-module fokker_planck_calculus
-
-export assemble_matrix_operators_dirichlet_bc
-export assemble_matrix_operators_dirichlet_bc_sparse
-export assemble_explicit_collision_operator_rhs_serial!
-export assemble_explicit_collision_operator_rhs_parallel!
-export assemble_explicit_collision_operator_rhs_parallel_analytical_inputs!
-export YY_collision_operator_arrays, calculate_YY_arrays
-export calculate_rosenbluth_potential_boundary_data!
-export calculate_rosenbluth_potential_boundary_data_multipole!
-export elliptic_solve!, algebraic_solve!
-export fokkerplanck_arrays_direct_integration_struct
-export fokkerplanck_weakform_arrays_struct
-export enforce_vpavperp_BCs!
-export calculate_rosenbluth_potentials_via_elliptic_solve!
-
-# testing
-export calculate_rosenbluth_potential_boundary_data_exact!
-export allocate_rosenbluth_potential_boundary_data
-export calculate_rosenbluth_potential_boundary_data_exact!
-export test_rosenbluth_potential_boundary_data
-export interpolate_2D_vspace!
-
-# Import moment_kinetics so that we can refer to it in docstrings
-import moment_kinetics
-
-using ..type_definitions: mk_float, mk_int
-using ..array_allocation: allocate_float, allocate_shared_float
-using ..calculus: derivative!
-using ..communication
-using ..communication: MPISharedArray, global_rank
-using ..lagrange_polynomials: lagrange_poly, lagrange_poly_optimised
-using ..looping
-using ..velocity_moments: integrate_over_vspace
-using ..input_structs: direct_integration, multipole_expansion
-using moment_kinetics.gauss_legendre: get_QQ_local!
-using Dates
-using SpecialFunctions: ellipk, ellipe
-using SparseArrays: sparse, AbstractSparseArray
-using SuiteSparse
-using LinearAlgebra: ldiv!, mul!, LU
-using FastGaussQuadrature
-using Printf
-using MPI
-
-function print_matrix(matrix,name::String,n::mk_int,m::mk_int)
-    println("\n ",name," \n")
-    for i in 1:n
-        for j in 1:m
-            @printf("%.2f ", matrix[i,j])
-        end
-        println("")
-    end
-    println("\n")
-end
-
-function print_vector(vector,name::String,m::mk_int)
-    println("\n ",name," \n")
-    for j in 1:m
-        @printf("%.3f ", vector[j])
-    end
-    println("")
-    println("\n")
-end
-
-"""
-Struct of dummy arrays and precalculated coefficients
-for the Fokker-Planck collision operator when the
-Rosenbluth potentials are computed everywhere in `(vpa,vperp)`
-by direct integration. Used for testing.
-"""
-struct fokkerplanck_arrays_direct_integration_struct
-    G0_weights::MPISharedArray{mk_float,4}
-    G1_weights::MPISharedArray{mk_float,4}
-    H0_weights::MPISharedArray{mk_float,4}
-    H1_weights::MPISharedArray{mk_float,4}
-    H2_weights::MPISharedArray{mk_float,4}
-    H3_weights::MPISharedArray{mk_float,4}
-    GG::MPISharedArray{mk_float,2}
-    d2Gdvpa2::MPISharedArray{mk_float,2}
-    d2Gdvperpdvpa::MPISharedArray{mk_float,2}
-    d2Gdvperp2::MPISharedArray{mk_float,2}
-    dGdvperp::MPISharedArray{mk_float,2}
-    HH::MPISharedArray{mk_float,2}
-    dHdvpa::MPISharedArray{mk_float,2}
-    dHdvperp::MPISharedArray{mk_float,2}
-    #Cflux_vpa::MPISharedArray{mk_float,2}
-    #Cflux_vperp::MPISharedArray{mk_float,2}
-    buffer_vpavperp_1::Array{mk_float,2}
-    buffer_vpavperp_2::Array{mk_float,2}
-    Cssp_result_vpavperp::MPISharedArray{mk_float,2}
-    dfdvpa::MPISharedArray{mk_float,2}
-    d2fdvpa2::MPISharedArray{mk_float,2}
-    d2fdvperpdvpa::MPISharedArray{mk_float,2}
-    dfdvperp::MPISharedArray{mk_float,2}
-    d2fdvperp2::MPISharedArray{mk_float,2}
-end
-
-"""
-Struct to contain the integration weights for the boundary points
-in the `(vpa,vperp)` domain.
-"""
-struct boundary_integration_weights_struct
-    lower_vpa_boundary::MPISharedArray{mk_float,3}
-    upper_vpa_boundary::MPISharedArray{mk_float,3}
-    upper_vperp_boundary::MPISharedArray{mk_float,3}
-end
-
-"""
-Struct used for storing the integration weights for the 
-boundary of the velocity space domain in `(vpa,vperp)` coordinates.
-"""
-struct fokkerplanck_boundary_data_arrays_struct
-    G0_weights::boundary_integration_weights_struct
-    G1_weights::boundary_integration_weights_struct
-    H0_weights::boundary_integration_weights_struct
-    H1_weights::boundary_integration_weights_struct
-    H2_weights::boundary_integration_weights_struct
-    H3_weights::boundary_integration_weights_struct
-    dfdvpa::MPISharedArray{mk_float,2}
-    d2fdvperpdvpa::MPISharedArray{mk_float,2}
-    dfdvperp::MPISharedArray{mk_float,2}    
-end
-
-"""
-Struct to store the `(vpa,vperp)` boundary data for an
-individual Rosenbluth potential.
-"""
-struct vpa_vperp_boundary_data
-    lower_boundary_vpa::MPISharedArray{mk_float,1}
-    upper_boundary_vpa::MPISharedArray{mk_float,1}
-    upper_boundary_vperp::MPISharedArray{mk_float,1}
-end
-
-"""
-Struct to store the boundary data for all of the
-Rosenbluth potentials required for the calculation.
-"""
-struct rosenbluth_potential_boundary_data
-    H_data::vpa_vperp_boundary_data
-    dHdvpa_data::vpa_vperp_boundary_data
-    dHdvperp_data::vpa_vperp_boundary_data
-    G_data::vpa_vperp_boundary_data
-    dGdvperp_data::vpa_vperp_boundary_data
-    d2Gdvperp2_data::vpa_vperp_boundary_data
-    d2Gdvperpdvpa_data::vpa_vperp_boundary_data
-    d2Gdvpa2_data::vpa_vperp_boundary_data
-end
-
-"""
-Struct to store the elemental nonlinear stiffness matrices used
-to express the finite-element weak form of the collision
-operator. The arrays are indexed so that the contraction
-in the assembly step is carried out over the fastest
-accessed indices, i.e., for `YY0perp[i,j,k,iel]`, we contract
-over `i` and `j` to give data for the field position index `k`,
-all for the 1D element indexed by `iel`.
-"""
-struct YY_collision_operator_arrays
-    # let phi_j(vperp) be the jth Lagrange basis function, 
-    # and phi'_j(vperp) the first derivative of the Lagrange basis function
-    # on the iel^th element. Then, the arrays are defined as follows.
-    # YY0perp[i,j,k,iel] = \int phi_i(vperp) phi_j(vperp) phi_k(vperp) vperp d vperp
-    YY0perp::Array{mk_float,4}
-    # YY1perp[i,j,k,iel] = \int phi_i(vperp) phi_j(vperp) phi'_k(vperp) vperp d vperp
-    YY1perp::Array{mk_float,4}
-    # YY2perp[i,j,k,iel] = \int phi_i(vperp) phi'_j(vperp) phi'_k(vperp) vperp d vperp
-    YY2perp::Array{mk_float,4}
-    # YY3perp[i,j,k,iel] = \int phi_i(vperp) phi'_j(vperp) phi_k(vperp) vperp d vperp
-    YY3perp::Array{mk_float,4}
-    # YY0par[i,j,k,iel] = \int phi_i(vpa) phi_j(vpa) phi_k(vpa) vpa d vpa
-    YY0par::Array{mk_float,4}
-    # YY1par[i,j,k,iel] = \int phi_i(vpa) phi_j(vpa) phi'_k(vpa) vpa d vpa
-    YY1par::Array{mk_float,4}
-    # YY2par[i,j,k,iel] = \int phi_i(vpa) phi'_j(vpa) phi'_k(vpa) vpa d vpa
-    YY2par::Array{mk_float,4}
-    # YY3par[i,j,k,iel] = \int phi_i(vpa) phi'_j(vpa) phi_k(vpa) vpa d vpa
-    YY3par::Array{mk_float,4}
-end
-
-"""
-Struct of dummy arrays and precalculated coefficients
-for the finite-element weak-form Fokker-Planck collision operator.
-"""
-struct fokkerplanck_weakform_arrays_struct{M <: AbstractSparseArray{mk_float,mk_int,N} where N}
-    # boundary weights (Green's function) data
-    bwgt::fokkerplanck_boundary_data_arrays_struct
-    # dummy arrays for boundary data calculation
-    rpbd::rosenbluth_potential_boundary_data
-    # assembled 2D weak-form matrices
-    MM2D_sparse::M
-    KKpar2D_sparse::M
-    KKperp2D_sparse::M
-    KKpar2D_with_BC_terms_sparse::M
-    KKperp2D_with_BC_terms_sparse::M
-    LP2D_sparse::M
-    LV2D_sparse::M
-    LB2D_sparse::M
-    PUperp2D_sparse::M
-    PPparPUperp2D_sparse::M
-    PPpar2D_sparse::M
-    MMparMNperp2D_sparse::M
-    KPperp2D_sparse::M
-    # lu decomposition objects
-    lu_obj_MM::SuiteSparse.UMFPACK.UmfpackLU{mk_float,mk_int}
-    lu_obj_LP::SuiteSparse.UMFPACK.UmfpackLU{mk_float,mk_int}
-    lu_obj_LV::SuiteSparse.UMFPACK.UmfpackLU{mk_float,mk_int}
-    lu_obj_LB::SuiteSparse.UMFPACK.UmfpackLU{mk_float,mk_int}
-    # elemental matrices for the assembly of C[Fs,Fsp]
-    YY_arrays::YY_collision_operator_arrays
-    # dummy arrays for elliptic solvers
-    S_dummy::MPISharedArray{mk_float,2}
-    Q_dummy::MPISharedArray{mk_float,2}
-    rhsvpavperp::MPISharedArray{mk_float,2}
-    rhsvpavperp_copy1::MPISharedArray{mk_float,2}
-    rhsvpavperp_copy2::MPISharedArray{mk_float,2}
-    rhsvpavperp_copy3::MPISharedArray{mk_float,2}
-    # dummy array for the result of the calculation
-    CC::MPISharedArray{mk_float,2}
-    # dummy arrays for storing Rosenbluth potentials
-    GG::MPISharedArray{mk_float,2}
-    HH::MPISharedArray{mk_float,2}
-    dHdvpa::MPISharedArray{mk_float,2}
-    dHdvperp::MPISharedArray{mk_float,2}
-    dGdvperp::MPISharedArray{mk_float,2}
-    d2Gdvperp2::MPISharedArray{mk_float,2}
-    d2Gdvpa2::MPISharedArray{mk_float,2}
-    d2Gdvperpdvpa::MPISharedArray{mk_float,2}
-    FF::MPISharedArray{mk_float,2}
-    dFdvpa::MPISharedArray{mk_float,2}
-    dFdvperp::MPISharedArray{mk_float,2}
-end
-
-"""
-Function to allocate a `boundary_integration_weights_struct`.
-"""
-function allocate_boundary_integration_weight(vpa,vperp)
-    nvpa = vpa.n
-    nvperp = vperp.n
-    lower_vpa_boundary = allocate_shared_float(nvpa,nvperp,nvperp)
-    upper_vpa_boundary = allocate_shared_float(nvpa,nvperp,nvperp)
-    upper_vperp_boundary = allocate_shared_float(nvpa,nvperp,nvpa)
-    return boundary_integration_weights_struct(lower_vpa_boundary,
-            upper_vpa_boundary, upper_vperp_boundary)
-end
-
-"""
-Function to allocate at `fokkerplanck_boundary_data_arrays_struct`.
-"""
-function allocate_boundary_integration_weights(vpa,vperp)
-    G0_weights = allocate_boundary_integration_weight(vpa,vperp)
-    G1_weights = allocate_boundary_integration_weight(vpa,vperp)
-    H0_weights = allocate_boundary_integration_weight(vpa,vperp)
-    H1_weights = allocate_boundary_integration_weight(vpa,vperp)
-    H2_weights = allocate_boundary_integration_weight(vpa,vperp)
-    H3_weights = allocate_boundary_integration_weight(vpa,vperp)
-
-    # The following velocity-space-sized buffer arrays are used to evaluate the
-    # collision operator for a single species at a single spatial point. They are
-    # shared-memory arrays. The `comm` argument to `allocate_shared_float()` is used to
-    # set up the shared-memory arrays so that they are shared only by the processes on
-    # `comm_anyv_subblock[]` rather than on the full `comm_block[]`. This means that
-    # different subblocks that are calculating the collision operator at different
-    # spatial points do not interfere with each others' buffer arrays.
-    # Note that the 'weights' allocated above are read-only and therefore can be used
-    # simultaneously by different subblocks. They are shared over the full
-    # `comm_block[]` in order to save memory and setup time.
-    nvpa = vpa.n
-    nvperp = vperp.n
-    dfdvpa = allocate_shared_float(nvpa,nvperp; comm=comm_anyv_subblock[])
-    d2fdvperpdvpa = allocate_shared_float(nvpa,nvperp; comm=comm_anyv_subblock[])
-    dfdvperp = allocate_shared_float(nvpa,nvperp; comm=comm_anyv_subblock[])
-    return fokkerplanck_boundary_data_arrays_struct(G0_weights,
-            G1_weights,H0_weights,H1_weights,H2_weights,H3_weights,
-            dfdvpa,d2fdvperpdvpa,dfdvperp)
-end
-
-
-"""
-Function that precomputes the required integration weights in the whole of
-`(vpa,vperp)` for the direct integration method of computing the Rosenbluth potentials.
-"""
-function init_Rosenbluth_potential_integration_weights!(G0_weights,G1_weights,H0_weights,H1_weights,H2_weights,H3_weights,vperp,vpa;print_to_screen=true)
-    
-    x_vpa, w_vpa, x_vperp, w_vperp, x_legendre, w_legendre, x_laguerre, w_laguerre = setup_basic_quadratures(vpa,vperp,print_to_screen=print_to_screen)
-    
-    @serial_region begin
-        if global_rank[] == 0 && print_to_screen
-            println("beginning weights calculation   ", Dates.format(now(), dateformat"H:MM:SS"))
-        end
-    end
-
-    # precalculated weights, integrating over Lagrange polynomials
-    begin_vperp_vpa_region()
-    @loop_vperp_vpa ivperp ivpa begin
-        #limits where checks required to determine which divergence-safe grid is needed
-        igrid_vpa, ielement_vpa, ielement_vpa_low, ielement_vpa_hi, igrid_vperp, ielement_vperp, ielement_vperp_low, ielement_vperp_hi = get_element_limit_indices(ivpa,ivperp,vpa,vperp)
-        
-        vperp_val = vperp.grid[ivperp]
-        vpa_val = vpa.grid[ivpa]
-        for ivperpp in 1:vperp.n
-            for ivpap in 1:vpa.n
-                G0_weights[ivpap,ivperpp,ivpa,ivperp] = 0.0  
-                G1_weights[ivpap,ivperpp,ivpa,ivperp] = 0.0  
-                # G2_weights[ivpap,ivperpp,ivpa,ivperp] = 0.0  
-                # G3_weights[ivpap,ivperpp,ivpa,ivperp] = 0.0  
-                H0_weights[ivpap,ivperpp,ivpa,ivperp] = 0.0  
-                H1_weights[ivpap,ivperpp,ivpa,ivperp] = 0.0  
-                H2_weights[ivpap,ivperpp,ivpa,ivperp] = 0.0  
-                H3_weights[ivpap,ivperpp,ivpa,ivperp] = 0.0  
-                #@. n_weights[ivpap,ivperpp,ivpa,ivperp] = 0.0  
-            end
-        end
-        # loop over elements and grid points within elements on primed coordinate
-        @views loop_over_vperp_vpa_elements!(G0_weights[:,:,ivpa,ivperp],G1_weights[:,:,ivpa,ivperp],
-                H0_weights[:,:,ivpa,ivperp],H1_weights[:,:,ivpa,ivperp],
-                H2_weights[:,:,ivpa,ivperp],H3_weights[:,:,ivpa,ivperp],
-                vpa,ielement_vpa_low,ielement_vpa_hi, # info about primed vpa grids
-                vperp,ielement_vperp_low,ielement_vperp_hi, # info about primed vperp grids
-                x_vpa, w_vpa, x_vperp, w_vperp, # arrays to store points and weights for primed (source) grids
-                x_legendre,w_legendre,x_laguerre,w_laguerre,
-                igrid_vpa, igrid_vperp, vpa_val, vperp_val)
-    end
-    
-    
-    @serial_region begin
-        if global_rank[] == 0 && print_to_screen
-            println("finished weights calculation   ", Dates.format(now(), dateformat"H:MM:SS"))
-        end
-    end
-    return nothing
-end
-
-"""
-Function for getting the basic quadratures used for the 
-numerical integration of the Lagrange polynomials and the 
-integration kernals.
-"""
-function setup_basic_quadratures(vpa,vperp;print_to_screen=true)
-    @serial_region begin
-        if global_rank[] == 0 && print_to_screen
-            println("setting up GL quadrature   ", Dates.format(now(), dateformat"H:MM:SS"))
-        end
-    end
-    
-    # get Gauss-Legendre points and weights on (-1,1)
-    ngrid = max(vpa.ngrid,vperp.ngrid)
-    nquad = 2*ngrid
-    x_legendre, w_legendre = gausslegendre(nquad)
-    #nlaguerre = min(9,nquad) # to prevent points to close to the boundaries
-    nlaguerre = nquad
-    x_laguerre, w_laguerre = gausslaguerre(nlaguerre)
-    
-    x_vpa, w_vpa = Array{mk_float,1}(undef,4*nquad), Array{mk_float,1}(undef,4*nquad)
-    x_vperp, w_vperp = Array{mk_float,1}(undef,4*nquad), Array{mk_float,1}(undef,4*nquad)
-  
-    return x_vpa, w_vpa, x_vperp, w_vperp, x_legendre, w_legendre, x_laguerre, w_laguerre
-end
-
-
-"""
-Function for getting the indices used to choose the integration quadrature.
-"""
-function get_element_limit_indices(ivpa,ivperp,vpa,vperp)
-    nelement_vpa, ngrid_vpa = vpa.nelement_local, vpa.ngrid
-    nelement_vperp, ngrid_vperp = vperp.nelement_local, vperp.ngrid
-    #limits where checks required to determine which divergence-safe grid is needed
-    igrid_vpa, ielement_vpa = vpa.igrid[ivpa], vpa.ielement[ivpa]
-    ielement_vpa_low = ielement_vpa - ng_low(igrid_vpa,ngrid_vpa)*nel_low(ielement_vpa,nelement_vpa)
-    ielement_vpa_hi = ielement_vpa + ng_hi(igrid_vpa,ngrid_vpa)*nel_hi(ielement_vpa,nelement_vpa)
-    #println("igrid_vpa: ielement_vpa: ielement_vpa_low: ielement_vpa_hi:", igrid_vpa," ",ielement_vpa," ",ielement_vpa_low," ",ielement_vpa_hi)
-    igrid_vperp, ielement_vperp = vperp.igrid[ivperp], vperp.ielement[ivperp]
-    ielement_vperp_low = ielement_vperp - ng_low(igrid_vperp,ngrid_vperp)*nel_low(ielement_vperp,nelement_vperp)
-    ielement_vperp_hi = ielement_vperp + ng_hi(igrid_vperp,ngrid_vperp)*nel_hi(ielement_vperp,nelement_vperp)
-    #println("igrid_vperp: ielement_vperp: ielement_vperp_low: ielement_vperp_hi:", igrid_vperp," ",ielement_vperp," ",ielement_vperp_low," ",ielement_vperp_hi)
-    return igrid_vpa, ielement_vpa, ielement_vpa_low, ielement_vpa_hi, 
-            igrid_vperp, ielement_vperp, ielement_vperp_low, ielement_vperp_hi
-end
-
-"""
-Function that precomputes the required integration weights only along the velocity space boundaries.
-Used as the default option as part of the strategy to compute the Rosenbluth potentials
-at the boundaries with direct integration and in the rest of `(vpa,vperp)` by solving elliptic PDEs.
-"""
-function init_Rosenbluth_potential_boundary_integration_weights!(G0_weights,
-      G1_weights,H0_weights,H1_weights,H2_weights,H3_weights,vpa,vperp;print_to_screen=true)
-    
-    x_vpa, w_vpa, x_vperp, w_vperp, x_legendre, w_legendre, x_laguerre, w_laguerre = setup_basic_quadratures(vpa,vperp,print_to_screen=print_to_screen)
-    
-    @serial_region begin
-        if global_rank[] == 0 && print_to_screen
-            println("beginning (boundary) weights calculation   ", Dates.format(now(), dateformat"H:MM:SS"))
-        end
-    end
-
-    # precalculate weights, integrating over Lagrange polynomials
-    # first compute weights along lower vpa boundary
-    begin_vperp_region()
-    ivpa = 1 # lower_vpa_boundary
-    @loop_vperp ivperp begin
-        #limits where checks required to determine which divergence-safe grid is needed
-        igrid_vpa, ielement_vpa, ielement_vpa_low, ielement_vpa_hi, igrid_vperp, ielement_vperp, ielement_vperp_low, ielement_vperp_hi = get_element_limit_indices(ivpa,ivperp,vpa,vperp)
-        
-        vperp_val = vperp.grid[ivperp]
-        vpa_val = vpa.grid[ivpa]
-        for ivperpp in 1:vperp.n
-            for ivpap in 1:vpa.n
-                G0_weights.lower_vpa_boundary[ivpap,ivperpp,ivperp] = 0.0  
-                G1_weights.lower_vpa_boundary[ivpap,ivperpp,ivperp] = 0.0  
-                # G2_weights[ivpap,ivperpp,ivpa,ivperp] = 0.0  
-                # G3_weights[ivpap,ivperpp,ivpa,ivperp] = 0.0  
-                H0_weights.lower_vpa_boundary[ivpap,ivperpp,ivperp] = 0.0  
-                H1_weights.lower_vpa_boundary[ivpap,ivperpp,ivperp] = 0.0  
-                H2_weights.lower_vpa_boundary[ivpap,ivperpp,ivperp] = 0.0  
-                H3_weights.lower_vpa_boundary[ivpap,ivperpp,ivperp] = 0.0  
-                #@. n_weights[ivpap,ivperpp,ivpa,ivperp] = 0.0  
-            end
-        end
-        # loop over elements and grid points within elements on primed coordinate
-        @views loop_over_vperp_vpa_elements!(G0_weights.lower_vpa_boundary[:,:,ivperp],
-                G1_weights.lower_vpa_boundary[:,:,ivperp],
-                H0_weights.lower_vpa_boundary[:,:,ivperp],
-                H1_weights.lower_vpa_boundary[:,:,ivperp],
-                H2_weights.lower_vpa_boundary[:,:,ivperp],
-                H3_weights.lower_vpa_boundary[:,:,ivperp],
-                vpa,ielement_vpa_low,ielement_vpa_hi, # info about primed vpa grids
-                vperp,ielement_vperp_low,ielement_vperp_hi, # info about primed vperp grids
-                x_vpa, w_vpa, x_vperp, w_vperp, # arrays to store points and weights for primed (source) grids
-                x_legendre,w_legendre,x_laguerre,w_laguerre,
-                igrid_vpa, igrid_vperp, vpa_val, vperp_val)
-    end
-    # second compute weights along upper vpa boundary
-    ivpa = vpa.n # upper_vpa_boundary
-    @loop_vperp ivperp begin
-        #limits where checks required to determine which divergence-safe grid is needed
-        igrid_vpa, ielement_vpa, ielement_vpa_low, ielement_vpa_hi, igrid_vperp, ielement_vperp, ielement_vperp_low, ielement_vperp_hi = get_element_limit_indices(ivpa,ivperp,vpa,vperp)
-        
-        vperp_val = vperp.grid[ivperp]
-        vpa_val = vpa.grid[ivpa]
-        for ivperpp in 1:vperp.n
-            for ivpap in 1:vpa.n
-                G0_weights.upper_vpa_boundary[ivpap,ivperpp,ivperp] = 0.0  
-                G1_weights.upper_vpa_boundary[ivpap,ivperpp,ivperp] = 0.0  
-                # G2_weights[ivpap,ivperpp,ivpa,ivperp] = 0.0  
-                # G3_weights[ivpap,ivperpp,ivpa,ivperp] = 0.0  
-                H0_weights.upper_vpa_boundary[ivpap,ivperpp,ivperp] = 0.0  
-                H1_weights.upper_vpa_boundary[ivpap,ivperpp,ivperp] = 0.0  
-                H2_weights.upper_vpa_boundary[ivpap,ivperpp,ivperp] = 0.0  
-                H3_weights.upper_vpa_boundary[ivpap,ivperpp,ivperp] = 0.0  
-                #@. n_weights[ivpap,ivperpp,ivpa,ivperp] = 0.0  
-            end
-        end
-        # loop over elements and grid points within elements on primed coordinate
-        @views loop_over_vperp_vpa_elements!(G0_weights.upper_vpa_boundary[:,:,ivperp],
-                G1_weights.upper_vpa_boundary[:,:,ivperp],
-                H0_weights.upper_vpa_boundary[:,:,ivperp],
-                H1_weights.upper_vpa_boundary[:,:,ivperp],
-                H2_weights.upper_vpa_boundary[:,:,ivperp],
-                H3_weights.upper_vpa_boundary[:,:,ivperp],
-                vpa,ielement_vpa_low,ielement_vpa_hi, # info about primed vpa grids
-                vperp,ielement_vperp_low,ielement_vperp_hi, # info about primed vperp grids
-                x_vpa, w_vpa, x_vperp, w_vperp, # arrays to store points and weights for primed (source) grids
-                x_legendre,w_legendre,x_laguerre,w_laguerre,
-                igrid_vpa, igrid_vperp, vpa_val, vperp_val)
-    end
-    # finally compute weight along upper vperp boundary
-    begin_vpa_region()
-    ivperp = vperp.n # upper_vperp_boundary
-    @loop_vpa ivpa begin
-        #limits where checks required to determine which divergence-safe grid is needed
-        igrid_vpa, ielement_vpa, ielement_vpa_low, ielement_vpa_hi, igrid_vperp, ielement_vperp, ielement_vperp_low, ielement_vperp_hi = get_element_limit_indices(ivpa,ivperp,vpa,vperp)
-        
-        vperp_val = vperp.grid[ivperp]
-        vpa_val = vpa.grid[ivpa]
-        for ivperpp in 1:vperp.n
-            for ivpap in 1:vpa.n
-                G0_weights.upper_vperp_boundary[ivpap,ivperpp,ivpa] = 0.0  
-                G1_weights.upper_vperp_boundary[ivpap,ivperpp,ivpa] = 0.0  
-                # G2_weights[ivpap,ivperpp,ivpa,ivperp] = 0.0  
-                # G3_weights[ivpap,ivperpp,ivpa,ivperp] = 0.0  
-                H0_weights.upper_vperp_boundary[ivpap,ivperpp,ivpa] = 0.0  
-                H1_weights.upper_vperp_boundary[ivpap,ivperpp,ivpa] = 0.0  
-                H2_weights.upper_vperp_boundary[ivpap,ivperpp,ivpa] = 0.0  
-                H3_weights.upper_vperp_boundary[ivpap,ivperpp,ivpa] = 0.0  
-                #@. n_weights[ivpap,ivperpp,ivpa,ivperp] = 0.0  
-            end
-        end
-        # loop over elements and grid points within elements on primed coordinate
-        @views loop_over_vperp_vpa_elements!(G0_weights.upper_vperp_boundary[:,:,ivpa],
-                G1_weights.upper_vperp_boundary[:,:,ivpa],
-                H0_weights.upper_vperp_boundary[:,:,ivpa],
-                H1_weights.upper_vperp_boundary[:,:,ivpa],
-                H2_weights.upper_vperp_boundary[:,:,ivpa],
-                H3_weights.upper_vperp_boundary[:,:,ivpa],
-                vpa,ielement_vpa_low,ielement_vpa_hi, # info about primed vpa grids
-                vperp,ielement_vperp_low,ielement_vperp_hi, # info about primed vperp grids
-                x_vpa, w_vpa, x_vperp, w_vperp, # arrays to store points and weights for primed (source) grids
-                x_legendre,w_legendre,x_laguerre,w_laguerre,
-                igrid_vpa, igrid_vperp, vpa_val, vperp_val)
-    end
-    # return the parallelisation status to serial
-    begin_serial_region()
-    @serial_region begin 
-        if global_rank[] == 0 && print_to_screen
-            println("finished (boundary) weights calculation   ", Dates.format(now(), dateformat"H:MM:SS"))
-        end
-    end
-    return nothing
-end
-
-function get_imin_imax(coord,iel)
-    j = iel
-    if j > 1
-        k = 1
-    else
-        k = 0
-    end
-    imin = coord.imin[j] - k
-    imax = coord.imax[j]
-    return imin, imax
-end
-
-function get_nodes(coord,iel)
-    # get imin and imax of this element on full grid
-    (imin, imax) = get_imin_imax(coord,iel)
-    nodes = coord.grid[imin:imax]
-    return nodes
-end
-
-"""
-Function to get the local integration grid and quadrature weights
-to integrate a 1D element in the 2D representation of the 
-velocity space distribution functions. This function assumes that
-there is a divergence at the point `coord_val`, and splits the grid 
-and integration weights appropriately, using Gauss-Laguerre points
-near the divergence and Gauss-Legendre points away from the divergence. 
-"""
-function get_scaled_x_w_with_divergences!(x_scaled, w_scaled, x_legendre, w_legendre, x_laguerre, w_laguerre, node_min, node_max, nodes, igrid_coord, coord_val)
-    #println("nodes ",nodes)
-    zero = 1.0e-10 
-    @. x_scaled = 0.0
-    @. w_scaled = 0.0
-    nnodes = size(nodes,1)
-    nquad_legendre = size(x_legendre,1)
-    nquad_laguerre = size(x_laguerre,1)
-    # assume x_scaled, w_scaled are arrays of length 2*nquad
-    # use only nquad points for most elements, but use 2*nquad for
-    # elements with interior divergences
-    #println("coord: ",coord_val," node_max: ",node_max," node_min: ",node_min) 
-    if abs(coord_val - node_max) < zero # divergence at upper endpoint 
-        node_cut = (nodes[nnodes-1] + nodes[nnodes])/2.0
-        
-        n = nquad_laguerre + nquad_legendre
-        shift = 0.5*(node_min + node_cut)
-        scale = 0.5*(node_cut - node_min)
-        @. x_scaled[1:nquad_legendre] = scale*x_legendre + shift
-        @. w_scaled[1:nquad_legendre] = scale*w_legendre
-
-        @. x_scaled[1+nquad_legendre:n] = node_max + (node_cut - node_max)*exp(-x_laguerre)
-        @. w_scaled[1+nquad_legendre:n] = (node_max - node_cut)*w_laguerre
-        
-        nquad_coord = n
-        #println("upper divergence")
-    elseif abs(coord_val - node_min) < zero # divergence at lower endpoint
-        n = nquad_laguerre + nquad_legendre
-        nquad = size(x_laguerre,1)
-        node_cut = (nodes[1] + nodes[2])/2.0
-        for j in 1:nquad_laguerre
-            x_scaled[nquad_laguerre+1-j] = node_min + (node_cut - node_min)*exp(-x_laguerre[j])
-            w_scaled[nquad_laguerre+1-j] = (node_cut - node_min)*w_laguerre[j]
-        end
-        shift = 0.5*(node_max + node_cut)
-        scale = 0.5*(node_max - node_cut)
-        @. x_scaled[1+nquad_laguerre:n] = scale*x_legendre + shift
-        @. w_scaled[1+nquad_laguerre:n] = scale*w_legendre
-
-        nquad_coord = n
-        #println("lower divergence")
-    else #if (coord_val - node_min)*(coord_val - node_max) < - zero # interior divergence
-        #println(nodes[igrid_coord]," ", coord_val)
-        n = 2*nquad_laguerre
-        node_cut_high = (nodes[igrid_coord+1] + nodes[igrid_coord])/2.0
-        if igrid_coord == 1
-            # exception for vperp coordinate near orgin
-            k = 0
-            node_cut_low = node_min
-            nquad_coord = nquad_legendre + 2*nquad_laguerre
-        else
-            # fill in lower Gauss-Legendre points
-            node_cut_low = (nodes[igrid_coord-1]+nodes[igrid_coord])/2.0
-            shift = 0.5*(node_cut_low + node_min)
-            scale = 0.5*(node_cut_low - node_min)
-            @. x_scaled[1:nquad_legendre] = scale*x_legendre + shift
-            @. w_scaled[1:nquad_legendre] = scale*w_legendre
-            k = nquad_legendre
-            nquad_coord = 2*(nquad_laguerre + nquad_legendre)
-        end
-        # lower half of domain  
-        for j in 1:nquad_laguerre  
-            x_scaled[k+j] = coord_val + (node_cut_low - coord_val)*exp(-x_laguerre[j])
-            w_scaled[k+j] = (coord_val - node_cut_low)*w_laguerre[j]
-        end  
-        # upper half of domain
-        for j in 1:nquad_laguerre
-            x_scaled[k+n+1-j] = coord_val + (node_cut_high - coord_val)*exp(-x_laguerre[j])
-            w_scaled[k+n+1-j] = (node_cut_high - coord_val)*w_laguerre[j]
-        end
-        # fill in upper Gauss-Legendre points
-        shift = 0.5*(node_cut_high + node_max)
-        scale = 0.5*(node_max - node_cut_high)
-        @. x_scaled[k+n+1:nquad_coord] = scale*x_legendre + shift
-        @. w_scaled[k+n+1:nquad_coord] = scale*w_legendre
-        
-        #println("intermediate divergence")
-    #else # no divergences
-    #    nquad = size(x_legendre,1) 
-    #    shift = 0.5*(node_min + node_max)
-    #    scale = 0.5*(node_max - node_min)
-    #    @. x_scaled[1:nquad] = scale*x_legendre + shift
-    #    @. w_scaled[1:nquad] = scale*w_legendre
-    #    #println("no divergence")
-    #    nquad_coord = nquad
-    end
-    #println("x_scaled",x_scaled)
-    #println("w_scaled",w_scaled)
-    return nquad_coord
-end
-
-"""
-Function to get the local grid and integration weights assuming 
-no divergences of the function on the 1D element. Gauss-Legendre
-quadrature is used for the entire element.
-"""
-function get_scaled_x_w_no_divergences!(x_scaled, w_scaled, x_legendre, w_legendre, node_min, node_max)
-    @. x_scaled = 0.0
-    @. w_scaled = 0.0
-    #println("coord: ",coord_val," node_max: ",node_max," node_min: ",node_min) 
-    nquad = size(x_legendre,1) 
-    shift = 0.5*(node_min + node_max)
-    scale = 0.5*(node_max - node_min)
-    @. x_scaled[1:nquad] = scale*x_legendre + shift
-    @. w_scaled[1:nquad] = scale*w_legendre
-    #println("x_scaled",x_scaled)
-    #println("w_scaled",w_scaled)
-    return nquad
-end
-
-"""
-Function returns `1` if `igrid = 1` or `0` if `1 < igrid <= ngrid`.
-"""
-function ng_low(igrid,ngrid)
-    return floor(mk_int, (ngrid - igrid)/(ngrid - 1))
-end
-
-"""
-Function returns `1` if `igrid = ngrid` or `0` if `1 =< igrid < ngrid`.
-"""
-function ng_hi(igrid,ngrid)
-    return floor(mk_int, igrid/ngrid)
-end
-
-"""
-Function returns `1` for `nelement >= ielement > 1`, `0` for `ielement = 1`.
-"""
-function nel_low(ielement,nelement)
-    return floor(mk_int, (ielement - 2 + nelement)/nelement)
-end
-
-"""
-Function returns `1` for `nelement > ielement >= 1`, `0` for `ielement = nelement`.
-"""
-function nel_hi(ielement,nelement)
-    return 1- floor(mk_int, ielement/nelement)
-end
-
-"""
-Base level function for computing the integration kernals for the Rosenbluth potential integration.
-Note the definitions of `ellipe(m)` (\$E(m)\$) and `ellipk(m)` (\$K(m)\$).
-`https://specialfunctions.juliamath.org/stable/functions_list/#SpecialFunctions.ellipe`
-`https://specialfunctions.juliamath.org/stable/functions_list/#SpecialFunctions.ellipk`
-```math
-E(m) = \\int^{\\pi/2}_0 \\sqrt{ 1 - m \\sin^2(\\theta)} d \\theta
-```
-```math
-K(m) = \\int^{\\pi/2}_0 \\frac{1}{\\sqrt{ 1 - m \\sin^2(\\theta)}} d \\theta
-```
-"""
-function local_element_integration!(G0_weights,G1_weights,H0_weights,H1_weights,H2_weights,H3_weights,
-                            nquad_vpa,ielement_vpa,vpa, # info about primed vpa grids
-                            nquad_vperp,ielement_vperp,vperp, # info about primed vperp grids
-                            x_vpa, w_vpa, x_vperp, w_vperp, # points and weights for primed (source) grids
-                            vpa_val, vperp_val) # values and indices for unprimed (field) grids
-    for igrid_vperp in 1:vperp.ngrid
-        vperp_other_nodes = @view vperp.other_nodes[:,igrid_vperp,ielement_vperp]
-        vperp_one_over_denominator = vperp.one_over_denominator[igrid_vperp,ielement_vperp]
-        for igrid_vpa in 1:vpa.ngrid
-            vpa_other_nodes = @view vpa.other_nodes[:,igrid_vpa,ielement_vpa]
-            vpa_one_over_denominator = vpa.one_over_denominator[igrid_vpa,ielement_vpa]
-            # get grid index for point on full grid  
-            ivpap = vpa.igrid_full[igrid_vpa,ielement_vpa]   
-            ivperpp = vperp.igrid_full[igrid_vperp,ielement_vperp]   
-            # carry out integration over Lagrange polynomial at this node, on this element
-            for kvperp in 1:nquad_vperp
-                for kvpa in 1:nquad_vpa 
-                    x_kvpa = x_vpa[kvpa]
-                    x_kvperp = x_vperp[kvperp]
-                    w_kvperp = w_vperp[kvperp]
-                    w_kvpa = w_vpa[kvpa]
-                    denom = (vpa_val - x_kvpa)^2 + (vperp_val + x_kvperp)^2 
-                    mm = min(4.0*vperp_val*x_kvperp/denom,1.0 - 1.0e-15)
-                    #mm = 4.0*vperp_val*x_kvperp/denom/(1.0 + 10^-15)
-                    #mm = 4.0*vperp_val*x_kvperp/denom
-                    prefac = sqrt(denom)
-                    ellipe_mm = ellipe(mm) 
-                    ellipk_mm = ellipk(mm) 
-                    #if mm_test > 1.0
-                    #    println("mm: ",mm_test," ellipe: ",ellipe_mm," ellipk: ",ellipk_mm)
-                    #end
-                    G_elliptic_integral_factor = 2.0*ellipe_mm*prefac/pi
-                    G1_elliptic_integral_factor = -(2.0*prefac/pi)*( (2.0 - mm)*ellipe_mm - 2.0*(1.0 - mm)*ellipk_mm )/(3.0*mm)
-                    #G2_elliptic_integral_factor = (2.0*prefac/pi)*( (7.0*mm^2 + 8.0*mm - 8.0)*ellipe_mm + 4.0*(2.0 - mm)*(1.0 - mm)*ellipk_mm )/(15.0*mm^2)
-                    #G3_elliptic_integral_factor = (2.0*prefac/pi)*( 8.0*(mm^2 - mm + 1.0)*ellipe_mm - 4.0*(2.0 - mm)*(1.0 - mm)*ellipk_mm )/(15.0*mm^2)
-                    H_elliptic_integral_factor = 2.0*ellipk_mm/(pi*prefac)
-                    H1_elliptic_integral_factor = -(2.0/(pi*prefac))*( (mm-2.0)*(ellipk_mm/mm) + (2.0*ellipe_mm/mm) )
-                    H2_elliptic_integral_factor = (2.0/(pi*prefac))*( (3.0*mm^2 - 8.0*mm + 8.0)*(ellipk_mm/(3.0*mm^2)) + (4.0*mm - 8.0)*ellipe_mm/(3.0*mm^2) )
-                    lagrange_poly_vpa = lagrange_poly_optimised(vpa_other_nodes,
-                                                                vpa_one_over_denominator,
-                                                                x_kvpa)
-                    lagrange_poly_vperp = lagrange_poly_optimised(vperp_other_nodes,
-                                                                  vperp_one_over_denominator,
-                                                                  x_kvperp)
-                    
-                    (G0_weights[ivpap,ivperpp] += 
-                        lagrange_poly_vpa*lagrange_poly_vperp*
-                        G_elliptic_integral_factor*x_kvperp*w_kvperp*w_kvpa*2.0/sqrt(pi))
-                    
-                    (G1_weights[ivpap,ivperpp] += 
-                        lagrange_poly_vpa*lagrange_poly_vperp*
-                        G1_elliptic_integral_factor*x_kvperp*w_kvperp*w_kvpa*2.0/sqrt(pi))
-                    
-                    #(G2_weights[ivpap,ivperpp] += 
-                    #    lagrange_poly_vpa*lagrange_poly_vperp*
-                    #    G2_elliptic_integral_factor*x_kvperp*w_kvperp*w_kvpa*2.0/sqrt(pi))
-                    
-                    #(G3_weights[ivpap,ivperpp] += 
-                    #    lagrange_poly_vpa*lagrange_poly_vperp*
-                    #    G3_elliptic_integral_factor*w_kvperp*w_kvpa*2.0/sqrt(pi))
-                    
-                    (H0_weights[ivpap,ivperpp] += 
-                        lagrange_poly_vpa*lagrange_poly_vperp*
-                        H_elliptic_integral_factor*x_kvperp*w_kvperp*w_kvpa*2.0/sqrt(pi))
-                        
-                    (H1_weights[ivpap,ivperpp] += 
-                        lagrange_poly_vpa*lagrange_poly_vperp*
-                        H1_elliptic_integral_factor*x_kvperp*w_kvperp*w_kvpa*2.0/sqrt(pi))
-                        
-                    (H2_weights[ivpap,ivperpp] += 
-                        lagrange_poly_vpa*lagrange_poly_vperp*
-                        (H1_elliptic_integral_factor*vperp_val - H2_elliptic_integral_factor*x_kvperp)*
-                        x_kvperp*w_kvperp*w_kvpa*2.0/sqrt(pi))
-                    (H3_weights[ivpap,ivperpp] += 
-                        lagrange_poly_vpa*lagrange_poly_vperp*
-                        H_elliptic_integral_factor*(vpa_val - x_kvpa)*
-                        x_kvperp*w_kvperp*w_kvpa*2.0/sqrt(pi))
-                    
-                    #(n_weights[ivpap,ivperpp] += 
-                    #    lagrange_poly_vpa*lagrange_poly_vperp*
-                    #    x_kvperp*w_kvperp*w_kvpa*2.0/sqrt(pi))
-                end
-            end
-        end
-    end
-    return nothing
-end
-
-"""
-Function for computing the quadratures and carrying out the loop over the 
-primed `vpa` coordinate in doing the numerical integration. Splits the integrand
-into three pieces -- two which use Gauss-Legendre quadrature assuming no divergences
-in the integrand, and one which assumes a logarithmic divergence and uses a
-Gauss-Laguerre quadrature with an (exponential) change of variables to mitigate this divergence.
-"""
-function loop_over_vpa_elements!(G0_weights,G1_weights,H0_weights,H1_weights,H2_weights,H3_weights,
-                            vpa,ielement_vpa_low,ielement_vpa_hi, # info about primed vperp grids
-                            vperp,ielement_vperpp, # info about primed vperp grids
-                            x_vpa, w_vpa, x_vperp, w_vperp, # arrays to store points and weights for primed (source) grids
-                            x_legendre,w_legendre,x_laguerre,w_laguerre,
-                            igrid_vpa, igrid_vperp, vpa_val, vperp_val)
-    vperp_nodes = get_nodes(vperp,ielement_vperpp)
-    vperp_max = vperp_nodes[end]
-    vperp_min = vperp_nodes[1]*nel_low(ielement_vperpp,vperp.nelement_local) 
-    nquad_vperp = get_scaled_x_w_no_divergences!(x_vperp, w_vperp, x_legendre, w_legendre, vperp_min, vperp_max)
-    for ielement_vpap in 1:ielement_vpa_low-1 
-        # do integration over part of the domain with no divergences
-        vpa_nodes = get_nodes(vpa,ielement_vpap)
-        vpa_min, vpa_max = vpa_nodes[1], vpa_nodes[end]
-        nquad_vpa = get_scaled_x_w_no_divergences!(x_vpa, w_vpa, x_legendre, w_legendre, vpa_min, vpa_max)
-        local_element_integration!(G0_weights,G1_weights,H0_weights,H1_weights,H2_weights,H3_weights,
-                    nquad_vpa,ielement_vpap,vpa,
-                    nquad_vperp,ielement_vperpp,vperp,
-                    x_vpa, w_vpa, x_vperp, w_vperp, 
-                    vpa_val, vperp_val)
-    end
-    nquad_vperp = get_scaled_x_w_with_divergences!(x_vperp, w_vperp, x_legendre, w_legendre, x_laguerre, w_laguerre, vperp_min, vperp_max, vperp_nodes, igrid_vperp, vperp_val)
-    for ielement_vpap in ielement_vpa_low:ielement_vpa_hi
-    #for ielement_vpap in 1:vpa.nelement_local
-        # use general grid function that checks divergences
-        vpa_nodes = get_nodes(vpa,ielement_vpap)
-        vpa_min, vpa_max = vpa_nodes[1], vpa_nodes[end]
-        #nquad_vpa = get_scaled_x_w_no_divergences!(x_vpa, w_vpa, x_legendre, w_legendre, vpa_min, vpa_max)
-        nquad_vpa = get_scaled_x_w_with_divergences!(x_vpa, w_vpa, x_legendre, w_legendre, x_laguerre, w_laguerre, vpa_min, vpa_max, vpa_nodes, igrid_vpa, vpa_val)
-        local_element_integration!(G0_weights,G1_weights,H0_weights,H1_weights,H2_weights,H3_weights,
-                    nquad_vpa,ielement_vpap,vpa,
-                    nquad_vperp,ielement_vperpp,vperp,
-                    x_vpa, w_vpa, x_vperp, w_vperp, 
-                    vpa_val, vperp_val)
-    end
-    nquad_vperp = get_scaled_x_w_no_divergences!(x_vperp, w_vperp, x_legendre, w_legendre, vperp_min, vperp_max)
-    for ielement_vpap in ielement_vpa_hi+1:vpa.nelement_local
-        # do integration over part of the domain with no divergences
-        vpa_nodes = get_nodes(vpa,ielement_vpap)
-        vpa_min, vpa_max = vpa_nodes[1], vpa_nodes[end]
-        nquad_vpa = get_scaled_x_w_no_divergences!(x_vpa, w_vpa, x_legendre, w_legendre, vpa_min, vpa_max)
-        local_element_integration!(G0_weights,G1_weights,H0_weights,H1_weights,H2_weights,H3_weights,
-                    nquad_vpa,ielement_vpap,vpa,
-                    nquad_vperp,ielement_vperpp,vperp,
-                    x_vpa, w_vpa, x_vperp, w_vperp, 
-                    vpa_val, vperp_val)
-                    
-    end
-    return nothing
-end
-
-"""
-Function for computing the quadratures and carrying out the loop over the 
-primed `vpa` coordinate in doing the numerical integration. 
-Uses a Gauss-Legendre quadrature assuming no divergences in the integrand.
-"""
-function loop_over_vpa_elements_no_divergences!(G0_weights,G1_weights,H0_weights,H1_weights,H2_weights,H3_weights,
-                            vpa,ielement_vpa_low,ielement_vpa_hi, # info about primed vperp grids
-                            nquad_vperp,ielement_vperpp,vperp_nodes,vperp, # info about primed vperp grids
-                            x_vpa, w_vpa, x_vperp, w_vperp, # arrays to store points and weights for primed (source) grids
-                            x_legendre,w_legendre,
-                            vpa_val, vperp_val)
-    for ielement_vpap in 1:vpa.nelement_local
-        # do integration over part of the domain with no divergences
-        vpa_nodes = get_nodes(vpa,ielement_vpap)
-        vpa_min, vpa_max = vpa_nodes[1], vpa_nodes[end]
-        nquad_vpa = get_scaled_x_w_no_divergences!(x_vpa, w_vpa, x_legendre, w_legendre, vpa_min, vpa_max)
-        local_element_integration!(G0_weights,G1_weights,H0_weights,H1_weights,H2_weights,H3_weights,
-                    nquad_vpa,ielement_vpap,vpa,
-                    nquad_vperp,ielement_vperpp,vperp,
-                    x_vpa, w_vpa, x_vperp, w_vperp, 
-                    vpa_val, vperp_val)
-                    
-    end
-    return nothing
-end
-
-"""
-Function for computing the quadratures and carrying out the loop over the 
-primed `vperp` coordinate in doing the numerical integration. Splits the integrand
-into three pieces -- two which use Gauss-Legendre quadrature assuming no divergences
-in the integrand, and one which assumes a logarithmic divergence and uses a
-Gauss-Laguerre quadrature with an (exponential) change of variables to mitigate this divergence.
-This function calls `loop_over_vpa_elements_no_divergences!()` and `loop_over_vpa_elements!()`
-to carry out the primed `vpa` loop within the primed `vperp` loop.
-"""
-function loop_over_vperp_vpa_elements!(G0_weights,G1_weights,H0_weights,H1_weights,H2_weights,H3_weights,
-                vpa,ielement_vpa_low,ielement_vpa_hi, # info about primed vpa grids
-                vperp,ielement_vperp_low,ielement_vperp_hi, # info about primed vperp grids
-                x_vpa, w_vpa, x_vperp, w_vperp, # arrays to store points and weights for primed (source) grids
-                x_legendre,w_legendre,x_laguerre,w_laguerre,
-                igrid_vpa, igrid_vperp, vpa_val, vperp_val)
-    for ielement_vperpp in 1:ielement_vperp_low-1
-        
-        vperp_nodes = get_nodes(vperp,ielement_vperpp)
-        vperp_max = vperp_nodes[end]
-        vperp_min = vperp_nodes[1]*nel_low(ielement_vperpp,vperp.nelement_local) 
-        nquad_vperp = get_scaled_x_w_no_divergences!(x_vperp, w_vperp, x_legendre, w_legendre, vperp_min, vperp_max)
-        loop_over_vpa_elements_no_divergences!(G0_weights,G1_weights,H0_weights,H1_weights,H2_weights,H3_weights,
-                vpa,ielement_vpa_low,ielement_vpa_hi, # info about primed vpa grids
-                nquad_vperp,ielement_vperpp,vperp_nodes,vperp, # info about primed vperp grids
-                x_vpa, w_vpa, x_vperp, w_vperp, # arrays to store points and weights for primed (source) grids
-                x_legendre,w_legendre,
-                vpa_val, vperp_val)
-    end
-    for ielement_vperpp in ielement_vperp_low:ielement_vperp_hi
-        
-        #vperp_nodes = get_nodes(vperp,ielement_vperpp)
-        #vperp_max = vperp_nodes[end]
-        #vperp_min = vperp_nodes[1]*nel_low(ielement_vperpp,vperp.nelement_local) 
-        #nquad_vperp = get_scaled_x_w_no_divergences!(x_vperp, w_vperp, x_legendre, w_legendre, vperp_min, vperp_max)
-        #nquad_vperp = get_scaled_x_w_with_divergences!(x_vperp, w_vperp, x_legendre, w_legendre, x_laguerre, w_laguerre, vperp_min, vperp_max, vperp_nodes, igrid_vperp, vperp_val)
-        loop_over_vpa_elements!(G0_weights,G1_weights,H0_weights,H1_weights,H2_weights,H3_weights,
-                vpa,ielement_vpa_low,ielement_vpa_hi, # info about primed vpa grids
-                vperp,ielement_vperpp, # info about primed vperp grids
-                x_vpa, w_vpa, x_vperp, w_vperp, # arrays to store points and weights for primed (source) grids
-                x_legendre,w_legendre,x_laguerre,w_laguerre,
-                igrid_vpa, igrid_vperp, vpa_val, vperp_val)
-    end
-    for ielement_vperpp in ielement_vperp_hi+1:vperp.nelement_local
-        
-        vperp_nodes = get_nodes(vperp,ielement_vperpp)
-        vperp_max = vperp_nodes[end]
-        vperp_min = vperp_nodes[1]*nel_low(ielement_vperpp,vperp.nelement_local) 
-        nquad_vperp = get_scaled_x_w_no_divergences!(x_vperp, w_vperp, x_legendre, w_legendre, vperp_min, vperp_max)
-        loop_over_vpa_elements_no_divergences!(G0_weights,G1_weights,H0_weights,H1_weights,H2_weights,H3_weights,
-                vpa,ielement_vpa_low,ielement_vpa_hi, # info about primed vpa grids
-                nquad_vperp,ielement_vperpp,vperp_nodes,vperp, # info about primed vperp grids
-                x_vpa, w_vpa, x_vperp, w_vperp, # arrays to store points and weights for primed (source) grids
-                x_legendre,w_legendre,
-                vpa_val, vperp_val)
-    end
-    return nothing
-end
-
-"""
-The function `loop_over_vperp_vpa_elements_no_divergences!()` was used for debugging.
-By changing the source where `loop_over_vperp_vpa_elements!()` is called to
-instead call this function we can verify that the Gauss-Legendre quadrature
-is adequate for integrating a divergence-free integrand. This function should be 
-kept until we understand the problems preventing machine-precision accurary in the pure integration method of computing the
-Rosenbluth potentials.
-"""
-function loop_over_vperp_vpa_elements_no_divergences!(G0_weights,G1_weights,H0_weights,H1_weights,H2_weights,H3_weights,
-                vpa,ielement_vpa_low,ielement_vpa_hi, # info about primed vpa grids
-                vperp,ielement_vperp_low,ielement_vperp_hi, # info about primed vperp grids
-                x_vpa, w_vpa, x_vperp, w_vperp, # arrays to store points and weights for primed (source) grids
-                x_legendre,w_legendre,
-                igrid_vpa, igrid_vperp, vpa_val, vperp_val)
-    for ielement_vperpp in 1:vperp.nelement_local
-        vperp_nodes = get_nodes(vperp,ielement_vperpp)
-        vperp_max = vperp_nodes[end]
-        vperp_min = vperp_nodes[1]*nel_low(ielement_vperpp,nelement_vperp) 
-        nquad_vperp = get_scaled_x_w_no_divergences!(x_vperp, w_vperp, x_legendre, w_legendre, vperp_min, vperp_max)
-        loop_over_vpa_elements_no_divergences!(G0_weights,G1_weights,H0_weights,H1_weights,H2_weights,H3_weights,
-                vpa,ielement_vpa_low,ielement_vpa_hi, # info about primed vpa grids
-                nquad_vperp,ielement_vperpp,vperp_nodes,vperp, # info about primed vperp grids
-                x_vpa, w_vpa, x_vperp, w_vperp, # arrays to store points and weights for primed (source) grids
-                x_legendre,w_legendre,
-                vpa_val, vperp_val)
-    end
-    return nothing
-end 
-
-
-"""
-    ic_func(ivpa::mk_int,ivperp::mk_int,nvpa::mk_int)
-
-Get the 'linear index' corresponding to `ivpa` and `ivperp`. Defined so that the linear
-index corresponds to the underlying layout in memory of a 2d array indexed by
-`[ivpa,ivperp]`, i.e. for a 2d array `f2d`:
-* `size(f2d) == (vpa.n, vperp.n)`
-* For a reference to `f2d` that is reshaped to a vector (a 1d array) `f1d = vec(f2d)` than
-  for any `ivpa` and `ivperp` it is true that `f1d[ic_func(ivpa,ivperp)] ==
-  f2d[ivpa,ivperp]`.
-"""
-function ic_func(ivpa::mk_int,ivperp::mk_int,nvpa::mk_int)
-    return ivpa + nvpa*(ivperp-1)
-end
-
-"""
-    ivperp_func(ic::mk_int,nvpa::mk_int)
-
-Get the `vperp` index `ivperp` that corresponds to a 'linear index' `ic` that spans a 2d
-velocity space.
-
-Defined so that `ivperp_func(inc_func(ivpa,ivperp,nvpa), nvpa) == ivperp`.
-
-See also [`ic_func`](@ref), [`ivpa_func`](@ref).
-"""
-function ivperp_func(ic::mk_int,nvpa::mk_int)
-    return floor(Int64,(ic-1)/nvpa) + 1
-end
-
-"""
-    ivpa_func(ic::mk_int,nvpa::mk_int)
-
-Get the `vpa` index `ivpa` that corresponds to a 'linear index' `ic` that spans a 2d
-velocity space.
-
-Defined so that `ivpa_func(inc_func(ivpa,ivperp,nvpa), nvpa) == ivpa`.
-
-See also [`ic_func`](@ref), [`ivperp_func`](@ref).
-"""
-function ivpa_func(ic::mk_int,nvpa::mk_int)
-    ivpa = ic - nvpa*(ivperp_func(ic,nvpa) - 1)
-    return ivpa
-end
-
-"""
-Function that returns the sparse matrix index used to directly construct the nonzero
-entries of a 2D assembled sparse matrix.
-
-The index is built from a zero-based offset in the `vpa` direction and a zero-based
-offset in the `vperp` direction. Each offset combines the local (unprimed, primed)
-index pair within an element with the element number. The `vpa` offset varies fastest;
-the returned index is one-based.
-
-`nelement_vperp` is accepted but does not enter the calculation.
-"""
-function icsc_func(ivpa_local::mk_int,ivpap_local::mk_int,
-                   ielement_vpa::mk_int,
-                   ngrid_vpa::mk_int,nelement_vpa::mk_int,
-                   ivperp_local::mk_int,ivperpp_local::mk_int,
-                   ielement_vperp::mk_int,
-                   ngrid_vperp::mk_int,nelement_vperp::mk_int)
-    # Offset between consecutive elements. The local offsets below range over
-    # 0:(ngrid^2 - 1), so the last entry of one element coincides with the first
-    # entry of the next.
-    stride_vpa = ngrid_vpa^2 - 1
-    stride_vperp = ngrid_vperp^2 - 1
-    # total number of entries in the vpa direction,
-    # equal to (nelement_vpa - 1)*(ngrid_vpa^2 - 1) + ngrid_vpa^2
-    ntot_vpa = nelement_vpa*stride_vpa + 1
-
-    # zero-based offsets in each direction
-    offset_vpa = (ielement_vpa - 1)*stride_vpa + (ivpa_local - 1)*ngrid_vpa + (ivpap_local - 1)
-    offset_vperp = (ielement_vperp - 1)*stride_vperp + (ivperp_local - 1)*ngrid_vperp + (ivperpp_local - 1)
-
-    return 1 + offset_vpa + ntot_vpa*offset_vperp
-end
-
-"""
-Struct to contain data needed to create a sparse matrix.
-
-The three vectors are index-aligned: entry `k` describes one stored value `SS[k]` located
-at row `II[k]` and column `JJ[k]`.
-
-The row and column vectors hold integer indices, matching the `mk_int` arrays created by
-`allocate_sparse_matrix_constructor()` and the `mk_int` arguments taken by
-`assign_constructor_data!()` and `assemble_constructor_data!()`.
-"""
-struct sparse_matrix_constructor
-    # the Ith row
-    II::Array{mk_int,1}
-    # the Jth column
-    JJ::Array{mk_int,1}
-    # the data S[I,J]
-    SS::Array{mk_float,1}
-end
-
-"""
-Function to allocate an instance of `sparse_matrix_constructor` with room for `nsparse`
-entries. The row indices, column indices and values are all initialised to zero.
-"""
-function allocate_sparse_matrix_constructor(nsparse::mk_int)
-    row_indices = zeros(mk_int, nsparse)
-    column_indices = zeros(mk_int, nsparse)
-    entries = zeros(mk_float, nsparse)
-    return sparse_matrix_constructor(row_indices, column_indices, entries)
-end
-
-"""
-Function to assign data to an instance of `sparse_matrix_constructor`.
-
-Slot `icsc` of `data` is overwritten with row index `ii`, column index `jj` and value `ss`.
-"""
-function assign_constructor_data!(data::sparse_matrix_constructor,icsc::mk_int,ii::mk_int,jj::mk_int,ss::mk_float)
-    rows, columns, entries = data.II, data.JJ, data.SS
-    rows[icsc] = ii
-    columns[icsc] = jj
-    # overwrite any value previously stored in this slot
-    entries[icsc] = ss
-    return nothing
-end
-
-"""
-Function to assemble data in an instance of `sparse_matrix_constructor`.
-
-The row index `ii` and column index `jj` are stored in slot `icsc`, while the value `ss`
-is added to whatever is already stored in `data.SS[icsc]` (`+=` rather than `=`).
-"""
-function assemble_constructor_data!(data::sparse_matrix_constructor,icsc::mk_int,ii::mk_int,jj::mk_int,ss::mk_float)
-    rows, columns, entries = data.II, data.JJ, data.SS
-    rows[icsc] = ii
-    columns[icsc] = jj
-    # accumulate, so that repeated contributions to the same slot are summed
-    entries[icsc] += ss
-    return nothing
-end
-
-"""
-Wrapper function to create a sparse matrix from the row indices, column indices and
-values stored in an instance of `sparse_matrix_constructor`, by passing them to `sparse()`.
-"""
-function create_sparse_matrix(data::sparse_matrix_constructor)
-    rows, columns, entries = data.II, data.JJ, data.SS
-    return sparse(rows, columns, entries)
-end
-
-"""
-Function to allocate an instance of `vpa_vperp_boundary_data`.
-
-The lower and upper `vpa` boundary buffers have length `vperp.n`; the upper `vperp`
-boundary buffer has length `vpa.n`.
-"""
-function allocate_boundary_data(vpa,vperp)
-    # The following velocity-space-sized buffer arrays are used to evaluate the
-    # collision operator for a single species at a single spatial point. They are
-    # shared-memory arrays. The `comm` argument to `allocate_shared_float()` is used to
-    # set up the shared-memory arrays so that they are shared only by the processes on
-    # `comm_anyv_subblock[]` rather than on the full `comm_block[]`. This means that
-    # different subblocks that are calculating the collision operator at different
-    # spatial points do not interfere with each others' buffer arrays.
-    shared_buffer(n) = allocate_shared_float(n; comm=comm_anyv_subblock[])
-
-    # allocation order is kept: lower vpa, upper vpa, upper vperp
-    lower_vpa = shared_buffer(vperp.n)
-    upper_vpa = shared_buffer(vperp.n)
-    upper_vperp = shared_buffer(vpa.n)
-    return vpa_vperp_boundary_data(lower_vpa, upper_vpa, upper_vperp)
-end
-
-"""
-Function to assign precomputed (exact) data to an instance
-of `vpa_vperp_boundary_data`. Used in testing.
-
-The boundary values are copied from the 2d array `func_exact`, indexed `[ivpa,ivperp]`:
-the first and last `vpa` rows fill the lower and upper `vpa` boundaries, and the last
-`vperp` column fills the upper `vperp` boundary.
-"""
-function assign_exact_boundary_data!(func_data::vpa_vperp_boundary_data,
-                                        func_exact,vpa,vperp)
-    begin_anyv_region()
-    ivpa_last = vpa.n
-    ivperp_last = vperp.n
-    @anyv_serial_region begin
-        lower_vpa = func_data.lower_boundary_vpa
-        upper_vpa = func_data.upper_boundary_vpa
-        upper_vperp = func_data.upper_boundary_vperp
-        # boundaries at the first and last vpa grid points
-        for ivperp in 1:ivperp_last
-            lower_vpa[ivperp] = func_exact[1,ivperp]
-            upper_vpa[ivperp] = func_exact[ivpa_last,ivperp]
-        end
-        # boundary at the last vperp grid point
-        for ivpa in 1:ivpa_last
-            upper_vperp[ivpa] = func_exact[ivpa,ivperp_last]
-        end
-    end
-    return nothing
-end
-
-"""
-Function to allocate an instance of `rosenbluth_potential_boundary_data`.
-
-One `vpa_vperp_boundary_data` buffer set is allocated, via `allocate_boundary_data()`,
-for each of the eight quantities stored in the struct.
-"""
-function allocate_rosenbluth_potential_boundary_data(vpa,vperp)
-    new_boundary_data() = allocate_boundary_data(vpa,vperp)
-    # arguments are evaluated left to right, so the buffers are allocated in the
-    # order in which they are listed here
-    return rosenbluth_potential_boundary_data(
-        new_boundary_data(), # H_data
-        new_boundary_data(), # dHdvpa_data
-        new_boundary_data(), # dHdvperp_data
-        new_boundary_data(), # G_data
-        new_boundary_data(), # dGdvperp_data
-        new_boundary_data(), # d2Gdvperp2_data
-        new_boundary_data(), # d2Gdvperpdvpa_data
-        new_boundary_data()) # d2Gdvpa2_data
-end
-
-"""
-Function to fill every member of an instance of `rosenbluth_potential_boundary_data`, in
-place, from precomputed (exact) 2d arrays, by calling `assign_exact_boundary_data!()` once
-per member. Used in testing.
-"""
-function calculate_rosenbluth_potential_boundary_data_exact!(rpbd::rosenbluth_potential_boundary_data,
-  H_exact,dHdvpa_exact,dHdvperp_exact,G_exact,dGdvperp_exact,
-  d2Gdvperp2_exact,d2Gdvperpdvpa_exact,d2Gdvpa2_exact,
-  vpa,vperp)
-    # shorthand that fixes the coordinate arguments shared by all the calls
-    fill_boundaries!(boundary_data, exact) = assign_exact_boundary_data!(boundary_data,exact,vpa,vperp)
-
-    fill_boundaries!(rpbd.H_data, H_exact)
-    fill_boundaries!(rpbd.dHdvpa_data, dHdvpa_exact)
-    fill_boundaries!(rpbd.dHdvperp_data, dHdvperp_exact)
-    fill_boundaries!(rpbd.G_data, G_exact)
-    fill_boundaries!(rpbd.dGdvperp_data, dGdvperp_exact)
-    fill_boundaries!(rpbd.d2Gdvperp2_data, d2Gdvperp2_exact)
-    fill_boundaries!(rpbd.d2Gdvperpdvpa_data, d2Gdvperpdvpa_exact)
-    fill_boundaries!(rpbd.d2Gdvpa2_data, d2Gdvpa2_exact)
-    return nothing
-end
-
-"""
-Function to carry out the direct integration of a formal definition of one
-of the Rosenbluth potentials, on the boundaries of the `(vpa,vperp)` domain, 
-using the precomputed integration weights with dimension 4.
-The result is stored in an instance of `vpa_vperp_boundary_data`.
-Used in testing.
-
-Each boundary value is the sum over all `(ivpap,ivperpp)` of
-`weight[ivpap,ivperpp,ivpa,ivperp]*func_input[ivpap,ivperpp]`, evaluated with
-* `ivpa = 1` for `lower_boundary_vpa[ivperp]`,
-* `ivpa = vpa.n` for `upper_boundary_vpa[ivperp]`,
-* `ivperp = vperp.n` for `upper_boundary_vperp[ivpa]`.
-
-Returns `nothing`; `func_data` is modified in place.
-"""
-function calculate_boundary_data!(func_data::vpa_vperp_boundary_data,
-                                  weight::MPISharedArray{mk_float,4},func_input,vpa,vperp)
-    nvpa = vpa.n
-    nvperp = vperp.n
-    # NOTE(review): presumably selects the vperp loop ranges without a synchronization
-    # barrier -- confirm against the definition of `begin_anyv_vperp_region()`.
-    begin_anyv_vperp_region(no_synchronize=true)
-    @loop_vperp ivperp begin
-        # reset the accumulators before summing over the primed indices
-        func_data.lower_boundary_vpa[ivperp] = 0.0
-        func_data.upper_boundary_vpa[ivperp] = 0.0
-        for ivperpp in 1:nvperp
-            for ivpap in 1:nvpa
-                func_data.lower_boundary_vpa[ivperp] += weight[ivpap,ivperpp,1,ivperp]*func_input[ivpap,ivperpp]
-                func_data.upper_boundary_vpa[ivperp] += weight[ivpap,ivperpp,nvpa,ivperp]*func_input[ivpap,ivperpp]
-            end
-        end
-    end
-    #for ivpa in 1:nvpa
-    # NOTE(review): as above, but for the vpa loop ranges -- confirm.
-    begin_anyv_vpa_region(no_synchronize=true)
-    @loop_vpa ivpa begin
-        # reset the accumulator before summing over the primed indices
-        func_data.upper_boundary_vperp[ivpa] = 0.0
-        for ivperpp in 1:nvperp
-            for ivpap in 1:nvpa
-                func_data.upper_boundary_vperp[ivpa] += weight[ivpap,ivperpp,ivpa,nvperp]*func_input[ivpap,ivperpp]
-            end
-        end
-    end
-    return nothing
-end
-
-"""
-Function to carry out the direct integration of a formal definition of one
-of the Rosenbluth potentials, on the boundaries of the `(vpa,vperp)` domain, 
-using the precomputed integration weights with dimension 3.
-The result is stored in an instance of `vpa_vperp_boundary_data`.
-
-Each boundary value is the sum over all `(ivpap,ivperpp)` of a boundary weight times
-`func_input[ivpap,ivperpp]`, using
-* `weight.lower_vpa_boundary[ivpap,ivperpp,ivperp]` for `lower_boundary_vpa[ivperp]`,
-* `weight.upper_vpa_boundary[ivpap,ivperpp,ivperp]` for `upper_boundary_vpa[ivperp]`,
-* `weight.upper_vperp_boundary[ivpap,ivperpp,ivpa]` for `upper_boundary_vperp[ivpa]`.
-
-Returns `nothing`; `func_data` is modified in place.
-"""
-function calculate_boundary_data!(func_data::vpa_vperp_boundary_data,
-                                  weight::boundary_integration_weights_struct,
-                                  func_input,vpa,vperp)
-    nvpa = vpa.n
-    nvperp = vperp.n
-    # NOTE(review): presumably selects the vperp loop ranges without a synchronization
-    # barrier -- confirm against the definition of `begin_anyv_vperp_region()`.
-    begin_anyv_vperp_region(no_synchronize=true)
-    @loop_vperp ivperp begin
-        # reset the accumulators before summing over the primed indices
-        func_data.lower_boundary_vpa[ivperp] = 0.0
-        func_data.upper_boundary_vpa[ivperp] = 0.0
-        for ivperpp in 1:nvperp
-            for ivpap in 1:nvpa
-                func_data.lower_boundary_vpa[ivperp] += weight.lower_vpa_boundary[ivpap,ivperpp,ivperp]*func_input[ivpap,ivperpp]
-                func_data.upper_boundary_vpa[ivperp] += weight.upper_vpa_boundary[ivpap,ivperpp,ivperp]*func_input[ivpap,ivperpp]
-            end
-        end
-    end
-    #for ivpa in 1:nvpa
-    # NOTE(review): as above, but for the vpa loop ranges -- confirm.
-    begin_anyv_vpa_region(no_synchronize=true)
-    @loop_vpa ivpa begin
-        # reset the accumulator before summing over the primed indices
-        func_data.upper_boundary_vperp[ivpa] = 0.0
-        for ivperpp in 1:nvperp
-            for ivpap in 1:nvpa
-                func_data.upper_boundary_vperp[ivpa] += weight.upper_vperp_boundary[ivpap,ivperpp,ivpa]*func_input[ivpap,ivperpp]
-            end
-        end
-    end
-    # NOTE(review): no region change is made here; the function returns with the vpa
-    # region begun above still in effect.
-    return nothing
-end
-
-"""
-Function to call direct integration function `calculate_boundary_data!()` and 
-assign data to an instance of `rosenbluth_potential_boundary_data`, in place,
-without allocation.
-
-The derivatives of `pdf` are first written into the buffers `fkpl.dfdvperp`,
-`fkpl.dfdvpa` and `fkpl.d2fdvperpdvpa`. Each member of `rpbd` is then filled by
-integrating one of these inputs against one of the weights stored in `fkpl`:
-
-| member of `rpbd`     | weights      | integrand       |
-|----------------------|--------------|-----------------|
-| `H_data`             | `H0_weights` | `pdf`           |
-| `dHdvpa_data`        | `H0_weights` | `dfdvpa`        |
-| `dHdvperp_data`      | `H1_weights` | `dfdvperp`      |
-| `G_data`             | `G0_weights` | `pdf`           |
-| `dGdvperp_data`      | `G1_weights` | `dfdvperp`      |
-| `d2Gdvperp2_data`    | `H2_weights` | `dfdvperp`      |
-| `d2Gdvperpdvpa_data` | `G1_weights` | `d2fdvperpdvpa` |
-| `d2Gdvpa2_data`      | `H3_weights` | `dfdvpa`        |
-
-`G_data` is only updated when `calculate_GG=true` and `dGdvperp_data` is only updated when
-`calculate_dGdvperp=true`; otherwise those members are left untouched.
-"""
-function calculate_rosenbluth_potential_boundary_data!(rpbd::rosenbluth_potential_boundary_data,
-    fkpl::Union{fokkerplanck_arrays_direct_integration_struct,fokkerplanck_boundary_data_arrays_struct},pdf,vpa,vperp,vpa_spectral,vperp_spectral;
-    calculate_GG=false,calculate_dGdvperp=false)
-    # get derivatives of pdf
-    dfdvperp = fkpl.dfdvperp
-    dfdvpa = fkpl.dfdvpa
-    d2fdvperpdvpa = fkpl.d2fdvperpdvpa
-    #for ivpa in 1:vpa.n
-    begin_anyv_vpa_region()
-    @loop_vpa ivpa begin
-        # derivative along vperp at fixed ivpa
-        @views derivative!(dfdvperp[ivpa,:], pdf[ivpa,:], vperp, vperp_spectral)
-    end
-    begin_anyv_vperp_region()
-    @loop_vperp ivperp begin
-    #for ivperp in 1:vperp.n
-        # derivatives along vpa at fixed ivperp; the mixed derivative is obtained by
-        # differentiating the dfdvperp computed in the previous loop
-        @views derivative!(dfdvpa[:,ivperp], pdf[:,ivperp], vpa, vpa_spectral)
-        @views derivative!(d2fdvperpdvpa[:,ivperp], dfdvperp[:,ivperp], vpa, vpa_spectral)
-    end
-    # ensure data is synchronized
-    # (the calls below begin their regions with no_synchronize=true, and read the
-    # derivative buffers written above)
-    _anyv_subblock_synchronize()
-    # carry out the numerical integration 
-    calculate_boundary_data!(rpbd.H_data,fkpl.H0_weights,pdf,vpa,vperp)
-    calculate_boundary_data!(rpbd.dHdvpa_data,fkpl.H0_weights,dfdvpa,vpa,vperp)
-    calculate_boundary_data!(rpbd.dHdvperp_data,fkpl.H1_weights,dfdvperp,vpa,vperp)
-    if calculate_GG
-        calculate_boundary_data!(rpbd.G_data,fkpl.G0_weights,pdf,vpa,vperp)
-    end
-    if calculate_dGdvperp
-        calculate_boundary_data!(rpbd.dGdvperp_data,fkpl.G1_weights,dfdvperp,vpa,vperp)
-    end
-    calculate_boundary_data!(rpbd.d2Gdvperp2_data,fkpl.H2_weights,dfdvperp,vpa,vperp)
-    calculate_boundary_data!(rpbd.d2Gdvperpdvpa_data,fkpl.G1_weights,d2fdvperpdvpa,vpa,vperp)
-    calculate_boundary_data!(rpbd.d2Gdvpa2_data,fkpl.H3_weights,dfdvpa,vpa,vperp)
-    
-    return nothing
-end
-
-"""
-    multipole_H(vpa, vperp, I00, ..., I08)
-
-Evaluate a truncated multipole-series expression for `H` at the single point
-`(vpa, vperp)` and return it as a scalar.
-
-The result is a linear combination of the 25 scalar coefficients `Ijk`, each multiplied
-by a rational function of `vpa` and `vperp`, with the whole sum finally multiplied by
-`(vpa^2 + vperp^2)^(-1/2)`.
-
-NOTE(review): the coefficients `Ijk` are presumably moments of the source distribution
-computed by the caller -- confirm at the call site.
-
-The expression contains negative powers of `vpa^2 + vperp^2`, so it is not well defined
-at `vpa == vperp == 0`.
-"""
-function multipole_H(vpa::mk_float,vperp::mk_float,
-                   I00::mk_float, I10::mk_float, I20::mk_float, I30::mk_float, I40::mk_float, I50::mk_float, I60::mk_float, I70::mk_float, I80::mk_float, 
-                   I02::mk_float, I12::mk_float, I22::mk_float, I32::mk_float, I42::mk_float, I52::mk_float, I62::mk_float,
-                   I04::mk_float, I14::mk_float, I24::mk_float, I34::mk_float, I44::mk_float, I06::mk_float, I16::mk_float, I26::mk_float, I08::mk_float)
-   # sum up terms in the multipole series 
-   H_series = (I80*((128*vpa^8 - 1792*vpa^6*vperp^2 + 3360*vpa^4*vperp^4 - 1120*vpa^2*vperp^6 + 35*vperp^8)/(128*(vpa^2 + vperp^2)^8))
-             +I70*((vpa*(16*vpa^6 - 168*vpa^4*vperp^2 + 210*vpa^2*vperp^4 - 35*vperp^6))/(16*(vpa^2 + vperp^2)^7))
-             +I62*((-7*(128*vpa^8 - 1792*vpa^6*vperp^2 + 3360*vpa^4*vperp^4 - 1120*vpa^2*vperp^6 + 35*vperp^8))/(64*(vpa^2 + vperp^2)^8))
-             +I60*((16*vpa^6 - 120*vpa^4*vperp^2 + 90*vpa^2*vperp^4 - 5*vperp^6)/(16*(vpa^2 + vperp^2)^6))
-             +I52*((21*vpa*(-16*vpa^6 + 168*vpa^4*vperp^2 - 210*vpa^2*vperp^4 + 35*vperp^6))/(32*(vpa^2 + vperp^2)^7))
-             +I50*((8*vpa^5 - 40*vpa^3*vperp^2 + 15*vpa*vperp^4)/(8*(vpa^2 + vperp^2)^5))
-             +I44*((105*(128*vpa^8 - 1792*vpa^6*vperp^2 + 3360*vpa^4*vperp^4 - 1120*vpa^2*vperp^6 + 35*vperp^8))/(512*(vpa^2 + vperp^2)^8))
-             +I42*((-15*(16*vpa^6 - 120*vpa^4*vperp^2 + 90*vpa^2*vperp^4 - 5*vperp^6))/(32*(vpa^2 + vperp^2)^6))
-             +I40*((8*vpa^4 - 24*vpa^2*vperp^2 + 3*vperp^4)/(8*(vpa^2 + vperp^2)^4))
-             +I34*((105*vpa*(16*vpa^6 - 168*vpa^4*vperp^2 + 210*vpa^2*vperp^4 - 35*vperp^6))/(128*(vpa^2 + vperp^2)^7))
-             +I32*((-5*vpa*(8*vpa^4 - 40*vpa^2*vperp^2 + 15*vperp^4))/(8*(vpa^2 + vperp^2)^5))
-             +I30*((vpa*(2*vpa^2 - 3*vperp^2))/(2*(vpa^2 + vperp^2)^3))
-             +I26*((-35*(128*vpa^8 - 1792*vpa^6*vperp^2 + 3360*vpa^4*vperp^4 - 1120*vpa^2*vperp^6 + 35*vperp^8))/(512*(vpa^2 + vperp^2)^8))
-             +I24*((45*(16*vpa^6 - 120*vpa^4*vperp^2 + 90*vpa^2*vperp^4 - 5*vperp^6))/(128*(vpa^2 + vperp^2)^6))
-             +I22*((-3*(8*vpa^4 - 24*vpa^2*vperp^2 + 3*vperp^4))/(8*(vpa^2 + vperp^2)^4))
-             +I20*(-1/2*(-2*vpa^2 + vperp^2)/(vpa^2 + vperp^2)^2)
-             +I16*((-35*vpa*(16*vpa^6 - 168*vpa^4*vperp^2 + 210*vpa^2*vperp^4 - 35*vperp^6))/(256*(vpa^2 + vperp^2)^7))
-             +I14*((15*vpa*(8*vpa^4 - 40*vpa^2*vperp^2 + 15*vperp^4))/(64*(vpa^2 + vperp^2)^5))
-             +I12*((-6*vpa^3 + 9*vpa*vperp^2)/(4*(vpa^2 + vperp^2)^3))
-             +I10*(vpa/(vpa^2 + vperp^2))
-             +I08*((35*(128*vpa^8 - 1792*vpa^6*vperp^2 + 3360*vpa^4*vperp^4 - 1120*vpa^2*vperp^6 + 35*vperp^8))/(16384*(vpa^2 + vperp^2)^8))
-             +I06*((-5*(16*vpa^6 - 120*vpa^4*vperp^2 + 90*vpa^2*vperp^4 - 5*vperp^6))/(256*(vpa^2 + vperp^2)^6))
-             +I04*((3*(8*vpa^4 - 24*vpa^2*vperp^2 + 3*vperp^4))/(64*(vpa^2 + vperp^2)^4))
-             +I02*((-2*vpa^2 + vperp^2)/(4*(vpa^2 + vperp^2)^2))
-             +I00*(1))
-   # multiply by overall prefactor
-   H_series *= ((vpa^2 + vperp^2)^(-1/2))
-   return H_series
-end
-
-"""
-    multipole_dHdvpa(vpa, vperp, I00, ..., I08)
-
-Evaluate a truncated multipole-series expression for `dHdvpa` at the single point
-`(vpa, vperp)` and return it as a scalar.
-
-The result is a linear combination of the 25 scalar coefficients `Ijk`, each multiplied
-by a rational function of `vpa` and `vperp`, with the whole sum finally multiplied by
-`-(vpa^2 + vperp^2)^(-3/2)`.
-
-NOTE(review): the coefficients `Ijk` are presumably moments of the source distribution
-computed by the caller -- confirm at the call site.
-
-The expression contains negative powers of `vpa^2 + vperp^2`, so it is not well defined
-at `vpa == vperp == 0`.
-"""
-function multipole_dHdvpa(vpa::mk_float,vperp::mk_float,
-                   I00::mk_float, I10::mk_float, I20::mk_float, I30::mk_float, I40::mk_float, I50::mk_float, I60::mk_float, I70::mk_float, I80::mk_float, 
-                   I02::mk_float, I12::mk_float, I22::mk_float, I32::mk_float, I42::mk_float, I52::mk_float, I62::mk_float,
-                   I04::mk_float, I14::mk_float, I24::mk_float, I34::mk_float, I44::mk_float, I06::mk_float, I16::mk_float, I26::mk_float, I08::mk_float)
-   # sum up terms in the multipole series 
-   dHdvpa_series = (I80*((9*vpa*(128*vpa^8 - 2304*vpa^6*vperp^2 + 6048*vpa^4*vperp^4 - 3360*vpa^2*vperp^6 + 315*vperp^8))/(128*(vpa^2 + vperp^2)^8))
-                +I70*((128*vpa^8 - 1792*vpa^6*vperp^2 + 3360*vpa^4*vperp^4 - 1120*vpa^2*vperp^6 + 35*vperp^8)/(16*(vpa^2 + vperp^2)^7))
-                +I62*((-63*(128*vpa^9 - 2304*vpa^7*vperp^2 + 6048*vpa^5*vperp^4 - 3360*vpa^3*vperp^6 + 315*vpa*vperp^8))/(64*(vpa^2 + vperp^2)^8))
-                +I60*((7*vpa*(16*vpa^6 - 168*vpa^4*vperp^2 + 210*vpa^2*vperp^4 - 35*vperp^6))/(16*(vpa^2 + vperp^2)^6))
-                +I52*((-21*(128*vpa^8 - 1792*vpa^6*vperp^2 + 3360*vpa^4*vperp^4 - 1120*vpa^2*vperp^6 + 35*vperp^8))/(32*(vpa^2 + vperp^2)^7))
-                +I50*((3*(16*vpa^6 - 120*vpa^4*vperp^2 + 90*vpa^2*vperp^4 - 5*vperp^6))/(8*(vpa^2 + vperp^2)^5))
-                +I44*((945*vpa*(128*vpa^8 - 2304*vpa^6*vperp^2 + 6048*vpa^4*vperp^4 - 3360*vpa^2*vperp^6 + 315*vperp^8))/(512*(vpa^2 + vperp^2)^8))
-                +I42*((-105*vpa*(16*vpa^6 - 168*vpa^4*vperp^2 + 210*vpa^2*vperp^4 - 35*vperp^6))/(32*(vpa^2 + vperp^2)^6))
-                +I40*((5*vpa*(8*vpa^4 - 40*vpa^2*vperp^2 + 15*vperp^4))/(8*(vpa^2 + vperp^2)^4))
-                +I34*((105*(128*vpa^8 - 1792*vpa^6*vperp^2 + 3360*vpa^4*vperp^4 - 1120*vpa^2*vperp^6 + 35*vperp^8))/(128*(vpa^2 + vperp^2)^7))
-                +I32*((-15*(16*vpa^6 - 120*vpa^4*vperp^2 + 90*vpa^2*vperp^4 - 5*vperp^6))/(8*(vpa^2 + vperp^2)^5))
-                +I30*((8*vpa^4 - 24*vpa^2*vperp^2 + 3*vperp^4)/(2*(vpa^2 + vperp^2)^3))
-                +I26*((-315*vpa*(128*vpa^8 - 2304*vpa^6*vperp^2 + 6048*vpa^4*vperp^4 - 3360*vpa^2*vperp^6 + 315*vperp^8))/(512*(vpa^2 + vperp^2)^8))
-                +I24*((315*vpa*(16*vpa^6 - 168*vpa^4*vperp^2 + 210*vpa^2*vperp^4 - 35*vperp^6))/(128*(vpa^2 + vperp^2)^6))
-                +I22*((-15*vpa*(8*vpa^4 - 40*vpa^2*vperp^2 + 15*vperp^4))/(8*(vpa^2 + vperp^2)^4))
-                +I20*((3*vpa*(2*vpa^2 - 3*vperp^2))/(2*(vpa^2 + vperp^2)^2))
-                +I16*((-35*(128*vpa^8 - 1792*vpa^6*vperp^2 + 3360*vpa^4*vperp^4 - 1120*vpa^2*vperp^6 + 35*vperp^8))/(256*(vpa^2 + vperp^2)^7))
-                +I14*((45*(16*vpa^6 - 120*vpa^4*vperp^2 + 90*vpa^2*vperp^4 - 5*vperp^6))/(64*(vpa^2 + vperp^2)^5))
-                +I12*((-3*(8*vpa^4 - 24*vpa^2*vperp^2 + 3*vperp^4))/(4*(vpa^2 + vperp^2)^3))
-                +I10*(-1 + (3*vpa^2)/(vpa^2 + vperp^2))
-                +I08*((315*vpa*(128*vpa^8 - 2304*vpa^6*vperp^2 + 6048*vpa^4*vperp^4 - 3360*vpa^2*vperp^6 + 315*vperp^8))/(16384*(vpa^2 + vperp^2)^8))
-                +I06*((-35*vpa*(16*vpa^6 - 168*vpa^4*vperp^2 + 210*vpa^2*vperp^4 - 35*vperp^6))/(256*(vpa^2 + vperp^2)^6))
-                +I04*((15*vpa*(8*vpa^4 - 40*vpa^2*vperp^2 + 15*vperp^4))/(64*(vpa^2 + vperp^2)^4))
-                +I02*((-6*vpa^3 + 9*vpa*vperp^2)/(4*(vpa^2 + vperp^2)^2))
-                +I00*(vpa))
-   # multiply by overall prefactor
-   dHdvpa_series *= -((vpa^2 + vperp^2)^(-3/2))   
-   return dHdvpa_series
-end
-
-"""
-    multipole_dHdvperp(vpa, vperp, I00, ..., I08)
-
-Evaluate a truncated multipole-series expression for `dHdvperp` at the single point
-`(vpa, vperp)` and return it as a scalar.
-
-The result is a linear combination of the 25 scalar coefficients `Ijk`, each multiplied
-by a rational function of `vpa` and `vperp`, with the whole sum finally multiplied by
-`-(vpa^2 + vperp^2)^(-3/2)`.
-
-NOTE(review): the coefficients `Ijk` are presumably moments of the source distribution
-computed by the caller -- confirm at the call site.
-
-The expression contains negative powers of `vpa^2 + vperp^2`, so it is not well defined
-at `vpa == vperp == 0`.
-"""
-function multipole_dHdvperp(vpa::mk_float,vperp::mk_float,
-                   I00::mk_float, I10::mk_float, I20::mk_float, I30::mk_float, I40::mk_float, I50::mk_float, I60::mk_float, I70::mk_float, I80::mk_float, 
-                   I02::mk_float, I12::mk_float, I22::mk_float, I32::mk_float, I42::mk_float, I52::mk_float, I62::mk_float,
-                   I04::mk_float, I14::mk_float, I24::mk_float, I34::mk_float, I44::mk_float, I06::mk_float, I16::mk_float, I26::mk_float, I08::mk_float)
-   # sum up terms in the multipole series 
-   dHdvperp_series = (I80*((45*vperp*(128*vpa^8 - 896*vpa^6*vperp^2 + 1120*vpa^4*vperp^4 - 280*vpa^2*vperp^6 + 7*vperp^8))/(128*(vpa^2 + vperp^2)^8))
-                +I70*((9*vpa*vperp*(64*vpa^6 - 336*vpa^4*vperp^2 + 280*vpa^2*vperp^4 - 35*vperp^6))/(16*(vpa^2 + vperp^2)^7))
-                +I62*((-315*(128*vpa^8*vperp - 896*vpa^6*vperp^3 + 1120*vpa^4*vperp^5 - 280*vpa^2*vperp^7 + 7*vperp^9))/(64*(vpa^2 + vperp^2)^8))
-                +I60*((7*vperp*(64*vpa^6 - 240*vpa^4*vperp^2 + 120*vpa^2*vperp^4 - 5*vperp^6))/(16*(vpa^2 + vperp^2)^6))
-                +I52*((-189*vpa*vperp*(64*vpa^6 - 336*vpa^4*vperp^2 + 280*vpa^2*vperp^4 - 35*vperp^6))/(32*(vpa^2 + vperp^2)^7))
-                +I50*((21*vpa*vperp*(8*vpa^4 - 20*vpa^2*vperp^2 + 5*vperp^4))/(8*(vpa^2 + vperp^2)^5))
-                +I44*((4725*vperp*(128*vpa^8 - 896*vpa^6*vperp^2 + 1120*vpa^4*vperp^4 - 280*vpa^2*vperp^6 + 7*vperp^8))/(512*(vpa^2 + vperp^2)^8))
-                +I42*((105*vperp*(-64*vpa^6 + 240*vpa^4*vperp^2 - 120*vpa^2*vperp^4 + 5*vperp^6))/(32*(vpa^2 + vperp^2)^6))
-                +I40*((15*vperp*(8*vpa^4 - 12*vpa^2*vperp^2 + vperp^4))/(8*(vpa^2 + vperp^2)^4))
-                +I34*((945*vpa*vperp*(64*vpa^6 - 336*vpa^4*vperp^2 + 280*vpa^2*vperp^4 - 35*vperp^6))/(128*(vpa^2 + vperp^2)^7))
-                +I32*((-105*vpa*vperp*(8*vpa^4 - 20*vpa^2*vperp^2 + 5*vperp^4))/(8*(vpa^2 + vperp^2)^5))
-                +I30*((5*vpa*vperp*(4*vpa^2 - 3*vperp^2))/(2*(vpa^2 + vperp^2)^3))
-                +I26*((-1575*vperp*(128*vpa^8 - 896*vpa^6*vperp^2 + 1120*vpa^4*vperp^4 - 280*vpa^2*vperp^6 + 7*vperp^8))/(512*(vpa^2 + vperp^2)^8))
-                +I24*((315*vperp*(64*vpa^6 - 240*vpa^4*vperp^2 + 120*vpa^2*vperp^4 - 5*vperp^6))/(128*(vpa^2 + vperp^2)^6))
-                +I22*((-45*vperp*(8*vpa^4 - 12*vpa^2*vperp^2 + vperp^4))/(8*(vpa^2 + vperp^2)^4))
-                +I20*((-3*vperp*(-4*vpa^2 + vperp^2))/(2*(vpa^2 + vperp^2)^2))
-                +I16*((-315*vpa*vperp*(64*vpa^6 - 336*vpa^4*vperp^2 + 280*vpa^2*vperp^4 - 35*vperp^6))/(256*(vpa^2 + vperp^2)^7))
-                +I14*((315*vpa*vperp*(8*vpa^4 - 20*vpa^2*vperp^2 + 5*vperp^4))/(64*(vpa^2 + vperp^2)^5))
-                +I12*((-15*vpa*vperp*(4*vpa^2 - 3*vperp^2))/(4*(vpa^2 + vperp^2)^3))
-                +I10*((3*vpa*vperp)/(vpa^2 + vperp^2))
-                +I08*((1575*(128*vpa^8*vperp - 896*vpa^6*vperp^3 + 1120*vpa^4*vperp^5 - 280*vpa^2*vperp^7 + 7*vperp^9))/(16384*(vpa^2 + vperp^2)^8))
-                +I06*((-35*(64*vpa^6*vperp - 240*vpa^4*vperp^3 + 120*vpa^2*vperp^5 - 5*vperp^7))/(256*(vpa^2 + vperp^2)^6))
-                +I04*((45*(8*vpa^4*vperp - 12*vpa^2*vperp^3 + vperp^5))/(64*(vpa^2 + vperp^2)^4))
-                +I02*((3*vperp*(-4*vpa^2 + vperp^2))/(4*(vpa^2 + vperp^2)^2))
-                +I00*(vperp))
-   # multiply by overall prefactor
-   dHdvperp_series *= -((vpa^2 + vperp^2)^(-3/2))
-   return dHdvperp_series
-end
-
-"""
-    multipole_G(vpa, vperp, I00, ..., I08)
-
-Evaluate a truncated multipole-series expression for `G` at the single point
-`(vpa, vperp)` and return it as a scalar.
-
-The result is a linear combination of the 25 scalar coefficients `Ijk`, each multiplied
-by a rational function of `vpa` and `vperp`, with the whole sum finally multiplied by
-`(vpa^2 + vperp^2)^(1/2)`.
-
-NOTE(review): the coefficients `Ijk` are presumably moments of the source distribution
-computed by the caller -- confirm at the call site.
-
-The terms of the sum contain negative powers of `vpa^2 + vperp^2`, so the expression is
-not well defined at `vpa == vperp == 0`.
-"""
-function multipole_G(vpa::mk_float,vperp::mk_float,
-                   I00::mk_float, I10::mk_float, I20::mk_float, I30::mk_float, I40::mk_float, I50::mk_float, I60::mk_float, I70::mk_float, I80::mk_float, 
-                   I02::mk_float, I12::mk_float, I22::mk_float, I32::mk_float, I42::mk_float, I52::mk_float, I62::mk_float,
-                   I04::mk_float, I14::mk_float, I24::mk_float, I34::mk_float, I44::mk_float, I06::mk_float, I16::mk_float, I26::mk_float, I08::mk_float)
-   # sum up terms in the multipole series 
-   G_series = (I80*((64*vpa^6*vperp^2 - 240*vpa^4*vperp^4 + 120*vpa^2*vperp^6 - 5*vperp^8)/(128*(vpa^2 + vperp^2)^8))
-             +I70*((vpa*vperp^2*(8*vpa^4 - 20*vpa^2*vperp^2 + 5*vperp^4))/(16*(vpa^2 + vperp^2)^7))
-             +I62*((32*vpa^8 - 656*vpa^6*vperp^2 + 1620*vpa^4*vperp^4 - 670*vpa^2*vperp^6 + 25*vperp^8)/(64*(vpa^2 + vperp^2)^8))
-             +I60*((vperp^2*(8*vpa^4 - 12*vpa^2*vperp^2 + vperp^4))/(16*(vpa^2 + vperp^2)^6))
-             +I52*((vpa*(16*vpa^6 - 232*vpa^4*vperp^2 + 370*vpa^2*vperp^4 - 75*vperp^6))/(32*(vpa^2 + vperp^2)^7))
-             +I50*((vpa*vperp^2*(4*vpa^2 - 3*vperp^2))/(8*(vpa^2 + vperp^2)^5))
-             +I44*((-15*(64*vpa^8 - 864*vpa^6*vperp^2 + 1560*vpa^4*vperp^4 - 500*vpa^2*vperp^6 + 15*vperp^8))/(512*(vpa^2 + vperp^2)^8))
-             +I42*((16*vpa^6 - 152*vpa^4*vperp^2 + 138*vpa^2*vperp^4 - 9*vperp^6)/(32*(vpa^2 + vperp^2)^6))
-             +I40*(-1/8*(vperp^2*(-4*vpa^2 + vperp^2))/(vpa^2 + vperp^2)^4)
-             +I34*((5*vpa*(-32*vpa^6 + 296*vpa^4*vperp^2 - 320*vpa^2*vperp^4 + 45*vperp^6))/(128*(vpa^2 + vperp^2)^7))
-             +I32*((vpa*(4*vpa^4 - 22*vpa^2*vperp^2 + 9*vperp^4))/(8*(vpa^2 + vperp^2)^5))
-             +I30*((vpa*vperp^2)/(2*(vpa^2 + vperp^2)^3))
-             +I26*((5*(96*vpa^8 - 1072*vpa^6*vperp^2 + 1500*vpa^4*vperp^4 - 330*vpa^2*vperp^6 + 5*vperp^8))/(512*(vpa^2 + vperp^2)^8))
-             +I24*((3*(-32*vpa^6 + 184*vpa^4*vperp^2 - 96*vpa^2*vperp^4 + 3*vperp^6))/(128*(vpa^2 + vperp^2)^6))
-             +I22*((4*vpa^4 - 10*vpa^2*vperp^2 + vperp^4)/(8*(vpa^2 + vperp^2)^4))
-             +I20*(vperp^2/(2*(vpa^2 + vperp^2)^2))
-             +I16*((5*vpa*(16*vpa^6 - 120*vpa^4*vperp^2 + 90*vpa^2*vperp^4 - 5*vperp^6))/(256*(vpa^2 + vperp^2)^7))
-             +I14*((-3*vpa*(8*vpa^4 - 24*vpa^2*vperp^2 + 3*vperp^4))/(64*(vpa^2 + vperp^2)^5))
-             +I12*((vpa*(2*vpa^2 - vperp^2))/(4*(vpa^2 + vperp^2)^3))
-             +I10*(-(vpa/(vpa^2 + vperp^2)))
-             +I08*((5*(-128*vpa^8 + 1280*vpa^6*vperp^2 - 1440*vpa^4*vperp^4 + 160*vpa^2*vperp^6 + 5*vperp^8))/(16384*(vpa^2 + vperp^2)^8))
-             +I06*((16*vpa^6 - 72*vpa^4*vperp^2 + 18*vpa^2*vperp^4 + vperp^6)/(256*(vpa^2 + vperp^2)^6))
-             +I04*((-8*vpa^4 + 8*vpa^2*vperp^2 + vperp^4)/(64*(vpa^2 + vperp^2)^4))
-             +I02*((2*vpa^2 + vperp^2)/(4*(vpa^2 + vperp^2)^2))
-             +I00*(1))
-   # multiply by overall prefactor
-   G_series *= ((vpa^2 + vperp^2)^(1/2))   
-   return G_series
-end
-
-"""
-    multipole_dGdvperp(vpa, vperp, I00, ..., I08)
-
-Evaluate a truncated multipole-series expression for `dGdvperp` at the single point
-`(vpa, vperp)` and return it as a scalar.
-
-The result is a linear combination of the 25 scalar coefficients `Ijk`, each multiplied
-by a rational function of `vpa` and `vperp`, with the whole sum finally multiplied by
-`(vpa^2 + vperp^2)^(-1/2)`.
-
-NOTE(review): the coefficients `Ijk` are presumably moments of the source distribution
-computed by the caller -- confirm at the call site.
-
-The expression contains negative powers of `vpa^2 + vperp^2`, so it is not well defined
-at `vpa == vperp == 0`.
-"""
-function multipole_dGdvperp(vpa::mk_float,vperp::mk_float,
-                   I00::mk_float, I10::mk_float, I20::mk_float, I30::mk_float, I40::mk_float, I50::mk_float, I60::mk_float, I70::mk_float, I80::mk_float, 
-                   I02::mk_float, I12::mk_float, I22::mk_float, I32::mk_float, I42::mk_float, I52::mk_float, I62::mk_float,
-                   I04::mk_float, I14::mk_float, I24::mk_float, I34::mk_float, I44::mk_float, I06::mk_float, I16::mk_float, I26::mk_float, I08::mk_float)
-   # sum up terms in the multipole series 
-   dGdvperp_series = (I80*((vperp*(128*vpa^8 - 1792*vpa^6*vperp^2 + 3360*vpa^4*vperp^4 - 1120*vpa^2*vperp^6 + 35*vperp^8))/(128*(vpa^2 + vperp^2)^8))
-                   +I70*((vpa*vperp*(16*vpa^6 - 168*vpa^4*vperp^2 + 210*vpa^2*vperp^4 - 35*vperp^6))/(16*(vpa^2 + vperp^2)^7))
-                   +I62*((-7*(256*vpa^8*vperp - 2144*vpa^6*vperp^3 + 3120*vpa^4*vperp^5 - 890*vpa^2*vperp^7 + 25*vperp^9))/(64*(vpa^2 + vperp^2)^8))
-                   +I60*((vperp*(16*vpa^6 - 120*vpa^4*vperp^2 + 90*vpa^2*vperp^4 - 5*vperp^6))/(16*(vpa^2 + vperp^2)^6))
-                   +I52*((21*vpa*vperp*(-32*vpa^6 + 192*vpa^4*vperp^2 - 180*vpa^2*vperp^4 + 25*vperp^6))/(32*(vpa^2 + vperp^2)^7))
-                   +I50*((8*vpa^5*vperp - 40*vpa^3*vperp^3 + 15*vpa*vperp^5)/(8*(vpa^2 + vperp^2)^5))
-                   +I44*((315*vperp*(128*vpa^8 - 832*vpa^6*vperp^2 + 960*vpa^4*vperp^4 - 220*vpa^2*vperp^6 + 5*vperp^8))/(512*(vpa^2 + vperp^2)^8))
-                   +I42*((15*vperp*(-32*vpa^6 + 128*vpa^4*vperp^2 - 68*vpa^2*vperp^4 + 3*vperp^6))/(32*(vpa^2 + vperp^2)^6))
-                   +I40*((vperp*(8*vpa^4 - 24*vpa^2*vperp^2 + 3*vperp^4))/(8*(vpa^2 + vperp^2)^4))
-                   +I34*((315*vpa*vperp*(16*vpa^6 - 72*vpa^4*vperp^2 + 50*vpa^2*vperp^4 - 5*vperp^6))/(128*(vpa^2 + vperp^2)^7))
-                   +I32*((-5*vpa*vperp*(16*vpa^4 - 38*vpa^2*vperp^2 + 9*vperp^4))/(8*(vpa^2 + vperp^2)^5))
-                   +I30*((vpa*vperp*(2*vpa^2 - 3*vperp^2))/(2*(vpa^2 + vperp^2)^3))
-                   +I26*((-35*vperp*(512*vpa^8 - 2848*vpa^6*vperp^2 + 2640*vpa^4*vperp^4 - 430*vpa^2*vperp^6 + 5*vperp^8))/(512*(vpa^2 + vperp^2)^8))
-                   +I24*((-45*vperp*(-48*vpa^6 + 136*vpa^4*vperp^2 - 46*vpa^2*vperp^4 + vperp^6))/(128*(vpa^2 + vperp^2)^6))
-                   +I22*((-3*vperp*(16*vpa^4 - 18*vpa^2*vperp^2 + vperp^4))/(8*(vpa^2 + vperp^2)^4))
-                   +I20*(-1/2*(vperp*(-2*vpa^2 + vperp^2))/(vpa^2 + vperp^2)^2)
-                   +I16*((-35*vpa*vperp*(64*vpa^6 - 240*vpa^4*vperp^2 + 120*vpa^2*vperp^4 - 5*vperp^6))/(256*(vpa^2 + vperp^2)^7))
-                   +I14*((45*vpa*vperp*(8*vpa^4 - 12*vpa^2*vperp^2 + vperp^4))/(64*(vpa^2 + vperp^2)^5))
-                   +I12*((3*vpa*vperp*(-4*vpa^2 + vperp^2))/(4*(vpa^2 + vperp^2)^3))
-                   +I10*((vpa*vperp)/(vpa^2 + vperp^2))
-                   +I08*((175*(128*vpa^8*vperp - 640*vpa^6*vperp^3 + 480*vpa^4*vperp^5 - 40*vpa^2*vperp^7 - vperp^9))/(16384*(vpa^2 + vperp^2)^8))
-                   +I06*((-5*(64*vpa^6*vperp - 144*vpa^4*vperp^3 + 24*vpa^2*vperp^5 + vperp^7))/(256*(vpa^2 + vperp^2)^6))
-                   +I04*((3*(24*vpa^4*vperp - 12*vpa^2*vperp^3 - vperp^5))/(64*(vpa^2 + vperp^2)^4))
-                   +I02*(-1/4*(vperp*(4*vpa^2 + vperp^2))/(vpa^2 + vperp^2)^2)
-                   +I00*(vperp))
-   # multiply by overall prefactor
-   dGdvperp_series *= ((vpa^2 + vperp^2)^(-1/2))
-   return dGdvperp_series
-end
-
-"""
-    multipole_d2Gdvperp2(vpa, vperp, I00, ..., I08)
-
-Evaluate a truncated multipole-series expression for `d2Gdvperp2` at the single point
-`(vpa, vperp)` and return it as a scalar.
-
-The result is a linear combination of the 25 scalar coefficients `Ijk`, each multiplied
-by a rational function of `vpa` and `vperp`, with the whole sum finally multiplied by
-`(vpa^2 + vperp^2)^(-3/2)`.
-
-NOTE(review): the coefficients `Ijk` are presumably moments of the source distribution
-computed by the caller -- confirm at the call site.
-
-The expression contains negative powers of `vpa^2 + vperp^2`, so it is not well defined
-at `vpa == vperp == 0`.
-"""
-function multipole_d2Gdvperp2(vpa::mk_float,vperp::mk_float,
-                   I00::mk_float, I10::mk_float, I20::mk_float, I30::mk_float, I40::mk_float, I50::mk_float, I60::mk_float, I70::mk_float, I80::mk_float, 
-                   I02::mk_float, I12::mk_float, I22::mk_float, I32::mk_float, I42::mk_float, I52::mk_float, I62::mk_float,
-                   I04::mk_float, I14::mk_float, I24::mk_float, I34::mk_float, I44::mk_float, I06::mk_float, I16::mk_float, I26::mk_float, I08::mk_float)
-   # sum up terms in the multipole series 
-   d2Gdvperp2_series = (I80*((128*vpa^10 - 7424*vpa^8*vperp^2 + 41888*vpa^6*vperp^4 - 48160*vpa^4*vperp^6 + 11515*vpa^2*vperp^8 - 280*vperp^10)/(128*(vpa^2 + vperp^2)^8))
-                   +I70*((16*vpa^9 - 728*vpa^7*vperp^2 + 3066*vpa^5*vperp^4 - 2345*vpa^3*vperp^6 + 280*vpa*vperp^8)/(16*(vpa^2 + vperp^2)^7))
-                   +I62*((-7*(256*vpa^10 - 10528*vpa^8*vperp^2 + 45616*vpa^6*vperp^4 - 43670*vpa^4*vperp^6 + 9125*vpa^2*vperp^8 - 200*vperp^10))/(64*(vpa^2 + vperp^2)^8))
-                   +I60*((16*vpa^8 - 552*vpa^6*vperp^2 + 1650*vpa^4*vperp^4 - 755*vpa^2*vperp^6 + 30*vperp^8)/(16*(vpa^2 + vperp^2)^6))
-                   +I52*((-21*(32*vpa^9 - 1024*vpa^7*vperp^2 + 3204*vpa^5*vperp^4 - 1975*vpa^3*vperp^6 + 200*vpa*vperp^8))/(32*(vpa^2 + vperp^2)^7))
-                   +I50*((8*vpa^7 - 200*vpa^5*vperp^2 + 395*vpa^3*vperp^4 - 90*vpa*vperp^6)/(8*(vpa^2 + vperp^2)^5))
-                   +I44*((315*(128*vpa^10 - 4544*vpa^8*vperp^2 + 16448*vpa^6*vperp^4 - 13060*vpa^4*vperp^6 + 2245*vpa^2*vperp^8 - 40*vperp^10))/(512*(vpa^2 + vperp^2)^8))
-                   +I42*((-15*(32*vpa^8 - 768*vpa^6*vperp^2 + 1620*vpa^4*vperp^4 - 565*vpa^2*vperp^6 + 18*vperp^8))/(32*(vpa^2 + vperp^2)^6))
-                   +I40*((8*vpa^6 - 136*vpa^4*vperp^2 + 159*vpa^2*vperp^4 - 12*vperp^6)/(8*(vpa^2 + vperp^2)^4))
-                   +I34*((315*vpa*(16*vpa^8 - 440*vpa^6*vperp^2 + 1114*vpa^4*vperp^4 - 535*vpa^2*vperp^6 + 40*vperp^8))/(128*(vpa^2 + vperp^2)^7))
-                   +I32*((5*vpa*(-16*vpa^6 + 274*vpa^4*vperp^2 - 349*vpa^2*vperp^4 + 54*vperp^6))/(8*(vpa^2 + vperp^2)^5))
-                   +I30*((vpa*(2*vpa^4 - 21*vpa^2*vperp^2 + 12*vperp^4))/(2*(vpa^2 + vperp^2)^3))
-                   +I26*((-35*(512*vpa^10 - 16736*vpa^8*vperp^2 + 53072*vpa^6*vperp^4 - 34690*vpa^4*vperp^6 + 4345*vpa^2*vperp^8 - 40*vperp^10))/(512*(vpa^2 + vperp^2)^8))
-                   +I24*((135*(16*vpa^8 - 328*vpa^6*vperp^2 + 530*vpa^4*vperp^4 - 125*vpa^2*vperp^6 + 2*vperp^8))/(128*(vpa^2 + vperp^2)^6))
-                   +I22*((-3*(16*vpa^6 - 182*vpa^4*vperp^2 + 113*vpa^2*vperp^4 - 4*vperp^6))/(8*(vpa^2 + vperp^2)^4))
-                   +I20*((2*vpa^4 - 11*vpa^2*vperp^2 + 2*vperp^4)/(2*(vpa^2 + vperp^2)^2))
-                   +I16*((-35*vpa*(64*vpa^8 - 1616*vpa^6*vperp^2 + 3480*vpa^4*vperp^4 - 1235*vpa^2*vperp^6 + 40*vperp^8))/(256*(vpa^2 + vperp^2)^7))
-                   +I14*((45*vpa*(8*vpa^6 - 116*vpa^4*vperp^2 + 101*vpa^2*vperp^4 - 6*vperp^6))/(64*(vpa^2 + vperp^2)^5))
-                   +I12*((-3*vpa*(4*vpa^4 - 27*vpa^2*vperp^2 + 4*vperp^4))/(4*(vpa^2 + vperp^2)^3))
-                   +I10*(-2*vpa + (3*vpa^3)/(vpa^2 + vperp^2))
-                   +I08*((175*(128*vpa^10 - 3968*vpa^8*vperp^2 + 11360*vpa^6*vperp^4 - 6040*vpa^4*vperp^6 + 391*vpa^2*vperp^8 + 8*vperp^10))/(16384*(vpa^2 + vperp^2)^8))
-                   +I06*((-5*(64*vpa^8 - 1200*vpa^6*vperp^2 + 1560*vpa^4*vperp^4 - 185*vpa^2*vperp^6 - 6*vperp^8))/(256*(vpa^2 + vperp^2)^6))
-                   +I04*((3*(24*vpa^6 - 228*vpa^4*vperp^2 + 67*vpa^2*vperp^4 + 4*vperp^6))/(64*(vpa^2 + vperp^2)^4))
-                   +I02*((-4*vpa^4 + 13*vpa^2*vperp^2 + 2*vperp^4)/(4*(vpa^2 + vperp^2)^2))
-                   +I00*(vpa^2))
-   # multiply by overall prefactor
-   d2Gdvperp2_series *= ((vpa^2 + vperp^2)^(-3/2))   
-   return d2Gdvperp2_series
-end
-
-function multipole_d2Gdvperpdvpa(vpa::mk_float,vperp::mk_float,
-                   I00::mk_float, I10::mk_float, I20::mk_float, I30::mk_float, I40::mk_float, I50::mk_float, I60::mk_float, I70::mk_float, I80::mk_float, 
-                   I02::mk_float, I12::mk_float, I22::mk_float, I32::mk_float, I42::mk_float, I52::mk_float, I62::mk_float,
-                   I04::mk_float, I14::mk_float, I24::mk_float, I34::mk_float, I44::mk_float, I06::mk_float, I16::mk_float, I26::mk_float, I08::mk_float)
-   # sum up terms in the multipole series 
-   d2Gdvperpdvpa_series = (I80*((9*vpa*vperp*(128*vpa^8 - 2304*vpa^6*vperp^2 + 6048*vpa^4*vperp^4 - 3360*vpa^2*vperp^6 + 315*vperp^8))/(128*(vpa^2 + vperp^2)^8))
-                      +I70*((vperp*(128*vpa^8 - 1792*vpa^6*vperp^2 + 3360*vpa^4*vperp^4 - 1120*vpa^2*vperp^6 + 35*vperp^8))/(16*(vpa^2 + vperp^2)^7))
-                      +I62*((-63*(256*vpa^9*vperp - 2848*vpa^7*vperp^3 + 5936*vpa^5*vperp^5 - 2870*vpa^3*vperp^7 + 245*vpa*vperp^9))/(64*(vpa^2 + vperp^2)^8))
-                      +I60*((7*vpa*vperp*(16*vpa^6 - 168*vpa^4*vperp^2 + 210*vpa^2*vperp^4 - 35*vperp^6))/(16*(vpa^2 + vperp^2)^6))
-                      +I52*((-21*(256*vpa^8*vperp - 2144*vpa^6*vperp^3 + 3120*vpa^4*vperp^5 - 890*vpa^2*vperp^7 + 25*vperp^9))/(32*(vpa^2 + vperp^2)^7))
-                      +I50*((3*vperp*(16*vpa^6 - 120*vpa^4*vperp^2 + 90*vpa^2*vperp^4 - 5*vperp^6))/(8*(vpa^2 + vperp^2)^5))
-                      +I44*((945*vpa*vperp*(384*vpa^8 - 3392*vpa^6*vperp^2 + 5824*vpa^4*vperp^4 - 2380*vpa^2*vperp^6 + 175*vperp^8))/(512*(vpa^2 + vperp^2)^8))
-                      +I42*((-105*vpa*vperp*(32*vpa^6 - 192*vpa^4*vperp^2 + 180*vpa^2*vperp^4 - 25*vperp^6))/(32*(vpa^2 + vperp^2)^6))
-                      +I40*((5*vpa*vperp*(8*vpa^4 - 40*vpa^2*vperp^2 + 15*vperp^4))/(8*(vpa^2 + vperp^2)^4))
-                      +I34*((315*vperp*(128*vpa^8 - 832*vpa^6*vperp^2 + 960*vpa^4*vperp^4 - 220*vpa^2*vperp^6 + 5*vperp^8))/(128*(vpa^2 + vperp^2)^7))
-                      +I32*((15*vperp*(-32*vpa^6 + 128*vpa^4*vperp^2 - 68*vpa^2*vperp^4 + 3*vperp^6))/(8*(vpa^2 + vperp^2)^5))
-                      +I30*((vperp*(8*vpa^4 - 24*vpa^2*vperp^2 + 3*vperp^4))/(2*(vpa^2 + vperp^2)^3))
-                      +I26*((-315*vpa*vperp*(512*vpa^8 - 3936*vpa^6*vperp^2 + 5712*vpa^4*vperp^4 - 1890*vpa^2*vperp^6 + 105*vperp^8))/(512*(vpa^2 + vperp^2)^8))
-                      +I24*((945*vpa*vperp*(16*vpa^6 - 72*vpa^4*vperp^2 + 50*vpa^2*vperp^4 - 5*vperp^6))/(128*(vpa^2 + vperp^2)^6))
-                      +I22*((-15*vpa*vperp*(16*vpa^4 - 38*vpa^2*vperp^2 + 9*vperp^4))/(8*(vpa^2 + vperp^2)^4))
-                      +I20*((3*vpa*vperp*(2*vpa^2 - 3*vperp^2))/(2*(vpa^2 + vperp^2)^2))
-                      +I16*((-35*vperp*(512*vpa^8 - 2848*vpa^6*vperp^2 + 2640*vpa^4*vperp^4 - 430*vpa^2*vperp^6 + 5*vperp^8))/(256*(vpa^2 + vperp^2)^7))
-                      +I14*((-45*vperp*(-48*vpa^6 + 136*vpa^4*vperp^2 - 46*vpa^2*vperp^4 + vperp^6))/(64*(vpa^2 + vperp^2)^5))
-                      +I12*((-3*vperp*(16*vpa^4 - 18*vpa^2*vperp^2 + vperp^4))/(4*(vpa^2 + vperp^2)^3))
-                      +I10*(vperp*(-1 + (3*vpa^2)/(vpa^2 + vperp^2)))
-                      +I08*((1575*vpa*(128*vpa^8*vperp - 896*vpa^6*vperp^3 + 1120*vpa^4*vperp^5 - 280*vpa^2*vperp^7 + 7*vperp^9))/(16384*(vpa^2 + vperp^2)^8))
-                      +I06*((-35*vpa*(64*vpa^6*vperp - 240*vpa^4*vperp^3 + 120*vpa^2*vperp^5 - 5*vperp^7))/(256*(vpa^2 + vperp^2)^6))
-                      +I04*((45*vpa*(8*vpa^4*vperp - 12*vpa^2*vperp^3 + vperp^5))/(64*(vpa^2 + vperp^2)^4))
-                      +I02*((3*vpa*vperp*(-4*vpa^2 + vperp^2))/(4*(vpa^2 + vperp^2)^2))
-                      +I00*(vpa*vperp))
-   # multiply by overall prefactor
-   d2Gdvperpdvpa_series *= -((vpa^2 + vperp^2)^(-3/2))   
-   return d2Gdvperpdvpa_series
-end
-
-function multipole_d2Gdvpa2(vpa::mk_float,vperp::mk_float,
-                   I00::mk_float, I10::mk_float, I20::mk_float, I30::mk_float, I40::mk_float, I50::mk_float, I60::mk_float, I70::mk_float, I80::mk_float, 
-                   I02::mk_float, I12::mk_float, I22::mk_float, I32::mk_float, I42::mk_float, I52::mk_float, I62::mk_float,
-                   I04::mk_float, I14::mk_float, I24::mk_float, I34::mk_float, I44::mk_float, I06::mk_float, I16::mk_float, I26::mk_float, I08::mk_float)
-   # sum up terms in the multipole series 
-   d2Gdvpa2_series = (I80*((45*vperp^2*(128*vpa^8 - 896*vpa^6*vperp^2 + 1120*vpa^4*vperp^4 - 280*vpa^2*vperp^6 + 7*vperp^8))/(128*(vpa^2 + vperp^2)^8))
-                   +I70*((9*vpa*vperp^2*(64*vpa^6 - 336*vpa^4*vperp^2 + 280*vpa^2*vperp^4 - 35*vperp^6))/(16*(vpa^2 + vperp^2)^7))
-                   +I62*((7*(256*vpa^10 - 9088*vpa^8*vperp^2 + 43456*vpa^6*vperp^4 - 45920*vpa^4*vperp^6 + 10430*vpa^2*vperp^8 - 245*vperp^10))/(64*(vpa^2 + vperp^2)^8))
-                   +I60*((7*vperp^2*(64*vpa^6 - 240*vpa^4*vperp^2 + 120*vpa^2*vperp^4 - 5*vperp^6))/(16*(vpa^2 + vperp^2)^6))
-                   +I52*((21*vpa*(32*vpa^8 - 880*vpa^6*vperp^2 + 3108*vpa^4*vperp^4 - 2170*vpa^2*vperp^6 + 245*vperp^8))/(32*(vpa^2 + vperp^2)^7))
-                   +I50*((21*vpa*vperp^2*(8*vpa^4 - 20*vpa^2*vperp^2 + 5*vperp^4))/(8*(vpa^2 + vperp^2)^5))
-                   +I44*((105*(-512*vpa^10 + 12416*vpa^8*vperp^2 - 46592*vpa^6*vperp^4 + 41440*vpa^4*vperp^6 - 8260*vpa^2*vperp^8 + 175*vperp^10))/(512*(vpa^2 + vperp^2)^8))
-                   +I42*((15*(32*vpa^8 - 656*vpa^6*vperp^2 + 1620*vpa^4*vperp^4 - 670*vpa^2*vperp^6 + 25*vperp^8))/(32*(vpa^2 + vperp^2)^6))
-                   +I40*((15*vperp^2*(8*vpa^4 - 12*vpa^2*vperp^2 + vperp^4))/(8*(vpa^2 + vperp^2)^4))
-                   +I34*((-105*vpa*(64*vpa^8 - 1184*vpa^6*vperp^2 + 3192*vpa^4*vperp^4 - 1820*vpa^2*vperp^6 + 175*vperp^8))/(128*(vpa^2 + vperp^2)^7))
-                   +I32*((5*vpa*(16*vpa^6 - 232*vpa^4*vperp^2 + 370*vpa^2*vperp^4 - 75*vperp^6))/(8*(vpa^2 + vperp^2)^5))
-                   +I30*((5*vpa*vperp^2*(4*vpa^2 - 3*vperp^2))/(2*(vpa^2 + vperp^2)^3))
-                   +I26*((105*(256*vpa^10 - 5248*vpa^8*vperp^2 + 16576*vpa^6*vperp^4 - 12320*vpa^4*vperp^6 + 2030*vpa^2*vperp^8 - 35*vperp^10))/(512*(vpa^2 + vperp^2)^8))
-                   +I24*((-45*(64*vpa^8 - 864*vpa^6*vperp^2 + 1560*vpa^4*vperp^4 - 500*vpa^2*vperp^6 + 15*vperp^8))/(128*(vpa^2 + vperp^2)^6))
-                   +I22*((3*(16*vpa^6 - 152*vpa^4*vperp^2 + 138*vpa^2*vperp^4 - 9*vperp^6))/(8*(vpa^2 + vperp^2)^4))
-                   +I20*((-3*vperp^2*(-4*vpa^2 + vperp^2))/(2*(vpa^2 + vperp^2)^2))
-                   +I16*((105*vpa*(32*vpa^8 - 496*vpa^6*vperp^2 + 1092*vpa^4*vperp^4 - 490*vpa^2*vperp^6 + 35*vperp^8))/(256*(vpa^2 + vperp^2)^7))
-                   +I14*((15*vpa*(-32*vpa^6 + 296*vpa^4*vperp^2 - 320*vpa^2*vperp^4 + 45*vperp^6))/(64*(vpa^2 + vperp^2)^5))
-                   +I12*((3*vpa*(4*vpa^4 - 22*vpa^2*vperp^2 + 9*vperp^4))/(4*(vpa^2 + vperp^2)^3))
-                   +I10*((3*vpa*vperp^2)/(vpa^2 + vperp^2))
-                   +I08*((-35*(1024*vpa^10 - 19072*vpa^8*vperp^2 + 52864*vpa^6*vperp^4 - 32480*vpa^4*vperp^6 + 3920*vpa^2*vperp^8 - 35*vperp^10))/(16384*(vpa^2 + vperp^2)^8))
-                   +I06*((5*(96*vpa^8 - 1072*vpa^6*vperp^2 + 1500*vpa^4*vperp^4 - 330*vpa^2*vperp^6 + 5*vperp^8))/(256*(vpa^2 + vperp^2)^6))
-                   +I04*((-3*(32*vpa^6 - 184*vpa^4*vperp^2 + 96*vpa^2*vperp^4 - 3*vperp^6))/(64*(vpa^2 + vperp^2)^4))
-                   +I02*((4*vpa^4 - 10*vpa^2*vperp^2 + vperp^4)/(4*(vpa^2 + vperp^2)^2))
-                   +I00*(vperp^2))
-   # multiply by overall prefactor
-   d2Gdvpa2_series *= ((vpa^2 + vperp^2)^(-3/2))
-   return d2Gdvpa2_series
-end
-
-"""
-"""
-function calculate_boundary_data_multipole_H!(func_data::vpa_vperp_boundary_data,vpa,vperp,
-                                     I00, I10, I20, I30, I40, I50, I60, I70, I80, 
-                                     I02, I12, I22, I32, I42, I52, I62,
-                                     I04, I14, I24, I34, I44, I06, I16, I26, I08)
-    nvpa = vpa.n
-    nvperp = vperp.n
-    begin_anyv_vperp_region(no_synchronize=true)
-    @loop_vperp ivperp begin
-                func_data.lower_boundary_vpa[ivperp] = multipole_H(vpa.grid[1],vperp.grid[ivperp],
-                                                       I00, I10, I20, I30, I40, I50, I60, I70, I80, 
-                                                       I02, I12, I22, I32, I42, I52, I62,
-                                                       I04, I14, I24, I34, I44, I06, I16, I26, I08)
-                func_data.upper_boundary_vpa[ivperp] = multipole_H(vpa.grid[nvpa],vperp.grid[ivperp],
-                                                       I00, I10, I20, I30, I40, I50, I60, I70, I80, 
-                                                       I02, I12, I22, I32, I42, I52, I62,
-                                                       I04, I14, I24, I34, I44, I06, I16, I26, I08)
-    end
-    begin_anyv_vpa_region(no_synchronize=true)
-    @loop_vpa ivpa begin
-                func_data.upper_boundary_vperp[ivpa] = multipole_H(vpa.grid[ivpa],vperp.grid[nvperp],
-                                                       I00, I10, I20, I30, I40, I50, I60, I70, I80, 
-                                                       I02, I12, I22, I32, I42, I52, I62,
-                                                       I04, I14, I24, I34, I44, I06, I16, I26, I08)
-    end
-    # return to serial parallelisation
-    return nothing
-end
-
-"""
-"""
-function calculate_boundary_data_multipole_dHdvpa!(func_data::vpa_vperp_boundary_data,vpa,vperp,
-                                     I00, I10, I20, I30, I40, I50, I60, I70, I80, 
-                                     I02, I12, I22, I32, I42, I52, I62,
-                                     I04, I14, I24, I34, I44, I06, I16, I26, I08)
-    nvpa = vpa.n
-    nvperp = vperp.n
-    begin_anyv_vperp_region(no_synchronize=true)
-    @loop_vperp ivperp begin
-                func_data.lower_boundary_vpa[ivperp] = multipole_dHdvpa(vpa.grid[1],vperp.grid[ivperp],
-                                                       I00, I10, I20, I30, I40, I50, I60, I70, I80, 
-                                                       I02, I12, I22, I32, I42, I52, I62,
-                                                       I04, I14, I24, I34, I44, I06, I16, I26, I08)
-                func_data.upper_boundary_vpa[ivperp] = multipole_dHdvpa(vpa.grid[nvpa],vperp.grid[ivperp],
-                                                       I00, I10, I20, I30, I40, I50, I60, I70, I80, 
-                                                       I02, I12, I22, I32, I42, I52, I62,
-                                                       I04, I14, I24, I34, I44, I06, I16, I26, I08)
-    end
-    begin_anyv_vpa_region(no_synchronize=true)
-    @loop_vpa ivpa begin
-                func_data.upper_boundary_vperp[ivpa] = multipole_dHdvpa(vpa.grid[ivpa],vperp.grid[nvperp],
-                                                       I00, I10, I20, I30, I40, I50, I60, I70, I80, 
-                                                       I02, I12, I22, I32, I42, I52, I62,
-                                                       I04, I14, I24, I34, I44, I06, I16, I26, I08)
-    end
-    # return to serial parallelisation
-    return nothing
-end
-
-"""
-"""
-function calculate_boundary_data_multipole_dHdvperp!(func_data::vpa_vperp_boundary_data,vpa,vperp,
-                                     I00, I10, I20, I30, I40, I50, I60, I70, I80, 
-                                     I02, I12, I22, I32, I42, I52, I62,
-                                     I04, I14, I24, I34, I44, I06, I16, I26, I08)
-    nvpa = vpa.n
-    nvperp = vperp.n
-    begin_anyv_vperp_region(no_synchronize=true)
-    @loop_vperp ivperp begin
-                func_data.lower_boundary_vpa[ivperp] = multipole_dHdvperp(vpa.grid[1],vperp.grid[ivperp],
-                                                       I00, I10, I20, I30, I40, I50, I60, I70, I80, 
-                                                       I02, I12, I22, I32, I42, I52, I62,
-                                                       I04, I14, I24, I34, I44, I06, I16, I26, I08)
-                func_data.upper_boundary_vpa[ivperp] = multipole_dHdvperp(vpa.grid[nvpa],vperp.grid[ivperp],
-                                                       I00, I10, I20, I30, I40, I50, I60, I70, I80, 
-                                                       I02, I12, I22, I32, I42, I52, I62,
-                                                       I04, I14, I24, I34, I44, I06, I16, I26, I08)
-    end
-    begin_anyv_vpa_region(no_synchronize=true)
-    @loop_vpa ivpa begin
-                func_data.upper_boundary_vperp[ivpa] = multipole_dHdvperp(vpa.grid[ivpa],vperp.grid[nvperp],
-                                                       I00, I10, I20, I30, I40, I50, I60, I70, I80, 
-                                                       I02, I12, I22, I32, I42, I52, I62,
-                                                       I04, I14, I24, I34, I44, I06, I16, I26, I08)
-    end
-    # return to serial parallelisation
-    return nothing
-end
-
-"""
-"""
-function calculate_boundary_data_multipole_G!(func_data::vpa_vperp_boundary_data,vpa,vperp,
-                                     I00, I10, I20, I30, I40, I50, I60, I70, I80, 
-                                     I02, I12, I22, I32, I42, I52, I62,
-                                     I04, I14, I24, I34, I44, I06, I16, I26, I08)
-    nvpa = vpa.n
-    nvperp = vperp.n
-    begin_anyv_vperp_region(no_synchronize=true)
-    @loop_vperp ivperp begin
-                func_data.lower_boundary_vpa[ivperp] = multipole_G(vpa.grid[1],vperp.grid[ivperp],
-                                                       I00, I10, I20, I30, I40, I50, I60, I70, I80, 
-                                                       I02, I12, I22, I32, I42, I52, I62,
-                                                       I04, I14, I24, I34, I44, I06, I16, I26, I08)
-                func_data.upper_boundary_vpa[ivperp] = multipole_G(vpa.grid[nvpa],vperp.grid[ivperp],
-                                                       I00, I10, I20, I30, I40, I50, I60, I70, I80, 
-                                                       I02, I12, I22, I32, I42, I52, I62,
-                                                       I04, I14, I24, I34, I44, I06, I16, I26, I08)
-    end
-    begin_anyv_vpa_region(no_synchronize=true)
-    @loop_vpa ivpa begin
-                func_data.upper_boundary_vperp[ivpa] = multipole_G(vpa.grid[ivpa],vperp.grid[nvperp],
-                                                       I00, I10, I20, I30, I40, I50, I60, I70, I80, 
-                                                       I02, I12, I22, I32, I42, I52, I62,
-                                                       I04, I14, I24, I34, I44, I06, I16, I26, I08)
-    end
-    # return to serial parallelisation
-    return nothing
-end
-
-"""
-"""
-function calculate_boundary_data_multipole_dGdvperp!(func_data::vpa_vperp_boundary_data,vpa,vperp,
-                                     I00, I10, I20, I30, I40, I50, I60, I70, I80, 
-                                     I02, I12, I22, I32, I42, I52, I62,
-                                     I04, I14, I24, I34, I44, I06, I16, I26, I08)
-    nvpa = vpa.n
-    nvperp = vperp.n
-    begin_anyv_vperp_region(no_synchronize=true)
-    @loop_vperp ivperp begin
-                func_data.lower_boundary_vpa[ivperp] = multipole_dGdvperp(vpa.grid[1],vperp.grid[ivperp],
-                                                       I00, I10, I20, I30, I40, I50, I60, I70, I80, 
-                                                       I02, I12, I22, I32, I42, I52, I62,
-                                                       I04, I14, I24, I34, I44, I06, I16, I26, I08)
-                func_data.upper_boundary_vpa[ivperp] = multipole_dGdvperp(vpa.grid[nvpa],vperp.grid[ivperp],
-                                                       I00, I10, I20, I30, I40, I50, I60, I70, I80, 
-                                                       I02, I12, I22, I32, I42, I52, I62,
-                                                       I04, I14, I24, I34, I44, I06, I16, I26, I08)
-    end
-    begin_anyv_vpa_region(no_synchronize=true)
-    @loop_vpa ivpa begin
-                func_data.upper_boundary_vperp[ivpa] = multipole_dGdvperp(vpa.grid[ivpa],vperp.grid[nvperp],
-                                                       I00, I10, I20, I30, I40, I50, I60, I70, I80, 
-                                                       I02, I12, I22, I32, I42, I52, I62,
-                                                       I04, I14, I24, I34, I44, I06, I16, I26, I08)
-    end
-    # return to serial parallelisation
-    return nothing
-end
-
-"""
-"""
-function calculate_boundary_data_multipole_d2Gdvperp2!(func_data::vpa_vperp_boundary_data,vpa,vperp,
-                                     I00, I10, I20, I30, I40, I50, I60, I70, I80, 
-                                     I02, I12, I22, I32, I42, I52, I62,
-                                     I04, I14, I24, I34, I44, I06, I16, I26, I08)
-    nvpa = vpa.n
-    nvperp = vperp.n
-    begin_anyv_vperp_region(no_synchronize=true)
-    @loop_vperp ivperp begin
-                func_data.lower_boundary_vpa[ivperp] = multipole_d2Gdvperp2(vpa.grid[1],vperp.grid[ivperp],
-                                                       I00, I10, I20, I30, I40, I50, I60, I70, I80, 
-                                                       I02, I12, I22, I32, I42, I52, I62,
-                                                       I04, I14, I24, I34, I44, I06, I16, I26, I08)
-                func_data.upper_boundary_vpa[ivperp] = multipole_d2Gdvperp2(vpa.grid[nvpa],vperp.grid[ivperp],
-                                                       I00, I10, I20, I30, I40, I50, I60, I70, I80, 
-                                                       I02, I12, I22, I32, I42, I52, I62,
-                                                       I04, I14, I24, I34, I44, I06, I16, I26, I08)
-    end
-    begin_anyv_vpa_region(no_synchronize=true)
-    @loop_vpa ivpa begin
-                func_data.upper_boundary_vperp[ivpa] = multipole_d2Gdvperp2(vpa.grid[ivpa],vperp.grid[nvperp],
-                                                       I00, I10, I20, I30, I40, I50, I60, I70, I80, 
-                                                       I02, I12, I22, I32, I42, I52, I62,
-                                                       I04, I14, I24, I34, I44, I06, I16, I26, I08)
-    end
-    # return to serial parallelisation
-    return nothing
-end
-
-"""
-"""
-function calculate_boundary_data_multipole_d2Gdvperpdvpa!(func_data::vpa_vperp_boundary_data,vpa,vperp,
-                                     I00, I10, I20, I30, I40, I50, I60, I70, I80, 
-                                     I02, I12, I22, I32, I42, I52, I62,
-                                     I04, I14, I24, I34, I44, I06, I16, I26, I08)
-    nvpa = vpa.n
-    nvperp = vperp.n
-    begin_anyv_vperp_region(no_synchronize=true)
-    @loop_vperp ivperp begin
-                func_data.lower_boundary_vpa[ivperp] = multipole_d2Gdvperpdvpa(vpa.grid[1],vperp.grid[ivperp],
-                                                       I00, I10, I20, I30, I40, I50, I60, I70, I80, 
-                                                       I02, I12, I22, I32, I42, I52, I62,
-                                                       I04, I14, I24, I34, I44, I06, I16, I26, I08)
-                func_data.upper_boundary_vpa[ivperp] = multipole_d2Gdvperpdvpa(vpa.grid[nvpa],vperp.grid[ivperp],
-                                                       I00, I10, I20, I30, I40, I50, I60, I70, I80, 
-                                                       I02, I12, I22, I32, I42, I52, I62,
-                                                       I04, I14, I24, I34, I44, I06, I16, I26, I08)
-    end
-    begin_anyv_vpa_region(no_synchronize=true)
-    @loop_vpa ivpa begin
-                func_data.upper_boundary_vperp[ivpa] = multipole_d2Gdvperpdvpa(vpa.grid[ivpa],vperp.grid[nvperp],
-                                                       I00, I10, I20, I30, I40, I50, I60, I70, I80, 
-                                                       I02, I12, I22, I32, I42, I52, I62,
-                                                       I04, I14, I24, I34, I44, I06, I16, I26, I08)
-    end
-    # return to serial parallelisation
-    return nothing
-end
-
-"""
-"""
-function calculate_boundary_data_multipole_d2Gdvpa2!(func_data::vpa_vperp_boundary_data,vpa,vperp,
-                                     I00, I10, I20, I30, I40, I50, I60, I70, I80, 
-                                     I02, I12, I22, I32, I42, I52, I62,
-                                     I04, I14, I24, I34, I44, I06, I16, I26, I08)
-    nvpa = vpa.n
-    nvperp = vperp.n
-    begin_anyv_vperp_region(no_synchronize=true)
-    @loop_vperp ivperp begin
-                func_data.lower_boundary_vpa[ivperp] = multipole_d2Gdvpa2(vpa.grid[1],vperp.grid[ivperp],
-                                                       I00, I10, I20, I30, I40, I50, I60, I70, I80, 
-                                                       I02, I12, I22, I32, I42, I52, I62,
-                                                       I04, I14, I24, I34, I44, I06, I16, I26, I08)
-                func_data.upper_boundary_vpa[ivperp] = multipole_d2Gdvpa2(vpa.grid[nvpa],vperp.grid[ivperp],
-                                                       I00, I10, I20, I30, I40, I50, I60, I70, I80, 
-                                                       I02, I12, I22, I32, I42, I52, I62,
-                                                       I04, I14, I24, I34, I44, I06, I16, I26, I08)
-    end
-    begin_anyv_vpa_region(no_synchronize=true)
-    @loop_vpa ivpa begin
-                func_data.upper_boundary_vperp[ivpa] = multipole_d2Gdvpa2(vpa.grid[ivpa],vperp.grid[nvperp],
-                                                       I00, I10, I20, I30, I40, I50, I60, I70, I80, 
-                                                       I02, I12, I22, I32, I42, I52, I62,
-                                                       I04, I14, I24, I34, I44, I06, I16, I26, I08)
-    end
-    # return to serial parallelisation
-    return nothing
-end
-
-"""
-Function to use the multipole expansion of the Rosenbluth potentials to calculate and
-assign boundary data to an instance of `rosenbluth_potential_boundary_data`, in place,
-without allocation.
-"""
-function calculate_rosenbluth_potential_boundary_data_multipole!(rpbd::rosenbluth_potential_boundary_data,
-    pdf,vpa,vperp,vpa_spectral,vperp_spectral;
-    calculate_GG=false,calculate_dGdvperp=false)
-    # get required moments of pdf
-    I00, I10, I20, I30, I40, I50, I60, I70, I80 = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
-    I02, I12, I22, I32, I42, I52, I62 = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
-    I04, I14, I24, I34, I44 = 0.0, 0.0, 0.0, 0.0, 0.0
-    I06, I16, I26 = 0.0, 0.0, 0.0
-    I08 = 0.0
-    
-    begin_anyv_region()
-    @anyv_serial_region begin
-       I00 = integrate_over_vspace(@view(pdf[:,:]), vpa.grid, 0, vpa.wgts, vperp.grid, 0, vperp.wgts)
-       I10 = integrate_over_vspace(@view(pdf[:,:]), vpa.grid, 1, vpa.wgts, vperp.grid, 0, vperp.wgts)
-       I20 = integrate_over_vspace(@view(pdf[:,:]), vpa.grid, 2, vpa.wgts, vperp.grid, 0, vperp.wgts)
-       I30 = integrate_over_vspace(@view(pdf[:,:]), vpa.grid, 3, vpa.wgts, vperp.grid, 0, vperp.wgts)
-       I40 = integrate_over_vspace(@view(pdf[:,:]), vpa.grid, 4, vpa.wgts, vperp.grid, 0, vperp.wgts)
-       I50 = integrate_over_vspace(@view(pdf[:,:]), vpa.grid, 5, vpa.wgts, vperp.grid, 0, vperp.wgts)
-       I60 = integrate_over_vspace(@view(pdf[:,:]), vpa.grid, 6, vpa.wgts, vperp.grid, 0, vperp.wgts)
-       I70 = integrate_over_vspace(@view(pdf[:,:]), vpa.grid, 7, vpa.wgts, vperp.grid, 0, vperp.wgts)
-       I80 = integrate_over_vspace(@view(pdf[:,:]), vpa.grid, 8, vpa.wgts, vperp.grid, 0, vperp.wgts)
-       
-       I02 = integrate_over_vspace(@view(pdf[:,:]), vpa.grid, 0, vpa.wgts, vperp.grid, 2, vperp.wgts)
-       I12 = integrate_over_vspace(@view(pdf[:,:]), vpa.grid, 1, vpa.wgts, vperp.grid, 2, vperp.wgts)
-       I22 = integrate_over_vspace(@view(pdf[:,:]), vpa.grid, 2, vpa.wgts, vperp.grid, 2, vperp.wgts)
-       I32 = integrate_over_vspace(@view(pdf[:,:]), vpa.grid, 3, vpa.wgts, vperp.grid, 2, vperp.wgts)
-       I42 = integrate_over_vspace(@view(pdf[:,:]), vpa.grid, 4, vpa.wgts, vperp.grid, 2, vperp.wgts)
-       I52 = integrate_over_vspace(@view(pdf[:,:]), vpa.grid, 5, vpa.wgts, vperp.grid, 2, vperp.wgts)
-       I62 = integrate_over_vspace(@view(pdf[:,:]), vpa.grid, 6, vpa.wgts, vperp.grid, 2, vperp.wgts)
-       
-       I04 = integrate_over_vspace(@view(pdf[:,:]), vpa.grid, 0, vpa.wgts, vperp.grid, 4, vperp.wgts)
-       I14 = integrate_over_vspace(@view(pdf[:,:]), vpa.grid, 1, vpa.wgts, vperp.grid, 4, vperp.wgts)
-       I24 = integrate_over_vspace(@view(pdf[:,:]), vpa.grid, 2, vpa.wgts, vperp.grid, 4, vperp.wgts)
-       I34 = integrate_over_vspace(@view(pdf[:,:]), vpa.grid, 3, vpa.wgts, vperp.grid, 4, vperp.wgts)
-       I44 = integrate_over_vspace(@view(pdf[:,:]), vpa.grid, 4, vpa.wgts, vperp.grid, 4, vperp.wgts)
-       
-       I06 = integrate_over_vspace(@view(pdf[:,:]), vpa.grid, 0, vpa.wgts, vperp.grid, 6, vperp.wgts)
-       I16 = integrate_over_vspace(@view(pdf[:,:]), vpa.grid, 1, vpa.wgts, vperp.grid, 6, vperp.wgts)
-       I26 = integrate_over_vspace(@view(pdf[:,:]), vpa.grid, 2, vpa.wgts, vperp.grid, 6, vperp.wgts)
-       
-       I08 = integrate_over_vspace(@view(pdf[:,:]), vpa.grid, 0, vpa.wgts, vperp.grid, 8, vperp.wgts)    
-    end
-    # Broadcast integrals to all processes in the 'anyv' subblock
-    param_vec = [I00, I10, I20, I30, I40, I50, I60, I70, I80, 
-                I02, I12, I22, I32, I42, I52, I62,
-                I04, I14, I24, I34, I44,
-                I06, I16, I26,
-                I08]
-    if comm_anyv_subblock[] != MPI.COMM_NULL
-        MPI.Bcast!(param_vec, 0, comm_anyv_subblock[])
-    end
-    (I00, I10, I20, I30, I40, I50, I60, I70, I80, 
-     I02, I12, I22, I32, I42, I52, I62,
-     I04, I14, I24, I34, I44,
-     I06, I16, I26,
-     I08) = param_vec
-   # println(I00, " ", I10, " ", I20, " ", I30, " ", I40, " ", I50, " ", I60, " ", I70, " ", I80, " ", 
-   #        I02, " ", I12, " ", I22, " ", I32, " ", I42, " ", I52, " ", I62, " ",
-   #        I04, " ", I14, " ", I24, " ", I34, " ", I44, " ",
-   #        I06, " ", I16, " ", I26, " ",
-   #        I08)
-    # ensure data is synchronized
-    _anyv_subblock_synchronize()
-    # evaluate the multipole formulae 
-    calculate_boundary_data_multipole_H!(rpbd.H_data,vpa,vperp, 
-                                     I00, I10, I20, I30, I40, I50, I60, I70, I80, 
-                                     I02, I12, I22, I32, I42, I52, I62,
-                                     I04, I14, I24, I34, I44, I06, I16, I26, I08)
-    calculate_boundary_data_multipole_dHdvpa!(rpbd.dHdvpa_data,vpa,vperp,
-                                     I00, I10, I20, I30, I40, I50, I60, I70, I80, 
-                                     I02, I12, I22, I32, I42, I52, I62,
-                                     I04, I14, I24, I34, I44, I06, I16, I26, I08)
-    calculate_boundary_data_multipole_dHdvperp!(rpbd.dHdvperp_data,vpa,vperp,
-                                     I00, I10, I20, I30, I40, I50, I60, I70, I80, 
-                                     I02, I12, I22, I32, I42, I52, I62,
-                                     I04, I14, I24, I34, I44, I06, I16, I26, I08)
-    if calculate_GG
-        calculate_boundary_data_multipole_G!(rpbd.G_data,vpa,vperp,
-                                     I00, I10, I20, I30, I40, I50, I60, I70, I80, 
-                                     I02, I12, I22, I32, I42, I52, I62,
-                                     I04, I14, I24, I34, I44, I06, I16, I26, I08)
-    end
-    if calculate_dGdvperp
-        calculate_boundary_data_multipole_dGdvperp!(rpbd.dGdvperp_data,vpa,vperp,
-                                     I00, I10, I20, I30, I40, I50, I60, I70, I80, 
-                                     I02, I12, I22, I32, I42, I52, I62,
-                                     I04, I14, I24, I34, I44, I06, I16, I26, I08)
-    end
-    calculate_boundary_data_multipole_d2Gdvperp2!(rpbd.d2Gdvperp2_data,vpa,vperp,
-                                     I00, I10, I20, I30, I40, I50, I60, I70, I80, 
-                                     I02, I12, I22, I32, I42, I52, I62,
-                                     I04, I14, I24, I34, I44, I06, I16, I26, I08)
-    calculate_boundary_data_multipole_d2Gdvperpdvpa!(rpbd.d2Gdvperpdvpa_data,vpa,vperp,
-                                     I00, I10, I20, I30, I40, I50, I60, I70, I80, 
-                                     I02, I12, I22, I32, I42, I52, I62,
-                                     I04, I14, I24, I34, I44, I06, I16, I26, I08)
-    calculate_boundary_data_multipole_d2Gdvpa2!(rpbd.d2Gdvpa2_data,vpa,vperp,
-                                     I00, I10, I20, I30, I40, I50, I60, I70, I80, 
-                                     I02, I12, I22, I32, I42, I52, I62,
-                                     I04, I14, I24, I34, I44, I06, I16, I26, I08)
-    
-    return nothing
-end
-
-"""
-Function to compare two instances of `rosenbluth_potential_boundary_data` --
-one assumed to contain exact results, and the other numerically computed results -- and compute
-the maximum value of the error. Calls `test_boundary_data()`.
-"""
-function test_rosenbluth_potential_boundary_data(rpbd::rosenbluth_potential_boundary_data,
-    rpbd_exact::rosenbluth_potential_boundary_data,vpa,vperp;print_to_screen=true)
-    
-    error_buffer_vpa = Array{mk_float,1}(undef,vpa.n)
-    error_buffer_vperp_1 = Array{mk_float,1}(undef,vperp.n)
-    error_buffer_vperp_2 = Array{mk_float,1}(undef,vperp.n)
-    max_H_err = test_boundary_data(rpbd.H_data,rpbd_exact.H_data,"H",vpa,vperp,error_buffer_vpa,error_buffer_vperp_1,error_buffer_vperp_2,print_to_screen)  
-    max_dHdvpa_err = test_boundary_data(rpbd.dHdvpa_data,rpbd_exact.dHdvpa_data,"dHdvpa",vpa,vperp,error_buffer_vpa,error_buffer_vperp_1,error_buffer_vperp_2,print_to_screen)  
-    max_dHdvperp_err = test_boundary_data(rpbd.dHdvperp_data,rpbd_exact.dHdvperp_data,"dHdvperp",vpa,vperp,error_buffer_vpa,error_buffer_vperp_1,error_buffer_vperp_2,print_to_screen)  
-    max_G_err = test_boundary_data(rpbd.G_data,rpbd_exact.G_data,"G",vpa,vperp,error_buffer_vpa,error_buffer_vperp_1,error_buffer_vperp_2,print_to_screen)  
-    max_dGdvperp_err = test_boundary_data(rpbd.dGdvperp_data,rpbd_exact.dGdvperp_data,"dGdvperp",vpa,vperp,error_buffer_vpa,error_buffer_vperp_1,error_buffer_vperp_2,print_to_screen)  
-    max_d2Gdvperp2_err = test_boundary_data(rpbd.d2Gdvperp2_data,rpbd_exact.d2Gdvperp2_data,"d2Gdvperp2",vpa,vperp,error_buffer_vpa,error_buffer_vperp_1,error_buffer_vperp_2,print_to_screen)  
-    max_d2Gdvperpdvpa_err = test_boundary_data(rpbd.d2Gdvperpdvpa_data,rpbd_exact.d2Gdvperpdvpa_data,"d2Gdvperpdvpa",vpa,vperp,error_buffer_vpa,error_buffer_vperp_1,error_buffer_vperp_2,print_to_screen)  
-    max_d2Gdvpa2_err = test_boundary_data(rpbd.d2Gdvpa2_data,rpbd_exact.d2Gdvpa2_data,"d2Gdvpa2",vpa,vperp,error_buffer_vpa,error_buffer_vperp_1,error_buffer_vperp_2,print_to_screen)  
-
-    return max_H_err, max_dHdvpa_err, max_dHdvperp_err, max_G_err, max_dGdvperp_err, max_d2Gdvperp2_err, max_d2Gdvperpdvpa_err, max_d2Gdvpa2_err
-end
-
-"""
-Function to compute the maximum error \${\\rm MAX}|f_{\\rm numerical}-f_{\\rm exact}|\$ for
-instances of `vpa_vperp_boundary_data`.
-"""
-function test_boundary_data(func,func_exact,func_name,vpa,vperp,buffer_vpa,buffer_vperp_1,buffer_vperp_2,print_to_screen)
-    nvpa = vpa.n
-    nvperp = vperp.n
-    for ivperp in 1:nvperp
-        buffer_vperp_1[ivperp] = abs(func.lower_boundary_vpa[ivperp] - func_exact.lower_boundary_vpa[ivperp])
-        buffer_vperp_2[ivperp] = abs(func.upper_boundary_vpa[ivperp] - func_exact.upper_boundary_vpa[ivperp])
-    end
-    for ivpa in 1:nvpa
-        buffer_vpa[ivpa] = abs(func.upper_boundary_vperp[ivpa] - func_exact.upper_boundary_vperp[ivpa])
-    end
-    max_lower_vpa_err = maximum(buffer_vperp_1)
-    max_upper_vpa_err = maximum(buffer_vperp_2)
-    max_upper_vperp_err = maximum(buffer_vpa)
-    if print_to_screen
-        println(string(func_name*" boundary data:"))
-        println("max(lower_vpa_err) = ",max_lower_vpa_err)
-        println("max(upper_vpa_err) = ",max_upper_vpa_err)
-        println("max(upper_vperp_err) = ",max_upper_vperp_err)
-    end
-    max_err = max(max_lower_vpa_err,max_upper_vpa_err,max_upper_vperp_err)
-    return max_err
-end
-
-"""
-    get_global_compound_index(vpa,vperp,ielement_vpa,ielement_vperp,ivpa_local,ivperp_local)
-
-For local (within the single element specified by `ielement_vpa` and `ielement_vperp`)
-indices `ivpa_local` and `ivperp_local`, get the global index in the 'linear-indexed' 2d
-space of size `(vperp.n, vpa.n)` (as returned by [`ic_func`](@ref)).
-"""
-function get_global_compound_index(vpa,vperp,ielement_vpa,ielement_vperp,ivpa_local,ivperp_local)
-    # global indices on the grids
-    ivpa_global = vpa.igrid_full[ivpa_local,ielement_vpa]
-    ivperp_global = vperp.igrid_full[ivperp_local,ielement_vperp]
-    # global compound index
-    ic_global = ic_func(ivpa_global,ivperp_global,vpa.n)
-    return ic_global
-end
-
-"""
-Unused function. Sets `f(vpa,vperp)` to zero at the boundaries
-in `(vpa,vperp)`.
-"""
-function enforce_zero_bc!(fvpavperp,vpa,vperp;impose_BC_at_zero_vperp=false)
-    # lower vpa boundary
-    @loop_vperp ivperp begin
-        fvpavperp[1,ivperp] = 0.0
-    end
-    
-    # upper vpa boundary
-    @loop_vperp ivperp begin
-        fvpavperp[end,ivperp] = 0.0
-    end
-    
-    if impose_BC_at_zero_vperp
-        # lower vperp boundary
-        @loop_vpa ivpa begin
-            fvpavperp[ivpa,1] = 0.0
-        end
-    end
-    
-    # upper vperp boundary
-    @loop_vpa ivpa begin
-        fvpavperp[ivpa,end] = 0.0
-    end
-end
-
-"""
-Sets `f(vpa,vperp)` to a specied value `f_bc` at the boundaries
-in `(vpa,vperp)`. `f_bc` is a 2D array of `(vpa,vperp)` where
-only boundary data is used. Used for testing.
-"""
-function enforce_dirichlet_bc!(fvpavperp,vpa,vperp,f_bc;dirichlet_vperp_lower_boundary=false)
-    # lower vpa boundary
-    for ivperp ∈ 1:vperp.n
-        fvpavperp[1,ivperp] = f_bc[1,ivperp]
-    end
-    
-    # upper vpa boundary
-    for ivperp ∈ 1:vperp.n
-        fvpavperp[end,ivperp] = f_bc[end,ivperp]
-    end
-    
-    if dirichlet_vperp_lower_boundary
-        # lower vperp boundary
-        for ivpa ∈ 1:vpa.n
-            fvpavperp[ivpa,1] = f_bc[ivpa,1]
-        end
-    end
-    
-    # upper vperp boundary
-    for ivpa ∈ 1:vpa.n
-        fvpavperp[ivpa,end] = f_bc[ivpa,end]
-    end
-end
-
-"""
-Sets `f(vpa,vperp)` to a specied value `f_bc` at the boundaries
-in `(vpa,vperp)`. `f_bc` is an instance of `vpa_vperp_boundary_data`.
-"""
-function enforce_dirichlet_bc!(fvpavperp,vpa,vperp,f_bc::vpa_vperp_boundary_data)
-    # lower vpa boundary
-    for ivperp ∈ 1:vperp.n
-        fvpavperp[1,ivperp] = f_bc.lower_boundary_vpa[ivperp]
-    end
-    
-    # upper vpa boundary
-    for ivperp ∈ 1:vperp.n
-        fvpavperp[end,ivperp] = f_bc.upper_boundary_vpa[ivperp]
-    end
-            
-    # upper vperp boundary
-    for ivpa ∈ 1:vpa.n
-        fvpavperp[ivpa,end] = f_bc.upper_boundary_vperp[ivpa]
-    end
-    return nothing
-end
-
-"""
-Function to contruct the global sparse matrices used to solve
-the elliptic PDEs for the Rosenbluth potentials. Uses a dense matrix
-construction method. The matrices are 2D in the compound index `ic` 
-which indexes the velocity space labelled by `ivpa,ivperp`.
-Dirichlet boundary conditions are imposed in the appropriate stiffness
-matrices by setting the boundary row to be the Kronecker delta 
-(0 except where `ivpa = ivpap` and `ivperp = ivperpp`). 
-Used for testing.
-"""
-function assemble_matrix_operators_dirichlet_bc(vpa,vperp,vpa_spectral,vperp_spectral;print_to_screen=true)
-    nc_global = vpa.n*vperp.n
-    # Assemble a 2D mass matrix in the global compound coordinate
-    nc_global = vpa.n*vperp.n
-    MM2D = Array{mk_float,2}(undef,nc_global,nc_global)
-    MM2D .= 0.0
-    KKpar2D = Array{mk_float,2}(undef,nc_global,nc_global)
-    KKpar2D .= 0.0
-    KKperp2D = Array{mk_float,2}(undef,nc_global,nc_global)
-    KKperp2D .= 0.0
-    KPperp2D = Array{mk_float,2}(undef,nc_global,nc_global)
-    KPperp2D .= 0.0
-    KKpar2D_with_BC_terms = Array{mk_float,2}(undef,nc_global,nc_global)
-    KKpar2D_with_BC_terms .= 0.0
-    KKperp2D_with_BC_terms = Array{mk_float,2}(undef,nc_global,nc_global)
-    KKperp2D_with_BC_terms .= 0.0
-    PUperp2D = Array{mk_float,2}(undef,nc_global,nc_global)
-    PUperp2D .= 0.0
-    PPparPUperp2D = Array{mk_float,2}(undef,nc_global,nc_global)
-    PPparPUperp2D .= 0.0
-    PPpar2D = Array{mk_float,2}(undef,nc_global,nc_global)
-    PPpar2D .= 0.0
-    MMparMNperp2D = Array{mk_float,2}(undef,nc_global,nc_global)
-    MMparMNperp2D .= 0.0
-    # Laplacian matrix
-    LP2D = Array{mk_float,2}(undef,nc_global,nc_global)
-    LP2D .= 0.0
-    # Modified Laplacian matrix
-    LV2D = Array{mk_float,2}(undef,nc_global,nc_global)
-    LV2D .= 0.0
-    # Modified Laplacian matrix
-    LB2D = Array{mk_float,2}(undef,nc_global,nc_global)
-    LB2D .= 0.0
-    
-    #print_matrix(MM2D,"MM2D",nc_global,nc_global)
-    # local dummy arrays
-    MMpar = Array{mk_float,2}(undef,vpa.ngrid,vpa.ngrid)
-    MMperp = Array{mk_float,2}(undef,vperp.ngrid,vperp.ngrid)
-    MNperp = Array{mk_float,2}(undef,vperp.ngrid,vperp.ngrid)
-    MRperp = Array{mk_float,2}(undef,vperp.ngrid,vperp.ngrid)
-    KKpar = Array{mk_float,2}(undef,vpa.ngrid,vpa.ngrid)
-    KKperp = Array{mk_float,2}(undef,vperp.ngrid,vperp.ngrid)
-    KKpar_with_BC_terms = Array{mk_float,2}(undef,vpa.ngrid,vpa.ngrid)
-    KKperp_with_BC_terms = Array{mk_float,2}(undef,vperp.ngrid,vperp.ngrid)
-    KJperp = Array{mk_float,2}(undef,vperp.ngrid,vperp.ngrid)
-    LLperp = Array{mk_float,2}(undef,vperp.ngrid,vperp.ngrid)
-    PPperp = Array{mk_float,2}(undef,vperp.ngrid,vperp.ngrid)
-    PUperp = Array{mk_float,2}(undef,vperp.ngrid,vperp.ngrid)
-    PPpar = Array{mk_float,2}(undef,vpa.ngrid,vpa.ngrid)
-        
-    impose_BC_at_zero_vperp = false
-    @serial_region begin
-        if global_rank[] == 0 && print_to_screen
-            println("begin elliptic operator assignment   ", Dates.format(now(), dateformat"H:MM:SS"))
-        end
-    end
-    for ielement_vperp in 1:vperp.nelement_local
-        get_QQ_local!(MMperp,ielement_vperp,vperp_spectral.lobatto,vperp_spectral.radau,vperp,"M")
-        get_QQ_local!(MRperp,ielement_vperp,vperp_spectral.lobatto,vperp_spectral.radau,vperp,"R")
-        get_QQ_local!(MNperp,ielement_vperp,vperp_spectral.lobatto,vperp_spectral.radau,vperp,"N")
-        get_QQ_local!(KKperp,ielement_vperp,vperp_spectral.lobatto,vperp_spectral.radau,vperp,"K")
-        get_QQ_local!(KKperp_with_BC_terms,ielement_vperp,vperp_spectral.lobatto,vperp_spectral.radau,vperp,"K_with_BC_terms")
-        get_QQ_local!(KJperp,ielement_vperp,vperp_spectral.lobatto,vperp_spectral.radau,vperp,"J")
-        get_QQ_local!(LLperp,ielement_vperp,vperp_spectral.lobatto,vperp_spectral.radau,vperp,"L")
-        get_QQ_local!(PPperp,ielement_vperp,vperp_spectral.lobatto,vperp_spectral.radau,vperp,"P")
-        get_QQ_local!(PUperp,ielement_vperp,vperp_spectral.lobatto,vperp_spectral.radau,vperp,"U")
-        #print_matrix(MMperp,"MMperp",vperp.ngrid,vperp.ngrid)
-        #print_matrix(MRperp,"MRperp",vperp.ngrid,vperp.ngrid)
-        #print_matrix(MNperp,"MNperp",vperp.ngrid,vperp.ngrid)
-        #print_matrix(KKperp,"KKperp",vperp.ngrid,vperp.ngrid)
-        #print_matrix(KJperp,"KJperp",vperp.ngrid,vperp.ngrid)
-        #print_matrix(LLperp,"LLperp",vperp.ngrid,vperp.ngrid)
-        #print_matrix(PPperp,"PPperp",vperp.ngrid,vperp.ngrid)
-        #print_matrix(PUperp,"PUperp",vperp.ngrid,vperp.ngrid)
-        
-        for ielement_vpa in 1:vpa.nelement_local
-            get_QQ_local!(MMpar,ielement_vpa,vpa_spectral.lobatto,vpa_spectral.radau,vpa,"M")
-            get_QQ_local!(KKpar,ielement_vpa,vpa_spectral.lobatto,vpa_spectral.radau,vpa,"K")
-            get_QQ_local!(KKpar_with_BC_terms,ielement_vpa,vpa_spectral.lobatto,vpa_spectral.radau,vpa,"K_with_BC_terms")
-            get_QQ_local!(PPpar,ielement_vpa,vpa_spectral.lobatto,vpa_spectral.radau,vpa,"P")
-            #print_matrix(MMpar,"MMpar",vpa.ngrid,vpa.ngrid)
-            #print_matrix(KKpar,"KKpar",vpa.ngrid,vpa.ngrid)
-            #print_matrix(PPpar,"PPpar",vpa.ngrid,vpa.ngrid)
-            
-            for ivperpp_local in 1:vperp.ngrid
-                for ivperp_local in 1:vperp.ngrid
-                    for ivpap_local in 1:vpa.ngrid
-                        for ivpa_local in 1:vpa.ngrid
-                            ic_global = get_global_compound_index(vpa,vperp,ielement_vpa,ielement_vperp,ivpa_local,ivperp_local)
-                            icp_global = get_global_compound_index(vpa,vperp,ielement_vpa,ielement_vperp,ivpap_local,ivperpp_local) #get_indices(vpa,vperp,ielement_vpa,ielement_vperp,ivpa_local,ivpap_local,ivperp_local,ivperpp_local)
-                            #println("ielement_vpa: ",ielement_vpa," ielement_vperp: ",ielement_vperp)
-                            #println("ivpa_local: ",ivpa_local," ivpap_local: ",ivpap_local)
-                            #println("ivperp_local: ",ivperp_local," ivperpp_local: ",ivperpp_local)
-                            #println("ic: ",ic_global," icp: ",icp_global)
-                            # boundary condition possibilities
-                            lower_boundary_row_vpa = (ielement_vpa == 1 && ivpa_local == 1)
-                            upper_boundary_row_vpa = (ielement_vpa == vpa.nelement_local && ivpa_local == vpa.ngrid)
-                            lower_boundary_row_vperp = (ielement_vperp == 1 && ivperp_local == 1)
-                            upper_boundary_row_vperp = (ielement_vperp == vperp.nelement_local && ivperp_local == vperp.ngrid)
-                            
-
-                            if lower_boundary_row_vpa
-                                if ivpap_local == 1 && ivperp_local == ivperpp_local
-                                    LP2D[ic_global,icp_global] = 1.0
-                                    LV2D[ic_global,icp_global] = 1.0
-                                    LB2D[ic_global,icp_global] = 1.0
-                                else 
-                                    LP2D[ic_global,icp_global] = 0.0
-                                    LV2D[ic_global,icp_global] = 0.0
-                                    LB2D[ic_global,icp_global] = 0.0
-                                end
-                            elseif upper_boundary_row_vpa
-                                if ivpap_local == vpa.ngrid && ivperp_local == ivperpp_local 
-                                    LP2D[ic_global,icp_global] = 1.0
-                                    LV2D[ic_global,icp_global] = 1.0
-                                    LB2D[ic_global,icp_global] = 1.0
-                                else 
-                                    LP2D[ic_global,icp_global] = 0.0
-                                    LV2D[ic_global,icp_global] = 0.0
-                                    LB2D[ic_global,icp_global] = 0.0
-                                end
-                            elseif lower_boundary_row_vperp && impose_BC_at_zero_vperp
-                                if ivperpp_local == 1 && ivpa_local == ivpap_local
-                                    LP2D[ic_global,icp_global] = 1.0
-                                    LV2D[ic_global,icp_global] = 1.0
-                                    LB2D[ic_global,icp_global] = 1.0
-                                else 
-                                    LP2D[ic_global,icp_global] = 0.0
-                                    LV2D[ic_global,icp_global] = 0.0
-                                    LB2D[ic_global,icp_global] = 0.0
-                                end
-                            elseif upper_boundary_row_vperp
-                                if ivperpp_local == vperp.ngrid && ivpa_local == ivpap_local
-                                    LP2D[ic_global,icp_global] = 1.0
-                                    LV2D[ic_global,icp_global] = 1.0
-                                    LB2D[ic_global,icp_global] = 1.0
-                                else 
-                                    LP2D[ic_global,icp_global] = 0.0
-                                    LV2D[ic_global,icp_global] = 0.0
-                                    LB2D[ic_global,icp_global] = 0.0
-                                end
-                            else
-                                # assign Laplacian and modified Laplacian matrix data
-                                LP2D[ic_global,icp_global] += (KKpar[ivpa_local,ivpap_local]*
-                                                                MMperp[ivperp_local,ivperpp_local] +
-                                                               MMpar[ivpa_local,ivpap_local]*
-                                                                LLperp[ivperp_local,ivperpp_local])
-                                LV2D[ic_global,icp_global] += (KKpar[ivpa_local,ivpap_local]*
-                                                                MRperp[ivperp_local,ivperpp_local] +
-                                                               MMpar[ivpa_local,ivpap_local]*
-                                                                (KJperp[ivperp_local,ivperpp_local] -
-                                                                 PPperp[ivperp_local,ivperpp_local] - 
-                                                                 MNperp[ivperp_local,ivperpp_local]))
-                                LB2D[ic_global,icp_global] += (KKpar[ivpa_local,ivpap_local]*
-                                                                MRperp[ivperp_local,ivperpp_local] +
-                                                               MMpar[ivpa_local,ivpap_local]*
-                                                                (KJperp[ivperp_local,ivperpp_local] -
-                                                                 PPperp[ivperp_local,ivperpp_local] - 
-                                                             4.0*MNperp[ivperp_local,ivperpp_local]))
-                            end
-                            # assign mass matrix data
-                            MM2D[ic_global,icp_global] += MMpar[ivpa_local,ivpap_local]*
-                                                                MMperp[ivperp_local,ivperpp_local]
-                            
-                            # assign K matrices
-                            KKpar2D[ic_global,icp_global] += KKpar[ivpa_local,ivpap_local]*
-                                                            MMperp[ivperp_local,ivperpp_local]
-                            KKperp2D[ic_global,icp_global] += MMpar[ivpa_local,ivpap_local]*
-                                                            KKperp[ivperp_local,ivperpp_local]
-                            KPperp2D[ic_global,icp_global] += MMpar[ivpa_local,ivpap_local]*
-                                                         (KJperp[ivperp_local,ivperpp_local] -
-                                                      2.0*PPperp[ivperp_local,ivperpp_local] -
-                                                      2.0*MNperp[ivperp_local,ivperpp_local])
-                            # assign K matrices with explicit boundary terms from integration by parts
-                            KKpar2D_with_BC_terms[ic_global,icp_global] += KKpar_with_BC_terms[ivpa_local,ivpap_local]*
-                                                            MMperp[ivperp_local,ivperpp_local]
-                            KKperp2D_with_BC_terms[ic_global,icp_global] += MMpar[ivpa_local,ivpap_local]*
-                                                            KKperp_with_BC_terms[ivperp_local,ivperpp_local]
-                            # assign PU matrix
-                            PUperp2D[ic_global,icp_global] += MMpar[ivpa_local,ivpap_local]*
-                                                            PUperp[ivperp_local,ivperpp_local]
-                            PPparPUperp2D[ic_global,icp_global] += PPpar[ivpa_local,ivpap_local]*
-                                                            PUperp[ivperp_local,ivperpp_local]
-                            PPpar2D[ic_global,icp_global] += PPpar[ivpa_local,ivpap_local]*
-                                                            MMperp[ivperp_local,ivperpp_local]
-                            # assign RHS mass matrix for d2Gdvperp2
-                            MMparMNperp2D[ic_global,icp_global] += MMpar[ivpa_local,ivpap_local]*
-                                                            MNperp[ivperp_local,ivperpp_local]
-                        end
-                    end
-                end
-            end
-        end
-    end
-    @serial_region begin
-        if global_rank[] == 0 && print_to_screen
-            println("finished elliptic operator assignment   ", Dates.format(now(), dateformat"H:MM:SS"))
-        end
-        # convert these matrices to sparse matrices
-        if global_rank[] == 0 && print_to_screen
-            println("begin conversion to sparse matrices   ", Dates.format(now(), dateformat"H:MM:SS"))
-        end
-    end
-    MM2D_sparse = sparse(MM2D)
-    KKpar2D_sparse = sparse(KKpar2D)
-    KKperp2D_sparse = sparse(KKperp2D)
-    KKpar2D_with_BC_terms_sparse = sparse(KKpar2D_with_BC_terms)
-    KKperp2D_with_BC_terms_sparse = sparse(KKperp2D_with_BC_terms)
-    LP2D_sparse = sparse(LP2D)
-    LV2D_sparse = sparse(LV2D)
-    LB2D_sparse = sparse(LB2D)
-    KPperp2D_sparse = sparse(KPperp2D)
-    PUperp2D_sparse = sparse(PUperp2D)
-    PPparPUperp2D_sparse = sparse(PPparPUperp2D)
-    PPpar2D_sparse = sparse(PPpar2D)
-    MMparMNperp2D_sparse = sparse(MMparMNperp2D)
-    return MM2D_sparse, KKpar2D_sparse, KKperp2D_sparse, 
-           KKpar2D_with_BC_terms_sparse, KKperp2D_with_BC_terms_sparse,
-           LP2D_sparse, LV2D_sparse, LB2D_sparse, 
-           KPperp2D_sparse,PUperp2D_sparse, PPparPUperp2D_sparse,
-           PPpar2D_sparse, MMparMNperp2D_sparse
-end
-
-"""
-Function to contruct the global sparse matrices used to solve
-the elliptic PDEs for the Rosenbluth potentials. Uses a sparse matrix
-construction method. The matrices are 2D in the compound index `ic` 
-which indexes the velocity space labelled by `ivpa,ivperp`.
-Dirichlet boundary conditions are imposed in the appropriate stiffness
-matrices by setting the boundary row to be the Kronecker delta 
-(0 except where `ivpa = ivpap` and `ivperp = ivperpp`).
-See also `assemble_matrix_operators_dirichlet_bc()`.
-"""
-function assemble_matrix_operators_dirichlet_bc_sparse(vpa,vperp,vpa_spectral,vperp_spectral;print_to_screen=true)
-    # Assemble a 2D mass matrix in the global compound coordinate
-    nc_global = vpa.n*vperp.n
-    ntot_vpa = (vpa.nelement_local - 1)*(vpa.ngrid^2 - 1) + vpa.ngrid^2
-    ntot_vperp = (vperp.nelement_local - 1)*(vperp.ngrid^2 - 1) + vperp.ngrid^2
-    nsparse = ntot_vpa*ntot_vperp
-    ngrid_vpa = vpa.ngrid
-    nelement_vpa = vpa.nelement_local
-    ngrid_vperp = vperp.ngrid
-    nelement_vperp = vperp.nelement_local
-    
-    MM2D = allocate_sparse_matrix_constructor(nsparse)
-    KKpar2D = allocate_sparse_matrix_constructor(nsparse)
-    KKperp2D = allocate_sparse_matrix_constructor(nsparse)
-    KKpar2D_with_BC_terms = allocate_sparse_matrix_constructor(nsparse)
-    KKperp2D_with_BC_terms = allocate_sparse_matrix_constructor(nsparse)
-    PUperp2D = allocate_sparse_matrix_constructor(nsparse)
-    PPparPUperp2D = allocate_sparse_matrix_constructor(nsparse)
-    PPpar2D = allocate_sparse_matrix_constructor(nsparse)
-    MMparMNperp2D = allocate_sparse_matrix_constructor(nsparse)
-    KPperp2D = allocate_sparse_matrix_constructor(nsparse)
-    # Laplacian matrix
-    LP2D = allocate_sparse_matrix_constructor(nsparse)
-    # Modified Laplacian matrix (for d / d vperp potentials)
-    LV2D = allocate_sparse_matrix_constructor(nsparse)
-    # Modified Laplacian matrix (for d^2 / d vperp^2 potentials)
-    LB2D = allocate_sparse_matrix_constructor(nsparse)
-    
-    # local dummy arrays
-    MMpar = Array{mk_float,2}(undef,ngrid_vpa,ngrid_vpa)
-    MMperp = Array{mk_float,2}(undef,ngrid_vperp,ngrid_vperp)
-    MNperp = Array{mk_float,2}(undef,ngrid_vperp,ngrid_vperp)
-    MRperp = Array{mk_float,2}(undef,ngrid_vperp,ngrid_vperp)
-    KKpar = Array{mk_float,2}(undef,ngrid_vpa,ngrid_vpa)
-    KKpar_with_BC_terms = Array{mk_float,2}(undef,ngrid_vpa,ngrid_vpa)
-    KKperp = Array{mk_float,2}(undef,ngrid_vperp,ngrid_vperp)
-    KKperp_with_BC_terms = Array{mk_float,2}(undef,ngrid_vperp,ngrid_vperp)
-    KJperp = Array{mk_float,2}(undef,ngrid_vperp,ngrid_vperp)
-    LLperp = Array{mk_float,2}(undef,ngrid_vperp,ngrid_vperp)
-    PPperp = Array{mk_float,2}(undef,ngrid_vperp,ngrid_vperp)
-    PUperp = Array{mk_float,2}(undef,ngrid_vperp,ngrid_vperp)
-    PPpar = Array{mk_float,2}(undef,ngrid_vpa,ngrid_vpa)
-        
-    impose_BC_at_zero_vperp = false
-    @serial_region begin
-        if global_rank[] == 0 && print_to_screen
-            println("begin elliptic operator assignment   ", Dates.format(now(), dateformat"H:MM:SS"))
-        end
-    end
-    for ielement_vperp in 1:nelement_vperp
-        get_QQ_local!(MMperp,ielement_vperp,vperp_spectral.lobatto,vperp_spectral.radau,vperp,"M")
-        get_QQ_local!(MRperp,ielement_vperp,vperp_spectral.lobatto,vperp_spectral.radau,vperp,"R")
-        get_QQ_local!(MNperp,ielement_vperp,vperp_spectral.lobatto,vperp_spectral.radau,vperp,"N")
-        get_QQ_local!(KKperp,ielement_vperp,vperp_spectral.lobatto,vperp_spectral.radau,vperp,"K")
-        get_QQ_local!(KKperp_with_BC_terms,ielement_vperp,vperp_spectral.lobatto,vperp_spectral.radau,vperp,"K_with_BC_terms")
-        get_QQ_local!(KJperp,ielement_vperp,vperp_spectral.lobatto,vperp_spectral.radau,vperp,"J")
-        get_QQ_local!(LLperp,ielement_vperp,vperp_spectral.lobatto,vperp_spectral.radau,vperp,"L")
-        get_QQ_local!(PPperp,ielement_vperp,vperp_spectral.lobatto,vperp_spectral.radau,vperp,"P")
-        get_QQ_local!(PUperp,ielement_vperp,vperp_spectral.lobatto,vperp_spectral.radau,vperp,"U")
-        #print_matrix(MMperp,"MMperp",vperp.ngrid,vperp.ngrid)
-        #print_matrix(MRperp,"MRperp",vperp.ngrid,vperp.ngrid)
-        #print_matrix(MNperp,"MNperp",vperp.ngrid,vperp.ngrid)
-        #print_matrix(KKperp,"KKperp",vperp.ngrid,vperp.ngrid)
-        #print_matrix(KJperp,"KJperp",vperp.ngrid,vperp.ngrid)
-        #print_matrix(LLperp,"LLperp",vperp.ngrid,vperp.ngrid)
-        #print_matrix(PPperp,"PPperp",vperp.ngrid,vperp.ngrid)
-        #print_matrix(PUperp,"PUperp",vperp.ngrid,vperp.ngrid)
-        
-        for ielement_vpa in 1:nelement_vpa
-            get_QQ_local!(MMpar,ielement_vpa,vpa_spectral.lobatto,vpa_spectral.radau,vpa,"M")
-            get_QQ_local!(KKpar_with_BC_terms,ielement_vpa,vpa_spectral.lobatto,vpa_spectral.radau,vpa,"K_with_BC_terms")
-            get_QQ_local!(KKpar,ielement_vpa,vpa_spectral.lobatto,vpa_spectral.radau,vpa,"K")
-            get_QQ_local!(PPpar,ielement_vpa,vpa_spectral.lobatto,vpa_spectral.radau,vpa,"P")
-            #print_matrix(MMpar,"MMpar",vpa.ngrid,vpa.ngrid)
-            #print_matrix(KKpar,"KKpar",vpa.ngrid,vpa.ngrid)
-            #print_matrix(PPpar,"PPpar",vpa.ngrid,vpa.ngrid)
-            
-            for ivperpp_local in 1:ngrid_vperp
-                for ivperp_local in 1:ngrid_vperp
-                    for ivpap_local in 1:ngrid_vpa
-                        for ivpa_local in 1:ngrid_vpa
-                            ic_global = get_global_compound_index(vpa,vperp,ielement_vpa,ielement_vperp,ivpa_local,ivperp_local)
-                            icp_global = get_global_compound_index(vpa,vperp,ielement_vpa,ielement_vperp,ivpap_local,ivperpp_local) #get_indices(vpa,vperp,ielement_vpa,ielement_vperp,ivpa_local,ivpap_local,ivperp_local,ivperpp_local)
-                            icsc = icsc_func(ivpa_local,ivpap_local,ielement_vpa::mk_int,
-                                           ngrid_vpa,nelement_vpa,
-                                           ivperp_local,ivperpp_local,
-                                           ielement_vperp,
-                                           ngrid_vperp,nelement_vperp)
-                            #println("ielement_vpa: ",ielement_vpa," ielement_vperp: ",ielement_vperp)
-                            #println("ivpa_local: ",ivpa_local," ivpap_local: ",ivpap_local)
-                            #println("ivperp_local: ",ivperp_local," ivperpp_local: ",ivperpp_local)
-                            #println("ic: ",ic_global," icp: ",icp_global)
-                            # boundary condition possibilities
-                            lower_boundary_row_vpa = (ielement_vpa == 1 && ivpa_local == 1)
-                            upper_boundary_row_vpa = (ielement_vpa == vpa.nelement_local && ivpa_local == vpa.ngrid)
-                            lower_boundary_row_vperp = (ielement_vperp == 1 && ivperp_local == 1)
-                            upper_boundary_row_vperp = (ielement_vperp == vperp.nelement_local && ivperp_local == vperp.ngrid)
-                            
-
-                            if lower_boundary_row_vpa
-                                if ivpap_local == 1 && ivperp_local == ivperpp_local
-                                    assign_constructor_data!(LP2D,icsc,ic_global,icp_global,1.0)
-                                    assign_constructor_data!(LV2D,icsc,ic_global,icp_global,1.0)
-                                    assign_constructor_data!(LB2D,icsc,ic_global,icp_global,1.0)
-                                else 
-                                    assign_constructor_data!(LP2D,icsc,ic_global,icp_global,0.0)
-                                    assign_constructor_data!(LV2D,icsc,ic_global,icp_global,0.0)
-                                    assign_constructor_data!(LB2D,icsc,ic_global,icp_global,0.0)
-                                end
-                            elseif upper_boundary_row_vpa
-                                if ivpap_local == vpa.ngrid && ivperp_local == ivperpp_local 
-                                    assign_constructor_data!(LP2D,icsc,ic_global,icp_global,1.0)
-                                    assign_constructor_data!(LV2D,icsc,ic_global,icp_global,1.0)
-                                    assign_constructor_data!(LB2D,icsc,ic_global,icp_global,1.0)
-                                else 
-                                    assign_constructor_data!(LP2D,icsc,ic_global,icp_global,0.0)
-                                    assign_constructor_data!(LV2D,icsc,ic_global,icp_global,0.0)
-                                    assign_constructor_data!(LB2D,icsc,ic_global,icp_global,0.0)
-                                end
-                            elseif lower_boundary_row_vperp && impose_BC_at_zero_vperp
-                                if ivperpp_local == 1 && ivpa_local == ivpap_local
-                                    assign_constructor_data!(LP2D,icsc,ic_global,icp_global,1.0)
-                                    assign_constructor_data!(LV2D,icsc,ic_global,icp_global,1.0)
-                                    assign_constructor_data!(LB2D,icsc,ic_global,icp_global,1.0)
-                                else 
-                                    assign_constructor_data!(LP2D,icsc,ic_global,icp_global,0.0)
-                                    assign_constructor_data!(LV2D,icsc,ic_global,icp_global,0.0)
-                                    assign_constructor_data!(LB2D,icsc,ic_global,icp_global,0.0)
-                                end
-                            elseif upper_boundary_row_vperp
-                                if ivperpp_local == vperp.ngrid && ivpa_local == ivpap_local
-                                    assign_constructor_data!(LP2D,icsc,ic_global,icp_global,1.0)
-                                    assign_constructor_data!(LV2D,icsc,ic_global,icp_global,1.0)
-                                    assign_constructor_data!(LB2D,icsc,ic_global,icp_global,1.0)
-                                else 
-                                    assign_constructor_data!(LP2D,icsc,ic_global,icp_global,0.0)
-                                    assign_constructor_data!(LV2D,icsc,ic_global,icp_global,0.0)
-                                    assign_constructor_data!(LB2D,icsc,ic_global,icp_global,0.0)
-                                end
-                            else
-                                # assign Laplacian matrix data
-                                assemble_constructor_data!(LP2D,icsc,ic_global,icp_global,
-                                            (KKpar[ivpa_local,ivpap_local]*
-                                             MMperp[ivperp_local,ivperpp_local] +
-                                             MMpar[ivpa_local,ivpap_local]*
-                                             LLperp[ivperp_local,ivperpp_local]))
-                                assemble_constructor_data!(LV2D,icsc,ic_global,icp_global,
-                                            (KKpar[ivpa_local,ivpap_local]*
-                                             MRperp[ivperp_local,ivperpp_local] +
-                                             MMpar[ivpa_local,ivpap_local]*
-                                            (KJperp[ivperp_local,ivperpp_local] -
-                                             PPperp[ivperp_local,ivperpp_local] - 
-                                             MNperp[ivperp_local,ivperpp_local])))
-                                assemble_constructor_data!(LB2D,icsc,ic_global,icp_global,
-                                            (KKpar[ivpa_local,ivpap_local]*
-                                             MRperp[ivperp_local,ivperpp_local] +
-                                             MMpar[ivpa_local,ivpap_local]*
-                                             (KJperp[ivperp_local,ivperpp_local] -
-                                              PPperp[ivperp_local,ivperpp_local] -
-                                          4.0*MNperp[ivperp_local,ivperpp_local])))
-                            end
-                            #assign mass matrix
-                            assemble_constructor_data!(MM2D,icsc,ic_global,icp_global,
-                                            (MMpar[ivpa_local,ivpap_local]*
-                                             MMperp[ivperp_local,ivperpp_local]))
-                                
-                            # assign K matrices (no explicit boundary terms)
-                            assemble_constructor_data!(KKpar2D,icsc,ic_global,icp_global,
-                                            (KKpar[ivpa_local,ivpap_local]*
-                                             MMperp[ivperp_local,ivperpp_local]))
-                            assemble_constructor_data!(KKperp2D,icsc,ic_global,icp_global,
-                                            (MMpar[ivpa_local,ivpap_local]*
-                                             KKperp[ivperp_local,ivperpp_local]))
-                            assemble_constructor_data!(KPperp2D,icsc,ic_global,icp_global,
-                                            (MMpar[ivpa_local,ivpap_local]*
-                                             (KJperp[ivperp_local,ivperpp_local] -
-                                              2.0*PPperp[ivperp_local,ivperpp_local] -
-                                              2.0*MNperp[ivperp_local,ivperpp_local])))
-                                             
-                            # assign K matrices (with explicit boundary terms from integration by parts)
-                            assemble_constructor_data!(KKpar2D_with_BC_terms,icsc,ic_global,icp_global,
-                                            (KKpar_with_BC_terms[ivpa_local,ivpap_local]*
-                                             MMperp[ivperp_local,ivperpp_local]))
-                            assemble_constructor_data!(KKperp2D_with_BC_terms,icsc,ic_global,icp_global,
-                                            (MMpar[ivpa_local,ivpap_local]*
-                                             KKperp_with_BC_terms[ivperp_local,ivperpp_local]))
-                            # assign PU matrix
-                            assemble_constructor_data!(PUperp2D,icsc,ic_global,icp_global,
-                                            (MMpar[ivpa_local,ivpap_local]*
-                                             PUperp[ivperp_local,ivperpp_local]))
-                            assemble_constructor_data!(PPparPUperp2D,icsc,ic_global,icp_global,
-                                            (PPpar[ivpa_local,ivpap_local]*
-                                             PUperp[ivperp_local,ivperpp_local]))
-                            assemble_constructor_data!(PPpar2D,icsc,ic_global,icp_global,
-                                            (PPpar[ivpa_local,ivpap_local]*
-                                             MMperp[ivperp_local,ivperpp_local]))
-                            # assign RHS mass matrix for d2Gdvperp2
-                            assemble_constructor_data!(MMparMNperp2D,icsc,ic_global,icp_global,
-                                            (MMpar[ivpa_local,ivpap_local]*
-                                             MNperp[ivperp_local,ivperpp_local]))
-                        end
-                    end
-                end
-            end
-        end
-    end
-    MM2D_sparse = create_sparse_matrix(MM2D)
-    KKpar2D_sparse = create_sparse_matrix(KKpar2D)
-    KKperp2D_sparse = create_sparse_matrix(KKperp2D)
-    KKpar2D_with_BC_terms_sparse = create_sparse_matrix(KKpar2D_with_BC_terms)
-    KKperp2D_with_BC_terms_sparse = create_sparse_matrix(KKperp2D_with_BC_terms)
-    LP2D_sparse = create_sparse_matrix(LP2D)
-    LV2D_sparse = create_sparse_matrix(LV2D)
-    LB2D_sparse = create_sparse_matrix(LB2D)
-    KPperp2D_sparse = create_sparse_matrix(KPperp2D)
-    PUperp2D_sparse = create_sparse_matrix(PUperp2D)
-    PPparPUperp2D_sparse = create_sparse_matrix(PPparPUperp2D)
-    PPpar2D_sparse = create_sparse_matrix(PPpar2D)
-    MMparMNperp2D_sparse = create_sparse_matrix(MMparMNperp2D)
-    @serial_region begin
-        if global_rank[] == 0 && print_to_screen
-            println("finished elliptic operator constructor assignment   ", Dates.format(now(), dateformat"H:MM:SS"))
-        end
-        #if nc_global < 60
-        #    println("MM2D_sparse \n",MM2D_sparse)
-        #    print_matrix(Array(MM2D_sparse),"MM2D_sparse",nc_global,nc_global)
-        #    print_matrix(KKpar2D,"KKpar2D",nc_global,nc_global)
-        #    print_matrix(KKperp2D,"KKperp2D",nc_global,nc_global)
-        #    print_matrix(LP2D,"LP",nc_global,nc_global)
-        #    print_matrix(LV2D,"LV",nc_global,nc_global)
-        #end
-    end
-    return MM2D_sparse, KKpar2D_sparse, KKperp2D_sparse, 
-           KKpar2D_with_BC_terms_sparse, KKperp2D_with_BC_terms_sparse, 
-           LP2D_sparse, LV2D_sparse, LB2D_sparse, 
-           KPperp2D_sparse, PUperp2D_sparse, PPparPUperp2D_sparse,
-           PPpar2D_sparse, MMparMNperp2D_sparse
-end
-
-"""
-Function to allocate an instance of `YY_collision_operator_arrays`.
-Calls `get_QQ_local!()` from `gauss_legendre`. Definitions of these
-nonlinear stiffness matrices can be found in the docs for `get_QQ_local!()`.
-"""
-function calculate_YY_arrays(vpa,vperp,vpa_spectral,vperp_spectral)
-    # Allocate one (ngrid, ngrid, ngrid) block per local element for each of the four
-    # YY matrices in each velocity coordinate. `undef` leaves the contents
-    # uninitialised; every block is handed to `get_QQ_local!()` in the loops below.
-    YY0perp = Array{mk_float,4}(undef,vperp.ngrid,vperp.ngrid,vperp.ngrid,vperp.nelement_local)
-    YY1perp = Array{mk_float,4}(undef,vperp.ngrid,vperp.ngrid,vperp.ngrid,vperp.nelement_local)
-    YY2perp = Array{mk_float,4}(undef,vperp.ngrid,vperp.ngrid,vperp.ngrid,vperp.nelement_local)
-    YY3perp = Array{mk_float,4}(undef,vperp.ngrid,vperp.ngrid,vperp.ngrid,vperp.nelement_local)
-    YY0par = Array{mk_float,4}(undef,vpa.ngrid,vpa.ngrid,vpa.ngrid,vpa.nelement_local)
-    YY1par = Array{mk_float,4}(undef,vpa.ngrid,vpa.ngrid,vpa.ngrid,vpa.nelement_local)
-    YY2par = Array{mk_float,4}(undef,vpa.ngrid,vpa.ngrid,vpa.ngrid,vpa.nelement_local)
-    YY3par = Array{mk_float,4}(undef,vpa.ngrid,vpa.ngrid,vpa.ngrid,vpa.nelement_local)
-    
-    # vperp matrices: one call per element and per matrix; the trailing string
-    # ("YY0".."YY3") selects which matrix `get_QQ_local!()` is asked for.
-    for ielement_vperp in 1:vperp.nelement_local
-        @views get_QQ_local!(YY0perp[:,:,:,ielement_vperp],ielement_vperp,vperp_spectral.lobatto,vperp_spectral.radau,vperp,"YY0")
-        @views get_QQ_local!(YY1perp[:,:,:,ielement_vperp],ielement_vperp,vperp_spectral.lobatto,vperp_spectral.radau,vperp,"YY1")
-        @views get_QQ_local!(YY2perp[:,:,:,ielement_vperp],ielement_vperp,vperp_spectral.lobatto,vperp_spectral.radau,vperp,"YY2")
-        @views get_QQ_local!(YY3perp[:,:,:,ielement_vperp],ielement_vperp,vperp_spectral.lobatto,vperp_spectral.radau,vperp,"YY3")
-     end
-     # vpa matrices: same pattern, using the vpa coordinate and its spectral data.
-     for ielement_vpa in 1:vpa.nelement_local
-        @views get_QQ_local!(YY0par[:,:,:,ielement_vpa],ielement_vpa,vpa_spectral.lobatto,vpa_spectral.radau,vpa,"YY0")
-        @views get_QQ_local!(YY1par[:,:,:,ielement_vpa],ielement_vpa,vpa_spectral.lobatto,vpa_spectral.radau,vpa,"YY1")
-        @views get_QQ_local!(YY2par[:,:,:,ielement_vpa],ielement_vpa,vpa_spectral.lobatto,vpa_spectral.radau,vpa,"YY2")
-        @views get_QQ_local!(YY3par[:,:,:,ielement_vpa],ielement_vpa,vpa_spectral.lobatto,vpa_spectral.radau,vpa,"YY3")
-     end
-    
-    # Bundle the eight arrays into the struct, vperp arrays first, then vpa arrays.
-    return YY_collision_operator_arrays(YY0perp,YY1perp,YY2perp,YY3perp,
-                                        YY0par,YY1par,YY2par,YY3par)
-end
-
-"""
-Function to assemble the RHS of the kinetic equation due to the collision operator,
-in weak form. Once the array `rhsvpavperp` contains the assembled weak-form collision operator,
-a mass matrix solve still must be carried out to find the time derivative of the distribution function
-due to collisions. This function uses a purely serial algorithm for testing purposes.
-"""
-function assemble_explicit_collision_operator_rhs_serial!(rhsvpavperp,pdfs,d2Gspdvpa2,d2Gspdvperpdvpa,
-    d2Gspdvperp2,dHspdvpa,dHspdvperp,ms,msp,nussp,
-    vpa,vperp,YY_arrays::YY_collision_operator_arrays)
-    # Arguments as used below:
-    #   rhsvpavperp      output; zeroed, accumulated into, then scaled by -nussp
-    #   pdfs             read at [jvpap,jvperpp] (global grid indices)
-    #   d2Gspdvpa2, d2Gspdvperpdvpa, d2Gspdvperp2, dHspdvpa, dHspdvperp
-    #                    read at [kvpap,kvperpp] (global grid indices)
-    #   ms, msp          enter only through the factor 2.0*(ms/msp) on the dH terms
-    #   YY_arrays        per-element YY matrices, indexed [k_local,j_local,i_local,ielement]
-    begin_anyv_region()
-    @anyv_serial_region begin
-        # assemble RHS of collision operator
-        # `rhsc` is the flattened form of `rhsvpavperp`; it is addressed below with the
-        # compound index returned by `get_global_compound_index()`
-        rhsc = vec(rhsvpavperp)
-        @. rhsc = 0.0
-        
-        # loop over elements
-        for ielement_vperp in 1:vperp.nelement_local
-            # no `@views` here, so each slice is a copy of this element's block
-            YY0perp = YY_arrays.YY0perp[:,:,:,ielement_vperp]
-            YY1perp = YY_arrays.YY1perp[:,:,:,ielement_vperp]
-            YY2perp = YY_arrays.YY2perp[:,:,:,ielement_vperp]
-            YY3perp = YY_arrays.YY3perp[:,:,:,ielement_vperp]
-            
-            for ielement_vpa in 1:vpa.nelement_local
-                YY0par = YY_arrays.YY0par[:,:,:,ielement_vpa]
-                YY1par = YY_arrays.YY1par[:,:,:,ielement_vpa]
-                YY2par = YY_arrays.YY2par[:,:,:,ielement_vpa]
-                YY3par = YY_arrays.YY3par[:,:,:,ielement_vpa]
-                
-                # loop over field positions in each element
-                for ivperp_local in 1:vperp.ngrid
-                    for ivpa_local in 1:vpa.ngrid
-                        # entry of `rhsc` that this (element, local point) pair contributes to
-                        ic_global = get_global_compound_index(vpa,vperp,ielement_vpa,ielement_vperp,ivpa_local,ivperp_local)
-                        # carry out the matrix sum on each 2D element
-                        # j indices label the point where `pdfs` is read,
-                        # k indices label the point where the potentials are read;
-                        # `igrid_full` maps (local index, element) to the global grid index
-                        for jvperpp_local in 1:vperp.ngrid
-                            jvperpp = vperp.igrid_full[jvperpp_local,ielement_vperp]
-                            for kvperpp_local in 1:vperp.ngrid
-                                kvperpp = vperp.igrid_full[kvperpp_local,ielement_vperp]
-                                for jvpap_local in 1:vpa.ngrid
-                                    jvpap = vpa.igrid_full[jvpap_local,ielement_vpa]
-                                    pdfjj = pdfs[jvpap,jvperpp]
-                                    for kvpap_local in 1:vpa.ngrid
-                                        kvpap = vpa.igrid_full[kvpap_local,ielement_vpa]
-                                        # first three lines represent parallel flux terms
-                                        # second three lines represent perpendicular flux terms
-                                        rhsc[ic_global] += (YY0perp[kvperpp_local,jvperpp_local,ivperp_local]*YY2par[kvpap_local,jvpap_local,ivpa_local]*pdfjj*d2Gspdvpa2[kvpap,kvperpp] +
-                                                            YY3perp[kvperpp_local,jvperpp_local,ivperp_local]*YY1par[kvpap_local,jvpap_local,ivpa_local]*pdfjj*d2Gspdvperpdvpa[kvpap,kvperpp] - 
-                                                            2.0*(ms/msp)*YY0perp[kvperpp_local,jvperpp_local,ivperp_local]*YY1par[kvpap_local,jvpap_local,ivpa_local]*pdfjj*dHspdvpa[kvpap,kvperpp] +
-                                                            # end parallel flux, start of perpendicular flux
-                                                            YY1perp[kvperpp_local,jvperpp_local,ivperp_local]*YY3par[kvpap_local,jvpap_local,ivpa_local]*pdfjj*d2Gspdvperpdvpa[kvpap,kvperpp] + 
-                                                            YY2perp[kvperpp_local,jvperpp_local,ivperp_local]*YY0par[kvpap_local,jvpap_local,ivpa_local]*pdfjj*d2Gspdvperp2[kvpap,kvperpp] - 
-                                                            2.0*(ms/msp)*YY1perp[kvperpp_local,jvperpp_local,ivperp_local]*YY0par[kvpap_local,jvpap_local,ivpa_local]*pdfjj*dHspdvperp[kvpap,kvperpp])
-                                    end
-                                end
-                            end
-                        end
-                    end
-                end 
-            end
-        end
-        # correct for minus sign due to integration by parts
-        # and multiply by the normalised collision frequency
-        # (applied once to the whole vector, after all elements have been summed)
-        @. rhsc *= -nussp
-    end
-    return nothing
-end
-
-"""
-Function to assemble the RHS of the kinetic equation due to the collision operator,
-in weak form. Once the array `rhsvpavperp` contains the assembled weak-form collision operator,
-a mass matrix solve still must be carried out to find the time derivative of the distribution function
-due to collisions. This function uses a purely parallel algorithm and may be tested by comparing to 
-`assemble_explicit_collision_operator_rhs_serial!()`. The inner-most loop of the function is 
-in `assemble_explicit_collision_operator_rhs_parallel_inner_loop()`.
-"""
-function assemble_explicit_collision_operator_rhs_parallel!(rhsvpavperp,pdfs,d2Gspdvpa2,d2Gspdvperpdvpa,
-    d2Gspdvperp2,dHspdvpa,dHspdvperp,ms,msp,nussp,
-    vpa,vperp,YY_arrays::YY_collision_operator_arrays)
-    # assemble RHS of collision operator
-    # start by zeroing the output at every (ivpa,ivperp) point
-    begin_anyv_vperp_vpa_region()
-    @loop_vperp_vpa ivperp ivpa begin
-        rhsvpavperp[ivpa,ivperp] = 0.0
-    end
-
-    # loop over collocation points to benefit from shared-memory parallelism
-    # (each iteration writes only rhsvpavperp[ivpa_global,ivperp_global])
-    ngrid_vpa, ngrid_vperp = vpa.ngrid, vperp.ngrid
-    vperp_igrid_full = vperp.igrid_full
-    vpa_igrid_full = vpa.igrid_full
-    @loop_vperp_vpa ivperp_global ivpa_global begin
-        # `ielement_vpax` and `ielement_vperpx` are returned but not used in this function
-        igrid_vpa, ielement_vpax, ielement_vpa_low, ielement_vpa_hi, igrid_vperp, ielement_vperpx, ielement_vperp_low, ielement_vperp_hi = get_element_limit_indices(ivpa_global,ivperp_global,vpa,vperp)
-        # loop over elements belonging to this collocation point
-        for ielement_vperp in ielement_vperp_low:ielement_vperp_hi
-            # correct local ivperp in the case that we are on a boundary point:
-            # for ielement_vperp == ielement_vperp_low this is igrid_vperp; for the next
-            # element it is igrid_vperp + 1 - ngrid_vperp (i.e. 1 when igrid_vperp == ngrid_vperp)
-            ivperp_local = igrid_vperp + (ielement_vperp - ielement_vperp_low)*(1-ngrid_vperp)
-            # 2D [k_local,j_local] views for this output point; no copies are made
-            @views YY0perp = YY_arrays.YY0perp[:,:,ivperp_local,ielement_vperp]
-            @views YY1perp = YY_arrays.YY1perp[:,:,ivperp_local,ielement_vperp]
-            @views YY2perp = YY_arrays.YY2perp[:,:,ivperp_local,ielement_vperp]
-            @views YY3perp = YY_arrays.YY3perp[:,:,ivperp_local,ielement_vperp]
-            # local index -> global vperp index for this element
-            vperp_igrid_full_view = @view vperp_igrid_full[:,ielement_vperp]
-            
-            for ielement_vpa in ielement_vpa_low:ielement_vpa_hi
-                # correct local ivpa in the case that we are on a boundary point
-                # (same formula as for ivperp_local above)
-                ivpa_local = igrid_vpa + (ielement_vpa - ielement_vpa_low)*(1-ngrid_vpa)
-                @views YY0par = YY_arrays.YY0par[:,:,ivpa_local,ielement_vpa]
-                @views YY1par = YY_arrays.YY1par[:,:,ivpa_local,ielement_vpa]
-                @views YY2par = YY_arrays.YY2par[:,:,ivpa_local,ielement_vpa]
-                @views YY3par = YY_arrays.YY3par[:,:,ivpa_local,ielement_vpa]
-                # local index -> global vpa index for this element
-                vpa_igrid_full_view = @view vpa_igrid_full[:,ielement_vpa]
-                
-                # carry out the matrix sum on each 2D element
-                # (the inner loop already includes the factor -nussp, so no final
-                # rescaling of rhsvpavperp is done in this function)
-                rhsvpavperp[ivpa_global,ivperp_global] +=
-                    assemble_explicit_collision_operator_rhs_parallel_inner_loop(
-                        nussp, ms, msp, YY0perp, YY0par, YY1perp, YY1par, YY2perp, YY2par,
-                        YY3perp, YY3par, pdfs, d2Gspdvpa2, d2Gspdvperpdvpa, d2Gspdvperp2,
-                        dHspdvpa, dHspdvperp, ngrid_vperp, vperp_igrid_full_view,
-                        ngrid_vpa, vpa_igrid_full_view)
-            end
-        end
-    end
-    return nothing
-end
-
-"""
-The inner-most loop of the parallel collision operator assembly. Called in `assemble_explicit_collision_operator_rhs_parallel!()`.
-"""
-function assemble_explicit_collision_operator_rhs_parallel_inner_loop(
-        nussp, ms, msp, YY0perp, YY0par, YY1perp, YY1par, YY2perp, YY2par, YY3perp,
-        YY3par, pdfs, d2Gspdvpa2, d2Gspdvperpdvpa, d2Gspdvperp2, dHspdvpa, dHspdvperp,
-        ngrid_vperp, vperp_igrid_full_view, ngrid_vpa, vpa_igrid_full_view)
-    # Arguments as used below:
-    #   YY*perp, YY*par          2D matrices indexed [k_local,j_local]
-    #   *_igrid_full_view        local index -> global grid index for the current element
-    #   pdfs                     read at the j point, [jvpap,jvperpp]
-    #   d2G*/dH* arrays          read at the k point, [kvpap,kvperpp]
-    # Returns the accumulated sum, already multiplied by -nussp.
-
-    # carry out the matrix sum on each 2D element
-    result = 0.0
-    for jvperpp_local in 1:ngrid_vperp
-        jvperpp = vperp_igrid_full_view[jvperpp_local]
-        for kvperpp_local in 1:ngrid_vperp
-            kvperpp = vperp_igrid_full_view[kvperpp_local]
-            # vperp factors do not depend on the vpa loops, so look them up once here
-            YY0perp_kj = YY0perp[kvperpp_local,jvperpp_local]
-            YY1perp_kj = YY1perp[kvperpp_local,jvperpp_local]
-            YY2perp_kj = YY2perp[kvperpp_local,jvperpp_local]
-            YY3perp_kj = YY3perp[kvperpp_local,jvperpp_local]
-            for jvpap_local in 1:ngrid_vpa
-                jvpap = vpa_igrid_full_view[jvpap_local]
-                pdfjj = pdfs[jvpap,jvperpp]
-                for kvpap_local in 1:ngrid_vpa
-                    kvpap = vpa_igrid_full_view[kvpap_local]
-                    # values used twice in the expression below are cached in locals;
-                    # YY2par and YY3par are used once each and indexed in place
-                    YY0par_kj = YY0par[kvpap_local,jvpap_local]
-                    YY1par_kj = YY1par[kvpap_local,jvpap_local]
-                    d2Gspdvperpdvpa_kk = d2Gspdvperpdvpa[kvpap,kvperpp]
-                    # first three lines represent parallel flux terms
-                    # second three lines represent perpendicular flux terms
-                    result += -nussp*(YY0perp_kj*YY2par[kvpap_local,jvpap_local]*pdfjj*d2Gspdvpa2[kvpap,kvperpp] +
-                                        YY3perp_kj*YY1par_kj*pdfjj*d2Gspdvperpdvpa_kk -
-                                        2.0*(ms/msp)*YY0perp_kj*YY1par_kj*pdfjj*dHspdvpa[kvpap,kvperpp] +
-                                        # end parallel flux, start of perpendicular flux
-                                        YY1perp_kj*YY3par[kvpap_local,jvpap_local]*pdfjj*d2Gspdvperpdvpa_kk +
-                                        YY2perp_kj*YY0par_kj*pdfjj*d2Gspdvperp2[kvpap,kvperpp] -
-                                        2.0*(ms/msp)*YY1perp_kj*YY0par_kj*pdfjj*dHspdvperp[kvpap,kvperpp])
-                end
-            end
-        end
-    end
-
-    return result
-end
-
-"""
-Function to assemble the RHS of the kinetic equation due to the collision operator,
-in weak form, when the distribution function appearing in the derivatives is known analytically.
-The inner-most loop of the function is 
-in `assemble_explicit_collision_operator_rhs_parallel_analytical_inputs_inner_loop()`.
-"""
-function assemble_explicit_collision_operator_rhs_parallel_analytical_inputs!(rhsvpavperp,pdfs,dpdfsdvpa,dpdfsdvperp,d2Gspdvpa2,d2Gspdvperpdvpa,
-    d2Gspdvperp2,dHspdvpa,dHspdvperp,ms,msp,nussp,
-    vpa,vperp,YY_arrays::YY_collision_operator_arrays)
-    # In addition to `pdfs`, the caller supplies its derivatives `dpdfsdvpa` and
-    # `dpdfsdvperp`; all three are forwarded unchanged to the inner loop.
-    # assemble RHS of collision operator
-    # start by zeroing the output at every (ivpa,ivperp) point
-    begin_anyv_vperp_vpa_region()
-    @loop_vperp_vpa ivperp ivpa begin
-        rhsvpavperp[ivpa,ivperp] = 0.0
-    end
-
-    # loop over collocation points to benefit from shared-memory parallelism
-    # (each iteration writes only rhsvpavperp[ivpa_global,ivperp_global])
-    ngrid_vpa, ngrid_vperp = vpa.ngrid, vperp.ngrid
-    vperp_igrid_full = vperp.igrid_full
-    vpa_igrid_full = vpa.igrid_full
-    @loop_vperp_vpa ivperp_global ivpa_global begin
-        # `ielement_vpax` and `ielement_vperpx` are returned but not used in this function
-        igrid_vpa, ielement_vpax, ielement_vpa_low, ielement_vpa_hi, igrid_vperp, ielement_vperpx, ielement_vperp_low, ielement_vperp_hi = get_element_limit_indices(ivpa_global,ivperp_global,vpa,vperp)
-        # loop over elements belonging to this collocation point
-        for ielement_vperp in ielement_vperp_low:ielement_vperp_hi
-            # correct local ivperp in the case that we are on a boundary point:
-            # for ielement_vperp == ielement_vperp_low this is igrid_vperp; for the next
-            # element it is igrid_vperp + 1 - ngrid_vperp (i.e. 1 when igrid_vperp == ngrid_vperp)
-            ivperp_local = igrid_vperp + (ielement_vperp - ielement_vperp_low)*(1-ngrid_vperp)
-            # NOTE: only the YY0 and YY1 views are passed to the inner loop below;
-            # the YY2perp and YY3perp views are created but not used in this function
-            @views YY0perp = YY_arrays.YY0perp[:,:,ivperp_local,ielement_vperp]
-            @views YY1perp = YY_arrays.YY1perp[:,:,ivperp_local,ielement_vperp]
-            @views YY2perp = YY_arrays.YY2perp[:,:,ivperp_local,ielement_vperp]
-            @views YY3perp = YY_arrays.YY3perp[:,:,ivperp_local,ielement_vperp]
-            # local index -> global vperp index for this element
-            vperp_igrid_full_view = @view vperp_igrid_full[:,ielement_vperp]
-            
-            for ielement_vpa in ielement_vpa_low:ielement_vpa_hi
-                # correct local ivpa in the case that we are on a boundary point
-                # (same formula as for ivperp_local above)
-                ivpa_local = igrid_vpa + (ielement_vpa - ielement_vpa_low)*(1-ngrid_vpa)
-                # as for vperp: the YY2par and YY3par views are created but not used
-                @views YY0par = YY_arrays.YY0par[:,:,ivpa_local,ielement_vpa]
-                @views YY1par = YY_arrays.YY1par[:,:,ivpa_local,ielement_vpa]
-                @views YY2par = YY_arrays.YY2par[:,:,ivpa_local,ielement_vpa]
-                @views YY3par = YY_arrays.YY3par[:,:,ivpa_local,ielement_vpa]
-                # local index -> global vpa index for this element
-                vpa_igrid_full_view = @view vpa_igrid_full[:,ielement_vpa]
-                
-                # carry out the matrix sum on each 2D element
-                # (the inner loop already includes the factor -nussp)
-                rhsvpavperp[ivpa_global,ivperp_global] +=
-                    assemble_explicit_collision_operator_rhs_parallel_analytical_inputs_inner_loop(
-                        nussp, ms, msp, pdfs, dpdfsdvpa, dpdfsdvperp, d2Gspdvperp2,
-                        d2Gspdvpa2, d2Gspdvperpdvpa, dHspdvperp, dHspdvpa, YY0perp,
-                        YY0par, YY1perp, YY1par, ngrid_vperp, vperp_igrid_full_view,
-                        ngrid_vpa, vpa_igrid_full_view)
-            end
-        end
-    end
-    return nothing
-end
-
-"""
-The inner-most loop of `assemble_explicit_collision_operator_rhs_parallel_analytical_inputs!()`.
-"""
-function assemble_explicit_collision_operator_rhs_parallel_analytical_inputs_inner_loop(
-        nussp, ms, msp, pdfs, dpdfsdvpa, dpdfsdvperp, d2Gspdvperp2,
-        d2Gspdvpa2, d2Gspdvperpdvpa, dHspdvperp, dHspdvpa, YY0perp, YY0par, YY1perp,
-        YY1par, ngrid_vperp, vperp_igrid_full_view, ngrid_vpa, vpa_igrid_full_view)
-    # Separate function for inner loop, possible optimization??
-    # (this note is kept inside the body so that nothing sits between the docstring
-    # and the `function` line)
-    #
-    # Arguments as used below:
-    #   YY0perp, YY1perp, YY0par, YY1par   2D matrices indexed [k_local,j_local]
-    #   *_igrid_full_view                  local index -> global grid index for the element
-    #   pdfs, dpdfsdvpa, dpdfsdvperp       read at the j point, [jvpap,jvperpp]
-    #   d2G*/dH* arrays                    read at the k point, [kvpap,kvperpp]
-    # Returns the accumulated sum, already multiplied by -nussp.
-
-    # carry out the matrix sum on each 2D element
-    result = 0.0
-    for jvperpp_local in 1:ngrid_vperp
-        jvperpp = vperp_igrid_full_view[jvperpp_local]
-        for kvperpp_local in 1:ngrid_vperp
-            kvperpp = vperp_igrid_full_view[kvperpp_local]
-            # vperp factors do not depend on the vpa loops, so look them up once here
-            YY0perp_kj = YY0perp[kvperpp_local,jvperpp_local]
-            YY1perp_kj = YY1perp[kvperpp_local,jvperpp_local]
-            for jvpap_local in 1:ngrid_vpa
-                jvpap = vpa_igrid_full_view[jvpap_local]
-                # distribution function and its derivatives at the j point
-                pdfs_jj = pdfs[jvpap,jvperpp]
-                dpdfsdvperp_jj = dpdfsdvperp[jvpap,jvperpp]
-                dpdfsdvpa_jj = dpdfsdvpa[jvpap,jvperpp]
-                for kvpap_local in 1:ngrid_vpa
-                    kvpap = vpa_igrid_full_view[kvpap_local]
-                    YY0par_kj = YY0par[kvpap_local,jvpap_local]
-                    YY1par_kj = YY1par[kvpap_local,jvpap_local]
-                    d2Gspdvperpdvpa_kk = d2Gspdvperpdvpa[kvpap,kvperpp]
-                    # first three lines represent parallel flux terms
-                    # second three lines represent perpendicular flux terms
-                    # (the d2G terms multiply the supplied derivatives of pdfs, the dH
-                    # terms multiply pdfs itself)
-                    result +=
-                        -nussp*(YY0perp_kj*YY1par_kj*dpdfsdvpa_jj*d2Gspdvpa2[kvpap,kvperpp] +
-                                YY0perp_kj*YY1par_kj*dpdfsdvperp_jj*d2Gspdvperpdvpa_kk -
-                                2.0*(ms/msp)*YY0perp_kj*YY1par_kj*pdfs_jj*dHspdvpa[kvpap,kvperpp] +
-                                # end parallel flux, start of perpendicular flux
-                                YY1perp_kj*YY0par_kj*dpdfsdvpa_jj*d2Gspdvperpdvpa_kk +
-                                YY1perp_kj*YY0par_kj*dpdfsdvperp_jj*d2Gspdvperp2[kvpap,kvperpp] -
-                                2.0*(ms/msp)*YY1perp_kj*YY0par_kj*pdfs_jj*dHspdvperp[kvpap,kvperpp])
-                end
-            end
-        end
-    end
-
-    return result
-end
-
-"""
-Elliptic solve function. 
-
-    field: the solution
-    source: the source function on the RHS
-    boundary_data: the known values of field at infinity
-    lu_object_lhs: the object for the differential operator that defines field
-    matrix_rhs: the weak matrix acting on the source vector
-    rhsvpavperp: buffer in which the RHS of the weak system is assembled
-    vpa, vperp: coordinate structs
-
-Note: all variants of `elliptic_solve!()` run only in serial. They do not handle
-shared-memory parallelism themselves. The calling site must ensure that
-`elliptic_solve!()` is only called by one process in a shared-memory block.
-"""
-function elliptic_solve!(field,source,boundary_data::vpa_vperp_boundary_data,
-            lu_object_lhs,matrix_rhs,rhsvpavperp,vpa,vperp)
-    # assemble the rhs of the weak system
-
-    # get data into the compound index format
-    # (1D forms of the 2D arrays; the steps below rely on `rhsc` sharing memory with
-    # `rhsvpavperp`, so that values written by `enforce_dirichlet_bc!()` through
-    # `rhsvpavperp` are the ones passed to `ldiv!` through `rhsc`, and on `fc`
-    # sharing memory with `field` so that the solution lands in `field`)
-    sc = vec(source)
-    fc = vec(field)
-    rhsc = vec(rhsvpavperp)
-    # rhsc = matrix_rhs*sc (overwrites the buffer)
-    mul!(rhsc,matrix_rhs,sc)
-    # enforce the boundary conditions
-    enforce_dirichlet_bc!(rhsvpavperp,vpa,vperp,boundary_data)
-    # solve the linear system
-    # (fc = lu_object_lhs \ rhsc, written in place)
-    ldiv!(fc, lu_object_lhs, rhsc)
-
-    return nothing
-end
-# same as above but source is made of two different terms
-# with different weak matrices, i.e. the RHS assembled in `rhs` is
-# matrix_rhs_1*source_1 + matrix_rhs_2*source_2 before the boundary
-# conditions are imposed
-function elliptic_solve!(field,source_1,source_2,boundary_data::vpa_vperp_boundary_data,
-            lu_object_lhs,matrix_rhs_1,matrix_rhs_2,rhs,vpa,vperp)
-    
-    # assemble the rhs of the weak system
-
-    # get data into the compound index format
-    # (the steps below rely on `rhsc` sharing memory with `rhs` and on `fc` sharing
-    # memory with `field`)
-    sc_1 = vec(source_1)
-    sc_2 = vec(source_2)
-    rhsc = vec(rhs)
-    fc = vec(field)
-
-    # Do  rhsc = matrix_rhs_1*sc_1
-    mul!(rhsc, matrix_rhs_1, sc_1)
-
-    # Do rhsc = matrix_rhs_2*sc_2 + rhsc
-    # (5-argument mul!(C, A, B, alpha, beta) computes C = alpha*A*B + beta*C)
-    mul!(rhsc, matrix_rhs_2, sc_2, 1.0, 1.0)
-
-    # enforce the boundary conditions
-    enforce_dirichlet_bc!(rhs,vpa,vperp,boundary_data)
-    # solve the linear system
-    # (fc = lu_object_lhs \ rhsc, written in place)
-    ldiv!(fc, lu_object_lhs, rhsc)
-
-    return nothing
-end
-
-"""
-Same as `elliptic_solve!()` above but no Dirichlet boundary conditions are imposed,
-because the function is only used where the `lu_object_lhs` is derived from a mass matrix.
-The source is made of two different terms with different weak matrices
-because of the form of the only algebraic equation that we consider.
-
-Note: `algebraic_solve!()` runs only in serial. It does not handle shared-memory
-parallelism itself. The calling site must ensure that `algebraic_solve!()` is only
-called by one process in a shared-memory block.
-"""
-function algebraic_solve!(field,source_1,source_2,boundary_data::vpa_vperp_boundary_data,
-            lu_object_lhs,matrix_rhs_1,matrix_rhs_2,rhs,vpa,vperp)
-    # NOTE: `boundary_data`, `vpa` and `vperp` are accepted but not referenced in
-    # this body; the argument list mirrors the two-source `elliptic_solve!()`.
-    
-    # assemble the rhs of the weak system
-
-    # get data into the compound index format
-    # (the steps below rely on `fc` sharing memory with `field`, so that the
-    # solution written by `ldiv!` lands in `field`)
-    sc_1 = vec(source_1)
-    sc_2 = vec(source_2)
-    rhsc = vec(rhs)
-    fc = vec(field)
-
-    # Do  rhsc = matrix_rhs_1*sc_1
-    mul!(rhsc, matrix_rhs_1, sc_1)
-
-    # Do rhsc = matrix_rhs_2*sc_2 + rhsc
-    # (5-argument mul!(C, A, B, alpha, beta) computes C = alpha*A*B + beta*C)
-    mul!(rhsc, matrix_rhs_2, sc_2, 1.0, 1.0)
-
-    # solve the linear system
-    # (fc = lu_object_lhs \ rhsc, written in place; no boundary values are imposed)
-    ldiv!(fc, lu_object_lhs, rhsc)
-
-    return nothing
-end
-
-"""
-Function to solve the appropriate elliptic PDEs to find the
-Rosenbluth potentials. First, we calculate the Rosenbluth potentials
-at the boundary with the direct integration method. Then, we use this
-data to solve the elliptic PDEs with the boundary data providing an
-accurate Dirichlet boundary condition on the maximum `vpa` and `vperp`
-of the domain. We use the sparse LU decomposition from the LinearAlgebra package
-to solve the PDE matrix equations.
-"""
-function calculate_rosenbluth_potentials_via_elliptic_solve!(GG,HH,dHdvpa,dHdvperp,
-             d2Gdvpa2,dGdvperp,d2Gdvperpdvpa,d2Gdvperp2,ffsp_in,
-             vpa,vperp,vpa_spectral,vperp_spectral,fkpl_arrays::fokkerplanck_weakform_arrays_struct;
-             algebraic_solve_for_d2Gdvperp2=false,calculate_GG=false,calculate_dGdvperp=false,
-             boundary_data_option=direct_integration)
-    # Outputs written by this function: HH, dHdvpa, dHdvperp, d2Gdvpa2, d2Gdvperpdvpa
-    # and d2Gdvperp2 always; GG only if `calculate_GG`; dGdvperp only if
-    # `calculate_dGdvperp` or `algebraic_solve_for_d2Gdvperp2`.
-    # Keyword arguments:
-    #   algebraic_solve_for_d2Gdvperp2  obtain d2Gdvperp2 from `algebraic_solve!()`
-    #                                   instead of a further `elliptic_solve!()`
-    #   calculate_GG, calculate_dGdvperp  request the optional outputs
-    #   boundary_data_option            `multipole_expansion` selects the multipole
-    #                                   boundary-data routine; any other value uses
-    #                                   direct integration
-    
-    # extract the necessary precalculated and buffer arrays from fokkerplanck_arrays
-    # NOTE: KKperp2D_sparse, LP2D_sparse and LV2D_sparse are extracted here but are
-    # not referenced again in this function.
-    MM2D_sparse = fkpl_arrays.MM2D_sparse
-    KKpar2D_sparse = fkpl_arrays.KKpar2D_sparse
-    KKperp2D_sparse = fkpl_arrays.KKperp2D_sparse
-    LP2D_sparse = fkpl_arrays.LP2D_sparse
-    LV2D_sparse = fkpl_arrays.LV2D_sparse
-    PUperp2D_sparse = fkpl_arrays.PUperp2D_sparse
-    PPparPUperp2D_sparse = fkpl_arrays.PPparPUperp2D_sparse
-    PPpar2D_sparse = fkpl_arrays.PPpar2D_sparse
-    MMparMNperp2D_sparse = fkpl_arrays.MMparMNperp2D_sparse
-    KPperp2D_sparse = fkpl_arrays.KPperp2D_sparse
-    lu_obj_MM = fkpl_arrays.lu_obj_MM
-    lu_obj_LP = fkpl_arrays.lu_obj_LP
-    lu_obj_LV = fkpl_arrays.lu_obj_LV
-    lu_obj_LB = fkpl_arrays.lu_obj_LB
-    
-    bwgt = fkpl_arrays.bwgt
-    rpbd = fkpl_arrays.rpbd
-    
-    # source buffers and one RHS buffer per solve that may run at the same time
-    S_dummy = fkpl_arrays.S_dummy
-    Q_dummy = fkpl_arrays.Q_dummy
-    rhsvpavperp = fkpl_arrays.rhsvpavperp
-    rhsvpavperp_copy1 = fkpl_arrays.rhsvpavperp_copy1
-    rhsvpavperp_copy2 = fkpl_arrays.rhsvpavperp_copy2
-    rhsvpavperp_copy3 = fkpl_arrays.rhsvpavperp_copy3
-    
-    # calculate the boundary data
-    # (dGdvperp boundary data is also requested when the algebraic solve is used,
-    # because that branch needs dGdvperp as an input)
-    if boundary_data_option == multipole_expansion
-        calculate_rosenbluth_potential_boundary_data_multipole!(rpbd,ffsp_in,vpa,vperp,vpa_spectral,vperp_spectral,
-          calculate_GG=calculate_GG,calculate_dGdvperp=(calculate_dGdvperp||algebraic_solve_for_d2Gdvperp2))
-    else # use direct integration on the boundary
-        calculate_rosenbluth_potential_boundary_data!(rpbd,bwgt,ffsp_in,vpa,vperp,vpa_spectral,vperp_spectral,
-         calculate_GG=calculate_GG,calculate_dGdvperp=(calculate_dGdvperp||algebraic_solve_for_d2Gdvperp2))
-    end
-    # carry out the elliptic solves required
-    # source for the H solves: S = -(4/sqrt(pi)) * F
-    begin_anyv_vperp_vpa_region()
-    @loop_vperp_vpa ivperp ivpa begin
-        S_dummy[ivpa,ivperp] = -(4.0/sqrt(pi))*ffsp_in[ivpa,ivperp]
-    end
-
-    # Can run the following three solves in parallel
-    # The solves run on ranks 0, 1 and 2 of the subblock respectively, but modulo the size
-    # of the subblock (to ensure that the ranks doing work are never outside the
-    # subblock, if the size of the subblock is less than 3).
-    # Each of the three solves uses its own RHS buffer.
-    begin_anyv_region()
-    if anyv_subblock_rank[] == 0 % anyv_subblock_size[]
-        elliptic_solve!(HH, S_dummy, rpbd.H_data, lu_obj_LP, MM2D_sparse, rhsvpavperp,
-                        vpa, vperp)
-    end
-    if anyv_subblock_rank[] == 1 % anyv_subblock_size[]
-        elliptic_solve!(dHdvpa, S_dummy, rpbd.dHdvpa_data, lu_obj_LP, PPpar2D_sparse,
-                        rhsvpavperp_copy1, vpa, vperp)
-    end
-    if anyv_subblock_rank[] == 2 % anyv_subblock_size[]
-        elliptic_solve!(dHdvperp, S_dummy, rpbd.dHdvperp_data, lu_obj_LV, PUperp2D_sparse,
-                        rhsvpavperp_copy2, vpa, vperp)
-    end
-    
-    # source for the G solves: S = 2*H, using the HH computed above
-    # NOTE(review): presumably the begin_anyv_*_region() call synchronises the subblock
-    # so that HH is complete before it is read here -- confirm
-    begin_anyv_vperp_vpa_region()
-    @loop_vperp_vpa ivperp ivpa begin
-        S_dummy[ivpa,ivperp] = 2.0*HH[ivpa,ivperp]
-    end
-
-    # The following four solves can be done in parallel. Note: do the two that are always
-    # done on ranks 0 and 1 of the subblock and the first optional one that actually needs
-    # doing on rank 2 (and the second optional one, if both are needed, on rank 3) to
-    # maximise the chances that all solves get run on separate
-    # processes (i.e. they will be on separate processes as long as there are at least 2
-    # ranks in the subblock if both conditions calculate_GG and calculate_dGdvperp are
-    # false; at least 3 ranks if only one of the conditions is true; and at least 4 ranks
-    # if both conditions are true).
-    begin_anyv_region()
-    if calculate_GG
-        if anyv_subblock_rank[] == 2 % anyv_subblock_size[]
-            elliptic_solve!(GG, S_dummy, rpbd.G_data, lu_obj_LP, MM2D_sparse,
-                            rhsvpavperp_copy2, vpa, vperp)
-        end
-    end
-    if calculate_dGdvperp || algebraic_solve_for_d2Gdvperp2
-        # rank 3 if the GG solve occupies rank 2, otherwise rank 2 (modulo subblock size)
-        if anyv_subblock_rank[] == (calculate_GG ? 3 : 2) % anyv_subblock_size[]
-            elliptic_solve!(dGdvperp, S_dummy, rpbd.dGdvperp_data, lu_obj_LV,
-                            PUperp2D_sparse, rhsvpavperp_copy3, vpa, vperp)
-        end
-    end
-    if anyv_subblock_rank[] == 0 % anyv_subblock_size[]
-        elliptic_solve!(d2Gdvpa2, S_dummy, rpbd.d2Gdvpa2_data, lu_obj_LP, KKpar2D_sparse,
-                        rhsvpavperp, vpa, vperp)
-    end
-    if anyv_subblock_rank[] == 1 % anyv_subblock_size[]
-        elliptic_solve!(d2Gdvperpdvpa, S_dummy, rpbd.d2Gdvperpdvpa_data, lu_obj_LV,
-                        PPparPUperp2D_sparse, rhsvpavperp_copy1, vpa, vperp)
-    end
-    
-    if algebraic_solve_for_d2Gdvperp2
-        # sources for the algebraic relation: S = 2H - d2Gdvpa2, Q = -dGdvperp
-        begin_anyv_vperp_vpa_region()
-        @loop_vperp_vpa ivperp ivpa begin
-            S_dummy[ivpa,ivperp] = 2.0*HH[ivpa,ivperp] - d2Gdvpa2[ivpa,ivperp]
-            Q_dummy[ivpa,ivperp] = -dGdvperp[ivpa,ivperp]
-        end
-        begin_anyv_region()
-        @anyv_serial_region begin
-            # use the algebraic solve function to find
-            # d2Gdvperp2 = 2H - d2Gdvpa2 - (1/vperp)dGdvperp
-            # using a weak form
-            # (RHS assembled as MM2D_sparse*S_dummy + MMparMNperp2D_sparse*Q_dummy)
-            algebraic_solve!(d2Gdvperp2, S_dummy, Q_dummy, rpbd.d2Gdvperp2_data,
-                             lu_obj_MM, MM2D_sparse, MMparMNperp2D_sparse, rhsvpavperp,
-                             vpa, vperp)
-        end
-    else
-        # solve a weak-form PDE for d2Gdvperp2
-        begin_anyv_vperp_vpa_region()
-        @loop_vperp_vpa ivperp ivpa begin
-            #S_dummy[ivpa,ivperp] = 2.0*HH[ivpa,ivperp] # <- this is already the value of
-                                                        #    S_dummy calculated above
-            Q_dummy[ivpa,ivperp] = 2.0*d2Gdvpa2[ivpa,ivperp]
-        end
-        begin_anyv_region()
-        @anyv_serial_region begin
-            # two-source solve: RHS assembled as
-            # KPperp2D_sparse*S_dummy + MMparMNperp2D_sparse*Q_dummy, then Dirichlet
-            # data from rpbd.d2Gdvperp2_data is imposed before the LU solve
-            elliptic_solve!(d2Gdvperp2, S_dummy, Q_dummy, rpbd.d2Gdvperp2_data, lu_obj_LB,
-                            KPperp2D_sparse, MMparMNperp2D_sparse, rhsvpavperp, vpa,
-                            vperp)
-        end
-    end
-    return nothing
-end
-
-"""
-Function to calculate Rosenbluth potentials in the entire
-domain of `(vpa,vperp)` by direct integration.
-"""
-
-function calculate_rosenbluth_potentials_via_direct_integration!(GG,HH,dHdvpa,dHdvperp,
-             d2Gdvpa2,dGdvperp,d2Gdvperpdvpa,d2Gdvperp2,ffsp_in,
-             vpa,vperp,vpa_spectral,vperp_spectral,fkpl_arrays::fokkerplanck_arrays_direct_integration_struct)
-    dfdvpa = fkpl_arrays.dfdvpa
-    dfdvperp = fkpl_arrays.dfdvperp
-    d2fdvperpdvpa = fkpl_arrays.d2fdvperpdvpa
-    G0_weights = fkpl_arrays.G0_weights
-    G1_weights = fkpl_arrays.G1_weights
-    H0_weights = fkpl_arrays.H0_weights
-    H1_weights = fkpl_arrays.H1_weights
-    H2_weights = fkpl_arrays.H2_weights
-    H3_weights = fkpl_arrays.H3_weights
-    # first compute the derivatives of fs' (the integration weights assume d fs' dvpa and d fs' dvperp are known)
-    begin_anyv_vperp_region()
-    @loop_vperp ivperp begin
-        @views derivative!(dfdvpa[:,ivperp], ffsp_in[:,ivperp], vpa, vpa_spectral)
-    end
-    begin_anyv_vpa_region()
-    @loop_vpa ivpa begin
-        @views derivative!(dfdvperp[ivpa,:], ffsp_in[ivpa,:], vperp, vperp_spectral)
-        @views derivative!(d2fdvperpdvpa[ivpa,:], dfdvpa[ivpa,:], vperp, vperp_spectral)
-    end
-    # with the integrands calculated, compute the integrals
-    calculate_rosenbluth_integrals!(GG,d2Gdvpa2,dGdvperp,d2Gdvperpdvpa,
-                                        d2Gdvperp2,HH,dHdvpa,dHdvperp,
-                                        ffsp_in,dfdvpa,dfdvperp,d2fdvperpdvpa,
-                                        G0_weights,G1_weights,H0_weights,H1_weights,H2_weights,H3_weights,
-                                        vpa.n,vperp.n)
-    return nothing           
-end
-
-
-"""
-Function to carry out the integration of the revelant
-distribution functions to form the required coefficients
-for the full-F operator. We assume that the weights are
-precalculated. The function takes as arguments the arrays
-of coefficients (which we fill), the required distributions,
-the precomputed weights, the indicies of the `field' velocities,
-and the sizes of the primed vpa and vperp coordinates arrays.
-"""
-function calculate_rosenbluth_integrals!(GG,d2Gspdvpa2,dGspdvperp,d2Gspdvperpdvpa,
-                                        d2Gspdvperp2,HH,dHspdvpa,dHspdvperp,
-                                        fsp,dfspdvpa,dfspdvperp,d2fspdvperpdvpa,
-                                        G0_weights,G1_weights,H0_weights,H1_weights,H2_weights,H3_weights,
-                                        nvpa,nvperp)
-    begin_anyv_vperp_vpa_region()
-    @loop_vperp_vpa ivperp ivpa begin
-        GG[ivpa,ivperp] = 0.0
-        d2Gspdvpa2[ivpa,ivperp] = 0.0
-        dGspdvperp[ivpa,ivperp] = 0.0
-        d2Gspdvperpdvpa[ivpa,ivperp] = 0.0
-        d2Gspdvperp2[ivpa,ivperp] = 0.0
-        HH[ivpa,ivperp] = 0.0
-        dHspdvpa[ivpa,ivperp] = 0.0
-        dHspdvperp[ivpa,ivperp] = 0.0
-        for ivperpp in 1:nvperp
-            for ivpap in 1:nvpa
-                GG[ivpa,ivperp] += G0_weights[ivpap,ivperpp,ivpa,ivperp]*fsp[ivpap,ivperpp]
-                #d2Gspdvpa2[ivpa,ivperp] += G0_weights[ivpap,ivperpp,ivpa,ivperp]*d2fspdvpa2[ivpap,ivperpp]
-                d2Gspdvpa2[ivpa,ivperp] += H3_weights[ivpap,ivperpp,ivpa,ivperp]*dfspdvpa[ivpap,ivperpp]
-                dGspdvperp[ivpa,ivperp] += G1_weights[ivpap,ivperpp,ivpa,ivperp]*dfspdvperp[ivpap,ivperpp]
-                d2Gspdvperpdvpa[ivpa,ivperp] += G1_weights[ivpap,ivperpp,ivpa,ivperp]*d2fspdvperpdvpa[ivpap,ivperpp]
-                #d2Gspdvperp2[ivpa,ivperp] += G2_weights[ivpap,ivperpp,ivpa,ivperp]*d2fspdvperp2[ivpap,ivperpp] + G3_weights[ivpap,ivperpp,ivpa,ivperp]*dfspdvperp[ivpap,ivperpp]
-                d2Gspdvperp2[ivpa,ivperp] += H2_weights[ivpap,ivperpp,ivpa,ivperp]*dfspdvperp[ivpap,ivperpp]
-                HH[ivpa,ivperp] += H0_weights[ivpap,ivperpp,ivpa,ivperp]*fsp[ivpap,ivperpp]
-                dHspdvpa[ivpa,ivperp] += H0_weights[ivpap,ivperpp,ivpa,ivperp]*dfspdvpa[ivpap,ivperpp]
-                dHspdvperp[ivpa,ivperp] += H1_weights[ivpap,ivperpp,ivpa,ivperp]*dfspdvperp[ivpap,ivperpp]
-            end
-        end
-    end
-    return nothing
-end
-
-"""
-Function to enforce boundary conditions on the collision operator
-result to be consistent with the boundary conditions imposed on the
-distribution function.
-"""
-function enforce_vpavperp_BCs!(pdf,vpa,vperp,vpa_spectral,vperp_spectral)
-    nvpa = vpa.n
-    nvperp = vperp.n
-    ngrid_vperp = vperp.ngrid
-    D0 = vperp_spectral.radau.D0
-    # vpa boundary conditions
-    # zero at infinity
-    if vpa.bc == "zero"
-        begin_anyv_vperp_region()
-        @loop_vperp ivperp begin
-            pdf[1,ivperp] = 0.0
-            pdf[nvpa,ivperp] = 0.0
-        end
-    end
-    # vperp boundary conditions
-    # zero boundary condition at infinity
-    # set regularity condition d F / d vperp = 0 at vperp = 0
-    # adjust F(vperp = 0) so that d F / d vperp = 0 at vperp = 0
-    begin_anyv_vpa_region()
-    if vperp.bc in ("zero", "zero-impose-regularity")
-        @loop_vpa ivpa begin
-            pdf[ivpa,nvperp] = 0.0
-        end
-    end
-    if vperp.bc == "zero-impose-regularity"
-        buffer = @view vperp.scratch[1:ngrid_vperp-1]
-        @loop_vpa ivpa begin
-            @views @. buffer = D0[2:ngrid_vperp] * pdf[ivpa,2:ngrid_vperp]
-            pdf[ivpa,1] = -sum(buffer)/D0[1]
-        end
-    end
-end
-
-"""
-Function to interpolate `f(vpa,vperp)` from one 
-velocity grid to another, assuming that both 
-grids are represented by `(vpa,vperp)` in normalised units,
-but have different normalisation factors 
-defining the meaning of these grids in physical units. E.g.,
-
-     vpai, vperpi = ci * vpa, ci * vperp
-     vpae, vperpe = ce * vpa, ce * vperp
-     
-with `ci = sqrt(Ti/mi)`, `ce = sqrt(Te/mi)`
-
-`scalefac = ci / ce` is the ratio of the
-two reference speeds.
-"""
-function interpolate_2D_vspace!(pdf_out,pdf_in,vpa,vperp,scalefac)
-    
-    begin_anyv_vperp_vpa_region()
-    # loop over points in the output interpolated dataset
-    @loop_vperp ivperp begin
-        vperp_val = vperp.grid[ivperp]*scalefac
-        # get element for interpolation data
-        iel_vperp = ielement_loopup(vperp_val,vperp)
-        if iel_vperp < 1 # vperp_interp outside of range of vperp.grid
-            @loop_vpa ivpa begin
-                pdf_out[ivpa,ivperp] = 0.0
-            end
-            continue
-        else
-            # get nodes for interpolation
-            ivperpmin, ivperpmax = vperp.igrid_full[1,iel_vperp], vperp.igrid_full[vperp.ngrid,iel_vperp]
-            vperp_nodes = vperp.grid[ivperpmin:ivperpmax]
-            #print("vperp: ",iel_vperp, " ", vperp_nodes," ",vperp_val)
-                   
-        end
-        @loop_vpa ivpa begin
-            vpa_val = vpa.grid[ivpa]*scalefac
-            # get element for interpolation data
-            iel_vpa = ielement_loopup(vpa_val,vpa)
-            if iel_vpa < 1 # vpa_interp outside of range of vpa.grid
-                pdf_out[ivpa,ivperp] = 0.0
-                continue
-            else
-                # get nodes for interpolation
-                ivpamin, ivpamax = vpa.igrid_full[1,iel_vpa], vpa.igrid_full[vpa.ngrid,iel_vpa]
-                vpa_nodes = vpa.grid[ivpamin:ivpamax]
-                #print("vpa: ", iel_vpa, " ", vpa_nodes," ",vpa_val)
-                   
-                # do the interpolation
-                pdf_out[ivpa,ivperp] = 0.0
-                for ivperpgrid in 1:vperp.ngrid
-                   # index for referencing pdf_in on orginal grid
-                   ivperpp = vperp.igrid_full[ivperpgrid,iel_vperp]
-                   # interpolating polynomial value at ivperpp for interpolation
-                   vperppoly = lagrange_poly(ivperpgrid,vperp_nodes,vperp_val)
-                   for ivpagrid in 1:vpa.ngrid
-                       # index for referencing pdf_in on orginal grid
-                       ivpap = vpa.igrid_full[ivpagrid,iel_vpa]
-                       # interpolating polynomial value at ivpap for interpolation
-                       vpapoly = lagrange_poly(ivpagrid,vpa_nodes,vpa_val)
-                       pdf_out[ivpa,ivperp] += vpapoly*vperppoly*pdf_in[ivpap,ivperpp]
-                   end
-                end
-            end
-        end
-    end
-    return nothing
-end
-# Alternative version that should be faster - to be tested
-#function interpolate_2D_vspace!(pdf_out, pdf_in, vpa, vpa_spectral, vperp, vperp_spectral,
-#                                scalefac, pdf_buffer)
-#    newgrid_vperp = vperp.scratch .= scalefac .* vperp.grid
-#    newgrid_vpa = vpa.scratch .= scalefac .* vpa.grid
-#
-#    begin_anyv_vpa_region()
-#    @loop_vpa ivpa begin
-#        @views interpolate_to_grid_1d!(pdf_buffer[ivpa,:], newgrid_vperp,
-#                                       pdf_in[ivpa,:], vperp, vperp_spectral)
-#    end
-#
-#    begin_anyv_vperp_region()
-#    @loop_vperp ivperp begin
-#        @views interpolate_to_grid_1d!(pdf_out[:,ivperp], newgrid_vpa,
-#                                       pdf_buffer[:,ivperp], vpa, vpa_spectral)
-
-#    end
-#end
-
-"""
-Function to find the element in which x sits.
-"""
-function ielement_loopup(x,coord)
-    xebs = coord.element_boundaries
-    nelement = coord.nelement_global
-    zero = 1.0e-14
-    ielement = -1
-    # find the element
-    for j in 1:nelement
-        # check for internal points
-        if (x - xebs[j])*(xebs[j+1] - x) > zero
-            ielement = j
-            break
-        # check for boundary points
-        elseif (abs(x-xebs[j]) < 100*zero) || (abs(x-xebs[j+1]) < 100*zero && j == nelement)
-            ielement = j
-            break
-        end
-    end
-    return ielement
-end
-
-end
+"""
+Module for functions used 
+in calculating the integrals and doing 
+the numerical differentiation for 
+the implementation of the 
+full-F Fokker-Planck collision operator [`moment_kinetics.fokker_planck`](@ref).
+
+Parallelisation of the collision operator uses a special 'anyv' region type, see
+[Collision operator and `anyv` region](@ref).
+"""
+module fokker_planck_calculus
+
+export assemble_matrix_operators_dirichlet_bc
+export assemble_matrix_operators_dirichlet_bc_sparse
+export assemble_explicit_collision_operator_rhs_serial!
+export assemble_explicit_collision_operator_rhs_parallel!
+export assemble_explicit_collision_operator_rhs_parallel_analytical_inputs!
+export YY_collision_operator_arrays, calculate_YY_arrays
+export calculate_rosenbluth_potential_boundary_data!
+export calculate_rosenbluth_potential_boundary_data_multipole!
+export elliptic_solve!, algebraic_solve!
+export fokkerplanck_arrays_direct_integration_struct
+export fokkerplanck_weakform_arrays_struct
+export enforce_vpavperp_BCs!
+export calculate_rosenbluth_potentials_via_elliptic_solve!
+
+# testing
+export calculate_rosenbluth_potential_boundary_data_exact!
+export allocate_rosenbluth_potential_boundary_data
+export calculate_rosenbluth_potential_boundary_data_exact!
+export test_rosenbluth_potential_boundary_data
+export interpolate_2D_vspace!
+
+# Import moment_kinetics so that we can refer to it in docstrings
+import moment_kinetics
+
+using ..type_definitions: mk_float, mk_int
+using ..array_allocation: allocate_float, allocate_shared_float
+using ..calculus: derivative!
+using ..communication
+using ..communication: MPISharedArray, global_rank
+using ..lagrange_polynomials: lagrange_poly, lagrange_poly_optimised
+using ..looping
+using ..velocity_moments: integrate_over_vspace
+using ..input_structs: direct_integration, multipole_expansion
+using moment_kinetics.gauss_legendre: get_QQ_local!
+using Dates
+using SpecialFunctions: ellipk, ellipe
+using SparseArrays: sparse, AbstractSparseArray
+using SuiteSparse
+using LinearAlgebra: ldiv!, mul!, LU
+using FastGaussQuadrature
+using Printf
+using MPI
+
+function print_matrix(matrix,name::String,n::mk_int,m::mk_int) # print matrix[1:n,1:m] to stdout under the heading `name`
+    println("\n ",name," \n")
+    for i in 1:n
+        for j in 1:m
+            @printf("%.2f ", matrix[i,j]) # fixed-point with two decimals, space separated
+        end
+        println("") # terminate row i
+    end
+    println("\n")
+end
+
+function print_vector(vector,name::String,m::mk_int) # print vector[1:m] on a single line of stdout under the heading `name`
+    println("\n ",name," \n")
+    for j in 1:m
+        @printf("%.3f ", vector[j]) # fixed-point with three decimals (print_matrix uses two)
+    end
+    println("")
+    println("\n")
+end
+
+"""
+Struct of dummy arrays and precalculated coefficients
+for the Fokker-Planck collision operator when the
+Rosenbluth potentials are computed everywhere in `(vpa,vperp)`
+by direct integration. Used for testing.
+"""
+struct fokkerplanck_arrays_direct_integration_struct
+    G0_weights::MPISharedArray{mk_float,4} # all six weight arrays are indexed [ivpap,ivperpp,ivpa,ivperp]: primed (source) indices first, field-point indices last
+    G1_weights::MPISharedArray{mk_float,4}
+    H0_weights::MPISharedArray{mk_float,4}
+    H1_weights::MPISharedArray{mk_float,4}
+    H2_weights::MPISharedArray{mk_float,4}
+    H3_weights::MPISharedArray{mk_float,4}
+    GG::MPISharedArray{mk_float,2} # 2D fields from here on are velocity-space arrays, indexed [ivpa,ivperp] where they are used in this file
+    d2Gdvpa2::MPISharedArray{mk_float,2}
+    d2Gdvperpdvpa::MPISharedArray{mk_float,2}
+    d2Gdvperp2::MPISharedArray{mk_float,2}
+    dGdvperp::MPISharedArray{mk_float,2}
+    HH::MPISharedArray{mk_float,2}
+    dHdvpa::MPISharedArray{mk_float,2}
+    dHdvperp::MPISharedArray{mk_float,2}
+    #Cflux_vpa::MPISharedArray{mk_float,2}
+    #Cflux_vperp::MPISharedArray{mk_float,2}
+    buffer_vpavperp_1::Array{mk_float,2} # plain (non-shared) Array, unlike the MPISharedArray fields
+    buffer_vpavperp_2::Array{mk_float,2} # plain (non-shared) Array, unlike the MPISharedArray fields
+    Cssp_result_vpavperp::MPISharedArray{mk_float,2}
+    dfdvpa::MPISharedArray{mk_float,2} # buffers for velocity-space derivatives of the distribution function (this field and those below)
+    d2fdvpa2::MPISharedArray{mk_float,2}
+    d2fdvperpdvpa::MPISharedArray{mk_float,2}
+    dfdvperp::MPISharedArray{mk_float,2}
+    d2fdvperp2::MPISharedArray{mk_float,2}
+end
+
+"""
+Struct to contain the integration weights for the boundary points
+in the `(vpa,vperp)` domain.
+"""
+struct boundary_integration_weights_struct
+    lower_vpa_boundary::MPISharedArray{mk_float,3} # indexed [ivpap,ivperpp,ivperp]; allocated with size (nvpa,nvperp,nvperp)
+    upper_vpa_boundary::MPISharedArray{mk_float,3} # indexed [ivpap,ivperpp,ivperp]; allocated with size (nvpa,nvperp,nvperp)
+    upper_vperp_boundary::MPISharedArray{mk_float,3} # indexed [ivpap,ivperpp,ivpa]; allocated with size (nvpa,nvperp,nvpa)
+end
+
+"""
+Struct used for storing the integration weights for the 
+boundary of the velocity space domain in `(vpa,vperp)` coordinates.
+"""
+struct fokkerplanck_boundary_data_arrays_struct
+    G0_weights::boundary_integration_weights_struct # one boundary-weights struct per weight set (G0, G1, H0, H1, H2, H3)
+    G1_weights::boundary_integration_weights_struct
+    H0_weights::boundary_integration_weights_struct
+    H1_weights::boundary_integration_weights_struct
+    H2_weights::boundary_integration_weights_struct
+    H3_weights::boundary_integration_weights_struct
+    dfdvpa::MPISharedArray{mk_float,2} # this and the two buffers below are (nvpa,nvperp) arrays allocated on comm_anyv_subblock[] by allocate_boundary_integration_weights
+    d2fdvperpdvpa::MPISharedArray{mk_float,2}
+    dfdvperp::MPISharedArray{mk_float,2}    
+end
+
+"""
+Struct to store the `(vpa,vperp)` boundary data for an
+individual Rosenbluth potential.
+"""
+struct vpa_vperp_boundary_data
+    lower_boundary_vpa::MPISharedArray{mk_float,1} # 1D data; presumably one value per vperp point along the lower vpa boundary -- confirm against the allocator
+    upper_boundary_vpa::MPISharedArray{mk_float,1} # 1D data; presumably one value per vperp point along the upper vpa boundary -- confirm against the allocator
+    upper_boundary_vperp::MPISharedArray{mk_float,1} # 1D data; presumably one value per vpa point along the upper vperp boundary -- confirm against the allocator
+end
+
+"""
+Struct to store the boundary data for all of the
+Rosenbluth potentials required for the calculation.
+"""
+struct rosenbluth_potential_boundary_data
+    H_data::vpa_vperp_boundary_data # each field is one vpa_vperp_boundary_data for the potential (or potential derivative) named in the field
+    dHdvpa_data::vpa_vperp_boundary_data
+    dHdvperp_data::vpa_vperp_boundary_data
+    G_data::vpa_vperp_boundary_data
+    dGdvperp_data::vpa_vperp_boundary_data
+    d2Gdvperp2_data::vpa_vperp_boundary_data
+    d2Gdvperpdvpa_data::vpa_vperp_boundary_data
+    d2Gdvpa2_data::vpa_vperp_boundary_data
+end
+
+"""
+Struct to store the elemental nonlinear stiffness matrices used
+to express the finite-element weak form of the collision
+operator. The arrays are indexed so that the contraction
+in the assembly step is carried out over the fastest
+accessed indices, i.e., for `YY0perp[i,j,k,iel]`, we contract
+over `i` and `j` to give data for the field position index `k`,
+all for the 1D element indexed by `iel`.
+"""
+struct YY_collision_operator_arrays
+    # let phi_j(vperp) be the jth Lagrange basis function, 
+    # and phi'_j(vperp) the first derivative of the Lagrange basis function
+    # on the iel^th element. Then, the arrays are defined as follows.
+    # YY0perp[i,j,k,iel] = \int phi_i(vperp) phi_j(vperp) phi_k(vperp) vperp d vperp
+    YY0perp::Array{mk_float,4} # 4D: three local basis-function indices (i,j,k) plus the element index iel
+    # YY1perp[i,j,k,iel] = \int phi_i(vperp) phi_j(vperp) phi'_k(vperp) vperp d vperp
+    YY1perp::Array{mk_float,4}
+    # YY2perp[i,j,k,iel] = \int phi_i(vperp) phi'_j(vperp) phi'_k(vperp) vperp d vperp
+    YY2perp::Array{mk_float,4}
+    # YY3perp[i,j,k,iel] = \int phi_i(vperp) phi'_j(vperp) phi_k(vperp) vperp d vperp
+    YY3perp::Array{mk_float,4}
+    # YY0par[i,j,k,iel] = \int phi_i(vpa) phi_j(vpa) phi_k(vpa) vpa d vpa  -- NOTE(review): the `vpa` weight factor in the four *par formulas mirrors the vperp lines; confirm it is really present
+    YY0par::Array{mk_float,4}
+    # YY1par[i,j,k,iel] = \int phi_i(vpa) phi_j(vpa) phi'_k(vpa) vpa d vpa
+    YY1par::Array{mk_float,4}
+    # YY2par[i,j,k,iel] = \int phi_i(vpa) phi'_j(vpa) phi'_k(vpa) vpa d vpa
+    YY2par::Array{mk_float,4}
+    # YY3par[i,j,k,iel] = \int phi_i(vpa) phi'_j(vpa) phi_k(vpa) vpa d vpa
+    YY3par::Array{mk_float,4}
+end
+
+"""
+Struct of dummy arrays and precalculated coefficients
+for the finite-element weak-form Fokker-Planck collision operator.
+"""
+struct fokkerplanck_weakform_arrays_struct{M <: AbstractSparseArray{mk_float,mk_int,N} where N}
+    # boundary weights (Green's function) data
+    bwgt::fokkerplanck_boundary_data_arrays_struct
+    # dummy arrays for boundary data calculation
+    rpbd::rosenbluth_potential_boundary_data
+    # assembled 2D weak-form matrices (every one has the same sparse type, the struct parameter M)
+    MM2D_sparse::M
+    KKpar2D_sparse::M
+    KKperp2D_sparse::M
+    KKpar2D_with_BC_terms_sparse::M
+    KKperp2D_with_BC_terms_sparse::M
+    LP2D_sparse::M
+    LV2D_sparse::M
+    LB2D_sparse::M
+    PUperp2D_sparse::M
+    PPparPUperp2D_sparse::M
+    PPpar2D_sparse::M
+    MMparMNperp2D_sparse::M
+    KPperp2D_sparse::M
+    # lu decomposition objects (UMFPACK); presumably factorisations of MM2D, LP2D, LV2D and LB2D respectively -- confirm where the struct is built
+    lu_obj_MM::SuiteSparse.UMFPACK.UmfpackLU{mk_float,mk_int}
+    lu_obj_LP::SuiteSparse.UMFPACK.UmfpackLU{mk_float,mk_int}
+    lu_obj_LV::SuiteSparse.UMFPACK.UmfpackLU{mk_float,mk_int}
+    lu_obj_LB::SuiteSparse.UMFPACK.UmfpackLU{mk_float,mk_int}
+    # elemental matrices for the assembly of C[Fs,Fsp]
+    YY_arrays::YY_collision_operator_arrays
+    # dummy arrays for elliptic solvers
+    S_dummy::MPISharedArray{mk_float,2}
+    Q_dummy::MPISharedArray{mk_float,2}
+    rhsvpavperp::MPISharedArray{mk_float,2}
+    rhsvpavperp_copy1::MPISharedArray{mk_float,2} # three further right-hand-side work arrays of the same type as rhsvpavperp
+    rhsvpavperp_copy2::MPISharedArray{mk_float,2}
+    rhsvpavperp_copy3::MPISharedArray{mk_float,2}
+    # dummy array for the result of the calculation
+    CC::MPISharedArray{mk_float,2}
+    # dummy arrays for storing Rosenbluth potentials
+    GG::MPISharedArray{mk_float,2}
+    HH::MPISharedArray{mk_float,2}
+    dHdvpa::MPISharedArray{mk_float,2}
+    dHdvperp::MPISharedArray{mk_float,2}
+    dGdvperp::MPISharedArray{mk_float,2}
+    d2Gdvperp2::MPISharedArray{mk_float,2}
+    d2Gdvpa2::MPISharedArray{mk_float,2}
+    d2Gdvperpdvpa::MPISharedArray{mk_float,2}
+    FF::MPISharedArray{mk_float,2} # presumably the distribution function and (below) its first derivatives -- confirm at the use sites
+    dFdvpa::MPISharedArray{mk_float,2}
+    dFdvperp::MPISharedArray{mk_float,2}
+end
+
+"""
+Function to allocate a `boundary_integration_weights_struct`.
+"""
+function allocate_boundary_integration_weight(vpa,vperp) # returns one boundary_integration_weights_struct; no `comm` keyword is passed to the allocator here
+    nvpa = vpa.n
+    nvperp = vperp.n
+    lower_vpa_boundary = allocate_shared_float(nvpa,nvperp,nvperp) # filled elsewhere as [ivpap,ivperpp,ivperp]
+    upper_vpa_boundary = allocate_shared_float(nvpa,nvperp,nvperp) # filled elsewhere as [ivpap,ivperpp,ivperp]
+    upper_vperp_boundary = allocate_shared_float(nvpa,nvperp,nvpa) # filled elsewhere as [ivpap,ivperpp,ivpa]
+    return boundary_integration_weights_struct(lower_vpa_boundary,
+            upper_vpa_boundary, upper_vperp_boundary)
+end
+
+"""
+Function to allocate a `fokkerplanck_boundary_data_arrays_struct`.
+"""
+function allocate_boundary_integration_weights(vpa,vperp)
+    G0_weights = allocate_boundary_integration_weight(vpa,vperp) # each call allocates the lower-vpa, upper-vpa and upper-vperp weight arrays
+    G1_weights = allocate_boundary_integration_weight(vpa,vperp)
+    H0_weights = allocate_boundary_integration_weight(vpa,vperp)
+    H1_weights = allocate_boundary_integration_weight(vpa,vperp)
+    H2_weights = allocate_boundary_integration_weight(vpa,vperp)
+    H3_weights = allocate_boundary_integration_weight(vpa,vperp)
+
+    # The following velocity-space-sized buffer arrays are used to evaluate the
+    # collision operator for a single species at a single spatial point. They are
+    # shared-memory arrays. The `comm` argument to `allocate_shared_float()` is used to
+    # set up the shared-memory arrays so that they are shared only by the processes on
+    # `comm_anyv_subblock[]` rather than on the full `comm_block[]`. This means that
+    # different subblocks that are calculating the collision operator at different
+    # spatial points do not interfere with each others' buffer arrays.
+    # Note that the 'weights' allocated above are read-only and therefore can be used
+    # simultaneously by different subblocks. They are shared over the full
+    # `comm_block[]` in order to save memory and setup time.
+    nvpa = vpa.n
+    nvperp = vperp.n
+    dfdvpa = allocate_shared_float(nvpa,nvperp; comm=comm_anyv_subblock[])
+    d2fdvperpdvpa = allocate_shared_float(nvpa,nvperp; comm=comm_anyv_subblock[])
+    dfdvperp = allocate_shared_float(nvpa,nvperp; comm=comm_anyv_subblock[])
+    return fokkerplanck_boundary_data_arrays_struct(G0_weights, # positional order must match the struct's field order
+            G1_weights,H0_weights,H1_weights,H2_weights,H3_weights,
+            dfdvpa,d2fdvperpdvpa,dfdvperp)
+end
+
+
+"""
+Function that precomputes the required integration weights in the whole of
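+`(vpa,vperp)` for the direct integration method of computing the Rosenbluth potentials. Note that the coordinate arguments are taken in the order `vperp, vpa`, whereas `init_Rosenbluth_potential_boundary_integration_weights!` takes `vpa, vperp`.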
+"""
+function init_Rosenbluth_potential_integration_weights!(G0_weights,G1_weights,H0_weights,H1_weights,H2_weights,H3_weights,vperp,vpa;print_to_screen=true)
+    
+    x_vpa, w_vpa, x_vperp, w_vperp, x_legendre, w_legendre, x_laguerre, w_laguerre = setup_basic_quadratures(vpa,vperp,print_to_screen=print_to_screen)
+    
+    @serial_region begin
+        if global_rank[] == 0 && print_to_screen # progress message from global rank 0 only
+            println("beginning weights calculation   ", Dates.format(now(), dateformat"H:MM:SS"))
+        end
+    end
+
+    # precalculated weights, integrating over Lagrange polynomials
+    begin_vperp_vpa_region()
+    @loop_vperp_vpa ivperp ivpa begin # (ivpa,ivperp) is the field point whose weights are being filled
+        #limits where checks required to determine which divergence-safe grid is needed
+        igrid_vpa, ielement_vpa, ielement_vpa_low, ielement_vpa_hi, igrid_vperp, ielement_vperp, ielement_vperp_low, ielement_vperp_hi = get_element_limit_indices(ivpa,ivperp,vpa,vperp)
+        
+        vperp_val = vperp.grid[ivperp]
+        vpa_val = vpa.grid[ivpa]
+        for ivperpp in 1:vperp.n # zero every primed-index entry for this field point before the element loop is called
+            for ivpap in 1:vpa.n
+                G0_weights[ivpap,ivperpp,ivpa,ivperp] = 0.0  
+                G1_weights[ivpap,ivperpp,ivpa,ivperp] = 0.0  
+                # G2_weights[ivpap,ivperpp,ivpa,ivperp] = 0.0  
+                # G3_weights[ivpap,ivperpp,ivpa,ivperp] = 0.0  
+                H0_weights[ivpap,ivperpp,ivpa,ivperp] = 0.0  
+                H1_weights[ivpap,ivperpp,ivpa,ivperp] = 0.0  
+                H2_weights[ivpap,ivperpp,ivpa,ivperp] = 0.0  
+                H3_weights[ivpap,ivperpp,ivpa,ivperp] = 0.0  
+                #@. n_weights[ivpap,ivperpp,ivpa,ivperp] = 0.0  
+            end
+        end
+        # loop over elements and grid points within elements on primed coordinate
+        @views loop_over_vperp_vpa_elements!(G0_weights[:,:,ivpa,ivperp],G1_weights[:,:,ivpa,ivperp],
+                H0_weights[:,:,ivpa,ivperp],H1_weights[:,:,ivpa,ivperp],
+                H2_weights[:,:,ivpa,ivperp],H3_weights[:,:,ivpa,ivperp],
+                vpa,ielement_vpa_low,ielement_vpa_hi, # info about primed vpa grids
+                vperp,ielement_vperp_low,ielement_vperp_hi, # info about primed vperp grids
+                x_vpa, w_vpa, x_vperp, w_vperp, # arrays to store points and weights for primed (source) grids
+                x_legendre,w_legendre,x_laguerre,w_laguerre,
+                igrid_vpa, igrid_vperp, vpa_val, vperp_val)
+    end
+    
+    
+    @serial_region begin
+        if global_rank[] == 0 && print_to_screen # progress message from global rank 0 only
+            println("finished weights calculation   ", Dates.format(now(), dateformat"H:MM:SS"))
+        end
+    end
+    return nothing
+end
+
+"""
+Function for getting the basic quadratures used for the 
+numerical integration of the Lagrange polynomials and the 
+integration kernels.
+"""
+function setup_basic_quadratures(vpa,vperp;print_to_screen=true)
+    @serial_region begin
+        if global_rank[] == 0 && print_to_screen # progress message from global rank 0 only
+            println("setting up GL quadrature   ", Dates.format(now(), dateformat"H:MM:SS"))
+        end
+    end
+    
+    # get Gauss-Legendre points and weights on (-1,1)
+    ngrid = max(vpa.ngrid,vperp.ngrid)
+    nquad = 2*ngrid # quadrature order: twice the larger of vpa.ngrid and vperp.ngrid
+    x_legendre, w_legendre = gausslegendre(nquad)
+    #nlaguerre = min(9,nquad) # to prevent points too close to the boundaries
+    nlaguerre = nquad
+    x_laguerre, w_laguerre = gausslaguerre(nlaguerre) # Gauss-Laguerre points and weights, same order as the Legendre rule
+    
+    x_vpa, w_vpa = Array{mk_float,1}(undef,4*nquad), Array{mk_float,1}(undef,4*nquad) # uninitialised (undef) work arrays of length 4*nquad
+    x_vperp, w_vperp = Array{mk_float,1}(undef,4*nquad), Array{mk_float,1}(undef,4*nquad) # uninitialised (undef) work arrays of length 4*nquad
+  
+    return x_vpa, w_vpa, x_vperp, w_vperp, x_legendre, w_legendre, x_laguerre, w_laguerre
+end
+
+
+"""
+Function for getting the indices used to choose the integration quadrature.
+"""
+function get_element_limit_indices(ivpa,ivperp,vpa,vperp) # returns the 8-tuple (igrid, ielement, ielement_low, ielement_hi) for vpa followed by the same four for vperp
+    nelement_vpa, ngrid_vpa = vpa.nelement_local, vpa.ngrid # note: uses nelement_local, not nelement_global
+    nelement_vperp, ngrid_vperp = vperp.nelement_local, vperp.ngrid
+    #limits where checks required to determine which divergence-safe grid is needed
+    igrid_vpa, ielement_vpa = vpa.igrid[ivpa], vpa.ielement[ivpa]
+    ielement_vpa_low = ielement_vpa - ng_low(igrid_vpa,ngrid_vpa)*nel_low(ielement_vpa,nelement_vpa) # ng_low/nel_low are defined elsewhere; presumably the product is 0 or 1 -- TODO confirm
+    ielement_vpa_hi = ielement_vpa + ng_hi(igrid_vpa,ngrid_vpa)*nel_hi(ielement_vpa,nelement_vpa) # ng_hi/nel_hi are defined elsewhere; presumably the product is 0 or 1 -- TODO confirm
+    #println("igrid_vpa: ielement_vpa: ielement_vpa_low: ielement_vpa_hi:", igrid_vpa," ",ielement_vpa," ",ielement_vpa_low," ",ielement_vpa_hi)
+    igrid_vperp, ielement_vperp = vperp.igrid[ivperp], vperp.ielement[ivperp]
+    ielement_vperp_low = ielement_vperp - ng_low(igrid_vperp,ngrid_vperp)*nel_low(ielement_vperp,nelement_vperp) # same construction as for vpa
+    ielement_vperp_hi = ielement_vperp + ng_hi(igrid_vperp,ngrid_vperp)*nel_hi(ielement_vperp,nelement_vperp) # same construction as for vpa
+    #println("igrid_vperp: ielement_vperp: ielement_vperp_low: ielement_vperp_hi:", igrid_vperp," ",ielement_vperp," ",ielement_vperp_low," ",ielement_vperp_hi)
+    return igrid_vpa, ielement_vpa, ielement_vpa_low, ielement_vpa_hi, 
+            igrid_vperp, ielement_vperp, ielement_vperp_low, ielement_vperp_hi
+end
+
+"""
+Function that precomputes the required integration weights only along the velocity space boundaries.
+Used as the default option as part of the strategy to compute the Rosenbluth potentials
+at the boundaries with direct integration and in the rest of `(vpa,vperp)` by solving elliptic PDEs.
+"""
+function init_Rosenbluth_potential_boundary_integration_weights!(G0_weights,
+      G1_weights,H0_weights,H1_weights,H2_weights,H3_weights,vpa,vperp;print_to_screen=true)
+    
+    x_vpa, w_vpa, x_vperp, w_vperp, x_legendre, w_legendre, x_laguerre, w_laguerre = setup_basic_quadratures(vpa,vperp,print_to_screen=print_to_screen)
+    
+    @serial_region begin
+        if global_rank[] == 0 && print_to_screen
+            println("beginning (boundary) weights calculation   ", Dates.format(now(), dateformat"H:MM:SS"))
+        end
+    end
+
+    # precalculate weights, integrating over Lagrange polynomials
+    # first compute weights along lower vpa boundary
+    begin_vperp_region()
+    ivpa = 1 # lower_vpa_boundary
+    @loop_vperp ivperp begin
+        #limits where checks required to determine which divergence-safe grid is needed
+        igrid_vpa, ielement_vpa, ielement_vpa_low, ielement_vpa_hi, igrid_vperp, ielement_vperp, ielement_vperp_low, ielement_vperp_hi = get_element_limit_indices(ivpa,ivperp,vpa,vperp)
+        
+        vperp_val = vperp.grid[ivperp]
+        vpa_val = vpa.grid[ivpa]
+        for ivperpp in 1:vperp.n
+            for ivpap in 1:vpa.n
+                G0_weights.lower_vpa_boundary[ivpap,ivperpp,ivperp] = 0.0  
+                G1_weights.lower_vpa_boundary[ivpap,ivperpp,ivperp] = 0.0  
+                # G2_weights[ivpap,ivperpp,ivpa,ivperp] = 0.0  
+                # G3_weights[ivpap,ivperpp,ivpa,ivperp] = 0.0  
+                H0_weights.lower_vpa_boundary[ivpap,ivperpp,ivperp] = 0.0  
+                H1_weights.lower_vpa_boundary[ivpap,ivperpp,ivperp] = 0.0  
+                H2_weights.lower_vpa_boundary[ivpap,ivperpp,ivperp] = 0.0  
+                H3_weights.lower_vpa_boundary[ivpap,ivperpp,ivperp] = 0.0  
+                #@. n_weights[ivpap,ivperpp,ivpa,ivperp] = 0.0  
+            end
+        end
+        # loop over elements and grid points within elements on primed coordinate
+        @views loop_over_vperp_vpa_elements!(G0_weights.lower_vpa_boundary[:,:,ivperp],
+                G1_weights.lower_vpa_boundary[:,:,ivperp],
+                H0_weights.lower_vpa_boundary[:,:,ivperp],
+                H1_weights.lower_vpa_boundary[:,:,ivperp],
+                H2_weights.lower_vpa_boundary[:,:,ivperp],
+                H3_weights.lower_vpa_boundary[:,:,ivperp],
+                vpa,ielement_vpa_low,ielement_vpa_hi, # info about primed vpa grids
+                vperp,ielement_vperp_low,ielement_vperp_hi, # info about primed vperp grids
+                x_vpa, w_vpa, x_vperp, w_vperp, # arrays to store points and weights for primed (source) grids
+                x_legendre,w_legendre,x_laguerre,w_laguerre,
+                igrid_vpa, igrid_vperp, vpa_val, vperp_val)
+    end
+    # second compute weights along upper vpa boundary
+    ivpa = vpa.n # upper_vpa_boundary
+    @loop_vperp ivperp begin
+        #limits where checks required to determine which divergence-safe grid is needed
+        igrid_vpa, ielement_vpa, ielement_vpa_low, ielement_vpa_hi, igrid_vperp, ielement_vperp, ielement_vperp_low, ielement_vperp_hi = get_element_limit_indices(ivpa,ivperp,vpa,vperp)
+        
+        vperp_val = vperp.grid[ivperp]
+        vpa_val = vpa.grid[ivpa]
+        for ivperpp in 1:vperp.n
+            for ivpap in 1:vpa.n
+                G0_weights.upper_vpa_boundary[ivpap,ivperpp,ivperp] = 0.0  
+                G1_weights.upper_vpa_boundary[ivpap,ivperpp,ivperp] = 0.0  
+                # G2_weights[ivpap,ivperpp,ivpa,ivperp] = 0.0  
+                # G3_weights[ivpap,ivperpp,ivpa,ivperp] = 0.0  
+                H0_weights.upper_vpa_boundary[ivpap,ivperpp,ivperp] = 0.0  
+                H1_weights.upper_vpa_boundary[ivpap,ivperpp,ivperp] = 0.0  
+                H2_weights.upper_vpa_boundary[ivpap,ivperpp,ivperp] = 0.0  
+                H3_weights.upper_vpa_boundary[ivpap,ivperpp,ivperp] = 0.0  
+                #@. n_weights[ivpap,ivperpp,ivpa,ivperp] = 0.0  
+            end
+        end
+        # loop over elements and grid points within elements on primed coordinate
+        @views loop_over_vperp_vpa_elements!(G0_weights.upper_vpa_boundary[:,:,ivperp],
+                G1_weights.upper_vpa_boundary[:,:,ivperp],
+                H0_weights.upper_vpa_boundary[:,:,ivperp],
+                H1_weights.upper_vpa_boundary[:,:,ivperp],
+                H2_weights.upper_vpa_boundary[:,:,ivperp],
+                H3_weights.upper_vpa_boundary[:,:,ivperp],
+                vpa,ielement_vpa_low,ielement_vpa_hi, # info about primed vpa grids
+                vperp,ielement_vperp_low,ielement_vperp_hi, # info about primed vperp grids
+                x_vpa, w_vpa, x_vperp, w_vperp, # arrays to store points and weights for primed (source) grids
+                x_legendre,w_legendre,x_laguerre,w_laguerre,
+                igrid_vpa, igrid_vperp, vpa_val, vperp_val)
+    end
+    # finally compute weight along upper vperp boundary
+    begin_vpa_region()
+    ivperp = vperp.n # upper_vperp_boundary
+    @loop_vpa ivpa begin
+        #limits where checks required to determine which divergence-safe grid is needed
+        igrid_vpa, ielement_vpa, ielement_vpa_low, ielement_vpa_hi, igrid_vperp, ielement_vperp, ielement_vperp_low, ielement_vperp_hi = get_element_limit_indices(ivpa,ivperp,vpa,vperp)
+        
+        vperp_val = vperp.grid[ivperp]
+        vpa_val = vpa.grid[ivpa]
+        for ivperpp in 1:vperp.n
+            for ivpap in 1:vpa.n
+                G0_weights.upper_vperp_boundary[ivpap,ivperpp,ivpa] = 0.0  
+                G1_weights.upper_vperp_boundary[ivpap,ivperpp,ivpa] = 0.0  
+                # G2_weights[ivpap,ivperpp,ivpa,ivperp] = 0.0  
+                # G3_weights[ivpap,ivperpp,ivpa,ivperp] = 0.0  
+                H0_weights.upper_vperp_boundary[ivpap,ivperpp,ivpa] = 0.0  
+                H1_weights.upper_vperp_boundary[ivpap,ivperpp,ivpa] = 0.0  
+                H2_weights.upper_vperp_boundary[ivpap,ivperpp,ivpa] = 0.0  
+                H3_weights.upper_vperp_boundary[ivpap,ivperpp,ivpa] = 0.0  
+                #@. n_weights[ivpap,ivperpp,ivpa,ivperp] = 0.0  
+            end
+        end
+        # loop over elements and grid points within elements on primed coordinate
+        @views loop_over_vperp_vpa_elements!(G0_weights.upper_vperp_boundary[:,:,ivpa],
+                G1_weights.upper_vperp_boundary[:,:,ivpa],
+                H0_weights.upper_vperp_boundary[:,:,ivpa],
+                H1_weights.upper_vperp_boundary[:,:,ivpa],
+                H2_weights.upper_vperp_boundary[:,:,ivpa],
+                H3_weights.upper_vperp_boundary[:,:,ivpa],
+                vpa,ielement_vpa_low,ielement_vpa_hi, # info about primed vpa grids
+                vperp,ielement_vperp_low,ielement_vperp_hi, # info about primed vperp grids
+                x_vpa, w_vpa, x_vperp, w_vperp, # arrays to store points and weights for primed (source) grids
+                x_legendre,w_legendre,x_laguerre,w_laguerre,
+                igrid_vpa, igrid_vperp, vpa_val, vperp_val)
+    end
+    # return the parallelisation status to serial
+    begin_serial_region()
+    @serial_region begin 
+        if global_rank[] == 0 && print_to_screen
+            println("finished (boundary) weights calculation   ", Dates.format(now(), dateformat"H:MM:SS"))
+        end
+    end
+    return nothing
+end
+
+"""
+    get_imin_imax(coord, iel)
+
+Return `(imin, imax)`, the first and last full-grid indices used for element `iel` of
+`coord`.
+
+`imax` is `coord.imax[iel]`. `imin` is `coord.imin[iel]` for the first element and
+`coord.imin[iel] - 1` for every later element.
+"""
+function get_imin_imax(coord,iel)
+    # every element after the first starts one index below its stored `imin`
+    lower_offset = iel > 1 ? 1 : 0
+    return coord.imin[iel] - lower_offset, coord.imax[iel]
+end
+
+"""
+    get_nodes(coord, iel)
+
+Return the slice `coord.grid[imin:imax]`, where `(imin, imax)` is the index range given
+by `get_imin_imax(coord, iel)` for element `iel`.
+"""
+function get_nodes(coord,iel)
+    first_index, last_index = get_imin_imax(coord,iel)
+    return coord.grid[first_index:last_index]
+end
+
+"""
+Function to get the local integration grid and quadrature weights
+to integrate a 1D element in the 2D representation of the 
+velocity space distribution functions. This function assumes that
+there is a divergence at the point `coord_val`, and splits the grid 
+and integration weights appropriately, using Gauss-Laguerre points
+near the divergence and Gauss-Legendre points away from the divergence. 
+"""
+function get_scaled_x_w_with_divergences!(x_scaled, w_scaled, x_legendre, w_legendre, x_laguerre, w_laguerre, node_min, node_max, nodes, igrid_coord, coord_val)
+    # Arguments:
+    #   x_scaled, w_scaled     : output buffers; zeroed on entry and then filled with the
+    #                            scaled points and weights. Only the first `nquad_coord`
+    #                            entries are meaningful on return.
+    #   x_legendre, w_legendre : Gauss-Legendre points and weights, mapped linearly onto
+    #                            each divergence-free sub-interval via `scale*x + shift`
+    #   x_laguerre, w_laguerre : Gauss-Laguerre points and weights, mapped through
+    #                            `exp(-x)` onto the sub-interval next to the divergence
+    #   node_min, node_max     : integration limits of the element
+    #   nodes                  : grid points of the element
+    #   igrid_coord            : index in `nodes` of the divergent point (only used when
+    #                            the divergence is not at an element endpoint)
+    #   coord_val              : location of the divergence
+    # Returns `nquad_coord`, the number of quadrature points written.
+    #
+    # The output buffers must hold up to `2*(nquad_legendre + nquad_laguerre)` entries,
+    # which is the number written for a divergence in the interior of the element.
+
+    # tolerance for deciding that `coord_val` coincides with an element endpoint
+    endpoint_tol = 1.0e-10
+    @. x_scaled = 0.0
+    @. w_scaled = 0.0
+    nnodes = size(nodes,1)
+    nquad_legendre = size(x_legendre,1)
+    nquad_laguerre = size(x_laguerre,1)
+    if abs(coord_val - node_max) < endpoint_tol # divergence at upper endpoint
+        # split the element half way between the last two nodes
+        node_cut = (nodes[nnodes-1] + nodes[nnodes])/2.0
+
+        n = nquad_laguerre + nquad_legendre
+        # Gauss-Legendre points on [node_min, node_cut]
+        shift = 0.5*(node_min + node_cut)
+        scale = 0.5*(node_cut - node_min)
+        @. x_scaled[1:nquad_legendre] = scale*x_legendre + shift
+        @. w_scaled[1:nquad_legendre] = scale*w_legendre
+
+        # Gauss-Laguerre points on [node_cut, node_max], clustering towards node_max
+        @. x_scaled[1+nquad_legendre:n] = node_max + (node_cut - node_max)*exp(-x_laguerre)
+        @. w_scaled[1+nquad_legendre:n] = (node_max - node_cut)*w_laguerre
+
+        nquad_coord = n
+    elseif abs(coord_val - node_min) < endpoint_tol # divergence at lower endpoint
+        n = nquad_laguerre + nquad_legendre
+        # split the element half way between the first two nodes
+        node_cut = (nodes[1] + nodes[2])/2.0
+        # Gauss-Laguerre points on [node_min, node_cut], stored in reverse order of `j`
+        for j in 1:nquad_laguerre
+            x_scaled[nquad_laguerre+1-j] = node_min + (node_cut - node_min)*exp(-x_laguerre[j])
+            w_scaled[nquad_laguerre+1-j] = (node_cut - node_min)*w_laguerre[j]
+        end
+        # Gauss-Legendre points on [node_cut, node_max]
+        shift = 0.5*(node_max + node_cut)
+        scale = 0.5*(node_max - node_cut)
+        @. x_scaled[1+nquad_laguerre:n] = scale*x_legendre + shift
+        @. w_scaled[1+nquad_laguerre:n] = scale*w_legendre
+
+        nquad_coord = n
+    else # every other case is treated as a divergence at node `igrid_coord`
+        n = 2*nquad_laguerre
+        node_cut_high = (nodes[igrid_coord+1] + nodes[igrid_coord])/2.0
+        if igrid_coord == 1
+            # exception for vperp coordinate near origin: there is no lower
+            # Gauss-Legendre piece, the Gauss-Laguerre piece extends down to node_min
+            k = 0
+            node_cut_low = node_min
+            nquad_coord = nquad_legendre + 2*nquad_laguerre
+        else
+            # fill in lower Gauss-Legendre points on [node_min, node_cut_low]
+            node_cut_low = (nodes[igrid_coord-1]+nodes[igrid_coord])/2.0
+            shift = 0.5*(node_cut_low + node_min)
+            scale = 0.5*(node_cut_low - node_min)
+            @. x_scaled[1:nquad_legendre] = scale*x_legendre + shift
+            @. w_scaled[1:nquad_legendre] = scale*w_legendre
+            k = nquad_legendre
+            nquad_coord = 2*(nquad_laguerre + nquad_legendre)
+        end
+        # lower half of domain: Gauss-Laguerre points on [node_cut_low, coord_val]
+        for j in 1:nquad_laguerre
+            x_scaled[k+j] = coord_val + (node_cut_low - coord_val)*exp(-x_laguerre[j])
+            w_scaled[k+j] = (coord_val - node_cut_low)*w_laguerre[j]
+        end
+        # upper half of domain: Gauss-Laguerre points on [coord_val, node_cut_high],
+        # stored in reverse order of `j`
+        for j in 1:nquad_laguerre
+            x_scaled[k+n+1-j] = coord_val + (node_cut_high - coord_val)*exp(-x_laguerre[j])
+            w_scaled[k+n+1-j] = (node_cut_high - coord_val)*w_laguerre[j]
+        end
+        # fill in upper Gauss-Legendre points on [node_cut_high, node_max]
+        shift = 0.5*(node_cut_high + node_max)
+        scale = 0.5*(node_max - node_cut_high)
+        @. x_scaled[k+n+1:nquad_coord] = scale*x_legendre + shift
+        @. w_scaled[k+n+1:nquad_coord] = scale*w_legendre
+    end
+    return nquad_coord
+end
+
+"""
+Function to get the local grid and integration weights assuming 
+no divergences of the function on the 1D element. Gauss-Legendre
+quadrature is used for the entire element.
+"""
+function get_scaled_x_w_no_divergences!(x_scaled, w_scaled, x_legendre, w_legendre, node_min, node_max)
+    # The Gauss-Legendre points are mapped linearly onto [node_min, node_max] via
+    # x -> half_width*x + midpoint, and the weights are scaled by half_width.
+    half_width = 0.5*(node_max - node_min)
+    midpoint = 0.5*(node_min + node_max)
+    npoints = size(x_legendre,1)
+    # clear the whole buffers first; entries beyond `npoints` stay zero
+    fill!(x_scaled, 0.0)
+    fill!(w_scaled, 0.0)
+    x_scaled[1:npoints] .= half_width .* x_legendre .+ midpoint
+    w_scaled[1:npoints] .= half_width .* w_legendre
+    # number of quadrature points written
+    return npoints
+end
+
+"""
+Function returns `1` if `igrid = 1` or `0` if `1 < igrid <= ngrid`.
+"""
+function ng_low(igrid,ngrid)
+    # A direct comparison is used instead of `floor(mk_int, (ngrid - igrid)/(ngrid - 1))`:
+    # that expression evaluates 0/0 = NaN for `ngrid == 1` and then throws an
+    # `InexactError`. For `1 <= igrid <= ngrid` with `ngrid > 1` the result is unchanged.
+    # `ngrid` is kept in the signature so that existing callers are unaffected.
+    return mk_int(igrid == 1)
+end
+
+"""
+Function returns `1` if `igrid = ngrid` or `0` if `1 <= igrid < ngrid`.
+"""
+function ng_hi(igrid,ngrid)
+    # igrid/ngrid is below 1 for every point before the last one and exactly 1 on the
+    # last point, so rounding down gives the 0/1 flag
+    fraction = igrid/ngrid
+    return floor(mk_int, fraction)
+end
+
+"""
+Function returns `1` for `nelement >= ielement > 1`, `0` for `ielement = 1`.
+"""
+function nel_low(ielement,nelement)
+    # (ielement - 2 + nelement)/nelement is below 1 only for ielement = 1, so rounding
+    # down gives 0 for the first element and 1 for the others
+    shifted = ielement - 2 + nelement
+    return floor(mk_int, shifted/nelement)
+end
+
+"""
+Function returns `1` for `nelement > ielement >= 1`, `0` for `ielement = nelement`.
+"""
+function nel_hi(ielement,nelement)
+    # flag that is 1 only on the last element ...
+    on_last_element = floor(mk_int, ielement/nelement)
+    # ... and its complement is returned
+    return 1 - on_last_element
+end
+
+"""
+Base level function for computing the integration kernels for the Rosenbluth potential integration.
+Note the definitions of `ellipe(m)` (\$E(m)\$) and `ellipk(m)` (\$K(m)\$).
+`https://specialfunctions.juliamath.org/stable/functions_list/#SpecialFunctions.ellipe`
+`https://specialfunctions.juliamath.org/stable/functions_list/#SpecialFunctions.ellipk`
+```math
+E(m) = \\int^{\\pi/2}_0 \\sqrt{ 1 - m \\sin^2(\\theta)} d \\theta
+```
+```math
+K(m) = \\int^{\\pi/2}_0 \\frac{1}{\\sqrt{ 1 - m \\sin^2(\\theta)}} d \\theta
+```
+"""
+function local_element_integration!(G0_weights,G1_weights,H0_weights,H1_weights,H2_weights,H3_weights,
+                            nquad_vpa,ielement_vpa,vpa, # info about primed vpa grids
+                            nquad_vperp,ielement_vperp,vperp, # info about primed vperp grids
+                            x_vpa, w_vpa, x_vperp, w_vperp, # points and weights for primed (source) grids
+                            vpa_val, vperp_val) # values and indices for unprimed (field) grids
+    # Accumulates (`+=`) into the six weight arrays the contribution of the primed
+    # element `(ielement_vpa, ielement_vperp)` for the field point `(vpa_val, vperp_val)`.
+    # The weight arrays are indexed by the full-grid primed indices `[ivpap,ivperpp]` and
+    # are NOT zeroed here. Only the first `nquad_vpa` / `nquad_vperp` entries of the
+    # point and weight arrays are used. Returns `nothing`.
+    for igrid_vperp in 1:vperp.ngrid
+        # data handed to `lagrange_poly_optimised` for the vperp node `igrid_vperp`
+        vperp_other_nodes = @view vperp.other_nodes[:,igrid_vperp,ielement_vperp]
+        vperp_one_over_denominator = vperp.one_over_denominator[igrid_vperp,ielement_vperp]
+        for igrid_vpa in 1:vpa.ngrid
+            # data handed to `lagrange_poly_optimised` for the vpa node `igrid_vpa`
+            vpa_other_nodes = @view vpa.other_nodes[:,igrid_vpa,ielement_vpa]
+            vpa_one_over_denominator = vpa.one_over_denominator[igrid_vpa,ielement_vpa]
+            # get grid index for point on full grid  
+            ivpap = vpa.igrid_full[igrid_vpa,ielement_vpa]   
+            ivperpp = vperp.igrid_full[igrid_vperp,ielement_vperp]   
+            # carry out integration over Lagrange polynomial at this node, on this element
+            for kvperp in 1:nquad_vperp
+                for kvpa in 1:nquad_vpa 
+                    x_kvpa = x_vpa[kvpa]
+                    x_kvperp = x_vperp[kvperp]
+                    w_kvperp = w_vperp[kvperp]
+                    w_kvpa = w_vpa[kvpa]
+                    denom = (vpa_val - x_kvpa)^2 + (vperp_val + x_kvperp)^2 
+                    # `mm` is the argument passed to `ellipe`/`ellipk`; it is capped at
+                    # 1 - 1e-15 (presumably because K(m) diverges as m -> 1 -- confirm)
+                    mm = min(4.0*vperp_val*x_kvperp/denom,1.0 - 1.0e-15)
+                    #mm = 4.0*vperp_val*x_kvperp/denom/(1.0 + 10^-15)
+                    #mm = 4.0*vperp_val*x_kvperp/denom
+                    prefac = sqrt(denom)
+                    ellipe_mm = ellipe(mm) 
+                    ellipk_mm = ellipk(mm) 
+                    #if mm_test > 1.0
+                    #    println("mm: ",mm_test," ellipe: ",ellipe_mm," ellipk: ",ellipk_mm)
+                    #end
+                    # NOTE(review): the G1, H1 and H2 factors below divide by `mm`, which
+                    # is zero when `vperp_val == 0` or `x_kvperp == 0` -- confirm that
+                    # such points are never passed in.
+                    G_elliptic_integral_factor = 2.0*ellipe_mm*prefac/pi
+                    G1_elliptic_integral_factor = -(2.0*prefac/pi)*( (2.0 - mm)*ellipe_mm - 2.0*(1.0 - mm)*ellipk_mm )/(3.0*mm)
+                    #G2_elliptic_integral_factor = (2.0*prefac/pi)*( (7.0*mm^2 + 8.0*mm - 8.0)*ellipe_mm + 4.0*(2.0 - mm)*(1.0 - mm)*ellipk_mm )/(15.0*mm^2)
+                    #G3_elliptic_integral_factor = (2.0*prefac/pi)*( 8.0*(mm^2 - mm + 1.0)*ellipe_mm - 4.0*(2.0 - mm)*(1.0 - mm)*ellipk_mm )/(15.0*mm^2)
+                    H_elliptic_integral_factor = 2.0*ellipk_mm/(pi*prefac)
+                    H1_elliptic_integral_factor = -(2.0/(pi*prefac))*( (mm-2.0)*(ellipk_mm/mm) + (2.0*ellipe_mm/mm) )
+                    H2_elliptic_integral_factor = (2.0/(pi*prefac))*( (3.0*mm^2 - 8.0*mm + 8.0)*(ellipk_mm/(3.0*mm^2)) + (4.0*mm - 8.0)*ellipe_mm/(3.0*mm^2) )
+                    # Lagrange polynomials of the current node evaluated at the
+                    # quadrature point
+                    lagrange_poly_vpa = lagrange_poly_optimised(vpa_other_nodes,
+                                                                vpa_one_over_denominator,
+                                                                x_kvpa)
+                    lagrange_poly_vperp = lagrange_poly_optimised(vperp_other_nodes,
+                                                                  vperp_one_over_denominator,
+                                                                  x_kvperp)
+                    
+                    # every contribution below shares the factor
+                    # lagrange_poly_vpa*lagrange_poly_vperp*x_kvperp*w_kvperp*w_kvpa*2.0/sqrt(pi)
+                    # and differs only in the kernel that multiplies it
+                    (G0_weights[ivpap,ivperpp] += 
+                        lagrange_poly_vpa*lagrange_poly_vperp*
+                        G_elliptic_integral_factor*x_kvperp*w_kvperp*w_kvpa*2.0/sqrt(pi))
+                    
+                    (G1_weights[ivpap,ivperpp] += 
+                        lagrange_poly_vpa*lagrange_poly_vperp*
+                        G1_elliptic_integral_factor*x_kvperp*w_kvperp*w_kvpa*2.0/sqrt(pi))
+                    
+                    #(G2_weights[ivpap,ivperpp] += 
+                    #    lagrange_poly_vpa*lagrange_poly_vperp*
+                    #    G2_elliptic_integral_factor*x_kvperp*w_kvperp*w_kvpa*2.0/sqrt(pi))
+                    
+                    #(G3_weights[ivpap,ivperpp] += 
+                    #    lagrange_poly_vpa*lagrange_poly_vperp*
+                    #    G3_elliptic_integral_factor*w_kvperp*w_kvpa*2.0/sqrt(pi))
+                    
+                    (H0_weights[ivpap,ivperpp] += 
+                        lagrange_poly_vpa*lagrange_poly_vperp*
+                        H_elliptic_integral_factor*x_kvperp*w_kvperp*w_kvpa*2.0/sqrt(pi))
+                        
+                    (H1_weights[ivpap,ivperpp] += 
+                        lagrange_poly_vpa*lagrange_poly_vperp*
+                        H1_elliptic_integral_factor*x_kvperp*w_kvperp*w_kvpa*2.0/sqrt(pi))
+                        
+                    # H2 kernel: H1 factor times the field vperp minus H2 factor times
+                    # the source vperp
+                    (H2_weights[ivpap,ivperpp] += 
+                        lagrange_poly_vpa*lagrange_poly_vperp*
+                        (H1_elliptic_integral_factor*vperp_val - H2_elliptic_integral_factor*x_kvperp)*
+                        x_kvperp*w_kvperp*w_kvpa*2.0/sqrt(pi))
+                    # H3 kernel: H factor times the vpa separation of field and source
+                    (H3_weights[ivpap,ivperpp] += 
+                        lagrange_poly_vpa*lagrange_poly_vperp*
+                        H_elliptic_integral_factor*(vpa_val - x_kvpa)*
+                        x_kvperp*w_kvperp*w_kvpa*2.0/sqrt(pi))
+                    
+                    #(n_weights[ivpap,ivperpp] += 
+                    #    lagrange_poly_vpa*lagrange_poly_vperp*
+                    #    x_kvperp*w_kvperp*w_kvpa*2.0/sqrt(pi))
+                end
+            end
+        end
+    end
+    return nothing
+end
+
+"""
+Function for computing the quadratures and carrying out the loop over the 
+primed `vpa` coordinate in doing the numerical integration. Splits the integrand
+into three pieces -- two which use Gauss-Legendre quadrature assuming no divergences
+in the integrand, and one which assumes a logarithmic divergence and uses a
+Gauss-Laguerre quadrature with an (exponential) change of variables to mitigate this divergence.
+"""
+function loop_over_vpa_elements!(G0_weights,G1_weights,H0_weights,H1_weights,H2_weights,H3_weights,
+                            vpa,ielement_vpa_low,ielement_vpa_hi, # info about primed vpa grids
+                            vperp,ielement_vperpp, # info about primed vperp grids
+                            x_vpa, w_vpa, x_vperp, w_vperp, # arrays to store points and weights for primed (source) grids
+                            x_legendre,w_legendre,x_laguerre,w_laguerre,
+                            igrid_vpa, igrid_vperp, vpa_val, vperp_val)
+    # Integrates the primed vperp element `ielement_vperpp` against every primed vpa
+    # element, accumulating into the six weight arrays. vpa elements in
+    # `ielement_vpa_low:ielement_vpa_hi` use the divergence-aware quadrature in both
+    # coordinates; all other vpa elements use plain Gauss-Legendre quadrature.
+    # `x_vpa`, `w_vpa`, `x_vperp`, `w_vperp` are scratch buffers that are overwritten.
+    # Returns `nothing`.
+    vperp_nodes = get_nodes(vperp,ielement_vperpp)
+    vperp_max = vperp_nodes[end]
+    # nel_low() is 0 for the first element, so the first vperp element is integrated
+    # from 0 rather than from its first node
+    vperp_min = vperp_nodes[1]*nel_low(ielement_vperpp,vperp.nelement_local) 
+    # vperp quadrature without divergences, used for the vpa elements below the window
+    nquad_vperp = get_scaled_x_w_no_divergences!(x_vperp, w_vperp, x_legendre, w_legendre, vperp_min, vperp_max)
+    for ielement_vpap in 1:ielement_vpa_low-1 
+        # do integration over part of the domain with no divergences
+        vpa_nodes = get_nodes(vpa,ielement_vpap)
+        vpa_min, vpa_max = vpa_nodes[1], vpa_nodes[end]
+        nquad_vpa = get_scaled_x_w_no_divergences!(x_vpa, w_vpa, x_legendre, w_legendre, vpa_min, vpa_max)
+        local_element_integration!(G0_weights,G1_weights,H0_weights,H1_weights,H2_weights,H3_weights,
+                    nquad_vpa,ielement_vpap,vpa,
+                    nquad_vperp,ielement_vperpp,vperp,
+                    x_vpa, w_vpa, x_vperp, w_vperp, 
+                    vpa_val, vperp_val)
+    end
+    # refill the vperp buffers with the divergence-aware quadrature before the window
+    nquad_vperp = get_scaled_x_w_with_divergences!(x_vperp, w_vperp, x_legendre, w_legendre, x_laguerre, w_laguerre, vperp_min, vperp_max, vperp_nodes, igrid_vperp, vperp_val)
+    for ielement_vpap in ielement_vpa_low:ielement_vpa_hi
+    #for ielement_vpap in 1:vpa.nelement_local
+        # use general grid function that checks divergences
+        vpa_nodes = get_nodes(vpa,ielement_vpap)
+        vpa_min, vpa_max = vpa_nodes[1], vpa_nodes[end]
+        #nquad_vpa = get_scaled_x_w_no_divergences!(x_vpa, w_vpa, x_legendre, w_legendre, vpa_min, vpa_max)
+        nquad_vpa = get_scaled_x_w_with_divergences!(x_vpa, w_vpa, x_legendre, w_legendre, x_laguerre, w_laguerre, vpa_min, vpa_max, vpa_nodes, igrid_vpa, vpa_val)
+        local_element_integration!(G0_weights,G1_weights,H0_weights,H1_weights,H2_weights,H3_weights,
+                    nquad_vpa,ielement_vpap,vpa,
+                    nquad_vperp,ielement_vperpp,vperp,
+                    x_vpa, w_vpa, x_vperp, w_vperp, 
+                    vpa_val, vperp_val)
+    end
+    # switch the vperp buffers back to the divergence-free quadrature for the vpa
+    # elements above the window
+    nquad_vperp = get_scaled_x_w_no_divergences!(x_vperp, w_vperp, x_legendre, w_legendre, vperp_min, vperp_max)
+    for ielement_vpap in ielement_vpa_hi+1:vpa.nelement_local
+        # do integration over part of the domain with no divergences
+        vpa_nodes = get_nodes(vpa,ielement_vpap)
+        vpa_min, vpa_max = vpa_nodes[1], vpa_nodes[end]
+        nquad_vpa = get_scaled_x_w_no_divergences!(x_vpa, w_vpa, x_legendre, w_legendre, vpa_min, vpa_max)
+        local_element_integration!(G0_weights,G1_weights,H0_weights,H1_weights,H2_weights,H3_weights,
+                    nquad_vpa,ielement_vpap,vpa,
+                    nquad_vperp,ielement_vperpp,vperp,
+                    x_vpa, w_vpa, x_vperp, w_vperp, 
+                    vpa_val, vperp_val)
+                    
+    end
+    return nothing
+end
+
+"""
+Function for computing the quadratures and carrying out the loop over the 
+primed `vpa` coordinate in doing the numerical integration. 
+Uses a Gauss-Legendre quadrature assuming no divergences in the integrand.
+"""
+function loop_over_vpa_elements_no_divergences!(G0_weights,G1_weights,H0_weights,H1_weights,H2_weights,H3_weights,
+                            vpa,ielement_vpa_low,ielement_vpa_hi, # info about primed vpa grids
+                            nquad_vperp,ielement_vperpp,vperp_nodes,vperp, # info about primed vperp grids
+                            x_vpa, w_vpa, x_vperp, w_vperp, # arrays to store points and weights for primed (source) grids
+                            x_legendre,w_legendre,
+                            vpa_val, vperp_val)
+    # Every primed vpa element is integrated with plain Gauss-Legendre quadrature. The
+    # vperp quadrature (`x_vperp`, `w_vperp`, `nquad_vperp`) is supplied by the caller;
+    # `x_vpa` and `w_vpa` are scratch buffers refilled for each element.
+    # `ielement_vpa_low`, `ielement_vpa_hi` and `vperp_nodes` are not used here.
+    for iel in 1:vpa.nelement_local
+        element_nodes = get_nodes(vpa,iel)
+        # map the Gauss-Legendre rule onto [first node, last node] of this element
+        nquad_vpa = get_scaled_x_w_no_divergences!(x_vpa, w_vpa, x_legendre, w_legendre,
+                                                   element_nodes[1], element_nodes[end])
+        local_element_integration!(G0_weights,G1_weights,H0_weights,H1_weights,H2_weights,H3_weights,
+                    nquad_vpa,iel,vpa,
+                    nquad_vperp,ielement_vperpp,vperp,
+                    x_vpa, w_vpa, x_vperp, w_vperp,
+                    vpa_val, vperp_val)
+    end
+    return nothing
+end
+
+"""
+Function for computing the quadratures and carrying out the loop over the 
+primed `vperp` coordinate in doing the numerical integration. Splits the integrand
+into three pieces -- two which use Gauss-Legendre quadrature assuming no divergences
+in the integrand, and one which assumes a logarithmic divergence and uses a
+Gauss-Laguerre quadrature with an (exponential) change of variables to mitigate this divergence.
+This function calls `loop_over_vpa_elements_no_divergences!()` and `loop_over_vpa_elements!()`
+to carry out the primed `vpa` loop within the primed `vperp` loop.
+"""
+function loop_over_vperp_vpa_elements!(G0_weights,G1_weights,H0_weights,H1_weights,H2_weights,H3_weights,
+                vpa,ielement_vpa_low,ielement_vpa_hi, # info about primed vpa grids
+                vperp,ielement_vperp_low,ielement_vperp_hi, # info about primed vperp grids
+                x_vpa, w_vpa, x_vperp, w_vperp, # arrays to store points and weights for primed (source) grids
+                x_legendre,w_legendre,x_laguerre,w_laguerre,
+                igrid_vpa, igrid_vperp, vpa_val, vperp_val)
+    # Integrate one primed vperp element with plain Gauss-Legendre quadrature in both
+    # primed coordinates.
+    function integrate_regular_vperp_element!(iel)
+        element_nodes = get_nodes(vperp,iel)
+        # nel_low() is 0 for the first element, so that element is integrated from 0
+        lower_limit = element_nodes[1]*nel_low(iel,vperp.nelement_local)
+        upper_limit = element_nodes[end]
+        npoints_vperp = get_scaled_x_w_no_divergences!(x_vperp, w_vperp, x_legendre, w_legendre, lower_limit, upper_limit)
+        loop_over_vpa_elements_no_divergences!(G0_weights,G1_weights,H0_weights,H1_weights,H2_weights,H3_weights,
+                vpa,ielement_vpa_low,ielement_vpa_hi,
+                npoints_vperp,iel,element_nodes,vperp,
+                x_vpa, w_vpa, x_vperp, w_vperp,
+                x_legendre,w_legendre,
+                vpa_val, vperp_val)
+        return nothing
+    end
+
+    # vperp elements below the window that may contain a divergence
+    for ielement_vperpp in 1:ielement_vperp_low-1
+        integrate_regular_vperp_element!(ielement_vperpp)
+    end
+    # vperp elements inside the window: the vperp quadrature is chosen inside
+    # loop_over_vpa_elements!(), depending on the vpa element
+    for ielement_vperpp in ielement_vperp_low:ielement_vperp_hi
+        loop_over_vpa_elements!(G0_weights,G1_weights,H0_weights,H1_weights,H2_weights,H3_weights,
+                vpa,ielement_vpa_low,ielement_vpa_hi,
+                vperp,ielement_vperpp,
+                x_vpa, w_vpa, x_vperp, w_vperp,
+                x_legendre,w_legendre,x_laguerre,w_laguerre,
+                igrid_vpa, igrid_vperp, vpa_val, vperp_val)
+    end
+    # vperp elements above the window
+    for ielement_vperpp in ielement_vperp_hi+1:vperp.nelement_local
+        integrate_regular_vperp_element!(ielement_vperpp)
+    end
+    return nothing
+end
+
+"""
+The function `loop_over_vperp_vpa_elements_no_divergences!()` was used for debugging.
+By changing the source where `loop_over_vperp_vpa_elements!()` is called to
+instead call this function we can verify that the Gauss-Legendre quadrature
+is adequate for integrating a divergence-free integrand. This function should be 
+kept until we understand the problems preventing machine-precision accuracy in the pure integration method of computing the
+Rosenbluth potentials.
+"""
+function loop_over_vperp_vpa_elements_no_divergences!(G0_weights,G1_weights,H0_weights,H1_weights,H2_weights,H3_weights,
+                vpa,ielement_vpa_low,ielement_vpa_hi, # info about primed vpa grids
+                vperp,ielement_vperp_low,ielement_vperp_hi, # info about primed vperp grids
+                x_vpa, w_vpa, x_vperp, w_vperp, # arrays to store points and weights for primed (source) grids
+                x_legendre,w_legendre,
+                igrid_vpa, igrid_vperp, vpa_val, vperp_val)
+    # Every primed vperp element (and, inside the callee, every primed vpa element) is
+    # integrated with plain Gauss-Legendre quadrature, accumulating into the six weight
+    # arrays. `x_vpa`, `w_vpa`, `x_vperp`, `w_vperp` are scratch buffers that are
+    # overwritten. `ielement_vperp_low`, `ielement_vperp_hi`, `igrid_vpa` and
+    # `igrid_vperp` are accepted for signature parity with
+    # `loop_over_vperp_vpa_elements!()` but are not used here. Returns `nothing`.
+    for ielement_vperpp in 1:vperp.nelement_local
+        vperp_nodes = get_nodes(vperp,ielement_vperpp)
+        vperp_max = vperp_nodes[end]
+        # nel_low() is 0 for the first element, so that element is integrated from 0.
+        # The element count is read from `vperp.nelement_local`, as in the other loop
+        # functions; the previously used name `nelement_vperp` is not defined in this
+        # function.
+        vperp_min = vperp_nodes[1]*nel_low(ielement_vperpp,vperp.nelement_local)
+        nquad_vperp = get_scaled_x_w_no_divergences!(x_vperp, w_vperp, x_legendre, w_legendre, vperp_min, vperp_max)
+        loop_over_vpa_elements_no_divergences!(G0_weights,G1_weights,H0_weights,H1_weights,H2_weights,H3_weights,
+                vpa,ielement_vpa_low,ielement_vpa_hi, # info about primed vpa grids
+                nquad_vperp,ielement_vperpp,vperp_nodes,vperp, # info about primed vperp grids
+                x_vpa, w_vpa, x_vperp, w_vperp, # arrays to store points and weights for primed (source) grids
+                x_legendre,w_legendre,
+                vpa_val, vperp_val)
+    end
+    return nothing
+end
+
+
+"""
+    ic_func(ivpa::mk_int,ivperp::mk_int,nvpa::mk_int)
+
+Get the 'linear index' corresponding to `ivpa` and `ivperp`. Defined so that the linear
+index corresponds to the underlying layout in memory of a 2d array indexed by
+`[ivpa,ivperp]`, i.e. for a 2d array `f2d`:
+* `size(f2d) == (vpa.n, vperp.n)`
+* For a reference to `f2d` that is reshaped to a vector (a 1d array) `f1d = vec(f2d)`, then
+  for any `ivpa` and `ivperp` it is true that `f1d[ic_func(ivpa,ivperp,vpa.n)] ==
+  f2d[ivpa,ivperp]`.
+"""
+function ic_func(ivpa::mk_int,ivperp::mk_int,nvpa::mk_int)
+    # `ivperp - 1` complete runs of `nvpa` entries precede the requested entry
+    preceding_entries = nvpa*(ivperp-1)
+    return ivpa + preceding_entries
+end
+
+"""
+    ivperp_func(ic::mk_int,nvpa::mk_int)
+
+Get the `vperp` index `ivperp` that corresponds to a 'linear index' `ic` that spans a 2d
+velocity space.
+
+Defined so that `ivperp_func(ic_func(ivpa,ivperp,nvpa), nvpa) == ivperp`.
+
+See also [`ic_func`](@ref), [`ivpa_func`](@ref).
+"""
+function ivperp_func(ic::mk_int,nvpa::mk_int)
+    # Floored integer division gives the number of complete runs of `nvpa` entries
+    # before `ic`. Using `fld` keeps the arithmetic exact (no floating-point quotient)
+    # and the result is converted to `mk_int` instead of a hard-coded `Int64`.
+    return mk_int(fld(ic - 1, nvpa) + 1)
+end
+
+"""
+    ivpa_func(ic::mk_int,nvpa::mk_int)
+
+Get the `vpa` index `ivpa` that corresponds to a 'linear index' `ic` that spans a 2d
+velocity space.
+
+Defined so that `ivpa_func(ic_func(ivpa,ivperp,nvpa), nvpa) == ivpa`.
+
+See also [`ic_func`](@ref), [`ivperp_func`](@ref).
+"""
+function ivpa_func(ic::mk_int,nvpa::mk_int)
+    # number of complete runs of `nvpa` entries that precede `ic`
+    full_runs = ivperp_func(ic,nvpa) - 1
+    # what is left after removing them is the vpa index
+    return ic - nvpa*full_runs
+end
+
+"""
+Function that returns the sparse matrix index
+used to directly construct the nonzero entries
+of a 2D assembled sparse matrix.
+"""
+function icsc_func(ivpa_local::mk_int,ivpap_local::mk_int,
+                   ielement_vpa::mk_int,
+                   ngrid_vpa::mk_int,nelement_vpa::mk_int,
+                   ivperp_local::mk_int,ivperpp_local::mk_int,
+                   ielement_vperp::mk_int,
+                   ngrid_vperp::mk_int,nelement_vperp::mk_int)
+    # Consecutive elements are offset by `ngrid^2 - 1` entries in each dimension.
+    # `nelement_vperp` is not needed for the index and is unused.
+    stride_vpa = ngrid_vpa^2 - 1
+    stride_vperp = ngrid_vperp^2 - 1
+    # total number of entries of the vpa pattern
+    ntot_vpa = (nelement_vpa - 1)*stride_vpa + ngrid_vpa^2
+
+    # zero-based positions within the vpa and vperp patterns
+    offset_vpa = ((ivpap_local - 1) + (ivpa_local - 1)*ngrid_vpa +
+                  (ielement_vpa - 1)*stride_vpa)
+    offset_vperp = ((ivperpp_local - 1) + (ivperp_local - 1)*ngrid_vperp +
+                    (ielement_vperp - 1)*stride_vperp)
+    # combine into a one-based index, with the vpa position varying fastest
+    return 1 + offset_vpa + ntot_vpa*offset_vperp
+end
+
+"""
+Struct to contain data needed to create a sparse matrix.
+"""
+struct sparse_matrix_constructor
+    # The three arrays have one entry per stored matrix element and are passed to
+    # `sparse(II,JJ,SS)` by `create_sparse_matrix()`.
+    # Row and column indices are stored as integers: `allocate_sparse_matrix_constructor()`
+    # creates `mk_int` arrays and the assign/assemble functions take `mk_int` indices, so
+    # declaring these fields as `mk_float` only forced a conversion (and copy) of the
+    # index arrays on construction.
+    # the Ith row
+    II::Array{mk_int,1}
+    # the Jth column
+    JJ::Array{mk_int,1}
+    # the data S[I,J]
+    SS::Array{mk_float,1}
+end
+
+"""
+Function to allocate an instance of `sparse_matrix_constructor`.
+"""
+function allocate_sparse_matrix_constructor(nsparse::mk_int)
+    # `nsparse` zero-initialised slots for the row indices, column indices and values
+    row_indices = zeros(mk_int, nsparse)
+    column_indices = zeros(mk_int, nsparse)
+    entries = zeros(mk_float, nsparse)
+    return sparse_matrix_constructor(row_indices, column_indices, entries)
+end
+
+"""
+Function to assign data to an instance of `sparse_matrix_constructor`.
+"""
+function assign_constructor_data!(data::sparse_matrix_constructor,icsc::mk_int,ii::mk_int,jj::mk_int,ss::mk_float)
+    # overwrite slot `icsc` with the triplet (row `ii`, column `jj`, value `ss`)
+    rows, columns, entries = data.II, data.JJ, data.SS
+    rows[icsc] = ii
+    columns[icsc] = jj
+    entries[icsc] = ss
+    return nothing
+end
+
+"""
+Function to assemble data in an instance of `sparse_matrix_constructor`. Instead of
+writing `data.SS[icsc] = ss`, as in `assign_constructor_data!()` we write `data.SS[icsc] += ss`.
+"""
+function assemble_constructor_data!(data::sparse_matrix_constructor,icsc::mk_int,ii::mk_int,jj::mk_int,ss::mk_float)
+    rows, columns, entries = data.II, data.JJ, data.SS
+    # the indices of slot `icsc` are overwritten on every call ...
+    rows[icsc] = ii
+    columns[icsc] = jj
+    # ... while the value is added to what the slot already holds
+    entries[icsc] = entries[icsc] + ss
+    return nothing
+end
+
+"""
+Wrapper function to create a sparse matrix with an instance of `sparse_matrix_constructor`
+and `sparse()`.
+"""
+function create_sparse_matrix(data::sparse_matrix_constructor)
+    # hand the stored (row, column, value) triplets to `sparse()`
+    rows, columns, entries = data.II, data.JJ, data.SS
+    return sparse(rows, columns, entries)
+end
+
+"""
+Function to allocate an instance of `vpa_vperp_boundary_data`.
+"""
+function allocate_boundary_data(vpa,vperp)
+    # These buffers are used to evaluate the collision operator for a single species at
+    # a single spatial point. They are shared-memory arrays created on
+    # `comm_anyv_subblock[]` rather than on the full `comm_block[]`, so they are shared
+    # only by the processes of one subblock; subblocks that are calculating the
+    # collision operator at different spatial points therefore do not interfere with
+    # each others' buffer arrays.
+    subblock_comm = comm_anyv_subblock[]
+    # one entry per vperp point for each of the two vpa boundaries
+    vpa_lower = allocate_shared_float(vperp.n; comm=subblock_comm)
+    vpa_upper = allocate_shared_float(vperp.n; comm=subblock_comm)
+    # one entry per vpa point for the upper vperp boundary
+    vperp_upper = allocate_shared_float(vpa.n; comm=subblock_comm)
+    return vpa_vperp_boundary_data(vpa_lower, vpa_upper, vperp_upper)
+end
+
+"""
+Function to copy precomputed (exact) values from the boundaries of a 2D array into an
+instance of `vpa_vperp_boundary_data`. Used in testing.
+"""
+function assign_exact_boundary_data!(func_data::vpa_vperp_boundary_data,
+                                        func_exact,vpa,vperp)
+    begin_anyv_region()
+    # indices of the last grid point in each dimension
+    ivpa_max, ivperp_max = vpa.n, vperp.n
+    @anyv_serial_region begin
+        for ivpa in 1:ivpa_max
+            func_data.upper_boundary_vperp[ivpa] = func_exact[ivpa,ivperp_max]
+        end
+        for ivperp in 1:ivperp_max
+            func_data.lower_boundary_vpa[ivperp] = func_exact[1,ivperp]
+            func_data.upper_boundary_vpa[ivperp] = func_exact[ivpa_max,ivperp]
+        end
+    end
+    return nothing
+end
+
+"""
+Function to allocate an instance of `rosenbluth_potential_boundary_data`.
+"""
+function allocate_rosenbluth_potential_boundary_data(vpa,vperp)
+    # all eight members are allocated identically, one call per positional argument
+    new_data() = allocate_boundary_data(vpa,vperp)
+    return rosenbluth_potential_boundary_data(
+        new_data(), # H_data
+        new_data(), # dHdvpa_data
+        new_data(), # dHdvperp_data
+        new_data(), # G_data
+        new_data(), # dGdvperp_data
+        new_data(), # d2Gdvperp2_data
+        new_data(), # d2Gdvperpdvpa_data
+        new_data()) # d2Gdvpa2_data
+end
+
+"""
+Fill every member of `rpbd` from precomputed (exact) arrays, in place. Used in testing.
+"""
+function calculate_rosenbluth_potential_boundary_data_exact!(rpbd::rosenbluth_potential_boundary_data,
+  H_exact,dHdvpa_exact,dHdvperp_exact,G_exact,dGdvperp_exact,
+  d2Gdvperp2_exact,d2Gdvperpdvpa_exact,d2Gdvpa2_exact,
+  vpa,vperp)
+    fill_boundaries!(member,exact) = assign_exact_boundary_data!(member,exact,vpa,vperp)
+    fill_boundaries!(rpbd.H_data,H_exact)
+    fill_boundaries!(rpbd.dHdvpa_data,dHdvpa_exact)
+    fill_boundaries!(rpbd.dHdvperp_data,dHdvperp_exact)
+    fill_boundaries!(rpbd.G_data,G_exact)
+    fill_boundaries!(rpbd.dGdvperp_data,dGdvperp_exact)
+    fill_boundaries!(rpbd.d2Gdvperp2_data,d2Gdvperp2_exact)
+    fill_boundaries!(rpbd.d2Gdvperpdvpa_data,d2Gdvperpdvpa_exact)
+    fill_boundaries!(rpbd.d2Gdvpa2_data,d2Gdvpa2_exact)
+    return nothing
+end
+
+"""
+Function to evaluate, by direct summation against precomputed integration weights
+with dimension 4, the values of one of the Rosenbluth potentials on the lower and
+upper `vpa` boundaries and the upper `vperp` boundary of the `(vpa,vperp)` domain.
+The result is stored in an instance of `vpa_vperp_boundary_data`.
+Used in testing.
+"""
+function calculate_boundary_data!(func_data::vpa_vperp_boundary_data,
+                                  weight::MPISharedArray{mk_float,4},func_input,vpa,vperp)
+    nvpa_total, nvperp_total = vpa.n, vperp.n
+    # `weight` is indexed as [ivpap,ivperpp,ivpa,ivperp]: the first pair labels the point
+    # summed over, the second pair the grid point at which the result is evaluated
+    # lower and upper vpa boundaries: one entry per vperp grid point
+    begin_anyv_vperp_region(no_synchronize=true)
+    @loop_vperp ivperp begin
+        func_data.lower_boundary_vpa[ivperp] = 0.0
+        func_data.upper_boundary_vpa[ivperp] = 0.0
+        # `ivpap` is the innermost (fastest-varying) index of the double sum
+        for ivperpp in 1:nvperp_total, ivpap in 1:nvpa_total
+            integrand = func_input[ivpap,ivperpp]
+            func_data.lower_boundary_vpa[ivperp] += weight[ivpap,ivperpp,1,ivperp]*integrand
+            func_data.upper_boundary_vpa[ivperp] += weight[ivpap,ivperpp,nvpa_total,ivperp]*integrand
+        end
+    end
+    # upper vperp boundary: one entry per vpa grid point
+    begin_anyv_vpa_region(no_synchronize=true)
+    @loop_vpa ivpa begin
+        func_data.upper_boundary_vperp[ivpa] = 0.0
+        for ivperpp in 1:nvperp_total, ivpap in 1:nvpa_total
+            func_data.upper_boundary_vperp[ivpa] += weight[ivpap,ivperpp,ivpa,nvperp_total]*func_input[ivpap,ivperpp]
+        end
+    end
+    return nothing
+end
+
+"""
+Function to evaluate, by direct summation against precomputed integration weights
+with dimension 3, the values of one of the Rosenbluth potentials on the lower and
+upper `vpa` boundaries and the upper `vperp` boundary of the `(vpa,vperp)` domain.
+The result is stored in an instance of `vpa_vperp_boundary_data`.
+"""
+function calculate_boundary_data!(func_data::vpa_vperp_boundary_data,
+                                  weight::boundary_integration_weights_struct,
+                                  func_input,vpa,vperp)
+    nvpa_total, nvperp_total = vpa.n, vperp.n
+    # each member of `weight` is indexed as [ivpap,ivperpp,i]: the first pair labels the
+    # point summed over, `i` the position along the boundary where the result is stored
+    # lower and upper vpa boundaries: one entry per vperp grid point
+    begin_anyv_vperp_region(no_synchronize=true)
+    @loop_vperp ivperp begin
+        func_data.lower_boundary_vpa[ivperp] = 0.0
+        func_data.upper_boundary_vpa[ivperp] = 0.0
+        # `ivpap` is the innermost (fastest-varying) index of the double sum
+        for ivperpp in 1:nvperp_total, ivpap in 1:nvpa_total
+            integrand = func_input[ivpap,ivperpp]
+            func_data.lower_boundary_vpa[ivperp] += weight.lower_vpa_boundary[ivpap,ivperpp,ivperp]*integrand
+            func_data.upper_boundary_vpa[ivperp] += weight.upper_vpa_boundary[ivpap,ivperpp,ivperp]*integrand
+        end
+    end
+    # upper vperp boundary: one entry per vpa grid point
+    begin_anyv_vpa_region(no_synchronize=true)
+    @loop_vpa ivpa begin
+        func_data.upper_boundary_vperp[ivpa] = 0.0
+        for ivperpp in 1:nvperp_total, ivpap in 1:nvpa_total
+            func_data.upper_boundary_vperp[ivpa] += weight.upper_vperp_boundary[ivpap,ivperpp,ivpa]*func_input[ivpap,ivperpp]
+        end
+    end
+    # NOTE(review): no explicit synchronization call follows the loops in this function
+    return nothing
+end
+
+"""
+Function to fill `rpbd` in place. Derivatives of `pdf` are stored in buffers held by `fkpl`;
+`pdf` and these derivatives are then integrated by `calculate_boundary_data!()` with weights from
+`fkpl`. `G_data`/`dGdvperp_data` are only updated if `calculate_GG`/`calculate_dGdvperp` is `true`.
+"""
+function calculate_rosenbluth_potential_boundary_data!(rpbd::rosenbluth_potential_boundary_data,
+    fkpl::Union{fokkerplanck_arrays_direct_integration_struct,fokkerplanck_boundary_data_arrays_struct},pdf,vpa,vperp,vpa_spectral,vperp_spectral;
+    calculate_GG=false,calculate_dGdvperp=false)
+    # buffers held by `fkpl` that receive the derivatives of `pdf` (overwritten below)
+    dfdvperp = fkpl.dfdvperp
+    dfdvpa = fkpl.dfdvpa
+    d2fdvperpdvpa = fkpl.d2fdvperpdvpa
+    # differentiate `pdf` in vperp, along each line of fixed `ivpa`
+    begin_anyv_vpa_region()
+    @loop_vpa ivpa begin
+        @views derivative!(dfdvperp[ivpa,:], pdf[ivpa,:], vperp, vperp_spectral)
+    end
+    begin_anyv_vperp_region()
+    @loop_vperp ivperp begin
+        # differentiate `pdf` and `dfdvperp` in vpa, along each line of fixed `ivperp`
+        @views derivative!(dfdvpa[:,ivperp], pdf[:,ivperp], vpa, vpa_spectral)
+        @views derivative!(d2fdvperpdvpa[:,ivperp], dfdvperp[:,ivperp], vpa, vpa_spectral)
+    end
+    # ensure data is synchronized: the integrations below read the derivative buffers at every grid point
+    _anyv_subblock_synchronize()
+    # carry out the numerical integration, pairing each member of `rpbd` with a weight array and an integrand
+    calculate_boundary_data!(rpbd.H_data,fkpl.H0_weights,pdf,vpa,vperp)
+    calculate_boundary_data!(rpbd.dHdvpa_data,fkpl.H0_weights,dfdvpa,vpa,vperp)
+    calculate_boundary_data!(rpbd.dHdvperp_data,fkpl.H1_weights,dfdvperp,vpa,vperp)
+    if calculate_GG
+        calculate_boundary_data!(rpbd.G_data,fkpl.G0_weights,pdf,vpa,vperp)
+    end
+    if calculate_dGdvperp
+        calculate_boundary_data!(rpbd.dGdvperp_data,fkpl.G1_weights,dfdvperp,vpa,vperp)
+    end
+    calculate_boundary_data!(rpbd.d2Gdvperp2_data,fkpl.H2_weights,dfdvperp,vpa,vperp)
+    calculate_boundary_data!(rpbd.d2Gdvperpdvpa_data,fkpl.G1_weights,d2fdvperpdvpa,vpa,vperp)
+    calculate_boundary_data!(rpbd.d2Gdvpa2_data,fkpl.H3_weights,dfdvpa,vpa,vperp)
+    # NOTE(review): no explicit synchronization call follows the integrations above
+    return nothing
+end
+
+function multipole_H(vpa::mk_float,vperp::mk_float,Inn_vec::Vector{mk_float})
+   (I00, I10, I20, I30, I40, I50, I60, I70, I80, 
+   I02, I12, I22, I32, I42, I52, I62,
+   I04, I14, I24, I34, I44,
+   I06, I16, I26,
+   I08) = Inn_vec
+   # sum up the series: each coefficient multiplies a polynomial in (vpa,vperp) over a power of (vpa^2 + vperp^2)
+   H_series = (I80*((128*vpa^8 - 1792*vpa^6*vperp^2 + 3360*vpa^4*vperp^4 - 1120*vpa^2*vperp^6 + 35*vperp^8)/(128*(vpa^2 + vperp^2)^8))
+             +I70*((vpa*(16*vpa^6 - 168*vpa^4*vperp^2 + 210*vpa^2*vperp^4 - 35*vperp^6))/(16*(vpa^2 + vperp^2)^7))
+             +I62*((-7*(128*vpa^8 - 1792*vpa^6*vperp^2 + 3360*vpa^4*vperp^4 - 1120*vpa^2*vperp^6 + 35*vperp^8))/(64*(vpa^2 + vperp^2)^8))
+             +I60*((16*vpa^6 - 120*vpa^4*vperp^2 + 90*vpa^2*vperp^4 - 5*vperp^6)/(16*(vpa^2 + vperp^2)^6))
+             +I52*((21*vpa*(-16*vpa^6 + 168*vpa^4*vperp^2 - 210*vpa^2*vperp^4 + 35*vperp^6))/(32*(vpa^2 + vperp^2)^7))
+             +I50*((8*vpa^5 - 40*vpa^3*vperp^2 + 15*vpa*vperp^4)/(8*(vpa^2 + vperp^2)^5))
+             +I44*((105*(128*vpa^8 - 1792*vpa^6*vperp^2 + 3360*vpa^4*vperp^4 - 1120*vpa^2*vperp^6 + 35*vperp^8))/(512*(vpa^2 + vperp^2)^8))
+             +I42*((-15*(16*vpa^6 - 120*vpa^4*vperp^2 + 90*vpa^2*vperp^4 - 5*vperp^6))/(32*(vpa^2 + vperp^2)^6))
+             +I40*((8*vpa^4 - 24*vpa^2*vperp^2 + 3*vperp^4)/(8*(vpa^2 + vperp^2)^4))
+             +I34*((105*vpa*(16*vpa^6 - 168*vpa^4*vperp^2 + 210*vpa^2*vperp^4 - 35*vperp^6))/(128*(vpa^2 + vperp^2)^7))
+             +I32*((-5*vpa*(8*vpa^4 - 40*vpa^2*vperp^2 + 15*vperp^4))/(8*(vpa^2 + vperp^2)^5))
+             +I30*((vpa*(2*vpa^2 - 3*vperp^2))/(2*(vpa^2 + vperp^2)^3))
+             +I26*((-35*(128*vpa^8 - 1792*vpa^6*vperp^2 + 3360*vpa^4*vperp^4 - 1120*vpa^2*vperp^6 + 35*vperp^8))/(512*(vpa^2 + vperp^2)^8))
+             +I24*((45*(16*vpa^6 - 120*vpa^4*vperp^2 + 90*vpa^2*vperp^4 - 5*vperp^6))/(128*(vpa^2 + vperp^2)^6))
+             +I22*((-3*(8*vpa^4 - 24*vpa^2*vperp^2 + 3*vperp^4))/(8*(vpa^2 + vperp^2)^4))
+             +I20*(-1/2*(-2*vpa^2 + vperp^2)/(vpa^2 + vperp^2)^2)
+             +I16*((-35*vpa*(16*vpa^6 - 168*vpa^4*vperp^2 + 210*vpa^2*vperp^4 - 35*vperp^6))/(256*(vpa^2 + vperp^2)^7))
+             +I14*((15*vpa*(8*vpa^4 - 40*vpa^2*vperp^2 + 15*vperp^4))/(64*(vpa^2 + vperp^2)^5))
+             +I12*((-6*vpa^3 + 9*vpa*vperp^2)/(4*(vpa^2 + vperp^2)^3))
+             +I10*(vpa/(vpa^2 + vperp^2))
+             +I08*((35*(128*vpa^8 - 1792*vpa^6*vperp^2 + 3360*vpa^4*vperp^4 - 1120*vpa^2*vperp^6 + 35*vperp^8))/(16384*(vpa^2 + vperp^2)^8))
+             +I06*((-5*(16*vpa^6 - 120*vpa^4*vperp^2 + 90*vpa^2*vperp^4 - 5*vperp^6))/(256*(vpa^2 + vperp^2)^6))
+             +I04*((3*(8*vpa^4 - 24*vpa^2*vperp^2 + 3*vperp^4))/(64*(vpa^2 + vperp^2)^4))
+             +I02*((-2*vpa^2 + vperp^2)/(4*(vpa^2 + vperp^2)^2))
+             +I00*(1))
+   # multiply by overall prefactor; NOTE: the divisions above make the result non-finite at vpa = vperp = 0
+   H_series *= ((vpa^2 + vperp^2)^(-1/2))
+   return H_series
+end
+
+function multipole_dHdvpa(vpa::mk_float,vperp::mk_float,Inn_vec::Vector{mk_float})
+   (I00, I10, I20, I30, I40, I50, I60, I70, I80, 
+   I02, I12, I22, I32, I42, I52, I62,
+   I04, I14, I24, I34, I44,
+   I06, I16, I26,
+   I08) = Inn_vec
+   # sum up the series: each coefficient multiplies a polynomial in (vpa,vperp) over a power of (vpa^2 + vperp^2)
+   dHdvpa_series = (I80*((9*vpa*(128*vpa^8 - 2304*vpa^6*vperp^2 + 6048*vpa^4*vperp^4 - 3360*vpa^2*vperp^6 + 315*vperp^8))/(128*(vpa^2 + vperp^2)^8))
+                +I70*((128*vpa^8 - 1792*vpa^6*vperp^2 + 3360*vpa^4*vperp^4 - 1120*vpa^2*vperp^6 + 35*vperp^8)/(16*(vpa^2 + vperp^2)^7))
+                +I62*((-63*(128*vpa^9 - 2304*vpa^7*vperp^2 + 6048*vpa^5*vperp^4 - 3360*vpa^3*vperp^6 + 315*vpa*vperp^8))/(64*(vpa^2 + vperp^2)^8))
+                +I60*((7*vpa*(16*vpa^6 - 168*vpa^4*vperp^2 + 210*vpa^2*vperp^4 - 35*vperp^6))/(16*(vpa^2 + vperp^2)^6))
+                +I52*((-21*(128*vpa^8 - 1792*vpa^6*vperp^2 + 3360*vpa^4*vperp^4 - 1120*vpa^2*vperp^6 + 35*vperp^8))/(32*(vpa^2 + vperp^2)^7))
+                +I50*((3*(16*vpa^6 - 120*vpa^4*vperp^2 + 90*vpa^2*vperp^4 - 5*vperp^6))/(8*(vpa^2 + vperp^2)^5))
+                +I44*((945*vpa*(128*vpa^8 - 2304*vpa^6*vperp^2 + 6048*vpa^4*vperp^4 - 3360*vpa^2*vperp^6 + 315*vperp^8))/(512*(vpa^2 + vperp^2)^8))
+                +I42*((-105*vpa*(16*vpa^6 - 168*vpa^4*vperp^2 + 210*vpa^2*vperp^4 - 35*vperp^6))/(32*(vpa^2 + vperp^2)^6))
+                +I40*((5*vpa*(8*vpa^4 - 40*vpa^2*vperp^2 + 15*vperp^4))/(8*(vpa^2 + vperp^2)^4))
+                +I34*((105*(128*vpa^8 - 1792*vpa^6*vperp^2 + 3360*vpa^4*vperp^4 - 1120*vpa^2*vperp^6 + 35*vperp^8))/(128*(vpa^2 + vperp^2)^7))
+                +I32*((-15*(16*vpa^6 - 120*vpa^4*vperp^2 + 90*vpa^2*vperp^4 - 5*vperp^6))/(8*(vpa^2 + vperp^2)^5))
+                +I30*((8*vpa^4 - 24*vpa^2*vperp^2 + 3*vperp^4)/(2*(vpa^2 + vperp^2)^3))
+                +I26*((-315*vpa*(128*vpa^8 - 2304*vpa^6*vperp^2 + 6048*vpa^4*vperp^4 - 3360*vpa^2*vperp^6 + 315*vperp^8))/(512*(vpa^2 + vperp^2)^8))
+                +I24*((315*vpa*(16*vpa^6 - 168*vpa^4*vperp^2 + 210*vpa^2*vperp^4 - 35*vperp^6))/(128*(vpa^2 + vperp^2)^6))
+                +I22*((-15*vpa*(8*vpa^4 - 40*vpa^2*vperp^2 + 15*vperp^4))/(8*(vpa^2 + vperp^2)^4))
+                +I20*((3*vpa*(2*vpa^2 - 3*vperp^2))/(2*(vpa^2 + vperp^2)^2))
+                +I16*((-35*(128*vpa^8 - 1792*vpa^6*vperp^2 + 3360*vpa^4*vperp^4 - 1120*vpa^2*vperp^6 + 35*vperp^8))/(256*(vpa^2 + vperp^2)^7))
+                +I14*((45*(16*vpa^6 - 120*vpa^4*vperp^2 + 90*vpa^2*vperp^4 - 5*vperp^6))/(64*(vpa^2 + vperp^2)^5))
+                +I12*((-3*(8*vpa^4 - 24*vpa^2*vperp^2 + 3*vperp^4))/(4*(vpa^2 + vperp^2)^3))
+                +I10*(-1 + (3*vpa^2)/(vpa^2 + vperp^2))
+                +I08*((315*vpa*(128*vpa^8 - 2304*vpa^6*vperp^2 + 6048*vpa^4*vperp^4 - 3360*vpa^2*vperp^6 + 315*vperp^8))/(16384*(vpa^2 + vperp^2)^8))
+                +I06*((-35*vpa*(16*vpa^6 - 168*vpa^4*vperp^2 + 210*vpa^2*vperp^4 - 35*vperp^6))/(256*(vpa^2 + vperp^2)^6))
+                +I04*((15*vpa*(8*vpa^4 - 40*vpa^2*vperp^2 + 15*vperp^4))/(64*(vpa^2 + vperp^2)^4))
+                +I02*((-6*vpa^3 + 9*vpa*vperp^2)/(4*(vpa^2 + vperp^2)^2))
+                +I00*(vpa))
+   # multiply by overall prefactor; NOTE: the divisions above make the result non-finite at vpa = vperp = 0
+   dHdvpa_series *= -((vpa^2 + vperp^2)^(-3/2))   
+   return dHdvpa_series
+end
+
+function multipole_dHdvperp(vpa::mk_float,vperp::mk_float,Inn_vec::Vector{mk_float})
+   (I00, I10, I20, I30, I40, I50, I60, I70, I80, 
+   I02, I12, I22, I32, I42, I52, I62,
+   I04, I14, I24, I34, I44,
+   I06, I16, I26,
+   I08) = Inn_vec
+   # sum up the series: each coefficient multiplies a polynomial in (vpa,vperp) over a power of (vpa^2 + vperp^2)
+   dHdvperp_series = (I80*((45*vperp*(128*vpa^8 - 896*vpa^6*vperp^2 + 1120*vpa^4*vperp^4 - 280*vpa^2*vperp^6 + 7*vperp^8))/(128*(vpa^2 + vperp^2)^8))
+                +I70*((9*vpa*vperp*(64*vpa^6 - 336*vpa^4*vperp^2 + 280*vpa^2*vperp^4 - 35*vperp^6))/(16*(vpa^2 + vperp^2)^7))
+                +I62*((-315*(128*vpa^8*vperp - 896*vpa^6*vperp^3 + 1120*vpa^4*vperp^5 - 280*vpa^2*vperp^7 + 7*vperp^9))/(64*(vpa^2 + vperp^2)^8))
+                +I60*((7*vperp*(64*vpa^6 - 240*vpa^4*vperp^2 + 120*vpa^2*vperp^4 - 5*vperp^6))/(16*(vpa^2 + vperp^2)^6))
+                +I52*((-189*vpa*vperp*(64*vpa^6 - 336*vpa^4*vperp^2 + 280*vpa^2*vperp^4 - 35*vperp^6))/(32*(vpa^2 + vperp^2)^7))
+                +I50*((21*vpa*vperp*(8*vpa^4 - 20*vpa^2*vperp^2 + 5*vperp^4))/(8*(vpa^2 + vperp^2)^5))
+                +I44*((4725*vperp*(128*vpa^8 - 896*vpa^6*vperp^2 + 1120*vpa^4*vperp^4 - 280*vpa^2*vperp^6 + 7*vperp^8))/(512*(vpa^2 + vperp^2)^8))
+                +I42*((105*vperp*(-64*vpa^6 + 240*vpa^4*vperp^2 - 120*vpa^2*vperp^4 + 5*vperp^6))/(32*(vpa^2 + vperp^2)^6))
+                +I40*((15*vperp*(8*vpa^4 - 12*vpa^2*vperp^2 + vperp^4))/(8*(vpa^2 + vperp^2)^4))
+                +I34*((945*vpa*vperp*(64*vpa^6 - 336*vpa^4*vperp^2 + 280*vpa^2*vperp^4 - 35*vperp^6))/(128*(vpa^2 + vperp^2)^7))
+                +I32*((-105*vpa*vperp*(8*vpa^4 - 20*vpa^2*vperp^2 + 5*vperp^4))/(8*(vpa^2 + vperp^2)^5))
+                +I30*((5*vpa*vperp*(4*vpa^2 - 3*vperp^2))/(2*(vpa^2 + vperp^2)^3))
+                +I26*((-1575*vperp*(128*vpa^8 - 896*vpa^6*vperp^2 + 1120*vpa^4*vperp^4 - 280*vpa^2*vperp^6 + 7*vperp^8))/(512*(vpa^2 + vperp^2)^8))
+                +I24*((315*vperp*(64*vpa^6 - 240*vpa^4*vperp^2 + 120*vpa^2*vperp^4 - 5*vperp^6))/(128*(vpa^2 + vperp^2)^6))
+                +I22*((-45*vperp*(8*vpa^4 - 12*vpa^2*vperp^2 + vperp^4))/(8*(vpa^2 + vperp^2)^4))
+                +I20*((-3*vperp*(-4*vpa^2 + vperp^2))/(2*(vpa^2 + vperp^2)^2))
+                +I16*((-315*vpa*vperp*(64*vpa^6 - 336*vpa^4*vperp^2 + 280*vpa^2*vperp^4 - 35*vperp^6))/(256*(vpa^2 + vperp^2)^7))
+                +I14*((315*vpa*vperp*(8*vpa^4 - 20*vpa^2*vperp^2 + 5*vperp^4))/(64*(vpa^2 + vperp^2)^5))
+                +I12*((-15*vpa*vperp*(4*vpa^2 - 3*vperp^2))/(4*(vpa^2 + vperp^2)^3))
+                +I10*((3*vpa*vperp)/(vpa^2 + vperp^2))
+                +I08*((1575*(128*vpa^8*vperp - 896*vpa^6*vperp^3 + 1120*vpa^4*vperp^5 - 280*vpa^2*vperp^7 + 7*vperp^9))/(16384*(vpa^2 + vperp^2)^8))
+                +I06*((-35*(64*vpa^6*vperp - 240*vpa^4*vperp^3 + 120*vpa^2*vperp^5 - 5*vperp^7))/(256*(vpa^2 + vperp^2)^6))
+                +I04*((45*(8*vpa^4*vperp - 12*vpa^2*vperp^3 + vperp^5))/(64*(vpa^2 + vperp^2)^4))
+                +I02*((3*vperp*(-4*vpa^2 + vperp^2))/(4*(vpa^2 + vperp^2)^2))
+                +I00*(vperp))
+   # multiply by overall prefactor; NOTE: the divisions above make the result non-finite at vpa = vperp = 0
+   dHdvperp_series *= -((vpa^2 + vperp^2)^(-3/2))
+   return dHdvperp_series
+end
+
+function multipole_G(vpa::mk_float,vperp::mk_float,Inn_vec::Vector{mk_float})
+   (I00, I10, I20, I30, I40, I50, I60, I70, I80, 
+   I02, I12, I22, I32, I42, I52, I62,
+   I04, I14, I24, I34, I44,
+   I06, I16, I26,
+   I08) = Inn_vec
+   # sum up the series: each coefficient multiplies a polynomial in (vpa,vperp) over a power of (vpa^2 + vperp^2)
+   G_series = (I80*((64*vpa^6*vperp^2 - 240*vpa^4*vperp^4 + 120*vpa^2*vperp^6 - 5*vperp^8)/(128*(vpa^2 + vperp^2)^8))
+             +I70*((vpa*vperp^2*(8*vpa^4 - 20*vpa^2*vperp^2 + 5*vperp^4))/(16*(vpa^2 + vperp^2)^7))
+             +I62*((32*vpa^8 - 656*vpa^6*vperp^2 + 1620*vpa^4*vperp^4 - 670*vpa^2*vperp^6 + 25*vperp^8)/(64*(vpa^2 + vperp^2)^8))
+             +I60*((vperp^2*(8*vpa^4 - 12*vpa^2*vperp^2 + vperp^4))/(16*(vpa^2 + vperp^2)^6))
+             +I52*((vpa*(16*vpa^6 - 232*vpa^4*vperp^2 + 370*vpa^2*vperp^4 - 75*vperp^6))/(32*(vpa^2 + vperp^2)^7))
+             +I50*((vpa*vperp^2*(4*vpa^2 - 3*vperp^2))/(8*(vpa^2 + vperp^2)^5))
+             +I44*((-15*(64*vpa^8 - 864*vpa^6*vperp^2 + 1560*vpa^4*vperp^4 - 500*vpa^2*vperp^6 + 15*vperp^8))/(512*(vpa^2 + vperp^2)^8))
+             +I42*((16*vpa^6 - 152*vpa^4*vperp^2 + 138*vpa^2*vperp^4 - 9*vperp^6)/(32*(vpa^2 + vperp^2)^6))
+             +I40*(-1/8*(vperp^2*(-4*vpa^2 + vperp^2))/(vpa^2 + vperp^2)^4)
+             +I34*((5*vpa*(-32*vpa^6 + 296*vpa^4*vperp^2 - 320*vpa^2*vperp^4 + 45*vperp^6))/(128*(vpa^2 + vperp^2)^7))
+             +I32*((vpa*(4*vpa^4 - 22*vpa^2*vperp^2 + 9*vperp^4))/(8*(vpa^2 + vperp^2)^5))
+             +I30*((vpa*vperp^2)/(2*(vpa^2 + vperp^2)^3))
+             +I26*((5*(96*vpa^8 - 1072*vpa^6*vperp^2 + 1500*vpa^4*vperp^4 - 330*vpa^2*vperp^6 + 5*vperp^8))/(512*(vpa^2 + vperp^2)^8))
+             +I24*((3*(-32*vpa^6 + 184*vpa^4*vperp^2 - 96*vpa^2*vperp^4 + 3*vperp^6))/(128*(vpa^2 + vperp^2)^6))
+             +I22*((4*vpa^4 - 10*vpa^2*vperp^2 + vperp^4)/(8*(vpa^2 + vperp^2)^4))
+             +I20*(vperp^2/(2*(vpa^2 + vperp^2)^2))
+             +I16*((5*vpa*(16*vpa^6 - 120*vpa^4*vperp^2 + 90*vpa^2*vperp^4 - 5*vperp^6))/(256*(vpa^2 + vperp^2)^7))
+             +I14*((-3*vpa*(8*vpa^4 - 24*vpa^2*vperp^2 + 3*vperp^4))/(64*(vpa^2 + vperp^2)^5))
+             +I12*((vpa*(2*vpa^2 - vperp^2))/(4*(vpa^2 + vperp^2)^3))
+             +I10*(-(vpa/(vpa^2 + vperp^2)))
+             +I08*((5*(-128*vpa^8 + 1280*vpa^6*vperp^2 - 1440*vpa^4*vperp^4 + 160*vpa^2*vperp^6 + 5*vperp^8))/(16384*(vpa^2 + vperp^2)^8))
+             +I06*((16*vpa^6 - 72*vpa^4*vperp^2 + 18*vpa^2*vperp^4 + vperp^6)/(256*(vpa^2 + vperp^2)^6))
+             +I04*((-8*vpa^4 + 8*vpa^2*vperp^2 + vperp^4)/(64*(vpa^2 + vperp^2)^4))
+             +I02*((2*vpa^2 + vperp^2)/(4*(vpa^2 + vperp^2)^2))
+             +I00*(1))
+   # multiply by overall prefactor; NOTE: the divisions above make the result non-finite at vpa = vperp = 0
+   G_series *= ((vpa^2 + vperp^2)^(1/2))   
+   return G_series
+end
+
+function multipole_dGdvperp(vpa::mk_float,vperp::mk_float,Inn_vec::Vector{mk_float})
+   (I00, I10, I20, I30, I40, I50, I60, I70, I80, 
+   I02, I12, I22, I32, I42, I52, I62,
+   I04, I14, I24, I34, I44,
+   I06, I16, I26,
+   I08) = Inn_vec
+   # sum up the series: each coefficient multiplies a polynomial in (vpa,vperp) over a power of (vpa^2 + vperp^2)
+   dGdvperp_series = (I80*((vperp*(128*vpa^8 - 1792*vpa^6*vperp^2 + 3360*vpa^4*vperp^4 - 1120*vpa^2*vperp^6 + 35*vperp^8))/(128*(vpa^2 + vperp^2)^8))
+                   +I70*((vpa*vperp*(16*vpa^6 - 168*vpa^4*vperp^2 + 210*vpa^2*vperp^4 - 35*vperp^6))/(16*(vpa^2 + vperp^2)^7))
+                   +I62*((-7*(256*vpa^8*vperp - 2144*vpa^6*vperp^3 + 3120*vpa^4*vperp^5 - 890*vpa^2*vperp^7 + 25*vperp^9))/(64*(vpa^2 + vperp^2)^8))
+                   +I60*((vperp*(16*vpa^6 - 120*vpa^4*vperp^2 + 90*vpa^2*vperp^4 - 5*vperp^6))/(16*(vpa^2 + vperp^2)^6))
+                   +I52*((21*vpa*vperp*(-32*vpa^6 + 192*vpa^4*vperp^2 - 180*vpa^2*vperp^4 + 25*vperp^6))/(32*(vpa^2 + vperp^2)^7))
+                   +I50*((8*vpa^5*vperp - 40*vpa^3*vperp^3 + 15*vpa*vperp^5)/(8*(vpa^2 + vperp^2)^5))
+                   +I44*((315*vperp*(128*vpa^8 - 832*vpa^6*vperp^2 + 960*vpa^4*vperp^4 - 220*vpa^2*vperp^6 + 5*vperp^8))/(512*(vpa^2 + vperp^2)^8))
+                   +I42*((15*vperp*(-32*vpa^6 + 128*vpa^4*vperp^2 - 68*vpa^2*vperp^4 + 3*vperp^6))/(32*(vpa^2 + vperp^2)^6))
+                   +I40*((vperp*(8*vpa^4 - 24*vpa^2*vperp^2 + 3*vperp^4))/(8*(vpa^2 + vperp^2)^4))
+                   +I34*((315*vpa*vperp*(16*vpa^6 - 72*vpa^4*vperp^2 + 50*vpa^2*vperp^4 - 5*vperp^6))/(128*(vpa^2 + vperp^2)^7))
+                   +I32*((-5*vpa*vperp*(16*vpa^4 - 38*vpa^2*vperp^2 + 9*vperp^4))/(8*(vpa^2 + vperp^2)^5))
+                   +I30*((vpa*vperp*(2*vpa^2 - 3*vperp^2))/(2*(vpa^2 + vperp^2)^3))
+                   +I26*((-35*vperp*(512*vpa^8 - 2848*vpa^6*vperp^2 + 2640*vpa^4*vperp^4 - 430*vpa^2*vperp^6 + 5*vperp^8))/(512*(vpa^2 + vperp^2)^8))
+                   +I24*((-45*vperp*(-48*vpa^6 + 136*vpa^4*vperp^2 - 46*vpa^2*vperp^4 + vperp^6))/(128*(vpa^2 + vperp^2)^6))
+                   +I22*((-3*vperp*(16*vpa^4 - 18*vpa^2*vperp^2 + vperp^4))/(8*(vpa^2 + vperp^2)^4))
+                   +I20*(-1/2*(vperp*(-2*vpa^2 + vperp^2))/(vpa^2 + vperp^2)^2)
+                   +I16*((-35*vpa*vperp*(64*vpa^6 - 240*vpa^4*vperp^2 + 120*vpa^2*vperp^4 - 5*vperp^6))/(256*(vpa^2 + vperp^2)^7))
+                   +I14*((45*vpa*vperp*(8*vpa^4 - 12*vpa^2*vperp^2 + vperp^4))/(64*(vpa^2 + vperp^2)^5))
+                   +I12*((3*vpa*vperp*(-4*vpa^2 + vperp^2))/(4*(vpa^2 + vperp^2)^3))
+                   +I10*((vpa*vperp)/(vpa^2 + vperp^2))
+                   +I08*((175*(128*vpa^8*vperp - 640*vpa^6*vperp^3 + 480*vpa^4*vperp^5 - 40*vpa^2*vperp^7 - vperp^9))/(16384*(vpa^2 + vperp^2)^8))
+                   +I06*((-5*(64*vpa^6*vperp - 144*vpa^4*vperp^3 + 24*vpa^2*vperp^5 + vperp^7))/(256*(vpa^2 + vperp^2)^6))
+                   +I04*((3*(24*vpa^4*vperp - 12*vpa^2*vperp^3 - vperp^5))/(64*(vpa^2 + vperp^2)^4))
+                   +I02*(-1/4*(vperp*(4*vpa^2 + vperp^2))/(vpa^2 + vperp^2)^2)
+                   +I00*(vperp))
+   # multiply by overall prefactor; NOTE: the divisions above make the result non-finite at vpa = vperp = 0
+   dGdvperp_series *= ((vpa^2 + vperp^2)^(-1/2))
+   return dGdvperp_series
+end
+
+function multipole_d2Gdvperp2(vpa::mk_float,vperp::mk_float,Inn_vec::Vector{mk_float})
+   (I00, I10, I20, I30, I40, I50, I60, I70, I80, 
+   I02, I12, I22, I32, I42, I52, I62,
+   I04, I14, I24, I34, I44,
+   I06, I16, I26,
+   I08) = Inn_vec
+   # sum up the series: each coefficient multiplies a polynomial in (vpa,vperp) over a power of (vpa^2 + vperp^2)
+   d2Gdvperp2_series = (I80*((128*vpa^10 - 7424*vpa^8*vperp^2 + 41888*vpa^6*vperp^4 - 48160*vpa^4*vperp^6 + 11515*vpa^2*vperp^8 - 280*vperp^10)/(128*(vpa^2 + vperp^2)^8))
+                   +I70*((16*vpa^9 - 728*vpa^7*vperp^2 + 3066*vpa^5*vperp^4 - 2345*vpa^3*vperp^6 + 280*vpa*vperp^8)/(16*(vpa^2 + vperp^2)^7))
+                   +I62*((-7*(256*vpa^10 - 10528*vpa^8*vperp^2 + 45616*vpa^6*vperp^4 - 43670*vpa^4*vperp^6 + 9125*vpa^2*vperp^8 - 200*vperp^10))/(64*(vpa^2 + vperp^2)^8))
+                   +I60*((16*vpa^8 - 552*vpa^6*vperp^2 + 1650*vpa^4*vperp^4 - 755*vpa^2*vperp^6 + 30*vperp^8)/(16*(vpa^2 + vperp^2)^6))
+                   +I52*((-21*(32*vpa^9 - 1024*vpa^7*vperp^2 + 3204*vpa^5*vperp^4 - 1975*vpa^3*vperp^6 + 200*vpa*vperp^8))/(32*(vpa^2 + vperp^2)^7))
+                   +I50*((8*vpa^7 - 200*vpa^5*vperp^2 + 395*vpa^3*vperp^4 - 90*vpa*vperp^6)/(8*(vpa^2 + vperp^2)^5))
+                   +I44*((315*(128*vpa^10 - 4544*vpa^8*vperp^2 + 16448*vpa^6*vperp^4 - 13060*vpa^4*vperp^6 + 2245*vpa^2*vperp^8 - 40*vperp^10))/(512*(vpa^2 + vperp^2)^8))
+                   +I42*((-15*(32*vpa^8 - 768*vpa^6*vperp^2 + 1620*vpa^4*vperp^4 - 565*vpa^2*vperp^6 + 18*vperp^8))/(32*(vpa^2 + vperp^2)^6))
+                   +I40*((8*vpa^6 - 136*vpa^4*vperp^2 + 159*vpa^2*vperp^4 - 12*vperp^6)/(8*(vpa^2 + vperp^2)^4))
+                   +I34*((315*vpa*(16*vpa^8 - 440*vpa^6*vperp^2 + 1114*vpa^4*vperp^4 - 535*vpa^2*vperp^6 + 40*vperp^8))/(128*(vpa^2 + vperp^2)^7))
+                   +I32*((5*vpa*(-16*vpa^6 + 274*vpa^4*vperp^2 - 349*vpa^2*vperp^4 + 54*vperp^6))/(8*(vpa^2 + vperp^2)^5))
+                   +I30*((vpa*(2*vpa^4 - 21*vpa^2*vperp^2 + 12*vperp^4))/(2*(vpa^2 + vperp^2)^3))
+                   +I26*((-35*(512*vpa^10 - 16736*vpa^8*vperp^2 + 53072*vpa^6*vperp^4 - 34690*vpa^4*vperp^6 + 4345*vpa^2*vperp^8 - 40*vperp^10))/(512*(vpa^2 + vperp^2)^8))
+                   +I24*((135*(16*vpa^8 - 328*vpa^6*vperp^2 + 530*vpa^4*vperp^4 - 125*vpa^2*vperp^6 + 2*vperp^8))/(128*(vpa^2 + vperp^2)^6))
+                   +I22*((-3*(16*vpa^6 - 182*vpa^4*vperp^2 + 113*vpa^2*vperp^4 - 4*vperp^6))/(8*(vpa^2 + vperp^2)^4))
+                   +I20*((2*vpa^4 - 11*vpa^2*vperp^2 + 2*vperp^4)/(2*(vpa^2 + vperp^2)^2))
+                   +I16*((-35*vpa*(64*vpa^8 - 1616*vpa^6*vperp^2 + 3480*vpa^4*vperp^4 - 1235*vpa^2*vperp^6 + 40*vperp^8))/(256*(vpa^2 + vperp^2)^7))
+                   +I14*((45*vpa*(8*vpa^6 - 116*vpa^4*vperp^2 + 101*vpa^2*vperp^4 - 6*vperp^6))/(64*(vpa^2 + vperp^2)^5))
+                   +I12*((-3*vpa*(4*vpa^4 - 27*vpa^2*vperp^2 + 4*vperp^4))/(4*(vpa^2 + vperp^2)^3))
+                   +I10*(-2*vpa + (3*vpa^3)/(vpa^2 + vperp^2))
+                   +I08*((175*(128*vpa^10 - 3968*vpa^8*vperp^2 + 11360*vpa^6*vperp^4 - 6040*vpa^4*vperp^6 + 391*vpa^2*vperp^8 + 8*vperp^10))/(16384*(vpa^2 + vperp^2)^8))
+                   +I06*((-5*(64*vpa^8 - 1200*vpa^6*vperp^2 + 1560*vpa^4*vperp^4 - 185*vpa^2*vperp^6 - 6*vperp^8))/(256*(vpa^2 + vperp^2)^6))
+                   +I04*((3*(24*vpa^6 - 228*vpa^4*vperp^2 + 67*vpa^2*vperp^4 + 4*vperp^6))/(64*(vpa^2 + vperp^2)^4))
+                   +I02*((-4*vpa^4 + 13*vpa^2*vperp^2 + 2*vperp^4)/(4*(vpa^2 + vperp^2)^2))
+                   +I00*(vpa^2))
+   # multiply by overall prefactor; NOTE: the divisions above make the result non-finite at vpa = vperp = 0
+   d2Gdvperp2_series *= ((vpa^2 + vperp^2)^(-3/2))   
+   return d2Gdvperp2_series
+end
+
+function multipole_d2Gdvperpdvpa(vpa::mk_float,vperp::mk_float,Inn_vec::Vector{mk_float})
+   (I00, I10, I20, I30, I40, I50, I60, I70, I80, 
+   I02, I12, I22, I32, I42, I52, I62,
+   I04, I14, I24, I34, I44,
+   I06, I16, I26,
+   I08) = Inn_vec
+   # sum up the series: each coefficient multiplies a polynomial in (vpa,vperp) over a power of (vpa^2 + vperp^2)
+   d2Gdvperpdvpa_series = (I80*((9*vpa*vperp*(128*vpa^8 - 2304*vpa^6*vperp^2 + 6048*vpa^4*vperp^4 - 3360*vpa^2*vperp^6 + 315*vperp^8))/(128*(vpa^2 + vperp^2)^8))
+                      +I70*((vperp*(128*vpa^8 - 1792*vpa^6*vperp^2 + 3360*vpa^4*vperp^4 - 1120*vpa^2*vperp^6 + 35*vperp^8))/(16*(vpa^2 + vperp^2)^7))
+                      +I62*((-63*(256*vpa^9*vperp - 2848*vpa^7*vperp^3 + 5936*vpa^5*vperp^5 - 2870*vpa^3*vperp^7 + 245*vpa*vperp^9))/(64*(vpa^2 + vperp^2)^8))
+                      +I60*((7*vpa*vperp*(16*vpa^6 - 168*vpa^4*vperp^2 + 210*vpa^2*vperp^4 - 35*vperp^6))/(16*(vpa^2 + vperp^2)^6))
+                      +I52*((-21*(256*vpa^8*vperp - 2144*vpa^6*vperp^3 + 3120*vpa^4*vperp^5 - 890*vpa^2*vperp^7 + 25*vperp^9))/(32*(vpa^2 + vperp^2)^7))
+                      +I50*((3*vperp*(16*vpa^6 - 120*vpa^4*vperp^2 + 90*vpa^2*vperp^4 - 5*vperp^6))/(8*(vpa^2 + vperp^2)^5))
+                      +I44*((945*vpa*vperp*(384*vpa^8 - 3392*vpa^6*vperp^2 + 5824*vpa^4*vperp^4 - 2380*vpa^2*vperp^6 + 175*vperp^8))/(512*(vpa^2 + vperp^2)^8))
+                      +I42*((-105*vpa*vperp*(32*vpa^6 - 192*vpa^4*vperp^2 + 180*vpa^2*vperp^4 - 25*vperp^6))/(32*(vpa^2 + vperp^2)^6))
+                      +I40*((5*vpa*vperp*(8*vpa^4 - 40*vpa^2*vperp^2 + 15*vperp^4))/(8*(vpa^2 + vperp^2)^4))
+                      +I34*((315*vperp*(128*vpa^8 - 832*vpa^6*vperp^2 + 960*vpa^4*vperp^4 - 220*vpa^2*vperp^6 + 5*vperp^8))/(128*(vpa^2 + vperp^2)^7))
+                      +I32*((15*vperp*(-32*vpa^6 + 128*vpa^4*vperp^2 - 68*vpa^2*vperp^4 + 3*vperp^6))/(8*(vpa^2 + vperp^2)^5))
+                      +I30*((vperp*(8*vpa^4 - 24*vpa^2*vperp^2 + 3*vperp^4))/(2*(vpa^2 + vperp^2)^3))
+                      +I26*((-315*vpa*vperp*(512*vpa^8 - 3936*vpa^6*vperp^2 + 5712*vpa^4*vperp^4 - 1890*vpa^2*vperp^6 + 105*vperp^8))/(512*(vpa^2 + vperp^2)^8))
+                      +I24*((945*vpa*vperp*(16*vpa^6 - 72*vpa^4*vperp^2 + 50*vpa^2*vperp^4 - 5*vperp^6))/(128*(vpa^2 + vperp^2)^6))
+                      +I22*((-15*vpa*vperp*(16*vpa^4 - 38*vpa^2*vperp^2 + 9*vperp^4))/(8*(vpa^2 + vperp^2)^4))
+                      +I20*((3*vpa*vperp*(2*vpa^2 - 3*vperp^2))/(2*(vpa^2 + vperp^2)^2))
+                      +I16*((-35*vperp*(512*vpa^8 - 2848*vpa^6*vperp^2 + 2640*vpa^4*vperp^4 - 430*vpa^2*vperp^6 + 5*vperp^8))/(256*(vpa^2 + vperp^2)^7))
+                      +I14*((-45*vperp*(-48*vpa^6 + 136*vpa^4*vperp^2 - 46*vpa^2*vperp^4 + vperp^6))/(64*(vpa^2 + vperp^2)^5))
+                      +I12*((-3*vperp*(16*vpa^4 - 18*vpa^2*vperp^2 + vperp^4))/(4*(vpa^2 + vperp^2)^3))
+                      +I10*(vperp*(-1 + (3*vpa^2)/(vpa^2 + vperp^2)))
+                      +I08*((1575*vpa*(128*vpa^8*vperp - 896*vpa^6*vperp^3 + 1120*vpa^4*vperp^5 - 280*vpa^2*vperp^7 + 7*vperp^9))/(16384*(vpa^2 + vperp^2)^8))
+                      +I06*((-35*vpa*(64*vpa^6*vperp - 240*vpa^4*vperp^3 + 120*vpa^2*vperp^5 - 5*vperp^7))/(256*(vpa^2 + vperp^2)^6))
+                      +I04*((45*vpa*(8*vpa^4*vperp - 12*vpa^2*vperp^3 + vperp^5))/(64*(vpa^2 + vperp^2)^4))
+                      +I02*((3*vpa*vperp*(-4*vpa^2 + vperp^2))/(4*(vpa^2 + vperp^2)^2))
+                      +I00*(vpa*vperp))
+   # multiply by overall prefactor; NOTE: the divisions above make the result non-finite at vpa = vperp = 0
+   d2Gdvperpdvpa_series *= -((vpa^2 + vperp^2)^(-3/2))   
+   return d2Gdvperpdvpa_series
+end
+
+function multipole_d2Gdvpa2(vpa::mk_float,vperp::mk_float,Inn_vec::Vector{mk_float})
+   (I00, I10, I20, I30, I40, I50, I60, I70, I80, 
+   I02, I12, I22, I32, I42, I52, I62,
+   I04, I14, I24, I34, I44,
+   I06, I16, I26,
+   I08) = Inn_vec 
+   # sum up the series: each coefficient multiplies a polynomial in (vpa,vperp) over a power of (vpa^2 + vperp^2)
+   d2Gdvpa2_series = (I80*((45*vperp^2*(128*vpa^8 - 896*vpa^6*vperp^2 + 1120*vpa^4*vperp^4 - 280*vpa^2*vperp^6 + 7*vperp^8))/(128*(vpa^2 + vperp^2)^8))
+                   +I70*((9*vpa*vperp^2*(64*vpa^6 - 336*vpa^4*vperp^2 + 280*vpa^2*vperp^4 - 35*vperp^6))/(16*(vpa^2 + vperp^2)^7))
+                   +I62*((7*(256*vpa^10 - 9088*vpa^8*vperp^2 + 43456*vpa^6*vperp^4 - 45920*vpa^4*vperp^6 + 10430*vpa^2*vperp^8 - 245*vperp^10))/(64*(vpa^2 + vperp^2)^8))
+                   +I60*((7*vperp^2*(64*vpa^6 - 240*vpa^4*vperp^2 + 120*vpa^2*vperp^4 - 5*vperp^6))/(16*(vpa^2 + vperp^2)^6))
+                   +I52*((21*vpa*(32*vpa^8 - 880*vpa^6*vperp^2 + 3108*vpa^4*vperp^4 - 2170*vpa^2*vperp^6 + 245*vperp^8))/(32*(vpa^2 + vperp^2)^7))
+                   +I50*((21*vpa*vperp^2*(8*vpa^4 - 20*vpa^2*vperp^2 + 5*vperp^4))/(8*(vpa^2 + vperp^2)^5))
+                   +I44*((105*(-512*vpa^10 + 12416*vpa^8*vperp^2 - 46592*vpa^6*vperp^4 + 41440*vpa^4*vperp^6 - 8260*vpa^2*vperp^8 + 175*vperp^10))/(512*(vpa^2 + vperp^2)^8))
+                   +I42*((15*(32*vpa^8 - 656*vpa^6*vperp^2 + 1620*vpa^4*vperp^4 - 670*vpa^2*vperp^6 + 25*vperp^8))/(32*(vpa^2 + vperp^2)^6))
+                   +I40*((15*vperp^2*(8*vpa^4 - 12*vpa^2*vperp^2 + vperp^4))/(8*(vpa^2 + vperp^2)^4))
+                   +I34*((-105*vpa*(64*vpa^8 - 1184*vpa^6*vperp^2 + 3192*vpa^4*vperp^4 - 1820*vpa^2*vperp^6 + 175*vperp^8))/(128*(vpa^2 + vperp^2)^7))
+                   +I32*((5*vpa*(16*vpa^6 - 232*vpa^4*vperp^2 + 370*vpa^2*vperp^4 - 75*vperp^6))/(8*(vpa^2 + vperp^2)^5))
+                   +I30*((5*vpa*vperp^2*(4*vpa^2 - 3*vperp^2))/(2*(vpa^2 + vperp^2)^3))
+                   +I26*((105*(256*vpa^10 - 5248*vpa^8*vperp^2 + 16576*vpa^6*vperp^4 - 12320*vpa^4*vperp^6 + 2030*vpa^2*vperp^8 - 35*vperp^10))/(512*(vpa^2 + vperp^2)^8))
+                   +I24*((-45*(64*vpa^8 - 864*vpa^6*vperp^2 + 1560*vpa^4*vperp^4 - 500*vpa^2*vperp^6 + 15*vperp^8))/(128*(vpa^2 + vperp^2)^6))
+                   +I22*((3*(16*vpa^6 - 152*vpa^4*vperp^2 + 138*vpa^2*vperp^4 - 9*vperp^6))/(8*(vpa^2 + vperp^2)^4))
+                   +I20*((-3*vperp^2*(-4*vpa^2 + vperp^2))/(2*(vpa^2 + vperp^2)^2))
+                   +I16*((105*vpa*(32*vpa^8 - 496*vpa^6*vperp^2 + 1092*vpa^4*vperp^4 - 490*vpa^2*vperp^6 + 35*vperp^8))/(256*(vpa^2 + vperp^2)^7))
+                   +I14*((15*vpa*(-32*vpa^6 + 296*vpa^4*vperp^2 - 320*vpa^2*vperp^4 + 45*vperp^6))/(64*(vpa^2 + vperp^2)^5))
+                   +I12*((3*vpa*(4*vpa^4 - 22*vpa^2*vperp^2 + 9*vperp^4))/(4*(vpa^2 + vperp^2)^3))
+                   +I10*((3*vpa*vperp^2)/(vpa^2 + vperp^2))
+                   +I08*((-35*(1024*vpa^10 - 19072*vpa^8*vperp^2 + 52864*vpa^6*vperp^4 - 32480*vpa^4*vperp^6 + 3920*vpa^2*vperp^8 - 35*vperp^10))/(16384*(vpa^2 + vperp^2)^8))
+                   +I06*((5*(96*vpa^8 - 1072*vpa^6*vperp^2 + 1500*vpa^4*vperp^4 - 330*vpa^2*vperp^6 + 5*vperp^8))/(256*(vpa^2 + vperp^2)^6))
+                   +I04*((-3*(32*vpa^6 - 184*vpa^4*vperp^2 + 96*vpa^2*vperp^4 - 3*vperp^6))/(64*(vpa^2 + vperp^2)^4))
+                   +I02*((4*vpa^4 - 10*vpa^2*vperp^2 + vperp^4)/(4*(vpa^2 + vperp^2)^2))
+                   +I00*(vperp^2))
+   # multiply by overall prefactor; NOTE: the divisions above make the result non-finite at vpa = vperp = 0
+   d2Gdvpa2_series *= ((vpa^2 + vperp^2)^(-3/2))
+   return d2Gdvpa2_series
+end
+
+"""
+    _fill_multipole_boundary_data!(multipole_func, func_data, vpa, vperp, Inn_vec)
+
+Shared implementation for the `calculate_boundary_data_multipole_*!` functions.
+
+Evaluates `multipole_func(vpa, vperp, Inn_vec)` at `vpa.grid[1]` and `vpa.grid[vpa.n]`
+for every `ivperp` in the `@loop_vperp` range, and at `vperp.grid[vperp.n]` for every
+`ivpa` in the `@loop_vpa` range. The results are written to
+`func_data.lower_boundary_vpa`, `func_data.upper_boundary_vpa` and
+`func_data.upper_boundary_vperp` respectively.
+
+The loop regions are entered with `no_synchronize=true` and no synchronization is
+performed in this function after the data has been written.
+"""
+function _fill_multipole_boundary_data!(multipole_func::F,
+                                        func_data::vpa_vperp_boundary_data,
+                                        vpa,vperp,Inn_vec) where {F}
+    nvpa = vpa.n
+    nvperp = vperp.n
+    # boundaries at the first and last vpa grid points, as functions of vperp
+    begin_anyv_vperp_region(no_synchronize=true)
+    @loop_vperp ivperp begin
+        func_data.lower_boundary_vpa[ivperp] = multipole_func(vpa.grid[1],vperp.grid[ivperp],Inn_vec)
+        func_data.upper_boundary_vpa[ivperp] = multipole_func(vpa.grid[nvpa],vperp.grid[ivperp],Inn_vec)
+    end
+    # boundary at the last vperp grid point, as a function of vpa
+    begin_anyv_vpa_region(no_synchronize=true)
+    @loop_vpa ivpa begin
+        func_data.upper_boundary_vperp[ivpa] = multipole_func(vpa.grid[ivpa],vperp.grid[nvperp],Inn_vec)
+    end
+    return nothing
+end
+
+"""
+Fill `func_data` with boundary values computed by `multipole_H` from the moments
+`Inn_vec`. Returns `nothing`.
+"""
+function calculate_boundary_data_multipole_H!(func_data::vpa_vperp_boundary_data,vpa,vperp,
+                                             Inn_vec)
+    return _fill_multipole_boundary_data!(multipole_H,func_data,vpa,vperp,Inn_vec)
+end
+
+"""
+Fill `func_data` with boundary values computed by `multipole_dHdvpa` from the moments
+`Inn_vec`. Returns `nothing`.
+"""
+function calculate_boundary_data_multipole_dHdvpa!(func_data::vpa_vperp_boundary_data,vpa,vperp,Inn_vec)
+    return _fill_multipole_boundary_data!(multipole_dHdvpa,func_data,vpa,vperp,Inn_vec)
+end
+
+"""
+Fill `func_data` with boundary values computed by `multipole_dHdvperp` from the moments
+`Inn_vec`. Returns `nothing`.
+"""
+function calculate_boundary_data_multipole_dHdvperp!(func_data::vpa_vperp_boundary_data,vpa,vperp,Inn_vec)
+    return _fill_multipole_boundary_data!(multipole_dHdvperp,func_data,vpa,vperp,Inn_vec)
+end
+
+"""
+Fill `func_data` with boundary values computed by `multipole_G` from the moments
+`Inn_vec`. Returns `nothing`.
+"""
+function calculate_boundary_data_multipole_G!(func_data::vpa_vperp_boundary_data,vpa,vperp,Inn_vec)
+    return _fill_multipole_boundary_data!(multipole_G,func_data,vpa,vperp,Inn_vec)
+end
+
+"""
+Fill `func_data` with boundary values computed by `multipole_dGdvperp` from the moments
+`Inn_vec`. Returns `nothing`.
+"""
+function calculate_boundary_data_multipole_dGdvperp!(func_data::vpa_vperp_boundary_data,vpa,vperp,Inn_vec)
+    return _fill_multipole_boundary_data!(multipole_dGdvperp,func_data,vpa,vperp,Inn_vec)
+end
+
+"""
+Fill `func_data` with boundary values computed by `multipole_d2Gdvperp2` from the moments
+`Inn_vec`. Returns `nothing`.
+"""
+function calculate_boundary_data_multipole_d2Gdvperp2!(func_data::vpa_vperp_boundary_data,vpa,vperp,Inn_vec)
+    return _fill_multipole_boundary_data!(multipole_d2Gdvperp2,func_data,vpa,vperp,Inn_vec)
+end
+
+"""
+Fill `func_data` with boundary values computed by `multipole_d2Gdvperpdvpa` from the
+moments `Inn_vec`. Returns `nothing`.
+"""
+function calculate_boundary_data_multipole_d2Gdvperpdvpa!(func_data::vpa_vperp_boundary_data,vpa,vperp,Inn_vec)
+    return _fill_multipole_boundary_data!(multipole_d2Gdvperpdvpa,func_data,vpa,vperp,Inn_vec)
+end
+
+"""
+Fill `func_data` with boundary values computed by `multipole_d2Gdvpa2` from the moments
+`Inn_vec`. Returns `nothing`.
+"""
+function calculate_boundary_data_multipole_d2Gdvpa2!(func_data::vpa_vperp_boundary_data,vpa,vperp,Inn_vec)
+    return _fill_multipole_boundary_data!(multipole_d2Gdvpa2,func_data,vpa,vperp,Inn_vec)
+end
+
+"""
+Function to use the multipole expansion of the Rosenbluth potentials to calculate and
+assign boundary data to an instance of `rosenbluth_potential_boundary_data`, in place.
+
+The 25 moments of `pdf` needed by the `multipole_*` formulae are computed with
+`integrate_over_vspace` on the process executing the `@anyv_serial_region` block, stored
+in a small vector `Inn_vec` (the only allocation made directly by this function), and
+broadcast from rank 0 of `comm_anyv_subblock[]`.
+
+The boundary data for `H`, `dHdvpa`, `dHdvperp`, `d2Gdvperp2`, `d2Gdvperpdvpa` and
+`d2Gdvpa2` are always calculated. `G` is calculated only if `calculate_GG=true`, and
+`dGdvperp` only if `calculate_dGdvperp=true`.
+
+`vpa_spectral` and `vperp_spectral` are not used in this function.
+"""
+function calculate_rosenbluth_potential_boundary_data_multipole!(rpbd::rosenbluth_potential_boundary_data,
+    pdf,vpa,vperp,vpa_spectral,vperp_spectral;
+    calculate_GG=false,calculate_dGdvperp=false)
+    # Storage for the required moments of pdf, ordered as
+    #   I00, I10, ..., I80,  I02, I12, ..., I62,  I04, ..., I44,  I06, I16, I26,  I08
+    # where Ipq is the integral evaluated with power p for vpa and power q for vperp.
+    # Entries stay zero on processes that do not execute the serial region, until
+    # they are overwritten by the broadcast below.
+    Inn_vec = zeros(mk_float, 25)
+
+    begin_anyv_region()
+    @anyv_serial_region begin
+        imoment = 0
+        # even powers of vperp only; for each, vpa powers up to a total power of 8
+        for vperp_power in 0:2:8
+            for vpa_power in 0:(8 - vperp_power)
+                imoment += 1
+                Inn_vec[imoment] = integrate_over_vspace(@view(pdf[:,:]), vpa.grid, vpa_power, vpa.wgts,
+                                                         vperp.grid, vperp_power, vperp.wgts)
+            end
+        end
+    end
+    # Broadcast integrals to all processes in the 'anyv' subblock
+    if comm_anyv_subblock[] != MPI.COMM_NULL
+        MPI.Bcast!(Inn_vec, 0, comm_anyv_subblock[])
+    end
+    # ensure data is synchronized
+    _anyv_subblock_synchronize()
+    # evaluate the multipole formulae
+    calculate_boundary_data_multipole_H!(rpbd.H_data,vpa,vperp,Inn_vec)
+    calculate_boundary_data_multipole_dHdvpa!(rpbd.dHdvpa_data,vpa,vperp,Inn_vec)
+    calculate_boundary_data_multipole_dHdvperp!(rpbd.dHdvperp_data,vpa,vperp,Inn_vec)
+    if calculate_GG
+        calculate_boundary_data_multipole_G!(rpbd.G_data,vpa,vperp,Inn_vec)
+    end
+    if calculate_dGdvperp
+        calculate_boundary_data_multipole_dGdvperp!(rpbd.dGdvperp_data,vpa,vperp,Inn_vec)
+    end
+    calculate_boundary_data_multipole_d2Gdvperp2!(rpbd.d2Gdvperp2_data,vpa,vperp,Inn_vec)
+    calculate_boundary_data_multipole_d2Gdvperpdvpa!(rpbd.d2Gdvperpdvpa_data,vpa,vperp,Inn_vec)
+    calculate_boundary_data_multipole_d2Gdvpa2!(rpbd.d2Gdvpa2_data,vpa,vperp,Inn_vec)
+
+    return nothing
+end
+
+"""
+Compare two instances of `rosenbluth_potential_boundary_data` field by field, using
+`test_boundary_data()`: `rpbd` holds the numerically computed results and `rpbd_exact`
+the results treated as exact.
+
+Returns the maximum errors as a tuple, in the order
+`(H, dHdvpa, dHdvperp, G, dGdvperp, d2Gdvperp2, d2Gdvperpdvpa, d2Gdvpa2)`.
+`print_to_screen` is passed through to `test_boundary_data()`.
+"""
+function test_rosenbluth_potential_boundary_data(rpbd::rosenbluth_potential_boundary_data,
+    rpbd_exact::rosenbluth_potential_boundary_data,vpa,vperp;print_to_screen=true)
+
+    # scratch arrays, reused by every call to test_boundary_data()
+    err_vpa = Array{mk_float,1}(undef,vpa.n)
+    err_lower_vpa = Array{mk_float,1}(undef,vperp.n)
+    err_upper_vpa = Array{mk_float,1}(undef,vperp.n)
+
+    # maximum error of one boundary-data field, labelled `label` in any printed output
+    max_error(data,data_exact,label) = test_boundary_data(data,data_exact,label,vpa,vperp,
+                                                          err_vpa,err_lower_vpa,err_upper_vpa,
+                                                          print_to_screen)
+
+    max_H_err = max_error(rpbd.H_data,rpbd_exact.H_data,"H")
+    max_dHdvpa_err = max_error(rpbd.dHdvpa_data,rpbd_exact.dHdvpa_data,"dHdvpa")
+    max_dHdvperp_err = max_error(rpbd.dHdvperp_data,rpbd_exact.dHdvperp_data,"dHdvperp")
+    max_G_err = max_error(rpbd.G_data,rpbd_exact.G_data,"G")
+    max_dGdvperp_err = max_error(rpbd.dGdvperp_data,rpbd_exact.dGdvperp_data,"dGdvperp")
+    max_d2Gdvperp2_err = max_error(rpbd.d2Gdvperp2_data,rpbd_exact.d2Gdvperp2_data,"d2Gdvperp2")
+    max_d2Gdvperpdvpa_err = max_error(rpbd.d2Gdvperpdvpa_data,rpbd_exact.d2Gdvperpdvpa_data,"d2Gdvperpdvpa")
+    max_d2Gdvpa2_err = max_error(rpbd.d2Gdvpa2_data,rpbd_exact.d2Gdvpa2_data,"d2Gdvpa2")
+
+    return max_H_err, max_dHdvpa_err, max_dHdvperp_err, max_G_err, max_dGdvperp_err, max_d2Gdvperp2_err, max_d2Gdvperpdvpa_err, max_d2Gdvpa2_err
+end
+
+"""
+Compute the maximum error \${\\rm MAX}|f_{\\rm numerical}-f_{\\rm exact}|\$ between two
+sets of boundary data `func` and `func_exact` (e.g. instances of
+`vpa_vperp_boundary_data`).
+
+The pointwise absolute errors are written into the caller-supplied buffers:
+`buffer_vperp_1` (lower `vpa` boundary), `buffer_vperp_2` (upper `vpa` boundary) and
+`buffer_vpa` (upper `vperp` boundary). If `print_to_screen` is true, the maximum error on
+each boundary is printed, labelled with `func_name`. Returns the largest of the three
+maxima.
+"""
+function test_boundary_data(func,func_exact,func_name,vpa,vperp,buffer_vpa,buffer_vperp_1,buffer_vperp_2,print_to_screen)
+    # absolute difference between numerical and exact data at a single index
+    abs_diff(numerical,exact,i) = abs(numerical[i] - exact[i])
+
+    # the two vpa boundaries are functions of vperp
+    for i in 1:vperp.n
+        buffer_vperp_1[i] = abs_diff(func.lower_boundary_vpa,func_exact.lower_boundary_vpa,i)
+        buffer_vperp_2[i] = abs_diff(func.upper_boundary_vpa,func_exact.upper_boundary_vpa,i)
+    end
+    # the upper vperp boundary is a function of vpa
+    for i in 1:vpa.n
+        buffer_vpa[i] = abs_diff(func.upper_boundary_vperp,func_exact.upper_boundary_vperp,i)
+    end
+
+    lower_vpa_max = maximum(buffer_vperp_1)
+    upper_vpa_max = maximum(buffer_vperp_2)
+    upper_vperp_max = maximum(buffer_vpa)
+    if print_to_screen
+        println(func_name*" boundary data:")
+        println("max(lower_vpa_err) = ",lower_vpa_max)
+        println("max(upper_vpa_err) = ",upper_vpa_max)
+        println("max(upper_vperp_err) = ",upper_vperp_max)
+    end
+    return max(lower_vpa_max,upper_vpa_max,upper_vperp_max)
+end
+
+"""
+    get_global_compound_index(vpa,vperp,ielement_vpa,ielement_vperp,ivpa_local,ivperp_local)
+
+Convert the local indices `ivpa_local` and `ivperp_local`, which label a grid point
+within the single element specified by `ielement_vpa` and `ielement_vperp`, into the
+global index in the 'linear-indexed' 2d space of size `(vperp.n, vpa.n)` (as returned by
+[`ic_func`](@ref)).
+"""
+function get_global_compound_index(vpa,vperp,ielement_vpa,ielement_vperp,ivpa_local,ivperp_local)
+    # look up the global grid index in each dimension, then combine the pair into
+    # the compound index
+    return ic_func(vpa.igrid_full[ivpa_local,ielement_vpa],
+                   vperp.igrid_full[ivperp_local,ielement_vperp],
+                   vpa.n)
+end
+
+"""
+Unused function. Sets `f(vpa,vperp)` to zero at the boundaries in `(vpa,vperp)`:
+the first and last `vpa` points, the last `vperp` point and, only if
+`impose_BC_at_zero_vperp=true`, the first `vperp` point.
+"""
+function enforce_zero_bc!(fvpavperp,vpa,vperp;impose_BC_at_zero_vperp=false)
+    # lower and upper vpa boundaries
+    @loop_vperp ivperp begin
+        fvpavperp[1,ivperp] = 0.0
+        fvpavperp[end,ivperp] = 0.0
+    end
+
+    # lower vperp boundary (optional)
+    if impose_BC_at_zero_vperp
+        @loop_vpa ivpa begin
+            fvpavperp[ivpa,1] = 0.0
+        end
+    end
+
+    # upper vperp boundary
+    @loop_vpa ivpa begin
+        fvpavperp[ivpa,end] = 0.0
+    end
+end
+
+"""
+Sets `f(vpa,vperp)` to a specified value `f_bc` at the boundaries
+in `(vpa,vperp)`. `f_bc` is a 2D array of `(vpa,vperp)` where
+only boundary data is used. The lower `vperp` boundary is only set when
+`dirichlet_vperp_lower_boundary=true`. Used for testing.
+"""
+function enforce_dirichlet_bc!(fvpavperp,vpa,vperp,f_bc;dirichlet_vperp_lower_boundary=false)
+    # lower and upper vpa boundaries
+    for ivperp in 1:vperp.n
+        fvpavperp[1,ivperp] = f_bc[1,ivperp]
+        fvpavperp[end,ivperp] = f_bc[end,ivperp]
+    end
+
+    # vperp boundaries: the upper one always, the lower one only on request
+    for ivpa in 1:vpa.n
+        if dirichlet_vperp_lower_boundary
+            fvpavperp[ivpa,1] = f_bc[ivpa,1]
+        end
+        fvpavperp[ivpa,end] = f_bc[ivpa,end]
+    end
+end
+
+"""
+Sets `f(vpa,vperp)` to a specified value `f_bc` at the boundaries
+in `(vpa,vperp)`. `f_bc` is an instance of `vpa_vperp_boundary_data`, which
+supplies the lower and upper `vpa` boundaries and the upper `vperp` boundary.
+"""
+function enforce_dirichlet_bc!(fvpavperp,vpa,vperp,f_bc::vpa_vperp_boundary_data)
+    # lower and upper vpa boundaries
+    for ivperp in 1:vperp.n
+        fvpavperp[1,ivperp] = f_bc.lower_boundary_vpa[ivperp]
+        fvpavperp[end,ivperp] = f_bc.upper_boundary_vpa[ivperp]
+    end
+
+    # upper vperp boundary
+    for ivpa in 1:vpa.n
+        fvpavperp[ivpa,end] = f_bc.upper_boundary_vperp[ivpa]
+    end
+    return nothing
+end
+
+"""
+Function to construct the global sparse matrices used to solve
+the elliptic PDEs for the Rosenbluth potentials. Uses a dense matrix
+construction method. The matrices are 2D in the compound index `ic`
+which indexes the velocity space labelled by `ivpa,ivperp`.
+Dirichlet boundary conditions are imposed in the appropriate stiffness
+matrices by setting the boundary row to be the Kronecker delta
+(0 except where `ivpa = ivpap` and `ivperp = ivperpp`).
+Used for testing.
+
+Returns the sparse matrices, in order: `MM2D`, `KKpar2D`, `KKperp2D`,
+`KKpar2D_with_BC_terms`, `KKperp2D_with_BC_terms`, `LP2D`, `LV2D`, `LB2D`,
+`KPperp2D`, `PUperp2D`, `PPparPUperp2D`, `PPpar2D`, `MMparMNperp2D`.
+"""
+function assemble_matrix_operators_dirichlet_bc(vpa,vperp,vpa_spectral,vperp_spectral;print_to_screen=true)
+    # size of the global compound index space
+    nc_global = vpa.n*vperp.n
+    # zero-initialised dense (nc_global x nc_global) matrix
+    zeros_2D() = zeros(mk_float,nc_global,nc_global)
+
+    # 2D mass matrix in the global compound coordinate
+    MM2D = zeros_2D()
+    KKpar2D = zeros_2D()
+    KKperp2D = zeros_2D()
+    KPperp2D = zeros_2D()
+    KKpar2D_with_BC_terms = zeros_2D()
+    KKperp2D_with_BC_terms = zeros_2D()
+    PUperp2D = zeros_2D()
+    PPparPUperp2D = zeros_2D()
+    PPpar2D = zeros_2D()
+    MMparMNperp2D = zeros_2D()
+    # Laplacian matrix
+    LP2D = zeros_2D()
+    # Modified Laplacian matrix
+    LV2D = zeros_2D()
+    # Modified Laplacian matrix
+    LB2D = zeros_2D()
+
+    # local dummy arrays, filled element-by-element by get_QQ_local!()
+    MMpar = Array{mk_float,2}(undef,vpa.ngrid,vpa.ngrid)
+    MMperp = Array{mk_float,2}(undef,vperp.ngrid,vperp.ngrid)
+    MNperp = Array{mk_float,2}(undef,vperp.ngrid,vperp.ngrid)
+    MRperp = Array{mk_float,2}(undef,vperp.ngrid,vperp.ngrid)
+    KKpar = Array{mk_float,2}(undef,vpa.ngrid,vpa.ngrid)
+    KKperp = Array{mk_float,2}(undef,vperp.ngrid,vperp.ngrid)
+    KKpar_with_BC_terms = Array{mk_float,2}(undef,vpa.ngrid,vpa.ngrid)
+    KKperp_with_BC_terms = Array{mk_float,2}(undef,vperp.ngrid,vperp.ngrid)
+    KJperp = Array{mk_float,2}(undef,vperp.ngrid,vperp.ngrid)
+    LLperp = Array{mk_float,2}(undef,vperp.ngrid,vperp.ngrid)
+    PPperp = Array{mk_float,2}(undef,vperp.ngrid,vperp.ngrid)
+    PUperp = Array{mk_float,2}(undef,vperp.ngrid,vperp.ngrid)
+    PPpar = Array{mk_float,2}(undef,vpa.ngrid,vpa.ngrid)
+
+    # no Dirichlet condition is imposed on the lower vperp boundary
+    impose_BC_at_zero_vperp = false
+    @serial_region begin
+        if global_rank[] == 0 && print_to_screen
+            println("begin elliptic operator assignment   ", Dates.format(now(), dateformat"H:MM:SS"))
+        end
+    end
+    for ielement_vperp in 1:vperp.nelement_local
+        get_QQ_local!(MMperp,ielement_vperp,vperp_spectral.lobatto,vperp_spectral.radau,vperp,"M")
+        get_QQ_local!(MRperp,ielement_vperp,vperp_spectral.lobatto,vperp_spectral.radau,vperp,"R")
+        get_QQ_local!(MNperp,ielement_vperp,vperp_spectral.lobatto,vperp_spectral.radau,vperp,"N")
+        get_QQ_local!(KKperp,ielement_vperp,vperp_spectral.lobatto,vperp_spectral.radau,vperp,"K")
+        get_QQ_local!(KKperp_with_BC_terms,ielement_vperp,vperp_spectral.lobatto,vperp_spectral.radau,vperp,"K_with_BC_terms")
+        get_QQ_local!(KJperp,ielement_vperp,vperp_spectral.lobatto,vperp_spectral.radau,vperp,"J")
+        get_QQ_local!(LLperp,ielement_vperp,vperp_spectral.lobatto,vperp_spectral.radau,vperp,"L")
+        get_QQ_local!(PPperp,ielement_vperp,vperp_spectral.lobatto,vperp_spectral.radau,vperp,"P")
+        get_QQ_local!(PUperp,ielement_vperp,vperp_spectral.lobatto,vperp_spectral.radau,vperp,"U")
+
+        for ielement_vpa in 1:vpa.nelement_local
+            get_QQ_local!(MMpar,ielement_vpa,vpa_spectral.lobatto,vpa_spectral.radau,vpa,"M")
+            get_QQ_local!(KKpar,ielement_vpa,vpa_spectral.lobatto,vpa_spectral.radau,vpa,"K")
+            get_QQ_local!(KKpar_with_BC_terms,ielement_vpa,vpa_spectral.lobatto,vpa_spectral.radau,vpa,"K_with_BC_terms")
+            get_QQ_local!(PPpar,ielement_vpa,vpa_spectral.lobatto,vpa_spectral.radau,vpa,"P")
+
+            for ivperpp_local in 1:vperp.ngrid
+                for ivperp_local in 1:vperp.ngrid
+                    for ivpap_local in 1:vpa.ngrid
+                        for ivpa_local in 1:vpa.ngrid
+                            ic_global = get_global_compound_index(vpa,vperp,ielement_vpa,ielement_vperp,ivpa_local,ivperp_local)
+                            icp_global = get_global_compound_index(vpa,vperp,ielement_vpa,ielement_vperp,ivpap_local,ivperpp_local)
+                            # boundary condition possibilities
+                            lower_boundary_row_vpa = (ielement_vpa == 1 && ivpa_local == 1)
+                            upper_boundary_row_vpa = (ielement_vpa == vpa.nelement_local && ivpa_local == vpa.ngrid)
+                            lower_boundary_row_vperp = (ielement_vperp == 1 && ivperp_local == 1)
+                            upper_boundary_row_vperp = (ielement_vperp == vperp.nelement_local && ivperp_local == vperp.ngrid)
+
+                            # Decide whether this row is a Dirichlet boundary row and,
+                            # if so, whether this column is the diagonal entry. The
+                            # branches are tested in the same order of precedence:
+                            # lower vpa, upper vpa, lower vperp (if enabled), upper vperp.
+                            is_boundary_row = true
+                            on_diagonal = false
+                            if lower_boundary_row_vpa
+                                on_diagonal = (ivpap_local == 1 && ivperp_local == ivperpp_local)
+                            elseif upper_boundary_row_vpa
+                                on_diagonal = (ivpap_local == vpa.ngrid && ivperp_local == ivperpp_local)
+                            elseif lower_boundary_row_vperp && impose_BC_at_zero_vperp
+                                on_diagonal = (ivperpp_local == 1 && ivpa_local == ivpap_local)
+                            elseif upper_boundary_row_vperp
+                                on_diagonal = (ivperpp_local == vperp.ngrid && ivpa_local == ivpap_local)
+                            else
+                                is_boundary_row = false
+                            end
+
+                            if is_boundary_row
+                                # boundary rows are overwritten (not accumulated) with
+                                # the Kronecker delta
+                                delta = on_diagonal ? 1.0 : 0.0
+                                LP2D[ic_global,icp_global] = delta
+                                LV2D[ic_global,icp_global] = delta
+                                LB2D[ic_global,icp_global] = delta
+                            else
+                                # assign Laplacian and modified Laplacian matrix data
+                                LP2D[ic_global,icp_global] += (KKpar[ivpa_local,ivpap_local]*
+                                                                MMperp[ivperp_local,ivperpp_local] +
+                                                               MMpar[ivpa_local,ivpap_local]*
+                                                                LLperp[ivperp_local,ivperpp_local])
+                                LV2D[ic_global,icp_global] += (KKpar[ivpa_local,ivpap_local]*
+                                                                MRperp[ivperp_local,ivperpp_local] +
+                                                               MMpar[ivpa_local,ivpap_local]*
+                                                                (KJperp[ivperp_local,ivperpp_local] -
+                                                                 PPperp[ivperp_local,ivperpp_local] -
+                                                                 MNperp[ivperp_local,ivperpp_local]))
+                                LB2D[ic_global,icp_global] += (KKpar[ivpa_local,ivpap_local]*
+                                                                MRperp[ivperp_local,ivperpp_local] +
+                                                               MMpar[ivpa_local,ivpap_local]*
+                                                                (KJperp[ivperp_local,ivperpp_local] -
+                                                                 PPperp[ivperp_local,ivperpp_local] -
+                                                             4.0*MNperp[ivperp_local,ivperpp_local]))
+                            end
+                            # assign mass matrix data
+                            MM2D[ic_global,icp_global] += MMpar[ivpa_local,ivpap_local]*
+                                                                MMperp[ivperp_local,ivperpp_local]
+
+                            # assign K matrices
+                            KKpar2D[ic_global,icp_global] += KKpar[ivpa_local,ivpap_local]*
+                                                            MMperp[ivperp_local,ivperpp_local]
+                            KKperp2D[ic_global,icp_global] += MMpar[ivpa_local,ivpap_local]*
+                                                            KKperp[ivperp_local,ivperpp_local]
+                            KPperp2D[ic_global,icp_global] += MMpar[ivpa_local,ivpap_local]*
+                                                         (KJperp[ivperp_local,ivperpp_local] -
+                                                      2.0*PPperp[ivperp_local,ivperpp_local] -
+                                                      2.0*MNperp[ivperp_local,ivperpp_local])
+                            # assign K matrices with explicit boundary terms from integration by parts
+                            KKpar2D_with_BC_terms[ic_global,icp_global] += KKpar_with_BC_terms[ivpa_local,ivpap_local]*
+                                                            MMperp[ivperp_local,ivperpp_local]
+                            KKperp2D_with_BC_terms[ic_global,icp_global] += MMpar[ivpa_local,ivpap_local]*
+                                                            KKperp_with_BC_terms[ivperp_local,ivperpp_local]
+                            # assign PU matrix
+                            PUperp2D[ic_global,icp_global] += MMpar[ivpa_local,ivpap_local]*
+                                                            PUperp[ivperp_local,ivperpp_local]
+                            PPparPUperp2D[ic_global,icp_global] += PPpar[ivpa_local,ivpap_local]*
+                                                            PUperp[ivperp_local,ivperpp_local]
+                            PPpar2D[ic_global,icp_global] += PPpar[ivpa_local,ivpap_local]*
+                                                            MMperp[ivperp_local,ivperpp_local]
+                            # assign RHS mass matrix for d2Gdvperp2
+                            MMparMNperp2D[ic_global,icp_global] += MMpar[ivpa_local,ivpap_local]*
+                                                            MNperp[ivperp_local,ivperpp_local]
+                        end
+                    end
+                end
+            end
+        end
+    end
+    @serial_region begin
+        if global_rank[] == 0 && print_to_screen
+            println("finished elliptic operator assignment   ", Dates.format(now(), dateformat"H:MM:SS"))
+            println("begin conversion to sparse matrices   ", Dates.format(now(), dateformat"H:MM:SS"))
+        end
+    end
+    # convert these matrices to sparse matrices
+    MM2D_sparse = sparse(MM2D)
+    KKpar2D_sparse = sparse(KKpar2D)
+    KKperp2D_sparse = sparse(KKperp2D)
+    KKpar2D_with_BC_terms_sparse = sparse(KKpar2D_with_BC_terms)
+    KKperp2D_with_BC_terms_sparse = sparse(KKperp2D_with_BC_terms)
+    LP2D_sparse = sparse(LP2D)
+    LV2D_sparse = sparse(LV2D)
+    LB2D_sparse = sparse(LB2D)
+    KPperp2D_sparse = sparse(KPperp2D)
+    PUperp2D_sparse = sparse(PUperp2D)
+    PPparPUperp2D_sparse = sparse(PPparPUperp2D)
+    PPpar2D_sparse = sparse(PPpar2D)
+    MMparMNperp2D_sparse = sparse(MMparMNperp2D)
+    return MM2D_sparse, KKpar2D_sparse, KKperp2D_sparse,
+           KKpar2D_with_BC_terms_sparse, KKperp2D_with_BC_terms_sparse,
+           LP2D_sparse, LV2D_sparse, LB2D_sparse,
+           KPperp2D_sparse,PUperp2D_sparse, PPparPUperp2D_sparse,
+           PPpar2D_sparse, MMparMNperp2D_sparse
+end
+
+"""
+Function to construct the global sparse matrices used to solve
+the elliptic PDEs for the Rosenbluth potentials. Uses a sparse matrix
+construction method. The matrices are 2D in the compound index `ic` 
+which indexes the velocity space labelled by `ivpa,ivperp`.
+Dirichlet boundary conditions are imposed in the appropriate stiffness
+matrices by setting the boundary row to be the Kronecker delta 
+(0 except where `ivpa = ivpap` and `ivperp = ivperpp`).
+See also `assemble_matrix_operators_dirichlet_bc()`.
+"""
+function assemble_matrix_operators_dirichlet_bc_sparse(vpa,vperp,vpa_spectral,vperp_spectral;print_to_screen=true)
+    # Arguments (as used in this body):
+    #   vpa, vperp: coordinate objects; the fields `n`, `ngrid` and `nelement_local` are read here
+    #   vpa_spectral, vperp_spectral: objects whose `lobatto` and `radau` fields are passed
+    #                                 on to `get_QQ_local!()`
+    #   print_to_screen: when true, global rank 0 prints timestamped progress messages
+    # Returns a tuple of 13 matrices produced by `create_sparse_matrix()`, in the order
+    # listed in the `return` statement at the end of the function.
+    #
+    # Assemble a 2D mass matrix in the global compound coordinate
+    # (nc_global is only referenced by the commented-out debugging output near the end)
+    nc_global = vpa.n*vperp.n
+    # ntot = nelement_local*ngrid^2 - (nelement_local - 1), i.e. ngrid^2 entries per element
+    # less one for each pair of neighbouring elements (presumably the entry at the shared
+    # grid point, stored once -- confirm against `icsc_func`)
+    ntot_vpa = (vpa.nelement_local - 1)*(vpa.ngrid^2 - 1) + vpa.ngrid^2
+    ntot_vperp = (vperp.nelement_local - 1)*(vperp.ngrid^2 - 1) + vperp.ngrid^2
+    # size handed to every sparse matrix constructor below
+    nsparse = ntot_vpa*ntot_vperp
+    ngrid_vpa = vpa.ngrid
+    nelement_vpa = vpa.nelement_local
+    ngrid_vperp = vperp.ngrid
+    nelement_vperp = vperp.nelement_local
+    
+    MM2D = allocate_sparse_matrix_constructor(nsparse)
+    KKpar2D = allocate_sparse_matrix_constructor(nsparse)
+    KKperp2D = allocate_sparse_matrix_constructor(nsparse)
+    KKpar2D_with_BC_terms = allocate_sparse_matrix_constructor(nsparse)
+    KKperp2D_with_BC_terms = allocate_sparse_matrix_constructor(nsparse)
+    PUperp2D = allocate_sparse_matrix_constructor(nsparse)
+    PPparPUperp2D = allocate_sparse_matrix_constructor(nsparse)
+    PPpar2D = allocate_sparse_matrix_constructor(nsparse)
+    MMparMNperp2D = allocate_sparse_matrix_constructor(nsparse)
+    KPperp2D = allocate_sparse_matrix_constructor(nsparse)
+    # Laplacian matrix
+    LP2D = allocate_sparse_matrix_constructor(nsparse)
+    # Modified Laplacian matrix (for d / d vperp potentials)
+    LV2D = allocate_sparse_matrix_constructor(nsparse)
+    # Modified Laplacian matrix (for d^2 / d vperp^2 potentials)
+    LB2D = allocate_sparse_matrix_constructor(nsparse)
+    
+    # local dummy arrays
+    # (element-local 1D matrices, passed to get_QQ_local!() afresh for every element)
+    MMpar = Array{mk_float,2}(undef,ngrid_vpa,ngrid_vpa)
+    MMperp = Array{mk_float,2}(undef,ngrid_vperp,ngrid_vperp)
+    MNperp = Array{mk_float,2}(undef,ngrid_vperp,ngrid_vperp)
+    MRperp = Array{mk_float,2}(undef,ngrid_vperp,ngrid_vperp)
+    KKpar = Array{mk_float,2}(undef,ngrid_vpa,ngrid_vpa)
+    KKpar_with_BC_terms = Array{mk_float,2}(undef,ngrid_vpa,ngrid_vpa)
+    KKperp = Array{mk_float,2}(undef,ngrid_vperp,ngrid_vperp)
+    KKperp_with_BC_terms = Array{mk_float,2}(undef,ngrid_vperp,ngrid_vperp)
+    KJperp = Array{mk_float,2}(undef,ngrid_vperp,ngrid_vperp)
+    LLperp = Array{mk_float,2}(undef,ngrid_vperp,ngrid_vperp)
+    PPperp = Array{mk_float,2}(undef,ngrid_vperp,ngrid_vperp)
+    PUperp = Array{mk_float,2}(undef,ngrid_vperp,ngrid_vperp)
+    PPpar = Array{mk_float,2}(undef,ngrid_vpa,ngrid_vpa)
+        
+    # hard-wired switch: with `false` the lower vperp boundary branch below is never taken
+    impose_BC_at_zero_vperp = false
+    @serial_region begin
+        if global_rank[] == 0 && print_to_screen
+            println("begin elliptic operator assignment   ", Dates.format(now(), dateformat"H:MM:SS"))
+        end
+    end
+    for ielement_vperp in 1:nelement_vperp
+        # 1D vperp matrices for this element; the trailing string selects which matrix
+        # get_QQ_local!() provides
+        get_QQ_local!(MMperp,ielement_vperp,vperp_spectral.lobatto,vperp_spectral.radau,vperp,"M")
+        get_QQ_local!(MRperp,ielement_vperp,vperp_spectral.lobatto,vperp_spectral.radau,vperp,"R")
+        get_QQ_local!(MNperp,ielement_vperp,vperp_spectral.lobatto,vperp_spectral.radau,vperp,"N")
+        get_QQ_local!(KKperp,ielement_vperp,vperp_spectral.lobatto,vperp_spectral.radau,vperp,"K")
+        get_QQ_local!(KKperp_with_BC_terms,ielement_vperp,vperp_spectral.lobatto,vperp_spectral.radau,vperp,"K_with_BC_terms")
+        get_QQ_local!(KJperp,ielement_vperp,vperp_spectral.lobatto,vperp_spectral.radau,vperp,"J")
+        get_QQ_local!(LLperp,ielement_vperp,vperp_spectral.lobatto,vperp_spectral.radau,vperp,"L")
+        get_QQ_local!(PPperp,ielement_vperp,vperp_spectral.lobatto,vperp_spectral.radau,vperp,"P")
+        get_QQ_local!(PUperp,ielement_vperp,vperp_spectral.lobatto,vperp_spectral.radau,vperp,"U")
+        #print_matrix(MMperp,"MMperp",vperp.ngrid,vperp.ngrid)
+        #print_matrix(MRperp,"MRperp",vperp.ngrid,vperp.ngrid)
+        #print_matrix(MNperp,"MNperp",vperp.ngrid,vperp.ngrid)
+        #print_matrix(KKperp,"KKperp",vperp.ngrid,vperp.ngrid)
+        #print_matrix(KJperp,"KJperp",vperp.ngrid,vperp.ngrid)
+        #print_matrix(LLperp,"LLperp",vperp.ngrid,vperp.ngrid)
+        #print_matrix(PPperp,"PPperp",vperp.ngrid,vperp.ngrid)
+        #print_matrix(PUperp,"PUperp",vperp.ngrid,vperp.ngrid)
+        
+        for ielement_vpa in 1:nelement_vpa
+            # 1D vpa matrices for this element
+            get_QQ_local!(MMpar,ielement_vpa,vpa_spectral.lobatto,vpa_spectral.radau,vpa,"M")
+            get_QQ_local!(KKpar_with_BC_terms,ielement_vpa,vpa_spectral.lobatto,vpa_spectral.radau,vpa,"K_with_BC_terms")
+            get_QQ_local!(KKpar,ielement_vpa,vpa_spectral.lobatto,vpa_spectral.radau,vpa,"K")
+            get_QQ_local!(PPpar,ielement_vpa,vpa_spectral.lobatto,vpa_spectral.radau,vpa,"P")
+            #print_matrix(MMpar,"MMpar",vpa.ngrid,vpa.ngrid)
+            #print_matrix(KKpar,"KKpar",vpa.ngrid,vpa.ngrid)
+            #print_matrix(PPpar,"PPpar",vpa.ngrid,vpa.ngrid)
+            
+            # loop over all (row, column) pairs of the 2D element: unprimed local indices
+            # give the row `ic_global`, primed ones the column `icp_global`
+            for ivperpp_local in 1:ngrid_vperp
+                for ivperp_local in 1:ngrid_vperp
+                    for ivpap_local in 1:ngrid_vpa
+                        for ivpa_local in 1:ngrid_vpa
+                            ic_global = get_global_compound_index(vpa,vperp,ielement_vpa,ielement_vperp,ivpa_local,ivperp_local)
+                            icp_global = get_global_compound_index(vpa,vperp,ielement_vpa,ielement_vperp,ivpap_local,ivperpp_local) #get_indices(vpa,vperp,ielement_vpa,ielement_vperp,ivpa_local,ivpap_local,ivperp_local,ivperpp_local)
+                            # position of this (row, column) entry in the constructor storage
+                            icsc = icsc_func(ivpa_local,ivpap_local,ielement_vpa::mk_int,
+                                           ngrid_vpa,nelement_vpa,
+                                           ivperp_local,ivperpp_local,
+                                           ielement_vperp,
+                                           ngrid_vperp,nelement_vperp)
+                            #println("ielement_vpa: ",ielement_vpa," ielement_vperp: ",ielement_vperp)
+                            #println("ivpa_local: ",ivpa_local," ivpap_local: ",ivpap_local)
+                            #println("ivperp_local: ",ivperp_local," ivperpp_local: ",ivperpp_local)
+                            #println("ic: ",ic_global," icp: ",icp_global)
+                            # boundary condition possibilities
+                            lower_boundary_row_vpa = (ielement_vpa == 1 && ivpa_local == 1)
+                            upper_boundary_row_vpa = (ielement_vpa == vpa.nelement_local && ivpa_local == vpa.ngrid)
+                            lower_boundary_row_vperp = (ielement_vperp == 1 && ivperp_local == 1)
+                            upper_boundary_row_vperp = (ielement_vperp == vperp.nelement_local && ivperp_local == vperp.ngrid)
+                            
+
+                            # Rows of LP2D, LV2D and LB2D that belong to a boundary point are
+                            # written with assign_constructor_data!(): 1.0 where the column
+                            # matches the row's own grid point, 0.0 elsewhere. All other rows
+                            # go through assemble_constructor_data!() (presumably accumulating
+                            # contributions from neighbouring elements -- confirm against the
+                            # helper definitions). The branches are tested in order, so at a
+                            # corner point the vpa branches take precedence over the vperp ones.
+                            if lower_boundary_row_vpa
+                                if ivpap_local == 1 && ivperp_local == ivperpp_local
+                                    assign_constructor_data!(LP2D,icsc,ic_global,icp_global,1.0)
+                                    assign_constructor_data!(LV2D,icsc,ic_global,icp_global,1.0)
+                                    assign_constructor_data!(LB2D,icsc,ic_global,icp_global,1.0)
+                                else 
+                                    assign_constructor_data!(LP2D,icsc,ic_global,icp_global,0.0)
+                                    assign_constructor_data!(LV2D,icsc,ic_global,icp_global,0.0)
+                                    assign_constructor_data!(LB2D,icsc,ic_global,icp_global,0.0)
+                                end
+                            elseif upper_boundary_row_vpa
+                                if ivpap_local == vpa.ngrid && ivperp_local == ivperpp_local 
+                                    assign_constructor_data!(LP2D,icsc,ic_global,icp_global,1.0)
+                                    assign_constructor_data!(LV2D,icsc,ic_global,icp_global,1.0)
+                                    assign_constructor_data!(LB2D,icsc,ic_global,icp_global,1.0)
+                                else 
+                                    assign_constructor_data!(LP2D,icsc,ic_global,icp_global,0.0)
+                                    assign_constructor_data!(LV2D,icsc,ic_global,icp_global,0.0)
+                                    assign_constructor_data!(LB2D,icsc,ic_global,icp_global,0.0)
+                                end
+                            # inactive while impose_BC_at_zero_vperp == false (set above)
+                            elseif lower_boundary_row_vperp && impose_BC_at_zero_vperp
+                                if ivperpp_local == 1 && ivpa_local == ivpap_local
+                                    assign_constructor_data!(LP2D,icsc,ic_global,icp_global,1.0)
+                                    assign_constructor_data!(LV2D,icsc,ic_global,icp_global,1.0)
+                                    assign_constructor_data!(LB2D,icsc,ic_global,icp_global,1.0)
+                                else 
+                                    assign_constructor_data!(LP2D,icsc,ic_global,icp_global,0.0)
+                                    assign_constructor_data!(LV2D,icsc,ic_global,icp_global,0.0)
+                                    assign_constructor_data!(LB2D,icsc,ic_global,icp_global,0.0)
+                                end
+                            elseif upper_boundary_row_vperp
+                                if ivperpp_local == vperp.ngrid && ivpa_local == ivpap_local
+                                    assign_constructor_data!(LP2D,icsc,ic_global,icp_global,1.0)
+                                    assign_constructor_data!(LV2D,icsc,ic_global,icp_global,1.0)
+                                    assign_constructor_data!(LB2D,icsc,ic_global,icp_global,1.0)
+                                else 
+                                    assign_constructor_data!(LP2D,icsc,ic_global,icp_global,0.0)
+                                    assign_constructor_data!(LV2D,icsc,ic_global,icp_global,0.0)
+                                    assign_constructor_data!(LB2D,icsc,ic_global,icp_global,0.0)
+                                end
+                            else
+                                # assign Laplacian matrix data
+                                assemble_constructor_data!(LP2D,icsc,ic_global,icp_global,
+                                            (KKpar[ivpa_local,ivpap_local]*
+                                             MMperp[ivperp_local,ivperpp_local] +
+                                             MMpar[ivpa_local,ivpap_local]*
+                                             LLperp[ivperp_local,ivperpp_local]))
+                                assemble_constructor_data!(LV2D,icsc,ic_global,icp_global,
+                                            (KKpar[ivpa_local,ivpap_local]*
+                                             MRperp[ivperp_local,ivperpp_local] +
+                                             MMpar[ivpa_local,ivpap_local]*
+                                            (KJperp[ivperp_local,ivperpp_local] -
+                                             PPperp[ivperp_local,ivperpp_local] - 
+                                             MNperp[ivperp_local,ivperpp_local])))
+                                # LB2D differs from LV2D only in the factor 4.0 on MNperp
+                                assemble_constructor_data!(LB2D,icsc,ic_global,icp_global,
+                                            (KKpar[ivpa_local,ivpap_local]*
+                                             MRperp[ivperp_local,ivperpp_local] +
+                                             MMpar[ivpa_local,ivpap_local]*
+                                             (KJperp[ivperp_local,ivperpp_local] -
+                                              PPperp[ivperp_local,ivperpp_local] -
+                                          4.0*MNperp[ivperp_local,ivperpp_local])))
+                            end
+                            # The matrices below receive no boundary-row treatment: every
+                            # (row, column) pair, including boundary rows, is assembled.
+                            #assign mass matrix
+                            assemble_constructor_data!(MM2D,icsc,ic_global,icp_global,
+                                            (MMpar[ivpa_local,ivpap_local]*
+                                             MMperp[ivperp_local,ivperpp_local]))
+                                
+                            # assign K matrices (no explicit boundary terms)
+                            assemble_constructor_data!(KKpar2D,icsc,ic_global,icp_global,
+                                            (KKpar[ivpa_local,ivpap_local]*
+                                             MMperp[ivperp_local,ivperpp_local]))
+                            assemble_constructor_data!(KKperp2D,icsc,ic_global,icp_global,
+                                            (MMpar[ivpa_local,ivpap_local]*
+                                             KKperp[ivperp_local,ivperpp_local]))
+                            assemble_constructor_data!(KPperp2D,icsc,ic_global,icp_global,
+                                            (MMpar[ivpa_local,ivpap_local]*
+                                             (KJperp[ivperp_local,ivperpp_local] -
+                                              2.0*PPperp[ivperp_local,ivperpp_local] -
+                                              2.0*MNperp[ivperp_local,ivperpp_local])))
+                                             
+                            # assign K matrices (with explicit boundary terms from integration by parts)
+                            assemble_constructor_data!(KKpar2D_with_BC_terms,icsc,ic_global,icp_global,
+                                            (KKpar_with_BC_terms[ivpa_local,ivpap_local]*
+                                             MMperp[ivperp_local,ivperpp_local]))
+                            assemble_constructor_data!(KKperp2D_with_BC_terms,icsc,ic_global,icp_global,
+                                            (MMpar[ivpa_local,ivpap_local]*
+                                             KKperp_with_BC_terms[ivperp_local,ivperpp_local]))
+                            # assign PU matrix
+                            assemble_constructor_data!(PUperp2D,icsc,ic_global,icp_global,
+                                            (MMpar[ivpa_local,ivpap_local]*
+                                             PUperp[ivperp_local,ivperpp_local]))
+                            assemble_constructor_data!(PPparPUperp2D,icsc,ic_global,icp_global,
+                                            (PPpar[ivpa_local,ivpap_local]*
+                                             PUperp[ivperp_local,ivperpp_local]))
+                            assemble_constructor_data!(PPpar2D,icsc,ic_global,icp_global,
+                                            (PPpar[ivpa_local,ivpap_local]*
+                                             MMperp[ivperp_local,ivperpp_local]))
+                            # assign RHS mass matrix for d2Gdvperp2
+                            assemble_constructor_data!(MMparMNperp2D,icsc,ic_global,icp_global,
+                                            (MMpar[ivpa_local,ivpap_local]*
+                                             MNperp[ivperp_local,ivperpp_local]))
+                        end
+                    end
+                end
+            end
+        end
+    end
+    # turn each filled constructor into a sparse matrix
+    MM2D_sparse = create_sparse_matrix(MM2D)
+    KKpar2D_sparse = create_sparse_matrix(KKpar2D)
+    KKperp2D_sparse = create_sparse_matrix(KKperp2D)
+    KKpar2D_with_BC_terms_sparse = create_sparse_matrix(KKpar2D_with_BC_terms)
+    KKperp2D_with_BC_terms_sparse = create_sparse_matrix(KKperp2D_with_BC_terms)
+    LP2D_sparse = create_sparse_matrix(LP2D)
+    LV2D_sparse = create_sparse_matrix(LV2D)
+    LB2D_sparse = create_sparse_matrix(LB2D)
+    KPperp2D_sparse = create_sparse_matrix(KPperp2D)
+    PUperp2D_sparse = create_sparse_matrix(PUperp2D)
+    PPparPUperp2D_sparse = create_sparse_matrix(PPparPUperp2D)
+    PPpar2D_sparse = create_sparse_matrix(PPpar2D)
+    MMparMNperp2D_sparse = create_sparse_matrix(MMparMNperp2D)
+    @serial_region begin
+        # note: this message is printed after the create_sparse_matrix() calls above
+        if global_rank[] == 0 && print_to_screen
+            println("finished elliptic operator constructor assignment   ", Dates.format(now(), dateformat"H:MM:SS"))
+        end
+        #if nc_global < 60
+        #    println("MM2D_sparse \n",MM2D_sparse)
+        #    print_matrix(Array(MM2D_sparse),"MM2D_sparse",nc_global,nc_global)
+        #    print_matrix(KKpar2D,"KKpar2D",nc_global,nc_global)
+        #    print_matrix(KKperp2D,"KKperp2D",nc_global,nc_global)
+        #    print_matrix(LP2D,"LP",nc_global,nc_global)
+        #    print_matrix(LV2D,"LV",nc_global,nc_global)
+        #end
+    end
+    return MM2D_sparse, KKpar2D_sparse, KKperp2D_sparse, 
+           KKpar2D_with_BC_terms_sparse, KKperp2D_with_BC_terms_sparse, 
+           LP2D_sparse, LV2D_sparse, LB2D_sparse, 
+           KPperp2D_sparse, PUperp2D_sparse, PPparPUperp2D_sparse,
+           PPpar2D_sparse, MMparMNperp2D_sparse
+end
+
+"""
+Function to allocate and fill an instance of `YY_collision_operator_arrays`.
+Calls `get_QQ_local!()` from `gauss_legendre`. Definitions of these
+nonlinear stiffness matrices can be found in the docs for `get_QQ_local!()`.
+"""
+function calculate_YY_arrays(vpa,vperp,vpa_spectral,vperp_spectral)
+    # Builds the element-local arrays labelled "YY0".."YY3" for both velocity
+    # coordinates and bundles them into a `YY_collision_operator_arrays`.
+    # Arguments:
+    #   vpa, vperp: coordinates; `ngrid` and `nelement_local` set the array sizes
+    #   vpa_spectral, vperp_spectral: supply the `lobatto` and `radau` data that
+    #                                 are handed to `get_QQ_local!()`
+    # Returns:
+    #   YY_collision_operator_arrays(YY0perp,YY1perp,YY2perp,YY3perp,
+    #                                YY0par,YY1par,YY2par,YY3par)
+    labels = ("YY0","YY1","YY2","YY3")
+
+    # one array of size (ngrid,ngrid,ngrid,nelement_local) per label
+    allocate_set(coord) = ntuple(_ -> Array{mk_float,4}(undef,coord.ngrid,coord.ngrid,coord.ngrid,coord.nelement_local), 4)
+
+    # fill the slab [:,:,:,ielement] of every array in `arrays`, element by element
+    function fill_set!(arrays,coord,spectral)
+        for ielement in 1:coord.nelement_local
+            for (label,YY) in zip(labels,arrays)
+                get_QQ_local!(view(YY,:,:,:,ielement),ielement,spectral.lobatto,spectral.radau,coord,label)
+            end
+        end
+        return nothing
+    end
+
+    YYperp = allocate_set(vperp)
+    YYpar = allocate_set(vpa)
+    fill_set!(YYperp,vperp,vperp_spectral)
+    fill_set!(YYpar,vpa,vpa_spectral)
+
+    return YY_collision_operator_arrays(YYperp...,YYpar...)
+end
+
+"""
+Function to assemble the RHS of the kinetic equation due to the collision operator,
+in weak form. Once the array `rhsvpavperp` contains the assembled weak-form collision operator,
+a mass matrix solve still must be carried out to find the time derivative of the distribution function
+due to collisions. This function uses a purely serial algorithm for testing purposes.
+"""
+function assemble_explicit_collision_operator_rhs_serial!(rhsvpavperp,pdfs,d2Gspdvpa2,d2Gspdvperpdvpa,
+    d2Gspdvperp2,dHspdvpa,dHspdvperp,ms,msp,nussp,
+    vpa,vperp,YY_arrays::YY_collision_operator_arrays)
+    # Serial assembly of the weak-form collision operator RHS.
+    # Arguments (as used in this body):
+    #   rhsvpavperp: output array, overwritten; written through `vec()` using the
+    #                compound index from `get_global_compound_index()`
+    #   pdfs, d2Gspdvpa2, d2Gspdvperpdvpa, d2Gspdvperp2, dHspdvpa, dHspdvperp:
+    #                input arrays indexed as [ivpa,ivperp] with global grid indices
+    #   ms, msp:     enter only through the ratio ms/msp
+    #   nussp:       the assembled sum is multiplied by -nussp at the end
+    #   vpa, vperp:  coordinates; `ngrid`, `nelement_local` and `igrid_full` are read
+    #   YY_arrays:   element-local arrays indexed as [k_local,j_local,i_local,ielement]
+    # Returns nothing.
+    begin_anyv_region()
+    @anyv_serial_region begin
+        # assemble RHS of collision operator
+        rhsc = vec(rhsvpavperp)
+        @. rhsc = 0.0
+        
+        # loop over elements
+        for ielement_vperp in 1:vperp.nelement_local
+            # take views (as the parallel version does): a plain slice would copy the
+            # whole 3D block for every element
+            @views YY0perp = YY_arrays.YY0perp[:,:,:,ielement_vperp]
+            @views YY1perp = YY_arrays.YY1perp[:,:,:,ielement_vperp]
+            @views YY2perp = YY_arrays.YY2perp[:,:,:,ielement_vperp]
+            @views YY3perp = YY_arrays.YY3perp[:,:,:,ielement_vperp]
+            
+            for ielement_vpa in 1:vpa.nelement_local
+                @views YY0par = YY_arrays.YY0par[:,:,:,ielement_vpa]
+                @views YY1par = YY_arrays.YY1par[:,:,:,ielement_vpa]
+                @views YY2par = YY_arrays.YY2par[:,:,:,ielement_vpa]
+                @views YY3par = YY_arrays.YY3par[:,:,:,ielement_vpa]
+                
+                # loop over field positions in each element
+                for ivperp_local in 1:vperp.ngrid
+                    for ivpa_local in 1:vpa.ngrid
+                        ic_global = get_global_compound_index(vpa,vperp,ielement_vpa,ielement_vperp,ivpa_local,ivperp_local)
+                        # carry out the matrix sum on each 2D element
+                        # (j indices select the pdf value, k indices the potential derivatives)
+                        for jvperpp_local in 1:vperp.ngrid
+                            jvperpp = vperp.igrid_full[jvperpp_local,ielement_vperp]
+                            for kvperpp_local in 1:vperp.ngrid
+                                kvperpp = vperp.igrid_full[kvperpp_local,ielement_vperp]
+                                for jvpap_local in 1:vpa.ngrid
+                                    jvpap = vpa.igrid_full[jvpap_local,ielement_vpa]
+                                    pdfjj = pdfs[jvpap,jvperpp]
+                                    for kvpap_local in 1:vpa.ngrid
+                                        kvpap = vpa.igrid_full[kvpap_local,ielement_vpa]
+                                        # first three lines represent parallel flux terms
+                                        # second three lines represent perpendicular flux terms
+                                        rhsc[ic_global] += (YY0perp[kvperpp_local,jvperpp_local,ivperp_local]*YY2par[kvpap_local,jvpap_local,ivpa_local]*pdfjj*d2Gspdvpa2[kvpap,kvperpp] +
+                                                            YY3perp[kvperpp_local,jvperpp_local,ivperp_local]*YY1par[kvpap_local,jvpap_local,ivpa_local]*pdfjj*d2Gspdvperpdvpa[kvpap,kvperpp] - 
+                                                            2.0*(ms/msp)*YY0perp[kvperpp_local,jvperpp_local,ivperp_local]*YY1par[kvpap_local,jvpap_local,ivpa_local]*pdfjj*dHspdvpa[kvpap,kvperpp] +
+                                                            # end parallel flux, start of perpendicular flux
+                                                            YY1perp[kvperpp_local,jvperpp_local,ivperp_local]*YY3par[kvpap_local,jvpap_local,ivpa_local]*pdfjj*d2Gspdvperpdvpa[kvpap,kvperpp] + 
+                                                            YY2perp[kvperpp_local,jvperpp_local,ivperp_local]*YY0par[kvpap_local,jvpap_local,ivpa_local]*pdfjj*d2Gspdvperp2[kvpap,kvperpp] - 
+                                                            2.0*(ms/msp)*YY1perp[kvperpp_local,jvperpp_local,ivperp_local]*YY0par[kvpap_local,jvpap_local,ivpa_local]*pdfjj*dHspdvperp[kvpap,kvperpp])
+                                    end
+                                end
+                            end
+                        end
+                    end
+                end 
+            end
+        end
+        # correct for minus sign due to integration by parts
+        # and multiply by the normalised collision frequency
+        @. rhsc *= -nussp
+    end
+    return nothing
+end
+
+"""
+Function to assemble the RHS of the kinetic equation due to the collision operator,
+in weak form. Once the array `rhsvpavperp` contains the assembled weak-form collision operator,
+a mass matrix solve still must be carried out to find the time derivative of the distribution function
+due to collisions. This function uses a purely parallel algorithm and may be tested by comparing to 
+`assemble_explicit_collision_operator_rhs_serial!()`. The inner-most loop of the function is 
+in `assemble_explicit_collision_operator_rhs_parallel_inner_loop()`.
+"""
+function assemble_explicit_collision_operator_rhs_parallel!(rhsvpavperp,pdfs,d2Gspdvpa2,d2Gspdvperpdvpa,
+    d2Gspdvperp2,dHspdvpa,dHspdvperp,ms,msp,nussp,
+    vpa,vperp,YY_arrays::YY_collision_operator_arrays)
+    # Assembly of the weak-form collision operator RHS, looping over collocation
+    # points with `@loop_vperp_vpa`.
+    # Arguments (as used in this body):
+    #   rhsvpavperp: output array indexed [ivpa,ivperp]; zeroed, then accumulated into
+    #   pdfs, d2Gspdvpa2, d2Gspdvperpdvpa, d2Gspdvperp2, dHspdvpa, dHspdvperp, ms, msp,
+    #   nussp:       forwarded unchanged to the inner-loop function
+    #   vpa, vperp:  coordinates; `ngrid` and `igrid_full` are read
+    #   YY_arrays:   element-local arrays indexed as [k_local,j_local,i_local,ielement]
+    # Returns nothing.
+    begin_anyv_vperp_vpa_region()
+    # start from a zeroed RHS
+    @loop_vperp_vpa ivperp ivpa begin
+        rhsvpavperp[ivpa,ivperp] = 0.0
+    end
+
+    nvpa = vpa.ngrid
+    nvperp = vperp.ngrid
+    # one iteration per collocation point, so that each point is written by a single iteration
+    @loop_vperp_vpa ivperp_global ivpa_global begin
+        (igrid_vpa, _, first_element_vpa, last_element_vpa,
+         igrid_vperp, _, first_element_vperp, last_element_vperp) =
+            get_element_limit_indices(ivpa_global,ivperp_global,vpa,vperp)
+        # visit every element that contains this collocation point
+        for ielement_vperp in first_element_vperp:last_element_vperp
+            # local index is igrid_vperp in the first element of the range and is
+            # shifted down by (ngrid-1) for each further element
+            ivperp_local = igrid_vperp - (ielement_vperp - first_element_vperp)*(nvperp - 1)
+            YY0perp = view(YY_arrays.YY0perp,:,:,ivperp_local,ielement_vperp)
+            YY1perp = view(YY_arrays.YY1perp,:,:,ivperp_local,ielement_vperp)
+            YY2perp = view(YY_arrays.YY2perp,:,:,ivperp_local,ielement_vperp)
+            YY3perp = view(YY_arrays.YY3perp,:,:,ivperp_local,ielement_vperp)
+            # global vperp indices of the grid points of this element
+            vperp_indices = view(vperp.igrid_full,:,ielement_vperp)
+
+            for ielement_vpa in first_element_vpa:last_element_vpa
+                # same local-index shift as for vperp
+                ivpa_local = igrid_vpa - (ielement_vpa - first_element_vpa)*(nvpa - 1)
+                YY0par = view(YY_arrays.YY0par,:,:,ivpa_local,ielement_vpa)
+                YY1par = view(YY_arrays.YY1par,:,:,ivpa_local,ielement_vpa)
+                YY2par = view(YY_arrays.YY2par,:,:,ivpa_local,ielement_vpa)
+                YY3par = view(YY_arrays.YY3par,:,:,ivpa_local,ielement_vpa)
+                # global vpa indices of the grid points of this element
+                vpa_indices = view(vpa.igrid_full,:,ielement_vpa)
+
+                # add the contribution of this 2D element
+                rhsvpavperp[ivpa_global,ivperp_global] +=
+                    assemble_explicit_collision_operator_rhs_parallel_inner_loop(
+                        nussp, ms, msp, YY0perp, YY0par, YY1perp, YY1par, YY2perp, YY2par,
+                        YY3perp, YY3par, pdfs, d2Gspdvpa2, d2Gspdvperpdvpa, d2Gspdvperp2,
+                        dHspdvpa, dHspdvperp, nvperp, vperp_indices,
+                        nvpa, vpa_indices)
+            end
+        end
+    end
+    return nothing
+end
+
+"""
+The inner-most loop of the parallel collision operator assembly. Called in `assemble_explicit_collision_operator_rhs_parallel!()`.
+"""
+function assemble_explicit_collision_operator_rhs_parallel_inner_loop(
+        nussp, ms, msp, YY0perp, YY0par, YY1perp, YY1par, YY2perp, YY2par, YY3perp,
+        YY3par, pdfs, d2Gspdvpa2, d2Gspdvperpdvpa, d2Gspdvperp2, dHspdvpa, dHspdvperp,
+        ngrid_vperp, vperp_igrid_full_view, ngrid_vpa, vpa_igrid_full_view)
+    # Contribution of one 2D element to the RHS at one collocation point.
+    # Arguments (as used in this body):
+    #   nussp, ms, msp: scalars; the sum is scaled by -nussp, masses enter as ms/msp
+    #   YY*perp, YY*par: 2D arrays indexed [k_local,j_local]
+    #   pdfs and the potential-derivative arrays: indexed [ivpa,ivperp] with the
+    #       global indices taken from the two `*_igrid_full_view` vectors
+    #   ngrid_vperp, ngrid_vpa: number of local grid points looped over
+    # Returns the accumulated sum (0.0 if either ngrid is zero).
+    mass_factor = 2.0*(ms/msp)
+    total = 0.0
+    for jperp in 1:ngrid_vperp
+        iperp_j = vperp_igrid_full_view[jperp]
+        for kperp in 1:ngrid_vperp
+            iperp_k = vperp_igrid_full_view[kperp]
+            Y0perp = YY0perp[kperp,jperp]
+            Y1perp = YY1perp[kperp,jperp]
+            Y2perp = YY2perp[kperp,jperp]
+            Y3perp = YY3perp[kperp,jperp]
+            for jpar in 1:ngrid_vpa
+                ipar_j = vpa_igrid_full_view[jpar]
+                # the pdf is evaluated at the j indices, the potentials at the k indices
+                f_jj = pdfs[ipar_j,iperp_j]
+                for kpar in 1:ngrid_vpa
+                    ipar_k = vpa_igrid_full_view[kpar]
+                    Y0par = YY0par[kpar,jpar]
+                    Y1par = YY1par[kpar,jpar]
+                    d2G_mixed = d2Gspdvperpdvpa[ipar_k,iperp_k]
+                    # parallel flux terms
+                    par_G = Y0perp*YY2par[kpar,jpar]*f_jj*d2Gspdvpa2[ipar_k,iperp_k]
+                    par_Gmixed = Y3perp*Y1par*f_jj*d2G_mixed
+                    par_H = mass_factor*Y0perp*Y1par*f_jj*dHspdvpa[ipar_k,iperp_k]
+                    # perpendicular flux terms
+                    perp_Gmixed = Y1perp*YY3par[kpar,jpar]*f_jj*d2G_mixed
+                    perp_G = Y2perp*Y0par*f_jj*d2Gspdvperp2[ipar_k,iperp_k]
+                    perp_H = mass_factor*Y1perp*Y0par*f_jj*dHspdvperp[ipar_k,iperp_k]
+                    # terms are combined strictly left to right
+                    parallel_flux = par_G + par_Gmixed - par_H
+                    flux = parallel_flux + perp_Gmixed + perp_G - perp_H
+                    total += -nussp*flux
+                end
+            end
+        end
+    end
+
+    return total
+end
+
+"""
+Function to assemble the RHS of the kinetic equation due to the collision operator,
+in weak form, when the distribution function appearing in the derivatives is known analytically.
+The inner-most loop of the function is 
+in `assemble_explicit_collision_operator_rhs_parallel_analytical_inputs_inner_loop()`.
+"""
+function assemble_explicit_collision_operator_rhs_parallel_analytical_inputs!(rhsvpavperp,pdfs,dpdfsdvpa,dpdfsdvperp,d2Gspdvpa2,d2Gspdvperpdvpa,
+    d2Gspdvperp2,dHspdvpa,dHspdvperp,ms,msp,nussp,
+    vpa,vperp,YY_arrays::YY_collision_operator_arrays)
+    # Assembly of the weak-form collision operator RHS, looping over collocation
+    # points with `@loop_vperp_vpa`, for the case where `pdfs` and its derivatives
+    # `dpdfsdvpa`, `dpdfsdvperp` are supplied by the caller.
+    # Arguments (as used in this body):
+    #   rhsvpavperp: output array indexed [ivpa,ivperp]; zeroed, then accumulated into
+    #   pdfs, dpdfsdvpa, dpdfsdvperp, d2Gspdvpa2, d2Gspdvperpdvpa, d2Gspdvperp2,
+    #   dHspdvpa, dHspdvperp, ms, msp, nussp:
+    #                forwarded unchanged to the inner-loop function
+    #   vpa, vperp:  coordinates; `ngrid` and `igrid_full` are read
+    #   YY_arrays:   only the YY0 and YY1 members are used here
+    # Returns nothing.
+    # assemble RHS of collision operator
+    begin_anyv_vperp_vpa_region()
+    @loop_vperp_vpa ivperp ivpa begin
+        rhsvpavperp[ivpa,ivperp] = 0.0
+    end
+
+    # loop over collocation points to benefit from shared-memory parallelism
+    ngrid_vpa, ngrid_vperp = vpa.ngrid, vperp.ngrid
+    vperp_igrid_full = vperp.igrid_full
+    vpa_igrid_full = vpa.igrid_full
+    @loop_vperp_vpa ivperp_global ivpa_global begin
+        # the 2nd and 6th return values are not needed here
+        igrid_vpa, _, ielement_vpa_low, ielement_vpa_hi, igrid_vperp, _, ielement_vperp_low, ielement_vperp_hi = get_element_limit_indices(ivpa_global,ivperp_global,vpa,vperp)
+        # loop over elements belonging to this collocation point
+        for ielement_vperp in ielement_vperp_low:ielement_vperp_hi
+            # correct local ivperp in the case that we are on a boundary point
+            ivperp_local = igrid_vperp + (ielement_vperp - ielement_vperp_low)*(1-ngrid_vperp)
+            # only YY0 and YY1 are passed to the inner loop, so no YY2/YY3 views are taken
+            @views YY0perp = YY_arrays.YY0perp[:,:,ivperp_local,ielement_vperp]
+            @views YY1perp = YY_arrays.YY1perp[:,:,ivperp_local,ielement_vperp]
+            vperp_igrid_full_view = @view vperp_igrid_full[:,ielement_vperp]
+            
+            for ielement_vpa in ielement_vpa_low:ielement_vpa_hi
+                # correct local ivpa in the case that we are on a boundary point
+                ivpa_local = igrid_vpa + (ielement_vpa - ielement_vpa_low)*(1-ngrid_vpa)
+                @views YY0par = YY_arrays.YY0par[:,:,ivpa_local,ielement_vpa]
+                @views YY1par = YY_arrays.YY1par[:,:,ivpa_local,ielement_vpa]
+                vpa_igrid_full_view = @view vpa_igrid_full[:,ielement_vpa]
+                
+                # carry out the matrix sum on each 2D element
+                rhsvpavperp[ivpa_global,ivperp_global] +=
+                    assemble_explicit_collision_operator_rhs_parallel_analytical_inputs_inner_loop(
+                        nussp, ms, msp, pdfs, dpdfsdvpa, dpdfsdvperp, d2Gspdvperp2,
+                        d2Gspdvpa2, d2Gspdvperpdvpa, dHspdvperp, dHspdvpa, YY0perp,
+                        YY0par, YY1perp, YY1par, ngrid_vperp, vperp_igrid_full_view,
+                        ngrid_vpa, vpa_igrid_full_view)
+            end
+        end
+    end
+    return nothing
+end
+
+"""
+The inner-most loop of `assemble_explicit_collision_operator_rhs_parallel_analytical_inputs!()`.
+Kept as a separate function for the inner loop as a possible optimization.
+"""
+function assemble_explicit_collision_operator_rhs_parallel_analytical_inputs_inner_loop(
+        nussp, ms, msp, pdfs, dpdfsdvpa, dpdfsdvperp, d2Gspdvperp2,
+        d2Gspdvpa2, d2Gspdvperpdvpa, dHspdvperp, dHspdvpa, YY0perp, YY0par, YY1perp,
+        YY1par, ngrid_vperp, vperp_igrid_full_view, ngrid_vpa, vpa_igrid_full_view)
+
+    # carry out the matrix sum on one 2D element, accumulating into a scalar that is returned
+    result = 0.0
+    for jvperpp_local in 1:ngrid_vperp
+        jvperpp = vperp_igrid_full_view[jvperpp_local]  # index used to address the full vperp grid
+        for kvperpp_local in 1:ngrid_vperp
+            kvperpp = vperp_igrid_full_view[kvperpp_local]
+            YY0perp_kj = YY0perp[kvperpp_local,jvperpp_local]
+            YY1perp_kj = YY1perp[kvperpp_local,jvperpp_local]
+            for jvpap_local in 1:ngrid_vpa
+                jvpap = vpa_igrid_full_view[jvpap_local]  # index used to address the full vpa grid
+                pdfs_jj = pdfs[jvpap,jvperpp]
+                dpdfsdvperp_jj = dpdfsdvperp[jvpap,jvperpp]
+                dpdfsdvpa_jj = dpdfsdvpa[jvpap,jvperpp]
+                for kvpap_local in 1:ngrid_vpa
+                    kvpap = vpa_igrid_full_view[kvpap_local]
+                    YY0par_kj = YY0par[kvpap_local,jvpap_local]
+                    YY1par_kj = YY1par[kvpap_local,jvpap_local]
+                    d2Gspdvperpdvpa_kk = d2Gspdvperpdvpa[kvpap,kvperpp]
+                    # first three lines represent parallel flux terms
+                    # second three lines represent perpendicular flux terms
+                    result +=
+                        -nussp*(YY0perp_kj*YY1par_kj*dpdfsdvpa_jj*d2Gspdvpa2[kvpap,kvperpp] +
+                                YY0perp_kj*YY1par_kj*dpdfsdvperp_jj*d2Gspdvperpdvpa_kk -
+                                2.0*(ms/msp)*YY0perp_kj*YY1par_kj*pdfs_jj*dHspdvpa[kvpap,kvperpp] +
+                                # end parallel flux, start of perpendicular flux
+                                YY1perp_kj*YY0par_kj*dpdfsdvpa_jj*d2Gspdvperpdvpa_kk +
+                                YY1perp_kj*YY0par_kj*dpdfsdvperp_jj*d2Gspdvperp2[kvpap,kvperpp] -
+                                2.0*(ms/msp)*YY1perp_kj*YY0par_kj*pdfs_jj*dHspdvperp[kvpap,kvperpp])
+                end
+            end
+        end
+    end
+
+    return result
+end
+
+"""
+Elliptic solve function.
+
+    field: the solution (written in place)
+    source: the source function on the RHS
+    boundary_data: the known values of field at infinity
+    lu_object_lhs: the object for the differential operator that defines field
+    matrix_rhs: the weak matrix acting on the source vector
+    rhsvpavperp: buffer for the assembled RHS; vpa, vperp: coordinate structs
+
+Note: all variants of `elliptic_solve!()` run only in serial. They do not handle
+shared-memory parallelism themselves. The calling site must ensure that
+`elliptic_solve!()` is only called by one process in a shared-memory block.
+"""
+function elliptic_solve!(field,source,boundary_data::vpa_vperp_boundary_data,
+            lu_object_lhs,matrix_rhs,rhsvpavperp,vpa,vperp)
+    # assemble the rhs of the weak system
+
+    # get data into the compound index format (flattened with `vec`; the code below relies on these sharing memory with the 2D arrays)
+    sc = vec(source)
+    fc = vec(field)
+    rhsc = vec(rhsvpavperp)
+    mul!(rhsc,matrix_rhs,sc)  # rhsc = matrix_rhs*sc
+    # enforce the boundary conditions (applied through the 2D array rhsvpavperp)
+    enforce_dirichlet_bc!(rhsvpavperp,vpa,vperp,boundary_data)
+    # solve the linear system, writing the solution into field through fc
+    ldiv!(fc, lu_object_lhs, rhsc)
+
+    return nothing
+end
+# Variant of `elliptic_solve!()` where the source is made of two different terms
+# with different weak matrices: rhs = matrix_rhs_1*source_1 + matrix_rhs_2*source_2
+function elliptic_solve!(field,source_1,source_2,boundary_data::vpa_vperp_boundary_data,
+            lu_object_lhs,matrix_rhs_1,matrix_rhs_2,rhs,vpa,vperp)
+    
+    # assemble the rhs of the weak system
+
+    # get data into the compound index format (flattened with `vec`; the code below relies on these sharing memory with the 2D arrays)
+    sc_1 = vec(source_1)
+    sc_2 = vec(source_2)
+    rhsc = vec(rhs)
+    fc = vec(field)
+
+    # Do rhsc = matrix_rhs_1*sc_1
+    mul!(rhsc, matrix_rhs_1, sc_1)
+
+    # Do rhsc = matrix_rhs_2*sc_2 + rhsc (five-argument mul! with alpha = beta = 1.0)
+    mul!(rhsc, matrix_rhs_2, sc_2, 1.0, 1.0)
+
+    # enforce the boundary conditions (applied through the 2D array rhs)
+    enforce_dirichlet_bc!(rhs,vpa,vperp,boundary_data)
+    # solve the linear system, writing the solution into field through fc
+    ldiv!(fc, lu_object_lhs, rhsc)
+
+    return nothing
+end
+
+"""
+Same as `elliptic_solve!()` above but no Dirichlet boundary conditions are imposed,
+because the function is only used where the `lu_object_lhs` is derived from a mass matrix.
+The source is made of two different terms with different weak matrices
+because of the form of the only algebraic equation that we consider.
+
+Note: `algebraic_solve!()` runs only in serial. It does not handle shared-memory
+parallelism itself. The calling site must ensure that `algebraic_solve!()` is only
+called by one process in a shared-memory block.
+"""
+function algebraic_solve!(field,source_1,source_2,boundary_data::vpa_vperp_boundary_data,
+            lu_object_lhs,matrix_rhs_1,matrix_rhs_2,rhs,vpa,vperp)
+    
+    # assemble the rhs of the weak system (boundary_data, vpa and vperp are accepted but not used in this function)
+
+    # get data into the compound index format (flattened with `vec`)
+    sc_1 = vec(source_1)
+    sc_2 = vec(source_2)
+    rhsc = vec(rhs)
+    fc = vec(field)
+
+    # Do rhsc = matrix_rhs_1*sc_1
+    mul!(rhsc, matrix_rhs_1, sc_1)
+
+    # Do rhsc = matrix_rhs_2*sc_2 + rhsc (five-argument mul! with alpha = beta = 1.0)
+    mul!(rhsc, matrix_rhs_2, sc_2, 1.0, 1.0)
+
+    # solve the linear system, writing the solution into field through fc
+    ldiv!(fc, lu_object_lhs, rhsc)
+
+    return nothing
+end
+
+"""
+Function to solve the appropriate elliptic PDEs to find the
+Rosenbluth potentials. First, we calculate the Rosenbluth potentials
+at the boundary with the direct integration method. Then, we use this
+data to solve the elliptic PDEs with the boundary data providing an
+accurate Dirichlet boundary condition on the maximum `vpa` and `vperp`
+of the domain. We use the sparse LU decomposition from the LinearAlgebra package
+to solve the PDE matrix equations.
+"""
+function calculate_rosenbluth_potentials_via_elliptic_solve!(GG,HH,dHdvpa,dHdvperp,
+             d2Gdvpa2,dGdvperp,d2Gdvperpdvpa,d2Gdvperp2,ffsp_in,
+             vpa,vperp,vpa_spectral,vperp_spectral,fkpl_arrays::fokkerplanck_weakform_arrays_struct;
+             algebraic_solve_for_d2Gdvperp2=false,calculate_GG=false,calculate_dGdvperp=false,
+             boundary_data_option=direct_integration)
+    
+    # extract the precalculated and buffer arrays from fkpl_arrays (KKperp2D_sparse, LP2D_sparse and LV2D_sparse are not used below)
+    MM2D_sparse = fkpl_arrays.MM2D_sparse
+    KKpar2D_sparse = fkpl_arrays.KKpar2D_sparse
+    KKperp2D_sparse = fkpl_arrays.KKperp2D_sparse
+    LP2D_sparse = fkpl_arrays.LP2D_sparse
+    LV2D_sparse = fkpl_arrays.LV2D_sparse
+    PUperp2D_sparse = fkpl_arrays.PUperp2D_sparse
+    PPparPUperp2D_sparse = fkpl_arrays.PPparPUperp2D_sparse
+    PPpar2D_sparse = fkpl_arrays.PPpar2D_sparse
+    MMparMNperp2D_sparse = fkpl_arrays.MMparMNperp2D_sparse
+    KPperp2D_sparse = fkpl_arrays.KPperp2D_sparse
+    lu_obj_MM = fkpl_arrays.lu_obj_MM
+    lu_obj_LP = fkpl_arrays.lu_obj_LP
+    lu_obj_LV = fkpl_arrays.lu_obj_LV
+    lu_obj_LB = fkpl_arrays.lu_obj_LB
+    
+    bwgt = fkpl_arrays.bwgt
+    rpbd = fkpl_arrays.rpbd
+    
+    S_dummy = fkpl_arrays.S_dummy
+    Q_dummy = fkpl_arrays.Q_dummy
+    rhsvpavperp = fkpl_arrays.rhsvpavperp
+    rhsvpavperp_copy1 = fkpl_arrays.rhsvpavperp_copy1
+    rhsvpavperp_copy2 = fkpl_arrays.rhsvpavperp_copy2
+    rhsvpavperp_copy3 = fkpl_arrays.rhsvpavperp_copy3
+    
+    # calculate the boundary data
+    if boundary_data_option == multipole_expansion
+        calculate_rosenbluth_potential_boundary_data_multipole!(rpbd,ffsp_in,vpa,vperp,vpa_spectral,vperp_spectral,
+          calculate_GG=calculate_GG,calculate_dGdvperp=(calculate_dGdvperp||algebraic_solve_for_d2Gdvperp2))
+    else # use direct integration on the boundary
+        calculate_rosenbluth_potential_boundary_data!(rpbd,bwgt,ffsp_in,vpa,vperp,vpa_spectral,vperp_spectral,
+         calculate_GG=calculate_GG,calculate_dGdvperp=(calculate_dGdvperp||algebraic_solve_for_d2Gdvperp2))
+    end
+    # carry out the elliptic solves required
+    begin_anyv_vperp_vpa_region()
+    @loop_vperp_vpa ivperp ivpa begin
+        S_dummy[ivpa,ivperp] = -(4.0/sqrt(pi))*ffsp_in[ivpa,ivperp]  # source for the H, dHdvpa and dHdvperp solves
+    end
+
+    # Can run the following three solves in parallel
+    # The solves run on ranks 0, 1 and 2 of the subblock respectively, but modulo the size
+    # of the subblock (to ensure that the ranks doing work are never outside the
+    # subblock, if the size of the subblock is less than 3).
+    begin_anyv_region()
+    if anyv_subblock_rank[] == 0 % anyv_subblock_size[]
+        elliptic_solve!(HH, S_dummy, rpbd.H_data, lu_obj_LP, MM2D_sparse, rhsvpavperp,
+                        vpa, vperp)
+    end
+    if anyv_subblock_rank[] == 1 % anyv_subblock_size[]
+        elliptic_solve!(dHdvpa, S_dummy, rpbd.dHdvpa_data, lu_obj_LP, PPpar2D_sparse,
+                        rhsvpavperp_copy1, vpa, vperp)
+    end
+    if anyv_subblock_rank[] == 2 % anyv_subblock_size[]
+        elliptic_solve!(dHdvperp, S_dummy, rpbd.dHdvperp_data, lu_obj_LV, PUperp2D_sparse,
+                        rhsvpavperp_copy2, vpa, vperp)
+    end
+    
+    begin_anyv_vperp_vpa_region()
+    @loop_vperp_vpa ivperp ivpa begin
+        S_dummy[ivpa,ivperp] = 2.0*HH[ivpa,ivperp]  # source for the solves for G and its derivatives below
+    end
+
+    # The following four solves can be done in parallel. Note: do the two that are always
+    # done on ranks 0 and 1 of the subblock and the first optional one that actually needs
+    # doing on rank 2 (the second on rank 3) to maximise the chances that all solves get
+    # run on separate processes (i.e. they will be on separate processes as long as there
+    # are at least 2 ranks in the subblock if neither optional solve is needed; at least
+    # 3 ranks if only one of them is needed; and at least 4 ranks if both are needed).
+    # The dGdvperp solve is needed if calculate_dGdvperp || algebraic_solve_for_d2Gdvperp2.
+    begin_anyv_region()
+    if calculate_GG
+        if anyv_subblock_rank[] == 2 % anyv_subblock_size[]
+            elliptic_solve!(GG, S_dummy, rpbd.G_data, lu_obj_LP, MM2D_sparse,
+                            rhsvpavperp_copy2, vpa, vperp)
+        end
+    end
+    if calculate_dGdvperp || algebraic_solve_for_d2Gdvperp2
+        if anyv_subblock_rank[] == (calculate_GG ? 3 : 2) % anyv_subblock_size[]
+            elliptic_solve!(dGdvperp, S_dummy, rpbd.dGdvperp_data, lu_obj_LV,
+                            PUperp2D_sparse, rhsvpavperp_copy3, vpa, vperp)
+        end
+    end
+    if anyv_subblock_rank[] == 0 % anyv_subblock_size[]
+        elliptic_solve!(d2Gdvpa2, S_dummy, rpbd.d2Gdvpa2_data, lu_obj_LP, KKpar2D_sparse,
+                        rhsvpavperp, vpa, vperp)
+    end
+    if anyv_subblock_rank[] == 1 % anyv_subblock_size[]
+        elliptic_solve!(d2Gdvperpdvpa, S_dummy, rpbd.d2Gdvperpdvpa_data, lu_obj_LV,
+                        PPparPUperp2D_sparse, rhsvpavperp_copy1, vpa, vperp)
+    end
+    
+    if algebraic_solve_for_d2Gdvperp2
+        begin_anyv_vperp_vpa_region()
+        @loop_vperp_vpa ivperp ivpa begin
+            S_dummy[ivpa,ivperp] = 2.0*HH[ivpa,ivperp] - d2Gdvpa2[ivpa,ivperp]
+            Q_dummy[ivpa,ivperp] = -dGdvperp[ivpa,ivperp]
+        end
+        begin_anyv_region()
+        @anyv_serial_region begin
+            # use the algebraic solve function to find
+            # d2Gdvperp2 = 2H - d2Gdvpa2 - (1/vperp)dGdvperp
+            # using a weak form
+            algebraic_solve!(d2Gdvperp2, S_dummy, Q_dummy, rpbd.d2Gdvperp2_data,
+                             lu_obj_MM, MM2D_sparse, MMparMNperp2D_sparse, rhsvpavperp,
+                             vpa, vperp)
+        end
+    else
+        # solve a weak-form PDE for d2Gdvperp2
+        begin_anyv_vperp_vpa_region()
+        @loop_vperp_vpa ivperp ivpa begin
+            #S_dummy[ivpa,ivperp] = 2.0*HH[ivpa,ivperp] # <- this is already the value of
+                                                        #    S_dummy calculated above
+            Q_dummy[ivpa,ivperp] = 2.0*d2Gdvpa2[ivpa,ivperp]
+        end
+        begin_anyv_region()
+        @anyv_serial_region begin
+            elliptic_solve!(d2Gdvperp2, S_dummy, Q_dummy, rpbd.d2Gdvperp2_data, lu_obj_LB,
+                            KPperp2D_sparse, MMparMNperp2D_sparse, rhsvpavperp, vpa,
+                            vperp)
+        end
+    end
+    return nothing
+end
+
+"""
+Function to calculate Rosenbluth potentials in the entire
+domain of `(vpa,vperp)` by direct integration, using the
+precomputed integration weights stored in `fkpl_arrays`.
+"""
+function calculate_rosenbluth_potentials_via_direct_integration!(GG,HH,dHdvpa,dHdvperp,
+             d2Gdvpa2,dGdvperp,d2Gdvperpdvpa,d2Gdvperp2,ffsp_in,
+             vpa,vperp,vpa_spectral,vperp_spectral,fkpl_arrays::fokkerplanck_arrays_direct_integration_struct)
+    dfdvpa = fkpl_arrays.dfdvpa
+    dfdvperp = fkpl_arrays.dfdvperp
+    d2fdvperpdvpa = fkpl_arrays.d2fdvperpdvpa
+    G0_weights = fkpl_arrays.G0_weights
+    G1_weights = fkpl_arrays.G1_weights
+    H0_weights = fkpl_arrays.H0_weights
+    H1_weights = fkpl_arrays.H1_weights
+    H2_weights = fkpl_arrays.H2_weights
+    H3_weights = fkpl_arrays.H3_weights
+    # first compute the derivatives of fs' (the integration weights assume d fs' dvpa and d fs' dvperp are known)
+    begin_anyv_vperp_region()
+    @loop_vperp ivperp begin
+        @views derivative!(dfdvpa[:,ivperp], ffsp_in[:,ivperp], vpa, vpa_spectral)
+    end
+    begin_anyv_vpa_region()
+    @loop_vpa ivpa begin
+        @views derivative!(dfdvperp[ivpa,:], ffsp_in[ivpa,:], vperp, vperp_spectral)
+        @views derivative!(d2fdvperpdvpa[ivpa,:], dfdvpa[ivpa,:], vperp, vperp_spectral)  # mixed derivative: d/dvperp of dfdvpa
+    end
+    # with the integrands calculated, compute the integrals (all eight output arrays are filled)
+    calculate_rosenbluth_integrals!(GG,d2Gdvpa2,dGdvperp,d2Gdvperpdvpa,
+                                        d2Gdvperp2,HH,dHdvpa,dHdvperp,
+                                        ffsp_in,dfdvpa,dfdvperp,d2fdvperpdvpa,
+                                        G0_weights,G1_weights,H0_weights,H1_weights,H2_weights,H3_weights,
+                                        vpa.n,vperp.n)
+    return nothing           
+end
+
+
+"""
+Function to carry out the integration of the relevant
+distribution functions to form the required coefficients
+for the full-F operator. We assume that the weights are
+precalculated. The function takes as arguments the arrays
+of coefficients (which we fill), the required distributions,
+the precomputed weights, and the sizes of the primed vpa and vperp
+coordinate arrays. The `field' velocities are looped over internally.
+"""
+function calculate_rosenbluth_integrals!(GG,d2Gspdvpa2,dGspdvperp,d2Gspdvperpdvpa,
+                                        d2Gspdvperp2,HH,dHspdvpa,dHspdvperp,
+                                        fsp,dfspdvpa,dfspdvperp,d2fspdvperpdvpa,
+                                        G0_weights,G1_weights,H0_weights,H1_weights,H2_weights,H3_weights,
+                                        nvpa,nvperp)
+    begin_anyv_vperp_vpa_region()
+    @loop_vperp_vpa ivperp ivpa begin
+        GG[ivpa,ivperp] = 0.0
+        d2Gspdvpa2[ivpa,ivperp] = 0.0
+        dGspdvperp[ivpa,ivperp] = 0.0
+        d2Gspdvperpdvpa[ivpa,ivperp] = 0.0
+        d2Gspdvperp2[ivpa,ivperp] = 0.0
+        HH[ivpa,ivperp] = 0.0
+        dHspdvpa[ivpa,ivperp] = 0.0
+        dHspdvperp[ivpa,ivperp] = 0.0
+        for ivperpp in 1:nvperp  # sum over the primed (integration) grid for this (ivpa,ivperp)
+            for ivpap in 1:nvpa
+                GG[ivpa,ivperp] += G0_weights[ivpap,ivperpp,ivpa,ivperp]*fsp[ivpap,ivperpp]
+                # (disabled alternative) d2Gspdvpa2[ivpa,ivperp] += G0_weights[ivpap,ivperpp,ivpa,ivperp]*d2fspdvpa2[ivpap,ivperpp]
+                d2Gspdvpa2[ivpa,ivperp] += H3_weights[ivpap,ivperpp,ivpa,ivperp]*dfspdvpa[ivpap,ivperpp]
+                dGspdvperp[ivpa,ivperp] += G1_weights[ivpap,ivperpp,ivpa,ivperp]*dfspdvperp[ivpap,ivperpp]
+                d2Gspdvperpdvpa[ivpa,ivperp] += G1_weights[ivpap,ivperpp,ivpa,ivperp]*d2fspdvperpdvpa[ivpap,ivperpp]
+                # (disabled alternative) d2Gspdvperp2[ivpa,ivperp] += G2_weights[ivpap,ivperpp,ivpa,ivperp]*d2fspdvperp2[ivpap,ivperpp] + G3_weights[ivpap,ivperpp,ivpa,ivperp]*dfspdvperp[ivpap,ivperpp]
+                d2Gspdvperp2[ivpa,ivperp] += H2_weights[ivpap,ivperpp,ivpa,ivperp]*dfspdvperp[ivpap,ivperpp]
+                HH[ivpa,ivperp] += H0_weights[ivpap,ivperpp,ivpa,ivperp]*fsp[ivpap,ivperpp]
+                dHspdvpa[ivpa,ivperp] += H0_weights[ivpap,ivperpp,ivpa,ivperp]*dfspdvpa[ivpap,ivperpp]
+                dHspdvperp[ivpa,ivperp] += H1_weights[ivpap,ivperpp,ivpa,ivperp]*dfspdvperp[ivpap,ivperpp]
+            end
+        end
+    end
+    return nothing
+end
+
+"""
+Function to enforce boundary conditions on the collision operator
+result to be consistent with the boundary conditions imposed on the
+distribution function. The array `pdf` is modified in place.
+"""
+function enforce_vpavperp_BCs!(pdf,vpa,vperp,vpa_spectral,vperp_spectral)
+    nvpa = vpa.n
+    nvperp = vperp.n
+    ngrid_vperp = vperp.ngrid
+    D0 = vperp_spectral.radau.D0  # only used for the "zero-impose-regularity" option below
+    # vpa boundary conditions
+    # zero at infinity (applied at the first and last vpa grid points)
+    if vpa.bc == "zero"
+        begin_anyv_vperp_region()
+        @loop_vperp ivperp begin
+            pdf[1,ivperp] = 0.0
+            pdf[nvpa,ivperp] = 0.0
+        end
+    end
+    # vperp boundary conditions
+    # zero boundary condition at infinity (applied at the last vperp grid point)
+    # set regularity condition d F / d vperp = 0 at vperp = 0
+    # adjust F(vperp = 0) so that sum_j D0[j]*F[j] = 0 over the first ngrid_vperp points, i.e. d F / d vperp = 0 at vperp = 0
+    begin_anyv_vpa_region()
+    if vperp.bc in ("zero", "zero-impose-regularity")
+        @loop_vpa ivpa begin
+            pdf[ivpa,nvperp] = 0.0
+        end
+    end
+    if vperp.bc == "zero-impose-regularity"
+        buffer = @view vperp.scratch[1:ngrid_vperp-1]
+        @loop_vpa ivpa begin
+            @views @. buffer = D0[2:ngrid_vperp] * pdf[ivpa,2:ngrid_vperp]
+            pdf[ivpa,1] = -sum(buffer)/D0[1]
+        end
+    end
+end
+
+"""
+Function to interpolate `f(vpa,vperp)` from one 
+velocity grid to another, assuming that both 
+grids are represented by `(vpa,vperp)` in normalised units,
+but have different normalisation factors 
+defining the meaning of these grids in physical units. E.g.,
+
+     vpai, vperpi = ci * vpa, ci * vperp
+     vpae, vperpe = ce * vpa, ce * vperp
+     
+with `ci = sqrt(Ti/mi)`, `ce = sqrt(Te/mi)`
+
+`scalefac = ci / ce` is the ratio of the
+two reference speeds.
+"""
+function interpolate_2D_vspace!(pdf_out,pdf_in,vpa,vperp,scalefac)
+    
+    begin_anyv_vperp_vpa_region()
+    # loop over points in the output interpolated dataset
+    @loop_vperp ivperp begin
+        vperp_val = vperp.grid[ivperp]*scalefac
+        # get element for interpolation data
+        iel_vperp = ielement_loopup(vperp_val,vperp)
+        if iel_vperp < 1 # vperp_val outside of range of vperp.grid: output is set to zero
+            @loop_vpa ivpa begin
+                pdf_out[ivpa,ivperp] = 0.0
+            end
+            continue
+        else
+            # get nodes for interpolation
+            ivperpmin, ivperpmax = vperp.igrid_full[1,iel_vperp], vperp.igrid_full[vperp.ngrid,iel_vperp]
+            vperp_nodes = vperp.grid[ivperpmin:ivperpmax]
+            #print("vperp: ",iel_vperp, " ", vperp_nodes," ",vperp_val)
+                   
+        end
+        @loop_vpa ivpa begin
+            vpa_val = vpa.grid[ivpa]*scalefac
+            # get element for interpolation data
+            iel_vpa = ielement_loopup(vpa_val,vpa)
+            if iel_vpa < 1 # vpa_val outside of range of vpa.grid: output is set to zero
+                pdf_out[ivpa,ivperp] = 0.0
+                continue
+            else
+                # get nodes for interpolation
+                ivpamin, ivpamax = vpa.igrid_full[1,iel_vpa], vpa.igrid_full[vpa.ngrid,iel_vpa]
+                vpa_nodes = vpa.grid[ivpamin:ivpamax]
+                #print("vpa: ", iel_vpa, " ", vpa_nodes," ",vpa_val)
+                   
+                # do the interpolation: sum over the nodes of the 2D element containing (vpa_val,vperp_val)
+                pdf_out[ivpa,ivperp] = 0.0
+                for ivperpgrid in 1:vperp.ngrid
+                   # index for referencing pdf_in on original grid
+                   ivperpp = vperp.igrid_full[ivperpgrid,iel_vperp]
+                   # value at vperp_val of the interpolating polynomial associated with node ivperpgrid
+                   vperppoly = lagrange_poly(ivperpgrid,vperp_nodes,vperp_val)
+                   for ivpagrid in 1:vpa.ngrid
+                       # index for referencing pdf_in on original grid
+                       ivpap = vpa.igrid_full[ivpagrid,iel_vpa]
+                       # value at vpa_val of the interpolating polynomial associated with node ivpagrid
+                       vpapoly = lagrange_poly(ivpagrid,vpa_nodes,vpa_val)
+                       pdf_out[ivpa,ivperp] += vpapoly*vperppoly*pdf_in[ivpap,ivperpp]
+                   end
+                end
+            end
+        end
+    end
+    return nothing
+end
+# Alternative version that should be faster - to be tested
+#function interpolate_2D_vspace!(pdf_out, pdf_in, vpa, vpa_spectral, vperp, vperp_spectral,
+#                                scalefac, pdf_buffer)
+#    newgrid_vperp = vperp.scratch .= scalefac .* vperp.grid
+#    newgrid_vpa = vpa.scratch .= scalefac .* vpa.grid
+#
+#    begin_anyv_vpa_region()
+#    @loop_vpa ivpa begin
+#        @views interpolate_to_grid_1d!(pdf_buffer[ivpa,:], newgrid_vperp,
+#                                       pdf_in[ivpa,:], vperp, vperp_spectral)
+#    end
+#
+#    begin_anyv_vperp_region()
+#    @loop_vperp ivperp begin
+#        @views interpolate_to_grid_1d!(pdf_out[:,ivperp], newgrid_vpa,
+#                                       pdf_buffer[:,ivperp], vpa, vpa_spectral)
+
+#    end
+#end
+
+"""
+Function to find the element in which x sits. Returns the element index, or -1 if no element is found to contain x.
+"""
+function ielement_loopup(x,coord)
+    xebs = coord.element_boundaries
+    nelement = coord.nelement_global
+    zero = 1.0e-14  # tolerance used in the comparisons below
+    ielement = -1  # value returned if no element matches
+    # find the element
+    for j in 1:nelement
+        # check for internal points (x strictly between the boundaries of element j)
+        if (x - xebs[j])*(xebs[j+1] - x) > zero
+            ielement = j
+            break
+        # check for boundary points: lower boundary of element j, or upper boundary of the last element. NOTE(review): for elements narrower than ~1e-2, a point very close to (but not within 100*zero of) a boundary may match neither test - confirm
+        elseif (abs(x-xebs[j]) < 100*zero) || (abs(x-xebs[j+1]) < 100*zero && j == nelement)
+            ielement = j
+            break
+        end
+    end
+    return ielement
+end
+
+end

From 5dd9581982c271c75c83bb2fa5e850af556cf384 Mon Sep 17 00:00:00 2001
From: Michael Hardman <29800382+mrhardman@users.noreply.github.com>
Date: Wed, 13 Nov 2024 09:12:19 +0000
Subject: [PATCH 36/41] Add variant of multipole_expansion option
 delta_f_multipole, to use the exact results for the Rosenbluth potentials for
 the Maxwellian piece of F, and multipole for the rest. Tests extended to this
 option. Rename boundary_data -> boundary_data_option to make internal code
 easier to follow.

---
 moment_kinetics/src/fokker_planck.jl          |  4 +-
 moment_kinetics/src/fokker_planck_calculus.jl | 98 ++++++++++++++++++-
 moment_kinetics/src/input_structs.jl          |  4 +-
 moment_kinetics/src/time_advance.jl           |  2 +-
 moment_kinetics/test/fokker_planck_tests.jl   |  4 +-
 5 files changed, 104 insertions(+), 8 deletions(-)

diff --git a/moment_kinetics/src/fokker_planck.jl b/moment_kinetics/src/fokker_planck.jl
index 168a222b3..6ac0e0543 100644
--- a/moment_kinetics/src/fokker_planck.jl
+++ b/moment_kinetics/src/fokker_planck.jl
@@ -95,7 +95,7 @@ function setup_fkpl_collisions_input(toml_input::Dict)
        frequency_option = "reference_parameters",
        self_collisions = true,
        use_conserving_corrections = true,
-       boundary_data = direct_integration,
+       boundary_data_option = direct_integration,
        slowing_down_test = false,
        sd_density = 1.0,
        sd_temp = 0.01,
@@ -339,7 +339,7 @@ Function for advancing with the explicit, weak-form, self-collision operator.
     Zi = collisions.fkpl.Zi # generalise!
     nussp = nuref*(Zi^4) # include charge number factor for self collisions
     use_conserving_corrections = collisions.fkpl.use_conserving_corrections
-    boundary_data_option = collisions.fkpl.boundary_data
+    boundary_data_option = collisions.fkpl.boundary_data_option
     # N.B. parallelisation using special 'anyv' region
     begin_s_r_z_anyv_region()
     @loop_s_r_z is ir iz begin
diff --git a/moment_kinetics/src/fokker_planck_calculus.jl b/moment_kinetics/src/fokker_planck_calculus.jl
index ef78ef293..5594c8e45 100644
--- a/moment_kinetics/src/fokker_planck_calculus.jl
+++ b/moment_kinetics/src/fokker_planck_calculus.jl
@@ -18,6 +18,7 @@ export assemble_explicit_collision_operator_rhs_parallel_analytical_inputs!
 export YY_collision_operator_arrays, calculate_YY_arrays
 export calculate_rosenbluth_potential_boundary_data!
 export calculate_rosenbluth_potential_boundary_data_multipole!
+export calculate_rosenbluth_potential_boundary_data_delta_f_multipole!
 export elliptic_solve!, algebraic_solve!
 export fokkerplanck_arrays_direct_integration_struct
 export fokkerplanck_weakform_arrays_struct
@@ -42,7 +43,10 @@ using ..communication: MPISharedArray, global_rank
 using ..lagrange_polynomials: lagrange_poly, lagrange_poly_optimised
 using ..looping
 using ..velocity_moments: integrate_over_vspace
-using ..input_structs: direct_integration, multipole_expansion
+using ..velocity_moments: get_density, get_upar, get_ppar, get_pperp, get_pressure
+using ..input_structs: direct_integration, multipole_expansion, delta_f_multipole
+using ..fokker_planck_test: F_Maxwellian, G_Maxwellian, H_Maxwellian, dHdvpa_Maxwellian, dHdvperp_Maxwellian
+using ..fokker_planck_test: d2Gdvpa2_Maxwellian, d2Gdvperp2_Maxwellian, d2Gdvperpdvpa_Maxwellian, dGdvperp_Maxwellian
 using moment_kinetics.gauss_legendre: get_QQ_local!
 using Dates
 using SpecialFunctions: ellipk, ellipe
@@ -1779,6 +1783,88 @@ function calculate_rosenbluth_potential_boundary_data_multipole!(rpbd::rosenblut
     return nothing
 end
 
+"""
+Function to use the multipole expansion of the Rosenbluth potentials to calculate and
+assign boundary data to an instance of `rosenbluth_potential_boundary_data`, in place,
+without allocation. Use the exact results for the part of F that can be described with
+a Maxwellian (built from the moments of `pdf`), and the multipole expansion for the remainder.
+"""
+function calculate_rosenbluth_potential_boundary_data_delta_f_multipole!(rpbd::rosenbluth_potential_boundary_data,
+    pdf,dummy_vpavperp,vpa,vperp,vpa_spectral,vperp_spectral;
+    calculate_GG=false,calculate_dGdvperp=false)
+    
+    mass = 1.0
+    # first, compute the moments on every rank (dens, upar and vth are reused by all ranks in the loops below), then delta f on one rank
+    begin_anyv_region()
+    dens =  get_density(pdf, vpa, vperp)
+    upar = get_upar(pdf, vpa, vperp, dens)
+    ppar = get_ppar(pdf, vpa, vperp, upar)
+    pperp = get_pperp(pdf, vpa, vperp)
+    pressure = get_pressure(ppar,pperp)
+    vth = sqrt(2.0*pressure/(dens*mass))
+    @anyv_serial_region begin
+      @loop_vperp_vpa ivperp ivpa begin
+          dummy_vpavperp[ivpa,ivperp] = pdf[ivpa,ivperp] - F_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp)
+      end
+    end
+    # ensure data is synchronized
+    _anyv_subblock_synchronize()
+    # now pass the delta f to the multipole function
+    calculate_rosenbluth_potential_boundary_data_multipole!(rpbd,dummy_vpavperp,
+      vpa,vperp,vpa_spectral,vperp_spectral,
+      calculate_GG=calculate_GG,calculate_dGdvperp=calculate_dGdvperp)
+    # now add on the contributions from the Maxwellian
+    nvpa = vpa.n
+    nvperp = vperp.n
+    begin_anyv_vperp_region()
+    @loop_vperp ivperp begin
+                rpbd.H_data.lower_boundary_vpa[ivperp] += H_Maxwellian(dens,upar,vth,vpa,vperp,1,ivperp)
+                rpbd.H_data.upper_boundary_vpa[ivperp] += H_Maxwellian(dens,upar,vth,vpa,vperp,nvpa,ivperp)
+                rpbd.dHdvpa_data.lower_boundary_vpa[ivperp] += dHdvpa_Maxwellian(dens,upar,vth,vpa,vperp,1,ivperp)
+                rpbd.dHdvpa_data.upper_boundary_vpa[ivperp] += dHdvpa_Maxwellian(dens,upar,vth,vpa,vperp,nvpa,ivperp)
+                rpbd.dHdvperp_data.lower_boundary_vpa[ivperp] += dHdvperp_Maxwellian(dens,upar,vth,vpa,vperp,1,ivperp)
+                rpbd.dHdvperp_data.upper_boundary_vpa[ivperp] += dHdvperp_Maxwellian(dens,upar,vth,vpa,vperp,nvpa,ivperp)
+                rpbd.d2Gdvpa2_data.lower_boundary_vpa[ivperp] += d2Gdvpa2_Maxwellian(dens,upar,vth,vpa,vperp,1,ivperp)
+                rpbd.d2Gdvpa2_data.upper_boundary_vpa[ivperp] += d2Gdvpa2_Maxwellian(dens,upar,vth,vpa,vperp,nvpa,ivperp)
+                rpbd.d2Gdvperpdvpa_data.lower_boundary_vpa[ivperp] += d2Gdvperpdvpa_Maxwellian(dens,upar,vth,vpa,vperp,1,ivperp)
+                rpbd.d2Gdvperpdvpa_data.upper_boundary_vpa[ivperp] += d2Gdvperpdvpa_Maxwellian(dens,upar,vth,vpa,vperp,nvpa,ivperp)
+                rpbd.d2Gdvperp2_data.lower_boundary_vpa[ivperp] += d2Gdvperp2_Maxwellian(dens,upar,vth,vpa,vperp,1,ivperp)
+                rpbd.d2Gdvperp2_data.upper_boundary_vpa[ivperp] += d2Gdvperp2_Maxwellian(dens,upar,vth,vpa,vperp,nvpa,ivperp)
+    end
+    begin_anyv_vpa_region()
+    @loop_vpa ivpa begin
+                rpbd.H_data.upper_boundary_vperp[ivpa] += H_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,nvperp)
+                rpbd.dHdvpa_data.upper_boundary_vperp[ivpa] += dHdvpa_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,nvperp)
+                rpbd.dHdvperp_data.upper_boundary_vperp[ivpa] += dHdvperp_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,nvperp)
+                rpbd.d2Gdvpa2_data.upper_boundary_vperp[ivpa] += d2Gdvpa2_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,nvperp)
+                rpbd.d2Gdvperpdvpa_data.upper_boundary_vperp[ivpa] += d2Gdvperpdvpa_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,nvperp)
+                rpbd.d2Gdvperp2_data.upper_boundary_vperp[ivpa] += d2Gdvperp2_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,nvperp)
+    end
+    if calculate_GG
+       begin_anyv_vperp_region()
+       @loop_vperp ivperp begin
+                   rpbd.G_data.lower_boundary_vpa[ivperp] += G_Maxwellian(dens,upar,vth,vpa,vperp,1,ivperp)
+                   rpbd.G_data.upper_boundary_vpa[ivperp] += G_Maxwellian(dens,upar,vth,vpa,vperp,nvpa,ivperp)
+       end
+       begin_anyv_vpa_region()
+       @loop_vpa ivpa begin
+                   rpbd.G_data.upper_boundary_vperp[ivpa] += G_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,nvperp)
+       end
+    end
+    if calculate_dGdvperp
+       begin_anyv_vperp_region()
+       @loop_vperp ivperp begin
+                   rpbd.dGdvperp_data.lower_boundary_vpa[ivperp] += dGdvperp_Maxwellian(dens,upar,vth,vpa,vperp,1,ivperp)
+                   rpbd.dGdvperp_data.upper_boundary_vpa[ivperp] += dGdvperp_Maxwellian(dens,upar,vth,vpa,vperp,nvpa,ivperp)
+       end
+       begin_anyv_vpa_region()
+       @loop_vpa ivpa begin
+                   rpbd.dGdvperp_data.upper_boundary_vperp[ivpa] += dGdvperp_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,nvperp)
+       end
+    end
+    return nothing
+end
+
 """
 Function to compare two instances of `rosenbluth_potential_boundary_data` --
 one assumed to contain exact results, and the other numerically computed results -- and compute
@@ -2834,9 +2920,17 @@ function calculate_rosenbluth_potentials_via_elliptic_solve!(GG,HH,dHdvpa,dHdvpe
     if boundary_data_option == multipole_expansion
         calculate_rosenbluth_potential_boundary_data_multipole!(rpbd,ffsp_in,vpa,vperp,vpa_spectral,vperp_spectral,
           calculate_GG=calculate_GG,calculate_dGdvperp=(calculate_dGdvperp||algebraic_solve_for_d2Gdvperp2))
-    else # use direct integration on the boundary
+    elseif boundary_data_option == delta_f_multipole # use a variant of the multipole method
+        calculate_rosenbluth_potential_boundary_data_delta_f_multipole!(rpbd,ffsp_in,S_dummy,vpa,vperp,vpa_spectral,vperp_spectral,
+          calculate_GG=calculate_GG,calculate_dGdvperp=(calculate_dGdvperp||algebraic_solve_for_d2Gdvperp2))
+    elseif boundary_data_option == direct_integration  # use direct integration on the boundary
         calculate_rosenbluth_potential_boundary_data!(rpbd,bwgt,ffsp_in,vpa,vperp,vpa_spectral,vperp_spectral,
          calculate_GG=calculate_GG,calculate_dGdvperp=(calculate_dGdvperp||algebraic_solve_for_d2Gdvperp2))
+    else # unrecognised option: stop here rather than continue to the elliptic solves without computing boundary data
+        error("No valid boundary_data_option specified (got '$boundary_data_option').\n",
+              "Pick boundary_data_option='$multipole_expansion'\n",
+              "or boundary_data_option='$delta_f_multipole'\n",
+              "or boundary_data_option='$direct_integration'")
     end
     # carry out the elliptic solves required
     begin_anyv_vperp_vpa_region()
diff --git a/moment_kinetics/src/input_structs.jl b/moment_kinetics/src/input_structs.jl
index 8a0fe2388..84d78bd2c 100644
--- a/moment_kinetics/src/input_structs.jl
+++ b/moment_kinetics/src/input_structs.jl
@@ -482,10 +482,12 @@ end
 @enum boundary_data_type begin
     direct_integration
     multipole_expansion
+    delta_f_multipole
 end
 export boundary_data_type
 export direct_integration
 export multipole_expansion
+export delta_f_multipole
 
 Base.@kwdef struct fkpl_collisions_input
     # option to check if fokker planck frequency should be > 0
@@ -500,7 +502,7 @@ Base.@kwdef struct fkpl_collisions_input
     # option to determine if ad-hoc moment_kinetics-style conserving corrections are used
     use_conserving_corrections::Bool
     # enum option to determine which method is used to provide boundary data for Rosenbluth potential calculations.
-    boundary_data::boundary_data_type
+    boundary_data_option::boundary_data_type
     # option to determine if cross-collisions against fixed Maxwellians are used
     slowing_down_test::Bool
     # Setting to switch between different options for Fokker-Planck collision frequency input
diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl
index 3d4ec26e8..f2df9030a 100644
--- a/moment_kinetics/src/time_advance.jl
+++ b/moment_kinetics/src/time_advance.jl
@@ -734,7 +734,7 @@ function setup_time_advance!(pdf, fields, vz, vr, vzeta, vpa, vperp, z, r, gyrop
                                                   n_neutral_species_alloc, t_params)
     # create arrays for Fokker-Planck collisions 
     if advance.explicit_weakform_fp_collisions
-        if collisions.fkpl.boundary_data == direct_integration
+        if collisions.fkpl.boundary_data_option == direct_integration
             precompute_weights = true
         else
             precompute_weights = false
diff --git a/moment_kinetics/test/fokker_planck_tests.jl b/moment_kinetics/test/fokker_planck_tests.jl
index 93b6d687e..317798a05 100644
--- a/moment_kinetics/test/fokker_planck_tests.jl
+++ b/moment_kinetics/test/fokker_planck_tests.jl
@@ -11,7 +11,7 @@ using moment_kinetics.array_allocation: allocate_float, allocate_shared_float
 using moment_kinetics.coordinates: define_coordinate
 using moment_kinetics.type_definitions: mk_float, mk_int
 using moment_kinetics.velocity_moments: get_density, get_upar, get_ppar, get_pperp, get_pressure
-using moment_kinetics.input_structs: direct_integration, multipole_expansion
+using moment_kinetics.input_structs: direct_integration, multipole_expansion, delta_f_multipole
 
 using moment_kinetics.fokker_planck: init_fokker_planck_collisions_weak_form, fokker_planck_collision_operator_weak_form!
 using moment_kinetics.fokker_planck: conserving_corrections!, init_fokker_planck_collisions_direct_integration
@@ -208,7 +208,7 @@ function runtests()
 
         @testset "weak-form Rosenbluth potential calculation: elliptic solve" begin
             println("    - test weak-form Rosenbluth potential calculation: elliptic solve")
-            @testset "$boundary_data_option" for boundary_data_option in (direct_integration,multipole_expansion)
+            @testset "$boundary_data_option" for boundary_data_option in (direct_integration,multipole_expansion,delta_f_multipole)
                 println("        -  boundary_data_option=$boundary_data_option")
                 ngrid = 9
                 nelement_vpa = 8

From 42c78060859b9ee38a92f983b7fe507c933d14f8 Mon Sep 17 00:00:00 2001
From: Michael Hardman <29800382+mrhardman@users.noreply.github.com>
Date: Wed, 13 Nov 2024 13:41:20 +0000
Subject: [PATCH 37/41] Broadcast Maxwellian density, parallel flow and thermal
 speed information across cores in delta_f_multipole option.

---
 moment_kinetics/src/fokker_planck_calculus.jl | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/moment_kinetics/src/fokker_planck_calculus.jl b/moment_kinetics/src/fokker_planck_calculus.jl
index 5594c8e45..a6122afad 100644
--- a/moment_kinetics/src/fokker_planck_calculus.jl
+++ b/moment_kinetics/src/fokker_planck_calculus.jl
@@ -1794,6 +1794,7 @@ function calculate_rosenbluth_potential_boundary_data_delta_f_multipole!(rpbd::r
     calculate_GG=false,calculate_dGdvperp=false)
     
     mass = 1.0
+    dens, upar, vth = 0.0, 0.0, 0.0
     # first, compute the moments and delta f
     begin_anyv_region()
     @anyv_serial_region begin
@@ -1807,6 +1808,12 @@ function calculate_rosenbluth_potential_boundary_data_delta_f_multipole!(rpbd::r
           dummy_vpavperp[ivpa,ivperp] = pdf[ivpa,ivperp] - F_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp) 
       end
     end
+    # broadcast dens, upar, vth from rank 0 of comm_anyv_subblock so that every rank in the subblock holds the same values (presumably only the serial-region rank computed them; others still hold the 0.0 placeholders)
+    param_vec = [dens, upar, vth] # pack the three scalars into one buffer so a single Bcast! call suffices
+    if comm_anyv_subblock[] != MPI.COMM_NULL # skip the collective when there is no anyv subblock communicator
+        MPI.Bcast!(param_vec, 0, comm_anyv_subblock[]) # root is rank 0 of the subblock communicator
+    end
+    (dens, upar, vth) = param_vec # unpack the (possibly received) values back into the local scalars
     # ensure data is synchronized
     _anyv_subblock_synchronize()
     # now pass the delta f to the multipole function

From 83ee05487243d6b2586c30e2dc8ca4917e7aed10 Mon Sep 17 00:00:00 2001
From: Michael Hardman <29800382+mrhardman@users.noreply.github.com>
Date: Wed, 20 Nov 2024 15:20:10 +0000
Subject: [PATCH 38/41] Make 2D_FEM_assembly_test.jl more flexible to allow
 easy comparison of different boundary data calculations.

---
 test_scripts/2D_FEM_assembly_test.jl | 43 +++++++++++++++++-----------
 1 file changed, 26 insertions(+), 17 deletions(-)

diff --git a/test_scripts/2D_FEM_assembly_test.jl b/test_scripts/2D_FEM_assembly_test.jl
index 69742fb09..7f8dfbbe0 100644
--- a/test_scripts/2D_FEM_assembly_test.jl
+++ b/test_scripts/2D_FEM_assembly_test.jl
@@ -397,7 +397,8 @@ end
         algebraic_solve_for_d2Gdvperp2=false,
         test_self_operator = true,
         Lvpa = 12.0, Lvperp = 6.0,
-        boundary_data_option = direct_integration)
+        boundary_data_option = direct_integration,
+        outdir = "")
         initialize_comms!()
         #ngrid = 5
         #plot_scan = true
@@ -503,7 +504,7 @@ end
              shape =:circle, xscale=:log10, yscale=:log10, xticks = (nelement_list, nelement_list), yticks = (ytick_sequence, ytick_sequence), markersize = 5, linewidth=2, 
               xtickfontsize = fontsize, xguidefontsize = fontsize, ytickfontsize = fontsize, yguidefontsize = fontsize, legendfontsize = fontsize,
               foreground_color_legend = nothing, background_color_legend = nothing, legend=:bottomleft)
-            outfile = "fkpl_C_G_H_max_test_ngrid_"*string(ngrid)*"_GLL.pdf"
+            outfile = outdir*"fkpl_C_G_H_max_test_ngrid_"*string(ngrid)*"_GLL.pdf"
             savefig(outfile)
             println(outfile)
             println([max_C_err,max_H_err,max_G_err, expected, expected_integral])
@@ -513,7 +514,7 @@ end
              shape =:circle, xscale=:log10, yscale=:log10, xticks = (nelement_list, nelement_list), yticks = (ytick_sequence, ytick_sequence), markersize = 5, linewidth=2, 
               xtickfontsize = fontsize, xguidefontsize = fontsize, ytickfontsize = fontsize, yguidefontsize = fontsize, legendfontsize = fontsize,
               foreground_color_legend = nothing, background_color_legend = nothing, legend=:bottomleft)
-            outfile = "fkpl_coeffs_max_test_ngrid_"*string(ngrid)*"_GLL.pdf"
+            outfile = outdir*"fkpl_coeffs_max_test_ngrid_"*string(ngrid)*"_GLL.pdf"
             savefig(outfile)
             println(outfile)
             println([max_dHdvpa_err, max_dHdvperp_err, max_d2Gdvperp2_err, max_d2Gdvpa2_err, max_d2Gdvperpdvpa_err, max_dGdvperp_err, expected,      expected_integral])
@@ -535,7 +536,7 @@ end
              shape =:circle, xscale=:log10, yscale=:log10, xticks = (nelement_list, nelement_list), yticks = (ytick_sequence, ytick_sequence), markersize = 5, linewidth=2, 
               xtickfontsize = fontsize, xguidefontsize = fontsize, ytickfontsize = fontsize, yguidefontsize = fontsize, legendfontsize = fontsize,
               foreground_color_legend = nothing, background_color_legend = nothing, legend=:bottomleft)
-            outfile = "fkpl_C_G_H_L2_test_ngrid_"*string(ngrid)*"_GLL.pdf"
+            outfile = outdir*"fkpl_C_G_H_L2_test_ngrid_"*string(ngrid)*"_GLL.pdf"
             savefig(outfile)
             println(outfile)
             println([L2_C_err,L2_H_err,L2_G_err, expected, expected_integral])
@@ -545,7 +546,7 @@ end
              shape =:circle, xscale=:log10, yscale=:log10, xticks = (nelement_list, nelement_list), yticks = (ytick_sequence, ytick_sequence), markersize = 5, linewidth=2, 
               xtickfontsize = fontsize, xguidefontsize = fontsize, ytickfontsize = fontsize, yguidefontsize = fontsize, legendfontsize = fontsize,
               foreground_color_legend = nothing, background_color_legend = nothing, legend=:bottomleft)
-            outfile = "fkpl_coeffs_L2_test_ngrid_"*string(ngrid)*"_GLL.pdf"
+            outfile = outdir*"fkpl_coeffs_L2_test_ngrid_"*string(ngrid)*"_GLL.pdf"
             savefig(outfile)
             println(outfile)
             println([L2_dHdvpa_err, L2_dHdvperp_err, L2_d2Gdvperp2_err, L2_d2Gdvpa2_err, L2_d2Gdvperpdvpa_err, L2_dGdvperp_err,  expected,      expected_integral])
@@ -560,7 +561,7 @@ end
                  shape =:circle, xscale=:log10, yscale=:log10, xticks = (nelement_list, nelement_list), yticks = (ytick_sequence, ytick_sequence), markersize = 5, linewidth=2, 
                   xtickfontsize = fontsize, xguidefontsize = fontsize, ytickfontsize = fontsize, yguidefontsize = fontsize, legendfontsize = fontsize,
                   foreground_color_legend = nothing, background_color_legend = nothing, legend=:bottomleft)
-                outfile = "fkpl_conservation_test_ngrid_"*string(ngrid)*"_GLL.pdf"
+                outfile = outdir*"fkpl_conservation_test_ngrid_"*string(ngrid)*"_GLL.pdf"
                 savefig(outfile)
                 println(outfile)
                 println([max_C_err, L2_C_err, n_err, u_err, p_err, expected, expected_integral])
@@ -570,7 +571,7 @@ end
                  shape =:circle, xscale=:log10, yscale=:log10, xticks = (nelement_list, nelement_list), yticks = (ytick_sequence, ytick_sequence), markersize = 5, linewidth=2, 
                   xtickfontsize = fontsize, xguidefontsize = fontsize, ytickfontsize = fontsize, yguidefontsize = fontsize, legendfontsize = fontsize,
                   foreground_color_legend = nothing, background_color_legend = nothing, legend=:bottomleft)
-                outfile = "fkpl_conservation_test_ngrid_"*string(ngrid)*"_GLL.pdf"
+                outfile = outdir*"fkpl_conservation_test_ngrid_"*string(ngrid)*"_GLL.pdf"
                 savefig(outfile)
                 println(outfile)        
                 println([max_C_err, L2_C_err, n_err, expected, expected_integral])
@@ -578,20 +579,28 @@ end
             
             calculate_timeslabel = "time/step (ms)"
             init_timeslabel = "time/init (ms)"
-            ytick_sequence_timing = Array([10^2,10^3,10^4,10^5,10^6])
-            plot(nelement_list, [calculate_times, init_times, expected_t_2, expected_t_3],
-            xlabel=xlabel, label=[calculate_timeslabel init_timeslabel expected_t_2_label expected_t_3_label], ylabel="",
-             shape =:circle, xscale=:log10, yscale=:log10, xticks = (nelement_list, nelement_list), markersize = 5, linewidth=2, 
-              xtickfontsize = fontsize, xguidefontsize = fontsize, ytickfontsize = fontsize, yguidefontsize = fontsize, legendfontsize = fontsize,
-              foreground_color_legend = nothing, background_color_legend = nothing, legend=:topleft)
-            outfile = "fkpl_timing_test_ngrid_"*string(ngrid)*"_GLL.pdf"
+            outfile = outdir*"fkpl_timing_test_ngrid_"*string(ngrid)*"_GLL.pdf"
+            if boundary_data_option == direct_integration
+                ytick_sequence_timing = Array([10^2,10^3,10^4,10^5,10^6])
+                plot(nelement_list, [calculate_times, init_times, expected_t_2, expected_t_3],
+                xlabel=xlabel, label=[calculate_timeslabel init_timeslabel expected_t_2_label expected_t_3_label], ylabel="",
+                 shape =:circle, xscale=:log10, yscale=:log10, xticks = (nelement_list, nelement_list), markersize = 5, linewidth=2, 
+                  xtickfontsize = fontsize, xguidefontsize = fontsize, ytickfontsize = fontsize, yguidefontsize = fontsize, legendfontsize = fontsize,
+                  foreground_color_legend = nothing, background_color_legend = nothing, legend=:topleft)
+                println([calculate_times, init_times, expected_t_2, expected_t_3])
+            else
+                ytick_sequence_timing = Array([10^2,10^3,10^4,10^5])
+                plot(nelement_list, [calculate_times, init_times, expected_t_2],
+                xlabel=xlabel, label=[calculate_timeslabel init_timeslabel expected_t_2_label], ylabel="",
+                 shape =:circle, xscale=:log10, yscale=:log10, xticks = (nelement_list, nelement_list), markersize = 5, linewidth=2, 
+                  xtickfontsize = fontsize, xguidefontsize = fontsize, ytickfontsize = fontsize, yguidefontsize = fontsize, legendfontsize = fontsize,
+                  foreground_color_legend = nothing, background_color_legend = nothing, legend=:topleft)
+                println([calculate_times, init_times, expected_t_2])
+            end
             savefig(outfile)
             println(outfile)
-            println([calculate_times, init_times, expected_t_2, expected_t_3])
-            
         end
         if global_rank[]==0 && save_HDF5
-            outdir = ""
             ncore = global_size[]
             save_fkpl_error_data(outdir,ncore,ngrid,nelement_list,
                 max_C_err, max_H_err, max_G_err, max_dHdvpa_err, max_dHdvperp_err,

From 753e320f285a3f7642ece4c4768fc539f9949164 Mon Sep 17 00:00:00 2001
From: Michael Hardman <29800382+mrhardman@users.noreply.github.com>
Date: Fri, 22 Nov 2024 10:03:09 +0000
Subject: [PATCH 39/41] Add boundary_data_option values to
 util/precompile_run.jl to ensure that these options are precompiled.

---
 util/precompile_run.jl | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/util/precompile_run.jl b/util/precompile_run.jl
index 40ed0e57b..a1e3e58ad 100644
--- a/util/precompile_run.jl
+++ b/util/precompile_run.jl
@@ -82,6 +82,16 @@ collisions_input2 = recursive_merge(wall_bc_cheb_input, OptionsDict("composition
                                                                                            "bc" => "zero-impose-regularity"),
                                                                     "vpa" => OptionsDict("discretization" => "gausslegendre_pseudospectral"),
                                                                    ))
+collisions_input3 = recursive_merge(wall_bc_cheb_input, OptionsDict("composition" => OptionsDict("n_neutral_species" => 0),
+                                                                    "fokker_planck_collisions" => OptionsDict("use_fokker_planck" => true, "self_collisions" => true, "boundary_data_option" => "delta_f_multipole"),
+                                                                    "vperp" => OptionsDict("discretization" => "gausslegendre_pseudospectral"),
+                                                                    "vpa" => OptionsDict("discretization" => "gausslegendre_pseudospectral"),
+                                                                   ))
+collisions_input4 = recursive_merge(wall_bc_cheb_input, OptionsDict("composition" => OptionsDict("n_neutral_species" => 0),
+                                                                    "fokker_planck_collisions" => OptionsDict("use_fokker_planck" => true, "self_collisions" => true, "boundary_data_option" => "multipole_expansion"),
+                                                                    "vperp" => OptionsDict("discretization" => "gausslegendre_pseudospectral"),
+                                                                    "vpa" => OptionsDict("discretization" => "gausslegendre_pseudospectral"),
+                                                                   ))
 # add an additional input for every geometry option available in addition to the default
 geo_input1 = recursive_merge(wall_bc_cheb_input, OptionsDict("composition" => OptionsDict("n_neutral_species" => 0),
                                                              "geometry" => OptionsDict("option" => "1D-mirror", "DeltaB" => 0.5, "pitch" => 0.5, "rhostar" => 1.0)))
@@ -109,7 +119,7 @@ kinetic_electron_input = recursive_merge(cheb_input, OptionsDict("evolve_moments
                                                                                                         "no_restart" => true),
                                                                 ))
 
-push!(inputs_list, collisions_input1, collisions_input2, geo_input1, kinetic_electron_input)
+push!(inputs_list, collisions_input1, collisions_input2, collisions_input3, collisions_input4, geo_input1, kinetic_electron_input)
 
 for input in inputs_list
     run_moment_kinetics(input)

From 998c725c4efb57adbeae5e55e5b350401d513a99 Mon Sep 17 00:00:00 2001
From: Michael Hardman <29800382+mrhardman@users.noreply.github.com>
Date: Thu, 19 Dec 2024 14:42:23 +0000
Subject: [PATCH 40/41] Rename variables in external_sources.jl module in line
 with present state of code. Unclear how examples/fokker_planck/ tests passed
 with variables out of namespace, now corrected.

---
 moment_kinetics/src/external_sources.jl | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/moment_kinetics/src/external_sources.jl b/moment_kinetics/src/external_sources.jl
index 2eeeb3ac1..914ae0b03 100644
--- a/moment_kinetics/src/external_sources.jl
+++ b/moment_kinetics/src/external_sources.jl
@@ -814,7 +814,7 @@ Add external source term to the ion kinetic equation.
         end
     elseif source_type == "alphas" || source_type == "alphas-with-losses"
         begin_s_r_z_region()
-        source_v0 = ion_source_settings.source_v0
+        source_v0 = ion_source.source_v0
         if !(source_v0 > 1.0e-8)
             error("source_v0=$source_v0 < 1.0e-8")
         end
@@ -840,8 +840,8 @@ Add external source term to the ion kinetic equation.
             end
             
             if source_type == "alphas-with-losses"
-                sink_vth = ion_source_settings.sink_vth
-                sink_strength = ion_source_settings.sink_strength
+                sink_vth = ion_source.sink_vth
+                sink_strength = ion_source.sink_strength
                 if !(sink_vth > 1.0e-8)
                    error("sink_vth=$sink_vth < 1.0e-8")
                 end
@@ -871,8 +871,8 @@ Add external source term to the ion kinetic equation.
         end
     elseif source_type == "beam" || source_type == "beam-with-losses"
         begin_s_r_z_region()
-        source_vpa0 = ion_source_settings.source_vpa0
-        source_vperp0 = ion_source_settings.source_vperp0
+        source_vpa0 = ion_source.source_vpa0
+        source_vperp0 = ion_source.source_vperp0
         if !(source_vpa0 > 1.0e-8)
             error("source_vpa0=$source_vpa0 < 1.0e-8")
         end
@@ -901,8 +901,8 @@ Add external source term to the ion kinetic equation.
             end
             
             if source_type == "beam-with-losses"
-                sink_vth = ion_source_settings.sink_vth
-                sink_strength = ion_source_settings.sink_strength
+                sink_vth = ion_source.sink_vth
+                sink_strength = ion_source.sink_strength
                 if !(sink_vth > 1.0e-8)
                    error("sink_vth=$sink_vth < 1.0e-8")
                 end

From fc6b9e2d13e854287a61ab1fa4c2cf8b50ea7473 Mon Sep 17 00:00:00 2001
From: John Omotani <john.omotani@ukaea.uk>
Date: Fri, 10 Jan 2025 09:52:53 +0000
Subject: [PATCH 41/41] In tests, get StatsBase dependency from moment_kinetics
 package

Avoids need to install StatsBase directly in base environment.
---
 moment_kinetics/src/moment_kinetics.jl        | 1 +
 moment_kinetics/test/jacobian_matrix_tests.jl | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/moment_kinetics/src/moment_kinetics.jl b/moment_kinetics/src/moment_kinetics.jl
index 3309ca8ed..0a43b6277 100644
--- a/moment_kinetics/src/moment_kinetics.jl
+++ b/moment_kinetics/src/moment_kinetics.jl
@@ -5,6 +5,7 @@ module moment_kinetics
 export run_moment_kinetics
 
 using MPI
+using StatsBase
 
 # Include submodules from other source files
 # Note that order of includes matters - things used in one module must already
diff --git a/moment_kinetics/test/jacobian_matrix_tests.jl b/moment_kinetics/test/jacobian_matrix_tests.jl
index e61ae7d33..7fbcb8af9 100644
--- a/moment_kinetics/test/jacobian_matrix_tests.jl
+++ b/moment_kinetics/test/jacobian_matrix_tests.jl
@@ -36,7 +36,7 @@ using moment_kinetics.moment_constraints: electron_implicit_constraint_forcing!,
 using moment_kinetics.type_definitions: mk_float
 using moment_kinetics.velocity_moments: calculate_electron_moment_derivatives_no_r!
 
-using StatsBase
+using moment_kinetics.StatsBase
 
 # Small parameter used to create perturbations to test Jacobian against
 epsilon = 1.0e-6