From cc90abbb1e12fe5918a3a56fd04cdf514f869443 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Mon, 28 Oct 2024 11:02:57 +0000 Subject: [PATCH] For kinetic electrons, use LU when no shared-mem, ADI with shared-mem When not parallelising using shared memory, there is no need to split the preconditioner and the LU preconditioner should be the most efficient. Therefore use the LU preconditioner when `block_size[] == 1` (no shared-memory parallelism), and use ADI only when `block_size[] > 1`. --- moment_kinetics/src/time_advance.jl | 10 +- .../test/kinetic_electron_tests.jl | 205 ++++++++++++------ 2 files changed, 147 insertions(+), 68 deletions(-) diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl index 19db3dc947..219fd0ef95 100644 --- a/moment_kinetics/src/time_advance.jl +++ b/moment_kinetics/src/time_advance.jl @@ -667,6 +667,13 @@ function setup_time_advance!(pdf, fields, vz, vr, vzeta, vpa, vperp, z, r, gyrop input_dict, (z=z,); default_rtol=t_params.rtol / 10.0, default_atol=t_params.atol / 10.0) + if block_size[] == 1 + # No need to parallelise, so un-split LU solver should be most efficient. + electron_preconditioner_type = Val(:electron_lu) + else + # Want to parallelise preconditioner, so use ADI method. 
+ electron_preconditioner_type = Val(:electron_adi) + end nl_solver_electron_advance_params = setup_nonlinear_solve(t_params.implicit_electron_advance || composition.electron_physics ∈ (kinetic_electrons, kinetic_electrons_with_temperature_equation), input_dict, @@ -675,8 +682,7 @@ function setup_time_advance!(pdf, fields, vz, vr, vzeta, vpa, vperp, z, r, gyrop default_rtol=t_params.rtol / 10.0, default_atol=t_params.atol / 10.0, electron_ppar_pdf_solve=true, - #preconditioner_type=Val(:electron_lu)) - preconditioner_type=Val(:electron_adi)) + preconditioner_type=electron_preconditioner_type) nl_solver_ion_advance_params = setup_nonlinear_solve(t_params.implicit_ion_advance, input_dict, (s=composition.n_ion_species, r=r, z=z, vperp=vperp, diff --git a/moment_kinetics/test/kinetic_electron_tests.jl b/moment_kinetics/test/kinetic_electron_tests.jl index ee3b6e5143..0daeff1c7f 100644 --- a/moment_kinetics/test/kinetic_electron_tests.jl +++ b/moment_kinetics/test/kinetic_electron_tests.jl @@ -6,6 +6,7 @@ module KineticElectronsTests include("setup.jl") +using moment_kinetics.communication using moment_kinetics.load_data: get_run_info_no_setup, close_run_info, postproc_load_variable using moment_kinetics.looping @@ -172,72 +173,144 @@ function run_test() # Regression test # Benchmark data generated in serial on Linux - expected_Ez = [-0.5990683230706185 -1.136483186157602; - -0.4944296396481284 -0.9873296990705788; - -0.30889032954504736 -0.6694380824928302; - -0.2064830747303776 -0.4471331690708596; - -0.21232457328748663 -0.423069171542538; - -0.18233875912042674 -0.3586467595624931; - -0.16711429522309232 -0.3018272987758344; - -0.16920776495088916 -0.27814384649305496; - -0.1629417555658927 -0.26124630661090814; - -0.16619150334079993 -0.2572789330163811; - -0.15918194883360942 -0.23720078037362732; - -0.14034706409006803 -0.20520396656341475; - -0.12602184032280567 -0.1827016549071128; - -0.10928716440800472 -0.15808919669899502; - -0.07053969674257217 
-0.10137753767917096; - -0.0249577746169536 -0.0358411459260082; - -2.8327303308330514e-15 -2.0803303361189427e-5; - 0.024957774616960776 0.03584490974053962; - 0.07053969674257636 0.1013692898656727; - 0.10928716440799909 0.15807862358546687; - 0.1260218403227975 0.18263049748179466; - 0.1403470640900294 0.20516566362571026; - 0.1591819488336015 0.23711236692241613; - 0.16619150334082114 0.257126146434857; - 0.16294175556587748 0.2609881259705107; - 0.16920776495090983 0.2778978154805798; - 0.1671142952230893 0.3015349192528757; - 0.1823387591204167 0.3585291689672981; - 0.21232457328753865 0.4231179549656996; - 0.20648307473037922 0.44816400221269476; - 0.3088903295450278 0.6716787105435247; - 0.4944296396481271 0.9861165590258743; - 0.5990683230705801 1.1300034111861956] - expected_vthe = [22.64555285302391 22.485481713141688; - 23.763411647653097 23.63281883616836; - 25.26907160117684 25.181703459470448; - 26.17920352818247 26.12461016686916; - 26.514772631426933 26.476018852279974; - 26.798783188585713 26.774387562937218; - 27.202255545479264 27.203662204308202; - 27.50424749120107 27.527732850637264; - 27.630498656270504 27.6642323848215; - 27.748483758260697 27.79134809261204; - 27.933760382468346 27.990808336620802; - 28.08611508251559 28.153978618442775; - 28.14959662643782 28.221734439130564; - 28.207730844115044 28.283677711828023; - 28.28567669896009 28.36634261525836; - 28.32728392065335 28.410489883644782; - 28.331064506972027 28.41437629072209; - 28.32729968986601 28.41050992096321; - 28.285678151542136 28.366352683865195; - 28.207765527709956 28.28373408727703; - 28.149604559462947 28.221771261090687; - 28.086248527111163 28.154158507899695; - 27.933979289064936 27.991103719847732; - 27.74906125092813 27.792046191405188; - 27.631210333523736 27.66508092926101; - 27.505479130159543 27.529115937508752; - 27.20422756527604 27.20578114592589; - 26.801712351383053 26.77740066591359; - 26.517644511297203 26.478915386575462; - 26.18176436913143 
26.127099000267552; - 25.26635932097994 25.178676836919877; - 23.756593489029708 23.625697695979085; - 22.64390166090378 22.48400980852866] + if global_size[] == 1 + # Serial solves use LU preconditioner + expected_Ez = [-0.5990683230706185 -1.136483186157602; + -0.4944296396481284 -0.9873296990705788; + -0.30889032954504736 -0.6694380824928302; + -0.2064830747303776 -0.4471331690708596; + -0.21232457328748663 -0.423069171542538; + -0.18233875912042674 -0.3586467595624931; + -0.16711429522309232 -0.3018272987758344; + -0.16920776495088916 -0.27814384649305496; + -0.1629417555658927 -0.26124630661090814; + -0.16619150334079993 -0.2572789330163811; + -0.15918194883360942 -0.23720078037362732; + -0.14034706409006803 -0.20520396656341475; + -0.12602184032280567 -0.1827016549071128; + -0.10928716440800472 -0.15808919669899502; + -0.07053969674257217 -0.10137753767917096; + -0.0249577746169536 -0.0358411459260082; + -2.8327303308330514e-15 -2.0803303361189427e-5; + 0.024957774616960776 0.03584490974053962; + 0.07053969674257636 0.1013692898656727; + 0.10928716440799909 0.15807862358546687; + 0.1260218403227975 0.18263049748179466; + 0.1403470640900294 0.20516566362571026; + 0.1591819488336015 0.23711236692241613; + 0.16619150334082114 0.257126146434857; + 0.16294175556587748 0.2609881259705107; + 0.16920776495090983 0.2778978154805798; + 0.1671142952230893 0.3015349192528757; + 0.1823387591204167 0.3585291689672981; + 0.21232457328753865 0.4231179549656996; + 0.20648307473037922 0.44816400221269476; + 0.3088903295450278 0.6716787105435247; + 0.4944296396481271 0.9861165590258743; + 0.5990683230705801 1.1300034111861956] + expected_vthe = [22.64555285302391 22.485481713141688; + 23.763411647653097 23.63281883616836; + 25.26907160117684 25.181703459470448; + 26.17920352818247 26.12461016686916; + 26.514772631426933 26.476018852279974; + 26.798783188585713 26.774387562937218; + 27.202255545479264 27.203662204308202; + 27.50424749120107 27.527732850637264; + 
27.630498656270504 27.6642323848215; + 27.748483758260697 27.79134809261204; + 27.933760382468346 27.990808336620802; + 28.08611508251559 28.153978618442775; + 28.14959662643782 28.221734439130564; + 28.207730844115044 28.283677711828023; + 28.28567669896009 28.36634261525836; + 28.32728392065335 28.410489883644782; + 28.331064506972027 28.41437629072209; + 28.32729968986601 28.41050992096321; + 28.285678151542136 28.366352683865195; + 28.207765527709956 28.28373408727703; + 28.149604559462947 28.221771261090687; + 28.086248527111163 28.154158507899695; + 27.933979289064936 27.991103719847732; + 27.74906125092813 27.792046191405188; + 27.631210333523736 27.66508092926101; + 27.505479130159543 27.529115937508752; + 27.20422756527604 27.20578114592589; + 26.801712351383053 26.77740066591359; + 26.517644511297203 26.478915386575462; + 26.18176436913143 26.127099000267552; + 25.26635932097994 25.178676836919877; + 23.756593489029708 23.625697695979085; + 22.64390166090378 22.48400980852866] + else + # Parallel solves, which here use only shared-memory parallelism, use the ADI + # preconditioner, which should be as accurate, but may give different results + # within Newton-Krylov tolerances. 
+ expected_Ez = [-0.5990683230706185 -1.136484793603861; + -0.4944296396481284 -0.9873300031440772; + -0.30889032954504736 -0.6694378168618197; + -0.2064830747303776 -0.447133132132065; + -0.21232457328748663 -0.42306913446372424; + -0.18233875912042674 -0.3586467771727455; + -0.16711429522309232 -0.30182728110160495; + -0.16920776495088916 -0.27814382747995164; + -0.1629417555658927 -0.2612463784138094; + -0.16619150334079993 -0.25727894258000966; + -0.15918194883360942 -0.23720078814350573; + -0.14034706409006803 -0.20520397188041256; + -0.12602184032280567 -0.18270162474892546; + -0.10928716440800472 -0.1580892035790512; + -0.07053969674257217 -0.10137753682381391; + -0.0249577746169536 -0.03584114725793184; + -2.8327303308330514e-15 -2.0802378395589373e-5; + 0.024957774616960776 0.0358449101669449; + 0.07053969674257636 0.10136928934666747; + 0.10928716440799909 0.15807862867071673; + 0.1260218403227975 0.18263047522175488; + 0.1403470640900294 0.20516566756031385; 0.1591819488336015 0.2371123741024713; + 0.16619150334082114 0.2571261543920033; + 0.16294175556587748 0.2609882062708652; + 0.16920776495090983 0.27789779494370415; + 0.1671142952230893 0.30153489797658445; + 0.1823387591204167 0.35852918516786003; + 0.21232457328753865 0.42311789840457864; + 0.20648307473037922 0.44816400062147066; + 0.3088903295450278 0.6716785459169026; + 0.4944296396481271 0.9861167610959626; + 0.5990683230705801 1.1300045383907789] + expected_vthe = [22.64555338227396 22.48548119549829; + 23.76341164436594 23.632819782771243; + 25.26907163394297 25.18170391887767; + 26.179203467285365 26.12461016927763; + 26.514772629327332 26.47601877788725; + 26.79878318858447 26.774387534342114; + 27.20225551034186 27.20366217166485; + 27.504247525601926 27.527732760234755; + 27.630498605068166 27.66423228184859; + 27.748483763235846 27.791348082529804; + 27.933760371994826 27.990808308571204; + 28.08611509938479 28.153978648601132; + 28.149596610550738 28.221734405417436; + 
28.207730848524463 28.28367771694209; + 28.28567670146647 28.366342613061416; + 28.32728392764203 28.410489892675102; + 28.331064498175866 28.414376282256146; + 28.327299695349158 28.41050992979778; + 28.285678155424083 28.366352683054103; + 28.207765532359442 28.28373409338897; + 28.149604554344048 28.22177123547944; + 28.086248537316628 28.154158532699547; + 27.933979285563435 27.991103698041254; + 27.749061255285646 27.79204618050744; + 27.63121031067771 27.665080846653012; + 27.505479148983177 27.529115838548574; + 27.204227550854288 27.205781129997607; + 26.801712356957204 26.777400644678224; + 26.517644516966772 26.478915353716097; + 26.181764354679014 26.12709901369174; + 25.266359355820907 25.178677080491074; + 23.756593465755735 23.625698257711747; + 22.64390180335094 22.48400934735562] + end if expected_Ez == nothing # Error: no expected input provided