diff --git a/Project.toml b/Project.toml index 296883f5420..e096c79d99d 100644 --- a/Project.toml +++ b/Project.toml @@ -4,6 +4,7 @@ authors = ["Michael Schlottke-Lakemper ", "Gregor version = "0.9.12-DEV" [deps] +Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" Accessors = "7d9f7c33-5ae7-4f3b-8dc6-eff91059b697" CodeTracking = "da1fd8a2-8d9e-5ec2-8556-3022fb5608a2" ConstructionBase = "187b0558-2788-49d3-abe0-74a17ed4e7c9" @@ -17,6 +18,7 @@ FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f" IfElse = "615f187c-cbe4-4ef1-ba3b-2fcf58d6d173" +KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" LinearMaps = "7a12625a-238d-50fd-b39a-03d52299707e" LoopVectorization = "bdcacae8-1622-11e9-2a5c-532679323890" @@ -30,7 +32,6 @@ PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a" Preferences = "21216c6a-2e73-6563-6e65-726566657250" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" RecipesBase = "3cdcf5f2-1ef4-517c-9805-6587b60abb01" -RecursiveArrayTools = "731186ca-8d62-57ce-b412-fbd966d074cd" Reexport = "189a3867-3050-52da-a836-e630ba90ab69" Requires = "ae029012-a4dd-5104-9daa-d747884805df" SciMLBase = "0bca4576-84f4-4d90-8ffe-ffa030f20462" @@ -64,14 +65,15 @@ TrixiMakieExt = "Makie" TrixiNLsolveExt = "NLsolve" [compat] +Adapt = "3.7, 4.0" Accessors = "0.1.12" CodeTracking = "1.0.5" ConstructionBase = "1.3" Convex = "0.16" DataStructures = "0.18.15" DelimitedFiles = "1" -DiffEqBase = "6 - 6.143" -DiffEqCallbacks = "2.25" +DiffEqBase = "6" +DiffEqCallbacks = "2.25, 3, 4" Downloads = "1.6" ECOS = "1.1.2" EllipsisNotation = "1.0" @@ -79,6 +81,7 @@ FillArrays = "0.13.2, 1" ForwardDiff = "0.10.24" HDF5 = "0.16.10, 0.17" IfElse = "0.1" +KernelAbstractions = "0.9" LinearAlgebra = "1" LinearMaps = "2.7, 3.0" LoopVectorization = "0.12.151" @@ -94,7 +97,6 @@ PrecompileTools = "1.1" Preferences = "1.3" Printf = "1" RecipesBase 
= "1.1" -RecursiveArrayTools = "2.38.10" Reexport = "1.0" Requires = "1.1" SciMLBase = "1.90, 2" @@ -102,7 +104,7 @@ SimpleUnPack = "1.1" SparseArrays = "1" StableRNGs = "1.0.2" StartUpDG = "0.17.7, 1.1.5" -Static = "0.8.7" +Static = "0.8.7, 1" StaticArrayInterface = "1.4" StaticArrays = "1.5" StrideArrays = "0.1.26" diff --git a/esiwace/LocalPreferences.toml b/esiwace/LocalPreferences.toml new file mode 100644 index 00000000000..8e9b0f7f61d --- /dev/null +++ b/esiwace/LocalPreferences.toml @@ -0,0 +1,30 @@ +[CUDA_Runtime_jll] +local = "true" +version = "11.8" + +[HDF5] +libhdf5 = "/lrz/sys/spack/release/23.1.0/opt/icelake/hdf5/1.14.2-gcc-wvedyel/lib/libhdf5.so" +libhdf5_hl = "/lrz/sys/spack/release/23.1.0/opt/icelake/hdf5/1.14.2-gcc-wvedyel/lib/libhdf5_hl.so" + +[HDF5_jll] +libhdf5_hl_path = "/lrz/sys/spack/release/23.1.0/opt/icelake/hdf5/1.14.2-gcc-wvedyel/lib/libhdf5_hl.so" +libhdf5_path = "/lrz/sys/spack/release/23.1.0/opt/icelake/hdf5/1.14.2-gcc-wvedyel/lib/libhdf5.so" + +[MPIPreferences] +__clear__ = ["preloads_env_switch"] +_format = "1.0" +abi = "OpenMPI" +binary = "system" +cclibs = [] +libmpi = "libmpi" +mpiexec = "mpiexec" +preloads = [] + +[P4est] +libp4est = "/dss/dsstbyfs02/scratch/07/di38qiw/install/t8code/lib/libp4est.so" +libsc = "/dss/dsstbyfs02/scratch/07/di38qiw/install/t8code/lib/libsc.so" + +[T8code] +libp4est = "/dss/dsstbyfs02/scratch/07/di38qiw/install/t8code/lib/libp4est.so" +libsc = "/dss/dsstbyfs02/scratch/07/di38qiw/install/t8code/lib/libsc.so" +libt8 = "/dss/dsstbyfs02/scratch/07/di38qiw/install/t8code/lib/libt8.so" diff --git a/esiwace/Manifest.toml b/esiwace/Manifest.toml new file mode 100644 index 00000000000..716f88b6047 --- /dev/null +++ b/esiwace/Manifest.toml @@ -0,0 +1,2230 @@ +# This file is machine-generated - editing it directly is not advised + +julia_version = "1.11.2" +manifest_format = "2.0" +project_hash = "51af5189a097c771e1ad670a00f207ba8c9fed4b" + +[[deps.ADTypes]] +git-tree-sha1 = 
"72af59f5b8f09faee36b4ec48e014a79210f2f4f" +uuid = "47edcb42-4c32-4615-8424-f2b9edc5f35b" +version = "1.11.0" +weakdeps = ["ChainRulesCore", "ConstructionBase", "EnzymeCore"] + + [deps.ADTypes.extensions] + ADTypesChainRulesCoreExt = "ChainRulesCore" + ADTypesConstructionBaseExt = "ConstructionBase" + ADTypesEnzymeCoreExt = "EnzymeCore" + +[[deps.AbstractFFTs]] +deps = ["LinearAlgebra"] +git-tree-sha1 = "d92ad398961a3ed262d8bf04a1a2b8340f915fef" +uuid = "621f4979-c628-5d54-868e-fcf4e3e8185c" +version = "1.5.0" +weakdeps = ["ChainRulesCore", "Test"] + + [deps.AbstractFFTs.extensions] + AbstractFFTsChainRulesCoreExt = "ChainRulesCore" + AbstractFFTsTestExt = "Test" + +[[deps.Accessors]] +deps = ["CompositionsBase", "ConstructionBase", "InverseFunctions", "LinearAlgebra", "MacroTools", "Markdown"] +git-tree-sha1 = "96bed9b1b57cf750cca50c311a197e306816a1cc" +uuid = "7d9f7c33-5ae7-4f3b-8dc6-eff91059b697" +version = "0.1.39" + + [deps.Accessors.extensions] + AccessorsAxisKeysExt = "AxisKeys" + AccessorsDatesExt = "Dates" + AccessorsIntervalSetsExt = "IntervalSets" + AccessorsStaticArraysExt = "StaticArrays" + AccessorsStructArraysExt = "StructArrays" + AccessorsTestExt = "Test" + AccessorsUnitfulExt = "Unitful" + + [deps.Accessors.weakdeps] + AxisKeys = "94b1ba4f-4ee9-5380-92f1-94cde586c3c5" + Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" + IntervalSets = "8197267c-284f-5f27-9208-e0e47529a953" + Requires = "ae029012-a4dd-5104-9daa-d747884805df" + StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" + StructArrays = "09ab397b-f2b6-538f-b94a-2f83cf4a842a" + Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" + Unitful = "1986cc42-f94f-5a68-af5c-568840ba703d" + +[[deps.Adapt]] +deps = ["LinearAlgebra", "Requires"] +git-tree-sha1 = "50c3c56a52972d78e8be9fd135bfb91c9574c140" +uuid = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" +version = "4.1.1" +weakdeps = ["StaticArrays"] + + [deps.Adapt.extensions] + AdaptStaticArraysExt = "StaticArrays" + +[[deps.AliasTables]] +deps = ["PtrArrays", 
"Random"] +git-tree-sha1 = "9876e1e164b144ca45e9e3198d0b689cadfed9ff" +uuid = "66dad0bd-aa9a-41b7-9441-69ab47430ed8" +version = "1.1.3" + +[[deps.ArgCheck]] +git-tree-sha1 = "680b3b8759bd4c54052ada14e52355ab69e07876" +uuid = "dce04be8-c92d-5529-be00-80e4d2c0e197" +version = "2.4.0" + +[[deps.ArgTools]] +uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f" +version = "1.1.2" + +[[deps.ArnoldiMethod]] +deps = ["LinearAlgebra", "Random", "StaticArrays"] +git-tree-sha1 = "d57bd3762d308bded22c3b82d033bff85f6195c6" +uuid = "ec485272-7323-5ecc-a04f-4719b315124d" +version = "0.4.0" + +[[deps.ArrayInterface]] +deps = ["Adapt", "LinearAlgebra"] +git-tree-sha1 = "017fcb757f8e921fb44ee063a7aafe5f89b86dd1" +uuid = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9" +version = "7.18.0" + + [deps.ArrayInterface.extensions] + ArrayInterfaceBandedMatricesExt = "BandedMatrices" + ArrayInterfaceBlockBandedMatricesExt = "BlockBandedMatrices" + ArrayInterfaceCUDAExt = "CUDA" + ArrayInterfaceCUDSSExt = "CUDSS" + ArrayInterfaceChainRulesCoreExt = "ChainRulesCore" + ArrayInterfaceChainRulesExt = "ChainRules" + ArrayInterfaceGPUArraysCoreExt = "GPUArraysCore" + ArrayInterfaceReverseDiffExt = "ReverseDiff" + ArrayInterfaceSparseArraysExt = "SparseArrays" + ArrayInterfaceStaticArraysCoreExt = "StaticArraysCore" + ArrayInterfaceTrackerExt = "Tracker" + + [deps.ArrayInterface.weakdeps] + BandedMatrices = "aae01518-5342-5314-be14-df237901396f" + BlockBandedMatrices = "ffab5731-97b5-5995-9138-79e8c1846df0" + CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" + CUDSS = "45b445bb-4962-46a0-9369-b4df9d0f772e" + ChainRules = "082447d4-558c-5d27-93f4-14fc19e9eca2" + ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" + GPUArraysCore = "46192b85-c4d5-4398-a991-12ede77f4527" + ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267" + SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" + StaticArraysCore = "1e83bf80-4336-4d27-bf5d-d5a4f845583c" + Tracker = "9f7883ad-71c0-57eb-9f7f-b5c9e6d3789c" + +[[deps.ArrayLayouts]] +deps 
= ["FillArrays", "LinearAlgebra"] +git-tree-sha1 = "2bf6e01f453284cb61c312836b4680331ddfc44b" +uuid = "4c555306-a7a7-4459-81d9-ec55ddd5c99a" +version = "1.11.0" +weakdeps = ["SparseArrays"] + + [deps.ArrayLayouts.extensions] + ArrayLayoutsSparseArraysExt = "SparseArrays" + +[[deps.Artifacts]] +uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" +version = "1.11.0" + +[[deps.Atomix]] +deps = ["UnsafeAtomics"] +git-tree-sha1 = "c3b238aa28c1bebd4b5ea4988bebf27e9a01b72b" +uuid = "a9b6321e-bd34-4604-b9c9-b65b8de01458" +version = "1.0.1" + + [deps.Atomix.extensions] + AtomixCUDAExt = "CUDA" + AtomixMetalExt = "Metal" + AtomixoneAPIExt = "oneAPI" + + [deps.Atomix.weakdeps] + CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" + Metal = "dde4c033-4e86-420c-a63e-0dd931031962" + oneAPI = "8f75cd03-7ff8-4ecb-9b8f-daf728133b1b" + +[[deps.AutoHashEquals]] +git-tree-sha1 = "4ec6b48702dacc5994a835c1189831755e4e76ef" +uuid = "15f4f7f2-30c1-5605-9d31-71845cf9641f" +version = "2.2.0" + +[[deps.BFloat16s]] +deps = ["LinearAlgebra", "Printf", "Random", "Test"] +git-tree-sha1 = "2c7cc21e8678eff479978a0a2ef5ce2f51b63dff" +uuid = "ab4f0b2a-ad5b-11e8-123f-65d77653426b" +version = "0.5.0" + +[[deps.Base64]] +uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" +version = "1.11.0" + +[[deps.BitTwiddlingConvenienceFunctions]] +deps = ["Static"] +git-tree-sha1 = "f21cfd4950cb9f0587d5067e69405ad2acd27b87" +uuid = "62783981-4cbd-42fc-bca8-16325de8dc4b" +version = "0.1.6" + +[[deps.BracketingNonlinearSolve]] +deps = ["CommonSolve", "ConcreteStructs", "NonlinearSolveBase", "PrecompileTools", "Reexport", "SciMLBase"] +git-tree-sha1 = "95cb19c37ea427617e9795655667712f03058d98" +uuid = "70df07ce-3d50-431d-a3e7-ca6ddb60ac1e" +version = "1.1.0" +weakdeps = ["ForwardDiff"] + + [deps.BracketingNonlinearSolve.extensions] + BracketingNonlinearSolveForwardDiffExt = "ForwardDiff" + +[[deps.CEnum]] +git-tree-sha1 = "389ad5c84de1ae7cf0e28e381131c98ea87d54fc" +uuid = "fa961155-64e5-5f13-b03f-caf6b980ea82" +version = "0.5.0" + 
+[[deps.CPUSummary]] +deps = ["CpuId", "IfElse", "PrecompileTools", "Static"] +git-tree-sha1 = "5a97e67919535d6841172016c9530fd69494e5ec" +uuid = "2a0fbf3d-bb9c-48f3-b0a9-814d99fd7ab9" +version = "0.2.6" + +[[deps.CUDA]] +deps = ["AbstractFFTs", "Adapt", "BFloat16s", "CEnum", "CUDA_Driver_jll", "CUDA_Runtime_Discovery", "CUDA_Runtime_jll", "Crayons", "DataFrames", "ExprTools", "GPUArrays", "GPUCompiler", "KernelAbstractions", "LLVM", "LLVMLoopInfo", "LazyArtifacts", "Libdl", "LinearAlgebra", "Logging", "NVTX", "Preferences", "PrettyTables", "Printf", "Random", "Random123", "RandomNumbers", "Reexport", "Requires", "SparseArrays", "StaticArrays", "Statistics", "demumble_jll"] +git-tree-sha1 = "e0725a467822697171af4dae15cec10b4fc19053" +uuid = "052768ef-5323-5732-b1bb-66c8b64840ba" +version = "5.5.2" +weakdeps = ["ChainRulesCore", "EnzymeCore", "SpecialFunctions"] + + [deps.CUDA.extensions] + ChainRulesCoreExt = "ChainRulesCore" + EnzymeCoreExt = "EnzymeCore" + SpecialFunctionsExt = "SpecialFunctions" + +[[deps.CUDA_Driver_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "14996d716a2eaaeccfc8d7bc854dd87fde720ac1" +uuid = "4ee394cb-3365-5eb0-8335-949819d2adfc" +version = "0.10.4+0" + +[[deps.CUDA_Runtime_Discovery]] +deps = ["Libdl"] +git-tree-sha1 = "33576c7c1b2500f8e7e6baa082e04563203b3a45" +uuid = "1af6417a-86b4-443c-805f-a4643ffb695f" +version = "0.3.5" + +[[deps.CUDA_Runtime_jll]] +deps = ["Artifacts", "CUDA_Driver_jll", "JLLWrappers", "LazyArtifacts", "Libdl", "TOML"] +git-tree-sha1 = "17f1536c600133f7c4113bae0a2d98dbf27c7ebc" +uuid = "76a88914-d11a-5bdc-97e0-2f5a05c973a2" +version = "0.15.5+0" + +[[deps.ChainRulesCore]] +deps = ["Compat", "LinearAlgebra"] +git-tree-sha1 = "3e4b134270b372f2ed4d4d0e936aabaefc1802bc" +uuid = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" +version = "1.25.0" +weakdeps = ["SparseArrays"] + + [deps.ChainRulesCore.extensions] + ChainRulesCoreSparseArraysExt = "SparseArrays" + +[[deps.CloseOpenIntervals]] +deps = 
["Static", "StaticArrayInterface"] +git-tree-sha1 = "05ba0d07cd4fd8b7a39541e31a7b0254704ea581" +uuid = "fb6a15b2-703c-40df-9091-08a04967cfa9" +version = "0.1.13" + +[[deps.CodeTracking]] +deps = ["InteractiveUtils", "UUIDs"] +git-tree-sha1 = "7eee164f122511d3e4e1ebadb7956939ea7e1c77" +uuid = "da1fd8a2-8d9e-5ec2-8556-3022fb5608a2" +version = "1.3.6" + +[[deps.CodecZlib]] +deps = ["TranscodingStreams", "Zlib_jll"] +git-tree-sha1 = "bce6804e5e6044c6daab27bb533d1295e4a2e759" +uuid = "944b1d66-785c-5afd-91f1-9de20f533193" +version = "0.7.6" + +[[deps.ColorTypes]] +deps = ["FixedPointNumbers", "Random"] +git-tree-sha1 = "c7acce7a7e1078a20a285211dd73cd3941a871d6" +uuid = "3da002f7-5984-5a60-b8a6-cbb66c0b333f" +version = "0.12.0" + + [deps.ColorTypes.extensions] + StyledStringsExt = "StyledStrings" + + [deps.ColorTypes.weakdeps] + StyledStrings = "f489334b-da3d-4c2e-b8f0-e476e12c162b" + +[[deps.Colors]] +deps = ["ColorTypes", "FixedPointNumbers", "Reexport"] +git-tree-sha1 = "64e15186f0aa277e174aa81798f7eb8598e0157e" +uuid = "5ae59095-9a9b-59fe-a467-6f913c188581" +version = "0.13.0" + +[[deps.CommonSolve]] +git-tree-sha1 = "0eee5eb66b1cf62cd6ad1b460238e60e4b09400c" +uuid = "38540f10-b2f7-11e9-35d8-d573e4eb0ff2" +version = "0.2.4" + +[[deps.CommonSubexpressions]] +deps = ["MacroTools"] +git-tree-sha1 = "cda2cfaebb4be89c9084adaca7dd7333369715c5" +uuid = "bbf7d656-a473-5ed7-a52c-81e309532950" +version = "0.3.1" + +[[deps.CommonWorldInvalidations]] +git-tree-sha1 = "ae52d1c52048455e85a387fbee9be553ec2b68d0" +uuid = "f70d9fcc-98c5-4d4a-abd7-e4cdeebd8ca8" +version = "1.0.0" + +[[deps.Compat]] +deps = ["TOML", "UUIDs"] +git-tree-sha1 = "8ae8d32e09f0dcf42a36b90d4e17f5dd2e4c4215" +uuid = "34da2185-b29b-5c13-b0c7-acf172513d20" +version = "4.16.0" +weakdeps = ["Dates", "LinearAlgebra"] + + [deps.Compat.extensions] + CompatLinearAlgebraExt = "LinearAlgebra" + +[[deps.CompilerSupportLibraries_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae" +version = 
"1.1.1+0" + +[[deps.CompositionsBase]] +git-tree-sha1 = "802bb88cd69dfd1509f6670416bd4434015693ad" +uuid = "a33af91c-f02d-484b-be07-31d278c5ca2b" +version = "0.1.2" +weakdeps = ["InverseFunctions"] + + [deps.CompositionsBase.extensions] + CompositionsBaseInverseFunctionsExt = "InverseFunctions" + +[[deps.ConcreteStructs]] +git-tree-sha1 = "f749037478283d372048690eb3b5f92a79432b34" +uuid = "2569d6c7-a4a2-43d3-a901-331e8e4be471" +version = "0.2.3" + +[[deps.ConstructionBase]] +git-tree-sha1 = "76219f1ed5771adbb096743bff43fb5fdd4c1157" +uuid = "187b0558-2788-49d3-abe0-74a17ed4e7c9" +version = "1.5.8" + + [deps.ConstructionBase.extensions] + ConstructionBaseIntervalSetsExt = "IntervalSets" + ConstructionBaseLinearAlgebraExt = "LinearAlgebra" + ConstructionBaseStaticArraysExt = "StaticArrays" + + [deps.ConstructionBase.weakdeps] + IntervalSets = "8197267c-284f-5f27-9208-e0e47529a953" + LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" + StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" + +[[deps.CpuId]] +deps = ["Markdown"] +git-tree-sha1 = "fcbb72b032692610bfbdb15018ac16a36cf2e406" +uuid = "adafc99b-e345-5852-983c-f28acb93d879" +version = "0.3.1" + +[[deps.Crayons]] +git-tree-sha1 = "249fe38abf76d48563e2f4556bebd215aa317e15" +uuid = "a8cc5b0e-0ffa-5ad4-8c14-923d3ee1735f" +version = "4.1.1" + +[[deps.DataAPI]] +git-tree-sha1 = "abe83f3a2f1b857aac70ef8b269080af17764bbe" +uuid = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a" +version = "1.16.0" + +[[deps.DataFrames]] +deps = ["Compat", "DataAPI", "DataStructures", "Future", "InlineStrings", "InvertedIndices", "IteratorInterfaceExtensions", "LinearAlgebra", "Markdown", "Missings", "PooledArrays", "PrecompileTools", "PrettyTables", "Printf", "Random", "Reexport", "SentinelArrays", "SortingAlgorithms", "Statistics", "TableTraits", "Tables", "Unicode"] +git-tree-sha1 = "fb61b4812c49343d7ef0b533ba982c46021938a6" +uuid = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" +version = "1.7.0" + +[[deps.DataStructures]] +deps = ["Compat", 
"InteractiveUtils", "OrderedCollections"] +git-tree-sha1 = "1d0a14036acb104d9e89698bd408f63ab58cdc82" +uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" +version = "0.18.20" + +[[deps.DataValueInterfaces]] +git-tree-sha1 = "bfc1187b79289637fa0ef6d4436ebdfe6905cbd6" +uuid = "e2d170a0-9d28-54be-80f0-106bbe20a464" +version = "1.0.0" + +[[deps.Dates]] +deps = ["Printf"] +uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" +version = "1.11.0" + +[[deps.DelimitedFiles]] +deps = ["Mmap"] +git-tree-sha1 = "9e2f36d3c96a820c678f2f1f1782582fcf685bae" +uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab" +version = "1.9.1" + +[[deps.DiffEqBase]] +deps = ["ArrayInterface", "ConcreteStructs", "DataStructures", "DocStringExtensions", "EnumX", "EnzymeCore", "FastBroadcast", "FastClosures", "FastPower", "ForwardDiff", "FunctionWrappers", "FunctionWrappersWrappers", "LinearAlgebra", "Logging", "Markdown", "MuladdMacro", "Parameters", "PreallocationTools", "PrecompileTools", "Printf", "RecursiveArrayTools", "Reexport", "SciMLBase", "SciMLOperators", "SciMLStructures", "Setfield", "Static", "StaticArraysCore", "Statistics", "TruncatedStacktraces"] +git-tree-sha1 = "b1e23a7fe7371934d9d538114a7e7166c1d09e05" +uuid = "2b5f629d-d688-5b77-993f-72d75c75574e" +version = "6.161.0" + + [deps.DiffEqBase.extensions] + DiffEqBaseCUDAExt = "CUDA" + DiffEqBaseChainRulesCoreExt = "ChainRulesCore" + DiffEqBaseDistributionsExt = "Distributions" + DiffEqBaseEnzymeExt = ["ChainRulesCore", "Enzyme"] + DiffEqBaseGeneralizedGeneratedExt = "GeneralizedGenerated" + DiffEqBaseMPIExt = "MPI" + DiffEqBaseMeasurementsExt = "Measurements" + DiffEqBaseMonteCarloMeasurementsExt = "MonteCarloMeasurements" + DiffEqBaseReverseDiffExt = "ReverseDiff" + DiffEqBaseSparseArraysExt = "SparseArrays" + DiffEqBaseTrackerExt = "Tracker" + DiffEqBaseUnitfulExt = "Unitful" + + [deps.DiffEqBase.weakdeps] + CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" + ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" + Distributions = 
"31c24e10-a181-5473-b8eb-7969acd0382f" + Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9" + GeneralizedGenerated = "6b9d7cbe-bcb9-11e9-073f-15a7a543e2eb" + MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195" + Measurements = "eff96d63-e80a-5855-80a2-b1b0885c5ab7" + MonteCarloMeasurements = "0987c9cc-fe09-11e8-30f0-b96dd679fdca" + ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267" + SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" + Tracker = "9f7883ad-71c0-57eb-9f7f-b5c9e6d3789c" + Unitful = "1986cc42-f94f-5a68-af5c-568840ba703d" + +[[deps.DiffEqCallbacks]] +deps = ["ConcreteStructs", "DataStructures", "DiffEqBase", "DifferentiationInterface", "Functors", "LinearAlgebra", "Markdown", "RecipesBase", "RecursiveArrayTools", "SciMLBase", "StaticArraysCore"] +git-tree-sha1 = "f6bc598f21c7bf2f7885cff9b3c9078e606ab075" +uuid = "459566f4-90b8-5000-8ac3-15dfb0a30def" +version = "4.2.2" + +[[deps.DiffResults]] +deps = ["StaticArraysCore"] +git-tree-sha1 = "782dd5f4561f5d267313f23853baaaa4c52ea621" +uuid = "163ba53b-c6d8-5494-b064-1a9d43ac40c5" +version = "1.1.0" + +[[deps.DiffRules]] +deps = ["IrrationalConstants", "LogExpFunctions", "NaNMath", "Random", "SpecialFunctions"] +git-tree-sha1 = "23163d55f885173722d1e4cf0f6110cdbaf7e272" +uuid = "b552c78f-8df3-52c6-915a-8e097449b14b" +version = "1.15.1" + +[[deps.DifferentiationInterface]] +deps = ["ADTypes", "LinearAlgebra"] +git-tree-sha1 = "7ffe68edc2a4ad0ff2eeb5a32cc99ab45746fe3d" +uuid = "a0c0ee7d-e4b9-4e03-894e-1c5f64a51d63" +version = "0.6.27" + + [deps.DifferentiationInterface.extensions] + DifferentiationInterfaceChainRulesCoreExt = "ChainRulesCore" + DifferentiationInterfaceDiffractorExt = "Diffractor" + DifferentiationInterfaceEnzymeExt = ["EnzymeCore", "Enzyme"] + DifferentiationInterfaceFastDifferentiationExt = "FastDifferentiation" + DifferentiationInterfaceFiniteDiffExt = "FiniteDiff" + DifferentiationInterfaceFiniteDifferencesExt = "FiniteDifferences" + DifferentiationInterfaceForwardDiffExt = ["ForwardDiff", 
"DiffResults"] + DifferentiationInterfaceMooncakeExt = "Mooncake" + DifferentiationInterfacePolyesterForwardDiffExt = "PolyesterForwardDiff" + DifferentiationInterfaceReverseDiffExt = ["ReverseDiff", "DiffResults"] + DifferentiationInterfaceSparseArraysExt = "SparseArrays" + DifferentiationInterfaceSparseMatrixColoringsExt = "SparseMatrixColorings" + DifferentiationInterfaceStaticArraysExt = "StaticArrays" + DifferentiationInterfaceSymbolicsExt = "Symbolics" + DifferentiationInterfaceTrackerExt = "Tracker" + DifferentiationInterfaceZygoteExt = ["Zygote", "ForwardDiff"] + + [deps.DifferentiationInterface.weakdeps] + ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" + DiffResults = "163ba53b-c6d8-5494-b064-1a9d43ac40c5" + Diffractor = "9f5e2b26-1114-432f-b630-d3fe2085c51c" + Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9" + EnzymeCore = "f151be2c-9106-41f4-ab19-57ee4f262869" + FastDifferentiation = "eb9bf01b-bf85-4b60-bf87-ee5de06c00be" + FiniteDiff = "6a86dc24-6348-571c-b903-95158fe2bd41" + FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000" + ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" + Mooncake = "da2b9cff-9c12-43a0-ae48-6db2b0edb7d6" + PolyesterForwardDiff = "98d1487c-24ca-40b6-b7ab-df2af84e126b" + ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267" + SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" + SparseMatrixColorings = "0a514795-09f3-496d-8182-132a7b665d35" + StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" + Symbolics = "0c5d862f-8b57-4792-8d23-62f2024744c7" + Tracker = "9f7883ad-71c0-57eb-9f7f-b5c9e6d3789c" + Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" + +[[deps.Distributed]] +deps = ["Random", "Serialization", "Sockets"] +uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b" +version = "1.11.0" + +[[deps.DocStringExtensions]] +deps = ["LibGit2"] +git-tree-sha1 = "2fb1e02f2b635d0845df5d7c167fec4dd739b00d" +uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" +version = "0.9.3" + +[[deps.Downloads]] +deps = ["ArgTools", 
"FileWatching", "LibCURL", "NetworkOptions"] +uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6" +version = "1.6.0" + +[[deps.EllipsisNotation]] +deps = ["StaticArrayInterface"] +git-tree-sha1 = "3507300d4343e8e4ad080ad24e335274c2e297a9" +uuid = "da5c29d0-fa7d-589e-88eb-ea29b0a81949" +version = "1.8.0" + +[[deps.EnumX]] +git-tree-sha1 = "bdb1942cd4c45e3c678fd11569d5cccd80976237" +uuid = "4e289a0a-7415-4d19-859d-a7e5c4648b56" +version = "1.0.4" + +[[deps.EnzymeCore]] +git-tree-sha1 = "0cdb7af5c39e92d78a0ee8d0a447d32f7593137e" +uuid = "f151be2c-9106-41f4-ab19-57ee4f262869" +version = "0.8.8" +weakdeps = ["Adapt"] + + [deps.EnzymeCore.extensions] + AdaptExt = "Adapt" + +[[deps.ExponentialUtilities]] +deps = ["Adapt", "ArrayInterface", "GPUArraysCore", "GenericSchur", "LinearAlgebra", "PrecompileTools", "Printf", "SparseArrays", "libblastrampoline_jll"] +git-tree-sha1 = "cae251c76f353e32d32d76fae2fea655eab652af" +uuid = "d4d017d3-3776-5f7e-afef-a10c40355c18" +version = "1.27.0" +weakdeps = ["StaticArrays"] + + [deps.ExponentialUtilities.extensions] + ExponentialUtilitiesStaticArraysExt = "StaticArrays" + +[[deps.ExprTools]] +git-tree-sha1 = "27415f162e6028e81c72b82ef756bf321213b6ec" +uuid = "e2ba6199-217a-4e67-a87a-7c52f15ade04" +version = "0.1.10" + +[[deps.Expronicon]] +deps = ["MLStyle", "Pkg", "TOML"] +git-tree-sha1 = "fc3951d4d398b5515f91d7fe5d45fc31dccb3c9b" +uuid = "6b7a57c9-7cc1-4fdf-b7f5-e857abae3636" +version = "0.8.5" + +[[deps.FFTW]] +deps = ["AbstractFFTs", "FFTW_jll", "LinearAlgebra", "MKL_jll", "Preferences", "Reexport"] +git-tree-sha1 = "4820348781ae578893311153d69049a93d05f39d" +uuid = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341" +version = "1.8.0" + +[[deps.FFTW_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "4d81ed14783ec49ce9f2e168208a12ce1815aa25" +uuid = "f5851436-0d7a-5f13-b9de-f02708fd171a" +version = "3.3.10+1" + +[[deps.FastBroadcast]] +deps = ["ArrayInterface", "LinearAlgebra", "Polyester", "Static", "StaticArrayInterface", 
"StrideArraysCore"] +git-tree-sha1 = "ab1b34570bcdf272899062e1a56285a53ecaae08" +uuid = "7034ab61-46d4-4ed7-9d0f-46aef9175898" +version = "0.3.5" + +[[deps.FastClosures]] +git-tree-sha1 = "acebe244d53ee1b461970f8910c235b259e772ef" +uuid = "9aa1b823-49e4-5ca5-8b0f-3971ec8bab6a" +version = "0.3.2" + +[[deps.FastGaussQuadrature]] +deps = ["LinearAlgebra", "SpecialFunctions", "StaticArrays"] +git-tree-sha1 = "fd923962364b645f3719855c88f7074413a6ad92" +uuid = "442a2c76-b920-505d-bb47-c5924d526838" +version = "1.0.2" + +[[deps.FastLapackInterface]] +deps = ["LinearAlgebra"] +git-tree-sha1 = "cbf5edddb61a43669710cbc2241bc08b36d9e660" +uuid = "29a986be-02c6-4525-aec4-84b980013641" +version = "2.0.4" + +[[deps.FastPower]] +git-tree-sha1 = "58c3431137131577a7c379d00fea00be524338fb" +uuid = "a4df4552-cc26-4903-aec0-212e50a0e84b" +version = "1.1.1" + + [deps.FastPower.extensions] + FastPowerEnzymeExt = "Enzyme" + FastPowerForwardDiffExt = "ForwardDiff" + FastPowerMeasurementsExt = "Measurements" + FastPowerMonteCarloMeasurementsExt = "MonteCarloMeasurements" + FastPowerReverseDiffExt = "ReverseDiff" + FastPowerTrackerExt = "Tracker" + + [deps.FastPower.weakdeps] + Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9" + ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" + Measurements = "eff96d63-e80a-5855-80a2-b1b0885c5ab7" + MonteCarloMeasurements = "0987c9cc-fe09-11e8-30f0-b96dd679fdca" + ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267" + Tracker = "9f7883ad-71c0-57eb-9f7f-b5c9e6d3789c" + +[[deps.FileWatching]] +uuid = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee" +version = "1.11.0" + +[[deps.FillArrays]] +deps = ["LinearAlgebra"] +git-tree-sha1 = "6a70198746448456524cb442b8af316927ff3e1a" +uuid = "1a297f60-69ca-5386-bcde-b61e274b549b" +version = "1.13.0" + + [deps.FillArrays.extensions] + FillArraysPDMatsExt = "PDMats" + FillArraysSparseArraysExt = "SparseArrays" + FillArraysStatisticsExt = "Statistics" + + [deps.FillArrays.weakdeps] + PDMats = 
"90014a1f-27ba-587c-ab20-58faa44d9150" + SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" + Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" + +[[deps.FiniteDiff]] +deps = ["ArrayInterface", "LinearAlgebra", "Setfield"] +git-tree-sha1 = "84e3a47db33be7248daa6274b287507dd6ff84e8" +uuid = "6a86dc24-6348-571c-b903-95158fe2bd41" +version = "2.26.2" + + [deps.FiniteDiff.extensions] + FiniteDiffBandedMatricesExt = "BandedMatrices" + FiniteDiffBlockBandedMatricesExt = "BlockBandedMatrices" + FiniteDiffSparseArraysExt = "SparseArrays" + FiniteDiffStaticArraysExt = "StaticArrays" + + [deps.FiniteDiff.weakdeps] + BandedMatrices = "aae01518-5342-5314-be14-df237901396f" + BlockBandedMatrices = "ffab5731-97b5-5995-9138-79e8c1846df0" + SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" + StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" + +[[deps.FixedPointNumbers]] +deps = ["Statistics"] +git-tree-sha1 = "05882d6995ae5c12bb5f36dd2ed3f61c98cbb172" +uuid = "53c48c17-4a7d-5ca2-90c5-79b7896eea93" +version = "0.8.5" + +[[deps.ForwardDiff]] +deps = ["CommonSubexpressions", "DiffResults", "DiffRules", "LinearAlgebra", "LogExpFunctions", "NaNMath", "Preferences", "Printf", "Random", "SpecialFunctions"] +git-tree-sha1 = "a2df1b776752e3f344e5116c06d75a10436ab853" +uuid = "f6369f11-7733-5829-9624-2563aa707210" +version = "0.10.38" +weakdeps = ["StaticArrays"] + + [deps.ForwardDiff.extensions] + ForwardDiffStaticArraysExt = "StaticArrays" + +[[deps.FunctionWrappers]] +git-tree-sha1 = "d62485945ce5ae9c0c48f124a84998d755bae00e" +uuid = "069b7b12-0de2-55c6-9aab-29f3d0a68a2e" +version = "1.1.3" + +[[deps.FunctionWrappersWrappers]] +deps = ["FunctionWrappers"] +git-tree-sha1 = "b104d487b34566608f8b4e1c39fb0b10aa279ff8" +uuid = "77dc65aa-8811-40c2-897b-53d922fa7daf" +version = "0.1.3" + +[[deps.Functors]] +deps = ["Compat", "ConstructionBase", "LinearAlgebra", "Random"] +git-tree-sha1 = "60a0339f28a233601cb74468032b5c302d5067de" +uuid = "d9f16b24-f501-4c13-a1f2-28368ffc5196" 
+version = "0.5.2" + +[[deps.Future]] +deps = ["Random"] +uuid = "9fa8497b-333b-5362-9e8d-4d0656e87820" +version = "1.11.0" + +[[deps.GPUArrays]] +deps = ["Adapt", "GPUArraysCore", "LLVM", "LinearAlgebra", "Printf", "Random", "Reexport", "Serialization", "Statistics"] +git-tree-sha1 = "62ee71528cca49be797076a76bdc654a170a523e" +uuid = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7" +version = "10.3.1" + +[[deps.GPUArraysCore]] +deps = ["Adapt"] +git-tree-sha1 = "ec632f177c0d990e64d955ccc1b8c04c485a0950" +uuid = "46192b85-c4d5-4398-a991-12ede77f4527" +version = "0.1.6" + +[[deps.GPUCompiler]] +deps = ["ExprTools", "InteractiveUtils", "LLVM", "Libdl", "Logging", "PrecompileTools", "Preferences", "Scratch", "Serialization", "TOML", "TimerOutputs", "UUIDs"] +git-tree-sha1 = "1d6f290a5eb1201cd63574fbc4440c788d5cb38f" +uuid = "61eb1bfa-7361-4325-ad38-22787b887f55" +version = "0.27.8" + +[[deps.GaussQuadrature]] +deps = ["SpecialFunctions"] +git-tree-sha1 = "eb6f1f48aa994f3018cbd029a17863c6535a266d" +uuid = "d54b0c1a-921d-58e0-8e36-89d8069c0969" +version = "0.5.8" + +[[deps.GenericSchur]] +deps = ["LinearAlgebra", "Printf"] +git-tree-sha1 = "af49a0851f8113fcfae2ef5027c6d49d0acec39b" +uuid = "c145ed77-6b09-5dd9-b285-bf645a82121e" +version = "0.5.4" + +[[deps.Graphs]] +deps = ["ArnoldiMethod", "Compat", "DataStructures", "Distributed", "Inflate", "LinearAlgebra", "Random", "SharedArrays", "SimpleTraits", "SparseArrays", "Statistics"] +git-tree-sha1 = "1dc470db8b1131cfc7fb4c115de89fe391b9e780" +uuid = "86223c79-3864-5bf0-83f7-82e725a168b6" +version = "1.12.0" + +[[deps.HDF5]] +deps = ["Compat", "HDF5_jll", "Libdl", "MPIPreferences", "Mmap", "Preferences", "Printf", "Random", "Requires", "UUIDs"] +git-tree-sha1 = "e856eef26cf5bf2b0f95f8f4fc37553c72c8641c" +uuid = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f" +version = "0.17.2" +weakdeps = ["MPI"] + + [deps.HDF5.extensions] + MPIExt = "MPI" + +[[deps.HDF5_jll]] +deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", 
"LazyArtifacts", "LibCURL_jll", "Libdl", "MPICH_jll", "MPIPreferences", "MPItrampoline_jll", "MicrosoftMPI_jll", "OpenMPI_jll", "OpenSSL_jll", "TOML", "Zlib_jll", "libaec_jll"] +git-tree-sha1 = "82a471768b513dc39e471540fdadc84ff80ff997" +uuid = "0234f1f7-429e-5d53-9886-15a909be8d59" +version = "1.14.3+3" + +[[deps.HostCPUFeatures]] +deps = ["BitTwiddlingConvenienceFunctions", "IfElse", "Libdl", "Static"] +git-tree-sha1 = "8e070b599339d622e9a081d17230d74a5c473293" +uuid = "3e5b6fbb-0976-4d2c-9146-d79de83f2fb0" +version = "0.1.17" + +[[deps.Hwloc_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "50aedf345a709ab75872f80a2779568dc0bb461b" +uuid = "e33a78d0-f292-5ffc-b300-72abe9b543c8" +version = "2.11.2+1" + +[[deps.IfElse]] +git-tree-sha1 = "debdd00ffef04665ccbb3e150747a77560e8fad1" +uuid = "615f187c-cbe4-4ef1-ba3b-2fcf58d6d173" +version = "0.1.1" + +[[deps.Inflate]] +git-tree-sha1 = "d1b1b796e47d94588b3757fe84fbf65a5ec4a80d" +uuid = "d25df0c9-e2be-5dd7-82c8-3ad0b3e990b9" +version = "0.1.5" + +[[deps.InlineStrings]] +git-tree-sha1 = "45521d31238e87ee9f9732561bfee12d4eebd52d" +uuid = "842dd82b-1e85-43dc-bf29-5d0ee9dffc48" +version = "1.4.2" + + [deps.InlineStrings.extensions] + ArrowTypesExt = "ArrowTypes" + ParsersExt = "Parsers" + + [deps.InlineStrings.weakdeps] + ArrowTypes = "31f734f8-188a-4ce0-8406-c8a06bd891cd" + Parsers = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0" + +[[deps.IntelOpenMP_jll]] +deps = ["Artifacts", "JLLWrappers", "LazyArtifacts", "Libdl"] +git-tree-sha1 = "10bd689145d2c3b2a9844005d01087cc1194e79e" +uuid = "1d5cc7b8-4909-519e-a0f8-d0f5ad9712d0" +version = "2024.2.1+0" + +[[deps.InteractiveUtils]] +deps = ["Markdown"] +uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" +version = "1.11.0" + +[[deps.InverseFunctions]] +git-tree-sha1 = "a779299d77cd080bf77b97535acecd73e1c5e5cb" +uuid = "3587e190-3f89-42d0-90ee-14403ec27112" +version = "0.1.17" +weakdeps = ["Dates", "Test"] + + [deps.InverseFunctions.extensions] + InverseFunctionsDatesExt = 
"Dates" + InverseFunctionsTestExt = "Test" + +[[deps.InvertedIndices]] +git-tree-sha1 = "6da3c4316095de0f5ee2ebd875df8721e7e0bdbe" +uuid = "41ab1584-1d38-5bbf-9106-f11c6c58b48f" +version = "1.3.1" + +[[deps.IrrationalConstants]] +git-tree-sha1 = "630b497eafcc20001bba38a4651b327dcfc491d2" +uuid = "92d709cd-6900-40b7-9082-c6be49f344b6" +version = "0.2.2" + +[[deps.IteratorInterfaceExtensions]] +git-tree-sha1 = "a3f24677c21f5bbe9d2a714f95dcd58337fb2856" +uuid = "82899510-4779-5014-852e-03e436cf321d" +version = "1.0.0" + +[[deps.JLLWrappers]] +deps = ["Artifacts", "Preferences"] +git-tree-sha1 = "be3dc50a92e5a386872a493a10050136d4703f9b" +uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210" +version = "1.6.1" + +[[deps.JuliaNVTXCallbacks_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "af433a10f3942e882d3c671aacb203e006a5808f" +uuid = "9c1d0b0a-7046-5b2e-a33f-ea22f176ac7e" +version = "0.2.1+0" + +[[deps.KLU]] +deps = ["LinearAlgebra", "SparseArrays", "SuiteSparse_jll"] +git-tree-sha1 = "07649c499349dad9f08dde4243a4c597064663e9" +uuid = "ef3ab10e-7fda-4108-b977-705223b18434" +version = "0.6.0" + +[[deps.KernelAbstractions]] +deps = ["Adapt", "Atomix", "InteractiveUtils", "MacroTools", "PrecompileTools", "Requires", "StaticArrays", "UUIDs"] +git-tree-sha1 = "b9a838cd3028785ac23822cded5126b3da394d1a" +uuid = "63c18a36-062a-441e-b654-da1e3ab1ce7c" +version = "0.9.31" +weakdeps = ["EnzymeCore", "LinearAlgebra", "SparseArrays"] + + [deps.KernelAbstractions.extensions] + EnzymeExt = "EnzymeCore" + LinearAlgebraExt = "LinearAlgebra" + SparseArraysExt = "SparseArrays" + +[[deps.Kronecker]] +deps = ["LinearAlgebra", "NamedDims", "SparseArrays", "StatsBase"] +git-tree-sha1 = "9253429e28cceae6e823bec9ffde12460d79bb38" +uuid = "2c470bb0-bcc8-11e8-3dad-c9649493f05e" +version = "0.5.5" + +[[deps.Krylov]] +deps = ["LinearAlgebra", "Printf", "SparseArrays"] +git-tree-sha1 = "4f20a2df85a9e5d55c9e84634bbf808ed038cabd" +uuid = "ba0b0d4f-ebba-5204-a429-3ac8c609bfb7" 
+version = "0.9.8" + +[[deps.LLVM]] +deps = ["CEnum", "LLVMExtra_jll", "Libdl", "Preferences", "Printf", "Unicode"] +git-tree-sha1 = "d422dfd9707bec6617335dc2ea3c5172a87d5908" +uuid = "929cbde3-209d-540e-8aea-75f648917ca0" +version = "9.1.3" +weakdeps = ["BFloat16s"] + + [deps.LLVM.extensions] + BFloat16sExt = "BFloat16s" + +[[deps.LLVMExtra_jll]] +deps = ["Artifacts", "JLLWrappers", "LazyArtifacts", "Libdl", "TOML"] +git-tree-sha1 = "05a8bd5a42309a9ec82f700876903abce1017dd3" +uuid = "dad2f222-ce93-54a1-a47d-0025e8a3acab" +version = "0.0.34+0" + +[[deps.LLVMLoopInfo]] +git-tree-sha1 = "2e5c102cfc41f48ae4740c7eca7743cc7e7b75ea" +uuid = "8b046642-f1f6-4319-8d3c-209ddc03c586" +version = "1.0.0" + +[[deps.LaTeXStrings]] +git-tree-sha1 = "dda21b8cbd6a6c40d9d02a73230f9d70fed6918c" +uuid = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f" +version = "1.4.0" + +[[deps.LayoutPointers]] +deps = ["ArrayInterface", "LinearAlgebra", "ManualMemory", "SIMDTypes", "Static", "StaticArrayInterface"] +git-tree-sha1 = "a9eaadb366f5493a5654e843864c13d8b107548c" +uuid = "10f19ff3-798f-405d-979b-55457f8fc047" +version = "0.1.17" + +[[deps.LazyArrays]] +deps = ["ArrayLayouts", "FillArrays", "LinearAlgebra", "MacroTools", "SparseArrays"] +git-tree-sha1 = "f289bee714e11708df257c57514585863aa02b33" +uuid = "5078a376-72f3-5289-bfd5-ec5146d43c02" +version = "2.3.1" + + [deps.LazyArrays.extensions] + LazyArraysBandedMatricesExt = "BandedMatrices" + LazyArraysBlockArraysExt = "BlockArrays" + LazyArraysBlockBandedMatricesExt = "BlockBandedMatrices" + LazyArraysStaticArraysExt = "StaticArrays" + + [deps.LazyArrays.weakdeps] + BandedMatrices = "aae01518-5342-5314-be14-df237901396f" + BlockArrays = "8e7c35d0-a365-5155-bbbb-fb81a777f24e" + BlockBandedMatrices = "ffab5731-97b5-5995-9138-79e8c1846df0" + StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" + +[[deps.LazyArtifacts]] +deps = ["Artifacts", "Pkg"] +uuid = "4af54fe1-eca0-43a8-85a7-787d91b784e3" +version = "1.11.0" + +[[deps.LibCURL]] +deps = 
["LibCURL_jll", "MozillaCACerts_jll"] +uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21" +version = "0.6.4" + +[[deps.LibCURL_jll]] +deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"] +uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0" +version = "8.6.0+0" + +[[deps.LibGit2]] +deps = ["Base64", "LibGit2_jll", "NetworkOptions", "Printf", "SHA"] +uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" +version = "1.11.0" + +[[deps.LibGit2_jll]] +deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll"] +uuid = "e37daf67-58a4-590a-8e99-b0245dd2ffc5" +version = "1.7.2+0" + +[[deps.LibSSH2_jll]] +deps = ["Artifacts", "Libdl", "MbedTLS_jll"] +uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8" +version = "1.11.0+1" + +[[deps.Libdl]] +uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" +version = "1.11.0" + +[[deps.Libiconv_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "61dfdba58e585066d8bce214c5a51eaa0539f269" +uuid = "94ce4f54-9a6c-5748-9c1c-f9c7231a4531" +version = "1.17.0+1" + +[[deps.LightXML]] +deps = ["Libdl", "XML2_jll"] +git-tree-sha1 = "3a994404d3f6709610701c7dabfc03fed87a81f8" +uuid = "9c8b4983-aa76-5018-a973-4c85ecc9e179" +version = "0.9.1" + +[[deps.LineSearch]] +deps = ["ADTypes", "CommonSolve", "ConcreteStructs", "FastClosures", "LinearAlgebra", "MaybeInplace", "SciMLBase", "SciMLJacobianOperators", "StaticArraysCore"] +git-tree-sha1 = "97d502765cc5cf3a722120f50da03c2474efce04" +uuid = "87fe0de2-c867-4266-b59a-2f0a94fc965b" +version = "0.1.4" +weakdeps = ["LineSearches"] + + [deps.LineSearch.extensions] + LineSearchLineSearchesExt = "LineSearches" + +[[deps.LineSearches]] +deps = ["LinearAlgebra", "NLSolversBase", "NaNMath", "Parameters", "Printf"] +git-tree-sha1 = "e4c3be53733db1051cc15ecf573b1042b3a712a1" +uuid = "d3d80556-e9d4-5f37-9878-2ab0fcc64255" +version = "7.3.0" + +[[deps.LinearAlgebra]] +deps = ["Libdl", "OpenBLAS_jll", "libblastrampoline_jll"] +uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +version = "1.11.0" + 
+[[deps.LinearMaps]] +deps = ["LinearAlgebra"] +git-tree-sha1 = "ee79c3208e55786de58f8dcccca098ced79f743f" +uuid = "7a12625a-238d-50fd-b39a-03d52299707e" +version = "3.11.3" +weakdeps = ["ChainRulesCore", "SparseArrays", "Statistics"] + + [deps.LinearMaps.extensions] + LinearMapsChainRulesCoreExt = "ChainRulesCore" + LinearMapsSparseArraysExt = "SparseArrays" + LinearMapsStatisticsExt = "Statistics" + +[[deps.LinearSolve]] +deps = ["ArrayInterface", "ChainRulesCore", "ConcreteStructs", "DocStringExtensions", "EnumX", "FastLapackInterface", "GPUArraysCore", "InteractiveUtils", "KLU", "Krylov", "LazyArrays", "Libdl", "LinearAlgebra", "MKL_jll", "Markdown", "PrecompileTools", "Preferences", "RecursiveFactorization", "Reexport", "SciMLBase", "SciMLOperators", "Setfield", "SparseArrays", "Sparspak", "StaticArraysCore", "UnPack"] +git-tree-sha1 = "9d5872d134bd33dd3e120767004f760770958863" +uuid = "7ed4a6bd-45f5-4d41-b270-4a48e9bafcae" +version = "2.38.0" + + [deps.LinearSolve.extensions] + LinearSolveBandedMatricesExt = "BandedMatrices" + LinearSolveBlockDiagonalsExt = "BlockDiagonals" + LinearSolveCUDAExt = "CUDA" + LinearSolveCUDSSExt = "CUDSS" + LinearSolveEnzymeExt = "EnzymeCore" + LinearSolveFastAlmostBandedMatricesExt = "FastAlmostBandedMatrices" + LinearSolveHYPREExt = "HYPRE" + LinearSolveIterativeSolversExt = "IterativeSolvers" + LinearSolveKernelAbstractionsExt = "KernelAbstractions" + LinearSolveKrylovKitExt = "KrylovKit" + LinearSolveMetalExt = "Metal" + LinearSolvePardisoExt = "Pardiso" + LinearSolveRecursiveArrayToolsExt = "RecursiveArrayTools" + + [deps.LinearSolve.weakdeps] + BandedMatrices = "aae01518-5342-5314-be14-df237901396f" + BlockDiagonals = "0a1fb500-61f7-11e9-3c65-f5ef3456f9f0" + CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" + CUDSS = "45b445bb-4962-46a0-9369-b4df9d0f772e" + EnzymeCore = "f151be2c-9106-41f4-ab19-57ee4f262869" + FastAlmostBandedMatrices = "9d29842c-ecb8-4973-b1e9-a27b1157504e" + HYPRE = "b5ffcf37-a2bd-41ab-a3da-4bd9bc8ad771" + 
IterativeSolvers = "42fd0dbc-a981-5370-80f2-aaf504508153" + KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c" + KrylovKit = "0b1a1467-8014-51b9-945f-bf0ae24f4b77" + Metal = "dde4c033-4e86-420c-a63e-0dd931031962" + Pardiso = "46dd5b70-b6fb-5a00-ae2d-e8fea33afaf2" + RecursiveArrayTools = "731186ca-8d62-57ce-b412-fbd966d074cd" + +[[deps.LogExpFunctions]] +deps = ["DocStringExtensions", "IrrationalConstants", "LinearAlgebra"] +git-tree-sha1 = "13ca9e2586b89836fd20cccf56e57e2b9ae7f38f" +uuid = "2ab3a3ac-af41-5b50-aa03-7779005ae688" +version = "0.3.29" + + [deps.LogExpFunctions.extensions] + LogExpFunctionsChainRulesCoreExt = "ChainRulesCore" + LogExpFunctionsChangesOfVariablesExt = "ChangesOfVariables" + LogExpFunctionsInverseFunctionsExt = "InverseFunctions" + + [deps.LogExpFunctions.weakdeps] + ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" + ChangesOfVariables = "9e997f8a-9a97-42d5-a9f1-ce6bfc15e2c0" + InverseFunctions = "3587e190-3f89-42d0-90ee-14403ec27112" + +[[deps.Logging]] +uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" +version = "1.11.0" + +[[deps.LoopVectorization]] +deps = ["ArrayInterface", "CPUSummary", "CloseOpenIntervals", "DocStringExtensions", "HostCPUFeatures", "IfElse", "LayoutPointers", "LinearAlgebra", "OffsetArrays", "PolyesterWeave", "PrecompileTools", "SIMDTypes", "SLEEFPirates", "Static", "StaticArrayInterface", "ThreadingUtilities", "UnPack", "VectorizationBase"] +git-tree-sha1 = "8084c25a250e00ae427a379a5b607e7aed96a2dd" +uuid = "bdcacae8-1622-11e9-2a5c-532679323890" +version = "0.12.171" +weakdeps = ["ChainRulesCore", "ForwardDiff", "SpecialFunctions"] + + [deps.LoopVectorization.extensions] + ForwardDiffExt = ["ChainRulesCore", "ForwardDiff"] + SpecialFunctionsExt = "SpecialFunctions" + +[[deps.MKL_jll]] +deps = ["Artifacts", "IntelOpenMP_jll", "JLLWrappers", "LazyArtifacts", "Libdl", "oneTBB_jll"] +git-tree-sha1 = "f046ccd0c6db2832a9f639e2c669c6fe867e5f4f" +uuid = "856f044c-d86e-5d09-b602-aeab76dc8ba7" +version = 
"2024.2.0+0" + +[[deps.MLStyle]] +git-tree-sha1 = "bc38dff0548128765760c79eb7388a4b37fae2c8" +uuid = "d8e11817-5142-5d16-987a-aa16d5891078" +version = "0.4.17" + +[[deps.MPI]] +deps = ["Distributed", "DocStringExtensions", "Libdl", "MPICH_jll", "MPIPreferences", "MPItrampoline_jll", "MicrosoftMPI_jll", "OpenMPI_jll", "PkgVersion", "PrecompileTools", "Requires", "Serialization", "Sockets"] +git-tree-sha1 = "892676019c58f34e38743bc989b0eca5bce5edc5" +uuid = "da04e1cc-30fd-572f-bb4f-1f8673147195" +version = "0.20.22" + + [deps.MPI.extensions] + AMDGPUExt = "AMDGPU" + CUDAExt = "CUDA" + + [deps.MPI.weakdeps] + AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e" + CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" + +[[deps.MPICH_jll]] +deps = ["Artifacts", "CompilerSupportLibraries_jll", "Hwloc_jll", "JLLWrappers", "LazyArtifacts", "Libdl", "MPIPreferences", "TOML"] +git-tree-sha1 = "7715e65c47ba3941c502bffb7f266a41a7f54423" +uuid = "7cb0a576-ebde-5e09-9194-50597f1243b4" +version = "4.2.3+0" + +[[deps.MPIPreferences]] +deps = ["Libdl", "Preferences"] +git-tree-sha1 = "c105fe467859e7f6e9a852cb15cb4301126fac07" +uuid = "3da0fdf6-3ccc-4f1b-acd9-58baa6c99267" +version = "0.1.11" + +[[deps.MPItrampoline_jll]] +deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "LazyArtifacts", "Libdl", "MPIPreferences", "TOML"] +git-tree-sha1 = "70e830dab5d0775183c99fc75e4c24c614ed7142" +uuid = "f1f71cc9-e9ae-5b93-9b94-4fe0e1ad3748" +version = "5.5.1+0" + +[[deps.MacroTools]] +deps = ["Markdown", "Random"] +git-tree-sha1 = "2fa9ee3e63fd3a4f7a9a4f4744a52f4856de82df" +uuid = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" +version = "0.5.13" + +[[deps.ManualMemory]] +git-tree-sha1 = "bcaef4fc7a0cfe2cba636d84cda54b5e4e4ca3cd" +uuid = "d125e4d3-2237-4719-b19c-fa641b8a4667" +version = "0.1.8" + +[[deps.Markdown]] +deps = ["Base64"] +uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" +version = "1.11.0" + +[[deps.MaybeInplace]] +deps = ["ArrayInterface", "LinearAlgebra", "MacroTools"] +git-tree-sha1 = 
"54e2fdc38130c05b42be423e90da3bade29b74bd" +uuid = "bb5d69b7-63fc-4a16-80bd-7e42200c7bdb" +version = "0.1.4" +weakdeps = ["SparseArrays"] + + [deps.MaybeInplace.extensions] + MaybeInplaceSparseArraysExt = "SparseArrays" + +[[deps.MbedTLS_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1" +version = "2.28.6+0" + +[[deps.MicrosoftMPI_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "bc95bf4149bf535c09602e3acdf950d9b4376227" +uuid = "9237b28f-5490-5468-be7b-bb81f5f5e6cf" +version = "10.1.4+3" + +[[deps.Missings]] +deps = ["DataAPI"] +git-tree-sha1 = "ec4f7fbeab05d7747bdf98eb74d130a2a2ed298d" +uuid = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28" +version = "1.2.0" + +[[deps.Mmap]] +uuid = "a63ad114-7e13-5084-954f-fe012c677804" +version = "1.11.0" + +[[deps.MozillaCACerts_jll]] +uuid = "14a3606d-f60d-562e-9121-12d972cd8159" +version = "2023.12.12" + +[[deps.MuladdMacro]] +git-tree-sha1 = "cac9cc5499c25554cba55cd3c30543cff5ca4fab" +uuid = "46d2c3a1-f734-5fdb-9937-b9b9aeba4221" +version = "0.2.4" + +[[deps.NLSolversBase]] +deps = ["DiffResults", "Distributed", "FiniteDiff", "ForwardDiff"] +git-tree-sha1 = "a0b464d183da839699f4c79e7606d9d186ec172c" +uuid = "d41bc354-129a-5804-8e4c-c37616107c6c" +version = "7.8.3" + +[[deps.NVTX]] +deps = ["Colors", "JuliaNVTXCallbacks_jll", "Libdl", "NVTX_jll"] +git-tree-sha1 = "6a6f8bfaa91bb2e40ff562ab9f30dc827741daef" +uuid = "5da4648a-3479-48b8-97b9-01cb529c0a1f" +version = "0.3.5" + +[[deps.NVTX_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "ce3269ed42816bf18d500c9f63418d4b0d9f5a3b" +uuid = "e98f9f5b-d649-5603-91fd-7774390e6439" +version = "3.1.0+2" + +[[deps.NaNMath]] +deps = ["OpenLibm_jll"] +git-tree-sha1 = "0877504529a3e5c3343c6f8b4c0381e57e4387e4" +uuid = "77ba4419-2d1f-58cd-9bb1-8ffee604a2e3" +version = "1.0.2" + +[[deps.NamedDims]] +deps = ["LinearAlgebra", "Pkg", "Statistics"] +git-tree-sha1 = "90178dc801073728b8b2d0d8677d10909feb94d8" +uuid = 
"356022a1-0364-5f58-8944-0da4b18d706f" +version = "1.2.2" + + [deps.NamedDims.extensions] + AbstractFFTsExt = "AbstractFFTs" + ChainRulesCoreExt = "ChainRulesCore" + CovarianceEstimationExt = "CovarianceEstimation" + TrackerExt = "Tracker" + + [deps.NamedDims.weakdeps] + AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c" + ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" + CovarianceEstimation = "587fd27a-f159-11e8-2dae-1979310e6154" + Requires = "ae029012-a4dd-5104-9daa-d747884805df" + Tracker = "9f7883ad-71c0-57eb-9f7f-b5c9e6d3789c" + +[[deps.NetworkOptions]] +uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908" +version = "1.2.0" + +[[deps.NodesAndModes]] +deps = ["DelimitedFiles", "LinearAlgebra", "SpecialFunctions", "StaticArrays"] +git-tree-sha1 = "ee6719b4ed5fd08b654017648bf5fa2e2dc8f1ec" +uuid = "7aca2e03-f7e2-4192-9ec8-f4ca66d597fb" +version = "1.1.0" + +[[deps.NonlinearSolve]] +deps = ["ADTypes", "ArrayInterface", "BracketingNonlinearSolve", "CommonSolve", "ConcreteStructs", "DiffEqBase", "DifferentiationInterface", "FastClosures", "FiniteDiff", "ForwardDiff", "LineSearch", "LinearAlgebra", "LinearSolve", "NonlinearSolveBase", "NonlinearSolveFirstOrder", "NonlinearSolveQuasiNewton", "NonlinearSolveSpectralMethods", "PrecompileTools", "Preferences", "Reexport", "SciMLBase", "SimpleNonlinearSolve", "SparseArrays", "SparseMatrixColorings", "StaticArraysCore", "SymbolicIndexingInterface"] +git-tree-sha1 = "d0caebdb5a31e1a11ca9f7f189cdbf341ac89f0e" +uuid = "8913a72c-1f9b-4ce2-8d82-65094dcecaec" +version = "4.3.0" + + [deps.NonlinearSolve.extensions] + NonlinearSolveFastLevenbergMarquardtExt = "FastLevenbergMarquardt" + NonlinearSolveFixedPointAccelerationExt = "FixedPointAcceleration" + NonlinearSolveLeastSquaresOptimExt = "LeastSquaresOptim" + NonlinearSolveMINPACKExt = "MINPACK" + NonlinearSolveNLSolversExt = "NLSolvers" + NonlinearSolveNLsolveExt = ["NLsolve", "LineSearches"] + NonlinearSolvePETScExt = ["PETSc", "MPI"] + 
NonlinearSolveSIAMFANLEquationsExt = "SIAMFANLEquations" + NonlinearSolveSpeedMappingExt = "SpeedMapping" + NonlinearSolveSundialsExt = "Sundials" + + [deps.NonlinearSolve.weakdeps] + FastLevenbergMarquardt = "7a0df574-e128-4d35-8cbd-3d84502bf7ce" + FixedPointAcceleration = "817d07cb-a79a-5c30-9a31-890123675176" + LeastSquaresOptim = "0fc2ff8b-aaa3-5acd-a817-1944a5e08891" + LineSearches = "d3d80556-e9d4-5f37-9878-2ab0fcc64255" + MINPACK = "4854310b-de5a-5eb6-a2a5-c1dee2bd17f9" + MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195" + NLSolvers = "337daf1e-9722-11e9-073e-8b9effe078ba" + NLsolve = "2774e3e8-f4cf-5e23-947b-6d7e65073b56" + PETSc = "ace2c81b-2b5f-4b1e-a30d-d662738edfe0" + SIAMFANLEquations = "084e46ad-d928-497d-ad5e-07fa361a48c4" + SpeedMapping = "f1835b91-879b-4a3f-a438-e4baacf14412" + Sundials = "c3572dad-4567-51f8-b174-8c6c989267f4" + +[[deps.NonlinearSolveBase]] +deps = ["ADTypes", "Adapt", "ArrayInterface", "CommonSolve", "Compat", "ConcreteStructs", "DifferentiationInterface", "EnzymeCore", "FastClosures", "LinearAlgebra", "Markdown", "MaybeInplace", "Preferences", "Printf", "RecursiveArrayTools", "SciMLBase", "SciMLJacobianOperators", "SciMLOperators", "StaticArraysCore", "SymbolicIndexingInterface", "TimerOutputs"] +git-tree-sha1 = "5bca24ce7b0c034dcbdc6ad6d658b02e0eed566e" +uuid = "be0214bd-f91f-a760-ac4e-3421ce2b2da0" +version = "1.4.0" + + [deps.NonlinearSolveBase.extensions] + NonlinearSolveBaseBandedMatricesExt = "BandedMatrices" + NonlinearSolveBaseDiffEqBaseExt = "DiffEqBase" + NonlinearSolveBaseForwardDiffExt = "ForwardDiff" + NonlinearSolveBaseLineSearchExt = "LineSearch" + NonlinearSolveBaseLinearSolveExt = "LinearSolve" + NonlinearSolveBaseSparseArraysExt = "SparseArrays" + NonlinearSolveBaseSparseMatrixColoringsExt = "SparseMatrixColorings" + + [deps.NonlinearSolveBase.weakdeps] + BandedMatrices = "aae01518-5342-5314-be14-df237901396f" + DiffEqBase = "2b5f629d-d688-5b77-993f-72d75c75574e" + ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" 
+ LineSearch = "87fe0de2-c867-4266-b59a-2f0a94fc965b" + LinearSolve = "7ed4a6bd-45f5-4d41-b270-4a48e9bafcae" + SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" + SparseMatrixColorings = "0a514795-09f3-496d-8182-132a7b665d35" + +[[deps.NonlinearSolveFirstOrder]] +deps = ["ADTypes", "ArrayInterface", "CommonSolve", "ConcreteStructs", "DiffEqBase", "FiniteDiff", "ForwardDiff", "LineSearch", "LinearAlgebra", "LinearSolve", "MaybeInplace", "NonlinearSolveBase", "PrecompileTools", "Reexport", "SciMLBase", "SciMLJacobianOperators", "Setfield", "StaticArraysCore"] +git-tree-sha1 = "a1ea35ab0bcc99753e26d574ba1e339f19d100fa" +uuid = "5959db7a-ea39-4486-b5fe-2dd0bf03d60d" +version = "1.2.0" + +[[deps.NonlinearSolveQuasiNewton]] +deps = ["ArrayInterface", "CommonSolve", "ConcreteStructs", "DiffEqBase", "LinearAlgebra", "LinearSolve", "MaybeInplace", "NonlinearSolveBase", "PrecompileTools", "Reexport", "SciMLBase", "SciMLOperators", "StaticArraysCore"] +git-tree-sha1 = "8f14b848afcfc0a2941cd3cca1bef04c987465bb" +uuid = "9a2c21bd-3a47-402d-9113-8faf9a0ee114" +version = "1.1.0" +weakdeps = ["ForwardDiff"] + + [deps.NonlinearSolveQuasiNewton.extensions] + NonlinearSolveQuasiNewtonForwardDiffExt = "ForwardDiff" + +[[deps.NonlinearSolveSpectralMethods]] +deps = ["CommonSolve", "ConcreteStructs", "DiffEqBase", "LineSearch", "MaybeInplace", "NonlinearSolveBase", "PrecompileTools", "Reexport", "SciMLBase"] +git-tree-sha1 = "f28b1ab17b5f15eb2b174eaf8813cf17f0b3e6c0" +uuid = "26075421-4e9a-44e1-8bd1-420ed7ad02b2" +version = "1.1.0" +weakdeps = ["ForwardDiff"] + + [deps.NonlinearSolveSpectralMethods.extensions] + NonlinearSolveSpectralMethodsForwardDiffExt = "ForwardDiff" + +[[deps.Octavian]] +deps = ["CPUSummary", "IfElse", "LoopVectorization", "ManualMemory", "PolyesterWeave", "PrecompileTools", "Static", "StaticArrayInterface", "ThreadingUtilities", "VectorizationBase"] +git-tree-sha1 = "92410e147bdcaf9e2f982a7cc9b1341fc5dd1a77" +uuid = "6fd5a793-0b7e-452c-907f-f8bfe9c57db4" 
+version = "0.3.28" + + [deps.Octavian.extensions] + ForwardDiffExt = "ForwardDiff" + HyperDualNumbersExt = "HyperDualNumbers" + + [deps.Octavian.weakdeps] + ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" + HyperDualNumbers = "50ceba7f-c3ee-5a84-a6e8-3ad40456ec97" + +[[deps.OffsetArrays]] +git-tree-sha1 = "5e1897147d1ff8d98883cda2be2187dcf57d8f0c" +uuid = "6fe1bfb0-de20-5000-8ca7-80f57d26f881" +version = "1.15.0" +weakdeps = ["Adapt"] + + [deps.OffsetArrays.extensions] + OffsetArraysAdaptExt = "Adapt" + +[[deps.OpenBLAS_jll]] +deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl"] +uuid = "4536629a-c528-5b80-bd46-f80d51c5b363" +version = "0.3.27+1" + +[[deps.OpenLibm_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "05823500-19ac-5b8b-9628-191a04bc5112" +version = "0.8.1+2" + +[[deps.OpenMPI_jll]] +deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "LazyArtifacts", "Libdl", "MPIPreferences", "TOML"] +git-tree-sha1 = "e25c1778a98e34219a00455d6e4384e017ea9762" +uuid = "fe0851c0-eecd-5654-98d4-656369965a5c" +version = "4.1.6+0" + +[[deps.OpenSSL_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "7493f61f55a6cce7325f197443aa80d32554ba10" +uuid = "458c3c95-2e84-50aa-8efc-19380b2a3a95" +version = "3.0.15+1" + +[[deps.OpenSpecFun_jll]] +deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "13652491f6856acfd2db29360e1bbcd4565d04f1" +uuid = "efe28fd5-8261-553b-a9e1-b2916fc3738e" +version = "0.5.5+0" + +[[deps.OrderedCollections]] +git-tree-sha1 = "12f1439c4f986bb868acda6ea33ebc78e19b95ad" +uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" +version = "1.7.0" + +[[deps.OrdinaryDiffEq]] +deps = ["ADTypes", "Adapt", "ArrayInterface", "DataStructures", "DiffEqBase", "DocStringExtensions", "EnumX", "ExponentialUtilities", "FastBroadcast", "FastClosures", "FillArrays", "FiniteDiff", "ForwardDiff", "FunctionWrappersWrappers", "InteractiveUtils", "LineSearches", "LinearAlgebra", "LinearSolve", 
"Logging", "MacroTools", "MuladdMacro", "NonlinearSolve", "OrdinaryDiffEqAdamsBashforthMoulton", "OrdinaryDiffEqBDF", "OrdinaryDiffEqCore", "OrdinaryDiffEqDefault", "OrdinaryDiffEqDifferentiation", "OrdinaryDiffEqExplicitRK", "OrdinaryDiffEqExponentialRK", "OrdinaryDiffEqExtrapolation", "OrdinaryDiffEqFIRK", "OrdinaryDiffEqFeagin", "OrdinaryDiffEqFunctionMap", "OrdinaryDiffEqHighOrderRK", "OrdinaryDiffEqIMEXMultistep", "OrdinaryDiffEqLinear", "OrdinaryDiffEqLowOrderRK", "OrdinaryDiffEqLowStorageRK", "OrdinaryDiffEqNonlinearSolve", "OrdinaryDiffEqNordsieck", "OrdinaryDiffEqPDIRK", "OrdinaryDiffEqPRK", "OrdinaryDiffEqQPRK", "OrdinaryDiffEqRKN", "OrdinaryDiffEqRosenbrock", "OrdinaryDiffEqSDIRK", "OrdinaryDiffEqSSPRK", "OrdinaryDiffEqStabilizedIRK", "OrdinaryDiffEqStabilizedRK", "OrdinaryDiffEqSymplecticRK", "OrdinaryDiffEqTsit5", "OrdinaryDiffEqVerner", "Polyester", "PreallocationTools", "PrecompileTools", "Preferences", "RecursiveArrayTools", "Reexport", "SciMLBase", "SciMLOperators", "SciMLStructures", "SimpleNonlinearSolve", "SimpleUnPack", "SparseArrays", "SparseDiffTools", "Static", "StaticArrayInterface", "StaticArrays", "TruncatedStacktraces"] +git-tree-sha1 = "36ce9bfc14a4b3dcf1490e80b5f1f4d35bfddf39" +uuid = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed" +version = "6.90.1" + +[[deps.OrdinaryDiffEqAdamsBashforthMoulton]] +deps = ["ADTypes", "DiffEqBase", "FastBroadcast", "MuladdMacro", "OrdinaryDiffEqCore", "OrdinaryDiffEqLowOrderRK", "Polyester", "RecursiveArrayTools", "Reexport", "Static"] +git-tree-sha1 = "8e3c5978d0531a961f70d2f2730d1d16ed3bbd12" +uuid = "89bda076-bce5-4f1c-845f-551c83cdda9a" +version = "1.1.0" + +[[deps.OrdinaryDiffEqBDF]] +deps = ["ArrayInterface", "DiffEqBase", "FastBroadcast", "LinearAlgebra", "MacroTools", "MuladdMacro", "OrdinaryDiffEqCore", "OrdinaryDiffEqDifferentiation", "OrdinaryDiffEqNonlinearSolve", "OrdinaryDiffEqSDIRK", "PrecompileTools", "Preferences", "RecursiveArrayTools", "Reexport", "StaticArrays", "TruncatedStacktraces"] 
+git-tree-sha1 = "b4498d40bf35da0b6d22652ff2e9d8820590b3c6" +uuid = "6ad6398a-0878-4a85-9266-38940aa047c8" +version = "1.1.2" + +[[deps.OrdinaryDiffEqCore]] +deps = ["ADTypes", "Accessors", "Adapt", "ArrayInterface", "DataStructures", "DiffEqBase", "DocStringExtensions", "EnumX", "FastBroadcast", "FastClosures", "FastPower", "FillArrays", "FunctionWrappersWrappers", "InteractiveUtils", "LinearAlgebra", "Logging", "MacroTools", "MuladdMacro", "Polyester", "PrecompileTools", "Preferences", "RecursiveArrayTools", "Reexport", "SciMLBase", "SciMLOperators", "SciMLStructures", "SimpleUnPack", "Static", "StaticArrayInterface", "StaticArraysCore", "SymbolicIndexingInterface", "TruncatedStacktraces"] +git-tree-sha1 = "c7f395034602c3e4d40ece93dc2c9f066f0ce61f" +uuid = "bbf590c4-e513-4bbe-9b18-05decba2e5d8" +version = "1.13.0" +weakdeps = ["EnzymeCore"] + + [deps.OrdinaryDiffEqCore.extensions] + OrdinaryDiffEqCoreEnzymeCoreExt = "EnzymeCore" + +[[deps.OrdinaryDiffEqDefault]] +deps = ["DiffEqBase", "EnumX", "LinearAlgebra", "LinearSolve", "OrdinaryDiffEqBDF", "OrdinaryDiffEqCore", "OrdinaryDiffEqRosenbrock", "OrdinaryDiffEqTsit5", "OrdinaryDiffEqVerner", "PrecompileTools", "Preferences", "Reexport"] +git-tree-sha1 = "c8223e487d58bef28a3535b33ddf8ffdb44f46fb" +uuid = "50262376-6c5a-4cf5-baba-aaf4f84d72d7" +version = "1.1.0" + +[[deps.OrdinaryDiffEqDifferentiation]] +deps = ["ADTypes", "ArrayInterface", "DiffEqBase", "FastBroadcast", "FiniteDiff", "ForwardDiff", "FunctionWrappersWrappers", "LinearAlgebra", "LinearSolve", "OrdinaryDiffEqCore", "SciMLBase", "SparseArrays", "SparseDiffTools", "StaticArrayInterface", "StaticArrays"] +git-tree-sha1 = "8977f283a7d89c5d5c06c933467ed4af0a99f2f7" +uuid = "4302a76b-040a-498a-8c04-15b101fed76b" +version = "1.2.0" + +[[deps.OrdinaryDiffEqExplicitRK]] +deps = ["DiffEqBase", "FastBroadcast", "LinearAlgebra", "MuladdMacro", "OrdinaryDiffEqCore", "RecursiveArrayTools", "Reexport", "TruncatedStacktraces"] +git-tree-sha1 = 
"4dbce3f9e6974567082ce5176e21aab0224a69e9" +uuid = "9286f039-9fbf-40e8-bf65-aa933bdc4db0" +version = "1.1.0" + +[[deps.OrdinaryDiffEqExponentialRK]] +deps = ["DiffEqBase", "ExponentialUtilities", "FastBroadcast", "LinearAlgebra", "MuladdMacro", "OrdinaryDiffEqCore", "OrdinaryDiffEqDifferentiation", "OrdinaryDiffEqSDIRK", "OrdinaryDiffEqVerner", "RecursiveArrayTools", "Reexport", "SciMLBase"] +git-tree-sha1 = "f63938b8e9e5d3a05815defb3ebdbdcf61ec0a74" +uuid = "e0540318-69ee-4070-8777-9e2de6de23de" +version = "1.1.0" + +[[deps.OrdinaryDiffEqExtrapolation]] +deps = ["DiffEqBase", "FastBroadcast", "FastPower", "LinearSolve", "MuladdMacro", "OrdinaryDiffEqCore", "OrdinaryDiffEqDifferentiation", "Polyester", "RecursiveArrayTools", "Reexport"] +git-tree-sha1 = "048bcccc8f59c20d5b4ad268eef4d7d21c005a94" +uuid = "becaefa8-8ca2-5cf9-886d-c06f3d2bd2c4" +version = "1.2.1" + +[[deps.OrdinaryDiffEqFIRK]] +deps = ["DiffEqBase", "FastBroadcast", "FastPower", "LinearAlgebra", "LinearSolve", "MuladdMacro", "OrdinaryDiffEqCore", "OrdinaryDiffEqDifferentiation", "OrdinaryDiffEqNonlinearSolve", "RecursiveArrayTools", "Reexport", "SciMLOperators"] +git-tree-sha1 = "7a6e3996dc0850aee6cdc10c8afa377242fce702" +uuid = "5960d6e9-dd7a-4743-88e7-cf307b64f125" +version = "1.5.0" + +[[deps.OrdinaryDiffEqFeagin]] +deps = ["DiffEqBase", "FastBroadcast", "MuladdMacro", "OrdinaryDiffEqCore", "Polyester", "RecursiveArrayTools", "Reexport", "Static"] +git-tree-sha1 = "a7cc74d3433db98e59dc3d58bc28174c6c290adf" +uuid = "101fe9f7-ebb6-4678-b671-3a81e7194747" +version = "1.1.0" + +[[deps.OrdinaryDiffEqFunctionMap]] +deps = ["DiffEqBase", "FastBroadcast", "MuladdMacro", "OrdinaryDiffEqCore", "RecursiveArrayTools", "Reexport", "SciMLBase", "Static"] +git-tree-sha1 = "925a91583d1ab84f1f0fea121be1abf1179c5926" +uuid = "d3585ca7-f5d3-4ba6-8057-292ed1abd90f" +version = "1.1.1" + +[[deps.OrdinaryDiffEqHighOrderRK]] +deps = ["DiffEqBase", "FastBroadcast", "MuladdMacro", "OrdinaryDiffEqCore", 
"RecursiveArrayTools", "Reexport", "Static"] +git-tree-sha1 = "103e017ff186ac39d731904045781c9bacfca2b0" +uuid = "d28bc4f8-55e1-4f49-af69-84c1a99f0f58" +version = "1.1.0" + +[[deps.OrdinaryDiffEqIMEXMultistep]] +deps = ["DiffEqBase", "FastBroadcast", "OrdinaryDiffEqCore", "OrdinaryDiffEqDifferentiation", "OrdinaryDiffEqNonlinearSolve", "Reexport"] +git-tree-sha1 = "9f8f52aad2399d7714b400ff9d203254b0a89c4a" +uuid = "9f002381-b378-40b7-97a6-27a27c83f129" +version = "1.1.0" + +[[deps.OrdinaryDiffEqLinear]] +deps = ["DiffEqBase", "ExponentialUtilities", "LinearAlgebra", "OrdinaryDiffEqCore", "OrdinaryDiffEqTsit5", "OrdinaryDiffEqVerner", "RecursiveArrayTools", "Reexport", "SciMLBase", "SciMLOperators"] +git-tree-sha1 = "0f81a77ede3da0dc714ea61e81c76b25db4ab87a" +uuid = "521117fe-8c41-49f8-b3b6-30780b3f0fb5" +version = "1.1.0" + +[[deps.OrdinaryDiffEqLowOrderRK]] +deps = ["DiffEqBase", "FastBroadcast", "LinearAlgebra", "MuladdMacro", "OrdinaryDiffEqCore", "RecursiveArrayTools", "Reexport", "SciMLBase", "Static"] +git-tree-sha1 = "d4bb32e09d6b68ce2eb45fb81001eab46f60717a" +uuid = "1344f307-1e59-4825-a18e-ace9aa3fa4c6" +version = "1.2.0" + +[[deps.OrdinaryDiffEqLowStorageRK]] +deps = ["Adapt", "DiffEqBase", "FastBroadcast", "MuladdMacro", "OrdinaryDiffEqCore", "Polyester", "PrecompileTools", "Preferences", "RecursiveArrayTools", "Reexport", "Static", "StaticArrays"] +git-tree-sha1 = "590561f3af623d5485d070b4d7044f8854535f5a" +uuid = "b0944070-b475-4768-8dec-fb6eb410534d" +version = "1.2.1" + +[[deps.OrdinaryDiffEqNonlinearSolve]] +deps = ["ADTypes", "ArrayInterface", "DiffEqBase", "FastBroadcast", "FastClosures", "ForwardDiff", "LinearAlgebra", "LinearSolve", "MuladdMacro", "NonlinearSolve", "OrdinaryDiffEqCore", "OrdinaryDiffEqDifferentiation", "PreallocationTools", "RecursiveArrayTools", "SciMLBase", "SciMLOperators", "SciMLStructures", "SimpleNonlinearSolve", "StaticArrays"] +git-tree-sha1 = "3a3eb0b7ef3f996c468d6f8013eac9525bcfd788" +uuid = 
"127b3ac7-2247-4354-8eb6-78cf4e7c58e8" +version = "1.3.0" + +[[deps.OrdinaryDiffEqNordsieck]] +deps = ["DiffEqBase", "FastBroadcast", "LinearAlgebra", "MuladdMacro", "OrdinaryDiffEqCore", "OrdinaryDiffEqTsit5", "Polyester", "RecursiveArrayTools", "Reexport", "Static"] +git-tree-sha1 = "ef44754f10e0dfb9bb55ded382afed44cd94ab57" +uuid = "c9986a66-5c92-4813-8696-a7ec84c806c8" +version = "1.1.0" + +[[deps.OrdinaryDiffEqPDIRK]] +deps = ["DiffEqBase", "FastBroadcast", "MuladdMacro", "OrdinaryDiffEqCore", "OrdinaryDiffEqDifferentiation", "OrdinaryDiffEqNonlinearSolve", "Polyester", "Reexport", "StaticArrays"] +git-tree-sha1 = "a8b7f8107c477e07c6a6c00d1d66cac68b801bbc" +uuid = "5dd0a6cf-3d4b-4314-aa06-06d4e299bc89" +version = "1.1.0" + +[[deps.OrdinaryDiffEqPRK]] +deps = ["DiffEqBase", "FastBroadcast", "MuladdMacro", "OrdinaryDiffEqCore", "Polyester", "Reexport"] +git-tree-sha1 = "da525d277962a1b76102c79f30cb0c31e13fe5b9" +uuid = "5b33eab2-c0f1-4480-b2c3-94bc1e80bda1" +version = "1.1.0" + +[[deps.OrdinaryDiffEqQPRK]] +deps = ["DiffEqBase", "FastBroadcast", "MuladdMacro", "OrdinaryDiffEqCore", "RecursiveArrayTools", "Reexport", "Static"] +git-tree-sha1 = "332f9d17d0229218f66a73492162267359ba85e9" +uuid = "04162be5-8125-4266-98ed-640baecc6514" +version = "1.1.0" + +[[deps.OrdinaryDiffEqRKN]] +deps = ["DiffEqBase", "FastBroadcast", "MuladdMacro", "OrdinaryDiffEqCore", "Polyester", "RecursiveArrayTools", "Reexport"] +git-tree-sha1 = "41c09d9c20877546490f907d8dffdd52690dd65f" +uuid = "af6ede74-add8-4cfd-b1df-9a4dbb109d7a" +version = "1.1.0" + +[[deps.OrdinaryDiffEqRosenbrock]] +deps = ["ADTypes", "DiffEqBase", "FastBroadcast", "FiniteDiff", "ForwardDiff", "LinearAlgebra", "LinearSolve", "MacroTools", "MuladdMacro", "OrdinaryDiffEqCore", "OrdinaryDiffEqDifferentiation", "Polyester", "PrecompileTools", "Preferences", "RecursiveArrayTools", "Reexport", "Static"] +git-tree-sha1 = "760a51a626d0065455847e4a3f788b07e86e5090" +uuid = "43230ef6-c299-4910-a778-202eb28ce4ce" +version = 
"1.3.1" + +[[deps.OrdinaryDiffEqSDIRK]] +deps = ["DiffEqBase", "FastBroadcast", "LinearAlgebra", "MacroTools", "MuladdMacro", "OrdinaryDiffEqCore", "OrdinaryDiffEqDifferentiation", "OrdinaryDiffEqNonlinearSolve", "RecursiveArrayTools", "Reexport", "SciMLBase", "TruncatedStacktraces"] +git-tree-sha1 = "f6683803a58de600ab7a26d2f49411c9923e9721" +uuid = "2d112036-d095-4a1e-ab9a-08536f3ecdbf" +version = "1.1.0" + +[[deps.OrdinaryDiffEqSSPRK]] +deps = ["DiffEqBase", "FastBroadcast", "MuladdMacro", "OrdinaryDiffEqCore", "Polyester", "PrecompileTools", "Preferences", "RecursiveArrayTools", "Reexport", "Static", "StaticArrays"] +git-tree-sha1 = "7dbe4ac56f930df5e9abd003cedb54e25cbbea86" +uuid = "669c94d9-1f4b-4b64-b377-1aa079aa2388" +version = "1.2.0" + +[[deps.OrdinaryDiffEqStabilizedIRK]] +deps = ["DiffEqBase", "FastBroadcast", "MuladdMacro", "OrdinaryDiffEqCore", "OrdinaryDiffEqDifferentiation", "OrdinaryDiffEqNonlinearSolve", "RecursiveArrayTools", "Reexport", "StaticArrays"] +git-tree-sha1 = "348fd6def9a88518715425025eadd58517017325" +uuid = "e3e12d00-db14-5390-b879-ac3dd2ef6296" +version = "1.1.0" + +[[deps.OrdinaryDiffEqStabilizedRK]] +deps = ["DiffEqBase", "FastBroadcast", "MuladdMacro", "OrdinaryDiffEqCore", "RecursiveArrayTools", "Reexport", "StaticArrays"] +git-tree-sha1 = "1b0d894c880e25f7d0b022d7257638cf8ce5b311" +uuid = "358294b1-0aab-51c3-aafe-ad5ab194a2ad" +version = "1.1.0" + +[[deps.OrdinaryDiffEqSymplecticRK]] +deps = ["DiffEqBase", "FastBroadcast", "MuladdMacro", "OrdinaryDiffEqCore", "Polyester", "RecursiveArrayTools", "Reexport"] +git-tree-sha1 = "4e8b8c8b81df3df17e2eb4603115db3b30a88235" +uuid = "fa646aed-7ef9-47eb-84c4-9443fc8cbfa8" +version = "1.1.0" + +[[deps.OrdinaryDiffEqTsit5]] +deps = ["DiffEqBase", "FastBroadcast", "LinearAlgebra", "MuladdMacro", "OrdinaryDiffEqCore", "PrecompileTools", "Preferences", "RecursiveArrayTools", "Reexport", "Static", "TruncatedStacktraces"] +git-tree-sha1 = "96552f7d4619fabab4038a29ed37dd55e9eb513a" +uuid = 
"b1df2697-797e-41e3-8120-5422d3b24e4a" +version = "1.1.0" + +[[deps.OrdinaryDiffEqVerner]] +deps = ["DiffEqBase", "FastBroadcast", "LinearAlgebra", "MuladdMacro", "OrdinaryDiffEqCore", "Polyester", "PrecompileTools", "Preferences", "RecursiveArrayTools", "Reexport", "Static", "TruncatedStacktraces"] +git-tree-sha1 = "81d7841e73e385b9925d5c8e4427f2adcdda55db" +uuid = "79d7bb75-1356-48c1-b8c0-6832512096c2" +version = "1.1.1" + +[[deps.P4est]] +deps = ["CEnum", "MPI", "MPIPreferences", "P4est_jll", "Preferences", "Reexport", "UUIDs"] +git-tree-sha1 = "6a924bc3d05ebb09de7e8294a30c022461a44720" +uuid = "7d669430-f675-4ae7-b43e-fab78ec5a902" +version = "0.4.13" + +[[deps.P4est_jll]] +deps = ["Artifacts", "JLLWrappers", "LazyArtifacts", "Libdl", "MPICH_jll", "MPIPreferences", "MPItrampoline_jll", "MicrosoftMPI_jll", "OpenMPI_jll", "Pkg", "TOML", "Zlib_jll"] +git-tree-sha1 = "70c2d9a33b8810198314a5722ee3e9520110b28d" +uuid = "6b5a15aa-cf52-5330-8376-5e5d90283449" +version = "2.8.1+2" + +[[deps.PackageExtensionCompat]] +git-tree-sha1 = "fb28e33b8a95c4cee25ce296c817d89cc2e53518" +uuid = "65ce6f38-6b18-4e1d-a461-8949797d7930" +version = "1.0.2" +weakdeps = ["Requires", "TOML"] + +[[deps.Parameters]] +deps = ["OrderedCollections", "UnPack"] +git-tree-sha1 = "34c0e9ad262e5f7fc75b10a9952ca7692cfc5fbe" +uuid = "d96e819e-fc66-5662-9728-84c9c7592b0a" +version = "0.12.3" + +[[deps.PathIntersections]] +deps = ["ForwardDiff", "GaussQuadrature", "LinearAlgebra", "SparseArrays", "StaticArrays"] +git-tree-sha1 = "5283bb8bb16e0f90ac5194af390e7d41f507763a" +uuid = "4c1a95c7-462a-4a7e-b284-959c63fbf1dc" +version = "0.2.0" + +[[deps.Pkg]] +deps = ["Artifacts", "Dates", "Downloads", "FileWatching", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "Random", "SHA", "TOML", "Tar", "UUIDs", "p7zip_jll"] +uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" +version = "1.11.0" + + [deps.Pkg.extensions] + REPLExt = "REPL" + + [deps.Pkg.weakdeps] + REPL = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" + 
+[[deps.PkgVersion]] +deps = ["Pkg"] +git-tree-sha1 = "f9501cc0430a26bc3d156ae1b5b0c1b47af4d6da" +uuid = "eebad327-c553-4316-9ea0-9fa01ccd7688" +version = "0.3.3" + +[[deps.Polyester]] +deps = ["ArrayInterface", "BitTwiddlingConvenienceFunctions", "CPUSummary", "IfElse", "ManualMemory", "PolyesterWeave", "Static", "StaticArrayInterface", "StrideArraysCore", "ThreadingUtilities"] +git-tree-sha1 = "6d38fea02d983051776a856b7df75b30cf9a3c1f" +uuid = "f517fe37-dbe3-4b94-8317-1923a5111588" +version = "0.7.16" + +[[deps.PolyesterWeave]] +deps = ["BitTwiddlingConvenienceFunctions", "CPUSummary", "IfElse", "Static", "ThreadingUtilities"] +git-tree-sha1 = "645bed98cd47f72f67316fd42fc47dee771aefcd" +uuid = "1d0040c9-8b98-4ee7-8388-3f51789ca0ad" +version = "0.2.2" + +[[deps.PolynomialBases]] +deps = ["ArgCheck", "AutoHashEquals", "FFTW", "FastGaussQuadrature", "LinearAlgebra", "Requires", "SimpleUnPack", "SpecialFunctions"] +git-tree-sha1 = "b62fd0464edfffce54393cd617135af30fa47006" +uuid = "c74db56a-226d-5e98-8bb0-a6049094aeea" +version = "0.4.22" + +[[deps.PooledArrays]] +deps = ["DataAPI", "Future"] +git-tree-sha1 = "36d8b4b899628fb92c2749eb488d884a926614d3" +uuid = "2dfb63ee-cc39-5dd5-95bd-886bf059d720" +version = "1.4.3" + +[[deps.PreallocationTools]] +deps = ["Adapt", "ArrayInterface", "ForwardDiff"] +git-tree-sha1 = "6c62ce45f268f3f958821a1e5192cf91c75ae89c" +uuid = "d236fae5-4411-538c-8e31-a6e3d9e00b46" +version = "0.4.24" + + [deps.PreallocationTools.extensions] + PreallocationToolsReverseDiffExt = "ReverseDiff" + + [deps.PreallocationTools.weakdeps] + ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267" + +[[deps.PrecompileTools]] +deps = ["Preferences"] +git-tree-sha1 = "5aa36f7049a63a1528fe8f7c3f2113413ffd4e1f" +uuid = "aea7be01-6a6a-4083-8856-8a6e6704d82a" +version = "1.2.1" + +[[deps.Preferences]] +deps = ["TOML"] +git-tree-sha1 = "9306f6085165d270f7e3db02af26a400d580f5c6" +uuid = "21216c6a-2e73-6563-6e65-726566657250" +version = "1.4.3" + +[[deps.PrettyTables]] 
+deps = ["Crayons", "LaTeXStrings", "Markdown", "PrecompileTools", "Printf", "Reexport", "StringManipulation", "Tables"] +git-tree-sha1 = "1101cd475833706e4d0e7b122218257178f48f34" +uuid = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d" +version = "2.4.0" + +[[deps.Printf]] +deps = ["Unicode"] +uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" +version = "1.11.0" + +[[deps.PtrArrays]] +git-tree-sha1 = "77a42d78b6a92df47ab37e177b2deac405e1c88f" +uuid = "43287f4e-b6f4-7ad1-bb20-aadabca52c3d" +version = "1.2.1" + +[[deps.Random]] +deps = ["SHA"] +uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +version = "1.11.0" + +[[deps.Random123]] +deps = ["Random", "RandomNumbers"] +git-tree-sha1 = "4743b43e5a9c4a2ede372de7061eed81795b12e7" +uuid = "74087812-796a-5b5d-8853-05524746bad3" +version = "1.7.0" + +[[deps.RandomNumbers]] +deps = ["Random"] +git-tree-sha1 = "c6ec94d2aaba1ab2ff983052cf6a606ca5985902" +uuid = "e6cf234a-135c-5ec9-84dd-332b85af5143" +version = "1.6.0" + +[[deps.RecipesBase]] +deps = ["PrecompileTools"] +git-tree-sha1 = "5c3d09cc4f31f5fc6af001c250bf1278733100ff" +uuid = "3cdcf5f2-1ef4-517c-9805-6587b60abb01" +version = "1.3.4" + +[[deps.RecursiveArrayTools]] +deps = ["Adapt", "ArrayInterface", "DocStringExtensions", "GPUArraysCore", "IteratorInterfaceExtensions", "LinearAlgebra", "RecipesBase", "StaticArraysCore", "Statistics", "SymbolicIndexingInterface", "Tables"] +git-tree-sha1 = "32f824db4e5bab64e25a12b22483a30a6b813d08" +uuid = "731186ca-8d62-57ce-b412-fbd966d074cd" +version = "3.27.4" + + [deps.RecursiveArrayTools.extensions] + RecursiveArrayToolsFastBroadcastExt = "FastBroadcast" + RecursiveArrayToolsForwardDiffExt = "ForwardDiff" + RecursiveArrayToolsMeasurementsExt = "Measurements" + RecursiveArrayToolsMonteCarloMeasurementsExt = "MonteCarloMeasurements" + RecursiveArrayToolsReverseDiffExt = ["ReverseDiff", "Zygote"] + RecursiveArrayToolsSparseArraysExt = ["SparseArrays"] + RecursiveArrayToolsStructArraysExt = "StructArrays" + RecursiveArrayToolsTrackerExt = 
"Tracker" + RecursiveArrayToolsZygoteExt = "Zygote" + + [deps.RecursiveArrayTools.weakdeps] + FastBroadcast = "7034ab61-46d4-4ed7-9d0f-46aef9175898" + ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" + Measurements = "eff96d63-e80a-5855-80a2-b1b0885c5ab7" + MonteCarloMeasurements = "0987c9cc-fe09-11e8-30f0-b96dd679fdca" + ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267" + SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" + StructArrays = "09ab397b-f2b6-538f-b94a-2f83cf4a842a" + Tracker = "9f7883ad-71c0-57eb-9f7f-b5c9e6d3789c" + Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" + +[[deps.RecursiveFactorization]] +deps = ["LinearAlgebra", "LoopVectorization", "Polyester", "PrecompileTools", "StrideArraysCore", "TriangularSolve"] +git-tree-sha1 = "6db1a75507051bc18bfa131fbc7c3f169cc4b2f6" +uuid = "f2c3362d-daeb-58d1-803e-2bc74f2840b4" +version = "0.2.23" + +[[deps.Reexport]] +git-tree-sha1 = "45e428421666073eab6f2da5c9d310d99bb12f9b" +uuid = "189a3867-3050-52da-a836-e630ba90ab69" +version = "1.2.2" + +[[deps.Requires]] +deps = ["UUIDs"] +git-tree-sha1 = "838a3a4188e2ded87a4f9f184b4b0d78a1e91cb7" +uuid = "ae029012-a4dd-5104-9daa-d747884805df" +version = "1.3.0" + +[[deps.RuntimeGeneratedFunctions]] +deps = ["ExprTools", "SHA", "Serialization"] +git-tree-sha1 = "04c968137612c4a5629fa531334bb81ad5680f00" +uuid = "7e49a35a-f44a-4d26-94aa-eba1b4ca6b47" +version = "0.5.13" + +[[deps.SHA]] +uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" +version = "0.7.0" + +[[deps.SIMDTypes]] +git-tree-sha1 = "330289636fb8107c5f32088d2741e9fd7a061a5c" +uuid = "94e857df-77ce-4151-89e5-788b33177be4" +version = "0.1.0" + +[[deps.SLEEFPirates]] +deps = ["IfElse", "Static", "VectorizationBase"] +git-tree-sha1 = "456f610ca2fbd1c14f5fcf31c6bfadc55e7d66e0" +uuid = "476501e8-09a2-5ece-8869-fb82de89a1fa" +version = "0.6.43" + +[[deps.SciMLBase]] +deps = ["ADTypes", "Accessors", "ArrayInterface", "CommonSolve", "ConstructionBase", "Distributed", "DocStringExtensions", "EnumX", "Expronicon", 
"FunctionWrappersWrappers", "IteratorInterfaceExtensions", "LinearAlgebra", "Logging", "Markdown", "PrecompileTools", "Preferences", "Printf", "RecipesBase", "RecursiveArrayTools", "Reexport", "RuntimeGeneratedFunctions", "SciMLOperators", "SciMLStructures", "StaticArraysCore", "Statistics", "SymbolicIndexingInterface"] +git-tree-sha1 = "213408a448e27170e4fca428838b8d11c5bbf1ab" +uuid = "0bca4576-84f4-4d90-8ffe-ffa030f20462" +version = "2.68.1" + + [deps.SciMLBase.extensions] + SciMLBaseChainRulesCoreExt = "ChainRulesCore" + SciMLBaseMakieExt = "Makie" + SciMLBasePartialFunctionsExt = "PartialFunctions" + SciMLBasePyCallExt = "PyCall" + SciMLBasePythonCallExt = "PythonCall" + SciMLBaseRCallExt = "RCall" + SciMLBaseZygoteExt = "Zygote" + + [deps.SciMLBase.weakdeps] + ChainRules = "082447d4-558c-5d27-93f4-14fc19e9eca2" + ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" + Makie = "ee78f7c6-11fb-53f2-987a-cfe4a2b5a57a" + PartialFunctions = "570af359-4316-4cb7-8c74-252c00c2016b" + PyCall = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0" + PythonCall = "6099a3de-0909-46bc-b1f4-468b9a2dfc0d" + RCall = "6f49c342-dc21-5d91-9882-a32aef131414" + Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" + +[[deps.SciMLJacobianOperators]] +deps = ["ADTypes", "ArrayInterface", "ConcreteStructs", "ConstructionBase", "DifferentiationInterface", "FastClosures", "LinearAlgebra", "SciMLBase", "SciMLOperators"] +git-tree-sha1 = "f66048bb969e67bd7d1bdd03cd0b81219642bbd0" +uuid = "19f34311-ddf3-4b8b-af20-060888a46c0e" +version = "0.1.1" + +[[deps.SciMLOperators]] +deps = ["Accessors", "ArrayInterface", "DocStringExtensions", "LinearAlgebra", "MacroTools"] +git-tree-sha1 = "6149620767866d4b0f0f7028639b6e661b6a1e44" +uuid = "c0aeaf25-5076-4817-a8d5-81caf7dfa961" +version = "0.3.12" +weakdeps = ["SparseArrays", "StaticArraysCore"] + + [deps.SciMLOperators.extensions] + SciMLOperatorsSparseArraysExt = "SparseArrays" + SciMLOperatorsStaticArraysCoreExt = "StaticArraysCore" + +[[deps.SciMLStructures]] 
+deps = ["ArrayInterface"] +git-tree-sha1 = "0444a37a25fab98adbd90baa806ee492a3af133a" +uuid = "53ae85a6-f571-4167-b2af-e1d143709226" +version = "1.6.1" + +[[deps.Scratch]] +deps = ["Dates"] +git-tree-sha1 = "3bac05bc7e74a75fd9cba4295cde4045d9fe2386" +uuid = "6c6a2e73-6563-6170-7368-637461726353" +version = "1.2.1" + +[[deps.SentinelArrays]] +deps = ["Dates", "Random"] +git-tree-sha1 = "712fb0231ee6f9120e005ccd56297abbc053e7e0" +uuid = "91c51154-3ec4-41a3-a24f-3f23e20d615c" +version = "1.4.8" + +[[deps.Serialization]] +uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" +version = "1.11.0" + +[[deps.Setfield]] +deps = ["ConstructionBase", "Future", "MacroTools", "StaticArraysCore"] +git-tree-sha1 = "e2cc6d8c88613c05e1defb55170bf5ff211fbeac" +uuid = "efcf1570-3423-57d1-acb7-fd33fddbac46" +version = "1.1.1" + +[[deps.SharedArrays]] +deps = ["Distributed", "Mmap", "Random", "Serialization"] +uuid = "1a1011a3-84de-559e-8e89-a11a2f7dc383" +version = "1.11.0" + +[[deps.SimpleNonlinearSolve]] +deps = ["ADTypes", "ArrayInterface", "BracketingNonlinearSolve", "CommonSolve", "ConcreteStructs", "DifferentiationInterface", "FastClosures", "FiniteDiff", "ForwardDiff", "LineSearch", "LinearAlgebra", "MaybeInplace", "NonlinearSolveBase", "PrecompileTools", "Reexport", "SciMLBase", "Setfield", "StaticArraysCore"] +git-tree-sha1 = "a3868a6add9f5989d1f4bd21de0333ef89fb9d9f" +uuid = "727e6d20-b764-4bd8-a329-72de5adea6c7" +version = "2.1.0" + + [deps.SimpleNonlinearSolve.extensions] + SimpleNonlinearSolveChainRulesCoreExt = "ChainRulesCore" + SimpleNonlinearSolveDiffEqBaseExt = "DiffEqBase" + SimpleNonlinearSolveReverseDiffExt = "ReverseDiff" + SimpleNonlinearSolveTrackerExt = "Tracker" + + [deps.SimpleNonlinearSolve.weakdeps] + ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" + DiffEqBase = "2b5f629d-d688-5b77-993f-72d75c75574e" + ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267" + Tracker = "9f7883ad-71c0-57eb-9f7f-b5c9e6d3789c" + +[[deps.SimpleTraits]] +deps = 
["InteractiveUtils", "MacroTools"] +git-tree-sha1 = "5d7e3f4e11935503d3ecaf7186eac40602e7d231" +uuid = "699a6c99-e7fa-54fc-8d76-47d257e15c1d" +version = "0.9.4" + +[[deps.SimpleUnPack]] +git-tree-sha1 = "58e6353e72cde29b90a69527e56df1b5c3d8c437" +uuid = "ce78b400-467f-4804-87d8-8f486da07d0a" +version = "1.1.0" + +[[deps.Sockets]] +uuid = "6462fe0b-24de-5631-8697-dd941f90decc" +version = "1.11.0" + +[[deps.SortingAlgorithms]] +deps = ["DataStructures"] +git-tree-sha1 = "66e0a8e672a0bdfca2c3f5937efb8538b9ddc085" +uuid = "a2af1166-a08f-5f64-846c-94a0d3cef48c" +version = "1.2.1" + +[[deps.SparseArrays]] +deps = ["Libdl", "LinearAlgebra", "Random", "Serialization", "SuiteSparse_jll"] +uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" +version = "1.11.0" + +[[deps.SparseDiffTools]] +deps = ["ADTypes", "Adapt", "ArrayInterface", "Compat", "DataStructures", "FiniteDiff", "ForwardDiff", "Graphs", "LinearAlgebra", "PackageExtensionCompat", "Random", "Reexport", "SciMLOperators", "Setfield", "SparseArrays", "StaticArrayInterface", "StaticArrays", "UnPack", "VertexSafeGraphs"] +git-tree-sha1 = "b906758c107b049b6b71599b9f928d9b14e5554a" +uuid = "47a9eef4-7e08-11e9-0b38-333d64bd3804" +version = "2.23.0" + + [deps.SparseDiffTools.extensions] + SparseDiffToolsEnzymeExt = "Enzyme" + SparseDiffToolsPolyesterExt = "Polyester" + SparseDiffToolsPolyesterForwardDiffExt = "PolyesterForwardDiff" + SparseDiffToolsSymbolicsExt = "Symbolics" + SparseDiffToolsZygoteExt = "Zygote" + + [deps.SparseDiffTools.weakdeps] + Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9" + Polyester = "f517fe37-dbe3-4b94-8317-1923a5111588" + PolyesterForwardDiff = "98d1487c-24ca-40b6-b7ab-df2af84e126b" + Symbolics = "0c5d862f-8b57-4792-8d23-62f2024744c7" + Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" + +[[deps.SparseMatrixColorings]] +deps = ["ADTypes", "DataStructures", "DocStringExtensions", "LinearAlgebra", "Random", "SparseArrays"] +git-tree-sha1 = "76b44c879661552d64f382acf66faa29ab56b3d9" +uuid = 
"0a514795-09f3-496d-8182-132a7b665d35" +version = "0.4.10" +weakdeps = ["Colors"] + + [deps.SparseMatrixColorings.extensions] + SparseMatrixColoringsColorsExt = "Colors" + +[[deps.Sparspak]] +deps = ["Libdl", "LinearAlgebra", "Logging", "OffsetArrays", "Printf", "SparseArrays", "Test"] +git-tree-sha1 = "342cf4b449c299d8d1ceaf00b7a49f4fbc7940e7" +uuid = "e56a9233-b9d6-4f03-8d0f-1825330902ac" +version = "0.3.9" + +[[deps.SpecialFunctions]] +deps = ["IrrationalConstants", "LogExpFunctions", "OpenLibm_jll", "OpenSpecFun_jll"] +git-tree-sha1 = "64cca0c26b4f31ba18f13f6c12af7c85f478cfde" +uuid = "276daf66-3868-5448-9aa4-cd146d93841b" +version = "2.5.0" +weakdeps = ["ChainRulesCore"] + + [deps.SpecialFunctions.extensions] + SpecialFunctionsChainRulesCoreExt = "ChainRulesCore" + +[[deps.StableRNGs]] +deps = ["Random"] +git-tree-sha1 = "83e6cce8324d49dfaf9ef059227f91ed4441a8e5" +uuid = "860ef19b-820b-49d6-a774-d7a799459cd3" +version = "1.0.2" + +[[deps.StartUpDG]] +deps = ["ConstructionBase", "FillArrays", "HDF5", "Kronecker", "LinearAlgebra", "NodesAndModes", "PathIntersections", "Printf", "RecipesBase", "RecursiveArrayTools", "Reexport", "Setfield", "SparseArrays", "StaticArrays", "Triangulate", "WriteVTK"] +git-tree-sha1 = "498a2fa1132a294a99385f334d596d92f3ca6ca3" +uuid = "472ebc20-7c99-4d4b-9470-8fde4e9faa0f" +version = "1.1.5" + + [deps.StartUpDG.extensions] + StartUpDGSummationByPartsOperatorsExt = "SummationByPartsOperators" + TriangulatePlotsExt = "Plots" + + [deps.StartUpDG.weakdeps] + Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" + SummationByPartsOperators = "9f78cca6-572e-554e-b819-917d2f1cf240" + +[[deps.Static]] +deps = ["CommonWorldInvalidations", "IfElse", "PrecompileTools"] +git-tree-sha1 = "87d51a3ee9a4b0d2fe054bdd3fc2436258db2603" +uuid = "aedffcd0-7271-4cad-89d0-dc628f76c6d3" +version = "1.1.1" + +[[deps.StaticArrayInterface]] +deps = ["ArrayInterface", "Compat", "IfElse", "LinearAlgebra", "PrecompileTools", "Static"] +git-tree-sha1 = 
"96381d50f1ce85f2663584c8e886a6ca97e60554" +uuid = "0d7ed370-da01-4f52-bd93-41d350b8b718" +version = "1.8.0" +weakdeps = ["OffsetArrays", "StaticArrays"] + + [deps.StaticArrayInterface.extensions] + StaticArrayInterfaceOffsetArraysExt = "OffsetArrays" + StaticArrayInterfaceStaticArraysExt = "StaticArrays" + +[[deps.StaticArrays]] +deps = ["LinearAlgebra", "PrecompileTools", "Random", "StaticArraysCore"] +git-tree-sha1 = "777657803913ffc7e8cc20f0fd04b634f871af8f" +uuid = "90137ffa-7385-5640-81b9-e52037218182" +version = "1.9.8" +weakdeps = ["ChainRulesCore", "Statistics"] + + [deps.StaticArrays.extensions] + StaticArraysChainRulesCoreExt = "ChainRulesCore" + StaticArraysStatisticsExt = "Statistics" + +[[deps.StaticArraysCore]] +git-tree-sha1 = "192954ef1208c7019899fbf8049e717f92959682" +uuid = "1e83bf80-4336-4d27-bf5d-d5a4f845583c" +version = "1.4.3" + +[[deps.Statistics]] +deps = ["LinearAlgebra"] +git-tree-sha1 = "ae3bb1eb3bba077cd276bc5cfc337cc65c3075c0" +uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" +version = "1.11.1" +weakdeps = ["SparseArrays"] + + [deps.Statistics.extensions] + SparseArraysExt = ["SparseArrays"] + +[[deps.StatsAPI]] +deps = ["LinearAlgebra"] +git-tree-sha1 = "1ff449ad350c9c4cbc756624d6f8a8c3ef56d3ed" +uuid = "82ae8749-77ed-4fe6-ae5f-f523153014b0" +version = "1.7.0" + +[[deps.StatsBase]] +deps = ["AliasTables", "DataAPI", "DataStructures", "LinearAlgebra", "LogExpFunctions", "Missings", "Printf", "Random", "SortingAlgorithms", "SparseArrays", "Statistics", "StatsAPI"] +git-tree-sha1 = "29321314c920c26684834965ec2ce0dacc9cf8e5" +uuid = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" +version = "0.34.4" + +[[deps.StrideArrays]] +deps = ["ArrayInterface", "LinearAlgebra", "LoopVectorization", "Octavian", "Random", "SLEEFPirates", "Static", "StaticArrayInterface", "StaticArraysCore", "Statistics", "StrideArraysCore", "VectorizationBase", "VectorizedRNG", "VectorizedStatistics"] +git-tree-sha1 = "a009ced9a1952b91f3982a6e06df672189c6cbc9" +uuid = 
"d1fa6d79-ef01-42a6-86c9-f7c551f8593b" +version = "0.1.29" + +[[deps.StrideArraysCore]] +deps = ["ArrayInterface", "CloseOpenIntervals", "IfElse", "LayoutPointers", "LinearAlgebra", "ManualMemory", "SIMDTypes", "Static", "StaticArrayInterface", "ThreadingUtilities"] +git-tree-sha1 = "f35f6ab602df8413a50c4a25ca14de821e8605fb" +uuid = "7792a7ef-975c-4747-a70f-980b88e8d1da" +version = "0.5.7" + +[[deps.StringManipulation]] +deps = ["PrecompileTools"] +git-tree-sha1 = "a6b1675a536c5ad1a60e5a5153e1fee12eb146e3" +uuid = "892a3eda-7b42-436c-8928-eab12a02cf0e" +version = "0.4.0" + +[[deps.StructArrays]] +deps = ["ConstructionBase", "DataAPI", "Tables"] +git-tree-sha1 = "f4dc295e983502292c4c3f951dbb4e985e35b3be" +uuid = "09ab397b-f2b6-538f-b94a-2f83cf4a842a" +version = "0.6.18" +weakdeps = ["Adapt", "GPUArraysCore", "SparseArrays", "StaticArrays"] + + [deps.StructArrays.extensions] + StructArraysAdaptExt = "Adapt" + StructArraysGPUArraysCoreExt = "GPUArraysCore" + StructArraysSparseArraysExt = "SparseArrays" + StructArraysStaticArraysExt = "StaticArrays" + +[[deps.SuiteSparse_jll]] +deps = ["Artifacts", "Libdl", "libblastrampoline_jll"] +uuid = "bea87d4a-7f5b-5778-9afe-8cc45184846c" +version = "7.7.0+0" + +[[deps.SummationByPartsOperators]] +deps = ["ArgCheck", "AutoHashEquals", "FFTW", "InteractiveUtils", "LinearAlgebra", "LoopVectorization", "MuladdMacro", "PolynomialBases", "PrecompileTools", "RecursiveArrayTools", "Reexport", "Requires", "SciMLBase", "SimpleUnPack", "SparseArrays", "StaticArrayInterface", "StaticArrays", "Unrolled"] +git-tree-sha1 = "0405e8d721d57b8b2674d61d263527e97efbb8cc" +uuid = "9f78cca6-572e-554e-b819-917d2f1cf240" +version = "0.5.72" + + [deps.SummationByPartsOperators.extensions] + SummationByPartsOperatorsBandedMatricesExt = "BandedMatrices" + SummationByPartsOperatorsDiffEqCallbacksExt = "DiffEqCallbacks" + SummationByPartsOperatorsForwardDiffExt = "ForwardDiff" + SummationByPartsOperatorsOptimForwardDiffExt = ["Optim", "ForwardDiff"] + 
SummationByPartsOperatorsStructArraysExt = "StructArrays" + + [deps.SummationByPartsOperators.weakdeps] + BandedMatrices = "aae01518-5342-5314-be14-df237901396f" + DiffEqCallbacks = "459566f4-90b8-5000-8ac3-15dfb0a30def" + ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" + Optim = "429524aa-4258-5aef-a3af-852621145aeb" + StructArrays = "09ab397b-f2b6-538f-b94a-2f83cf4a842a" + +[[deps.SymbolicIndexingInterface]] +deps = ["Accessors", "ArrayInterface", "RuntimeGeneratedFunctions", "StaticArraysCore"] +git-tree-sha1 = "8db233b54917e474165d582bef2244fa040e0a56" +uuid = "2efcf032-c050-4f8e-a9bb-153293bab1f5" +version = "0.3.36" + +[[deps.T8code]] +deps = ["CEnum", "Libdl", "MPI", "MPIPreferences", "Preferences", "Reexport", "UUIDs", "t8code_jll"] +git-tree-sha1 = "1b5ef460f156ed68e3affb67f48e2b4bec9915e4" +uuid = "d0cc0030-9a40-4274-8435-baadcfd54fa1" +version = "0.7.4" + +[[deps.TOML]] +deps = ["Dates"] +uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76" +version = "1.0.3" + +[[deps.TableTraits]] +deps = ["IteratorInterfaceExtensions"] +git-tree-sha1 = "c06b2f539df1c6efa794486abfb6ed2022561a39" +uuid = "3783bdb8-4a98-5b6b-af9a-565f29a5fe9c" +version = "1.0.1" + +[[deps.Tables]] +deps = ["DataAPI", "DataValueInterfaces", "IteratorInterfaceExtensions", "OrderedCollections", "TableTraits"] +git-tree-sha1 = "598cd7c1f68d1e205689b1c2fe65a9f85846f297" +uuid = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" +version = "1.12.0" + +[[deps.Tar]] +deps = ["ArgTools", "SHA"] +uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e" +version = "1.10.0" + +[[deps.Test]] +deps = ["InteractiveUtils", "Logging", "Random", "Serialization"] +uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +version = "1.11.0" + +[[deps.ThreadingUtilities]] +deps = ["ManualMemory"] +git-tree-sha1 = "eda08f7e9818eb53661b3deb74e3159460dfbc27" +uuid = "8290d209-cae3-49c0-8002-c8c24d57dab5" +version = "0.5.2" + +[[deps.TimerOutputs]] +deps = ["ExprTools", "Printf"] +git-tree-sha1 = "d7298ebdfa1654583468a487e8e83fae9d72dac3" +uuid = 
"a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f" +version = "0.5.26" + +[[deps.TranscodingStreams]] +git-tree-sha1 = "0c45878dcfdcfa8480052b6ab162cdd138781742" +uuid = "3bb67fe8-82b1-5028-8e26-92a6c54297fa" +version = "0.11.3" + +[[deps.Triangle_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "fe28e9a4684f6f54e868b9136afb8fd11f1734a7" +uuid = "5639c1d2-226c-5e70-8d55-b3095415a16a" +version = "1.6.2+0" + +[[deps.TriangularSolve]] +deps = ["CloseOpenIntervals", "IfElse", "LayoutPointers", "LinearAlgebra", "LoopVectorization", "Polyester", "Static", "VectorizationBase"] +git-tree-sha1 = "be986ad9dac14888ba338c2554dcfec6939e1393" +uuid = "d5829a12-d9aa-46ab-831f-fb7c9ab06edf" +version = "0.2.1" + +[[deps.Triangulate]] +deps = ["DocStringExtensions", "Printf", "Triangle_jll"] +git-tree-sha1 = "e387c61cb8f5f091e61d4e443a5f435d769871c2" +uuid = "f7e6ffb2-c36d-4f8f-a77e-16e897189344" +version = "2.3.4" + + [deps.Triangulate.weakdeps] + CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0" + GLMakie = "e9467ef8-e4e7-5192-8a1a-b1aee30e663a" + PyPlot = "d330b81b-6aea-500a-939a-2ce795aea3ee" + +[[deps.TriplotBase]] +git-tree-sha1 = "4d4ed7f294cda19382ff7de4c137d24d16adc89b" +uuid = "981d1d27-644d-49a2-9326-4793e63143c3" +version = "0.1.0" + +[[deps.TriplotRecipes]] +deps = ["RecipesBase", "TriplotBase"] +git-tree-sha1 = "fceb3b0f37ff6ccf3c70b9c5198d2eefec46ada0" +uuid = "808ab39a-a642-4abf-81ff-4cb34ebbffa3" +version = "0.1.2" + +[[deps.Trixi]] +deps = ["Accessors", "Adapt", "CodeTracking", "ConstructionBase", "DataStructures", "DelimitedFiles", "DiffEqBase", "DiffEqCallbacks", "Downloads", "EllipsisNotation", "FillArrays", "ForwardDiff", "HDF5", "IfElse", "KernelAbstractions", "LinearAlgebra", "LinearMaps", "LoopVectorization", "MPI", "MuladdMacro", "Octavian", "OffsetArrays", "P4est", "Polyester", "PrecompileTools", "Preferences", "Printf", "RecipesBase", "Reexport", "Requires", "SciMLBase", "SimpleUnPack", "SparseArrays", "StableRNGs", "StartUpDG", "Static", 
"StaticArrayInterface", "StaticArrays", "StrideArrays", "StructArrays", "SummationByPartsOperators", "T8code", "TimerOutputs", "Triangulate", "TriplotBase", "TriplotRecipes", "TrixiBase", "UUIDs"] +path = ".." +uuid = "a7f1ee26-1774-49b1-8366-f1abc58fbfcb" +version = "0.9.12-DEV" + + [deps.Trixi.extensions] + TrixiConvexECOSExt = ["Convex", "ECOS"] + TrixiMakieExt = "Makie" + TrixiNLsolveExt = "NLsolve" + + [deps.Trixi.weakdeps] + Convex = "f65535da-76fb-5f13-bab9-19810c17039a" + ECOS = "e2685f51-7e38-5353-a97d-a921fd2c8199" + Makie = "ee78f7c6-11fb-53f2-987a-cfe4a2b5a57a" + NLsolve = "2774e3e8-f4cf-5e23-947b-6d7e65073b56" + +[[deps.TrixiBase]] +deps = ["TimerOutputs"] +git-tree-sha1 = "017b747e5d59a41e903a6b03a083db7102236e1e" +uuid = "9a0f1c46-06d5-4909-a5a3-ce25d3fa3284" +version = "0.1.4" +weakdeps = ["MPI"] + + [deps.TrixiBase.extensions] + TrixiBaseMPIExt = "MPI" + +[[deps.TruncatedStacktraces]] +deps = ["InteractiveUtils", "MacroTools", "Preferences"] +git-tree-sha1 = "ea3e54c2bdde39062abf5a9758a23735558705e1" +uuid = "781d530d-4396-4725-bb49-402e4bee1e77" +version = "1.4.0" + +[[deps.UUIDs]] +deps = ["Random", "SHA"] +uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" +version = "1.11.0" + +[[deps.UnPack]] +git-tree-sha1 = "387c1f73762231e86e0c9c5443ce3b4a0a9a0c2b" +uuid = "3a884ed6-31ef-47d7-9d2a-63182c4928ed" +version = "1.0.2" + +[[deps.Unicode]] +uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" +version = "1.11.0" + +[[deps.Unrolled]] +deps = ["MacroTools"] +git-tree-sha1 = "6cc9d682755680e0f0be87c56392b7651efc2c7b" +uuid = "9602ed7d-8fef-5bc8-8597-8f21381861e8" +version = "0.1.5" + +[[deps.UnsafeAtomics]] +git-tree-sha1 = "b13c4edda90890e5b04ba24e20a310fbe6f249ff" +uuid = "013be700-e6cd-48c3-b4a1-df204f14c38f" +version = "0.3.0" +weakdeps = ["LLVM"] + + [deps.UnsafeAtomics.extensions] + UnsafeAtomicsLLVM = ["LLVM"] + +[[deps.VTKBase]] +git-tree-sha1 = "c2d0db3ef09f1942d08ea455a9e252594be5f3b6" +uuid = "4004b06d-e244-455f-a6ce-a5f9919cc534" +version = "1.0.1" + 
+[[deps.VectorizationBase]] +deps = ["ArrayInterface", "CPUSummary", "HostCPUFeatures", "IfElse", "LayoutPointers", "Libdl", "LinearAlgebra", "SIMDTypes", "Static", "StaticArrayInterface"] +git-tree-sha1 = "4ab62a49f1d8d9548a1c8d1a75e5f55cf196f64e" +uuid = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f" +version = "0.21.71" + +[[deps.VectorizedRNG]] +deps = ["Distributed", "Random", "SLEEFPirates", "UnPack", "VectorizationBase"] +git-tree-sha1 = "5ca83562ba95272d8709c6c91e31e23c3c4c9825" +uuid = "33b4df10-0173-11e9-2a0c-851a7edac40e" +version = "0.2.25" +weakdeps = ["Requires", "StaticArraysCore"] + + [deps.VectorizedRNG.extensions] + VectorizedRNGStaticArraysExt = ["StaticArraysCore"] + +[[deps.VectorizedStatistics]] +deps = ["LoopVectorization", "PrecompileTools", "Static"] +git-tree-sha1 = "f59703fbab297efe6ad09ef1dc656f8f0a21ad28" +uuid = "3b853605-1c98-4422-8364-4bd93ee0529e" +version = "0.5.10" + +[[deps.VertexSafeGraphs]] +deps = ["Graphs"] +git-tree-sha1 = "8351f8d73d7e880bfc042a8b6922684ebeafb35c" +uuid = "19fa3120-7c27-5ec5-8db8-b0b0aa330d6f" +version = "0.2.0" + +[[deps.WriteVTK]] +deps = ["Base64", "CodecZlib", "FillArrays", "LightXML", "TranscodingStreams", "VTKBase"] +git-tree-sha1 = "1d8042d58334ab7947ce505709df7009da6f3375" +uuid = "64499a7a-5c06-52f2-abe2-ccb03c286192" +version = "1.21.1" + +[[deps.XML2_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Libiconv_jll", "Zlib_jll"] +git-tree-sha1 = "a2fccc6559132927d4c5dc183e3e01048c6dcbd6" +uuid = "02c8fc9c-b97f-50b9-bbe4-9be30ff0a78a" +version = "2.13.5+0" + +[[deps.Zlib_jll]] +deps = ["Libdl"] +uuid = "83775a58-1f1d-513f-b197-d71354ab007a" +version = "1.2.13+1" + +[[deps.demumble_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "6498e3581023f8e530f34760d18f75a69e3a4ea8" +uuid = "1e29f10c-031c-5a83-9565-69cddfc27673" +version = "1.3.0+0" + +[[deps.libaec_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "46bf7be2917b59b761247be3f317ddf75e50e997" +uuid = 
"477f73a3-ac25-53e9-8cc3-50b2fa2566f0" +version = "1.1.2+0" + +[[deps.libblastrampoline_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "8e850b90-86db-534c-a0d3-1478176c7d93" +version = "5.11.0+0" + +[[deps.nghttp2_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d" +version = "1.59.0+0" + +[[deps.oneTBB_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "7d0ea0f4895ef2f5cb83645fa689e52cb55cf493" +uuid = "1317d2d5-d96f-522e-a858-c73665f53c3e" +version = "2021.12.0+0" + +[[deps.p7zip_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0" +version = "17.4.0+2" + +[[deps.t8code_jll]] +deps = ["Artifacts", "JLLWrappers", "LazyArtifacts", "Libdl", "MPICH_jll", "MPIPreferences", "MPItrampoline_jll", "MicrosoftMPI_jll", "OpenMPI_jll", "TOML", "Zlib_jll"] +git-tree-sha1 = "cf073e7d4275b8a030140936639f3d6a5eeb3e74" +uuid = "4ee9bed8-4011-53f7-90c2-22363c2f500d" +version = "3.0.1+0" diff --git a/esiwace/Project.toml b/esiwace/Project.toml new file mode 100644 index 00000000000..8bb32271f42 --- /dev/null +++ b/esiwace/Project.toml @@ -0,0 +1,11 @@ +[deps] +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" +HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f" +KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c" +MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195" +MPIPreferences = "3da0fdf6-3ccc-4f1b-acd9-58baa6c99267" +OrdinaryDiffEq = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed" +P4est = "7d669430-f675-4ae7-b43e-fab78ec5a902" +T8code = "d0cc0030-9a40-4274-8435-baadcfd54fa1" +TimerOutputs = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f" +Trixi = "a7f1ee26-1774-49b1-8366-f1abc58fbfcb" diff --git a/esiwace/README.md b/esiwace/README.md new file mode 100644 index 00000000000..dfde57a0037 --- /dev/null +++ b/esiwace/README.md @@ -0,0 +1,143 @@ +# ESiWACE3 Trixi.jl service + +## Instructions for terrabyte cluster + +You need to get an account at https://docs.terrabyte.lrz.de/services/identity/get-account/ +and set up two-factor 
authentication. + +Documentation is available here: https://docs.terrabyte.lrz.de/ + + +### Login +```shell +ssh login.terrabyte.lrz.de +``` +You have storage space at `$HOME`, `$SCRATCH` (not backed up, temporary), and `$PROJECT` +(soon to come). + + +### Set up t8code + +**TODO: once there is $PROJECT, this step can be skipped** + +1. Load modules + ```shell + module purge + module load spack/23.1.0 + module load gcc/12.2.0 + module load openmpi/4.1.5-gcc11 + ``` +2. Change to scratch folder + ```shell + cd $SCRATCH + ``` +3. Clone the repository + ```shell + git clone --branch 'v3.0.1' --depth 1 https://github.com/DLR-AMR/t8code.git + cd t8code + git submodule init + git submodule update + ``` +4. Build using cmake: + ```shell + module add cmake + mkdir build + cd build + cmake \ + -DCMAKE_C_COMPILER=mpicc \ + -DCMAKE_CXX_COMPILER=mpicxx \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_INSTALL_PREFIX="$SCRATCH/install/t8code" \ + -DT8CODE_BUILD_TESTS=OFF \ + -DT8CODE_BUILD_TUTORIALS=OFF \ + -DT8CODE_BUILD_EXAMPLES=OFF \ + -DT8CODE_BUILD_BENCHMARKS=OFF \ + -DT8CODE_ENABLE_MPI=ON \ + .. + nice make -j8 + nice make install -j8 + ``` + + +### Set up Julia +Julia is not available on the cluster. We need to install it manually. +1. If there is no `.bashrc` or `.bash_profile` in your `$HOME` directory, create one + ``` + touch $HOME/.bashrc + ``` +2. Use the official Julia installer: + ```shell + curl -fsSL https://install.julialang.org | sh + ``` + Accept the defaults. Once finished you will be told to source your `.bashrc` or re-login. +3. Julia should now be available + ```shell + julia --version + ``` +4. Install the 1.11 branch + ```shell + juliaup add 1.11 + ``` + + +### Set up Trixi.jl +1. Clone the repository + ```shell + git clone https://github.com/benegee/Trixi.jl.git + cd Trixi.jl && git switch lc/gpu-develop + ``` +2. Go to the `esiwace` directory. We collect necessary environmental settings in + `profile`. Edit this file as necessary and source it: + ```shell + .
profile + ``` +3. The Julia project is configured by several files: `Project.toml` lists dependencies, + `Manifest.toml` lists exact version numbers for all installed packages, + `LocalPreferences.toml` contains advanced configuration options. + It should only be necessary to adapt `LocalPreferences.toml` to reflect the t8code + installation path. +4. Open Julia via the `$JL` command and instantiate the project: + ```shell + $JL --project=. -e 'using Pkg; Pkg.instantiate()' + ``` + This will take some time! Some packages might throw errors. + + +### Check installation +1. Make sure that everything is precompiled by running the following: + ```shell + $JL --project=. -e 'using OrdinaryDiffEq, Trixi' + ``` + If there are still some errors, they might get resolved when running on compute nodes. +2. To test CUDA, first log in to a GPU node: + ```shell + salloc --cluster=hpda2 --partition=hpda2_testgpu --nodes=1 --ntasks-per-node=1 --gres=gpu:1 --time=00:30:00 + ``` + Then start Julia: + ```shell + $JL --project=. -e 'using CUDA; CUDA.versioninfo()' + ``` + This should print + ``` + CUDA runtime 11.8, local installation + ... + ``` + + + +## Launch +1. SLURM jobscripts are in `jobscripts`. Edit as necessary. At least, you have to specify + your mail address. +2. The actual simulation is configured in `run.jl` and based on Trixi.jl files in `elixirs`. +3.
Send job to queue: + ```shell + sbatch jobscripts/single_node.sh + ``` diff --git a/esiwace/elixirs/elixir_euler_taylor_green_vortex.jl b/esiwace/elixirs/elixir_euler_taylor_green_vortex.jl new file mode 100644 index 00000000000..1d204b43680 --- /dev/null +++ b/esiwace/elixirs/elixir_euler_taylor_green_vortex.jl @@ -0,0 +1,74 @@ +using OrdinaryDiffEq +using Trixi +using CUDA +CUDA.allowscalar(false) + +############################################################################### +# semidiscretization of the compressible Euler equations + +equations = CompressibleEulerEquations3D(1.4) + +function initial_condition_taylor_green_vortex(x, t, + equations::CompressibleEulerEquations3D) + A = 1.0 # magnitude of speed + Ms = 0.1 # maximum Mach number + + rho = 1.0 + v1 = A * sin(x[1]) * cos(x[2]) * cos(x[3]) + v2 = -A * cos(x[1]) * sin(x[2]) * cos(x[3]) + v3 = 0.0 + p = (A / Ms)^2 * rho / equations.gamma # scaling to get Ms + p = p + 1.0/16.0 * A^2 * rho * (cos(2*x[1])*cos(2*x[3]) + + 2*cos(2*x[2]) + 2*cos(2*x[1]) + cos(2*x[2])*cos(2*x[3])) + + return prim2cons(SVector(rho, v1, v2, v3, p), equations) +end + +initial_condition = initial_condition_taylor_green_vortex + +#volume_flux = flux_ranocha +volume_flux = flux_lax_friedrichs +solver = DGSEM(polydeg=5, surface_flux=volume_flux, + volume_integral=VolumeIntegralFluxDifferencing(volume_flux)) + +coordinates_min = (-1.0, -1.0, -1.0) .* pi +coordinates_max = ( 1.0, 1.0, 1.0) .* pi + +initial_refinement_level = 1 +trees_per_dimension = (4, 4, 4) + +mesh = P4estMesh(trees_per_dimension, polydeg=1, + coordinates_min=coordinates_min, coordinates_max=coordinates_max, + periodicity=true, initial_refinement_level=initial_refinement_level) + +semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver) + + +############################################################################### +# ODE solvers, callbacks etc.
+ +tspan = (0.0, 1000.0) +ode = semidiscretize(semi, tspan; adapt_to=CuArray) + +summary_callback = SummaryCallback() + +stepsize_callback = StepsizeCallback(cfl=0.1) + +callbacks = CallbackSet(summary_callback, stepsize_callback) + + +############################################################################### +# run the simulation + +maxiters=200 + +# disable warnings when maxiters is reached +sol = solve(ode, CarpenterKennedy2N54(williamson_condition=false), + dt=1.0, + save_everystep=false, callback=callbacks, + maxiters=maxiters, verbose=false); + +# print the timer summary +summary_callback() + +finalize(mesh) diff --git a/esiwace/jobscripts/single_node.sh b/esiwace/jobscripts/single_node.sh new file mode 100644 index 00000000000..fd88d9d3267 --- /dev/null +++ b/esiwace/jobscripts/single_node.sh @@ -0,0 +1,19 @@ +#!/bin/bash -x +#SBATCH --cluster=hpda2 +#SBATCH --partition=hpda2_compute_gpu +#SBATCH --nodes=1 +#SBATCH --ntasks-per-node=4 +#SBATCH --gres=gpu:4 +#SBATCH --get-user-env +#SBATCH --export=NONE +#SBATCH --mail-user=bgeihe@uni-koeln.de +#SBATCH --mail-type=all +#SBATCH --job-name=single_node +#SBATCH --output=stdout.%j +#SBATCH --error=stderr.%j +#SBATCH --time=00:30:00 + +source profile + +mpiexec -n $SLURM_NTASKS $JL --threads=1 --project=. 
run.jl + diff --git a/esiwace/profile b/esiwace/profile new file mode 100644 index 00000000000..e69e3fa87f1 --- /dev/null +++ b/esiwace/profile @@ -0,0 +1,10 @@ +module purge +module load slurm_setup +module load spack/23.1.0 +module load gcc/12.2.0 +module load openmpi/4.1.5-gcc11 +module load cuda/11.8.0 + +export JULIA_DEPOT_PATH="$SCRATCH/julia-depot/" +export JL="julia +1.11" + diff --git a/esiwace/run.jl b/esiwace/run.jl new file mode 100644 index 00000000000..046405dd6ca --- /dev/null +++ b/esiwace/run.jl @@ -0,0 +1,65 @@ +using Trixi +using MPI +using TimerOutputs +using CUDA + +function main(elixir_path) + + comm = MPI.COMM_WORLD + rank = MPI.Comm_rank(comm) + isroot = rank == 0 + + # pin rank to device? + CUDA.device!(rank % 4) + gpu = CUDA.device() + print("Rank $rank has device $(gpu) with ID $(CUDA.uuid(gpu)), has CUDA: $(MPI.has_cuda())\n") + + # setup + maxiters = 400 + + if isroot + println("Warming up...") + end + + # start simulation with tiny final time to trigger precompilation + duration_precompile = @elapsed trixi_include(elixir_path, + tspan=(0.0, 1e-14)) + + if isroot + println("Finished warm-up in $duration_precompile seconds\n") + println("Starting simulation...") + end + + # start the real simulation + duration_elixir = @elapsed trixi_include(elixir_path, maxiters=maxiters) + + # store metrics (on every rank!) + metrics = Dict{String, Float64}("elapsed time" => duration_elixir) + + # read TimerOutputs timings + timer = Trixi.timer() + metrics["total time"] = 1.0e-9 * TimerOutputs.tottime(timer) + metrics["rhs! 
time"] = 1.0e-9 * TimerOutputs.time(timer["rhs!"]) + + # compute performance index + nrhscalls = Trixi.ncalls(semi.performance_counter) + walltime = 1.0e-9 * take!(semi.performance_counter) + metrics["PID"] = walltime * Trixi.mpi_nranks() / (Trixi.ndofsglobal(semi) * nrhscalls) + + # gather metrics from all ranks + gathered_metrics = MPI.gather(metrics, comm) + + if isroot + # reduce metrics per rank + open("metrics.out", "w") do io + for (key, _) in gathered_metrics[1] + println(io, key, ": ", mapreduce(x->x[key], min, gathered_metrics)) + end + end + end +end + +# hardcoded elixir +elixir_path = joinpath(@__DIR__(), "elixirs/elixir_euler_taylor_green_vortex.jl") + +main(elixir_path) diff --git a/examples/p4est_3d_dgsem/elixir_euler_source_terms_nonperiodic.jl b/examples/p4est_3d_dgsem/elixir_euler_source_terms_nonperiodic.jl index fc5e4da3ceb..56b5f6f11ff 100644 --- a/examples/p4est_3d_dgsem/elixir_euler_source_terms_nonperiodic.jl +++ b/examples/p4est_3d_dgsem/elixir_euler_source_terms_nonperiodic.jl @@ -1,6 +1,7 @@ using OrdinaryDiffEq using Trixi +using CUDA ############################################################################### # semidiscretization of the compressible Euler equations @@ -37,7 +38,7 @@ semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver, # ODE solvers, callbacks etc. 
tspan = (0.0, 5.0) -ode = semidiscretize(semi, tspan) +ode = semidiscretize(semi, tspan; adapt_to=CuArray) summary_callback = SummaryCallback() diff --git a/examples/p4est_3d_dgsem/elixir_euler_taylor_green_vortex.jl b/examples/p4est_3d_dgsem/elixir_euler_taylor_green_vortex.jl new file mode 100644 index 00000000000..db9103564e8 --- /dev/null +++ b/examples/p4est_3d_dgsem/elixir_euler_taylor_green_vortex.jl @@ -0,0 +1,79 @@ +using OrdinaryDiffEq +using Trixi +using CUDA +CUDA.allowscalar(false) +############################################################################### +# semidiscretization of the compressible Euler equations + +equations = CompressibleEulerEquations3D(1.4) + +""" + initial_condition_taylor_green_vortex(x, t, equations::CompressibleEulerEquations3D) + +The classical inviscid Taylor-Green vortex. +""" +function initial_condition_taylor_green_vortex(x, t, equations::CompressibleEulerEquations3D) + A = 1.0 # magnitude of speed + Ms = 0.1 # maximum Mach number + + rho = 1.0 + v1 = A * sin(x[1]) * cos(x[2]) * cos(x[3]) + v2 = -A * cos(x[1]) * sin(x[2]) * cos(x[3]) + v3 = 0.0 + p = (A / Ms)^2 * rho / equations.gamma # scaling to get Ms + p = p + 1.0/16.0 * A^2 * rho * (cos(2*x[1])*cos(2*x[3]) + 2*cos(2*x[2]) + 2*cos(2*x[1]) + cos(2*x[2])*cos(2*x[3])) + + return prim2cons(SVector(rho, v1, v2, v3, p), equations) +end + +initial_condition = initial_condition_taylor_green_vortex + +solver = DGSEM(polydeg=3, surface_flux=flux_lax_friedrichs, + volume_integral=VolumeIntegralFluxDifferencing(flux_lax_friedrichs)) + +coordinates_min = (-1.0, -1.0, -1.0) .* pi +coordinates_max = ( 1.0, 1.0, 1.0) .* pi + +# Create P4estMesh with 16 x 16 x 16 elements (note `initial_refinement_level=2`) +trees_per_dimension = (4, 4, 4) +mesh = P4estMesh(trees_per_dimension, polydeg=1, + coordinates_min=coordinates_min, coordinates_max=coordinates_max, + initial_refinement_level=2) + +semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver) + + 
+############################################################################### +# ODE solvers, callbacks etc. + +tspan = (0.0, 1.0)#5.0) +ode = semidiscretize(semi, tspan; adapt_to=CuArray) + +summary_callback = SummaryCallback() + +analysis_interval = 100 +analysis_callback = AnalysisCallback(semi, interval=analysis_interval) + +alive_callback = AliveCallback(analysis_interval=analysis_interval) + +save_solution = SaveSolutionCallback(interval=100, + save_initial_solution=true, + save_final_solution=true, + solution_variables=cons2prim) + +stepsize_callback = StepsizeCallback(cfl=0.9) + +callbacks = CallbackSet(summary_callback, + analysis_callback, + alive_callback, + #save_solution, + stepsize_callback) + + +############################################################################### +# run the simulation + +sol = solve(ode, CarpenterKennedy2N54(williamson_condition=false), + dt=1.0, # solve needs some value here but it will be overwritten by the stepsize_callback + save_everystep=false, callback=callbacks); +summary_callback() # print the timer summary diff --git a/examples/structured_2d_dgsem/elixir_advection_coupled.jl b/examples/structured_2d_dgsem/elixir_advection_coupled.jl index 50cee1a071e..e842c6185d9 100644 --- a/examples/structured_2d_dgsem/elixir_advection_coupled.jl +++ b/examples/structured_2d_dgsem/elixir_advection_coupled.jl @@ -1,5 +1,6 @@ using OrdinaryDiffEq using Trixi +import Trixi.Indexing ############################################################################### # Coupled semidiscretization of four linear advection systems using converter functions such that @@ -61,13 +62,13 @@ coupling_function12 = (x, u, equations_other, equations_own) -> u coupling_function13 = (x, u, equations_other, equations_own) -> u # Define the coupling boundary conditions and the system it is coupled to. 
-boundary_conditions_x_neg1 = BoundaryConditionCoupled(2, (:end, :i_forward), Float64, +boundary_conditions_x_neg1 = BoundaryConditionCoupled(2, (Indexing.last, Indexing.i_forward), Float64, coupling_function12) -boundary_conditions_x_pos1 = BoundaryConditionCoupled(2, (:begin, :i_forward), Float64, +boundary_conditions_x_pos1 = BoundaryConditionCoupled(2, (Indexing.first, Indexing.i_forward), Float64, coupling_function12) -boundary_conditions_y_neg1 = BoundaryConditionCoupled(3, (:i_forward, :end), Float64, +boundary_conditions_y_neg1 = BoundaryConditionCoupled(3, (Indexing.i_forward, Indexing.last), Float64, coupling_function13) -boundary_conditions_y_pos1 = BoundaryConditionCoupled(3, (:i_forward, :begin), Float64, +boundary_conditions_y_pos1 = BoundaryConditionCoupled(3, (Indexing.i_forward, Indexing.first), Float64, coupling_function13) # A semidiscretization collects data structures and functions for the spatial discretization @@ -93,13 +94,13 @@ coupling_function21 = (x, u, equations_other, equations_own) -> u coupling_function24 = (x, u, equations_other, equations_own) -> u # Define the coupling boundary conditions and the system it is coupled to. 
-boundary_conditions_x_neg2 = BoundaryConditionCoupled(1, (:end, :i_forward), Float64, +boundary_conditions_x_neg2 = BoundaryConditionCoupled(1, (Indexing.last, Indexing.i_forward), Float64, coupling_function21) -boundary_conditions_x_pos2 = BoundaryConditionCoupled(1, (:begin, :i_forward), Float64, +boundary_conditions_x_pos2 = BoundaryConditionCoupled(1, (Indexing.first, Indexing.i_forward), Float64, coupling_function21) -boundary_conditions_y_neg2 = BoundaryConditionCoupled(4, (:i_forward, :end), Float64, +boundary_conditions_y_neg2 = BoundaryConditionCoupled(4, (Indexing.i_forward, Indexing.last), Float64, coupling_function24) -boundary_conditions_y_pos2 = BoundaryConditionCoupled(4, (:i_forward, :begin), Float64, +boundary_conditions_y_pos2 = BoundaryConditionCoupled(4, (Indexing.i_forward, Indexing.first), Float64, coupling_function24) # A semidiscretization collects data structures and functions for the spatial discretization @@ -125,13 +126,13 @@ coupling_function34 = (x, u, equations_other, equations_own) -> u coupling_function31 = (x, u, equations_other, equations_own) -> u # Define the coupling boundary conditions and the system it is coupled to. 
-boundary_conditions_x_neg3 = BoundaryConditionCoupled(4, (:end, :i_forward), Float64, +boundary_conditions_x_neg3 = BoundaryConditionCoupled(4, (Indexing.last, Indexing.i_forward), Float64, coupling_function34) -boundary_conditions_x_pos3 = BoundaryConditionCoupled(4, (:begin, :i_forward), Float64, +boundary_conditions_x_pos3 = BoundaryConditionCoupled(4, (Indexing.first, Indexing.i_forward), Float64, coupling_function34) -boundary_conditions_y_neg3 = BoundaryConditionCoupled(1, (:i_forward, :end), Float64, +boundary_conditions_y_neg3 = BoundaryConditionCoupled(1, (Indexing.i_forward, Indexing.last), Float64, coupling_function31) -boundary_conditions_y_pos3 = BoundaryConditionCoupled(1, (:i_forward, :begin), Float64, +boundary_conditions_y_pos3 = BoundaryConditionCoupled(1, (Indexing.i_forward, Indexing.first), Float64, coupling_function31) # A semidiscretization collects data structures and functions for the spatial discretization @@ -157,13 +158,13 @@ coupling_function43 = (x, u, equations_other, equations_own) -> u coupling_function42 = (x, u, equations_other, equations_own) -> u # Define the coupling boundary conditions and the system it is coupled to. 
-boundary_conditions_x_neg4 = BoundaryConditionCoupled(3, (:end, :i_forward), Float64, +boundary_conditions_x_neg4 = BoundaryConditionCoupled(3, (Indexing.last, Indexing.i_forward), Float64, coupling_function43) -boundary_conditions_x_pos4 = BoundaryConditionCoupled(3, (:begin, :i_forward), Float64, +boundary_conditions_x_pos4 = BoundaryConditionCoupled(3, (Indexing.first, Indexing.i_forward), Float64, coupling_function43) -boundary_conditions_y_neg4 = BoundaryConditionCoupled(2, (:i_forward, :end), Float64, +boundary_conditions_y_neg4 = BoundaryConditionCoupled(2, (Indexing.i_forward, Indexing.last), Float64, coupling_function42) -boundary_conditions_y_pos4 = BoundaryConditionCoupled(2, (:i_forward, :begin), Float64, +boundary_conditions_y_pos4 = BoundaryConditionCoupled(2, (Indexing.i_forward, Indexing.first), Float64, coupling_function42) # A semidiscretization collects data structures and functions for the spatial discretization diff --git a/examples/structured_2d_dgsem/elixir_advection_meshview.jl b/examples/structured_2d_dgsem/elixir_advection_meshview.jl index 33e36a39f2c..5208da33e6c 100644 --- a/examples/structured_2d_dgsem/elixir_advection_meshview.jl +++ b/examples/structured_2d_dgsem/elixir_advection_meshview.jl @@ -51,21 +51,21 @@ mesh2 = StructuredMeshView(parent_mesh; indices_min = (9, 1), indices_max = (16, coupling_function = (x, u, equations_other, equations_own) -> u # Define the coupled boundary conditions -# The indices (:end, :i_forward) and (:begin, :i_forward) denote the interface indexing. +# The indices (Indexing.last, Indexing.i_forward) and (Indexing.first, Indexing.i_forward) denote the interface indexing. # For a system with coupling in x and y see examples/structured_2d_dgsem/elixir_advection_coupled.jl. 
boundary_conditions1 = ( # Connect left boundary with right boundary of left mesh - x_neg = BoundaryConditionCoupled(2, (:end, :i_forward), Float64, + x_neg = BoundaryConditionCoupled(2, (Trixi.Indexing.last, Trixi.Indexing.i_forward), Float64, coupling_function), - x_pos = BoundaryConditionCoupled(2, (:begin, :i_forward), Float64, + x_pos = BoundaryConditionCoupled(2, (Trixi.Indexing.first, Trixi.Indexing.i_forward), Float64, coupling_function), y_neg = boundary_condition_periodic, y_pos = boundary_condition_periodic) boundary_conditions2 = ( # Connect left boundary with right boundary of left mesh - x_neg = BoundaryConditionCoupled(1, (:end, :i_forward), Float64, + x_neg = BoundaryConditionCoupled(1, (Trixi.Indexing.last, Trixi.Indexing.i_forward), Float64, coupling_function), - x_pos = BoundaryConditionCoupled(1, (:begin, :i_forward), Float64, + x_pos = BoundaryConditionCoupled(1, (Trixi.Indexing.first, Trixi.Indexing.i_forward), Float64, coupling_function), y_neg = boundary_condition_periodic, y_pos = boundary_condition_periodic) diff --git a/examples/structured_2d_dgsem/elixir_mhd_coupled.jl b/examples/structured_2d_dgsem/elixir_mhd_coupled.jl index de274248b45..7baa17a0776 100644 --- a/examples/structured_2d_dgsem/elixir_mhd_coupled.jl +++ b/examples/structured_2d_dgsem/elixir_mhd_coupled.jl @@ -49,9 +49,9 @@ mesh1 = StructuredMesh(cells_per_dimension, periodicity = (false, true)) coupling_function1 = (x, u, equations_other, equations_own) -> u -boundary_conditions1 = (x_neg = BoundaryConditionCoupled(2, (:end, :i_forward), Float64, +boundary_conditions1 = (x_neg = BoundaryConditionCoupled(2, (Trixi.Indexing.last, Trixi.Indexing.i_forward), Float64, coupling_function1), - x_pos = BoundaryConditionCoupled(2, (:begin, :i_forward), Float64, + x_pos = BoundaryConditionCoupled(2, (Trixi.Indexing.first, Trixi.Indexing.i_forward), Float64, coupling_function1), y_neg = boundary_condition_periodic, y_pos = boundary_condition_periodic) @@ -72,9 +72,9 @@ mesh2 = StructuredMesh(cells_per_dimension, periodicity = (false, true)) 
coupling_function2 = (x, u, equations_other, equations_own) -> u -boundary_conditions2 = (x_neg = BoundaryConditionCoupled(1, (:end, :i_forward), Float64, +boundary_conditions2 = (x_neg = BoundaryConditionCoupled(1, (Trixi.Indexing.last, Trixi.Indexing.i_forward), Float64, coupling_function2), - x_pos = BoundaryConditionCoupled(1, (:begin, :i_forward), Float64, + x_pos = BoundaryConditionCoupled(1, (Trixi.Indexing.first, Trixi.Indexing.i_forward), Float64, coupling_function2), y_neg = boundary_condition_periodic, y_pos = boundary_condition_periodic) diff --git a/examples/t8code_3d_dgsem/elixir_advection_cubed_sphere.jl b/examples/t8code_3d_dgsem/elixir_advection_cubed_sphere.jl new file mode 100644 index 00000000000..556d8a02893 --- /dev/null +++ b/examples/t8code_3d_dgsem/elixir_advection_cubed_sphere.jl @@ -0,0 +1,61 @@ +using OrdinaryDiffEq +using Trixi + +############################################################################### +# semidiscretization of the linear advection equation + +advection_velocity = (0.2, -0.7, 0.5) +equations = LinearScalarAdvectionEquation3D(advection_velocity) + +# Create DG solver with polynomial degree = 3 and (local) Lax-Friedrichs/Rusanov flux as surface flux +solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs) + +initial_condition = initial_condition_convergence_test + +boundary_condition = BoundaryConditionDirichlet(initial_condition) +boundary_conditions = Dict(:inside => boundary_condition, + :outside => boundary_condition) + +# Note that the first argument refers to the level of refinement, unlike for p4est +mesh = Trixi.T8codeMeshCubedSphere(2, 3, 0.5, 0.5; + polydeg = 3, initial_refinement_level = 0) + +# A semidiscretization collects data structures and functions for the spatial discretization +semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver, + boundary_conditions = boundary_conditions) + +############################################################################### +# ODE solvers, callbacks 
etc. + +# Create ODE problem with time span from 0.0 to 1.0 +tspan = (0.0, 1.0) +ode = semidiscretize(semi, tspan) + +# At the beginning of the main loop, the SummaryCallback prints a summary of the simulation setup +# and resets the timers +summary_callback = SummaryCallback() + +# The AnalysisCallback allows to analyse the solution in regular intervals and prints the results +analysis_callback = AnalysisCallback(semi, interval = 100) + +# # The SaveSolutionCallback allows to save the solution to a file in regular intervals +# save_solution = SaveSolutionCallback(interval = 100, +# solution_variables = cons2prim) + +# The StepsizeCallback handles the re-calculation of the maximum Δt after each time step +stepsize_callback = StepsizeCallback(cfl = 1.2) + +# Create a CallbackSet to collect all callbacks such that they can be passed to the ODE solver +callbacks = CallbackSet(summary_callback, analysis_callback, # save_solution, + stepsize_callback) + +############################################################################### +# run the simulation + +# OrdinaryDiffEq's `solve` method evolves the solution in time and executes the passed callbacks +sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false), + dt = 1.0, # solve needs some value here but it will be overwritten by the stepsize_callback + save_everystep = false, callback = callbacks); + +# Print the timer summary +summary_callback() diff --git a/examples/t8code_3d_dgsem/elixir_euler_baroclinic_instability.jl b/examples/t8code_3d_dgsem/elixir_euler_baroclinic_instability.jl new file mode 100644 index 00000000000..21260288996 --- /dev/null +++ b/examples/t8code_3d_dgsem/elixir_euler_baroclinic_instability.jl @@ -0,0 +1,301 @@ +# An idealized baroclinic instability test case +# For optimal results consider increasing the resolution to 16x16x8 trees per cube face. +# +# Note that this elixir can take several hours to run. 
+# Using 24 threads of an AMD Ryzen Threadripper 3990X (more threads don't speed it up further) +# and `check-bounds=no`, this elixirs takes about one hour to run. +# With 16x16x8 trees per cube face on the same machine, it takes about 28 hours. +# +# References: +# - Paul A. Ullrich, Thomas Melvin, Christiane Jablonowski, Andrew Staniforth (2013) +# A proposed baroclinic wave test case for deep- and shallow-atmosphere dynamical cores +# https://doi.org/10.1002/qj.2241 + +using OrdinaryDiffEq +using Trixi +using LinearAlgebra + +############################################################################### +# Setup for the baroclinic instability test +gamma = 1.4 +equations = CompressibleEulerEquations3D(gamma) + +# Initial condition for an idealized baroclinic instability test +# https://doi.org/10.1002/qj.2241, Section 3.2 and Appendix A +function initial_condition_baroclinic_instability(x, t, + equations::CompressibleEulerEquations3D) + lon, lat, r = cartesian_to_sphere(x) + radius_earth = 6.371229e6 + # Make sure that the r is not smaller than radius_earth + z = max(r - radius_earth, 0.0) + + # Unperturbed basic state + rho, u, p = basic_state_baroclinic_instability_longitudinal_velocity(lon, lat, z) + + # Stream function type perturbation + u_perturbation, v_perturbation = perturbation_stream_function(lon, lat, z) + + u += u_perturbation + v = v_perturbation + + # Convert spherical velocity to Cartesian + v1 = -sin(lon) * u - sin(lat) * cos(lon) * v + v2 = cos(lon) * u - sin(lat) * sin(lon) * v + v3 = cos(lat) * v + + return prim2cons(SVector(rho, v1, v2, v3, p), equations) +end + +# Steady state for RHS correction below +function steady_state_baroclinic_instability(x, t, equations::CompressibleEulerEquations3D) + lon, lat, r = cartesian_to_sphere(x) + radius_earth = 6.371229e6 + # Make sure that the r is not smaller than radius_earth + z = max(r - radius_earth, 0.0) + + # Unperturbed basic state + rho, u, p = 
basic_state_baroclinic_instability_longitudinal_velocity(lon, lat, z) + + # Convert spherical velocity to Cartesian + v1 = -sin(lon) * u + v2 = cos(lon) * u + v3 = 0.0 + + return prim2cons(SVector(rho, v1, v2, v3, p), equations) +end + +function cartesian_to_sphere(x) + r = norm(x) + lambda = atan(x[2], x[1]) + if lambda < 0 + lambda += 2 * pi + end + phi = asin(x[3] / r) + + return lambda, phi, r +end + +# Unperturbed balanced steady-state. +# Returns primitive variables with only the velocity in longitudinal direction (rho, u, p). +# The other velocity components are zero. +function basic_state_baroclinic_instability_longitudinal_velocity(lon, lat, z) + # Parameters from Table 1 in the paper + # Corresponding names in the paper are commented + radius_earth = 6.371229e6 # a + half_width_parameter = 2 # b + gravitational_acceleration = 9.80616 # g + k = 3 # k + surface_pressure = 1e5 # p₀ + gas_constant = 287 # R + surface_equatorial_temperature = 310.0 # T₀ᴱ + surface_polar_temperature = 240.0 # T₀ᴾ + lapse_rate = 0.005 # Γ + angular_velocity = 7.29212e-5 # Ω + + # Distance to the center of the Earth + r = z + radius_earth + + # In the paper: T₀ + temperature0 = 0.5 * (surface_equatorial_temperature + surface_polar_temperature) + # In the paper: A, B, C, H + const_a = 1 / lapse_rate + const_b = (temperature0 - surface_polar_temperature) / + (temperature0 * surface_polar_temperature) + const_c = 0.5 * (k + 2) * (surface_equatorial_temperature - surface_polar_temperature) / + (surface_equatorial_temperature * surface_polar_temperature) + const_h = gas_constant * temperature0 / gravitational_acceleration + + # In the paper: (r - a) / bH + scaled_z = z / (half_width_parameter * const_h) + + # Temporary variables + temp1 = exp(lapse_rate / temperature0 * z) + temp2 = exp(-scaled_z^2) + + # In the paper: ̃τ₁, ̃τ₂ + tau1 = const_a * lapse_rate / temperature0 * temp1 + + const_b * (1 - 2 * scaled_z^2) * temp2 + tau2 = const_c * (1 - 2 * scaled_z^2) * temp2 + + # In the 
paper: ∫τ₁(r') dr', ∫τ₂(r') dr' + inttau1 = const_a * (temp1 - 1) + const_b * z * temp2 + inttau2 = const_c * z * temp2 + + # Temporary variables + temp3 = r / radius_earth * cos(lat) + temp4 = temp3^k - k / (k + 2) * temp3^(k + 2) + + # In the paper: T + temperature = 1 / ((r / radius_earth)^2 * (tau1 - tau2 * temp4)) + + # In the paper: U, u (zonal wind, first component of spherical velocity) + big_u = gravitational_acceleration / radius_earth * k * temperature * inttau2 * + (temp3^(k - 1) - temp3^(k + 1)) + temp5 = radius_earth * cos(lat) + u = -angular_velocity * temp5 + sqrt(angular_velocity^2 * temp5^2 + temp5 * big_u) + + # Hydrostatic pressure + p = surface_pressure * + exp(-gravitational_acceleration / gas_constant * (inttau1 - inttau2 * temp4)) + + # Density (via ideal gas law) + rho = p / (gas_constant * temperature) + + return rho, u, p +end + +# Perturbation as in Equations 25 and 26 of the paper (analytical derivative) +function perturbation_stream_function(lon, lat, z) + # Parameters from Table 1 in the paper + # Corresponding names in the paper are commented + perturbation_radius = 1 / 6 # d₀ / a + perturbed_wind_amplitude = 1.0 # Vₚ + perturbation_lon = pi / 9 # Longitude of perturbation location + perturbation_lat = 2 * pi / 9 # Latitude of perturbation location + pertz = 15000 # Perturbation height cap + + # Great circle distance (d in the paper) divided by a (radius of the Earth) + # because we never actually need d without dividing by a + great_circle_distance_by_a = acos(sin(perturbation_lat) * sin(lat) + + cos(perturbation_lat) * cos(lat) * + cos(lon - perturbation_lon)) + + # In the first case, the vertical taper function is per definition zero. + # In the second case, the stream function is per definition zero. 
+ if z > pertz || great_circle_distance_by_a > perturbation_radius + return 0.0, 0.0 + end + + # Vertical tapering of stream function + perttaper = 1.0 - 3 * z^2 / pertz^2 + 2 * z^3 / pertz^3 + + # sin/cos(pi * d / (2 * d_0)) in the paper + sin_, cos_ = sincos(0.5 * pi * great_circle_distance_by_a / perturbation_radius) + + # Common factor for both u and v + factor = 16 / (3 * sqrt(3)) * perturbed_wind_amplitude * perttaper * cos_^3 * sin_ + + u_perturbation = -factor * (-sin(perturbation_lat) * cos(lat) + + cos(perturbation_lat) * sin(lat) * cos(lon - perturbation_lon)) / + sin(great_circle_distance_by_a) + + v_perturbation = factor * cos(perturbation_lat) * sin(lon - perturbation_lon) / + sin(great_circle_distance_by_a) + + return u_perturbation, v_perturbation +end + +@inline function source_terms_baroclinic_instability(u, x, t, + equations::CompressibleEulerEquations3D) + radius_earth = 6.371229e6 # a + gravitational_acceleration = 9.80616 # g + angular_velocity = 7.29212e-5 # Ω + + r = norm(x) + # Make sure that r is not smaller than radius_earth + z = max(r - radius_earth, 0.0) + r = z + radius_earth + + du1 = zero(eltype(u)) + + # Gravity term + temp = -gravitational_acceleration * radius_earth^2 / r^3 + du2 = temp * u[1] * x[1] + du3 = temp * u[1] * x[2] + du4 = temp * u[1] * x[3] + du5 = temp * (u[2] * x[1] + u[3] * x[2] + u[4] * x[3]) + + # Coriolis term, -2Ω × ρv = -2 * angular_velocity * (0, 0, 1) × u[2:4] + du2 -= -2 * angular_velocity * u[3] + du3 -= 2 * angular_velocity * u[2] + + return SVector(du1, du2, du3, du4, du5) +end + +############################################################################### +# Start of the actual elixir, semidiscretization of the problem + +initial_condition = initial_condition_baroclinic_instability + +boundary_conditions = Dict(:inside => boundary_condition_slip_wall, + :outside => boundary_condition_slip_wall) + +# This is a good estimate for the speed of sound in this example. 
+# Other values between 300 and 400 should work as well. +surface_flux = FluxLMARS(340) +volume_flux = flux_kennedy_gruber +solver = DGSEM(polydeg = 5, surface_flux = surface_flux, + volume_integral = VolumeIntegralFluxDifferencing(volume_flux)) + +# For optimal results, use 4 lat lon levels and 8 layers here +# Note that the first argument refers to the level of refinement, unlike in for p4est +lat_lon_levels = 3 +layers = 4 +mesh = Trixi.T8codeMeshCubedSphere(lat_lon_levels, layers, 6.371229e6, 30000.0, + polydeg = 5, initial_refinement_level = 0) + +semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver, + source_terms = source_terms_baroclinic_instability, + boundary_conditions = boundary_conditions) + +############################################################################### +# ODE solvers, callbacks etc. + +tspan = (0.0, 10 * 24 * 60 * 60.0) # time in seconds for 10 days + +# Save RHS of the steady state and subtract it in every RHS evaluation. +# This trick preserves the steady state exactly (to machine rounding errors, of course). +# Otherwise, this elixir produces entirely unusable results for a resolution of 8x8x4 cells +# per cube face with a polydeg of 3. +# With this trick, even the polydeg 3 simulation produces usable (although badly resolved) results, +# and most of the grid imprinting in higher polydeg simulation is eliminated. +# +# See https://github.com/trixi-framework/Trixi.jl/issues/980 for more information. 
+u_steady_state = compute_coefficients(steady_state_baroclinic_instability, tspan[1], semi) +# Use a `let` block for performance (otherwise du_steady_state will be a global variable) +let du_steady_state = similar(u_steady_state) + # Save RHS of the steady state + Trixi.rhs!(du_steady_state, u_steady_state, semi, tspan[1]) + + global function corrected_rhs!(du, u, semi, t) + # Normal RHS evaluation + Trixi.rhs!(du, u, semi, t) + # Correct by subtracting the steady-state RHS + Trixi.@trixi_timeit Trixi.timer() "rhs correction" begin + # Use Trixi.@threaded for threaded performance + Trixi.@threaded for i in eachindex(du) + du[i] -= du_steady_state[i] + end + end + end +end +u0 = compute_coefficients(tspan[1], semi) +ode = ODEProblem(corrected_rhs!, u0, tspan, semi) + +summary_callback = SummaryCallback() + +analysis_interval = 5000 +analysis_callback = AnalysisCallback(semi, interval = analysis_interval) + +alive_callback = AliveCallback(analysis_interval = analysis_interval) + +#save_solution = SaveSolutionCallback(interval = 5000, +# save_initial_solution = true, +# save_final_solution = true, +# solution_variables = cons2prim) + +callbacks = CallbackSet(summary_callback, + analysis_callback, + alive_callback) +# , save_solution) + +############################################################################### +# run the simulation + +# Use a Runge-Kutta method with automatic (error based) time step size control +# Enable threading of the RK method for better performance on multiple threads +sol = solve(ode, RDPK3SpFSAL49(thread = OrdinaryDiffEq.True()); abstol = 1.0e-6, + reltol = 1.0e-6, + ode_default_options()..., callback = callbacks); + +summary_callback() # print the timer summary diff --git a/src/Trixi.jl b/src/Trixi.jl index cb50ca2e7df..95403c44417 100644 --- a/src/Trixi.jl +++ b/src/Trixi.jl @@ -43,6 +43,7 @@ import SciMLBase: get_du, get_tmp_cache, u_modified!, using DelimitedFiles: readdlm using Downloads: Downloads +import Adapt using CodeTracking: 
CodeTracking using ConstructionBase: ConstructionBase using DiffEqCallbacks: PeriodicCallback, PeriodicCallbackAffect @@ -51,6 +52,7 @@ using FillArrays: Ones, Zeros using ForwardDiff: ForwardDiff using HDF5: HDF5, h5open, attributes, create_dataset, datatype, dataspace using IfElse: ifelse +using KernelAbstractions using LinearMaps: LinearMap using LoopVectorization: LoopVectorization, @turbo, indices using StaticArrayInterface: static_length # used by LoopVectorization @@ -62,7 +64,7 @@ using P4est using T8code using RecipesBase: RecipesBase using Requires: @require -using Static: Static, One, True, False +using Static: Static, One, StaticBool, True, False @reexport using StaticArrays: SVector using StaticArrays: StaticArrays, MVector, MArray, SMatrix, @SMatrix using StrideArrays: PtrArray, StrideArray, StaticInt @@ -126,6 +128,7 @@ include("auxiliary/auxiliary.jl") include("auxiliary/mpi.jl") include("auxiliary/p4est.jl") include("auxiliary/t8code.jl") +include("auxiliary/vector_of_arrays.jl") include("equations/equations.jl") include("meshes/meshes.jl") include("solvers/solvers.jl") diff --git a/src/auxiliary/auxiliary.jl b/src/auxiliary/auxiliary.jl index 8ab798dd336..d223f415542 100644 --- a/src/auxiliary/auxiliary.jl +++ b/src/auxiliary/auxiliary.jl @@ -325,4 +325,51 @@ function download(src_url, file_path) return file_path end + +# Returns u[:, indices...] as an SVector. size(u, 1) should thus be +# known at compile time in the caller and passed via Val() +@inline function get_svector(u, ::Val{N}, indices...) where {N} + # There is a cut-off at `n == 10` inside of the method + # `ntuple(f::F, n::Integer) where F` in Base at ntuple.jl:17 + # in Julia `v1.5`, leading to type instabilities if + # more than ten variables are used. That's why we use + # `Val(...)` below. 
+ # We use `@inline` to make sure that the `getindex` calls are + # really inlined, which might be the default choice of the Julia + # compiler for standard `Array`s but not necessarily for more + # advanced array types such as `PtrArray`s, cf. + # https://github.com/JuliaSIMD/VectorizationBase.jl/issues/55 + SVector(ntuple(@inline(v->u[v, indices...]), N)) +end + +# Returns u[1, :, indices] and u[2, :, indices] as SVectors. size(u, 2) +# should thus be known at compile time in the caller and passed via Val() +@inline function get_svectors(u, ::Val{N}, indices...) where {N} + # There is a cut-off at `n == 10` inside of the method + # `ntuple(f::F, n::Integer) where F` in Base at ntuple.jl:17 + # in Julia `v1.5`, leading to type instabilities if + # more than ten variables are used. That's why we use + # `Val(...)` below. + u_ll = SVector(ntuple(@inline(v->u[1, v, indices...]), N)) + u_rr = SVector(ntuple(@inline(v->u[2, v, indices...]), N)) + return u_ll, u_rr +end + +@inline function add_to_first_axis!(u, u_node::SVector{N}, indices...) where {N} + for v in Base.OneTo(N) + u[v, indices...] += u_node[v] + end + return nothing +end + +# Use this function instead of `add_to_first_axis!` to speed up +# multiply-and-add-to-node-vars operations +# See https://github.com/trixi-framework/Trixi.jl/pull/643 +@inline function multiply_add_to_first_axis!(u, factor, u_node::SVector{N}, + indices...) where {N} + for v in Base.OneTo(N) + u[v, indices...] = u[v, indices...] + factor * u_node[v] + end + return nothing +end end # @muladd diff --git a/src/auxiliary/containers.jl b/src/auxiliary/containers.jl index 90650f6abcf..f17907cb98d 100644 --- a/src/auxiliary/containers.jl +++ b/src/auxiliary/containers.jl @@ -314,4 +314,53 @@ end function raw_copy!(c::AbstractContainer, from::Int, destination::Int) raw_copy!(c, c, from, from, destination) end + +# Containers that support heterogenous computing via KernelAbstractions.jl +# should be subtypes of this type. 
+#
+# The first type parameter must be `Array` by default and if
+# `Adapt.adapt_structure(to, container)` is called then this must be
+# `to`. This is used in downstream code, e.g. for calling
+# `wrap_array` with an appropriate type after `resize!`ing.
+#
+# The second type parameter determines if KA.jl is used.
+# By default, each container must initialize this to `false`.
+# However, when `Adapt.adapt_structure` is called on the container, it must
+# be changed to `true`.
+abstract type AbstractHeterogeneousContainer{T, B} <: AbstractContainer end
+uses_ka(::Any) = false # need ::Any here since not all containers <: AbstractContainer
+uses_ka(::AbstractHeterogeneousContainer{T, B}) where {T, B} = B
+array_type(::AbstractHeterogeneousContainer{T}) where {T} = T
+function backend_or_nothing(c)
+    # Return KA backend if KA is used, else nothing (`c` is untyped like `uses_ka`)
+    if uses_ka(c) # compile-time constant
+        return get_backend(c)
+    else
+        return nothing
+    end
+end
+# Subtypes must implement a method for these functions
+function Adapt.adapt_structure(to, ::AbstractHeterogeneousContainer)
+    error("required method not implemented")
+end
+function KernelAbstractions.get_backend(::AbstractHeterogeneousContainer)
+    error("required method not implemented")
+end
+
+# For some KA.jl backends like CUDA.jl, empty arrays do seem to simply be
+# null pointers which can cause `unsafe_wrap` to fail when calling
+# Adapt.adapt (ArgumentError, see
+# https://github.com/JuliaGPU/CUDA.jl/blob/v5.4.2/src/array.jl#L212-L229).
+# To circumvent this, on length zero arrays this allocates
+# a separate empty array instead of wrapping.
+# However, since zero length arrays are not used in calculations,
+# it should be okay if the underlying storage vectors and wrapped arrays
+# are not the same as long as they are properly wrapped when `resize!`d etc.
+function unsafe_wrap_or_alloc(to, vec, size) + if length(vec) == 0 + return allocate(get_backend(vec), eltype(vec), size) + else + return unsafe_wrap(to, pointer(vec), size) + end +end end # @muladd diff --git a/src/auxiliary/precompile.jl b/src/auxiliary/precompile.jl index 5a1de036808..eef2de9181e 100644 --- a/src/auxiliary/precompile.jl +++ b/src/auxiliary/precompile.jl @@ -384,10 +384,10 @@ function _precompile_manual_() # end # end @assert Base.precompile(Tuple{typeof(SummaryCallback)}) - @assert Base.precompile(Tuple{DiscreteCallback{typeof(Trixi.summary_callback), - typeof(Trixi.summary_callback), - typeof(Trixi.initialize_summary_callback), - typeof(SciMLBase.FINALIZE_DEFAULT)}}) + #@assert Base.precompile(Tuple{DiscreteCallback{typeof(Trixi.summary_callback), + # typeof(Trixi.summary_callback), + # typeof(Trixi.initialize_summary_callback), + # typeof(SciMLBase.FINALIZE_DEFAULT)}}) @assert Base.precompile(Tuple{typeof(summary_box), Base.TTY, String, Vector{Pair{String, Any}}}) # TODO: AMRCallback, ControllerThreeLevel, indicators @@ -525,9 +525,9 @@ function _precompile_manual_() typeof(Trixi.initialize_summary_callback), typeof(SciMLBase.FINALIZE_DEFAULT)} @assert Base.precompile(Tuple{typeof(show), Base.TTY, summary_callback_type}) - @assert Base.precompile(Tuple{typeof(show), IOContext{Base.TTY}, MIME"text/plain", - summary_callback_type}) - @assert Base.precompile(Tuple{summary_callback_type, Base.TTY}) + #@assert Base.precompile(Tuple{typeof(show), IOContext{Base.TTY}, MIME"text/plain", + # summary_callback_type}) + #Base.precompile(Tuple{summary_callback_type, Base.TTY}) # TODO: SteadyStateCallback, AnalysisCallback diff --git a/src/auxiliary/vector_of_arrays.jl b/src/auxiliary/vector_of_arrays.jl new file mode 100644 index 00000000000..777ceae2256 --- /dev/null +++ b/src/auxiliary/vector_of_arrays.jl @@ -0,0 +1,22 @@ +# By default, Julia/LLVM does not use fused multiply-add operations (FMAs). 
+# Since these FMAs can increase the performance of many numerical algorithms, +# we need to opt-in explicitly. +# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. +@muladd begin +#! format: noindent + +# Wraps a Vector of Arrays, forwards `getindex` to the underlying Vector. +# Implements `Adapt.adapt_structure` to allow offloading to the GPU which is +# not possible for a plain Vector of Arrays. +struct VecOfArrays{T <: AbstractArray} + arrays::Vector{T} +end +Base.getindex(v::VecOfArrays, i::Int) = Base.getindex(v.arrays, i) +Base.IndexStyle(v::VecOfArrays) = Base.IndexStyle(v.arrays) +Base.size(v::VecOfArrays) = Base.size(v.arrays) +Base.length(v::VecOfArrays) = Base.length(v.arrays) +Base.eltype(v::VecOfArrays{T}) where {T} = T +function Adapt.adapt_structure(to, v::VecOfArrays) + return [Adapt.adapt(to, arr) for arr in v.arrays] |> VecOfArrays +end +end # @muladd diff --git a/src/callbacks_step/analysis.jl b/src/callbacks_step/analysis.jl index 0a26bbdbebe..e75eb9eff61 100644 --- a/src/callbacks_step/analysis.jl +++ b/src/callbacks_step/analysis.jl @@ -144,7 +144,14 @@ function initialize!(cb::DiscreteCallback{Condition, Affect!}, u_ode, t, integrator) where {Condition, Affect! 
<: AnalysisCallback} semi = integrator.p du_ode = first(get_tmp_cache(integrator)) - initialize!(cb, u_ode, du_ode, t, integrator, semi) + if semi isa SemidiscretizationHyperbolic && uses_ka(semi.cache.elements) + semi_cpu = Adapt.adapt(Array, semi) + du_ode_cpu = Adapt.adapt(Array, du_ode) + u_ode_cpu = Adapt.adapt(Array, u_ode) + initialize!(cb, u_ode_cpu, du_ode_cpu, t, integrator, semi_cpu) + else + initialize!(cb, u_ode, du_ode, t, integrator, semi) + end end # This is the actual initialization method @@ -227,7 +234,14 @@ function (analysis_callback::AnalysisCallback)(integrator) semi = integrator.p du_ode = first(get_tmp_cache(integrator)) u_ode = integrator.u - analysis_callback(u_ode, du_ode, integrator, semi) + if semi isa SemidiscretizationHyperbolic && uses_ka(semi.cache.elements) + semi_cpu = Adapt.adapt(Array, semi) + du_ode_cpu = Adapt.adapt(Array, du_ode) + u_ode_cpu = Adapt.adapt(Array, u_ode) + analysis_callback(u_ode_cpu, du_ode_cpu, integrator, semi_cpu) + else + analysis_callback(u_ode, du_ode, integrator, semi) + end end # This method gets called internally as the main entry point to the AnalysiCallback @@ -604,8 +618,15 @@ function (cb::DiscreteCallback{Condition, Affect!})(sol) where {Condition, @unpack analyzer = analysis_callback cache_analysis = analysis_callback.cache - l2_error, linf_error = calc_error_norms(sol.u[end], sol.t[end], analyzer, semi, - cache_analysis) + if semi isa SemidiscretizationHyperbolic && uses_ka(semi.cache.elements) + semi_cpu = Adapt.adapt(Array, semi) + u_ode_cpu = Adapt.adapt(Array, sol.u[end]) + l2_error, linf_error = calc_error_norms(u_ode_cpu, sol.t[end], analyzer, semi_cpu, + cache_analysis) + else + l2_error, linf_error = calc_error_norms(sol.u[end], sol.t[end], analyzer, semi, + cache_analysis) + end (; l2 = l2_error, linf = linf_error) end diff --git a/src/callbacks_step/stepsize_dg3d.jl b/src/callbacks_step/stepsize_dg3d.jl index 3324e819cee..2c95dae2aa4 100644 --- a/src/callbacks_step/stepsize_dg3d.jl 
+++ b/src/callbacks_step/stepsize_dg3d.jl @@ -45,7 +45,113 @@ function max_dt(u, t, mesh::TreeMesh{3}, end function max_dt(u, t, mesh::Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}}, - constant_speed::False, equations, dg::DG, cache) + constant_speed, equations, dg::DG, cache) + backend = backend_or_nothing(cache.elements) + _max_dt(backend, u, t, mesh, constant_speed, equations, dg, cache) +end + +@inline function _max_dt(backend::Backend, u, t, + mesh::Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}}, + constant_speed::False, equations, dg::DG, cache) + @unpack contravariant_vectors, inverse_jacobian = cache.elements + num_elements = nelements(dg, cache) + nodes = eachnode(dg) + kernel! = max_scaled_speed_kernel!(backend) + + max_scaled_speeds = allocate(backend, eltype(t), num_elements) + kernel!(max_scaled_speeds, u, constant_speed, equations, nodes, + contravariant_vectors, + inverse_jacobian; ndrange = num_elements) + + # to avoid a division by zero if the speed vanishes everywhere, + # e.g. 
for steady-state linear advection + max_scaled_speed = max(nextfloat(zero(t)), maximum(max_scaled_speeds)) + + return 2 / (nnodes(dg) * max_scaled_speed) +end + +@kernel function max_scaled_speed_kernel!(max_scaled_speeds, u, + constant_speed::False, equations, nodes, + contravariant_vectors, inverse_jacobian) + element = @index(Global, Linear) + NVARS = Val(nvariables(equations)) + + max_lambda1 = max_lambda2 = max_lambda3 = zero(eltype(max_scaled_speeds)) + for k in nodes, j in nodes, i in nodes + u_node = get_svector(u, NVARS, i, j, k, element) + lambda1, lambda2, lambda3 = max_abs_speeds(u_node, equations) + + Ja11, Ja12, Ja13 = get_contravariant_vector(1, contravariant_vectors, i, j, + k, element) + lambda1_transformed = abs(Ja11 * lambda1 + Ja12 * lambda2 + Ja13 * lambda3) + Ja21, Ja22, Ja23 = get_contravariant_vector(2, contravariant_vectors, i, j, + k, element) + lambda2_transformed = abs(Ja21 * lambda1 + Ja22 * lambda2 + Ja23 * lambda3) + Ja31, Ja32, Ja33 = get_contravariant_vector(3, contravariant_vectors, i, j, + k, element) + lambda3_transformed = abs(Ja31 * lambda1 + Ja32 * lambda2 + Ja33 * lambda3) + + inv_jacobian = abs(inverse_jacobian[i, j, k, element]) + + max_lambda1 = max(max_lambda1, inv_jacobian * lambda1_transformed) + max_lambda2 = max(max_lambda2, inv_jacobian * lambda2_transformed) + max_lambda3 = max(max_lambda3, inv_jacobian * lambda3_transformed) + end + + max_scaled_speeds[element] = max_lambda1 + max_lambda2 + max_lambda3 +end + +@inline function _max_dt(backend::Backend, u, t, + mesh::Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}}, + constant_speed::True, equations, dg::DG, cache) + @unpack contravariant_vectors, inverse_jacobian = cache.elements + num_elements = nelements(dg, cache) + nodes = eachnode(dg) + kernel! 
= max_scaled_speed_kernel!(backend)
+
+    max_lambda1, max_lambda2, max_lambda3 = max_abs_speeds(equations)
+    max_scaled_speeds = allocate(backend, eltype(t), num_elements)
+    kernel!(max_scaled_speeds, constant_speed, nodes, contravariant_vectors,
+            inverse_jacobian, max_lambda1, max_lambda2, max_lambda3;
+            ndrange = num_elements)
+
+    # to avoid a division by zero if the speed vanishes everywhere,
+    # e.g. for steady-state linear advection
+    max_scaled_speed = max(nextfloat(zero(t)), maximum(max_scaled_speeds))
+
+    return 2 / (nnodes(dg) * max_scaled_speed)
+end
+
+@kernel function max_scaled_speed_kernel!(max_scaled_speeds,
+                                          constant_speed::True, nodes,
+                                          contravariant_vectors, inverse_jacobian,
+                                          max_lambda1, max_lambda2, max_lambda3)
+    element = @index(Global, Linear)
+    max_scaled_speed = zero(eltype(max_scaled_speeds)) # running maximum over all nodes
+    for k in nodes, j in nodes, i in nodes
+        Ja11, Ja12, Ja13 = get_contravariant_vector(1, contravariant_vectors, i, j,
+                                                    k, element)
+        lambda1_transformed = abs(Ja11 * max_lambda1 + Ja12 * max_lambda2 +
+                                  Ja13 * max_lambda3)
+        Ja21, Ja22, Ja23 = get_contravariant_vector(2, contravariant_vectors, i, j,
+                                                    k, element)
+        lambda2_transformed = abs(Ja21 * max_lambda1 + Ja22 * max_lambda2 +
+                                  Ja23 * max_lambda3)
+        Ja31, Ja32, Ja33 = get_contravariant_vector(3, contravariant_vectors, i, j,
+                                                    k, element)
+        lambda3_transformed = abs(Ja31 * max_lambda1 + Ja32 * max_lambda2 +
+                                  Ja33 * max_lambda3)
+
+        inv_jacobian = abs(inverse_jacobian[i, j, k, element])
+        lambda_sum = lambda1_transformed + lambda2_transformed + lambda3_transformed
+        max_scaled_speed = max(max_scaled_speed, inv_jacobian * lambda_sum)
+    end
+    max_scaled_speeds[element] = max_scaled_speed
+end
+
+@inline function _max_dt(::Nothing, u, t,
+                         mesh::Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}},
+                         constant_speed::False, equations, dg::DG, cache)
     # to avoid a division by zero if the speed vanishes everywhere,
     # e.g.
for steady-state linear advection max_scaled_speed = nextfloat(zero(t)) @@ -82,8 +188,9 @@ function max_dt(u, t, mesh::Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3} return 2 / (nnodes(dg) * max_scaled_speed) end -function max_dt(u, t, mesh::Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}}, - constant_speed::True, equations, dg::DG, cache) +@inline function _max_dt(::Nothing, u, t, + mesh::Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}}, + constant_speed::True, equations, dg::DG, cache) # to avoid a division by zero if the speed vanishes everywhere, # e.g. for steady-state linear advection max_scaled_speed = nextfloat(zero(t)) diff --git a/src/meshes/t8code_mesh.jl b/src/meshes/t8code_mesh.jl index fa5b1d8f81a..4c85c543e16 100644 --- a/src/meshes/t8code_mesh.jl +++ b/src/meshes/t8code_mesh.jl @@ -598,7 +598,7 @@ struct AbaqusFile{NDIMS} end """ - T8codeMesh(meshfile::String, NDIMS; kwargs...) + T8codeMesh(filepath::String, NDIMS; kwargs...) Main mesh constructor for the `T8codeMesh` that imports an unstructured, conforming mesh from either a Gmsh mesh file (`.msh`) or Abaqus mesh file (`.inp`) which is determined @@ -783,6 +783,52 @@ function t8_cmesh_new_from_connectivity(connectivity::Ptr{p8est_connectivity}, c return t8_cmesh_new_from_p8est(connectivity, comm, 0) end +""" +T8codeMeshCubedSphere(trees_per_face_dimension, layers, inner_radius, thickness; + polydeg, RealT=Float64, initial_refinement_level=0) + +Construct a cubed spherical shell of given inner radius and thickness as `T8codeMesh` with +`6 * trees_per_face_dimension^2 * layers` trees. The mesh will have two boundaries, +`:inside` and `:outside`. + +# Arguments +- `lat_lon_levels_per_face_dimension::Integer`: number of trees per patch in longitudinal + and latitudinal direction given as level of + refinement. +- `layers::Integer`: the number of trees in the third local dimension of each face, i.e., + the number of layers of the shell. 
+- `inner_radius::Float64`: Radius of the inner side of the shell. +- `thickness::Float64`: Thickness of the shell. The outer radius will be + `inner_radius + thickness`. +- `polydeg::Integer`: polynomial degree used to store the geometry of the mesh. + The mapping will be approximated by an interpolation polynomial + of the specified degree for each tree. +- `RealT::Type`: the type that should be used for coordinates. +- `initial_refinement_level::Integer`: refine the mesh uniformly to this level before the + simulation starts. +""" +function T8codeMeshCubedSphere(lat_lon_levels_per_face_dimension, layers, inner_radius, + thickness; + polydeg, RealT = Float64, initial_refinement_level = 0) + NDIMS = 3 + cmesh = t8_cmesh_new_cubed_spherical_shell(inner_radius, thickness, + lat_lon_levels_per_face_dimension, + layers, mpi_comm()) + do_face_ghost = mpi_isparallel() + scheme = t8_scheme_new_default_cxx() + forest = t8_forest_new_uniform(cmesh, scheme, initial_refinement_level, do_face_ghost, + mpi_comm()) + + num_trees = t8_cmesh_get_num_trees(cmesh) + boundary_names = fill(Symbol("---"), 2 * NDIMS, num_trees) + for itree in 1:num_trees + boundary_names[5, itree] = :inside + boundary_names[6, itree] = :outside + end + + return T8codeMesh{NDIMS, RealT}(forest, boundary_names; polydeg = polydeg) +end + struct adapt_callback_passthrough adapt_callback::Function user_data::Any diff --git a/src/semidiscretization/semidiscretization.jl b/src/semidiscretization/semidiscretization.jl index dd5c3c4791d..86a31ef107e 100644 --- a/src/semidiscretization/semidiscretization.jl +++ b/src/semidiscretization/semidiscretization.jl @@ -78,12 +78,18 @@ function calc_error_norms(u_ode, t, analyzer, semi::AbstractSemidiscretization, end """ - semidiscretize(semi::AbstractSemidiscretization, tspan) + semidiscretize(semi::AbstractSemidiscretization, tspan; adapt_to = nothing) Wrap the semidiscretization `semi` as an ODE problem in the time interval `tspan` that can be passed to `solve` from 
the [SciML ecosystem](https://diffeq.sciml.ai/latest/).
+The optional keyword argument `adapt_to` controls whether `semi` is adapted via
+`Adapt.jl`. If it is not nothing, `semi` gets adapted to `adapt_to` before
+semidiscretizing it. If it is adapted, KernelAbstractions.jl will be used in
+the solver backend. The `adapt_to` keyword is only supported for
+`SemidiscretizationHyperbolic` objects that use a `P4estMesh` as their mesh.
 """
 function semidiscretize(semi::AbstractSemidiscretization, tspan;
+                        adapt_to = nothing,
                         reset_threads = true)
     # Optionally reset Polyester.jl threads. See
     # https://github.com/trixi-framework/Trixi.jl/issues/1583
@@ -98,7 +104,19 @@ function semidiscretize(semi::AbstractSemidiscretization, tspan;
     # See https://github.com/trixi-framework/Trixi.jl/issues/328
     iip = true # is-inplace, i.e., we modify a vector when calling rhs!
     specialize = SciMLBase.FullSpecialize # specialize on rhs! and parameters (semi)
-    return ODEProblem{iip, specialize}(rhs!, u0_ode, tspan, semi)
+
+    if !isnothing(adapt_to)
+        if !(semi isa SemidiscretizationHyperbolic) || !(semi.mesh isa P4estMesh)
+            throw(ArgumentError("adapt_to keyword argument not supported for this semidiscretization"))
+        end
+        semi_adapted = Adapt.adapt(adapt_to, semi)
+        backend = get_backend(semi_adapted.cache.elements)
+        _u0_ode = allocate(backend, eltype(u0_ode), size(u0_ode))
+        KernelAbstractions.copyto!(backend, _u0_ode, u0_ode)
+        return ODEProblem{iip, specialize}(rhs!, _u0_ode, tspan, semi_adapted)
+    else
+        return ODEProblem{iip, specialize}(rhs!, u0_ode, tspan, semi)
+    end
 end

 """
diff --git a/src/semidiscretization/semidiscretization_coupled.jl b/src/semidiscretization/semidiscretization_coupled.jl
index 745a8d3f6f8..e4e98b26332 100644
--- a/src/semidiscretization/semidiscretization_coupled.jl
+++ b/src/semidiscretization/semidiscretization_coupled.jl
@@ -421,13 +421,13 @@ This is currently only implemented for [`StructuredMesh`](@ref).
```julia # Connect the left boundary of mesh 2 to our boundary such that our positive # boundary direction will match the positive y direction of the other boundary -BoundaryConditionCoupled(2, (:begin, :i), Float64, fun) +BoundaryConditionCoupled(2, (Indexing.first, :i), Float64, fun) # Connect the same two boundaries oppositely oriented -BoundaryConditionCoupled(2, (:begin, :i_backwards), Float64, fun) +BoundaryConditionCoupled(2, (Indexing.first, Indexing.i_backwards), Float64, fun) # Using this as y_neg boundary will connect `our_cells[i, 1, j]` to `other_cells[j, end-i, end]` -BoundaryConditionCoupled(2, (:j, :i_backwards, :end), Float64, fun) +BoundaryConditionCoupled(2, (:j, Indexing.i_backwards, Indexing.last), Float64, fun) ``` !!! warning "Experimental code" @@ -452,11 +452,11 @@ mutable struct BoundaryConditionCoupled{NDIMS, NDIMS = length(indices) u_boundary = Array{uEltype, NDIMS * 2 - 1}(undef, ntuple(_ -> 0, NDIMS * 2 - 1)) - if indices[1] in (:begin, :end) + if indices[1] in (Indexing.first, Indexing.last) other_orientation = 1 - elseif indices[2] in (:begin, :end) + elseif indices[2] in (Indexing.first, Indexing.last) other_orientation = 2 - else # indices[3] in (:begin, :end) + else # indices[3] in (Indexing.first, Indexing.last) other_orientation = 3 end diff --git a/src/semidiscretization/semidiscretization_hyperbolic.jl b/src/semidiscretization/semidiscretization_hyperbolic.jl index 02cff56a1d0..041be37e22e 100644 --- a/src/semidiscretization/semidiscretization_hyperbolic.jl +++ b/src/semidiscretization/semidiscretization_hyperbolic.jl @@ -30,19 +30,19 @@ mutable struct SemidiscretizationHyperbolic{Mesh, Equations, InitialCondition, function SemidiscretizationHyperbolic{Mesh, Equations, InitialCondition, BoundaryConditions, SourceTerms, Solver, - Cache}(mesh::Mesh, equations::Equations, + Cache}(mesh::Mesh, + equations::Equations, initial_condition::InitialCondition, boundary_conditions::BoundaryConditions, source_terms::SourceTerms, 
solver::Solver, - cache::Cache) where {Mesh, Equations, - InitialCondition, - BoundaryConditions, - SourceTerms, - Solver, - Cache} - performance_counter = PerformanceCounter() - + cache::Cache, + performance_counter::PerformanceCounter) where {Mesh, Equations, + InitialCondition, + BoundaryConditions, + SourceTerms, + Solver, + Cache} new(mesh, equations, initial_condition, boundary_conditions, source_terms, solver, cache, performance_counter) end @@ -74,6 +74,8 @@ function SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver check_periodicity_mesh_boundary_conditions(mesh, _boundary_conditions) + performance_counter = PerformanceCounter() + SemidiscretizationHyperbolic{typeof(mesh), typeof(equations), typeof(initial_condition), typeof(_boundary_conditions), typeof(source_terms), @@ -81,7 +83,7 @@ function SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver initial_condition, _boundary_conditions, source_terms, solver, - cache) + cache, performance_counter) end # Create a new semidiscretization but change some parameters compared to the input. 
@@ -103,6 +105,30 @@ function remake(semi::SemidiscretizationHyperbolic; uEltype = real(semi.solver), source_terms, boundary_conditions, uEltype) end +function Adapt.adapt_structure(to, semi::SemidiscretizationHyperbolic) + if !(typeof(semi.mesh) <: P4estMesh) + error("Adapt.adapt is only supported for semidiscretizations based on P4estMesh") + end + + mesh = semi.mesh + equations = Adapt.adapt_structure(to, semi.equations) + initial_condition = Adapt.adapt_structure(to, semi.initial_condition) + boundary_conditions = Adapt.adapt_structure(to, semi.boundary_conditions) + source_terms = Adapt.adapt_structure(to, semi.source_terms) + solver = Adapt.adapt_structure(to, semi.solver) + cache = Adapt.adapt_structure(to, semi.cache) + performance_counter = semi.performance_counter + + SemidiscretizationHyperbolic{typeof(mesh), typeof(equations), + typeof(initial_condition), + typeof(boundary_conditions), typeof(source_terms), + typeof(solver), typeof(cache)}(mesh, equations, + initial_condition, + boundary_conditions, + source_terms, solver, + cache, performance_counter) +end + # general fallback function digest_boundary_conditions(boundary_conditions, mesh, solver, cache) boundary_conditions @@ -317,6 +343,7 @@ function Base.show(io::IO, ::MIME"text/plain", semi::SemidiscretizationHyperboli summary_line(io, "source terms", semi.source_terms) summary_line(io, "solver", semi.solver |> typeof |> nameof) summary_line(io, "total #DOFs per field", ndofsglobal(semi)) + summary_line(io, "backend", backend_or_nothing(semi.cache.elements)) summary_footer(io) end end diff --git a/src/solvers/dg.jl b/src/solvers/dg.jl index a0fb0d95079..6d54a62098d 100644 --- a/src/solvers/dg.jl +++ b/src/solvers/dg.jl @@ -658,7 +658,12 @@ end # (nvariables(equations), ntuple(_ -> nnodes(dg), ndims(mesh))..., nelements(dg, cache))) else # The following version is reasonably fast and allows us to `resize!(u_ode, ...)`. 
- unsafe_wrap(Array{eltype(u_ode), ndims(mesh) + 2}, pointer(u_ode), + if mesh isa P4estMesh + ArrayType = array_type(cache.elements) + else + ArrayType = Array + end + unsafe_wrap(ArrayType{eltype(u_ode), ndims(mesh) + 2}, pointer(u_ode), (nvariables(equations), ntuple(_ -> nnodes(dg), ndims(mesh))..., nelements(dg, cache))) end @@ -702,7 +707,12 @@ end @assert length(u_ode) == nvariables(equations) * nnodes(dg)^ndims(mesh) * nelements(dg, cache) end - unsafe_wrap(Array{eltype(u_ode), ndims(mesh) + 2}, pointer(u_ode), + if mesh isa P4estMesh + ArrayType = array_type(cache.elements) + else + ArrayType = Array + end + unsafe_wrap(ArrayType{eltype(u_ode), ndims(mesh) + 2}, pointer(u_ode), (nvariables(equations), ntuple(_ -> nnodes(dg), ndims(mesh))..., nelements(dg, cache))) end @@ -752,6 +762,37 @@ function compute_coefficients!(u, func, t, mesh::AbstractMesh{3}, equations, dg: end end +end # @muladd; put it up here because module definition below needs to be at top level + +# For some mesh types, elements next to a surface may have local coordinate systems +# that are not aligned so the nodes may have to be indexed differently. +# `IndexInfo` is used to describe how the nodes should be indexed. +# For example, in 2d a `Tuple` with two `IndexInfo` objects, one for each dimension, +# would be used. +# `first` or `last` indicates that the corresponding index is constant and is either +# the first or the last one. This effectively encodes the position of the surface +# with respect to the local coordinate system. The other `IndexInfo` object(s) +# encode if the index in the corresponding dimension is running forward or backward. +# +# The Enum is wrapped in a module and exported so that the enum values do not pollute +# the global namespace and can only be accessed via `Indexing.value`. 
+module Indexing +@enum IndexInfo begin + first + last + i_forward + i_backward + j_forward + j_backward +end +export IndexInfo +end +using .Indexing + +# Adapt.@adapt_structure macro must be outside of the same `begin ... end` block where +# the type it is used on is defined, otherwise this throws an UndefVar +Adapt.@adapt_structure DG + # Discretizations specific to each mesh type of Trixi.jl # If some functionality is shared by multiple combinations of meshes/solvers, # it is defined in the directory of the most basic mesh and solver type. @@ -765,4 +806,3 @@ include("dgsem_structured/dg.jl") include("dgsem_unstructured/dg.jl") include("dgsem_p4est/dg.jl") include("dgsem_t8code/dg.jl") -end # @muladd diff --git a/src/solvers/dgsem/basis_lobatto_legendre.jl b/src/solvers/dgsem/basis_lobatto_legendre.jl index 3ea668c0264..ce359942c10 100644 --- a/src/solvers/dgsem/basis_lobatto_legendre.jl +++ b/src/solvers/dgsem/basis_lobatto_legendre.jl @@ -126,6 +126,31 @@ In particular, not the nodes themselves are returned. @inline get_nodes(basis::LobattoLegendreBasis) = basis.nodes +function Adapt.adapt_structure(to, basis::LobattoLegendreBasis) + # Do not adapt SVector fields, i.e. 
nodes, weights and inverse_weights + (; nodes, weights, inverse_weights) = basis + inverse_vandermonde_legendre = Adapt.adapt_structure(to, + basis.inverse_vandermonde_legendre) + boundary_interpolation = basis.boundary_interpolation + derivative_matrix = Adapt.adapt_structure(to, basis.derivative_matrix) + derivative_split = Adapt.adapt_structure(to, basis.derivative_split) + derivative_split_transpose = Adapt.adapt_structure(to, + basis.derivative_split_transpose) + derivative_dhat = Adapt.adapt_structure(to, basis.derivative_dhat) + return LobattoLegendreBasis{real(basis), nnodes(basis), typeof(basis.nodes), + typeof(inverse_vandermonde_legendre), + typeof(boundary_interpolation), + typeof(derivative_matrix)}(nodes, + weights, + inverse_weights, + inverse_vandermonde_legendre, + boundary_interpolation, + derivative_matrix, + derivative_split, + derivative_split_transpose, + derivative_dhat) +end + """ integrate(f, u, basis::LobattoLegendreBasis) @@ -213,6 +238,16 @@ end @inline polydeg(mortar::LobattoLegendreMortarL2) = nnodes(mortar) - 1 +function Adapt.adapt_structure(to, mortar::LobattoLegendreMortarL2) + forward_upper = Adapt.adapt_structure(to, mortar.forward_upper) + forward_lower = Adapt.adapt_structure(to, mortar.forward_lower) + reverse_upper = Adapt.adapt_structure(to, mortar.reverse_upper) + reverse_lower = Adapt.adapt_structure(to, mortar.reverse_lower) + return LobattoLegendreMortarL2{real(mortar), nnodes(mortar), typeof(forward_upper), + typeof(reverse_upper)}(forward_upper, forward_lower, + reverse_upper, reverse_lower) +end + # TODO: We can create EC mortars along the lines of the following implementation. # abstract type AbstractMortarEC{RealT} <: AbstractMortar{RealT} end diff --git a/src/solvers/dgsem_p4est/containers.jl b/src/solvers/dgsem_p4est/containers.jl index 3ef9cb2a421..523e7ec3333 100644 --- a/src/solvers/dgsem_p4est/containers.jl +++ b/src/solvers/dgsem_p4est/containers.jl @@ -6,25 +6,32 @@ #! 
format: noindent mutable struct P4estElementContainer{NDIMS, RealT <: Real, uEltype <: Real, NDIMSP1, - NDIMSP2, NDIMSP3} <: AbstractContainer + NDIMSP2, NDIMSP3, + ArrayNDIMSP1 <: DenseArray{RealT, NDIMSP1}, + ArrayNDIMSP2 <: DenseArray{RealT, NDIMSP2}, + ArrayNDIMSP3 <: DenseArray{RealT, NDIMSP3}, + VectorRealT <: DenseVector{RealT}, + VectoruEltype <: DenseVector{uEltype}, + ArrayType, Bool} <: + AbstractHeterogeneousContainer{ArrayType, Bool} # Physical coordinates at each node - node_coordinates::Array{RealT, NDIMSP2} # [orientation, node_i, node_j, node_k, element] + node_coordinates::ArrayNDIMSP2 # [orientation, node_i, node_j, node_k, element] # Jacobian matrix of the transformation # [jacobian_i, jacobian_j, node_i, node_j, node_k, element] where jacobian_i is the first index of the Jacobian matrix,... - jacobian_matrix::Array{RealT, NDIMSP3} + jacobian_matrix::ArrayNDIMSP3 # Contravariant vectors, scaled by J, in Kopriva's blue book called Ja^i_n (i index, n dimension) - contravariant_vectors::Array{RealT, NDIMSP3} # [dimension, index, node_i, node_j, node_k, element] + contravariant_vectors::ArrayNDIMSP3 # [dimension, index, node_i, node_j, node_k, element] # 1/J where J is the Jacobian determinant (determinant of Jacobian matrix) - inverse_jacobian::Array{RealT, NDIMSP1} # [node_i, node_j, node_k, element] + inverse_jacobian::ArrayNDIMSP1 # [node_i, node_j, node_k, element] # Buffer for calculated surface flux - surface_flux_values::Array{uEltype, NDIMSP2} # [variable, i, j, direction, element] + surface_flux_values::ArrayNDIMSP2 # [variable, i, j, direction, element] # internal `resize!`able storage - _node_coordinates::Vector{RealT} - _jacobian_matrix::Vector{RealT} - _contravariant_vectors::Vector{RealT} - _inverse_jacobian::Vector{RealT} - _surface_flux_values::Vector{uEltype} + _node_coordinates::VectorRealT + _jacobian_matrix::VectorRealT + _contravariant_vectors::VectorRealT + _inverse_jacobian::VectorRealT + _surface_flux_values::VectoruEltype 
end @inline function nelements(elements::P4estElementContainer) @@ -51,28 +58,30 @@ function Base.resize!(elements::P4estElementContainer, capacity) n_dims = ndims(elements) n_nodes = size(elements.node_coordinates, 2) n_variables = size(elements.surface_flux_values, 1) + ArrayType = array_type(elements) resize!(_node_coordinates, n_dims * n_nodes^n_dims * capacity) - elements.node_coordinates = unsafe_wrap(Array, pointer(_node_coordinates), + elements.node_coordinates = unsafe_wrap(ArrayType, pointer(_node_coordinates), (n_dims, ntuple(_ -> n_nodes, n_dims)..., capacity)) resize!(_jacobian_matrix, n_dims^2 * n_nodes^n_dims * capacity) - elements.jacobian_matrix = unsafe_wrap(Array, pointer(_jacobian_matrix), + elements.jacobian_matrix = unsafe_wrap(ArrayType, pointer(_jacobian_matrix), (n_dims, n_dims, ntuple(_ -> n_nodes, n_dims)..., capacity)) resize!(_contravariant_vectors, length(_jacobian_matrix)) - elements.contravariant_vectors = unsafe_wrap(Array, pointer(_contravariant_vectors), + elements.contravariant_vectors = unsafe_wrap(ArrayType, + pointer(_contravariant_vectors), size(elements.jacobian_matrix)) resize!(_inverse_jacobian, n_nodes^n_dims * capacity) - elements.inverse_jacobian = unsafe_wrap(Array, pointer(_inverse_jacobian), + elements.inverse_jacobian = unsafe_wrap(ArrayType, pointer(_inverse_jacobian), (ntuple(_ -> n_nodes, n_dims)..., capacity)) resize!(_surface_flux_values, n_variables * n_nodes^(n_dims - 1) * (n_dims * 2) * capacity) - elements.surface_flux_values = unsafe_wrap(Array, pointer(_surface_flux_values), + elements.surface_flux_values = unsafe_wrap(ArrayType, pointer(_surface_flux_values), (n_variables, ntuple(_ -> n_nodes, n_dims - 1)..., n_dims * 2, capacity)) @@ -116,33 +125,106 @@ function init_elements(mesh::Union{P4estMesh{NDIMS, NDIMS, RealT}, NDIMS * 2, nelements)) elements = P4estElementContainer{NDIMS, RealT, uEltype, NDIMS + 1, NDIMS + 2, - NDIMS + 3}(node_coordinates, jacobian_matrix, - contravariant_vectors, - 
inverse_jacobian, surface_flux_values, - _node_coordinates, _jacobian_matrix, - _contravariant_vectors, - _inverse_jacobian, _surface_flux_values) + NDIMS + 3, Array{RealT, NDIMS + 1}, + Array{RealT, NDIMS + 2}, Array{RealT, NDIMS + 3}, + Vector{RealT}, Vector{uEltype}, Array, false}(node_coordinates, + jacobian_matrix, + contravariant_vectors, + inverse_jacobian, + surface_flux_values, + _node_coordinates, + _jacobian_matrix, + _contravariant_vectors, + _inverse_jacobian, + _surface_flux_values) init_elements!(elements, mesh, basis) return elements end -mutable struct P4estInterfaceContainer{NDIMS, uEltype <: Real, NDIMSP2} <: - AbstractContainer - u::Array{uEltype, NDIMSP2} # [primary/secondary, variable, i, j, interface] - neighbor_ids::Matrix{Int} # [primary/secondary, interface] - node_indices::Matrix{NTuple{NDIMS, Symbol}} # [primary/secondary, interface] +# Required methods due to <: AbstractHeterogeneousContainer +function KernelAbstractions.get_backend(elements::P4estElementContainer) + return KernelAbstractions.get_backend(elements.node_coordinates) +end +function Adapt.adapt_structure(to, + elements::P4estElementContainer{NDIMS, RealT, uEltype}) where { + NDIMS, + RealT, + uEltype + } + # Adapt underlying storage + _node_coordinates = Adapt.adapt_structure(to, elements._node_coordinates) + _jacobian_matrix = Adapt.adapt_structure(to, elements._jacobian_matrix) + _contravariant_vectors = Adapt.adapt_structure(to, elements._contravariant_vectors) + _inverse_jacobian = Adapt.adapt_structure(to, elements._inverse_jacobian) + _surface_flux_values = Adapt.adapt_structure(to, elements._surface_flux_values) + + # Wrap arrays again + node_coordinates = unsafe_wrap_or_alloc(to, _node_coordinates, + size(elements.node_coordinates)) + jacobian_matrix = unsafe_wrap_or_alloc(to, _jacobian_matrix, + size(elements.jacobian_matrix)) + contravariant_vectors = unsafe_wrap_or_alloc(to, _contravariant_vectors, + size(jacobian_matrix)) + inverse_jacobian = 
unsafe_wrap_or_alloc(to, _inverse_jacobian, + size(elements.inverse_jacobian)) + surface_flux_values = unsafe_wrap_or_alloc(to, _surface_flux_values, + size(elements.surface_flux_values)) + + new_type_params = (NDIMS, + RealT, + uEltype, + NDIMS + 1, + NDIMS + 2, + NDIMS + 3, + typeof(inverse_jacobian), # ArrayNDIMSP1 + typeof(node_coordinates), # ArrayNDIMSP2 + typeof(jacobian_matrix), # ArrayNDIMSP3 + typeof(_node_coordinates), # VectorRealT + typeof(_surface_flux_values), # VectoruEltype + to, + true) + return P4estElementContainer{new_type_params...}(node_coordinates, + jacobian_matrix, + contravariant_vectors, + inverse_jacobian, + surface_flux_values, + _node_coordinates, + _jacobian_matrix, + _contravariant_vectors, + _inverse_jacobian, + _surface_flux_values) +end + +mutable struct P4estInterfaceContainer{NDIMS, uEltype <: Real, NDIMSP2, + uArray <: DenseArray{uEltype, NDIMSP2}, + IdsMatrix <: DenseMatrix{Int}, + IndicesMatrix <: + DenseMatrix{NTuple{NDIMS, IndexInfo}}, + uVector <: DenseVector{uEltype}, + IdsVector <: DenseVector{Int}, + IndicesVector <: + DenseVector{NTuple{NDIMS, IndexInfo}}, + ArrayType, Bool} <: + AbstractHeterogeneousContainer{ArrayType, Bool} + u::uArray # [primary/secondary, variable, i, j, interface] + neighbor_ids::IdsMatrix # [primary/secondary, interface] + node_indices::IndicesMatrix # [primary/secondary, interface] # internal `resize!`able storage - _u::Vector{uEltype} - _neighbor_ids::Vector{Int} - _node_indices::Vector{NTuple{NDIMS, Symbol}} + _u::uVector + _neighbor_ids::IdsVector + _node_indices::IndicesVector end @inline function ninterfaces(interfaces::P4estInterfaceContainer) size(interfaces.neighbor_ids, 2) end @inline Base.ndims(::P4estInterfaceContainer{NDIMS}) where {NDIMS} = NDIMS +@inline function Base.eltype(::P4estInterfaceContainer{NDIMS, uEltype}) where {NDIMS, + uEltype} + uEltype +end # See explanation of Base.resize! 
for the element container function Base.resize!(interfaces::P4estInterfaceContainer, capacity) @@ -151,17 +233,20 @@ function Base.resize!(interfaces::P4estInterfaceContainer, capacity) n_dims = ndims(interfaces) n_nodes = size(interfaces.u, 3) n_variables = size(interfaces.u, 2) + ArrayType = array_type(interfaces) resize!(_u, 2 * n_variables * n_nodes^(n_dims - 1) * capacity) - interfaces.u = unsafe_wrap(Array, pointer(_u), + interfaces.u = unsafe_wrap(ArrayType, pointer(_u), (2, n_variables, ntuple(_ -> n_nodes, n_dims - 1)..., capacity)) resize!(_neighbor_ids, 2 * capacity) - interfaces.neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), (2, capacity)) + interfaces.neighbor_ids = unsafe_wrap(ArrayType, pointer(_neighbor_ids), + (2, capacity)) resize!(_node_indices, 2 * capacity) - interfaces.node_indices = unsafe_wrap(Array, pointer(_node_indices), (2, capacity)) + interfaces.node_indices = unsafe_wrap(ArrayType, pointer(_node_indices), + (2, capacity)) return nothing end @@ -184,13 +269,19 @@ function init_interfaces(mesh::Union{P4estMesh, T8codeMesh}, equations, basis, e _neighbor_ids = Vector{Int}(undef, 2 * n_interfaces) neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), (2, n_interfaces)) - _node_indices = Vector{NTuple{NDIMS, Symbol}}(undef, 2 * n_interfaces) + _node_indices = Vector{NTuple{NDIMS, IndexInfo}}(undef, 2 * n_interfaces) node_indices = unsafe_wrap(Array, pointer(_node_indices), (2, n_interfaces)) - interfaces = P4estInterfaceContainer{NDIMS, uEltype, NDIMS + 2}(u, neighbor_ids, - node_indices, - _u, _neighbor_ids, - _node_indices) + interfaces = P4estInterfaceContainer{NDIMS, uEltype, NDIMS + 2, + typeof(u), typeof(neighbor_ids), + typeof(node_indices), typeof(_u), + typeof(_neighbor_ids), typeof(_node_indices), + Array, false}(u, + neighbor_ids, + node_indices, + _u, + _neighbor_ids, + _node_indices) init_interfaces!(interfaces, mesh) @@ -203,21 +294,57 @@ function init_interfaces!(interfaces, mesh::P4estMesh) return interfaces 
end -mutable struct P4estBoundaryContainer{NDIMS, uEltype <: Real, NDIMSP1} <: - AbstractContainer - u::Array{uEltype, NDIMSP1} # [variables, i, j, boundary] - neighbor_ids::Vector{Int} # [boundary] - node_indices::Vector{NTuple{NDIMS, Symbol}} # [boundary] +# Required methods due to <: AbstractHeterogeneousContainer +function KernelAbstractions.get_backend(interfaces::P4estInterfaceContainer) + return KernelAbstractions.get_backend(interfaces.u) +end +function Adapt.adapt_structure(to, interfaces::P4estInterfaceContainer) + # Adapt underlying storage + _u = Adapt.adapt_structure(to, interfaces._u) + _neighbor_ids = Adapt.adapt_structure(to, interfaces._neighbor_ids) + _node_indices = Adapt.adapt_structure(to, interfaces._node_indices) + # Wrap arrays again + u = unsafe_wrap_or_alloc(to, _u, size(interfaces.u)) + neighbor_ids = unsafe_wrap_or_alloc(to, _neighbor_ids, size(interfaces.neighbor_ids)) + node_indices = unsafe_wrap_or_alloc(to, _node_indices, size(interfaces.node_indices)) + + NDIMS = ndims(interfaces) + new_type_params = (NDIMS, + eltype(interfaces), + NDIMS + 2, + typeof(u), typeof(neighbor_ids), typeof(node_indices), + typeof(_u), typeof(_neighbor_ids), typeof(_node_indices), + to, + true) + return P4estInterfaceContainer{new_type_params...}(u, neighbor_ids, node_indices, + _u, _neighbor_ids, _node_indices) +end + +mutable struct P4estBoundaryContainer{NDIMS, uEltype <: Real, NDIMSP1, + uArray <: DenseArray{uEltype, NDIMSP1}, + IdsVector <: DenseVector{Int}, + IndicesVector <: + DenseVector{NTuple{NDIMS, IndexInfo}}, + uVector <: DenseVector{uEltype}, ArrayType, + Bool} <: + AbstractHeterogeneousContainer{ArrayType, Bool} + u::uArray # [variables, i, j, boundary] + neighbor_ids::IdsVector # [boundary] + node_indices::IndicesVector # [boundary] name::Vector{Symbol} # [boundary] # internal `resize!`able storage - _u::Vector{uEltype} + _u::uVector end @inline function nboundaries(boundaries::P4estBoundaryContainer) length(boundaries.neighbor_ids) end 
@inline Base.ndims(::P4estBoundaryContainer{NDIMS}) where {NDIMS} = NDIMS +@inline function Base.eltype(::P4estBoundaryContainer{NDIMS, uEltype}) where {NDIMS, + uEltype} + uEltype +end # See explanation of Base.resize! for the element container function Base.resize!(boundaries::P4estBoundaryContainer, capacity) @@ -226,9 +353,10 @@ function Base.resize!(boundaries::P4estBoundaryContainer, capacity) n_dims = ndims(boundaries) n_nodes = size(boundaries.u, 2) n_variables = size(boundaries.u, 1) + ArrayType = array_type(boundaries) resize!(_u, n_variables * n_nodes^(n_dims - 1) * capacity) - boundaries.u = unsafe_wrap(Array, pointer(_u), + boundaries.u = unsafe_wrap(ArrayType, pointer(_u), (n_variables, ntuple(_ -> n_nodes, n_dims - 1)..., capacity)) @@ -257,12 +385,14 @@ function init_boundaries(mesh::Union{P4estMesh, T8codeMesh}, equations, basis, e n_boundaries)) neighbor_ids = Vector{Int}(undef, n_boundaries) - node_indices = Vector{NTuple{NDIMS, Symbol}}(undef, n_boundaries) + node_indices = Vector{NTuple{NDIMS, IndexInfo}}(undef, n_boundaries) names = Vector{Symbol}(undef, n_boundaries) - boundaries = P4estBoundaryContainer{NDIMS, uEltype, NDIMS + 1}(u, neighbor_ids, - node_indices, names, - _u) + boundaries = P4estBoundaryContainer{NDIMS, uEltype, NDIMS + 1, typeof(u), + typeof(neighbor_ids), typeof(node_indices), + typeof(_u), Array, false}(u, neighbor_ids, + node_indices, names, + _u) if n_boundaries > 0 init_boundaries!(boundaries, mesh) @@ -309,6 +439,24 @@ function init_boundaries_iter_face_inner(info_pw, boundaries, boundary_id, mesh) return nothing end +# Required methods due to <: AbstractHeterogeneousContainer +function KernelAbstractions.get_backend(boundaries::P4estBoundaryContainer) + return KernelAbstractions.get_backend(boundaries.u) +end +function Adapt.adapt_structure(to, boundaries::P4estBoundaryContainer) + _u = Adapt.adapt_structure(to, boundaries._u) + u = unsafe_wrap_or_alloc(to, _u, size(boundaries.u)) + neighbor_ids = 
Adapt.adapt_structure(to, boundaries.neighbor_ids) + node_indices = Adapt.adapt_structure(to, boundaries.node_indices) + name = boundaries.name + + NDIMS = ndims(boundaries) + return P4estBoundaryContainer{NDIMS, eltype(boundaries), NDIMS + 1, typeof(u), + typeof(neighbor_ids), typeof(node_indices), + typeof(_u), to, true}(u, neighbor_ids, node_indices, + name, _u) +end + # Container data structure (structure-of-arrays style) for DG L2 mortars # # The positions used in `neighbor_ids` are 1:3 (in 2D) or 1:5 (in 3D), where 1:2 (in 2D) @@ -334,20 +482,33 @@ end # │ └─────────────┴─────────────┘ └───────────────────────────┘ # │ # ⋅────> ξ -mutable struct P4estMortarContainer{NDIMS, uEltype <: Real, NDIMSP1, NDIMSP3} <: - AbstractContainer - u::Array{uEltype, NDIMSP3} # [small/large side, variable, position, i, j, mortar] - neighbor_ids::Matrix{Int} # [position, mortar] - node_indices::Matrix{NTuple{NDIMS, Symbol}} # [small/large, mortar] +mutable struct P4estMortarContainer{NDIMS, uEltype <: Real, NDIMSP1, NDIMSP3, + uArray <: DenseArray{uEltype, NDIMSP3}, + IdsMatrix <: DenseMatrix{Int}, + IndicesMatrix <: + DenseMatrix{NTuple{NDIMS, IndexInfo}}, + uVector <: DenseVector{uEltype}, + IdsVector <: DenseVector{Int}, + IndicesVector <: + DenseVector{NTuple{NDIMS, IndexInfo}}, + ArrayType, Bool} <: + AbstractHeterogeneousContainer{ArrayType, Bool} + u::uArray # [small/large side, variable, position, i, j, mortar] + neighbor_ids::IdsMatrix # [position, mortar] + node_indices::IndicesMatrix # [small/large, mortar] # internal `resize!`able storage - _u::Vector{uEltype} - _neighbor_ids::Vector{Int} - _node_indices::Vector{NTuple{NDIMS, Symbol}} + _u::uVector + _neighbor_ids::IdsVector + _node_indices::IndicesVector end @inline nmortars(mortars::P4estMortarContainer) = size(mortars.neighbor_ids, 2) @inline Base.ndims(::P4estMortarContainer{NDIMS}) where {NDIMS} = NDIMS +@inline function Base.eltype(::P4estMortarContainer{NDIMS, uEltype}) where {NDIMS, + uEltype} + uEltype +end 
# See explanation of Base.resize! for the element container function Base.resize!(mortars::P4estMortarContainer, capacity) @@ -356,18 +517,19 @@ function Base.resize!(mortars::P4estMortarContainer, capacity) n_dims = ndims(mortars) n_nodes = size(mortars.u, 4) n_variables = size(mortars.u, 2) + ArrayType = array_type(mortars) resize!(_u, 2 * n_variables * 2^(n_dims - 1) * n_nodes^(n_dims - 1) * capacity) - mortars.u = unsafe_wrap(Array, pointer(_u), + mortars.u = unsafe_wrap(ArrayType, pointer(_u), (2, n_variables, 2^(n_dims - 1), ntuple(_ -> n_nodes, n_dims - 1)..., capacity)) resize!(_neighbor_ids, (2^(n_dims - 1) + 1) * capacity) - mortars.neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), + mortars.neighbor_ids = unsafe_wrap(ArrayType, pointer(_neighbor_ids), (2^(n_dims - 1) + 1, capacity)) resize!(_node_indices, 2 * capacity) - mortars.node_indices = unsafe_wrap(Array, pointer(_node_indices), (2, capacity)) + mortars.node_indices = unsafe_wrap(ArrayType, pointer(_node_indices), (2, capacity)) return nothing end @@ -391,15 +553,18 @@ function init_mortars(mesh::Union{P4estMesh, T8codeMesh}, equations, basis, elem neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), (2^(NDIMS - 1) + 1, n_mortars)) - _node_indices = Vector{NTuple{NDIMS, Symbol}}(undef, 2 * n_mortars) + _node_indices = Vector{NTuple{NDIMS, IndexInfo}}(undef, 2 * n_mortars) node_indices = unsafe_wrap(Array, pointer(_node_indices), (2, n_mortars)) - mortars = P4estMortarContainer{NDIMS, uEltype, NDIMS + 1, NDIMS + 3}(u, - neighbor_ids, - node_indices, - _u, - _neighbor_ids, - _node_indices) + mortars = P4estMortarContainer{NDIMS, uEltype, NDIMS + 1, NDIMS + 3, typeof(u), + typeof(neighbor_ids), typeof(node_indices), + typeof(_u), typeof(_neighbor_ids), + typeof(_node_indices), Array, false}(u, + neighbor_ids, + node_indices, + _u, + _neighbor_ids, + _node_indices) if n_mortars > 0 init_mortars!(mortars, mesh) @@ -414,6 +579,35 @@ function init_mortars!(mortars, mesh::P4estMesh) return 
mortars end +# Required methods due to <: AbstractHeterogeneousContainer +function KernelAbstractions.get_backend(mortars::P4estMortarContainer) + return KernelAbstractions.get_backend(mortars.u) +end +function Adapt.adapt_structure(to, mortars::P4estMortarContainer) + # Adapt underlying storage + _u = Adapt.adapt_structure(to, mortars._u) + _neighbor_ids = Adapt.adapt_structure(to, mortars._neighbor_ids) + _node_indices = Adapt.adapt_structure(to, mortars._node_indices) + + # Wrap arrays again + u = unsafe_wrap_or_alloc(to, _u, size(mortars.u)) + neighbor_ids = unsafe_wrap_or_alloc(to, _neighbor_ids, size(mortars.neighbor_ids)) + node_indices = unsafe_wrap_or_alloc(to, _node_indices, size(mortars.node_indices)) + + + NDIMS = ndims(mortars) + new_type_params = (NDIMS, + eltype(mortars), + NDIMS + 1, + NDIMS + 3, + typeof(u), typeof(neighbor_ids), typeof(node_indices), + typeof(_u), typeof(_neighbor_ids), typeof(_node_indices), + to, + true) + return P4estMortarContainer{new_type_params...}(u, neighbor_ids, node_indices, + _u, _neighbor_ids, _node_indices) +end + function reinitialize_containers!(mesh::P4estMesh, equations, dg::DGSEM, cache) # Re-initialize elements container @unpack elements = cache @@ -706,17 +900,17 @@ end # Return direction of the face, which is indexed by node_indices @inline function indices2direction(indices) - if indices[1] === :begin + if indices[1] === Indexing.first return 1 - elseif indices[1] === :end + elseif indices[1] === Indexing.last return 2 - elseif indices[2] === :begin + elseif indices[2] === Indexing.first return 3 - elseif indices[2] === :end + elseif indices[2] === Indexing.last return 4 - elseif indices[3] === :begin + elseif indices[3] === Indexing.first return 5 - else # if indices[3] === :end + else # if indices[3] === Indexing.last return 6 end end diff --git a/src/solvers/dgsem_p4est/containers_2d.jl b/src/solvers/dgsem_p4est/containers_2d.jl index 6af6fd6d90e..f306a42d853 100644 --- 
a/src/solvers/dgsem_p4est/containers_2d.jl +++ b/src/solvers/dgsem_p4est/containers_2d.jl @@ -95,24 +95,24 @@ end # relative to the interface. if side == 1 || orientation == 0 # Forward indexing - i = :i_forward + i = Indexing.i_forward else # Backward indexing - i = :i_backward + i = Indexing.i_backward end if faces[side] == 0 # Index face in negative x-direction - interfaces.node_indices[side, interface_id] = (:begin, i) + interfaces.node_indices[side, interface_id] = (Indexing.first, i) elseif faces[side] == 1 # Index face in positive x-direction - interfaces.node_indices[side, interface_id] = (:end, i) + interfaces.node_indices[side, interface_id] = (Indexing.last, i) elseif faces[side] == 2 # Index face in negative y-direction - interfaces.node_indices[side, interface_id] = (i, :begin) + interfaces.node_indices[side, interface_id] = (i, Indexing.first) else # faces[side] == 3 # Index face in positive y-direction - interfaces.node_indices[side, interface_id] = (i, :end) + interfaces.node_indices[side, interface_id] = (i, Indexing.last) end end @@ -124,16 +124,16 @@ end face, boundary_id) if face == 0 # Index face in negative x-direction - boundaries.node_indices[boundary_id] = (:begin, :i_forward) + boundaries.node_indices[boundary_id] = (Indexing.first, Indexing.i_forward) elseif face == 1 # Index face in positive x-direction - boundaries.node_indices[boundary_id] = (:end, :i_forward) + boundaries.node_indices[boundary_id] = (Indexing.last, Indexing.i_forward) elseif face == 2 # Index face in negative y-direction - boundaries.node_indices[boundary_id] = (:i_forward, :begin) + boundaries.node_indices[boundary_id] = (Indexing.i_forward, Indexing.first) else # face == 3 # Index face in positive y-direction - boundaries.node_indices[boundary_id] = (:i_forward, :end) + boundaries.node_indices[boundary_id] = (Indexing.i_forward, Indexing.last) end return boundaries @@ -148,24 +148,24 @@ end # relative to the mortar. 
if side == 1 || orientation == 0 # Forward indexing for small side or orientation == 0 - i = :i_forward + i = Indexing.i_forward else # Backward indexing for large side with reversed orientation - i = :i_backward + i = Indexing.i_backward end if faces[side] == 0 # Index face in negative x-direction - mortars.node_indices[side, mortar_id] = (:begin, i) + mortars.node_indices[side, mortar_id] = (Indexing.first, i) elseif faces[side] == 1 # Index face in positive x-direction - mortars.node_indices[side, mortar_id] = (:end, i) + mortars.node_indices[side, mortar_id] = (Indexing.last, i) elseif faces[side] == 2 # Index face in negative y-direction - mortars.node_indices[side, mortar_id] = (i, :begin) + mortars.node_indices[side, mortar_id] = (i, Indexing.first) else # faces[side] == 3 # Index face in positive y-direction - mortars.node_indices[side, mortar_id] = (i, :end) + mortars.node_indices[side, mortar_id] = (i, Indexing.last) end end diff --git a/src/solvers/dgsem_p4est/containers_3d.jl b/src/solvers/dgsem_p4est/containers_3d.jl index 7e383924ba7..88bbc693a2a 100644 --- a/src/solvers/dgsem_p4est/containers_3d.jl +++ b/src/solvers/dgsem_p4est/containers_3d.jl @@ -81,11 +81,11 @@ end faces, orientation, interface_id) # Iterate over primary and secondary element for side in 1:2 - # Align interface at the primary element (primary element has surface indices (:i_forward, :j_forward)). + # Align interface at the primary element (primary element has surface indices (Indexing.i_forward, Indexing.j_forward)). # The secondary element needs to be indexed differently. 
if side == 1 - surface_index1 = :i_forward - surface_index2 = :j_forward + surface_index1 = Indexing.i_forward + surface_index2 = Indexing.j_forward else surface_index1, surface_index2 = orientation_to_indices_p4est(faces[2], faces[1], @@ -94,28 +94,28 @@ end if faces[side] == 0 # Index face in negative x-direction - interfaces.node_indices[side, interface_id] = (:begin, surface_index1, + interfaces.node_indices[side, interface_id] = (Indexing.first, surface_index1, surface_index2) elseif faces[side] == 1 # Index face in positive x-direction - interfaces.node_indices[side, interface_id] = (:end, surface_index1, + interfaces.node_indices[side, interface_id] = (Indexing.last, surface_index1, surface_index2) elseif faces[side] == 2 # Index face in negative y-direction - interfaces.node_indices[side, interface_id] = (surface_index1, :begin, + interfaces.node_indices[side, interface_id] = (surface_index1, Indexing.first, surface_index2) elseif faces[side] == 3 # Index face in positive y-direction - interfaces.node_indices[side, interface_id] = (surface_index1, :end, + interfaces.node_indices[side, interface_id] = (surface_index1, Indexing.last, surface_index2) elseif faces[side] == 4 # Index face in negative z-direction interfaces.node_indices[side, interface_id] = (surface_index1, - surface_index2, :begin) + surface_index2, Indexing.first) else # faces[side] == 5 # Index face in positive z-direction interfaces.node_indices[side, interface_id] = (surface_index1, - surface_index2, :end) + surface_index2, Indexing.last) end end @@ -127,22 +127,22 @@ end face, boundary_id) if face == 0 # Index face in negative x-direction - boundaries.node_indices[boundary_id] = (:begin, :i_forward, :j_forward) + boundaries.node_indices[boundary_id] = (Indexing.first, Indexing.i_forward, Indexing.j_forward) elseif face == 1 # Index face in positive x-direction - boundaries.node_indices[boundary_id] = (:end, :i_forward, :j_forward) + boundaries.node_indices[boundary_id] = (Indexing.last, 
Indexing.i_forward, Indexing.j_forward) elseif face == 2 # Index face in negative y-direction - boundaries.node_indices[boundary_id] = (:i_forward, :begin, :j_forward) + boundaries.node_indices[boundary_id] = (Indexing.i_forward, Indexing.first, Indexing.j_forward) elseif face == 3 # Index face in positive y-direction - boundaries.node_indices[boundary_id] = (:i_forward, :end, :j_forward) + boundaries.node_indices[boundary_id] = (Indexing.i_forward, Indexing.last, Indexing.j_forward) elseif face == 4 # Index face in negative z-direction - boundaries.node_indices[boundary_id] = (:i_forward, :j_forward, :begin) + boundaries.node_indices[boundary_id] = (Indexing.i_forward, Indexing.j_forward, Indexing.first) else # face == 5 # Index face in positive z-direction - boundaries.node_indices[boundary_id] = (:i_forward, :j_forward, :end) + boundaries.node_indices[boundary_id] = (Indexing.i_forward, Indexing.j_forward, Indexing.last) end return boundaries @@ -156,8 +156,8 @@ end # Align mortar at small side. # The large side needs to be indexed differently. 
if side == 1 - surface_index1 = :i_forward - surface_index2 = :j_forward + surface_index1 = Indexing.i_forward + surface_index2 = Indexing.j_forward else surface_index1, surface_index2 = orientation_to_indices_p4est(faces[2], faces[1], @@ -166,28 +166,28 @@ end if faces[side] == 0 # Index face in negative x-direction - mortars.node_indices[side, mortar_id] = (:begin, surface_index1, + mortars.node_indices[side, mortar_id] = (Indexing.first, surface_index1, surface_index2) elseif faces[side] == 1 # Index face in positive x-direction - mortars.node_indices[side, mortar_id] = (:end, surface_index1, + mortars.node_indices[side, mortar_id] = (Indexing.last, surface_index1, surface_index2) elseif faces[side] == 2 # Index face in negative y-direction - mortars.node_indices[side, mortar_id] = (surface_index1, :begin, + mortars.node_indices[side, mortar_id] = (surface_index1, Indexing.first, surface_index2) elseif faces[side] == 3 # Index face in positive y-direction - mortars.node_indices[side, mortar_id] = (surface_index1, :end, + mortars.node_indices[side, mortar_id] = (surface_index1, Indexing.last, surface_index2) elseif faces[side] == 4 # Index face in negative z-direction mortars.node_indices[side, mortar_id] = (surface_index1, surface_index2, - :begin) + Indexing.first) else # faces[side] == 5 # Index face in positive z-direction mortars.node_indices[side, mortar_id] = (surface_index1, surface_index2, - :end) + Indexing.last) end end @@ -228,8 +228,8 @@ function orientation_to_indices_p4est(my_face, other_face, orientation_code) # ↑ ↑ # │ │ # └───> ξ └───> ξ - surface_index1 = :i_forward - surface_index2 = :j_forward + surface_index1 = Indexing.i_forward + surface_index2 = Indexing.j_forward elseif ((lower && orientation_code == 2) # Corner 0 of my side matches corner 2 of other side || (!lower && orientation_code == 1)) # Corner 0 of other side matches corner 1 of my side @@ -241,8 +241,8 @@ function orientation_to_indices_p4est(my_face, other_face, 
orientation_code) # ↑ │ # │ ↓ # └───> ξ ξ - surface_index1 = :j_backward - surface_index2 = :i_forward + surface_index1 = Indexing.j_backward + surface_index2 = Indexing.i_forward elseif ((lower && orientation_code == 1) # Corner 0 of my side matches corner 1 of other side || (!lower && orientation_code == 2)) # Corner 0 of other side matches corner 2 of my side @@ -254,8 +254,8 @@ function orientation_to_indices_p4est(my_face, other_face, orientation_code) # ↑ ↑ # │ │ # └───> ξ η <───┘ - surface_index1 = :j_forward - surface_index2 = :i_backward + surface_index1 = Indexing.j_forward + surface_index2 = Indexing.i_backward else # orientation_code == 3 # Corner 0 of my side matches corner 3 of other side and # corner 0 of other side matches corner 3 of my side. @@ -267,8 +267,8 @@ function orientation_to_indices_p4est(my_face, other_face, orientation_code) # ↑ │ # │ ↓ # └───> ξ η - surface_index1 = :i_backward - surface_index2 = :j_backward + surface_index1 = Indexing.i_backward + surface_index2 = Indexing.j_backward end else # flipped if orientation_code == 0 @@ -281,8 +281,8 @@ function orientation_to_indices_p4est(my_face, other_face, orientation_code) # ↑ ↑ # │ │ # └───> ξ └───> η - surface_index1 = :j_forward - surface_index2 = :i_forward + surface_index1 = Indexing.j_forward + surface_index2 = Indexing.i_forward elseif orientation_code == 2 # Corner 0 of my side matches corner 2 of other side and # corner 0 of other side matches corner 2 of my side. @@ -294,8 +294,8 @@ function orientation_to_indices_p4est(my_face, other_face, orientation_code) # ↑ │ # │ ↓ # └───> ξ η - surface_index1 = :i_forward - surface_index2 = :j_backward + surface_index1 = Indexing.i_forward + surface_index2 = Indexing.j_backward elseif orientation_code == 1 # Corner 0 of my side matches corner 1 of other side and # corner 0 of other side matches corner 1 of my side. 
@@ -307,8 +307,8 @@ function orientation_to_indices_p4est(my_face, other_face, orientation_code) # ↑ ↑ # │ │ # └───> ξ ξ <───┘ - surface_index1 = :i_backward - surface_index2 = :j_forward + surface_index1 = Indexing.i_backward + surface_index2 = Indexing.j_forward else # orientation_code == 3 # Corner 0 of my side matches corner 3 of other side and # corner 0 of other side matches corner 3 of my side. @@ -320,8 +320,8 @@ function orientation_to_indices_p4est(my_face, other_face, orientation_code) # ↑ │ # │ ↓ # └───> ξ ξ - surface_index1 = :j_backward - surface_index2 = :i_backward + surface_index1 = Indexing.j_backward + surface_index2 = Indexing.i_backward end end diff --git a/src/solvers/dgsem_p4est/containers_parallel.jl b/src/solvers/dgsem_p4est/containers_parallel.jl index 676b37efff3..6168d247c12 100644 --- a/src/solvers/dgsem_p4est/containers_parallel.jl +++ b/src/solvers/dgsem_p4est/containers_parallel.jl @@ -5,15 +5,20 @@ @muladd begin #! format: noindent -mutable struct P4estMPIInterfaceContainer{NDIMS, uEltype <: Real, NDIMSP2} <: - AbstractContainer - u::Array{uEltype, NDIMSP2} # [primary/secondary, variable, i, j, interface] - local_neighbor_ids::Vector{Int} # [interface] - node_indices::Vector{NTuple{NDIMS, Symbol}} # [interface] - local_sides::Vector{Int} # [interface] - +mutable struct P4estMPIInterfaceContainer{NDIMS, uEltype <: Real, NDIMSP2, + uArray <: DenseArray{uEltype, NDIMSP2}, + VecInt <: DenseVector{Int}, + IndicesVector <: + DenseVector{NTuple{NDIMS, IndexInfo}}, + uVector <: DenseVector{uEltype}, + ArrayType, Bool} <: + AbstractHeterogeneousContainer{ArrayType, Bool} + u::uArray # [primary/secondary, variable, i, j, interface] + local_neighbor_ids::VecInt # [interface] + node_indices::IndicesVector # [interface] + local_sides::VecInt # [interface] # internal `resize!`able storage - _u::Vector{uEltype} + _u::uVector end @inline function nmpiinterfaces(interfaces::P4estMPIInterfaceContainer) @@ -27,9 +32,10 @@ function 
Base.resize!(mpi_interfaces::P4estMPIInterfaceContainer, capacity) n_dims = ndims(mpi_interfaces) n_nodes = size(mpi_interfaces.u, 3) n_variables = size(mpi_interfaces.u, 2) + ArrayType = array_type(mpi_interfaces) resize!(_u, 2 * n_variables * n_nodes^(n_dims - 1) * capacity) - mpi_interfaces.u = unsafe_wrap(Array, pointer(_u), + mpi_interfaces.u = unsafe_wrap(ArrayType, pointer(_u), (2, n_variables, ntuple(_ -> n_nodes, n_dims - 1)..., capacity)) @@ -60,15 +66,18 @@ function init_mpi_interfaces(mesh::Union{ParallelP4estMesh, ParallelT8codeMesh}, local_neighbor_ids = Vector{Int}(undef, n_mpi_interfaces) - node_indices = Vector{NTuple{NDIMS, Symbol}}(undef, n_mpi_interfaces) + node_indices = Vector{NTuple{NDIMS, IndexInfo}}(undef, n_mpi_interfaces) local_sides = Vector{Int}(undef, n_mpi_interfaces) - mpi_interfaces = P4estMPIInterfaceContainer{NDIMS, uEltype, NDIMS + 2}(u, - local_neighbor_ids, - node_indices, - local_sides, - _u) + mpi_interfaces = P4estMPIInterfaceContainer{NDIMS, uEltype, NDIMS + 2, + typeof(u), typeof(local_neighbor_ids), + typeof(node_indices), typeof(_u), + Array, false}(u, + local_neighbor_ids, + node_indices, + local_sides, + _u) init_mpi_interfaces!(mpi_interfaces, mesh) @@ -81,6 +90,29 @@ function init_mpi_interfaces!(mpi_interfaces, mesh::ParallelP4estMesh) return mpi_interfaces end +# Required methods due to <: AbstractHeterogeneousContainer +function KernelAbstractions.get_backend(mpi_interfaces::P4estMPIInterfaceContainer) + return KernelAbstractions.get_backend(mpi_interfaces.u) +end +function Adapt.adapt_structure(to, mpi_interfaces::P4estMPIInterfaceContainer) + # Adapt Vectors and underlying storage + _u = Adapt.adapt_structure(to, mpi_interfaces._u) + local_neighbor_ids = Adapt.adapt_structure(to, mpi_interfaces.local_neighbor_ids) + node_indices = Adapt.adapt_structure(to, mpi_interfaces.node_indices) + local_sides = Adapt.adapt_structure(to, mpi_interfaces.local_sides) + + # Wrap array again + u = unsafe_wrap_or_alloc(to, _u, 
size(mpi_interfaces.u)) + + NDIMS = ndims(mpi_interfaces) + return P4estMPIInterfaceContainer{NDIMS, eltype(mpi_interfaces.u), + NDIMS + 2, + typeof(u), typeof(local_neighbor_ids), + typeof(node_indices), typeof(_u), + to, true}(u, local_neighbor_ids, node_indices, + local_sides, _u) +end + # Container data structure (structure-of-arrays style) for DG L2 mortars # # Similar to `P4estMortarContainer`. The field `neighbor_ids` has been split up into @@ -88,15 +120,19 @@ end # available elements belonging to a particular MPI mortar. Furthermore, `normal_directions` holds # the normal vectors on the surface of the small elements for each mortar. mutable struct P4estMPIMortarContainer{NDIMS, uEltype <: Real, RealT <: Real, NDIMSP1, - NDIMSP2, NDIMSP3} <: AbstractContainer - u::Array{uEltype, NDIMSP3} # [small/large side, variable, position, i, j, mortar] - local_neighbor_ids::Vector{Vector{Int}} # [mortar][ids] - local_neighbor_positions::Vector{Vector{Int}} # [mortar][positions] - node_indices::Matrix{NTuple{NDIMS, Symbol}} # [small/large, mortar] - normal_directions::Array{RealT, NDIMSP2} # [dimension, i, j, position, mortar] + NDIMSP2, NDIMSP3, + uArray <: DenseArray{uEltype, NDIMSP3}, + uVector <: DenseVector{uEltype}, + ArrayType, Bool} <: + AbstractHeterogeneousContainer{ArrayType, Bool} + u::uArray # [small/large side, variable, position, i, j, mortar] + local_neighbor_ids::Vector{Vector{Int}} # [mortar][ids] + local_neighbor_positions::Vector{Vector{Int}} # [mortar][positions] + node_indices::Matrix{NTuple{NDIMS, IndexInfo}} # [small/large, mortar] + normal_directions::Array{RealT, NDIMSP2} # [dimension, i, j, position, mortar] # internal `resize!`able storage - _u::Vector{uEltype} - _node_indices::Vector{NTuple{NDIMS, Symbol}} + _u::uVector + _node_indices::Vector{NTuple{NDIMS, IndexInfo}} _normal_directions::Vector{RealT} end @@ -153,7 +189,7 @@ function init_mpi_mortars(mesh::Union{ParallelP4estMesh, ParallelT8codeMesh}, eq local_neighbor_ids = 
fill(Vector{Int}(), n_mpi_mortars) local_neighbor_positions = fill(Vector{Int}(), n_mpi_mortars) - _node_indices = Vector{NTuple{NDIMS, Symbol}}(undef, 2 * n_mpi_mortars) + _node_indices = Vector{NTuple{NDIMS, IndexInfo}}(undef, 2 * n_mpi_mortars) node_indices = unsafe_wrap(Array, pointer(_node_indices), (2, n_mpi_mortars)) _normal_directions = Vector{RealT}(undef, @@ -164,11 +200,13 @@ function init_mpi_mortars(mesh::Union{ParallelP4estMesh, ParallelT8codeMesh}, eq 2^(NDIMS - 1), n_mpi_mortars)) mpi_mortars = P4estMPIMortarContainer{NDIMS, uEltype, RealT, NDIMS + 1, NDIMS + 2, - NDIMS + 3}(u, local_neighbor_ids, - local_neighbor_positions, - node_indices, normal_directions, - _u, _node_indices, - _normal_directions) + NDIMS + 3, typeof(u), + typeof(_u), + Array, false}(u, local_neighbor_ids, + local_neighbor_positions, + node_indices, normal_directions, + _u, _node_indices, + _normal_directions) if n_mpi_mortars > 0 init_mpi_mortars!(mpi_mortars, mesh, basis, elements) @@ -184,6 +222,36 @@ function init_mpi_mortars!(mpi_mortars, mesh::ParallelP4estMesh, basis, elements return mpi_mortars end +# Required methods due to <: AbstractHeterogeneousContainer +function KernelAbstractions.get_backend(mpi_mortars::P4estMPIMortarContainer) + return KernelAbstractions.get_backend(mpi_mortars.u) +end +function Adapt.adapt_structure(to, mpi_mortars::P4estMPIMortarContainer) + # TODO: Vector of Vector type data structure does not work on GPUs, + # must be redesigned. 
This skeleton implementation here exists just + # for compatibility with the rest of the KA.jl solver code + + _u = Adapt.adapt_structure(to, mpi_mortars._u) + _node_indices = mpi_mortars._node_indices + _normal_directions = mpi_mortars._normal_directions + + u = unsafe_wrap_or_alloc(to, _u, size(mpi_mortars.u)) + local_neighbor_ids = mpi_mortars.local_neighbor_ids + local_neighbor_positions = mpi_mortars.local_neighbor_positions + node_indices = mpi_mortars.node_indices + normal_directions = mpi_mortars.normal_directions + + NDIMS = ndims(mpi_mortars) + return P4estMPIMortarContainer{NDIMS, eltype(_u), + eltype(_normal_directions), + NDIMS + 1, NDIMS + 2, NDIMS + 3, + typeof(u), typeof(_u), + to, true}(u, local_neighbor_ids, + local_neighbor_positions, + node_indices, normal_directions, _u, + _node_indices, _normal_directions) +end + # Overload init! function for regular interfaces, regular mortars and boundaries since they must # call the appropriate init_surfaces! function for parallel p4est meshes function init_interfaces!(interfaces, mesh::ParallelP4estMesh) diff --git a/src/solvers/dgsem_p4est/containers_parallel_2d.jl b/src/solvers/dgsem_p4est/containers_parallel_2d.jl index d531d33821b..198b3280742 100644 --- a/src/solvers/dgsem_p4est/containers_parallel_2d.jl +++ b/src/solvers/dgsem_p4est/containers_parallel_2d.jl @@ -14,24 +14,24 @@ # relative to the interface.
if local_side == 1 || orientation == 0 # Forward indexing - i = :i_forward + i = Indexing.i_forward else # Backward indexing - i = :i_backward + i = Indexing.i_backward end if faces[local_side] == 0 # Index face in negative x-direction - mpi_interfaces.node_indices[mpi_interface_id] = (:begin, i) + mpi_interfaces.node_indices[mpi_interface_id] = (Indexing.first, i) elseif faces[local_side] == 1 # Index face in positive x-direction - mpi_interfaces.node_indices[mpi_interface_id] = (:end, i) + mpi_interfaces.node_indices[mpi_interface_id] = (Indexing.last, i) elseif faces[local_side] == 2 # Index face in negative y-direction - mpi_interfaces.node_indices[mpi_interface_id] = (i, :begin) + mpi_interfaces.node_indices[mpi_interface_id] = (i, Indexing.first) else # faces[local_side] == 3 # Index face in positive y-direction - mpi_interfaces.node_indices[mpi_interface_id] = (i, :end) + mpi_interfaces.node_indices[mpi_interface_id] = (i, Indexing.last) end return mpi_interfaces diff --git a/src/solvers/dgsem_p4est/containers_parallel_3d.jl b/src/solvers/dgsem_p4est/containers_parallel_3d.jl index 56f0a543b97..37cbc6bb236 100644 --- a/src/solvers/dgsem_p4est/containers_parallel_3d.jl +++ b/src/solvers/dgsem_p4est/containers_parallel_3d.jl @@ -9,11 +9,11 @@ @inline function init_mpi_interface_node_indices!(mpi_interfaces::P4estMPIInterfaceContainer{3}, faces, local_side, orientation, mpi_interface_id) - # Align interface at the primary element (primary element has surface indices (:i_forward, :j_forward)). + # Align interface at the primary element (primary element has surface indices (Indexing.i_forward, Indexing.j_forward)). # The secondary element needs to be indexed differently. 
if local_side == 1 - surface_index1 = :i_forward - surface_index2 = :j_forward + surface_index1 = Indexing.i_forward + surface_index2 = Indexing.j_forward else # local_side == 2 surface_index1, surface_index2 = orientation_to_indices_p4est(faces[2], faces[1], @@ -22,28 +22,28 @@ if faces[local_side] == 0 # Index face in negative x-direction - mpi_interfaces.node_indices[mpi_interface_id] = (:begin, surface_index1, + mpi_interfaces.node_indices[mpi_interface_id] = (Indexing.first, surface_index1, surface_index2) elseif faces[local_side] == 1 # Index face in positive x-direction - mpi_interfaces.node_indices[mpi_interface_id] = (:end, surface_index1, + mpi_interfaces.node_indices[mpi_interface_id] = (Indexing.last, surface_index1, surface_index2) elseif faces[local_side] == 2 # Index face in negative y-direction - mpi_interfaces.node_indices[mpi_interface_id] = (surface_index1, :begin, + mpi_interfaces.node_indices[mpi_interface_id] = (surface_index1, Indexing.first, surface_index2) elseif faces[local_side] == 3 # Index face in positive y-direction - mpi_interfaces.node_indices[mpi_interface_id] = (surface_index1, :end, + mpi_interfaces.node_indices[mpi_interface_id] = (surface_index1, Indexing.last, surface_index2) elseif faces[local_side] == 4 # Index face in negative z-direction mpi_interfaces.node_indices[mpi_interface_id] = (surface_index1, surface_index2, - :begin) + Indexing.first) else # faces[local_side] == 5 # Index face in positive z-direction mpi_interfaces.node_indices[mpi_interface_id] = (surface_index1, surface_index2, - :end) + Indexing.last) end return mpi_interfaces @@ -57,8 +57,8 @@ end # Align mortar at small side. # The large side needs to be indexed differently. 
if side == 1 - surface_index1 = :i_forward - surface_index2 = :j_forward + surface_index1 = Indexing.i_forward + surface_index2 = Indexing.j_forward else surface_index1, surface_index2 = orientation_to_indices_p4est(faces[2], faces[1], @@ -67,28 +67,28 @@ end if faces[side] == 0 # Index face in negative x-direction - mortars.node_indices[side, mortar_id] = (:begin, surface_index1, + mortars.node_indices[side, mortar_id] = (Indexing.first, surface_index1, surface_index2) elseif faces[side] == 1 # Index face in positive x-direction - mortars.node_indices[side, mortar_id] = (:end, surface_index1, + mortars.node_indices[side, mortar_id] = (Indexing.last, surface_index1, surface_index2) elseif faces[side] == 2 # Index face in negative y-direction - mortars.node_indices[side, mortar_id] = (surface_index1, :begin, + mortars.node_indices[side, mortar_id] = (surface_index1, Indexing.first, surface_index2) elseif faces[side] == 3 # Index face in positive y-direction - mortars.node_indices[side, mortar_id] = (surface_index1, :end, + mortars.node_indices[side, mortar_id] = (surface_index1, Indexing.last, surface_index2) elseif faces[side] == 4 # Index face in negative z-direction mortars.node_indices[side, mortar_id] = (surface_index1, surface_index2, - :begin) + Indexing.first) else # faces[side] == 5 # Index face in positive z-direction mortars.node_indices[side, mortar_id] = (surface_index1, surface_index2, - :end) + Indexing.last) end end diff --git a/src/solvers/dgsem_p4est/dg.jl b/src/solvers/dgsem_p4est/dg.jl index 8197ad4a2d0..1ebc2fb7579 100644 --- a/src/solvers/dgsem_p4est/dg.jl +++ b/src/solvers/dgsem_p4est/dg.jl @@ -51,6 +51,7 @@ include("dg_2d.jl") include("dg_2d_parabolic.jl") include("dg_3d.jl") +include("dg_3d_gpu.jl") include("dg_3d_parabolic.jl") include("dg_parallel.jl") diff --git a/src/solvers/dgsem_p4est/dg_2d.jl b/src/solvers/dgsem_p4est/dg_2d.jl index 6a7bbbb71d7..a8142849d42 100644 --- a/src/solvers/dgsem_p4est/dg_2d.jl +++ 
b/src/solvers/dgsem_p4est/dg_2d.jl @@ -24,7 +24,7 @@ function create_cache(mesh::Union{P4estMesh{2}, T8codeMesh{2}}, equations, u_threaded) end -# index_to_start_step_2d(index::Symbol, index_range) +# index_to_start_step_2d(index::IndexInfo, index_range) # # Given a symbolic `index` and an `indexrange` (usually `eachnode(dg)`), # return `index_start, index_step`, i.e., a tuple containing @@ -45,17 +45,17 @@ end # i_volume += i_volume_step # j_volume += j_volume_step # end -@inline function index_to_start_step_2d(index::Symbol, index_range) +@inline function index_to_start_step_2d(index::IndexInfo, index_range) index_begin = first(index_range) index_end = last(index_range) - if index === :begin + if index === Indexing.first return index_begin, 0 - elseif index === :end + elseif index === Indexing.last return index_end, 0 - elseif index === :i_forward + elseif index === Indexing.i_forward return index_begin, 1 - else # if index === :i_backward + else # if index === Indexing.i_backward return index_end, -1 end end @@ -149,7 +149,7 @@ function calc_interface_flux!(surface_flux_values, # Initiate the secondary index to be used in the surface for loop. # This index on the primary side will always run forward but # the secondary index might need to run backwards for flipped sides. 
- if :i_backward in secondary_indices + if Indexing.i_backward in secondary_indices node_secondary = index_end node_secondary_step = -1 else @@ -615,7 +615,7 @@ end large_indices = node_indices[2, mortar] large_direction = indices2direction(large_indices) - if :i_backward in large_indices + if Indexing.i_backward in large_indices for i in eachnode(dg) for v in eachvariable(equations) surface_flux_values[v, end + 1 - i, large_direction, large_element] = u_buffer[v, diff --git a/src/solvers/dgsem_p4est/dg_2d_parabolic.jl b/src/solvers/dgsem_p4est/dg_2d_parabolic.jl index 9f8306031c1..b3f69bb5e94 100644 --- a/src/solvers/dgsem_p4est/dg_2d_parabolic.jl +++ b/src/solvers/dgsem_p4est/dg_2d_parabolic.jl @@ -428,7 +428,7 @@ end large_indices = node_indices[2, mortar] large_direction = indices2direction(large_indices) - if :i_backward in large_indices + if Indexing.i_backward in large_indices for i in eachnode(dg) for v in eachvariable(equations) surface_flux_values[v, end + 1 - i, large_direction, large_element] = u_buffer[v, @@ -637,7 +637,7 @@ function calc_interface_flux!(surface_flux_values, # Initiate the secondary index to be used in the surface for loop. # This index on the primary side will always run forward but # the secondary index might need to run backwards for flipped sides. 
- if :i_backward in secondary_indices + if Indexing.i_backward in secondary_indices node_secondary = index_end node_secondary_step = -1 else diff --git a/src/solvers/dgsem_p4est/dg_2d_parallel.jl b/src/solvers/dgsem_p4est/dg_2d_parallel.jl index 5d5225c6f9e..9c03ee8a113 100644 --- a/src/solvers/dgsem_p4est/dg_2d_parallel.jl +++ b/src/solvers/dgsem_p4est/dg_2d_parallel.jl @@ -71,7 +71,7 @@ function calc_mpi_interface_flux!(surface_flux_values, # Initiate the node index to be used in the surface for loop, # the surface flux storage must be indexed in alignment with the local element indexing - if :i_backward in local_indices + if Indexing.i_backward in local_indices surface_node = index_end surface_node_step = -1 else @@ -308,7 +308,7 @@ end # correct orientation. # Note that the index of the small sides will always run forward but # the index of the large side might need to run backwards for flipped sides. - if :i_backward in large_indices + if Indexing.i_backward in large_indices for i in eachnode(dg) for v in eachvariable(equations) surface_flux_values[v, end + 1 - i, large_direction, element] = u_buffer[v, diff --git a/src/solvers/dgsem_p4est/dg_3d.jl b/src/solvers/dgsem_p4est/dg_3d.jl index ac87f339d7c..e0896f5d84e 100644 --- a/src/solvers/dgsem_p4est/dg_3d.jl +++ b/src/solvers/dgsem_p4est/dg_3d.jl @@ -13,23 +13,23 @@ function create_cache(mesh::Union{P4estMesh{3}, T8codeMesh{3}}, equations, fstar_primary_threaded = [Array{uEltype, 4}(undef, nvariables(equations), nnodes(mortar_l2), nnodes(mortar_l2), 4) - for _ in 1:Threads.nthreads()] + for _ in 1:Threads.nthreads()] |> VecOfArrays fstar_secondary_threaded = [Array{uEltype, 4}(undef, nvariables(equations), nnodes(mortar_l2), nnodes(mortar_l2), 4) - for _ in 1:Threads.nthreads()] + for _ in 1:Threads.nthreads()] |> VecOfArrays fstar_tmp_threaded = [Array{uEltype, 3}(undef, nvariables(equations), nnodes(mortar_l2), nnodes(mortar_l2)) - for _ in 1:Threads.nthreads()] + for _ in 1:Threads.nthreads()] |> 
VecOfArrays u_threaded = [Array{uEltype, 3}(undef, nvariables(equations), nnodes(mortar_l2), nnodes(mortar_l2)) - for _ in 1:Threads.nthreads()] + for _ in 1:Threads.nthreads()] |> VecOfArrays (; fstar_primary_threaded, fstar_secondary_threaded, fstar_tmp_threaded, u_threaded) end -# index_to_start_step_3d(index::Symbol, index_range) +# index_to_start_step_3d(index::IndexInfo, index_range) # # Given a symbolic `index` and an `indexrange` (usually `eachnode(dg)`), # return `index_start, index_step_i, index_step_j`, i.e., a tuple containing @@ -58,36 +58,38 @@ end # j_volume += j_volume_step_j # k_volume += k_volume_step_j # end -@inline function index_to_start_step_3d(index::Symbol, index_range) +@inline function index_to_start_step_3d(index::IndexInfo, index_range) index_begin = first(index_range) index_end = last(index_range) - if index === :begin + if index === Indexing.first return index_begin, 0, 0 - elseif index === :end + elseif index === Indexing.last return index_end, 0, 0 - elseif index === :i_forward + elseif index === Indexing.i_forward return index_begin, 1, index_begin - index_end - 1 - elseif index === :i_backward + elseif index === Indexing.i_backward return index_end, -1, index_end + 1 - index_begin - elseif index === :j_forward + elseif index === Indexing.j_forward return index_begin, 0, 1 - else # if index === :j_backward + else # if index === Indexing.j_backward return index_end, 0, -1 end end -# Extract the two varying indices from a symbolic index tuple. -# For example, `surface_indices((:i_forward, :end, :j_forward)) == (:i_forward, :j_forward)`. -@inline function surface_indices(indices::NTuple{3, Symbol}) +# Extract the two varying indices from an IndexInfo tuple. +# For example, +# `surface_indices((Indexing.i_forward, Indexing.last, Indexing.j_forward)) == +# (Indexing.i_forward, Indexing.j_forward)`. 
+@inline function surface_indices(indices::NTuple{3, IndexInfo}) i1, i2, i3 = indices index = i1 - (index === :begin || index === :end) && return (i2, i3) + (index === Indexing.first || index === Indexing.last) && return (i2, i3) index = i2 - (index === :begin || index === :end) && return (i1, i3) + (index === Indexing.first || index === Indexing.last) && return (i1, i3) - # i3 in (:begin, :end) + # i3 in (Indexing.first, Indexing.last) return (i1, i2) end @@ -95,6 +97,13 @@ end function prolong2interfaces!(cache, u, mesh::Union{P4estMesh{3}, T8codeMesh{3}}, equations, surface_integral, dg::DG) + backend = backend_or_nothing(cache.interfaces) + _prolong2interfaces!(backend, cache, u, mesh, equations, surface_integral, dg) +end + +@inline function _prolong2interfaces!(backend::Nothing, cache, u, + mesh::Union{P4estMesh{3}, T8codeMesh{3}}, + equations, surface_integral, dg::DG) @unpack interfaces = cache index_range = eachnode(dg) @@ -171,6 +180,15 @@ function calc_interface_flux!(surface_flux_values, mesh::Union{P4estMesh{3}, T8codeMesh{3}}, nonconservative_terms, equations, surface_integral, dg::DG, cache) + backend = backend_or_nothing(cache.interfaces) + _calc_interface_flux!(backend, surface_flux_values, mesh, nonconservative_terms, + equations, surface_integral, dg, cache) +end + +@inline function _calc_interface_flux!(backend::Nothing, surface_flux_values, + mesh::Union{P4estMesh{3}, T8codeMesh{3}}, + nonconservative_terms, + equations, surface_integral, dg::DG, cache) @unpack neighbor_ids, node_indices = cache.interfaces @unpack contravariant_vectors = cache.elements index_range = eachnode(dg) @@ -315,6 +333,13 @@ end function prolong2boundaries!(cache, u, mesh::Union{P4estMesh{3}, T8codeMesh{3}}, equations, surface_integral, dg::DG) + backend = backend_or_nothing(cache.boundaries) + _prolong2boundaries!(backend, cache, u, mesh, equations, surface_integral, dg) +end + +@inline function _prolong2boundaries!(backend::Nothing, cache, u, + 
mesh::Union{P4estMesh{3}, T8codeMesh{3}}, + equations, surface_integral, dg::DG) @unpack boundaries = cache index_range = eachnode(dg) @@ -356,6 +381,15 @@ end function calc_boundary_flux!(cache, t, boundary_condition, boundary_indexing, mesh::Union{P4estMesh{3}, T8codeMesh{3}}, equations, surface_integral, dg::DG) + backend = backend_or_nothing(cache.boundaries) + _calc_boundary_flux!(backend, cache, t, boundary_condition, boundary_indexing, mesh, + equations, surface_integral, dg) +end + +@inline function _calc_boundary_flux!(backend::Nothing, cache, t, + boundary_condition, boundary_indexing, + mesh::Union{P4estMesh{3}, T8codeMesh{3}}, + equations, surface_integral, dg::DG) @unpack boundaries = cache @unpack surface_flux_values, node_coordinates, contravariant_vectors = cache.elements @unpack surface_flux = surface_integral @@ -417,8 +451,14 @@ end function prolong2mortars!(cache, u, mesh::Union{P4estMesh{3}, T8codeMesh{3}}, equations, - mortar_l2::LobattoLegendreMortarL2, - dg::DGSEM) + mortar_l2::LobattoLegendreMortarL2, dg::DGSEM) + backend = backend_or_nothing(cache.mortars) + _prolong2mortars!(backend, cache, u, mesh, equations, mortar_l2, dg) +end + +@inline function _prolong2mortars!(backend::Nothing, cache, u, + mesh::Union{P4estMesh{3}, T8codeMesh{3}}, equations, + mortar_l2::LobattoLegendreMortarL2, dg::DGSEM) @unpack fstar_tmp_threaded = cache @unpack neighbor_ids, node_indices = cache.mortars index_range = eachnode(dg) @@ -524,6 +564,16 @@ function calc_mortar_flux!(surface_flux_values, nonconservative_terms, equations, mortar_l2::LobattoLegendreMortarL2, surface_integral, dg::DG, cache) + backend = backend_or_nothing(cache.mortars) + _calc_mortar_flux!(backend, surface_flux_values, mesh, nonconservative_terms, + equations, mortar_l2, surface_integral, dg, cache) +end + +@inline function _calc_mortar_flux!(backend::Nothing, surface_flux_values, + mesh::Union{P4estMesh{3}, T8codeMesh{3}}, + nonconservative_terms, equations, + 
mortar_l2::LobattoLegendreMortarL2, + surface_integral, dg::DG, cache) @unpack neighbor_ids, node_indices = cache.mortars @unpack contravariant_vectors = cache.elements @unpack fstar_primary_threaded, fstar_secondary_threaded, fstar_tmp_threaded = cache @@ -745,6 +795,16 @@ function calc_surface_integral!(du, u, equations, surface_integral::SurfaceIntegralWeakForm, dg::DGSEM, cache) + backend = backend_or_nothing(cache.elements) + _calc_surface_integral!(backend, du, u, mesh, equations, surface_integral, dg, cache) + +end + +@inline function _calc_surface_integral!(backend::Nothing, du, u, + mesh::Union{P4estMesh{3}, T8codeMesh{3}}, + equations, + surface_integral::SurfaceIntegralWeakForm, + dg::DGSEM, cache) @unpack boundary_interpolation = dg.basis @unpack surface_flux_values = cache.elements diff --git a/src/solvers/dgsem_p4est/dg_3d_gpu.jl b/src/solvers/dgsem_p4est/dg_3d_gpu.jl new file mode 100644 index 00000000000..92d545fd263 --- /dev/null +++ b/src/solvers/dgsem_p4est/dg_3d_gpu.jl @@ -0,0 +1,591 @@ +# By default, Julia/LLVM does not use fused multiply-add operations (FMAs). +# Since these FMAs can increase the performance of many numerical algorithms, +# we need to opt-in explicitly. +# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. +@muladd begin +#! format: noindent + +@inline function _calc_volume_integral!(backend::Backend, du, u, mesh::P4estMesh{3}, + nonconservative_terms::False, equations, + volume_integral::VolumeIntegralWeakForm, + dg::DGSEM, + cache) + nelements(dg, cache) == 0 && return nothing + + @unpack derivative_dhat = dg.basis + @unpack contravariant_vectors = cache.elements + nodes = eachnode(dg) + kernel! 
= _weak_form_kernel!(backend) + + kernel!(du, u, equations, nodes, derivative_dhat, contravariant_vectors, + ndrange = nelements(dg, cache)) + return nothing +end + +@kernel function _weak_form_kernel!(du, u, equations, nodes, derivative_dhat, + contravariant_vectors, alpha = true) + # true * [some floating point value] == [exactly the same floating point value] + # This can (hopefully) be optimized away due to constant propagation. + element = @index(Global) + NVARS = Val(nvariables(equations)) + + for k in nodes, j in nodes, i in nodes + u_node = get_svector(u, NVARS, i, j, k, element) + + flux1 = flux(u_node, 1, equations) + flux2 = flux(u_node, 2, equations) + flux3 = flux(u_node, 3, equations) + + # Compute the contravariant flux by taking the scalar product of the + # first contravariant vector Ja^1 and the flux vector + Ja11, Ja12, Ja13 = get_contravariant_vector(1, contravariant_vectors, i, j, k, + element) + contravariant_flux1 = Ja11 * flux1 + Ja12 * flux2 + Ja13 * flux3 + for ii in nodes + multiply_add_to_first_axis!(du, alpha * derivative_dhat[ii, i], + contravariant_flux1, ii, j, k, + element) + end + + # Compute the contravariant flux by taking the scalar product of the + # second contravariant vector Ja^2 and the flux vector + Ja21, Ja22, Ja23 = get_contravariant_vector(2, contravariant_vectors, i, j, k, + element) + contravariant_flux2 = Ja21 * flux1 + Ja22 * flux2 + Ja23 * flux3 + for jj in nodes + multiply_add_to_first_axis!(du, alpha * derivative_dhat[jj, j], + contravariant_flux2, i, jj, k, + element) + end + + # Compute the contravariant flux by taking the scalar product of the + # third contravariant vector Ja^3 and the flux vector + Ja31, Ja32, Ja33 = get_contravariant_vector(3, contravariant_vectors, i, j, k, + element) + contravariant_flux3 = Ja31 * flux1 + Ja32 * flux2 + Ja33 * flux3 + for kk in nodes + multiply_add_to_first_axis!(du, alpha * derivative_dhat[kk, k], + contravariant_flux3, i, j, kk, + element) + end + end +end + +@inline 
function _calc_volume_integral!(backend::Backend, du, u,
                                mesh::P4estMesh{3},
                                nonconservative_terms::False, equations,
                                volume_integral::VolumeIntegralFluxDifferencing,
                                dg::DGSEM, cache)
    # KernelAbstractions.jl variant of the flux-differencing volume integral for
    # systems without nonconservative terms. It launches
    # `_flux_differencing_kernel!` on `backend` with one work item per element;
    # the kernel accumulates its contributions into `du`. Always returns `nothing`.

    # Nothing to do without elements. This mirrors the guard of the weak-form
    # method and of the interface/boundary launchers in this file and avoids
    # launching a kernel with an empty `ndrange`.
    nelements(dg, cache) == 0 && return nothing

    @unpack derivative_split = dg.basis
    @unpack contravariant_vectors = cache.elements
    nodes = eachnode(dg)
    kernel! = _flux_differencing_kernel!(backend)

    kernel!(du, u, equations, volume_integral.volume_flux, nodes, derivative_split,
            contravariant_vectors,
            ndrange = nelements(dg, cache))
    return nothing
end

# Flux-differencing volume integral for a single element (one work item per
# element, selected via the global linear index).
#
# For every node `(i, j, k)` the two-point `volume_flux` is evaluated against all
# nodes with a larger index along each coordinate line, using the arithmetic
# mean of the contravariant vectors of both nodes as direction. Each flux is
# added to `du` at both participating nodes, weighted by the corresponding
# entries of `derivative_split` and by `alpha`.
#
# NOTE(review): the ranges `(i + 1):num_nodes` assume that `nodes` iterates
# `1:length(nodes)` -- confirm against `eachnode`.
@kernel function _flux_differencing_kernel!(du, u, equations,
                                            volume_flux, nodes, derivative_split,
                                            contravariant_vectors, alpha = true)
    # true * [some floating point value] == [exactly the same floating point value]
    # This can (hopefully) be optimized away due to constant propagation.
    element = @index(Global, Linear)
    NVARS = Val(nvariables(equations))
    num_nodes = length(nodes)

    # Calculate volume integral in one element
    for k in nodes, j in nodes, i in nodes
        u_node = get_svector(u, NVARS, i, j, k, element)

        # pull the contravariant vectors in each coordinate direction
        Ja1_node = get_contravariant_vector(1, contravariant_vectors, i, j, k, element)
        Ja2_node = get_contravariant_vector(2, contravariant_vectors, i, j, k, element)
        Ja3_node = get_contravariant_vector(3, contravariant_vectors, i, j, k, element)

        # All diagonal entries of `derivative_split` are zero. Thus, we can skip
        # the computation of the diagonal terms. In addition, we use the symmetry
        # of the `volume_flux` to save half of the possible two-point flux
        # computations.
        #
        # The averages below use the `Float32` literal `0.5f0`. One half is
        # exactly representable in `Float32`, so `Float64` data yields the same
        # values as with `0.5`, while `Float32` data is no longer promoted to
        # `Float64` inside the kernel.

        # x direction
        for ii in (i + 1):num_nodes
            u_node_ii = get_svector(u, NVARS, ii, j, k, element)
            # pull the contravariant vectors and compute the average
            Ja1_node_ii = get_contravariant_vector(1, contravariant_vectors,
                                                   ii, j, k, element)
            Ja1_avg = 0.5f0 * (Ja1_node + Ja1_node_ii)
            # compute the contravariant sharp flux in the direction of the
            # averaged contravariant vector
            fluxtilde1 = volume_flux(u_node, u_node_ii, Ja1_avg, equations)
            multiply_add_to_first_axis!(du, alpha * derivative_split[i, ii], fluxtilde1,
                                        i, j, k, element)
            multiply_add_to_first_axis!(du, alpha * derivative_split[ii, i], fluxtilde1,
                                        ii, j, k, element)
        end

        # y direction
        for jj in (j + 1):num_nodes
            u_node_jj = get_svector(u, NVARS, i, jj, k, element)
            # pull the contravariant vectors and compute the average
            Ja2_node_jj = get_contravariant_vector(2, contravariant_vectors,
                                                   i, jj, k, element)
            Ja2_avg = 0.5f0 * (Ja2_node + Ja2_node_jj)
            # compute the contravariant sharp flux in the direction of the
            # averaged contravariant vector
            fluxtilde2 = volume_flux(u_node, u_node_jj, Ja2_avg, equations)
            multiply_add_to_first_axis!(du, alpha * derivative_split[j, jj], fluxtilde2,
                                        i, j, k, element)
            multiply_add_to_first_axis!(du, alpha * derivative_split[jj, j], fluxtilde2,
                                        i, jj, k, element)
        end

        # z direction
        for kk in (k + 1):num_nodes
            u_node_kk = get_svector(u, NVARS, i, j, kk, element)
            # pull the contravariant vectors and compute the average
            Ja3_node_kk = get_contravariant_vector(3, contravariant_vectors,
                                                   i, j, kk, element)
            Ja3_avg = 0.5f0 * (Ja3_node + Ja3_node_kk)
            # compute the contravariant sharp flux in the direction of the
            # averaged contravariant vector
            fluxtilde3 = volume_flux(u_node, u_node_kk, Ja3_avg, equations)
            multiply_add_to_first_axis!(du, alpha * derivative_split[k, kk], fluxtilde3,
                                        i, j, k, element)
            multiply_add_to_first_axis!(du, alpha * derivative_split[kk, k], fluxtilde3,
                                        i, j, kk, element)
        end
    end
end
+ +@inline function _prolong2interfaces!(backend::Backend, cache, u, + mesh::Union{P4estMesh{3}, T8codeMesh{3}}, + equations, surface_integral, dg::DG) + @unpack interfaces = cache + ninterfaces(interfaces) == 0 && return nothing + + nodes = eachnode(dg) + kernel! = prolong2interfaces_kernel!(backend) + + kernel!(interfaces.u, interfaces.neighbor_ids, interfaces.node_indices, u, + Val(nvariables(equations)), nodes, + ndrange = ninterfaces(interfaces)) + return nothing +end + +@kernel function prolong2interfaces_kernel!(u_interfaces, neighbor_ids, node_indices, u, + ::Val{NVARS}, nodes) where {NVARS} + interface = @index(Global, Linear) + # Copy solution data from the primary element using "delayed indexing" with + # a start value and two step sizes to get the correct face and orientation. + # Note that in the current implementation, the interface will be + # "aligned at the primary element", i.e., the indices of the primary side + # will always run forwards. + primary_element = neighbor_ids[1, interface] + primary_indices = node_indices[1, interface] + + i_primary_start, i_primary_step_i, i_primary_step_j = index_to_start_step_3d(primary_indices[1], + nodes) + j_primary_start, j_primary_step_i, j_primary_step_j = index_to_start_step_3d(primary_indices[2], + nodes) + k_primary_start, k_primary_step_i, k_primary_step_j = index_to_start_step_3d(primary_indices[3], + nodes) + + i_primary = i_primary_start + j_primary = j_primary_start + k_primary = k_primary_start + for j in nodes + for i in nodes + for v in 1:NVARS + u_interfaces[1, v, i, j, interface] = u[v, i_primary, j_primary, + k_primary, primary_element] + end + i_primary += i_primary_step_i + j_primary += j_primary_step_i + k_primary += k_primary_step_i + end + i_primary += i_primary_step_j + j_primary += j_primary_step_j + k_primary += k_primary_step_j + end + + # Copy solution data from the secondary element using "delayed indexing" with + # a start value and two step sizes to get the correct face and 
orientation. + secondary_element = neighbor_ids[2, interface] + secondary_indices = node_indices[2, interface] + + i_secondary_start, i_secondary_step_i, i_secondary_step_j = index_to_start_step_3d(secondary_indices[1], + nodes) + j_secondary_start, j_secondary_step_i, j_secondary_step_j = index_to_start_step_3d(secondary_indices[2], + nodes) + k_secondary_start, k_secondary_step_i, k_secondary_step_j = index_to_start_step_3d(secondary_indices[3], + nodes) + + i_secondary = i_secondary_start + j_secondary = j_secondary_start + k_secondary = k_secondary_start + for j in nodes + for i in nodes + for v in 1:NVARS + u_interfaces[2, v, i, j, interface] = u[v, i_secondary, j_secondary, + k_secondary, + secondary_element] + end + i_secondary += i_secondary_step_i + j_secondary += j_secondary_step_i + k_secondary += k_secondary_step_i + end + i_secondary += i_secondary_step_j + j_secondary += j_secondary_step_j + k_secondary += k_secondary_step_j + end +end + +@inline function _calc_interface_flux!(backend::Backend, surface_flux_values, + mesh::P4estMesh{3}, + nonconservative_terms::False, + equations, surface_integral, dg::DG, cache) + @unpack interfaces = cache + ninterfaces(interfaces) == 0 && return nothing + + @unpack neighbor_ids, node_indices = interfaces + @unpack contravariant_vectors = cache.elements + nodes = eachnode(dg) + kernel! 
= interface_flux_kernel!(backend) + + kernel!(surface_flux_values, equations, surface_integral.surface_flux, nodes, + interfaces.u, neighbor_ids, node_indices, contravariant_vectors, + ndrange = ninterfaces(interfaces)) + return nothing +end + +@kernel function interface_flux_kernel!(surface_flux_values, equations, surface_flux, nodes, + u_interfaces, neighbor_ids, node_indices, + contravariant_vectors) + interface = @index(Global, Linear) + NVARS = Val(nvariables(equations)) + + # Get element and side information on the primary element + primary_element = neighbor_ids[1, interface] + primary_indices = node_indices[1, interface] + primary_direction = indices2direction(primary_indices) + + i_primary_start, i_primary_step_i, i_primary_step_j = index_to_start_step_3d(primary_indices[1], + nodes) + j_primary_start, j_primary_step_i, j_primary_step_j = index_to_start_step_3d(primary_indices[2], + nodes) + k_primary_start, k_primary_step_i, k_primary_step_j = index_to_start_step_3d(primary_indices[3], + nodes) + + i_primary = i_primary_start + j_primary = j_primary_start + k_primary = k_primary_start + + # Get element and side information on the secondary element + secondary_element = neighbor_ids[2, interface] + secondary_indices = node_indices[2, interface] + secondary_direction = indices2direction(secondary_indices) + secondary_surface_indices = surface_indices(secondary_indices) + + # Get the surface indexing on the secondary element. + # Note that the indices of the primary side will always run forward but + # the secondary indices might need to run backwards for flipped sides. 
+ i_secondary_start, i_secondary_step_i, i_secondary_step_j = index_to_start_step_3d(secondary_surface_indices[1], + nodes) + j_secondary_start, j_secondary_step_i, j_secondary_step_j = index_to_start_step_3d(secondary_surface_indices[2], + nodes) + i_secondary = i_secondary_start + j_secondary = j_secondary_start + + for j in nodes + for i in nodes + # Get the normal direction from the primary element. + # Note, contravariant vectors at interfaces in negative coordinate direction + # are pointing inwards. This is handled by `get_normal_direction`. + normal_direction = get_normal_direction(primary_direction, + contravariant_vectors, + i_primary, j_primary, k_primary, + primary_element) + u_ll, u_rr = get_svectors(u_interfaces, NVARS, i, j, interface) + + flux_ = surface_flux(u_ll, u_rr, normal_direction, equations) + + for v in eachvariable(equations) + surface_flux_values[v, i, j, primary_direction, primary_element] = flux_[v] + surface_flux_values[v, i_secondary, j_secondary, + secondary_direction, secondary_element] = -flux_[v] + end + + # Increment the primary element indices + i_primary += i_primary_step_i + j_primary += j_primary_step_i + k_primary += k_primary_step_i + # Increment the secondary element surface indices + i_secondary += i_secondary_step_i + j_secondary += j_secondary_step_i + end + # Increment the primary element indices + i_primary += i_primary_step_j + j_primary += j_primary_step_j + k_primary += k_primary_step_j + # Increment the secondary element surface indices + i_secondary += i_secondary_step_j + j_secondary += j_secondary_step_j + end +end + +@inline function _prolong2boundaries!(backend::Backend, cache, u, mesh::P4estMesh{3}, + equations, surface_integral, dg::DG) + @unpack boundaries = cache + nboundaries(boundaries) == 0 && return nothing + + nodes = eachnode(dg) + kernel! 
= prolong2boundaries_kernel!(backend) + + kernel!(boundaries.u, boundaries.neighbor_ids, boundaries.node_indices, u, + Val(nvariables(equations)), nodes, + ndrange = nboundaries(boundaries)) + return nothing +end + +@kernel function prolong2boundaries_kernel!(u_boundaries, neighbor_ids, _node_indices, u, + ::Val{NVARS}, nodes) where {NVARS} + boundary = @index(Global, Linear) + + # Copy solution data from the element using "delayed indexing" with + # a start value and two step sizes to get the correct face and orientation. + element = neighbor_ids[boundary] + node_indices = _node_indices[boundary] + + i_node_start, i_node_step_i, i_node_step_j = index_to_start_step_3d(node_indices[1], + nodes) + j_node_start, j_node_step_i, j_node_step_j = index_to_start_step_3d(node_indices[2], + nodes) + k_node_start, k_node_step_i, k_node_step_j = index_to_start_step_3d(node_indices[3], + nodes) + + i_node = i_node_start + j_node = j_node_start + k_node = k_node_start + for j in nodes + for i in nodes + for v in 1:NVARS + u_boundaries[v, i, j, boundary] = u[v, i_node, j_node, k_node, + element] + end + i_node += i_node_step_i + j_node += j_node_step_i + k_node += k_node_step_i + end + i_node += i_node_step_j + j_node += j_node_step_j + k_node += k_node_step_j + end +end + +@inline function _calc_boundary_flux!(backend::Backend, cache, t, + boundary_condition, boundary_indexing, + mesh::Union{P4estMesh{3}, T8codeMesh{3}}, + equations, surface_integral, dg::DG) + @unpack boundaries, elements = cache + nboundaries(boundaries) == 0 && return nothing + + @unpack neighbor_ids, node_indices = boundaries + @unpack surface_flux_values, node_coordinates, contravariant_vectors = elements + nodes = eachnode(dg) + kernel! 
= boundary_flux_kernel!(backend) + + kernel!(surface_flux_values, t, boundary_condition, boundary_indexing, equations, + surface_integral.surface_flux, nodes, + boundaries.u, neighbor_ids, node_indices, + node_coordinates, contravariant_vectors, + ndrange = nboundaries(boundaries)) + return nothing +end + +@kernel function boundary_flux_kernel!(surface_flux_values, t, + boundary_condition, boundary_indexing, equations, + surface_flux, nodes, + u_boundaries, neighbor_ids, _node_indices, + node_coordinates, contravariant_vectors) + local_index = @index(Global, Linear) + NVARS = Val(nvariables(equations)) + boundary = boundary_indexing[local_index] + + # Get information on the adjacent element, compute the surface fluxes, + # and store them + element = neighbor_ids[boundary] + node_indices = _node_indices[boundary] + direction = indices2direction(node_indices) + + i_node_start, i_node_step_i, i_node_step_j = index_to_start_step_3d(node_indices[1], + nodes) + j_node_start, j_node_step_i, j_node_step_j = index_to_start_step_3d(node_indices[2], + nodes) + k_node_start, k_node_step_i, k_node_step_j = index_to_start_step_3d(node_indices[3], + nodes) + + i_node = i_node_start + j_node = j_node_start + k_node = k_node_start + for j in nodes + for i in nodes + # Extract solution data from boundary container + u_inner = get_svector(u_boundaries, NVARS, i, j, boundary) + + # Outward-pointing normal direction (not normalized) + normal_direction = get_normal_direction(direction, + contravariant_vectors, + i_node, j_node, k_node, element) + + # Coordinates at boundary node + x = get_svector(node_coordinates, Val(3), i_node, j_node, k_node, element) + + flux_ = boundary_condition(u_inner, normal_direction, x, t, + surface_flux, equations) + + # Copy flux to element storage in the correct orientation + for v in eachvariable(equations) + surface_flux_values[v, i, j, direction, element] = flux_[v] + end + + i_node += i_node_step_i + j_node += j_node_step_i + k_node += k_node_step_i + 
end + i_node += i_node_step_j + j_node += j_node_step_j + k_node += k_node_step_j + end +end + +@inline function _prolong2mortars!(backend::Backend, cache, u, + mesh::P4estMesh{3}, equations, + mortar_l2::LobattoLegendreMortarL2, + dg::DGSEM) + if nmortars(dg, cache) > 0 + error("mortars currently not supported by KA.jl P4estMesh solver") + end + return nothing +end + +@inline function _calc_mortar_flux!(backend::Backend, surface_flux_values, + mesh::P4estMesh{3}, + nonconservative_terms, equations, + mortar_l2::LobattoLegendreMortarL2, + surface_integral, dg::DG, cache) + return nothing +end + +@inline function _calc_surface_integral!(backend::Backend, du, u, + mesh::P4estMesh{3}, + equations, + surface_integral::SurfaceIntegralWeakForm, + dg::DGSEM, cache) + @unpack boundary_interpolation = dg.basis + @unpack surface_flux_values = cache.elements + nodes = eachnode(dg) + # Note that all fluxes have been computed with outward-pointing normal vectors. + # Access the factors only once before beginning the loop to increase performance. + # We also use explicit assignments instead of `+=` to let `@muladd` turn these + # into FMAs (see comment at the top of the file). + factor_1 = boundary_interpolation[1, 1] + factor_2 = boundary_interpolation[nnodes(dg), 2] + kernel! 
= surface_integral_kernel!(backend) + + kernel!(du, u, Val(nvariables(equations)), factor_1, factor_2, nodes, + surface_flux_values, ndrange = nelements(cache.elements)) + return nothing +end + +@kernel function surface_integral_kernel!(du, u, ::Val{NVARS}, + boundary_interp_factor_1, boundary_interp_factor_2, + nodes, surface_flux_values) where {NVARS} + element = @index(Global, Linear) + num_nodes = length(nodes) + for m in nodes, l in nodes + for v in 1:NVARS + # surface at -x + du[v, 1, l, m, element] = (du[v, 1, l, m, element] + + surface_flux_values[v, l, m, 1, element] * + boundary_interp_factor_1) + + # surface at +x + du[v, num_nodes, l, m, element] = (du[v, num_nodes, l, m, element] + + surface_flux_values[v, l, m, 2, + element] * + boundary_interp_factor_2) + + # surface at -y + du[v, l, 1, m, element] = (du[v, l, 1, m, element] + + surface_flux_values[v, l, m, 3, element] * + boundary_interp_factor_1) + + # surface at +y + du[v, l, num_nodes, m, element] = (du[v, l, num_nodes, m, element] + + surface_flux_values[v, l, m, 4, + element] * + boundary_interp_factor_2) + + # surface at -z + du[v, l, m, 1, element] = (du[v, l, m, 1, element] + + surface_flux_values[v, l, m, 5, element] * + boundary_interp_factor_1) + + # surface at +z + du[v, l, m, num_nodes, element] = (du[v, l, m, num_nodes, element] + + surface_flux_values[v, l, m, 6, + element] * + boundary_interp_factor_2) + end + end +end + +@inline function _apply_jacobian!(backend::Backend, du, mesh::P4estMesh{3}, + equations, dg::DG, cache) + NVARS = Val(nvariables(equations)) + nodes = eachnode(dg) + @unpack inverse_jacobian = cache.elements + kernel! 
= _apply_jacobian_kernel!(backend) + + kernel!(du, inverse_jacobian, NVARS, nodes, ndrange = nelements(cache.elements)) + return nothing +end + +@kernel function _apply_jacobian_kernel!(du, inverse_jacobian, + ::Val{NVARS}, nodes) where {NVARS} + element = @index(Global, Linear) + for k in nodes, j in nodes, i in nodes + factor = -inverse_jacobian[i, j, k, element] + for v in 1:NVARS + du[v, i, j, k, element] *= factor + end + end +end + +@inline function _calc_sources!(backend::Backend, du, u, t, source_terms, + equations::AbstractEquations{3}, dg::DG, cache) + @unpack node_coordinates = cache.elements + NVARS = Val(nvariables(equations)) + nodes = eachnode(dg) + kernel! = _calc_sources_kernel!(backend) + + kernel!(du, u, t, source_terms, equations, NVARS, nodes, node_coordinates; + ndrange = nelements(cache.elements)) + + return nothing +end + +@kernel function _calc_sources_kernel!(du, u, t, source_terms, equations, NVARS, + nodes, node_coordinates) + element = @index(Global, Linear) + for k in nodes, j in nodes, i in nodes + u_local = get_svector(u, NVARS, i, j, k, element) + x_local = get_svector(node_coordinates, Val(3), i, j, k, element) + du_local = source_terms(u_local, x_local, t, equations) + add_to_first_axis!(du, du_local, i, j, k, element) + end +end +end # @muladd diff --git a/src/solvers/dgsem_p4est/dg_3d_gpu_parallel.jl b/src/solvers/dgsem_p4est/dg_3d_gpu_parallel.jl new file mode 100644 index 00000000000..c94ccd764c7 --- /dev/null +++ b/src/solvers/dgsem_p4est/dg_3d_gpu_parallel.jl @@ -0,0 +1,178 @@ +# By default, Julia/LLVM does not use fused multiply-add operations (FMAs). +# Since these FMAs can increase the performance of many numerical algorithms, +# we need to opt-in explicitly. +# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. +@muladd begin +#! 
format: noindent + +@inline function _prolong2mpiinterfaces!(backend::Backend, cache, u, + mesh::P4estMesh{3}, + equations, surface_integral, dg::DG) + @unpack mpi_interfaces = cache + nmpiinterfaces(mpi_interfaces) == 0 && return nothing + + nodes = eachnode(dg) + kernel! = prolong2mpiinterfaces_kernel!(backend) + + kernel!(mpi_interfaces.u, mpi_interfaces.local_sides, + mpi_interfaces.local_neighbor_ids, + mpi_interfaces.node_indices, u, Val(nvariables(equations)), nodes, + ndrange = nmpiinterfaces(mpi_interfaces)) + return nothing +end + +@kernel function prolong2mpiinterfaces_kernel!(u_mpi_interfaces, local_sides, + local_neighbor_ids, + node_indices, u, ::Val{NVARS}, + nodes) where {NVARS} + interface = @index(Global, Linear) + # Copy solution data from the local element using "delayed indexing" with + # a start value and a step size to get the correct face and orientation. + # Note that in the current implementation, the interface will be + # "aligned at the primary element", i.e., the index of the primary side + # will always run forwards. 
+ local_side = local_sides[interface] + local_element = local_neighbor_ids[interface] + local_indices = node_indices[interface] + + i_element_start, i_element_step_i, i_element_step_j = index_to_start_step_3d(local_indices[1], + nodes) + j_element_start, j_element_step_i, j_element_step_j = index_to_start_step_3d(local_indices[2], + nodes) + k_element_start, k_element_step_i, k_element_step_j = index_to_start_step_3d(local_indices[3], + nodes) + + i_element = i_element_start + j_element = j_element_start + k_element = k_element_start + for j in nodes + for i in nodes + for v in 1:NVARS + u_mpi_interfaces[local_side, v, i, j, interface] = u[v, i_element, + j_element, + k_element, + local_element] + end + i_element += i_element_step_i + j_element += j_element_step_i + k_element += k_element_step_i + end + i_element += i_element_step_j + j_element += j_element_step_j + k_element += k_element_step_j + end +end + +@inline function _calc_mpi_interface_flux!(backend::Backend, surface_flux_values, + mesh::ParallelP4estMesh{3}, + nonconservative_terms::False, + equations, surface_integral, dg::DG, cache) + @unpack mpi_interfaces = cache + nmpiinterfaces(mpi_interfaces) == 0 && return nothing + + @unpack local_neighbor_ids, node_indices, local_sides = mpi_interfaces + @unpack contravariant_vectors = cache.elements + nodes = eachnode(dg) + kernel! 
= mpi_interface_flux_kernel!(backend) + + kernel!(surface_flux_values, equations, surface_integral.surface_flux, nodes, + mpi_interfaces.u, local_neighbor_ids, node_indices, local_sides, + contravariant_vectors, ndrange = nmpiinterfaces(mpi_interfaces)) + return nothing +end + +@kernel function mpi_interface_flux_kernel!(surface_flux_values, equations, + surface_flux, nodes, + u_mpi_interfaces, local_neighbor_ids, + node_indices, local_sides, + contravariant_vectors) + interface = @index(Global, Linear) + NVARS = Val(nvariables(equations)) + + # Get element and side index information on the local element + local_element = local_neighbor_ids[interface] + local_indices = node_indices[interface] + local_direction = indices2direction(local_indices) + local_side = local_sides[interface] + + # Create the local i,j,k indexing on the local element used to pull normal direction information + i_element_start, i_element_step_i, i_element_step_j = index_to_start_step_3d(local_indices[1], + nodes) + j_element_start, j_element_step_i, j_element_step_j = index_to_start_step_3d(local_indices[2], + nodes) + k_element_start, k_element_step_i, k_element_step_j = index_to_start_step_3d(local_indices[3], + nodes) + + i_element = i_element_start + j_element = j_element_start + k_element = k_element_start + + # Initiate the node indices to be used in the surface for loop, + # the surface flux storage must be indexed in alignment with the local element indexing + local_surface_indices = surface_indices(local_indices) + i_surface_start, i_surface_step_i, i_surface_step_j = index_to_start_step_3d(local_surface_indices[1], + nodes) + j_surface_start, j_surface_step_i, j_surface_step_j = index_to_start_step_3d(local_surface_indices[2], + nodes) + i_surface = i_surface_start + j_surface = j_surface_start + + for j in nodes + for i in nodes + # Get the normal direction on the local element + # Contravariant vectors at interfaces in negative coordinate direction + # are pointing inwards. 
This is handled by `get_normal_direction`. + normal_direction = get_normal_direction(local_direction, + contravariant_vectors, + i_element, j_element, k_element, + local_element) + u_ll, u_rr = get_svectors(u_mpi_interfaces, NVARS, i, j, interface) + + if local_side == 1 + flux_ = surface_flux(u_ll, u_rr, normal_direction, equations) + else # local_side == 2 + flux_ = -surface_flux(u_ll, u_rr, -normal_direction, equations) + end + + for v in 1:nvariables(equations) + surface_flux_values[v, i_surface, j_surface, + local_direction, local_element] = flux_[v] + end + + # Increment local element indices to pull the normal direction + i_element += i_element_step_i + j_element += j_element_step_i + k_element += k_element_step_i + # Increment the surface node indices along the local element + i_surface += i_surface_step_i + j_surface += j_surface_step_i + end + # Increment local element indices to pull the normal direction + i_element += i_element_step_j + j_element += j_element_step_j + k_element += k_element_step_j + # Increment the surface node indices along the local element + i_surface += i_surface_step_j + j_surface += j_surface_step_j + end +end + +@inline function _prolong2mpimortars!(backend::Backend, cache, u, + mesh::ParallelP4estMesh{3}, + equations, + mortar_l2::LobattoLegendreMortarL2, + dg::DGSEM) + if nmpimortars(dg, cache) > 0 + error("mortars currently not supported by KA.jl P4estMesh solver") + end + return nothing +end + +@inline function _calc_mpi_mortar_flux!(backend::Backend, surface_flux_values, + mesh::ParallelP4estMesh{3}, + nonconservative_terms, equations, + mortar_l2::LobattoLegendreMortarL2, + surface_integral, dg::DG, cache) + return nothing +end +end # @muladd \ No newline at end of file diff --git a/src/solvers/dgsem_p4est/dg_3d_parallel.jl b/src/solvers/dgsem_p4est/dg_3d_parallel.jl index cd64e77e739..87340bc07d5 100644 --- a/src/solvers/dgsem_p4est/dg_3d_parallel.jl +++ b/src/solvers/dgsem_p4est/dg_3d_parallel.jl @@ -109,6 +109,10 @@ 
function rhs!(du, u, t, # Finish to send MPI data @trixi_timeit timer() "finish MPI send" finish_mpi_send!(cache.mpi_cache) + if mesh isa P4estMesh && uses_ka(cache.elements) + synchronize(get_backend(cache.elements)) + end + return nothing end @@ -116,6 +120,14 @@ function prolong2mpiinterfaces!(cache, u, mesh::Union{ParallelP4estMesh{3}, ParallelT8codeMesh{3}}, equations, surface_integral, dg::DG) + backend = backend_or_nothing(cache.mpi_interfaces) + _prolong2mpiinterfaces!(backend, cache, u, mesh, equations, surface_integral, dg) +end + +@inline function _prolong2mpiinterfaces!(backend::Nothing, cache, u, + mesh::Union{ParallelP4estMesh{3}, + ParallelT8codeMesh{3}}, + equations, surface_integral, dg::DG) @unpack mpi_interfaces = cache index_range = eachnode(dg) @@ -165,6 +177,16 @@ function calc_mpi_interface_flux!(surface_flux_values, ParallelT8codeMesh{3}}, nonconservative_terms, equations, surface_integral, dg::DG, cache) + backend = backend_or_nothing(cache.mpi_interfaces) + _calc_mpi_interface_flux!(backend, surface_flux_values, mesh, nonconservative_terms, + equations, surface_integral, dg, cache) +end + +@inline function _calc_mpi_interface_flux!(backend::Nothing, surface_flux_values, + mesh::Union{ParallelP4estMesh{3}, + ParallelT8codeMesh{3}}, + nonconservative_terms, + equations, surface_integral, dg::DG, cache) @unpack local_neighbor_ids, node_indices, local_sides = cache.mpi_interfaces @unpack contravariant_vectors = cache.elements index_range = eachnode(dg) @@ -306,6 +328,16 @@ function prolong2mpimortars!(cache, u, equations, mortar_l2::LobattoLegendreMortarL2, dg::DGSEM) + backend = backend_or_nothing(cache.mpi_mortars) + _prolong2mpimortars!(backend, cache, u, mesh, equations, + mortar_l2, dg) +end + +@inline function _prolong2mpimortars!(backend::Nothing, cache, u, + mesh::Union{ParallelP4estMesh{3}, ParallelT8codeMesh{3}}, + equations, + mortar_l2::LobattoLegendreMortarL2, + dg::DGSEM) @unpack node_indices = cache.mpi_mortars index_range = 
eachnode(dg) @@ -416,6 +448,16 @@ function calc_mpi_mortar_flux!(surface_flux_values, nonconservative_terms, equations, mortar_l2::LobattoLegendreMortarL2, surface_integral, dg::DG, cache) + backend = backend_or_nothing(cache.mpi_mortars) + _calc_mpi_mortar_flux!(backend, surface_flux_values, mesh, nonconservative_terms, equations, + mortar_l2, surface_integral, dg, cache) +end + +@inline function _calc_mpi_mortar_flux!(backend::Nothing, surface_flux_values, + mesh::Union{ParallelP4estMesh{3}, ParallelT8codeMesh{3}}, + nonconservative_terms, equations, + mortar_l2::LobattoLegendreMortarL2, + surface_integral, dg::DG, cache) @unpack local_neighbor_ids, local_neighbor_positions, node_indices = cache.mpi_mortars @unpack contravariant_vectors = cache.elements @unpack fstar_primary_threaded, fstar_secondary_threaded, fstar_tmp_threaded = cache diff --git a/src/solvers/dgsem_p4est/dg_gpu_parallel.jl b/src/solvers/dgsem_p4est/dg_gpu_parallel.jl new file mode 100644 index 00000000000..a4041652523 --- /dev/null +++ b/src/solvers/dgsem_p4est/dg_gpu_parallel.jl @@ -0,0 +1,76 @@ +# By default, Julia/LLVM does not use fused multiply-add operations (FMAs). +# Since these FMAs can increase the performance of many numerical algorithms, +# we need to opt-in explicitly. +# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. +@muladd begin +#! format: noindent + +# TODO: Support MPI mortars +@inline function _start_mpi_send!(backend::Backend, mpi_cache::P4estMPICache, + mesh::ParallelP4estMesh{3}, equations, dg, cache) + @unpack mpi_neighbor_ranks, mpi_neighbor_interfaces = mpi_cache + @unpack mpi_send_buffers, mpi_send_requests = mpi_cache + @unpack mpi_interfaces = cache + @unpack local_sides = mpi_interfaces + + kernel! 
= copy_to_mpi_send!(backend) + + for (index, d) in enumerate(mpi_neighbor_ranks) + send_buffer = mpi_send_buffers[index] + neighbor_interfaces = mpi_neighbor_interfaces[index] + kernel!(send_buffer, neighbor_interfaces, local_sides, mpi_interfaces.u, + Val(nvariables(equations)), Val(ndims(mesh)), + ndrange = (nnodes(dg), nnodes(dg), length(neighbor_interfaces))) + synchronize(backend) + mpi_send_requests[index] = MPI.Isend(send_buffer, d, mpi_rank(), mpi_comm()) + end +end + +@kernel function copy_to_mpi_send!(send_buffer, neighbor_interfaces, local_sides, + u_mpi_interfaces, ::Val{NVARS}, + ::Val{3}) where {NVARS} + i, j, k = @index(Global, NTuple) + I = @index(Global, Linear) + buf_idx = (I - 1) * NVARS + interface = neighbor_interfaces[k] + local_side = local_sides[interface] + for v in 1:NVARS + send_buffer[buf_idx + v] = u_mpi_interfaces[local_side, v, i, j, interface] + end +end + +@inline function _finish_mpi_receive!(backend::Backend, mpi_cache::P4estMPICache, + mesh, equations, dg, cache) + @unpack mpi_neighbor_ranks, mpi_neighbor_interfaces = mpi_cache + @unpack mpi_recv_buffers, mpi_recv_requests = mpi_cache + @unpack mpi_interfaces = cache + @unpack local_sides = mpi_interfaces + + kernel! = copy_from_mpi_recv!(backend) + + d = MPI.Waitany(mpi_recv_requests) + while d !== nothing + recv_buffer = mpi_recv_buffers[d] + neighbor_interfaces = mpi_neighbor_interfaces[d] + kernel!(recv_buffer, neighbor_interfaces, local_sides, mpi_interfaces.u, + Val(nvariables(equations)), Val(ndims(mesh)), + ndrange = (nnodes(dg), nnodes(dg), length(neighbor_interfaces))) + + d = MPI.Waitany(mpi_recv_requests) + end + synchronize(backend) +end + +@kernel function copy_from_mpi_recv!(recv_buffer, neighbor_interfaces, local_sides, + u_mpi_interfaces, ::Val{NVARS}, + ::Val{3}) where {NVARS} + i, j, k = @index(Global, NTuple) + I = @index(Global, Linear) + buf_idx = (I - 1) * NVARS + interface = neighbor_interfaces[k] + remote_side = local_sides[interface] == 1 ? 
2 : 1 + for v in 1:NVARS + u_mpi_interfaces[remote_side, v, i, j, interface] = recv_buffer[buf_idx + v] + end +end +end # @muladd \ No newline at end of file diff --git a/src/solvers/dgsem_p4est/dg_parallel.jl b/src/solvers/dgsem_p4est/dg_parallel.jl index 0aee0b5652e..092d864d44c 100644 --- a/src/solvers/dgsem_p4est/dg_parallel.jl +++ b/src/solvers/dgsem_p4est/dg_parallel.jl @@ -5,12 +5,12 @@ @muladd begin #! format: noindent -mutable struct P4estMPICache{uEltype} +mutable struct P4estMPICache{BufferType <: DenseVector, VecInt <: DenseVector{<:Integer}} mpi_neighbor_ranks::Vector{Int} - mpi_neighbor_interfaces::Vector{Vector{Int}} - mpi_neighbor_mortars::Vector{Vector{Int}} - mpi_send_buffers::Vector{Vector{uEltype}} - mpi_recv_buffers::Vector{Vector{uEltype}} + mpi_neighbor_interfaces::VecOfArrays{VecInt} + mpi_neighbor_mortars::VecOfArrays{VecInt} + mpi_send_buffers::VecOfArrays{BufferType} + mpi_recv_buffers::VecOfArrays{BufferType} mpi_send_requests::Vector{MPI.Request} mpi_recv_requests::Vector{MPI.Request} n_elements_by_rank::OffsetArray{Int, 1, Array{Int, 1}} @@ -25,27 +25,56 @@ function P4estMPICache(uEltype) end mpi_neighbor_ranks = Vector{Int}(undef, 0) - mpi_neighbor_interfaces = Vector{Vector{Int}}(undef, 0) - mpi_neighbor_mortars = Vector{Vector{Int}}(undef, 0) - mpi_send_buffers = Vector{Vector{uEltype}}(undef, 0) - mpi_recv_buffers = Vector{Vector{uEltype}}(undef, 0) + mpi_neighbor_interfaces = Vector{Vector{Int}}(undef, 0) |> VecOfArrays + mpi_neighbor_mortars = Vector{Vector{Int}}(undef, 0) |> VecOfArrays + mpi_send_buffers = Vector{Vector{uEltype}}(undef, 0) |> VecOfArrays + mpi_recv_buffers = Vector{Vector{uEltype}}(undef, 0) |> VecOfArrays mpi_send_requests = Vector{MPI.Request}(undef, 0) mpi_recv_requests = Vector{MPI.Request}(undef, 0) n_elements_by_rank = OffsetArray(Vector{Int}(undef, 0), 0:-1) n_elements_global = 0 first_element_global_id = 0 - P4estMPICache{uEltype}(mpi_neighbor_ranks, mpi_neighbor_interfaces, - mpi_neighbor_mortars, - 
mpi_send_buffers, mpi_recv_buffers, - mpi_send_requests, mpi_recv_requests, - n_elements_by_rank, n_elements_global, - first_element_global_id) + P4estMPICache{Vector{uEltype}, Vector{Int}}(mpi_neighbor_ranks, + mpi_neighbor_interfaces, + mpi_neighbor_mortars, + mpi_send_buffers, mpi_recv_buffers, + mpi_send_requests, mpi_recv_requests, + n_elements_by_rank, n_elements_global, + first_element_global_id) end -@inline Base.eltype(::P4estMPICache{uEltype}) where {uEltype} = uEltype +@inline Base.eltype(::P4estMPICache{BufferType}) where {BufferType} = eltype(BufferType) + +function Adapt.adapt_structure(to, mpi_cache::P4estMPICache) + mpi_neighbor_ranks = mpi_cache.mpi_neighbor_ranks + mpi_neighbor_interfaces = Adapt.adapt_structure(to, mpi_cache.mpi_neighbor_interfaces) + mpi_neighbor_mortars = Adapt.adapt_structure(to, mpi_cache.mpi_neighbor_mortars) + mpi_send_buffers = Adapt.adapt_structure(to, mpi_cache.mpi_send_buffers) + mpi_recv_buffers = Adapt.adapt_structure(to, mpi_cache.mpi_recv_buffers) + mpi_send_requests = mpi_cache.mpi_send_requests + mpi_recv_requests = mpi_cache.mpi_recv_requests + n_elements_by_rank = mpi_cache.n_elements_by_rank + n_elements_global = mpi_cache.n_elements_global + first_element_global_id = mpi_cache.first_element_global_id + + @assert eltype(mpi_send_buffers) == eltype(mpi_recv_buffers) + BufferType = eltype(mpi_send_buffers) + VecInt = eltype(mpi_neighbor_interfaces) + return P4estMPICache{BufferType, VecInt}(mpi_neighbor_ranks, mpi_neighbor_interfaces, + mpi_neighbor_mortars, mpi_send_buffers, + mpi_recv_buffers, mpi_send_requests, + mpi_recv_requests, n_elements_by_rank, + n_elements_global, first_element_global_id) +end function start_mpi_send!(mpi_cache::P4estMPICache, mesh, equations, dg, cache) + backend = backend_or_nothing(cache.elements) + _start_mpi_send!(backend, mpi_cache, mesh, equations, dg, cache) +end + +@inline function _start_mpi_send!(backend::Nothing, mpi_cache::P4estMPICache, + mesh, equations, dg, cache) 
data_size = nvariables(equations) * nnodes(dg)^(ndims(mesh) - 1) n_small_elements = 2^(ndims(mesh) - 1) @@ -114,6 +143,12 @@ function finish_mpi_send!(mpi_cache::P4estMPICache) end function finish_mpi_receive!(mpi_cache::P4estMPICache, mesh, equations, dg, cache) + backend = backend_or_nothing(cache.elements) + _finish_mpi_receive!(backend, mpi_cache, mesh, equations, dg, cache) +end + +@inline function _finish_mpi_receive!(backend::Nothing, mpi_cache::P4estMPICache, + mesh, equations, dg, cache) data_size = nvariables(equations) * nnodes(dg)^(ndims(mesh) - 1) n_small_elements = 2^(ndims(mesh) - 1) n_positions = n_small_elements + 1 @@ -255,16 +290,16 @@ end function init_mpi_cache!(mpi_cache::P4estMPICache, mesh::ParallelP4estMesh, mpi_interfaces, mpi_mortars, nvars, n_nodes, uEltype) - mpi_neighbor_ranks, mpi_neighbor_interfaces, mpi_neighbor_mortars = init_mpi_neighbor_connectivity(mpi_interfaces, - mpi_mortars, - mesh) + mpi_neighbor_ranks, _mpi_neighbor_interfaces, _mpi_neighbor_mortars = init_mpi_neighbor_connectivity(mpi_interfaces, + mpi_mortars, + mesh) - mpi_send_buffers, mpi_recv_buffers, mpi_send_requests, mpi_recv_requests = init_mpi_data_structures(mpi_neighbor_interfaces, - mpi_neighbor_mortars, - ndims(mesh), - nvars, - n_nodes, - uEltype) + _mpi_send_buffers, _mpi_recv_buffers, mpi_send_requests, mpi_recv_requests = init_mpi_data_structures(_mpi_neighbor_interfaces, + _mpi_neighbor_mortars, + ndims(mesh), + nvars, + n_nodes, + uEltype) # Determine local and total number of elements n_elements_global = Int(mesh.p4est.global_num_quadrants[]) @@ -276,6 +311,11 @@ function init_mpi_cache!(mpi_cache::P4estMPICache, mesh::ParallelP4estMesh, first_element_global_id = Int(mesh.p4est.global_first_quadrant[mpi_rank() + 1]) + 1 @assert n_elements_global==sum(n_elements_by_rank) "error in total number of elements" + mpi_neighbor_interfaces = VecOfArrays(_mpi_neighbor_interfaces) + mpi_neighbor_mortars = VecOfArrays(_mpi_neighbor_mortars) + mpi_send_buffers = 
VecOfArrays(_mpi_send_buffers) + mpi_recv_buffers = VecOfArrays(_mpi_recv_buffers) + # TODO reuse existing structures @pack! mpi_cache = mpi_neighbor_ranks, mpi_neighbor_interfaces, mpi_neighbor_mortars, @@ -583,4 +623,6 @@ end include("dg_2d_parallel.jl") include("dg_3d_parallel.jl") +include("dg_gpu_parallel.jl") +include("dg_3d_gpu_parallel.jl") end # muladd diff --git a/src/solvers/dgsem_structured/dg_3d.jl b/src/solvers/dgsem_structured/dg_3d.jl index aeae63183b0..172b02697b0 100644 --- a/src/solvers/dgsem_structured/dg_3d.jl +++ b/src/solvers/dgsem_structured/dg_3d.jl @@ -784,6 +784,14 @@ end function apply_jacobian!(du, mesh::Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}}, equations, dg::DG, cache) + backend = backend_or_nothing(cache.elements) + _apply_jacobian!(backend, du, mesh, equations, dg, cache) + return nothing +end + +@inline function _apply_jacobian!(::Nothing, du, + mesh::Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}}, + equations, dg::DG, cache) @threaded for element in eachelement(dg, cache) for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) factor = -cache.elements.inverse_jacobian[i, j, k, element] diff --git a/src/solvers/dgsem_t8code/containers_3d.jl b/src/solvers/dgsem_t8code/containers_3d.jl index 981dabc397f..a4eb86ebad3 100644 --- a/src/solvers/dgsem_t8code/containers_3d.jl +++ b/src/solvers/dgsem_t8code/containers_3d.jl @@ -64,8 +64,7 @@ function calc_node_coordinates!(node_coordinates, current_index += 1), matrix1, matrix2, matrix3, view(mesh.tree_node_coordinates, :, :, :, :, - global_itree + 1), - tmp1) + global_itree + 1), tmp1) end end diff --git a/src/solvers/dgsem_tree/dg.jl b/src/solvers/dgsem_tree/dg.jl index fd9728168d8..dee67e8099b 100644 --- a/src/solvers/dgsem_tree/dg.jl +++ b/src/solvers/dgsem_tree/dg.jl @@ -8,11 +8,20 @@ # du .= zero(eltype(du)) doesn't scale when using multiple threads. # See https://github.com/trixi-framework/Trixi.jl/pull/924 for a performance comparison. 
function reset_du!(du, dg, cache) + backend = backend_or_nothing(cache.elements) + _reset_du!(backend, du, dg, cache) + return du +end + +@inline function _reset_du!(::Union{Nothing, CPU}, du, dg, + cache) @threaded for element in eachelement(dg, cache) du[.., element] .= zero(eltype(du)) end +end - return du +@inline function _reset_du!(::Backend, du, dg, cache) + fill!(du, zero(eltype(du))) end function volume_jacobian(element, mesh::TreeMesh, cache) diff --git a/src/solvers/dgsem_tree/dg_3d.jl b/src/solvers/dgsem_tree/dg_3d.jl index 43f88bc9198..cbbf801f5dc 100644 --- a/src/solvers/dgsem_tree/dg_3d.jl +++ b/src/solvers/dgsem_tree/dg_3d.jl @@ -224,6 +224,10 @@ function rhs!(du, u, t, calc_sources!(du, u, t, source_terms, equations, dg, cache) end + if mesh isa P4estMesh && uses_ka(cache.elements) + synchronize(get_backend(cache.elements)) + end + return nothing end @@ -233,6 +237,18 @@ function calc_volume_integral!(du, u, nonconservative_terms, equations, volume_integral::VolumeIntegralWeakForm, dg::DGSEM, cache) + backend = backend_or_nothing(cache.elements) + _calc_volume_integral!(backend, du, u, mesh, nonconservative_terms, equations, + volume_integral, dg, cache) + return nothing +end + +@inline function _calc_volume_integral!(backend::Nothing, du, u, + mesh::Union{TreeMesh{3}, StructuredMesh{3}, + P4estMesh{3}, T8codeMesh{3}}, + nonconservative_terms, equations, + volume_integral::VolumeIntegralWeakForm, + dg::DGSEM, cache) @threaded for element in eachelement(dg, cache) weak_form_kernel!(du, u, element, mesh, nonconservative_terms, equations, @@ -283,11 +299,22 @@ See also https://github.com/trixi-framework/Trixi.jl/issues/1671#issuecomment-17 end function calc_volume_integral!(du, u, - mesh::Union{TreeMesh{3}, StructuredMesh{3}, P4estMesh{3}, - T8codeMesh{3}}, + mesh::Union{TreeMesh{3}, StructuredMesh{3}, + P4estMesh{3}, T8codeMesh{3}}, nonconservative_terms, equations, volume_integral::VolumeIntegralFluxDifferencing, dg::DGSEM, cache) + backend = 
backend_or_nothing(cache.elements) + _calc_volume_integral!(backend, du, u, mesh, nonconservative_terms, equations, + volume_integral, dg, cache) +end + +@inline function _calc_volume_integral!(backend::Nothing, du, u, + mesh::Union{TreeMesh{3}, StructuredMesh{3}, + P4estMesh{3}, T8codeMesh{3}}, + nonconservative_terms, equations, + volume_integral::VolumeIntegralFluxDifferencing, + dg::DGSEM, cache) @threaded for element in eachelement(dg, cache) flux_differencing_kernel!(du, u, element, mesh, nonconservative_terms, equations, @@ -1514,6 +1541,12 @@ end function calc_sources!(du, u, t, source_terms, equations::AbstractEquations{3}, dg::DG, cache) + backend = backend_or_nothing(cache.elements) + _calc_sources!(backend, du, u, t, source_terms, equations, dg, cache) +end + +@inline function _calc_sources!(::Nothing, du, u, t, source_terms, + equations::AbstractEquations{3}, dg::DG, cache) @unpack node_coordinates = cache.elements @threaded for element in eachelement(dg, cache) diff --git a/src/solvers/dgsem_unstructured/dg_2d.jl b/src/solvers/dgsem_unstructured/dg_2d.jl index 48d4fe153c6..d3a4ddb6822 100644 --- a/src/solvers/dgsem_unstructured/dg_2d.jl +++ b/src/solvers/dgsem_unstructured/dg_2d.jl @@ -329,7 +329,7 @@ end # Iterate over tuples of boundary condition types and associated indices # in a type-stable way using "lispy tuple programming". 
function calc_boundary_flux_by_type!(cache, t, BCs::NTuple{N, Any}, - BC_indices::NTuple{N, Vector{Int}}, + BC_indices::NTuple{N, <:AbstractVector{<:Integer}}, mesh::Union{UnstructuredMesh2D, P4estMesh, T8codeMesh}, equations, surface_integral, dg::DG) where {N} diff --git a/src/solvers/dgsem_unstructured/sort_boundary_conditions.jl b/src/solvers/dgsem_unstructured/sort_boundary_conditions.jl index 2c2c6876d70..10bff36f34f 100644 --- a/src/solvers/dgsem_unstructured/sort_boundary_conditions.jl +++ b/src/solvers/dgsem_unstructured/sort_boundary_conditions.jl @@ -13,9 +13,10 @@ It stores a set of global indices for each boundary condition type and name to e during the call to `calc_boundary_flux!`. The original dictionary form of the boundary conditions set by the user in the elixir file is also stored for printing. """ -mutable struct UnstructuredSortedBoundaryTypes{N, BCs <: NTuple{N, Any}} +mutable struct UnstructuredSortedBoundaryTypes{N, BCs <: NTuple{N, Any}, + Vec <: AbstractVector{<:Integer}} boundary_condition_types::BCs # specific boundary condition type(s), e.g. 
BoundaryConditionDirichlet - boundary_indices::NTuple{N, Vector{Int}} # integer vectors containing global boundary indices + boundary_indices::NTuple{N, Vec} # integer vectors containing global boundary indices boundary_dictionary::Dict{Symbol, Any} # boundary conditions as set by the user in the elixir file boundary_symbol_indices::Dict{Symbol, Vector{Int}} # integer vectors containing global boundary indices per boundary identifier end @@ -33,10 +34,11 @@ function UnstructuredSortedBoundaryTypes(boundary_conditions::Dict, cache) boundary_symbol_indices = Dict{Symbol, Vector{Int}}() container = UnstructuredSortedBoundaryTypes{n_boundary_types, - typeof(boundary_condition_types)}(boundary_condition_types, - boundary_indices, - boundary_conditions, - boundary_symbol_indices) + typeof(boundary_condition_types), + Vector{Int}}(boundary_condition_types, + boundary_indices, + boundary_conditions, + boundary_symbol_indices) initialize!(container, cache) end @@ -111,4 +113,15 @@ function initialize!(boundary_types_container::UnstructuredSortedBoundaryTypes{N return boundary_types_container end + +function Adapt.adapt_structure(to, bcs::UnstructuredSortedBoundaryTypes) + boundary_indices = Adapt.adapt_structure(to, bcs.boundary_indices) + n_boundary_types = length(bcs.boundary_condition_types) + return UnstructuredSortedBoundaryTypes{n_boundary_types, + typeof(bcs.boundary_condition_types), + eltype(boundary_indices)}(bcs.boundary_condition_types, + boundary_indices, + bcs.boundary_dictionary, + bcs.boundary_symbol_indices) +end end # @muladd diff --git a/test/test_t8code_3d.jl b/test/test_t8code_3d.jl index e07cba1f576..165ddf28ea3 100644 --- a/test/test_t8code_3d.jl +++ b/test/test_t8code_3d.jl @@ -111,6 +111,40 @@ mkdir(outdir) end end + # This test differs from the one in `test_p4est_3d.jl` in the latitudinal and + # longitudinal dimensions. 
+ @trixi_testset "elixir_advection_cubed_sphere.jl" begin + @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_cubed_sphere.jl"), + l2=[0.002006918015656413], + linf=[0.027655117058380085]) + # Ensure that we do not have excessive memory allocations + # (e.g., from type instabilities) + let + t = sol.t[end] + u_ode = sol.u[end] + du_ode = similar(u_ode) + @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 + end + end + + # This test is identical to the one in `test_p4est_3d.jl`. + @trixi_testset "elixir_advection_restart.jl" begin + @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_restart.jl"), + l2=[0.002590388934758452], + linf=[0.01840757696885409], + # With the default `maxiters = 1` in coverage tests, + # there would be no time steps after the restart. + coverage_override=(maxiters = 100_000,)) + # Ensure that we do not have excessive memory allocations + # (e.g., from type instabilities) + let + t = sol.t[end] + u_ode = sol.u[end] + du_ode = similar(u_ode) + @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 + end + end + # This test is identical to the one in `test_p4est_3d.jl`. @trixi_testset "elixir_advection_restart.jl" begin @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_restart.jl"), @@ -302,30 +336,29 @@ mkdir(outdir) end end - @trixi_testset "elixir_euler_convergence_pure_fv.jl" begin - @test_trixi_include(joinpath(pkgdir(Trixi, "examples", "tree_3d_dgsem"), - "elixir_euler_convergence_pure_fv.jl"), + # This test is identical to the one in `test_p4est_3d.jl` besides minor + # deviations in the expected error norms. 
+ @trixi_testset "elixir_euler_baroclinic_instability.jl" begin + @test_trixi_include(joinpath(EXAMPLES_DIR, + "elixir_euler_baroclinic_instability.jl"), l2=[ - 0.037182410351406, - 0.032062252638283974, - 0.032062252638283974, - 0.03206225263828395, - 0.12228177813586687 + 6.725093801700048e-7, + 0.00021710076010951073, + 0.0004386796338203878, + 0.00020836270267103122, + 0.07601887903440395, ], linf=[ - 0.0693648413632646, - 0.0622101894740843, - 0.06221018947408474, - 0.062210189474084965, - 0.24196451799555962 + 1.9107530539574924e-5, + 0.02980358831035801, + 0.048476331898047564, + 0.02200137344113612, + 4.848310144356219, ], - mesh=T8codeMesh((4, 4, 4), polydeg = 3, - coordinates_min = (0.0, 0.0, 0.0), - coordinates_max = (2.0, 2.0, 2.0)), - # Remove SaveSolution callback - callbacks=CallbackSet(summary_callback, - analysis_callback, alive_callback, - stepsize_callback)) + tspan=(0.0, 1e2), + # Decrease tolerance of adaptive time stepping to get similar results across different systems + abstol=1.0e-9, reltol=1.0e-9, + coverage_override=(lat_lon_levels = 0, layers = 1, polydeg = 3)) # Prevent long compile time in CI # Ensure that we do not have excessive memory allocations # (e.g., from type instabilities) let