diff --git a/Project.toml b/Project.toml index 4770bf7..1f484e4 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "PartitionedLS" uuid = "19f41c5e-8610-11e9-2f2a-0d67e7c5027f" authors = ["Roberto Esposito "] -version = "1.0.1" +version = "1.0.2" [deps] CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" diff --git a/docs/Manifest.toml b/docs/Manifest.toml index f3eea42..e935628 100644 --- a/docs/Manifest.toml +++ b/docs/Manifest.toml @@ -9,32 +9,11 @@ git-tree-sha1 = "574baf8110975760d391c710b6341da1afa48d8c" uuid = "a4c015fc-c6ff-483c-b24f-f7ea428134e9" version = "0.0.1" -[[deps.ARFFFiles]] -deps = ["CategoricalArrays", "Dates", "Parsers", "Tables"] -git-tree-sha1 = "e8c8e0a2be6eb4f56b1672e46004463033daa409" -uuid = "da404889-ca92-49ff-9e8b-0aa6b4d38dc8" -version = "1.4.1" - [[deps.AbstractTrees]] git-tree-sha1 = "2d9c9a55f9c93e8887ad391fbae72f8ef55e1177" uuid = "1520ce14-60c1-5f80-bbc7-55ef81b5835c" version = "0.4.5" -[[deps.Adapt]] -deps = ["LinearAlgebra", "Requires"] -git-tree-sha1 = "6a55b747d1812e699320963ffde36f1ebdda4099" -uuid = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" -version = "4.0.4" -weakdeps = ["StaticArrays"] - - [deps.Adapt.extensions] - AdaptStaticArraysExt = "StaticArrays" - -[[deps.ArgCheck]] -git-tree-sha1 = "a3a402a35a2f7e0b87828ccabbd5ebfbebe356b4" -uuid = "dce04be8-c92d-5529-be00-80e4d2c0e197" -version = "2.3.0" - [[deps.ArgTools]] uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f" version = "1.1.1" @@ -42,119 +21,21 @@ version = "1.1.1" [[deps.Artifacts]] uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" -[[deps.Atomix]] -deps = ["UnsafeAtomics"] -git-tree-sha1 = "c06a868224ecba914baa6942988e2f2aade419be" -uuid = "a9b6321e-bd34-4604-b9c9-b65b8de01458" -version = "0.1.0" - -[[deps.BangBang]] -deps = ["Compat", "ConstructionBase", "InitialValues", "LinearAlgebra", "Requires", "Setfield", "Tables"] -git-tree-sha1 = "7aa7ad1682f3d5754e3491bb59b8103cae28e3a3" -uuid = "198e06fe-97b7-11e9-32a5-e1d131e6ad66" -version = "0.3.40" - - 
[deps.BangBang.extensions] - BangBangChainRulesCoreExt = "ChainRulesCore" - BangBangDataFramesExt = "DataFrames" - BangBangStaticArraysExt = "StaticArrays" - BangBangStructArraysExt = "StructArrays" - BangBangTypedTablesExt = "TypedTables" - - [deps.BangBang.weakdeps] - ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" - DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" - StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" - StructArrays = "09ab397b-f2b6-538f-b94a-2f83cf4a842a" - TypedTables = "9d95f2ec-7b3d-5a63-8d20-e2491e220bb9" - [[deps.Base64]] uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" -[[deps.Baselet]] -git-tree-sha1 = "aebf55e6d7795e02ca500a689d326ac979aaf89e" -uuid = "9718e550-a3fa-408a-8086-8db961cd8217" -version = "0.1.1" - -[[deps.BitFlags]] -git-tree-sha1 = "2dc09997850d68179b69dafb58ae806167a32b1b" -uuid = "d1d4a3ce-64b1-5f1a-9ba4-7e7e69966f35" -version = "0.1.8" - -[[deps.CEnum]] -git-tree-sha1 = "389ad5c84de1ae7cf0e28e381131c98ea87d54fc" -uuid = "fa961155-64e5-5f13-b03f-caf6b980ea82" -version = "0.5.0" - [[deps.CSV]] deps = ["CodecZlib", "Dates", "FilePathsBase", "InlineStrings", "Mmap", "Parsers", "PooledArrays", "PrecompileTools", "SentinelArrays", "Tables", "Unicode", "WeakRefStrings", "WorkerUtilities"] git-tree-sha1 = "a44910ceb69b0d44fe262dd451ab11ead3ed0be8" uuid = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" version = "0.10.13" -[[deps.Calculus]] -deps = ["LinearAlgebra"] -git-tree-sha1 = "f641eb0a4f00c343bbc32346e1217b86f3ce9dad" -uuid = "49dc2e85-a5d0-5ad3-a950-438e2897f1b9" -version = "0.5.1" - -[[deps.CategoricalArrays]] -deps = ["DataAPI", "Future", "Missings", "Printf", "Requires", "Statistics", "Unicode"] -git-tree-sha1 = "1568b28f91293458345dabba6a5ea3f183250a61" -uuid = "324d7699-5711-5eae-9e2f-1d82baa6b597" -version = "0.10.8" - - [deps.CategoricalArrays.extensions] - CategoricalArraysJSONExt = "JSON" - CategoricalArraysRecipesBaseExt = "RecipesBase" - CategoricalArraysSentinelArraysExt = "SentinelArrays" - 
CategoricalArraysStructTypesExt = "StructTypes" - - [deps.CategoricalArrays.weakdeps] - JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" - RecipesBase = "3cdcf5f2-1ef4-517c-9805-6587b60abb01" - SentinelArrays = "91c51154-3ec4-41a3-a24f-3f23e20d615c" - StructTypes = "856f2bd8-1eba-4b0a-8007-ebc267875bd4" - -[[deps.CategoricalDistributions]] -deps = ["CategoricalArrays", "Distributions", "Missings", "OrderedCollections", "Random", "ScientificTypes"] -git-tree-sha1 = "926862f549a82d6c3a7145bc7f1adff2a91a39f0" -uuid = "af321ab8-2d2e-40a6-b165-3d674595d28e" -version = "0.1.15" - - [deps.CategoricalDistributions.extensions] - UnivariateFiniteDisplayExt = "UnicodePlots" - - [deps.CategoricalDistributions.weakdeps] - UnicodePlots = "b8865327-cd53-5732-bb35-84acbb429228" - -[[deps.ChainRulesCore]] -deps = ["Compat", "LinearAlgebra"] -git-tree-sha1 = "575cd02e080939a33b6df6c5853d14924c08e35b" -uuid = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" -version = "1.23.0" -weakdeps = ["SparseArrays"] - - [deps.ChainRulesCore.extensions] - ChainRulesCoreSparseArraysExt = "SparseArrays" - [[deps.CodecZlib]] deps = ["TranscodingStreams", "Zlib_jll"] git-tree-sha1 = "59939d8a997469ee05c4b4944560a820f9ba0d73" uuid = "944b1d66-785c-5afd-91f1-9de20f533193" version = "0.7.4" -[[deps.ColorTypes]] -deps = ["FixedPointNumbers", "Random"] -git-tree-sha1 = "eb7f0f8307f71fac7c606984ea5fb2817275d6e4" -uuid = "3da002f7-5984-5a60-b8a6-cbb66c0b333f" -version = "0.11.4" - -[[deps.Combinatorics]] -git-tree-sha1 = "08c8b6831dc00bfea825826be0bc8336fc369860" -uuid = "861a8166-3701-5b0c-9a16-15d98fcdc6aa" -version = "1.0.2" - [[deps.Compat]] deps = ["TOML", "UUIDs"] git-tree-sha1 = "c955881e3c981181362ae4088b35995446298b80" @@ -170,70 +51,11 @@ deps = ["Artifacts", "Libdl"] uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae" version = "1.1.0+0" -[[deps.CompositionsBase]] -git-tree-sha1 = "802bb88cd69dfd1509f6670416bd4434015693ad" -uuid = "a33af91c-f02d-484b-be07-31d278c5ca2b" -version = "0.1.2" - - 
[deps.CompositionsBase.extensions] - CompositionsBaseInverseFunctionsExt = "InverseFunctions" - - [deps.CompositionsBase.weakdeps] - InverseFunctions = "3587e190-3f89-42d0-90ee-14403ec27112" - -[[deps.ComputationalResources]] -git-tree-sha1 = "52cb3ec90e8a8bea0e62e275ba577ad0f74821f7" -uuid = "ed09eef8-17a6-5b46-8889-db040fac31e3" -version = "0.3.2" - -[[deps.ConcurrentUtilities]] -deps = ["Serialization", "Sockets"] -git-tree-sha1 = "6cbbd4d241d7e6579ab354737f4dd95ca43946e1" -uuid = "f0e56b4a-5159-44fe-b623-3e5288b988bb" -version = "2.4.1" - -[[deps.ConstructionBase]] -deps = ["LinearAlgebra"] -git-tree-sha1 = "260fd2400ed2dab602a7c15cf10c1933c59930a2" -uuid = "187b0558-2788-49d3-abe0-74a17ed4e7c9" -version = "1.5.5" - - [deps.ConstructionBase.extensions] - ConstructionBaseIntervalSetsExt = "IntervalSets" - ConstructionBaseStaticArraysExt = "StaticArrays" - - [deps.ConstructionBase.weakdeps] - IntervalSets = "8197267c-284f-5f27-9208-e0e47529a953" - StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" - -[[deps.ContextVariablesX]] -deps = ["Compat", "Logging", "UUIDs"] -git-tree-sha1 = "25cc3803f1030ab855e383129dcd3dc294e322cc" -uuid = "6add18c4-b38d-439d-96f6-d6bc489c04c5" -version = "0.1.3" - -[[deps.Crayons]] -git-tree-sha1 = "249fe38abf76d48563e2f4556bebd215aa317e15" -uuid = "a8cc5b0e-0ffa-5ad4-8c14-923d3ee1735f" -version = "4.1.1" - [[deps.DataAPI]] git-tree-sha1 = "abe83f3a2f1b857aac70ef8b269080af17764bbe" uuid = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a" version = "1.16.0" -[[deps.DataFrames]] -deps = ["Compat", "DataAPI", "DataStructures", "Future", "InlineStrings", "InvertedIndices", "IteratorInterfaceExtensions", "LinearAlgebra", "Markdown", "Missings", "PooledArrays", "PrecompileTools", "PrettyTables", "Printf", "REPL", "Random", "Reexport", "SentinelArrays", "SortingAlgorithms", "Statistics", "TableTraits", "Tables", "Unicode"] -git-tree-sha1 = "04c738083f29f86e62c8afc341f0967d8717bdb8" -uuid = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" -version = "1.6.1" - 
-[[deps.DataStructures]] -deps = ["Compat", "InteractiveUtils", "OrderedCollections"] -git-tree-sha1 = "0f4b5d62a88d8f59003e43c25a8a90de9eb76317" -uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" -version = "0.18.18" - [[deps.DataValueInterfaces]] git-tree-sha1 = "bfc1187b79289637fa0ef6d4436ebdfe6905cbd6" uuid = "e2d170a0-9d28-54be-80f0-106bbe20a464" @@ -243,48 +65,10 @@ version = "1.0.0" deps = ["Printf"] uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" -[[deps.DefineSingletons]] -git-tree-sha1 = "0fba8b706d0178b4dc7fd44a96a92382c9065c2c" -uuid = "244e2a9f-e319-4986-a169-4d1fe445cd52" -version = "0.1.2" - -[[deps.DelimitedFiles]] -deps = ["Mmap"] -git-tree-sha1 = "9e2f36d3c96a820c678f2f1f1782582fcf685bae" -uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab" -version = "1.9.1" - -[[deps.Distances]] -deps = ["LinearAlgebra", "Statistics", "StatsAPI"] -git-tree-sha1 = "66c4c81f259586e8f002eacebc177e1fb06363b0" -uuid = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7" -version = "0.10.11" -weakdeps = ["ChainRulesCore", "SparseArrays"] - - [deps.Distances.extensions] - DistancesChainRulesCoreExt = "ChainRulesCore" - DistancesSparseArraysExt = "SparseArrays" - [[deps.Distributed]] deps = ["Random", "Serialization", "Sockets"] uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b" -[[deps.Distributions]] -deps = ["FillArrays", "LinearAlgebra", "PDMats", "Printf", "QuadGK", "Random", "SpecialFunctions", "Statistics", "StatsAPI", "StatsBase", "StatsFuns"] -git-tree-sha1 = "7c302d7a5fec5214eb8a5a4c466dcf7a51fcf169" -uuid = "31c24e10-a181-5473-b8eb-7969acd0382f" -version = "0.25.107" - - [deps.Distributions.extensions] - DistributionsChainRulesCoreExt = "ChainRulesCore" - DistributionsDensityInterfaceExt = "DensityInterface" - DistributionsTestExt = "Test" - - [deps.Distributions.weakdeps] - ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" - DensityInterface = "b429d917-457f-4dbc-8f4c-0cc954292b1d" - Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" - [[deps.DocStringExtensions]] deps = ["LibGit2"] 
git-tree-sha1 = "2fb1e02f2b635d0845df5d7c167fec4dd739b00d" @@ -302,42 +86,12 @@ deps = ["ArgTools", "FileWatching", "LibCURL", "NetworkOptions"] uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6" version = "1.6.0" -[[deps.DualNumbers]] -deps = ["Calculus", "NaNMath", "SpecialFunctions"] -git-tree-sha1 = "5837a837389fccf076445fce071c8ddaea35a566" -uuid = "fa6b7ba4-c1ee-5f82-b5fc-ecf0adba8f74" -version = "0.6.8" - -[[deps.EarlyStopping]] -deps = ["Dates", "Statistics"] -git-tree-sha1 = "98fdf08b707aaf69f524a6cd0a67858cefe0cfb6" -uuid = "792122b4-ca99-40de-a6bc-6742525f08b6" -version = "0.3.0" - -[[deps.ExceptionUnwrapping]] -deps = ["Test"] -git-tree-sha1 = "dcb08a0d93ec0b1cdc4af184b26b591e9695423a" -uuid = "460bff9d-24e4-43bc-9d9f-a8973cb893f4" -version = "0.1.10" - [[deps.Expat_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl"] git-tree-sha1 = "4558ab818dcceaab612d1bb8c19cee87eda2b83c" uuid = "2e619515-83b5-522b-bb60-26c02a35a201" version = "2.5.0+0" -[[deps.FLoops]] -deps = ["BangBang", "Compat", "FLoopsBase", "InitialValues", "JuliaVariables", "MLStyle", "Serialization", "Setfield", "Transducers"] -git-tree-sha1 = "ffb97765602e3cbe59a0589d237bf07f245a8576" -uuid = "cc61a311-1640-44b5-9fba-1b764f453329" -version = "0.2.1" - -[[deps.FLoopsBase]] -deps = ["ContextVariablesX"] -git-tree-sha1 = "656f7a6859be8673bf1f35da5670246b923964f7" -uuid = "b9860ae5-e623-471e-878b-f6a53c775ea6" -version = "0.1.1" - [[deps.FilePathsBase]] deps = ["Compat", "Dates", "Mmap", "Printf", "Test", "UUIDs"] git-tree-sha1 = "9f00e42f8d99fdde64d40c8ea5d14269a2e2c1aa" @@ -347,34 +101,10 @@ version = "0.9.21" [[deps.FileWatching]] uuid = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee" -[[deps.FillArrays]] -deps = ["LinearAlgebra"] -git-tree-sha1 = "bfe82a708416cf00b73a3198db0859c82f741558" -uuid = "1a297f60-69ca-5386-bcde-b61e274b549b" -version = "1.10.0" -weakdeps = ["PDMats", "SparseArrays", "Statistics"] - - [deps.FillArrays.extensions] - FillArraysPDMatsExt = "PDMats" - FillArraysSparseArraysExt = 
"SparseArrays" - FillArraysStatisticsExt = "Statistics" - -[[deps.FixedPointNumbers]] -deps = ["Statistics"] -git-tree-sha1 = "335bfdceacc84c5cdf16aadc768aa5ddfc5383cc" -uuid = "53c48c17-4a7d-5ca2-90c5-79b7896eea93" -version = "0.8.4" - [[deps.Future]] deps = ["Random"] uuid = "9fa8497b-333b-5362-9e8d-4d0656e87820" -[[deps.GPUArraysCore]] -deps = ["Adapt"] -git-tree-sha1 = "ec632f177c0d990e64d955ccc1b8c04c485a0950" -uuid = "46192b85-c4d5-4398-a991-12ede77f4527" -version = "0.1.6" - [[deps.Git]] deps = ["Git_jll"] git-tree-sha1 = "04eff47b1354d702c3a85e8ab23d539bb7d5957e" @@ -387,29 +117,12 @@ git-tree-sha1 = "12945451c5d0e2d0dca0724c3a8d6448b46bbdf9" uuid = "f8c6e375-362e-5223-8a59-34ff63f689eb" version = "2.44.0+1" -[[deps.HTTP]] -deps = ["Base64", "CodecZlib", "ConcurrentUtilities", "Dates", "ExceptionUnwrapping", "Logging", "LoggingExtras", "MbedTLS", "NetworkOptions", "OpenSSL", "Random", "SimpleBufferStream", "Sockets", "URIs", "UUIDs"] -git-tree-sha1 = "8e59b47b9dc525b70550ca082ce85bcd7f5477cd" -uuid = "cd3eb016-35fb-5094-929b-558a96fad6f3" -version = "1.10.5" - -[[deps.HypergeometricFunctions]] -deps = ["DualNumbers", "LinearAlgebra", "OpenLibm_jll", "SpecialFunctions"] -git-tree-sha1 = "f218fe3736ddf977e0e772bc9a586b2383da2685" -uuid = "34004b35-14d8-5ef3-9330-4cdb6864b03a" -version = "0.3.23" - [[deps.IOCapture]] deps = ["Logging", "Random"] git-tree-sha1 = "8b72179abc660bfab5e28472e019392b97d0985c" uuid = "b5f81e59-6552-4d32-b1f0-c071b021bf89" version = "0.2.4" -[[deps.InitialValues]] -git-tree-sha1 = "4da0f88e9a39111c2fa3add390ab15f3a44f3ca3" -uuid = "22cec73e-a1b8-11e9-2c92-598750a2cf9c" -version = "0.3.1" - [[deps.InlineStrings]] deps = ["Parsers"] git-tree-sha1 = "9cc2baf75c6d09f9da536ddf58eb2f29dedaf461" @@ -420,22 +133,6 @@ version = "1.4.0" deps = ["Markdown"] uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" -[[deps.InvertedIndices]] -git-tree-sha1 = "0dc7b50b8d436461be01300fd8cd45aa0274b038" -uuid = "41ab1584-1d38-5bbf-9106-f11c6c58b48f" -version = 
"1.3.0" - -[[deps.IrrationalConstants]] -git-tree-sha1 = "630b497eafcc20001bba38a4651b327dcfc491d2" -uuid = "92d709cd-6900-40b7-9082-c6be49f344b6" -version = "0.2.2" - -[[deps.IterationControl]] -deps = ["EarlyStopping", "InteractiveUtils"] -git-tree-sha1 = "e663925ebc3d93c1150a7570d114f9ea2f664726" -uuid = "b3c1a2ee-3fec-4384-bf48-272ea71de57c" -version = "0.5.4" - [[deps.IteratorInterfaceExtensions]] git-tree-sha1 = "a3f24677c21f5bbe9d2a714f95dcd58337fb2856" uuid = "82899510-4779-5014-852e-03e436cf321d" @@ -453,68 +150,11 @@ git-tree-sha1 = "31e996f0a15c7b280ba9f76636b3ff9e2ae58c9a" uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" version = "0.21.4" -[[deps.JuliaVariables]] -deps = ["MLStyle", "NameResolution"] -git-tree-sha1 = "49fb3cb53362ddadb4415e9b73926d6b40709e70" -uuid = "b14d175d-62b4-44ba-8fb7-3064adc8c3ec" -version = "0.2.4" - -[[deps.KernelAbstractions]] -deps = ["Adapt", "Atomix", "InteractiveUtils", "LinearAlgebra", "MacroTools", "PrecompileTools", "Requires", "SparseArrays", "StaticArrays", "UUIDs", "UnsafeAtomics", "UnsafeAtomicsLLVM"] -git-tree-sha1 = "ed7167240f40e62d97c1f5f7735dea6de3cc5c49" -uuid = "63c18a36-062a-441e-b654-da1e3ab1ce7c" -version = "0.9.18" - - [deps.KernelAbstractions.extensions] - EnzymeExt = "EnzymeCore" - - [deps.KernelAbstractions.weakdeps] - EnzymeCore = "f151be2c-9106-41f4-ab19-57ee4f262869" - -[[deps.LLVM]] -deps = ["CEnum", "LLVMExtra_jll", "Libdl", "Preferences", "Printf", "Requires", "Unicode"] -git-tree-sha1 = "839c82932db86740ae729779e610f07a1640be9a" -uuid = "929cbde3-209d-540e-8aea-75f648917ca0" -version = "6.6.3" - - [deps.LLVM.extensions] - BFloat16sExt = "BFloat16s" - - [deps.LLVM.weakdeps] - BFloat16s = "ab4f0b2a-ad5b-11e8-123f-65d77653426b" - -[[deps.LLVMExtra_jll]] -deps = ["Artifacts", "JLLWrappers", "LazyArtifacts", "Libdl", "TOML"] -git-tree-sha1 = "88b916503aac4fb7f701bb625cd84ca5dd1677bc" -uuid = "dad2f222-ce93-54a1-a47d-0025e8a3acab" -version = "0.0.29+0" - -[[deps.LaTeXStrings]] -git-tree-sha1 = 
"50901ebc375ed41dbf8058da26f9de442febbbec" -uuid = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f" -version = "1.3.1" - -[[deps.LatinHypercubeSampling]] -deps = ["Random", "StableRNGs", "StatsBase", "Test"] -git-tree-sha1 = "825289d43c753c7f1bf9bed334c253e9913997f8" -uuid = "a5e1c1ea-c99a-51d3-a14d-a9a37257b02d" -version = "1.9.0" - [[deps.LazilyInitializedFields]] git-tree-sha1 = "8f7f3cabab0fd1800699663533b6d5cb3fc0e612" uuid = "0e77f7df-68c5-4e49-93ce-4cd80f5598bf" version = "1.2.2" -[[deps.LazyArtifacts]] -deps = ["Artifacts", "Pkg"] -uuid = "4af54fe1-eca0-43a8-85a7-787d91b784e3" - -[[deps.LearnAPI]] -deps = ["InteractiveUtils", "Statistics"] -git-tree-sha1 = "ec695822c1faaaa64cee32d0b21505e1977b4809" -uuid = "92ad9a40-7767-427a-9ee6-6e577f1266cb" -version = "0.1.0" - [[deps.LibCURL]] deps = ["LibCURL_jll", "MozillaCACerts_jll"] uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21" @@ -552,112 +192,15 @@ version = "1.17.0+0" deps = ["Libdl", "OpenBLAS_jll", "libblastrampoline_jll"] uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" -[[deps.LogExpFunctions]] -deps = ["DocStringExtensions", "IrrationalConstants", "LinearAlgebra"] -git-tree-sha1 = "18144f3e9cbe9b15b070288eef858f71b291ce37" -uuid = "2ab3a3ac-af41-5b50-aa03-7779005ae688" -version = "0.3.27" - - [deps.LogExpFunctions.extensions] - LogExpFunctionsChainRulesCoreExt = "ChainRulesCore" - LogExpFunctionsChangesOfVariablesExt = "ChangesOfVariables" - LogExpFunctionsInverseFunctionsExt = "InverseFunctions" - - [deps.LogExpFunctions.weakdeps] - ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" - ChangesOfVariables = "9e997f8a-9a97-42d5-a9f1-ce6bfc15e2c0" - InverseFunctions = "3587e190-3f89-42d0-90ee-14403ec27112" - [[deps.Logging]] uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" -[[deps.LoggingExtras]] -deps = ["Dates", "Logging"] -git-tree-sha1 = "c1dd6d7978c12545b4179fb6153b9250c96b0075" -uuid = "e6f89c97-d47a-5376-807f-9c37f3926c36" -version = "1.0.3" - -[[deps.MLFlowClient]] -deps = ["Dates", "FilePathsBase", "HTTP", 
"JSON", "ShowCases", "URIs", "UUIDs"] -git-tree-sha1 = "049b39a208b052d020e18a0850ca9d228a11ef16" -uuid = "64a0f543-368b-4a9a-827a-e71edb2a0b83" -version = "0.4.6" - -[[deps.MLJ]] -deps = ["CategoricalArrays", "ComputationalResources", "Distributed", "Distributions", "LinearAlgebra", "MLJBalancing", "MLJBase", "MLJEnsembles", "MLJFlow", "MLJIteration", "MLJModels", "MLJTuning", "OpenML", "Pkg", "ProgressMeter", "Random", "Reexport", "ScientificTypes", "StatisticalMeasures", "Statistics", "StatsBase", "Tables"] -git-tree-sha1 = "a49aa31103f78b4c13e8d6beb13c5091cce82303" -uuid = "add582a8-e3ab-11e8-2d5e-e98b27df1bc7" -version = "0.20.3" - -[[deps.MLJBalancing]] -deps = ["MLJBase", "MLJModelInterface", "MLUtils", "OrderedCollections", "Random", "StatsBase"] -git-tree-sha1 = "f02e28f9f3c54a138db12a97a5d823e5e572c2d6" -uuid = "45f359ea-796d-4f51-95a5-deb1a414c586" -version = "0.1.4" - -[[deps.MLJBase]] -deps = ["CategoricalArrays", "CategoricalDistributions", "ComputationalResources", "Dates", "DelimitedFiles", "Distributed", "Distributions", "InteractiveUtils", "InvertedIndices", "LearnAPI", "LinearAlgebra", "MLJModelInterface", "Missings", "OrderedCollections", "Parameters", "PrettyTables", "ProgressMeter", "Random", "RecipesBase", "Reexport", "ScientificTypes", "Serialization", "StatisticalMeasuresBase", "StatisticalTraits", "Statistics", "StatsBase", "Tables"] -git-tree-sha1 = "17d160e8f796ab5ceb4c017bc4019d21fd686a35" -uuid = "a7f614a8-145f-11e9-1d2a-a57a1082229d" -version = "1.2.1" -weakdeps = ["StatisticalMeasures"] - - [deps.MLJBase.extensions] - DefaultMeasuresExt = "StatisticalMeasures" - -[[deps.MLJEnsembles]] -deps = ["CategoricalArrays", "CategoricalDistributions", "ComputationalResources", "Distributed", "Distributions", "MLJModelInterface", "ProgressMeter", "Random", "ScientificTypesBase", "StatisticalMeasuresBase", "StatsBase"] -git-tree-sha1 = "94403b2c8f692011df6731913376e0e37f6c0fe9" -uuid = "50ed68f4-41fd-4504-931a-ed422449fee0" -version = "0.4.0" - 
-[[deps.MLJFlow]] -deps = ["MLFlowClient", "MLJBase", "MLJModelInterface"] -git-tree-sha1 = "79989f284c1f6c39eef70f6c8a39736e4f8d3d02" -uuid = "7b7b8358-b45c-48ea-a8ef-7ca328ad328f" -version = "0.4.1" - -[[deps.MLJIteration]] -deps = ["IterationControl", "MLJBase", "Random", "Serialization"] -git-tree-sha1 = "1e909ee09417ebd18559c4d9c15febff887192df" -uuid = "614be32b-d00c-4edb-bd02-1eb411ab5e55" -version = "0.6.1" - [[deps.MLJModelInterface]] deps = ["Random", "ScientificTypesBase", "StatisticalTraits"] git-tree-sha1 = "d2a45e1b5998ba3fdfb6cfe0c81096d4c7fb40e7" uuid = "e80e1ace-859a-464e-9ed9-23947d8ae3ea" version = "1.9.6" -[[deps.MLJModels]] -deps = ["CategoricalArrays", "CategoricalDistributions", "Combinatorics", "Dates", "Distances", "Distributions", "InteractiveUtils", "LinearAlgebra", "MLJModelInterface", "Markdown", "OrderedCollections", "Parameters", "Pkg", "PrettyPrinting", "REPL", "Random", "RelocatableFolders", "ScientificTypes", "StatisticalTraits", "Statistics", "StatsBase", "Tables"] -git-tree-sha1 = "dd99a80f39cae8b112823d279dfa08ae872b4f3e" -uuid = "d491faf4-2d78-11e9-2867-c94bc002c0b7" -version = "0.16.16" - -[[deps.MLJTuning]] -deps = ["ComputationalResources", "Distributed", "Distributions", "LatinHypercubeSampling", "MLJBase", "ProgressMeter", "Random", "RecipesBase", "StatisticalMeasuresBase"] -git-tree-sha1 = "4a2c14b9529753db3ece53fd635c609220200507" -uuid = "03970b2e-30c4-11ea-3135-d1576263f10f" -version = "0.8.4" - -[[deps.MLStyle]] -git-tree-sha1 = "bc38dff0548128765760c79eb7388a4b37fae2c8" -uuid = "d8e11817-5142-5d16-987a-aa16d5891078" -version = "0.4.17" - -[[deps.MLUtils]] -deps = ["ChainRulesCore", "Compat", "DataAPI", "DelimitedFiles", "FLoops", "NNlib", "Random", "ShowCases", "SimpleTraits", "Statistics", "StatsBase", "Tables", "Transducers"] -git-tree-sha1 = "b45738c2e3d0d402dffa32b2c1654759a2ac35a4" -uuid = "f1d291b0-491e-4a28-83b9-f70985020b54" -version = "0.4.4" - -[[deps.MacroTools]] -deps = ["Markdown", "Random"] 
-git-tree-sha1 = "2fa9ee3e63fd3a4f7a9a4f4744a52f4856de82df" -uuid = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" -version = "0.5.13" - [[deps.Markdown]] deps = ["Base64"] uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" @@ -668,29 +211,11 @@ git-tree-sha1 = "465a70f0fc7d443a00dcdc3267a497397b8a3899" uuid = "d0879d2d-cac2-40c8-9cee-1863dc0c7391" version = "0.1.2" -[[deps.MbedTLS]] -deps = ["Dates", "MbedTLS_jll", "MozillaCACerts_jll", "NetworkOptions", "Random", "Sockets"] -git-tree-sha1 = "c067a280ddc25f196b5e7df3877c6b226d390aaf" -uuid = "739be429-bea8-5141-9913-cc70e7f3736d" -version = "1.1.9" - [[deps.MbedTLS_jll]] deps = ["Artifacts", "Libdl"] uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1" version = "2.28.2+1" -[[deps.MicroCollections]] -deps = ["BangBang", "InitialValues", "Setfield"] -git-tree-sha1 = "629afd7d10dbc6935ec59b32daeb33bc4460a42e" -uuid = "128add7d-3638-4c79-886c-908ea0c25c34" -version = "0.1.4" - -[[deps.Missings]] -deps = ["DataAPI"] -git-tree-sha1 = "f66bdc5de519e8f8ae43bdc598782d35a25b1272" -uuid = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28" -version = "1.1.0" - [[deps.Mmap]] uuid = "a63ad114-7e13-5084-954f-fe012c677804" @@ -698,36 +223,6 @@ uuid = "a63ad114-7e13-5084-954f-fe012c677804" uuid = "14a3606d-f60d-562e-9121-12d972cd8159" version = "2023.1.10" -[[deps.NNlib]] -deps = ["Adapt", "Atomix", "ChainRulesCore", "GPUArraysCore", "KernelAbstractions", "LinearAlgebra", "Pkg", "Random", "Requires", "Statistics"] -git-tree-sha1 = "1fa1a14766c60e66ab22e242d45c1857c83a3805" -uuid = "872c559c-99b0-510c-b3b7-b6c96a88d5cd" -version = "0.9.13" - - [deps.NNlib.extensions] - NNlibAMDGPUExt = "AMDGPU" - NNlibCUDACUDNNExt = ["CUDA", "cuDNN"] - NNlibCUDAExt = "CUDA" - NNlibEnzymeCoreExt = "EnzymeCore" - - [deps.NNlib.weakdeps] - AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e" - CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" - EnzymeCore = "f151be2c-9106-41f4-ab19-57ee4f262869" - cuDNN = "02a925ec-e4fe-4b08-9a7e-0d78e3d38ccd" - -[[deps.NaNMath]] -deps = ["OpenLibm_jll"] 
-git-tree-sha1 = "0877504529a3e5c3343c6f8b4c0381e57e4387e4" -uuid = "77ba4419-2d1f-58cd-9bb1-8ffee604a2e3" -version = "1.0.2" - -[[deps.NameResolution]] -deps = ["PrettyPrint"] -git-tree-sha1 = "1a0fa0e9613f46c9b8c11eee38ebb4f590013c5e" -uuid = "71a1bf82-56d0-4bbc-8a3c-48b961074391" -version = "0.1.5" - [[deps.NetworkOptions]] uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908" version = "1.2.0" @@ -743,35 +238,12 @@ deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl"] uuid = "4536629a-c528-5b80-bd46-f80d51c5b363" version = "0.3.23+4" -[[deps.OpenLibm_jll]] -deps = ["Artifacts", "Libdl"] -uuid = "05823500-19ac-5b8b-9628-191a04bc5112" -version = "0.8.1+2" - -[[deps.OpenML]] -deps = ["ARFFFiles", "HTTP", "JSON", "Markdown", "Pkg", "Scratch"] -git-tree-sha1 = "6efb039ae888699d5a74fb593f6f3e10c7193e33" -uuid = "8b6db2d4-7670-4922-a472-f9537c81ab66" -version = "0.3.1" - -[[deps.OpenSSL]] -deps = ["BitFlags", "Dates", "MozillaCACerts_jll", "OpenSSL_jll", "Sockets"] -git-tree-sha1 = "af81a32750ebc831ee28bdaaba6e1067decef51e" -uuid = "4d8831e6-92b7-49fb-bdf8-b643e874388c" -version = "1.4.2" - [[deps.OpenSSL_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl"] git-tree-sha1 = "3da7367955dcc5c54c1ba4d402ccdc09a1a3e046" uuid = "458c3c95-2e84-50aa-8efc-19380b2a3a95" version = "3.0.13+1" -[[deps.OpenSpecFun_jll]] -deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl", "Pkg"] -git-tree-sha1 = "13652491f6856acfd2db29360e1bbcd4565d04f1" -uuid = "efe28fd5-8261-553b-a9e1-b2916fc3738e" -version = "0.5.5+0" - [[deps.OrderedCollections]] git-tree-sha1 = "dfdf5519f235516220579f949664f1bf44e741c5" uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" @@ -782,18 +254,6 @@ deps = ["Artifacts", "Libdl"] uuid = "efcefdf7-47ab-520b-bdef-62a2eaa19f15" version = "10.42.0+1" -[[deps.PDMats]] -deps = ["LinearAlgebra", "SparseArrays", "SuiteSparse"] -git-tree-sha1 = "949347156c25054de2db3b166c52ac4728cbad65" -uuid = "90014a1f-27ba-587c-ab20-58faa44d9150" -version = "0.11.31" - 
-[[deps.Parameters]] -deps = ["OrderedCollections", "UnPack"] -git-tree-sha1 = "34c0e9ad262e5f7fc75b10a9952ca7692cfc5fbe" -uuid = "d96e819e-fc66-5662-9728-84c9c7592b0a" -version = "0.12.3" - [[deps.Parsers]] deps = ["Dates", "PrecompileTools", "UUIDs"] git-tree-sha1 = "8489905bcdbcfac64d1daa51ca07c0d8f0283821" @@ -801,10 +261,10 @@ uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0" version = "2.8.1" [[deps.PartitionedLS]] -deps = ["CSV", "DataFrames", "DocStringExtensions", "LinearAlgebra", "MLJ", "MLJBase", "MLJModelInterface", "NonNegLeastSquares", "Random", "Tables", "Test"] +deps = ["CSV", "DocStringExtensions", "LinearAlgebra", "MLJModelInterface", "NonNegLeastSquares", "Random", "Tables"] path = ".." uuid = "19f41c5e-8610-11e9-2f2a-0d67e7c5027f" -version = "0.9.5" +version = "1.0.1" [[deps.Pkg]] deps = ["Artifacts", "Dates", "Downloads", "FileWatching", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"] @@ -829,38 +289,10 @@ git-tree-sha1 = "9306f6085165d270f7e3db02af26a400d580f5c6" uuid = "21216c6a-2e73-6563-6e65-726566657250" version = "1.4.3" -[[deps.PrettyPrint]] -git-tree-sha1 = "632eb4abab3449ab30c5e1afaa874f0b98b586e4" -uuid = "8162dcfd-2161-5ef2-ae6c-7681170c5f98" -version = "0.2.0" - -[[deps.PrettyPrinting]] -git-tree-sha1 = "142ee93724a9c5d04d78df7006670a93ed1b244e" -uuid = "54e16d92-306c-5ea0-a30b-337be88ac337" -version = "0.4.2" - -[[deps.PrettyTables]] -deps = ["Crayons", "LaTeXStrings", "Markdown", "PrecompileTools", "Printf", "Reexport", "StringManipulation", "Tables"] -git-tree-sha1 = "88b895d13d53b5577fd53379d913b9ab9ac82660" -uuid = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d" -version = "2.3.1" - [[deps.Printf]] deps = ["Unicode"] uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" -[[deps.ProgressMeter]] -deps = ["Distributed", "Printf"] -git-tree-sha1 = "763a8ceb07833dd51bb9e3bbca372de32c0605ad" -uuid = "92933f4c-e287-5a05-a399-4b506db050ca" -version = "1.10.0" - 
-[[deps.QuadGK]] -deps = ["DataStructures", "LinearAlgebra"] -git-tree-sha1 = "9b23c31e76e333e6fb4c1595ae6afa74966a729e" -uuid = "1fd47b50-473d-5c70-9696-f719f8f3bcdc" -version = "2.9.4" - [[deps.REPL]] deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"] uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" @@ -869,68 +301,21 @@ uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" deps = ["SHA"] uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" -[[deps.RecipesBase]] -deps = ["PrecompileTools"] -git-tree-sha1 = "5c3d09cc4f31f5fc6af001c250bf1278733100ff" -uuid = "3cdcf5f2-1ef4-517c-9805-6587b60abb01" -version = "1.3.4" - -[[deps.Reexport]] -git-tree-sha1 = "45e428421666073eab6f2da5c9d310d99bb12f9b" -uuid = "189a3867-3050-52da-a836-e630ba90ab69" -version = "1.2.2" - [[deps.RegistryInstances]] deps = ["LazilyInitializedFields", "Pkg", "TOML", "Tar"] git-tree-sha1 = "ffd19052caf598b8653b99404058fce14828be51" uuid = "2792f1a3-b283-48e8-9a74-f99dce5104f3" version = "0.1.0" -[[deps.RelocatableFolders]] -deps = ["SHA", "Scratch"] -git-tree-sha1 = "ffdaf70d81cf6ff22c2b6e733c900c3321cab864" -uuid = "05181044-ff0b-4ac5-8273-598c1e38db00" -version = "1.0.1" - -[[deps.Requires]] -deps = ["UUIDs"] -git-tree-sha1 = "838a3a4188e2ded87a4f9f184b4b0d78a1e91cb7" -uuid = "ae029012-a4dd-5104-9daa-d747884805df" -version = "1.3.0" - -[[deps.Rmath]] -deps = ["Random", "Rmath_jll"] -git-tree-sha1 = "f65dcb5fa46aee0cf9ed6274ccbd597adc49aa7b" -uuid = "79098fc4-a85e-5d69-aa6a-4863f24498fa" -version = "0.7.1" - -[[deps.Rmath_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] -git-tree-sha1 = "6ed52fdd3382cf21947b15e8870ac0ddbff736da" -uuid = "f50d1b31-88e8-58de-be2c-1cc44531875f" -version = "0.4.0+0" - [[deps.SHA]] uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" version = "0.7.0" -[[deps.ScientificTypes]] -deps = ["CategoricalArrays", "ColorTypes", "Dates", "Distributions", "PrettyTables", "Reexport", "ScientificTypesBase", "StatisticalTraits", "Tables"] -git-tree-sha1 = 
"75ccd10ca65b939dab03b812994e571bf1e3e1da" -uuid = "321657f4-b219-11e9-178b-2701a2544e81" -version = "3.0.2" - [[deps.ScientificTypesBase]] git-tree-sha1 = "a8e18eb383b5ecf1b5e6fc237eb39255044fd92b" uuid = "30f210dd-8aff-4c5f-94ba-8e64358c1161" version = "3.0.0" -[[deps.Scratch]] -deps = ["Dates"] -git-tree-sha1 = "3bac05bc7e74a75fd9cba4295cde4045d9fe2386" -uuid = "6c6a2e73-6563-6170-7368-637461726353" -version = "1.2.1" - [[deps.SentinelArrays]] deps = ["Dates", "Random"] git-tree-sha1 = "0e7508ff27ba32f26cd459474ca2ede1bc10991f" @@ -940,147 +325,20 @@ version = "1.4.1" [[deps.Serialization]] uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" -[[deps.Setfield]] -deps = ["ConstructionBase", "Future", "MacroTools", "StaticArraysCore"] -git-tree-sha1 = "e2cc6d8c88613c05e1defb55170bf5ff211fbeac" -uuid = "efcf1570-3423-57d1-acb7-fd33fddbac46" -version = "1.1.1" - -[[deps.ShowCases]] -git-tree-sha1 = "7f534ad62ab2bd48591bdeac81994ea8c445e4a5" -uuid = "605ecd9f-84a6-4c9e-81e2-4798472b76a3" -version = "0.1.0" - -[[deps.SimpleBufferStream]] -git-tree-sha1 = "874e8867b33a00e784c8a7e4b60afe9e037b74e1" -uuid = "777ac1f9-54b0-4bf8-805c-2214025038e7" -version = "1.1.0" - -[[deps.SimpleTraits]] -deps = ["InteractiveUtils", "MacroTools"] -git-tree-sha1 = "5d7e3f4e11935503d3ecaf7186eac40602e7d231" -uuid = "699a6c99-e7fa-54fc-8d76-47d257e15c1d" -version = "0.9.4" - [[deps.Sockets]] uuid = "6462fe0b-24de-5631-8697-dd941f90decc" -[[deps.SortingAlgorithms]] -deps = ["DataStructures"] -git-tree-sha1 = "66e0a8e672a0bdfca2c3f5937efb8538b9ddc085" -uuid = "a2af1166-a08f-5f64-846c-94a0d3cef48c" -version = "1.2.1" - [[deps.SparseArrays]] deps = ["Libdl", "LinearAlgebra", "Random", "Serialization", "SuiteSparse_jll"] uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" version = "1.10.0" -[[deps.SpecialFunctions]] -deps = ["IrrationalConstants", "LogExpFunctions", "OpenLibm_jll", "OpenSpecFun_jll"] -git-tree-sha1 = "e2cfc4012a19088254b3950b85c3c1d8882d864d" -uuid = "276daf66-3868-5448-9aa4-cd146d93841b" 
-version = "2.3.1" -weakdeps = ["ChainRulesCore"] - - [deps.SpecialFunctions.extensions] - SpecialFunctionsChainRulesCoreExt = "ChainRulesCore" - -[[deps.SplittablesBase]] -deps = ["Setfield", "Test"] -git-tree-sha1 = "e08a62abc517eb79667d0a29dc08a3b589516bb5" -uuid = "171d559e-b47b-412a-8079-5efa626c420e" -version = "0.1.15" - -[[deps.StableRNGs]] -deps = ["Random", "Test"] -git-tree-sha1 = "ddc1a7b85e760b5285b50b882fa91e40c603be47" -uuid = "860ef19b-820b-49d6-a774-d7a799459cd3" -version = "1.0.1" - -[[deps.StaticArrays]] -deps = ["LinearAlgebra", "PrecompileTools", "Random", "StaticArraysCore"] -git-tree-sha1 = "bf074c045d3d5ffd956fa0a461da38a44685d6b2" -uuid = "90137ffa-7385-5640-81b9-e52037218182" -version = "1.9.3" -weakdeps = ["ChainRulesCore", "Statistics"] - - [deps.StaticArrays.extensions] - StaticArraysChainRulesCoreExt = "ChainRulesCore" - StaticArraysStatisticsExt = "Statistics" - -[[deps.StaticArraysCore]] -git-tree-sha1 = "36b3d696ce6366023a0ea192b4cd442268995a0d" -uuid = "1e83bf80-4336-4d27-bf5d-d5a4f845583c" -version = "1.4.2" - -[[deps.StatisticalMeasures]] -deps = ["CategoricalArrays", "CategoricalDistributions", "Distributions", "LearnAPI", "LinearAlgebra", "MacroTools", "OrderedCollections", "PrecompileTools", "ScientificTypesBase", "StatisticalMeasuresBase", "Statistics", "StatsBase"] -git-tree-sha1 = "8b5a165b0ee2b361d692636bfb423b19abfd92b3" -uuid = "a19d573c-0a75-4610-95b3-7071388c7541" -version = "0.1.6" - - [deps.StatisticalMeasures.extensions] - LossFunctionsExt = "LossFunctions" - ScientificTypesExt = "ScientificTypes" - - [deps.StatisticalMeasures.weakdeps] - LossFunctions = "30fc2ffe-d236-52d8-8643-a9d8f7c094a7" - ScientificTypes = "321657f4-b219-11e9-178b-2701a2544e81" - -[[deps.StatisticalMeasuresBase]] -deps = ["CategoricalArrays", "InteractiveUtils", "MLUtils", "MacroTools", "OrderedCollections", "PrecompileTools", "ScientificTypesBase", "Statistics"] -git-tree-sha1 = "17dfb22e2e4ccc9cd59b487dce52883e0151b4d3" -uuid = 
"c062fc1d-0d66-479b-b6ac-8b44719de4cc" -version = "0.1.1" - [[deps.StatisticalTraits]] deps = ["ScientificTypesBase"] git-tree-sha1 = "30b9236691858e13f167ce829490a68e1a597782" uuid = "64bff920-2084-43da-a3e6-9bb72801c0c9" version = "3.2.0" -[[deps.Statistics]] -deps = ["LinearAlgebra", "SparseArrays"] -uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" -version = "1.10.0" - -[[deps.StatsAPI]] -deps = ["LinearAlgebra"] -git-tree-sha1 = "1ff449ad350c9c4cbc756624d6f8a8c3ef56d3ed" -uuid = "82ae8749-77ed-4fe6-ae5f-f523153014b0" -version = "1.7.0" - -[[deps.StatsBase]] -deps = ["DataAPI", "DataStructures", "LinearAlgebra", "LogExpFunctions", "Missings", "Printf", "Random", "SortingAlgorithms", "SparseArrays", "Statistics", "StatsAPI"] -git-tree-sha1 = "5cf7606d6cef84b543b483848d4ae08ad9832b21" -uuid = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" -version = "0.34.3" - -[[deps.StatsFuns]] -deps = ["HypergeometricFunctions", "IrrationalConstants", "LogExpFunctions", "Reexport", "Rmath", "SpecialFunctions"] -git-tree-sha1 = "cef0472124fab0695b58ca35a77c6fb942fdab8a" -uuid = "4c63d2b9-4356-54db-8cca-17b64c39e42c" -version = "1.3.1" - - [deps.StatsFuns.extensions] - StatsFunsChainRulesCoreExt = "ChainRulesCore" - StatsFunsInverseFunctionsExt = "InverseFunctions" - - [deps.StatsFuns.weakdeps] - ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" - InverseFunctions = "3587e190-3f89-42d0-90ee-14403ec27112" - -[[deps.StringManipulation]] -deps = ["PrecompileTools"] -git-tree-sha1 = "a04cabe79c5f01f4d723cc6704070ada0b9d46d5" -uuid = "892a3eda-7b42-436c-8928-eab12a02cf0e" -version = "0.3.4" - -[[deps.SuiteSparse]] -deps = ["Libdl", "LinearAlgebra", "Serialization", "SparseArrays"] -uuid = "4607b0f0-06f3-5cda-b6b1-a6196a1729e9" - [[deps.SuiteSparse_jll]] deps = ["Artifacts", "Libdl", "libblastrampoline_jll"] uuid = "bea87d4a-7f5b-5778-9afe-8cc45184846c" @@ -1121,54 +379,13 @@ weakdeps = ["Random", "Test"] [deps.TranscodingStreams.extensions] TestExt = ["Test", "Random"] 
-[[deps.Transducers]] -deps = ["Adapt", "ArgCheck", "BangBang", "Baselet", "CompositionsBase", "ConstructionBase", "DefineSingletons", "Distributed", "InitialValues", "Logging", "Markdown", "MicroCollections", "Requires", "Setfield", "SplittablesBase", "Tables"] -git-tree-sha1 = "3064e780dbb8a9296ebb3af8f440f787bb5332af" -uuid = "28d57a85-8fef-5791-bfe6-a80928e7c999" -version = "0.4.80" - - [deps.Transducers.extensions] - TransducersBlockArraysExt = "BlockArrays" - TransducersDataFramesExt = "DataFrames" - TransducersLazyArraysExt = "LazyArrays" - TransducersOnlineStatsBaseExt = "OnlineStatsBase" - TransducersReferenceablesExt = "Referenceables" - - [deps.Transducers.weakdeps] - BlockArrays = "8e7c35d0-a365-5155-bbbb-fb81a777f24e" - DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" - LazyArrays = "5078a376-72f3-5289-bfd5-ec5146d43c02" - OnlineStatsBase = "925886fa-5bf2-5e8e-b522-a9147a512338" - Referenceables = "42d2dcc6-99eb-4e98-b66c-637b7d73030e" - -[[deps.URIs]] -git-tree-sha1 = "67db6cc7b3821e19ebe75791a9dd19c9b1188f2b" -uuid = "5c2747f8-b7ea-4ff2-ba2e-563bfd36b1d4" -version = "1.5.1" - [[deps.UUIDs]] deps = ["Random", "SHA"] uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" -[[deps.UnPack]] -git-tree-sha1 = "387c1f73762231e86e0c9c5443ce3b4a0a9a0c2b" -uuid = "3a884ed6-31ef-47d7-9d2a-63182c4928ed" -version = "1.0.2" - [[deps.Unicode]] uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" -[[deps.UnsafeAtomics]] -git-tree-sha1 = "6331ac3440856ea1988316b46045303bef658278" -uuid = "013be700-e6cd-48c3-b4a1-df204f14c38f" -version = "0.2.1" - -[[deps.UnsafeAtomicsLLVM]] -deps = ["LLVM", "UnsafeAtomics"] -git-tree-sha1 = "323e3d0acf5e78a56dfae7bd8928c989b4f3083e" -uuid = "d80eeb9a-aca5-4d75-85e5-170c8b632249" -version = "0.1.3" - [[deps.WeakRefStrings]] deps = ["DataAPI", "InlineStrings", "Parsers"] git-tree-sha1 = "b1be2855ed9ed8eac54e5caff2afcdb442d52c23" diff --git a/docs/build/.documenter-siteinfo.json b/docs/build/.documenter-siteinfo.json index ad7558d..8e01b9f 100644 --- 
a/docs/build/.documenter-siteinfo.json +++ b/docs/build/.documenter-siteinfo.json @@ -1 +1 @@ -{"documenter":{"julia_version":"1.10.2","generation_timestamp":"2024-04-05T15:13:52","documenter_version":"1.3.0"}} \ No newline at end of file +{"documenter":{"julia_version":"1.10.2","generation_timestamp":"2024-04-08T09:25:36","documenter_version":"1.3.0"}} \ No newline at end of file diff --git a/docs/build/examples/example/index.html b/docs/build/examples/example/index.html index a5a0aa8..2ee7a22 100644 --- a/docs/build/examples/example/index.html +++ b/docs/build/examples/example/index.html @@ -1,2 +1,2 @@ -Example · PartitionedLS.jl

We present here an analysis of a solution found by a Partitioned LS algorithm on the Ames House Prices dataset, which is publicly available via Kaggle.

The Julia notebook used to generate the results is available here.

This dataset has a relatively high number of columns (79 in total) each detailing one particular characteristic of housing properties in Ames, Iowa. The task is to predict the selling price of each house.

We propose a grouping of the features into 10 groups, each one representing a high-level characteristic of the property:

GroupFeatures
LotDescritptionMSSubClass, MSZoning, LotFrontage, LotArea, Street, Alley, LotShape, LandContour, LotConfig, LandSlope
BuildingPlacementUtilities, Neighborhood, Condition1, Condition2
BuildingAgeYearBuilt, YearRemodAdd
BuildingQualityBldgType, HouseStyle, OverallQual, OverallCond, RoofStyle, RoofMatl, Exterior1st, Exterior2nd, MasVnrType, MasVnrArea, ExterQual, ExterCond, Foundation, Functional
BasementBsmtQual, BsmtCond, BsmtExposure, BsmtFinType1, BsmtFinSF1, BsmtFinType2, BsmtFinSF2, BsmtUnfSF, TotalBsmtSF
PowerAndTemperatureHeating, HeatingQC, CentralAir, Electrical, Fireplaces, FireplaceQu
Sizes1stFlrSF, 2ndFlrSF, LowQualFinSF, GrLivArea
RoomsBsmtFullBath, BsmtHalfBath, FullBath, HalfBath, BedroomAbvGr, KitchenAbvGr, KitchenQual, TotRmsAbvGrd
OutsideFacilitiesGarageType, GarageYrBlt, GarageFinish, GarageCars, GarageArea, GarageQual, GarageCond, PavedDrive, WoodDeckSF, OpenPorchSF, EnclosedPorch, 3SsnPorch, ScreenPorch, PoolArea, PoolQC, Fence
VariousMiscFeature, MiscVal, MoSold, YrSold, SaleType, SaleCondition

As an example, we collect 6 columns referring to the availability and quality of air conditioning systems, electrical system, heating and fireplaces in a "Power and Temperature" group. Other feature groups refer to overall quality of the construction work and materials employed ("Building Quality"), external facilities such as garages or swimming pools ("Outside Facilities"). The $\beta$ values for the groups are as follows:

\$\\beta\$ values as found by the `Opt` algorithm on the Ames House Prices dataset

We note that the grouped solution enabled by the partitioned least squares formulation is able to give a high-level summary of the regression result. An analyst is therefore able to communicate easily to, e.g. an individual selling their house, that the price is mostly determined by the building quality and the attractiveness of the lot. A deeper analysis is of course possible by investigating the $\alpha$ values found by the algorithm. For instance, let consider the contributions to the ``Outside Facilities'':

\$\\alpha\$ values as found by the `Opt` algorithm on the Ames House Prices dataset for the "OutsideFacilities" group

Here, one is able to notice that garage quality has the biggest impact on the property's price, which is potentially actionable knowledge.

We argue that the group- and feature-level analysis made possible by our contributions improves on the interpretability of ungrouped linear regression.

+Example · PartitionedLS.jl

We present here an analysis of a solution found by a Partitioned LS algorithm on the Ames House Prices dataset, which is publicly available via Kaggle.

The Julia notebook used to generate the results is available here.

This dataset has a relatively high number of columns (79 in total) each detailing one particular characteristic of housing properties in Ames, Iowa. The task is to predict the selling price of each house.

We propose a grouping of the features into 10 groups, each one representing a high-level characteristic of the property:

GroupFeatures
LotDescritptionMSSubClass, MSZoning, LotFrontage, LotArea, Street, Alley, LotShape, LandContour, LotConfig, LandSlope
BuildingPlacementUtilities, Neighborhood, Condition1, Condition2
BuildingAgeYearBuilt, YearRemodAdd
BuildingQualityBldgType, HouseStyle, OverallQual, OverallCond, RoofStyle, RoofMatl, Exterior1st, Exterior2nd, MasVnrType, MasVnrArea, ExterQual, ExterCond, Foundation, Functional
BasementBsmtQual, BsmtCond, BsmtExposure, BsmtFinType1, BsmtFinSF1, BsmtFinType2, BsmtFinSF2, BsmtUnfSF, TotalBsmtSF
PowerAndTemperatureHeating, HeatingQC, CentralAir, Electrical, Fireplaces, FireplaceQu
Sizes1stFlrSF, 2ndFlrSF, LowQualFinSF, GrLivArea
RoomsBsmtFullBath, BsmtHalfBath, FullBath, HalfBath, BedroomAbvGr, KitchenAbvGr, KitchenQual, TotRmsAbvGrd
OutsideFacilitiesGarageType, GarageYrBlt, GarageFinish, GarageCars, GarageArea, GarageQual, GarageCond, PavedDrive, WoodDeckSF, OpenPorchSF, EnclosedPorch, 3SsnPorch, ScreenPorch, PoolArea, PoolQC, Fence
VariousMiscFeature, MiscVal, MoSold, YrSold, SaleType, SaleCondition

As an example, we collect 6 columns referring to the availability and quality of air conditioning systems, electrical system, heating and fireplaces in a "Power and Temperature" group. Other feature groups refer to overall quality of the construction work and materials employed ("Building Quality"), external facilities such as garages or swimming pools ("Outside Facilities"). The $\beta$ values for the groups are as follows:

\$\\beta\$ values as found by the `Opt` algorithm on the Ames House Prices dataset

We note that the grouped solution enabled by the partitioned least squares formulation is able to give a high-level summary of the regression result. An analyst is therefore able to communicate easily to, e.g. an individual selling their house, that the price is mostly determined by the building quality and the attractiveness of the lot. A deeper analysis is of course possible by investigating the $\alpha$ values found by the algorithm. For instance, let us consider the contributions to the "Outside Facilities" group:

\$\\alpha\$ values as found by the `Opt` algorithm on the Ames House Prices dataset for the "OutsideFacilities" group

Here, one is able to notice that garage quality has the biggest impact on the property's price, which is potentially actionable knowledge.

We argue that the group- and feature-level analysis made possible by our contributions improves on the interpretability of ungrouped linear regression.

diff --git a/docs/build/index.html b/docs/build/index.html index b7f0dcc..58c4133 100644 --- a/docs/build/index.html +++ b/docs/build/index.html @@ -64,10 +64,30 @@ fit!(mach) # Make predictions -predict(mach, X)

API Documentation

PartitionedLS.PartLSType
mutable struct PartLS <: Deterministic

The PartLS struct represents a partitioned least squares model. Fields are:

  • Optimizer: the optimization algorithm to use. It can be Opt, Alt or BnB.
  • P: the partition matrix. It is a binary matrix where each row corresponds to a partition and each column corresponds to a feature. The element P_{k, i} = 1 if feature i belongs to partition k.
  • η: the regularization parameter. It controls the strength of the regularization.
  • ϵ: the tolerance parameter. It is used to determine when the Alt optimization algorithm has converged. Only used by the Alt algorithm.
  • T: the maximum number of iterations. It is used to determine when to stop the Alt optimization algorithm has converged. Only used by the Alt algorithm.
  • rng: the random number generator to use.
    • If nothing, the global random number generator rand is used.
    • If an integer, the global number generator rand is used after seeding it with the given integer.
    • If an object of type AbstractRNG, the given random number generator is used.

Example

model = PartLS(P=P, Optimizer=Alt, rng=123)
source
PartitionedLS.PartLSFitResultType
struct PartLSFitResult

The PartLSFitResult struct represents the solution of the partitioned least squares problem. It contains the values of the α and β variables, the intercept t and the partition matrix P.

Fields

  • α::Vector{Float64}: The values of the α variables. For each partition $k$, it holds the values of the α variables are such that $\sum_{i \in P_k} \alpha_{k} = 1$.
  • β::Vector{Float64}: The values of the β variables. For each partition $k$, $\beta_k$ is the coefficient that multiplies the features in the k-th partition.
  • t::Float64: The intercept term of the model.
  • P::Matrix{Int64}: The partition matrix. It is a binary matrix where each row corresponds to a partition and each column corresponds to a feature. The element $P_{k, i} = 1$ if feature $i$ belongs to partition $k$.
source
MLJModelInterface.fitFunction
fit(
+predict(mach, X)

API Documentation

PartitionedLS.PartLSType
PartLS

A model type for fitting a partitioned least squares model to data.

From MLJ, the type can be imported using

PartLS = @load PartLS pkg=PartitionedLS

Construct an instance with default hyper-parameters using the syntax model = PartLS(). Provide keyword arguments to override hyper-parameter defaults, as in PartLS(P=...).

Training data

In MLJ or MLJBase, bind an instance model to data with

mach = machine(model, X, y)

where

  • X: any matrix with element scitype Float64,2

Train the machine using fit!(mach).

Hyper-parameters

  • Optimizer: the optimization algorithm to use. It can be Opt, Alt or BnB.
  • P: the partition matrix. It is a binary matrix where each row corresponds to a partition and each column corresponds to a feature. The element P_{k, i} = 1 if feature i belongs to partition k.
  • η: the regularization parameter. It controls the strength of the regularization.
  • ϵ: the tolerance parameter. It is used to determine when the Alt optimization algorithm has converged. Only used by the Alt algorithm.
  • T: the maximum number of iterations. It is used to determine when to stop the Alt optimization algorithm if it has not yet converged. Only used by the Alt algorithm.
  • rng: the random number generator to use.
    • If nothing, the global random number generator rand is used.
    • If an integer, the global random number generator rand is used after seeding it with the given integer.
    • If an object of type AbstractRNG, the given random number generator is used.

Operations

  • predict(mach, Xnew): return the predictions of the model on new data Xnew

Fitted parameters

The fields of fitted_params(mach) are:

  • α: the values of the α variables. For each partition k, the values of the α variables are such that $\sum_{i \in P_k} \alpha_{i} = 1$.
  • β: the values of the β variables. For each partition k, β_k is the coefficient that multiplies the features in the k-th partition.
  • t: the intercept term of the model.
  • P: the partition matrix. It is a binary matrix where each row corresponds to a partition and each column corresponds to a feature. The element P_{k, i} = 1 if feature i belongs to partition k.

Examples

PartLS = @load PartLS pkg=PartitionedLS
+
+
+X = [[1. 2. 3.]; 
+     [3. 3. 4.]; 
+     [8. 1. 3.]; 
+     [5. 3. 1.]]
+
+y = [1.; 
+     1.; 
+     2.; 
+     3.]
+
+P = [[1 0]; 
+     [1 0]; 
+     [0 1]]
+
+
+# fit using the optimal algorithm 
+result = fit(Opt, X, y, P, η = 0.0)
+y_hat = predict(result[1], X)
source
PartitionedLS.PartLSFitResultType
struct PartLSFitResult

The PartLSFitResult struct represents the solution of the partitioned least squares problem. It contains the values of the α and β variables, the intercept t and the partition matrix P.

Fields

  • α::Vector{Float64}: The values of the α variables. For each partition $k$, the values of the α variables are such that $\sum_{i \in P_k} \alpha_{i} = 1$.
  • β::Vector{Float64}: The values of the β variables. For each partition $k$, $\beta_k$ is the coefficient that multiplies the features in the k-th partition.
  • t::Float64: The intercept term of the model.
  • P::Matrix{Int64}: The partition matrix. It is a binary matrix where each row corresponds to a partition and each column corresponds to a feature. The element $P_{k, i} = 1$ if feature $i$ belongs to partition $k$.
source
MLJModelInterface.fitFunction
fit(
     ::Type{Alt},
     X::Matrix{Float64},
-    y::Vector{Float64},
+    y::AbstractVector{Float64},
     P::Matrix{Int64};
     η,
     ϵ,
@@ -75,10 +95,10 @@
     nnlsalg,
     rng
 ) -> Tuple{PartLSFitResult, Nothing, NamedTuple{(:opt,), <:Tuple{Any}}}
-

Fits a PartitionedLS model by alternating the optimization of the α and β variables. This version uses an optimization strategy based on non-negative-least-squaes solvers. This formulation is faster and more numerically stable with respect to fit(Alt, ...)`.

Arguments

  • X: $N × M$ matrix describing the examples
  • y: $N$ vector with the output values for each example
  • P: $M × K$ matrix specifying how to partition the $M$ attributes into $K$ subsets. $P_{m,k}$ should be 1 if attribute number $m$ belongs to partition $k$.
  • η: regularization factor, higher values implies more regularized solutions. Default is 0.0.
  • T: number of alternating loops to be performed. Default is 100.
  • ϵ: minimum relative improvement in the objective function before stopping the optimization. Default is 1e-6
  • nnlsalg: specific flavour of nnls algorithm to be used, possible values are :pivot, :nnls, :fnnls. Default is :nnls

Result

A Tuple with the following fields:

  1. a PartLSFitResult object containing the fitted model
  2. a nothing object
  3. a NamedTuple with a field opt containing the optimal value of the objective function
source
fit(
+

Fits a PartitionedLS model by alternating the optimization of the α and β variables. This version uses an optimization strategy based on non-negative-least-squares solvers. This formulation is faster and more numerically stable with respect to fit(Alt, ...).

Arguments

  • X: $N × M$ matrix describing the examples
  • y: $N$ vector with the output values for each example
  • P: $M × K$ matrix specifying how to partition the $M$ attributes into $K$ subsets. $P_{m,k}$ should be 1 if attribute number $m$ belongs to partition $k$.
  • η: regularization factor, higher values implies more regularized solutions. Default is 0.0.
  • T: number of alternating loops to be performed. Default is 100.
  • ϵ: minimum relative improvement in the objective function before stopping the optimization. Default is 1e-6
  • nnlsalg: specific flavour of nnls algorithm to be used, possible values are :pivot, :nnls, :fnnls. Default is :nnls

Result

A Tuple with the following fields:

  1. a PartLSFitResult object containing the fitted model
  2. a nothing object
  3. a NamedTuple with a field opt containing the optimal value of the objective function
source
fit(
     ::Type{Opt},
     X::Matrix{Float64},
-    y::Vector{Float64},
+    y::AbstractVector{Float64},
     P::Matrix{Int64};
     η,
     nnlsalg,
@@ -87,29 +107,29 @@
 

Fits a PartitionedLS Regression model to the given data and returns the learnt model (see the Result section). It uses a complete enumeration strategy which is exponential in K, but is guaranteed to find the optimal solution.

Arguments

  • X: $N × M$ matrix describing the examples
  • y: $N$ vector with the output values for each example
  • P: $M × K$ matrix specifying how to partition the $M$ attributes into $K$ subsets. $P_{m,k}$ should be 1 if attribute number $m$ belongs to

partition $k$.

  • η: regularization factor, higher values implies more regularized solutions (default: 0.0)
  • returnAllSolutions: if true an additional output is appended to the resulting tuple containing all solutions found during the algorithm.
  • nnlsalg: the kind of nnls algorithm to be used during solving. Possible values are :pivot, :nnls, :fnnls (default: :nnls)

Example

X = rand(100, 10)
 y = rand(100)
 P = [1 0 0; 0 1 0; 0 0 1; 1 1 0; 0 1 1]
-result = fit(Opt, X, y, P)
source
fit(
+result = fit(Opt, X, y, P)
source
fit(
     ::Type{BnB},
     X::Matrix{Float64},
-    y::Vector{Float64},
+    y::AbstractVector{Float64},
     P::Matrix{Int64};
     η,
     nnlsalg
 ) -> Tuple{PartLSFitResult, Nothing, NamedTuple{(:opt, :nopen), <:Tuple{Any, Int64}}}
-

Implements the Branch and Bound algorithm to fit a Partitioned Least Squres model.

Arguments

  • X: $N × M$ matrix describing the examples
  • y: $N$ vector with the output values for each example
  • P: $M × K$ matrix specifying how to partition the $M$ attributes into $K$ subsets. $P_{m,k}$ should be 1 if attribute number $m$ belongs to

partition $k$.

  • η: regularization factor, higher values implies more regularized solutions (default: 0.0)
  • nnlsalg: the kind of nnls algorithm to be used during solving. Possible values are :pivot, :nnls, :fnnls (default: :nnls)

Result

A tuple with the following fields:

  1. a PartLSFitResult object containing the fitted model
  2. a nothing object
  3. a NamedTuple with fields:
    • opt containing the optimal value of the objective function
    • nopen containing the number of open nodes in the branch and bound tree
source
fit(
+

Implements the Branch and Bound algorithm to fit a Partitioned Least Squares model.

Arguments

  • X: $N × M$ matrix describing the examples
  • y: $N$ vector with the output values for each example
  • P: $M × K$ matrix specifying how to partition the $M$ attributes into $K$ subsets. $P_{m,k}$ should be 1 if attribute number $m$ belongs to

partition $k$.

  • η: regularization factor, higher values implies more regularized solutions (default: 0.0)
  • nnlsalg: the kind of nnls algorithm to be used during solving. Possible values are :pivot, :nnls, :fnnls (default: :nnls)

Result

A tuple with the following fields:

  1. a PartLSFitResult object containing the fitted model
  2. a nothing object
  3. a NamedTuple with fields:
    • opt containing the optimal value of the objective function
    • nopen containing the number of open nodes in the branch and bound tree
source
fit(
     m::PartLS,
     verbosity,
     X,
     y
 ) -> Tuple{PartLSFitResult, Nothing, Any}
-

Fits a PartitionedLS Regression model to the given data and resturns the learnt model (see the Result section). It conforms to the MLJ interface.

Arguments

  • m: A PartLS model to fit
  • verbosity: the verbosity level
  • X: the data matrix
  • y: the target vector
source
MLJModelInterface.predictFunction
predict(
+

Fits a PartitionedLS Regression model to the given data and returns the learnt model (see the Result section). It conforms to the MLJ interface.

Arguments

  • m: A PartLS model to fit
  • verbosity: the verbosity level
  • X: the data matrix
  • y: the target vector
source
MLJModelInterface.predictFunction
predict(
     α::Vector{Float64},
     β::Vector{Float64},
     t::Float64,
     P::Matrix{Int64},
     X::Matrix{Float64}
 ) -> Vector{Float64}
-

Result

the prediction for the partitioned least squares problem with solution α, β, t over the dataset X and partition matrix P

source
predict(
+

Result

the prediction for the partitioned least squares problem with solution α, β, t over the dataset X and partition matrix P

source
predict(
     model::PartLSFitResult,
     X::Matrix{Float64}
 ) -> Vector{Float64}
-

Make predictions for the datataset X using the PartialLS model model.

Arguments

  • model: a PartLSFitResult
  • X: the data containing the examples for which the predictions are sought

Return

the predictions of the given model on examples in X.

source
PartitionedLS.homogeneousCoordsFunction

Rewrites X and P in homogeneous coordinates. The result is a tuple (Xo, Po) where Xo is the homogeneous version of X and Po is the homogeneous version of P.

Arguments

  • X: the data matrix
  • P: the partition matrix

Return

  • Xo: the homogeneous version of X
  • Po: the homogeneous version of P
source
PartitionedLS.regularizeProblemFunction

Adds regularization terms to the problem. The regularization terms are added to the objective function as a sum of squares of the α variables. The regularization parameter η controls the strength of the regularization.

Arguments

  • X: the data matrix
  • y: the target vector
  • P: the partition matrix
  • η: the regularization parameter

Return

  • Xn: the new data matrix
  • yn: the new target vector

Main idea

K new rows are added to the data matrix X, row $k \in \{1 \dots K\}$ is a vector of zeros except for the components that corresponds to features belonging to the k-th partition, which is set to sqrt(η). The target vector y is extended with K zeros.

The point of this change is that when the objective function is evaluated as $math \|Xw - y\|^2$, the new part of the matrix contributes to the loss with a factor of $η \sum \|w_i\|^2$ . This is equivalent to adding a regularization term to the objective function.

source
+

Make predictions for the dataset X using the PartitionedLS model model.

Arguments

Return

the predictions of the given model on examples in X.

source
PartitionedLS.homogeneousCoordsFunction

Rewrites X and P in homogeneous coordinates. The result is a tuple (Xo, Po) where Xo is the homogeneous version of X and Po is the homogeneous version of P.

Arguments

  • X: the data matrix
  • P: the partition matrix

Return

  • Xo: the homogeneous version of X
  • Po: the homogeneous version of P
source
PartitionedLS.regularizeProblemFunction

Adds regularization terms to the problem. The regularization terms are added to the objective function as a sum of squares of the α variables. The regularization parameter η controls the strength of the regularization.

Arguments

  • X: the data matrix
  • y: the target vector
  • P: the partition matrix
  • η: the regularization parameter

Return

  • Xn: the new data matrix
  • yn: the new target vector

Main idea

K new rows are added to the data matrix X; row $k \in \{1 \dots K\}$ is a vector of zeros except for the components that correspond to features belonging to the k-th partition, which are set to sqrt(η). The target vector y is extended with K zeros.

The point of this change is that when the objective function is evaluated as $\|Xw - y\|^2$, the new part of the matrix contributes to the loss with a factor of $η \sum \|w_i\|^2$. This is equivalent to adding a regularization term to the objective function.

source
diff --git a/docs/build/search_index.js b/docs/build/search_index.js index 8ff4ba7..61bc4a4 100644 --- a/docs/build/search_index.js +++ b/docs/build/search_index.js @@ -1,3 +1,3 @@ var documenterSearchIndex = {"docs": -[{"location":"examples/example/","page":"Example","title":"Example","text":"We present here an analysis of a solution found by a Partitioned LS algorithm on the Ames House Prices dataset, which is publicly available via Kaggle.","category":"page"},{"location":"examples/example/","page":"Example","title":"Example","text":"The Julia notebook used to generate the results is available here.","category":"page"},{"location":"examples/example/","page":"Example","title":"Example","text":"This dataset has a relatively high number of columns (79 in total) each detailing one particular characteristic of housing properties in Ames, Iowa. The task is to predict the selling price of each house. ","category":"page"},{"location":"examples/example/","page":"Example","title":"Example","text":"We propose a grouping of the features into 10 groups, each one representing a high-level characteristic of the property:","category":"page"},{"location":"examples/example/","page":"Example","title":"Example","text":"Group Features\nLotDescritption MSSubClass, MSZoning, LotFrontage, LotArea, Street, Alley, LotShape, LandContour, LotConfig, LandSlope\nBuildingPlacement Utilities, Neighborhood, Condition1, Condition2\nBuildingAge YearBuilt, YearRemodAdd\nBuildingQuality BldgType, HouseStyle, OverallQual, OverallCond, RoofStyle, RoofMatl, Exterior1st, Exterior2nd, MasVnrType, MasVnrArea, ExterQual, ExterCond, Foundation, Functional\nBasement BsmtQual, BsmtCond, BsmtExposure, BsmtFinType1, BsmtFinSF1, BsmtFinType2, BsmtFinSF2, BsmtUnfSF, TotalBsmtSF\nPowerAndTemperature Heating, HeatingQC, CentralAir, Electrical, Fireplaces, FireplaceQu\nSizes 1stFlrSF, 2ndFlrSF, LowQualFinSF, GrLivArea\nRooms BsmtFullBath, BsmtHalfBath, FullBath, HalfBath, BedroomAbvGr, KitchenAbvGr, KitchenQual, 
TotRmsAbvGrd\nOutsideFacilities GarageType, GarageYrBlt, GarageFinish, GarageCars, GarageArea, GarageQual, GarageCond, PavedDrive, WoodDeckSF, OpenPorchSF, EnclosedPorch, 3SsnPorch, ScreenPorch, PoolArea, PoolQC, Fence\nVarious MiscFeature, MiscVal, MoSold, YrSold, SaleType, SaleCondition","category":"page"},{"location":"examples/example/","page":"Example","title":"Example","text":"As an example, we collect 6 columns referring to the availability and quality of air conditioning systems, electrical system, heating and fireplaces in a \"Power and Temperature\" group. Other feature groups refer to overall quality of the construction work and materials employed (\"Building Quality\"), external facilities such as garages or swimming pools (\"Outside Facilities\"). The beta values for the groups are as follows:","category":"page"},{"location":"examples/example/","page":"Example","title":"Example","text":"(Image: $\\beta$ values as found by the `Opt` algorithm on the Ames House Prices dataset)","category":"page"},{"location":"examples/example/","page":"Example","title":"Example","text":"We note that the grouped solution enabled by the partitioned least squares formulation is able to give a high-level summary of the regression result. An analyst is therefore able to communicate easily to, e.g. an individual selling their house, that the price is mostly determined by the building quality and the attractiveness of the lot. A deeper analysis is of course possible by investigating the alpha values found by the algorithm. 
For instance, let consider the contributions to the ``Outside Facilities'':","category":"page"},{"location":"examples/example/","page":"Example","title":"Example","text":"(Image: $\\alpha$ values as found by the `Opt` algorithm on the Ames House Prices dataset for the \"OutsideFacilities\" group)","category":"page"},{"location":"examples/example/","page":"Example","title":"Example","text":"Here, one is able to notice that garage quality has the biggest impact on the property's price, which is potentially actionable knowledge. ","category":"page"},{"location":"examples/example/","page":"Example","title":"Example","text":"We argue that the group- and feature-level analysis made possible by our contributions improves on the interpretability of ungrouped linear regression.","category":"page"},{"location":"#Partitioned-Least-Squares","page":"Documentation","title":"Partitioned Least Squares","text":"","category":"section"},{"location":"","page":"Documentation","title":"Documentation","text":"Linear least squares is one of the most widely used regression methods among scientists in many fields. The simplicity of the model allows this method to be used when data is scarce and it is usually appealing to practitioners that need to gather some insight into the problem by inspecting the values of the learnt parameters. 
PartitionedLS is a variant of the linear least squares model allowing practitioners to partition the input features into groups of variables that they require to contribute similarly to the final result.","category":"page"},{"location":"","page":"Documentation","title":"Documentation","text":"An example of analysing a dataset using PartitionedLS is given here","category":"page"},{"location":"#The-model","page":"Documentation","title":"The model","text":"","category":"section"},{"location":"","page":"Documentation","title":"Documentation","text":"The Partitioned Least Squares model is formally defined as:","category":"page"},{"location":"","page":"Documentation","title":"Documentation","text":"begingather*\ntextminimize_mathbfalpha mathbfbeta mathbfX times (mathbfP circ mathbfalpha) times mathbfbeta - mathbfy _2^2 \nbeginaligned\nquad stquad mathbfalpha succeq 0\n mathbfP^T times mathbfalpha = mathbf1\nendaligned\nendgather*","category":"page"},{"location":"","page":"Documentation","title":"Documentation","text":"where: ","category":"page"},{"location":"","page":"Documentation","title":"Documentation","text":"mathbfX is N times M data matrix;\nmathbfP is a user-defined partition matrix having K columns (one for each element of the partition), M rows, and containing 1 in P_ij if the i-th attribute belongs to the j-th partition and 0 otherwise;\nmathbfbeta is a vector weighting the importance of each set of attributes in the partition;\nmathbfalpha is a vector weighting the importance of each attribute within one of the sets in the partition. Note that the constraints imply that for each set in the partition the weights of the corresponding alpha variables are all positive and sum to 1.","category":"page"},{"location":"","page":"Documentation","title":"Documentation","text":"The PartitionedLS problem is non-convex and NP-complete. 
The library provides two algorithms to solve the problem anyway: an iterative algorithm based on the Alternating Least Squares approach and an optimal algorithm that guarantees requiring however exponential time in the cardinality of the partition (i.e., it is mainly useful when K is small).","category":"page"},{"location":"","page":"Documentation","title":"Documentation","text":"More details can be found in the paper Partitioned Least Squares.","category":"page"},{"location":"#To-install-this-library","page":"Documentation","title":"To install this library","text":"","category":"section"},{"location":"","page":"Documentation","title":"Documentation","text":"Just add it as a dependency to your Julia environment. Launch julia from the main directory of your project and enter the following commands:","category":"page"},{"location":"","page":"Documentation","title":"Documentation","text":"# Opens the package manager REPL\n]\n\n# Activate you local environment (can be skipped if you want to install the library globally)\nactivate .\n\n# Adds the library to the environment\nadd PartitionedLS","category":"page"},{"location":"#To-use-this-library","page":"Documentation","title":"To use this library","text":"","category":"section"},{"location":"","page":"Documentation","title":"Documentation","text":"You will need a matrix P describing the partitioning of your variables, e.g.:","category":"page"},{"location":"","page":"Documentation","title":"Documentation","text":"P = [[1 0]; \n [1 0]; \n [0 1]]","category":"page"},{"location":"","page":"Documentation","title":"Documentation","text":"specifies that the first and the second variable belongs to the first partition, while the third variable belongs to the second.","category":"page"},{"location":"","page":"Documentation","title":"Documentation","text":"You have then the choice to use either the standard interface or the MLJ interface. 
","category":"page"},{"location":"#Standard-interface","page":"Documentation","title":"Standard interface","text":"","category":"section"},{"location":"","page":"Documentation","title":"Documentation","text":"The standard interface defines a fit function for each of the implemented algorithms. The function returns a tuple containing:","category":"page"},{"location":"","page":"Documentation","title":"Documentation","text":"a PartLSFitResult object containing the model and the parameters found by the algorithm;\nnothing (this is mandated by the MLJ interface, but it is not used in this case).\na NamedTuple containing some additional information.","category":"page"},{"location":"","page":"Documentation","title":"Documentation","text":"A complete example:","category":"page"},{"location":"","page":"Documentation","title":"Documentation","text":"\nusing PartitionedLS\n\nX = [[1. 2. 3.]; \n [3. 3. 4.]; \n [8. 1. 3.]; \n [5. 3. 1.]]\n\ny = [1.; \n 1.; \n 2.; \n 3.]\n\nP = [[1 0]; \n [1 0]; \n [0 1]]\n\n\n# fit using the optimal algorithm \nresult = fit(Opt, X, y, P, η = 0.0)\n\n\n# Make predictions on the given data matrix. The function works\n# with results returned by anyone of the solvers.\npredict(result[1], X)","category":"page"},{"location":"#MLJ-interface","page":"Documentation","title":"MLJ interface","text":"","category":"section"},{"location":"","page":"Documentation","title":"Documentation","text":"The MLJ interface is a allows you to use the library in a more MLJ-like fashion. The interface is defined by the PartLS model, which can be used in the MLJ framework. The model can be used in the same way as any other MLJ model.","category":"page"},{"location":"","page":"Documentation","title":"Documentation","text":"A complete example:","category":"page"},{"location":"","page":"Documentation","title":"Documentation","text":"using MLJ\nusing PartitionedLS\n\nX = [[1. 2. 3.]; \n [3. 3. 4.]; \n [8. 1. 3.]; \n [5. 3. 
1.]]\n\ny = [1.;\n 1.;\n 2.;\n 3.]\n\nP = [[1 0]; \n [1 0]; \n [0 1]]\n\n# Define the model\n\nmodel = PartLS(P=P, Optimizer=Opt, η=0.0)\n\n# Fit the model\nmach = machine(model, X, y)\nfit!(mach)\n\n# Make predictions\npredict(mach, X)","category":"page"},{"location":"#API-Documentation","page":"Documentation","title":"API Documentation","text":"","category":"section"},{"location":"","page":"Documentation","title":"Documentation","text":"PartLS\nPartLSFitResult\nPartitionedLS.fit\nPartitionedLS.predict\nPartitionedLS.homogeneousCoords\nPartitionedLS.regularizeProblem","category":"page"},{"location":"#PartitionedLS.PartLS","page":"Documentation","title":"PartitionedLS.PartLS","text":"mutable struct PartLS <: Deterministic\n\nThe PartLS struct represents a partitioned least squares model. Fields are:\n\nOptimizer: the optimization algorithm to use. It can be Opt, Alt or BnB.\nP: the partition matrix. It is a binary matrix where each row corresponds to a partition and each column corresponds to a feature. The element P_{k, i} = 1 if feature i belongs to partition k.\nη: the regularization parameter. It controls the strength of the regularization.\nϵ: the tolerance parameter. It is used to determine when the Alt optimization algorithm has converged. Only used by the Alt algorithm.\nT: the maximum number of iterations. It is used to determine when to stop the Alt optimization algorithm has converged. Only used by the Alt algorithm.\nrng: the random number generator to use. 
\nIf nothing, the global random number generator rand is used.\nIf an integer, the global number generator rand is used after seeding it with the given integer.\nIf an object of type AbstractRNG, the given random number generator is used.\n\nExample\n\nmodel = PartLS(P=P, Optimizer=Alt, rng=123)\n\n\n\n\n\n","category":"type"},{"location":"#PartitionedLS.PartLSFitResult","page":"Documentation","title":"PartitionedLS.PartLSFitResult","text":"struct PartLSFitResult\n\nThe PartLSFitResult struct represents the solution of the partitioned least squares problem. It contains the values of the α and β variables, the intercept t and the partition matrix P.\n\nFields\n\nα::Vector{Float64}: The values of the α variables. For each partition k, it holds the values of the α variables are such that sum_i in P_k alpha_k = 1.\n\nβ::Vector{Float64}: The values of the β variables. For each partition k, beta_k is the coefficient that multiplies the features in the k-th partition.\n\nt::Float64: The intercept term of the model.\n\nP::Matrix{Int64}: The partition matrix. It is a binary matrix where each row corresponds to a partition and each column corresponds to a feature. The element P_k i = 1 if feature i belongs to partition k.\n\n\n\n\n\n","category":"type"},{"location":"#MLJModelInterface.fit","page":"Documentation","title":"MLJModelInterface.fit","text":"fit(\n ::Type{Alt},\n X::Matrix{Float64},\n y::Vector{Float64},\n P::Matrix{Int64};\n η,\n ϵ,\n T,\n nnlsalg,\n rng\n) -> Tuple{PartLSFitResult, Nothing, NamedTuple{(:opt,), <:Tuple{Any}}}\n\n\nFits a PartitionedLS model by alternating the optimization of the α and β variables. This version uses an optimization strategy based on non-negative-least-squaes solvers. This formulation is faster and more numerically stable with respect to fit(Alt, ...)`.\n\nArguments\n\nX: N M matrix describing the examples\ny: N vector with the output values for each example\nP: M K matrix specifying how to partition the M attributes into K subsets. 
P_mk should be 1 if attribute number m belongs to partition k.\nη: regularization factor, higher values implies more regularized solutions. Default is 0.0.\nT: number of alternating loops to be performed. Default is 100.\nϵ: minimum relative improvement in the objective function before stopping the optimization. Default is 1e-6\nnnlsalg: specific flavour of nnls algorithm to be used, possible values are :pivot, :nnls, :fnnls. Default is :nnls\n\nResult\n\nA Tuple with the following fields:\n\na PartLSFitResult object containing the fitted model\na nothing object\na NamedTuple with a field opt containing the optimal value of the objective function\n\n\n\n\n\nfit(\n ::Type{Opt},\n X::Matrix{Float64},\n y::Vector{Float64},\n P::Matrix{Int64};\n η,\n nnlsalg,\n returnAllSolutions\n) -> Tuple{PartLSFitResult, Nothing, Any}\n\n\nFits a PartialLS Regression model to the given data and resturns the learnt model (see the Result section). It uses a coplete enumeration strategy which is exponential in K, but guarantees to find the optimal solution.\n\nArguments\n\nX: N M matrix describing the examples\ny: N vector with the output values for each example\nP: M K matrix specifying how to partition the M attributes into K subsets. P_mk should be 1 if attribute number m belongs to\n\npartition k.\n\nη: regularization factor, higher values implies more regularized solutions (default: 0.0)\nreturnAllSolutions: if true an additional output is appended to the resulting tuple containing all solutions found during the algorithm.\nnnlsalg: the kind of nnls algorithm to be used during solving. 
Possible values are :pivot, :nnls, :fnnls (default: :nnls)\n\nExample\n\nX = rand(100, 10)\ny = rand(100)\nP = [1 0 0; 0 1 0; 0 0 1; 1 1 0; 0 1 1]\nresult = fit(Opt, X, y, P)\n\n\n\n\n\nfit(\n ::Type{BnB},\n X::Matrix{Float64},\n y::Vector{Float64},\n P::Matrix{Int64};\n η,\n nnlsalg\n) -> Tuple{PartLSFitResult, Nothing, NamedTuple{(:opt, :nopen), <:Tuple{Any, Int64}}}\n\n\nImplements the Branch and Bound algorithm to fit a Partitioned Least Squres model.\n\nArguments\n\nX: N M matrix describing the examples\ny: N vector with the output values for each example\nP: M K matrix specifying how to partition the M attributes into K subsets. P_mk should be 1 if attribute number m belongs to\n\npartition k.\n\nη: regularization factor, higher values implies more regularized solutions (default: 0.0)\nnnlsalg: the kind of nnls algorithm to be used during solving. Possible values are :pivot, :nnls, :fnnls (default: :nnls)\n\nResult\n\nA tuple with the following fields:\n\na PartLSFitResult object containing the fitted model\na nothing object\na NamedTuple with fields: \nopt containing the optimal value of the objective function\nnopen containing the number of open nodes in the branch and bound tree\n\n\n\n\n\nfit(\n m::PartLS,\n verbosity,\n X,\n y\n) -> Tuple{PartLSFitResult, Nothing, Any}\n\n\nFits a PartitionedLS Regression model to the given data and resturns the learnt model (see the Result section). 
It conforms to the MLJ interface.\n\nArguments\n\nm: A PartLS model to fit\nverbosity: the verbosity level\nX: the data matrix\ny: the target vector\n\n\n\n\n\n","category":"function"},{"location":"#MLJModelInterface.predict","page":"Documentation","title":"MLJModelInterface.predict","text":"predict(\n α::Vector{Float64},\n β::Vector{Float64},\n t::Float64,\n P::Matrix{Int64},\n X::Matrix{Float64}\n) -> Vector{Float64}\n\n\nResult\n\nthe prediction for the partitioned least squares problem with solution α, β, t over the dataset X and partition matrix P\n\n\n\n\n\npredict(\n model::PartLSFitResult,\n X::Matrix{Float64}\n) -> Vector{Float64}\n\n\nMake predictions for the datataset X using the PartialLS model model.\n\nArguments\n\nmodel: a PartLSFitResult\nX: the data containing the examples for which the predictions are sought\n\nReturn\n\nthe predictions of the given model on examples in X. \n\n\n\n\n\n","category":"function"},{"location":"#PartitionedLS.homogeneousCoords","page":"Documentation","title":"PartitionedLS.homogeneousCoords","text":"Rewrites X and P in homogeneous coordinates. The result is a tuple (Xo, Po) where Xo is the homogeneous version of X and Po is the homogeneous version of P.\n\nArguments\n\nX: the data matrix\nP: the partition matrix\n\nReturn\n\nXo: the homogeneous version of X\nPo: the homogeneous version of P\n\n\n\n\n\n","category":"function"},{"location":"#PartitionedLS.regularizeProblem","page":"Documentation","title":"PartitionedLS.regularizeProblem","text":"Adds regularization terms to the problem. The regularization terms are added to the objective function as a sum of squares of the α variables. 
The regularization parameter η controls the strength of the regularization.\n\nArguments\n\nX: the data matrix\ny: the target vector\nP: the partition matrix\nη: the regularization parameter\n\nReturn\n\nXn: the new data matrix\nyn: the new target vector\n\nMain idea\n\nK new rows are added to the data matrix X, row k in 1 dots K is a vector of zeros except for the components that corresponds to features belonging to the k-th partition, which is set to sqrt(η). The target vector y is extended with K zeros.\n\nThe point of this change is that when the objective function is evaluated as math Xw - y^2, the new part of the matrix contributes to the loss with a factor of η sum w_i^2 . This is equivalent to adding a regularization term to the objective function.\n\n\n\n\n\n","category":"function"}] +[{"location":"examples/example/","page":"Example","title":"Example","text":"We present here an analysis of a solution found by a Partitioned LS algorithm on the Ames House Prices dataset, which is publicly available via Kaggle.","category":"page"},{"location":"examples/example/","page":"Example","title":"Example","text":"The Julia notebook used to generate the results is available here.","category":"page"},{"location":"examples/example/","page":"Example","title":"Example","text":"This dataset has a relatively high number of columns (79 in total) each detailing one particular characteristic of housing properties in Ames, Iowa. The task is to predict the selling price of each house. 
","category":"page"},{"location":"examples/example/","page":"Example","title":"Example","text":"We propose a grouping of the features into 10 groups, each one representing a high-level characteristic of the property:","category":"page"},{"location":"examples/example/","page":"Example","title":"Example","text":"Group Features\nLotDescritption MSSubClass, MSZoning, LotFrontage, LotArea, Street, Alley, LotShape, LandContour, LotConfig, LandSlope\nBuildingPlacement Utilities, Neighborhood, Condition1, Condition2\nBuildingAge YearBuilt, YearRemodAdd\nBuildingQuality BldgType, HouseStyle, OverallQual, OverallCond, RoofStyle, RoofMatl, Exterior1st, Exterior2nd, MasVnrType, MasVnrArea, ExterQual, ExterCond, Foundation, Functional\nBasement BsmtQual, BsmtCond, BsmtExposure, BsmtFinType1, BsmtFinSF1, BsmtFinType2, BsmtFinSF2, BsmtUnfSF, TotalBsmtSF\nPowerAndTemperature Heating, HeatingQC, CentralAir, Electrical, Fireplaces, FireplaceQu\nSizes 1stFlrSF, 2ndFlrSF, LowQualFinSF, GrLivArea\nRooms BsmtFullBath, BsmtHalfBath, FullBath, HalfBath, BedroomAbvGr, KitchenAbvGr, KitchenQual, TotRmsAbvGrd\nOutsideFacilities GarageType, GarageYrBlt, GarageFinish, GarageCars, GarageArea, GarageQual, GarageCond, PavedDrive, WoodDeckSF, OpenPorchSF, EnclosedPorch, 3SsnPorch, ScreenPorch, PoolArea, PoolQC, Fence\nVarious MiscFeature, MiscVal, MoSold, YrSold, SaleType, SaleCondition","category":"page"},{"location":"examples/example/","page":"Example","title":"Example","text":"As an example, we collect 6 columns referring to the availability and quality of air conditioning systems, electrical system, heating and fireplaces in a \"Power and Temperature\" group. Other feature groups refer to overall quality of the construction work and materials employed (\"Building Quality\"), external facilities such as garages or swimming pools (\"Outside Facilities\"). 
The beta values for the groups are as follows:","category":"page"},{"location":"examples/example/","page":"Example","title":"Example","text":"(Image: $\\beta$ values as found by the `Opt` algorithm on the Ames House Prices dataset)","category":"page"},{"location":"examples/example/","page":"Example","title":"Example","text":"We note that the grouped solution enabled by the partitioned least squares formulation is able to give a high-level summary of the regression result. An analyst is therefore able to communicate easily to, e.g. an individual selling their house, that the price is mostly determined by the building quality and the attractiveness of the lot. A deeper analysis is of course possible by investigating the alpha values found by the algorithm. For instance, let consider the contributions to the ``Outside Facilities'':","category":"page"},{"location":"examples/example/","page":"Example","title":"Example","text":"(Image: $\\alpha$ values as found by the `Opt` algorithm on the Ames House Prices dataset for the \"OutsideFacilities\" group)","category":"page"},{"location":"examples/example/","page":"Example","title":"Example","text":"Here, one is able to notice that garage quality has the biggest impact on the property's price, which is potentially actionable knowledge. ","category":"page"},{"location":"examples/example/","page":"Example","title":"Example","text":"We argue that the group- and feature-level analysis made possible by our contributions improves on the interpretability of ungrouped linear regression.","category":"page"},{"location":"#Partitioned-Least-Squares","page":"Documentation","title":"Partitioned Least Squares","text":"","category":"section"},{"location":"","page":"Documentation","title":"Documentation","text":"Linear least squares is one of the most widely used regression methods among scientists in many fields. 
The simplicity of the model allows this method to be used when data is scarce and it is usually appealing to practitioners that need to gather some insight into the problem by inspecting the values of the learnt parameters. PartitionedLS is a variant of the linear least squares model allowing practitioners to partition the input features into groups of variables that they require to contribute similarly to the final result.","category":"page"},{"location":"","page":"Documentation","title":"Documentation","text":"An example of analysing a dataset using PartitionedLS is given here","category":"page"},{"location":"#The-model","page":"Documentation","title":"The model","text":"","category":"section"},{"location":"","page":"Documentation","title":"Documentation","text":"The Partitioned Least Squares model is formally defined as:","category":"page"},{"location":"","page":"Documentation","title":"Documentation","text":"begingather*\ntextminimize_mathbfalpha mathbfbeta mathbfX times (mathbfP circ mathbfalpha) times mathbfbeta - mathbfy _2^2 \nbeginaligned\nquad stquad mathbfalpha succeq 0\n mathbfP^T times mathbfalpha = mathbf1\nendaligned\nendgather*","category":"page"},{"location":"","page":"Documentation","title":"Documentation","text":"where: ","category":"page"},{"location":"","page":"Documentation","title":"Documentation","text":"mathbfX is N times M data matrix;\nmathbfP is a user-defined partition matrix having K columns (one for each element of the partition), M rows, and containing 1 in P_ij if the i-th attribute belongs to the j-th partition and 0 otherwise;\nmathbfbeta is a vector weighting the importance of each set of attributes in the partition;\nmathbfalpha is a vector weighting the importance of each attribute within one of the sets in the partition. 
Note that the constraints imply that for each set in the partition the weights of the corresponding alpha variables are all positive and sum to 1.","category":"page"},{"location":"","page":"Documentation","title":"Documentation","text":"The PartitionedLS problem is non-convex and NP-complete. The library provides two algorithms to solve the problem anyway: an iterative algorithm based on the Alternating Least Squares approach and an optimal algorithm that guarantees requiring however exponential time in the cardinality of the partition (i.e., it is mainly useful when K is small).","category":"page"},{"location":"","page":"Documentation","title":"Documentation","text":"More details can be found in the paper Partitioned Least Squares.","category":"page"},{"location":"#To-install-this-library","page":"Documentation","title":"To install this library","text":"","category":"section"},{"location":"","page":"Documentation","title":"Documentation","text":"Just add it as a dependency to your Julia environment. 
Launch julia from the main directory of your project and enter the following commands:","category":"page"},{"location":"","page":"Documentation","title":"Documentation","text":"# Opens the package manager REPL\n]\n\n# Activate you local environment (can be skipped if you want to install the library globally)\nactivate .\n\n# Adds the library to the environment\nadd PartitionedLS","category":"page"},{"location":"#To-use-this-library","page":"Documentation","title":"To use this library","text":"","category":"section"},{"location":"","page":"Documentation","title":"Documentation","text":"You will need a matrix P describing the partitioning of your variables, e.g.:","category":"page"},{"location":"","page":"Documentation","title":"Documentation","text":"P = [[1 0]; \n [1 0]; \n [0 1]]","category":"page"},{"location":"","page":"Documentation","title":"Documentation","text":"specifies that the first and the second variable belongs to the first partition, while the third variable belongs to the second.","category":"page"},{"location":"","page":"Documentation","title":"Documentation","text":"You have then the choice to use either the standard interface or the MLJ interface. ","category":"page"},{"location":"#Standard-interface","page":"Documentation","title":"Standard interface","text":"","category":"section"},{"location":"","page":"Documentation","title":"Documentation","text":"The standard interface defines a fit function for each of the implemented algorithms. 
The function returns a tuple containing:","category":"page"},{"location":"","page":"Documentation","title":"Documentation","text":"a PartLSFitResult object containing the model and the parameters found by the algorithm;\nnothing (this is mandated by the MLJ interface, but it is not used in this case).\na NamedTuple containing some additional information.","category":"page"},{"location":"","page":"Documentation","title":"Documentation","text":"A complete example:","category":"page"},{"location":"","page":"Documentation","title":"Documentation","text":"\nusing PartitionedLS\n\nX = [[1. 2. 3.]; \n [3. 3. 4.]; \n [8. 1. 3.]; \n [5. 3. 1.]]\n\ny = [1.; \n 1.; \n 2.; \n 3.]\n\nP = [[1 0]; \n [1 0]; \n [0 1]]\n\n\n# fit using the optimal algorithm \nresult = fit(Opt, X, y, P, η = 0.0)\n\n\n# Make predictions on the given data matrix. The function works\n# with results returned by anyone of the solvers.\npredict(result[1], X)","category":"page"},{"location":"#MLJ-interface","page":"Documentation","title":"MLJ interface","text":"","category":"section"},{"location":"","page":"Documentation","title":"Documentation","text":"The MLJ interface is a allows you to use the library in a more MLJ-like fashion. The interface is defined by the PartLS model, which can be used in the MLJ framework. The model can be used in the same way as any other MLJ model.","category":"page"},{"location":"","page":"Documentation","title":"Documentation","text":"A complete example:","category":"page"},{"location":"","page":"Documentation","title":"Documentation","text":"using MLJ\nusing PartitionedLS\n\nX = [[1. 2. 3.]; \n [3. 3. 4.]; \n [8. 1. 3.]; \n [5. 3. 
1.]]\n\ny = [1.;\n 1.;\n 2.;\n 3.]\n\nP = [[1 0]; \n [1 0]; \n [0 1]]\n\n# Define the model\n\nmodel = PartLS(P=P, Optimizer=Opt, η=0.0)\n\n# Fit the model\nmach = machine(model, X, y)\nfit!(mach)\n\n# Make predictions\npredict(mach, X)","category":"page"},{"location":"#API-Documentation","page":"Documentation","title":"API Documentation","text":"","category":"section"},{"location":"","page":"Documentation","title":"Documentation","text":"PartLS\nPartLSFitResult\nPartitionedLS.fit\nPartitionedLS.predict\nPartitionedLS.homogeneousCoords\nPartitionedLS.regularizeProblem","category":"page"},{"location":"#PartitionedLS.PartLS","page":"Documentation","title":"PartitionedLS.PartLS","text":"PartLS\n\nA model type for fitting a partitioned least squares model to data.\n\nFrom MLJ, the type can be imported using\n\nPartLS = @load PartLS pkg=PartitionedLS\n\nConstruct an instance with default hyper-parameters using the syntax model = FooRegressor(). Provide keyword arguments to override hyper-parameter defaults, as in FooRegressor(P=...).\n\nTraining data\n\nIn MLJ or MLJBase, bind an instance model to data with\n\nmach = machine(model, X, y)\n\nwhere\n\nX: any matrix with element scitype Float64,2\n\nTrain the machine using fit!(mach).\n\nHyper-parameters\n\nOptimizer: the optimization algorithm to use. It can be Opt, Alt or BnB.\nP: the partition matrix. It is a binary matrix where each row corresponds to a partition and each column corresponds to a feature. The element P_{k, i} = 1 if feature i belongs to partition k.\nη: the regularization parameter. It controls the strength of the regularization.\nϵ: the tolerance parameter. It is used to determine when the Alt optimization algorithm has converged. Only used by the Alt algorithm.\nT: the maximum number of iterations. It is used to determine when to stop the Alt optimization algorithm has converged. Only used by the Alt algorithm.\nrng: the random number generator to use. 
\nIf nothing, the global random number generator rand is used.\nIf an integer, the global number generator rand is used after seeding it with the given integer.\nIf an object of type AbstractRNG, the given random number generator is used.\n\nOperations\n\npredict(mach, Xnew): return the predictions of the model on new data Xnew\n\nFitted parameters\n\nThe fields of fitted_params(mach) are:\n\nα: the values of the α variables. For each partition k, it holds the values of the α variables are such that sum_i in P_k alpha_k = 1.\nβ: the values of the β variables. For each partition k, β_k is the coefficient that multiplies the features in the k-th partition.\nt: the intercept term of the model.\nP: the partition matrix. It is a binary matrix where each row corresponds to a partition and each column corresponds to a feature. The element P_{k, i} = 1 if feature i belongs to partition k.\n\nExamples\n\nPartLS = @load FooRegressor pkg=PartLS\n\n\nX = [[1. 2. 3.]; \n [3. 3. 4.]; \n [8. 1. 3.]; \n [5. 3. 1.]]\n\ny = [1.; \n 1.; \n 2.; \n 3.]\n\nP = [[1 0]; \n [1 0]; \n [0 1]]\n\n\n# fit using the optimal algorithm \nresult = fit(Opt, X, y, P, η = 0.0)\ny_hat = predict(result.model, X)\n\n\n\n\n\n","category":"type"},{"location":"#PartitionedLS.PartLSFitResult","page":"Documentation","title":"PartitionedLS.PartLSFitResult","text":"struct PartLSFitResult\n\nThe PartLSFitResult struct represents the solution of the partitioned least squares problem. It contains the values of the α and β variables, the intercept t and the partition matrix P.\n\nFields\n\nα::Vector{Float64}: The values of the α variables. For each partition k, it holds the values of the α variables are such that sum_i in P_k alpha_k = 1.\n\nβ::Vector{Float64}: The values of the β variables. For each partition k, beta_k is the coefficient that multiplies the features in the k-th partition.\n\nt::Float64: The intercept term of the model.\n\nP::Matrix{Int64}: The partition matrix. 
It is a binary matrix where each row corresponds to a partition and each column corresponds to a feature. The element P_k i = 1 if feature i belongs to partition k.\n\n\n\n\n\n","category":"type"},{"location":"#MLJModelInterface.fit","page":"Documentation","title":"MLJModelInterface.fit","text":"fit(\n ::Type{Alt},\n X::Matrix{Float64},\n y::AbstractVector{Float64},\n P::Matrix{Int64};\n η,\n ϵ,\n T,\n nnlsalg,\n rng\n) -> Tuple{PartLSFitResult, Nothing, NamedTuple{(:opt,), <:Tuple{Any}}}\n\n\nFits a PartitionedLS model by alternating the optimization of the α and β variables. This version uses an optimization strategy based on non-negative-least-squaes solvers. This formulation is faster and more numerically stable with respect to fit(Alt, ...)`.\n\nArguments\n\nX: N M matrix describing the examples\ny: N vector with the output values for each example\nP: M K matrix specifying how to partition the M attributes into K subsets. P_mk should be 1 if attribute number m belongs to partition k.\nη: regularization factor, higher values implies more regularized solutions. Default is 0.0.\nT: number of alternating loops to be performed. Default is 100.\nϵ: minimum relative improvement in the objective function before stopping the optimization. Default is 1e-6\nnnlsalg: specific flavour of nnls algorithm to be used, possible values are :pivot, :nnls, :fnnls. Default is :nnls\n\nResult\n\nA Tuple with the following fields:\n\na PartLSFitResult object containing the fitted model\na nothing object\na NamedTuple with a field opt containing the optimal value of the objective function\n\n\n\n\n\nfit(\n ::Type{Opt},\n X::Matrix{Float64},\n y::AbstractVector{Float64},\n P::Matrix{Int64};\n η,\n nnlsalg,\n returnAllSolutions\n) -> Tuple{PartLSFitResult, Nothing, Any}\n\n\nFits a PartialLS Regression model to the given data and resturns the learnt model (see the Result section). 
It uses a coplete enumeration strategy which is exponential in K, but guarantees to find the optimal solution.\n\nArguments\n\nX: N M matrix describing the examples\ny: N vector with the output values for each example\nP: M K matrix specifying how to partition the M attributes into K subsets. P_mk should be 1 if attribute number m belongs to\n\npartition k.\n\nη: regularization factor, higher values implies more regularized solutions (default: 0.0)\nreturnAllSolutions: if true an additional output is appended to the resulting tuple containing all solutions found during the algorithm.\nnnlsalg: the kind of nnls algorithm to be used during solving. Possible values are :pivot, :nnls, :fnnls (default: :nnls)\n\nExample\n\nX = rand(100, 10)\ny = rand(100)\nP = [1 0 0; 0 1 0; 0 0 1; 1 1 0; 0 1 1]\nresult = fit(Opt, X, y, P)\n\n\n\n\n\nfit(\n ::Type{BnB},\n X::Matrix{Float64},\n y::AbstractVector{Float64},\n P::Matrix{Int64};\n η,\n nnlsalg\n) -> Tuple{PartLSFitResult, Nothing, NamedTuple{(:opt, :nopen), <:Tuple{Any, Int64}}}\n\n\nImplements the Branch and Bound algorithm to fit a Partitioned Least Squres model.\n\nArguments\n\nX: N M matrix describing the examples\ny: N vector with the output values for each example\nP: M K matrix specifying how to partition the M attributes into K subsets. P_mk should be 1 if attribute number m belongs to\n\npartition k.\n\nη: regularization factor, higher values implies more regularized solutions (default: 0.0)\nnnlsalg: the kind of nnls algorithm to be used during solving. 
Possible values are :pivot, :nnls, :fnnls (default: :nnls)\n\nResult\n\nA tuple with the following fields:\n\na PartLSFitResult object containing the fitted model\na nothing object\na NamedTuple with fields: \nopt containing the optimal value of the objective function\nnopen containing the number of open nodes in the branch and bound tree\n\n\n\n\n\nfit(\n m::PartLS,\n verbosity,\n X,\n y\n) -> Tuple{PartLSFitResult, Nothing, Any}\n\n\nFits a PartitionedLS Regression model to the given data and resturns the learnt model (see the Result section). It conforms to the MLJ interface.\n\nArguments\n\nm: A PartLS model to fit\nverbosity: the verbosity level\nX: the data matrix\ny: the target vector\n\n\n\n\n\n","category":"function"},{"location":"#MLJModelInterface.predict","page":"Documentation","title":"MLJModelInterface.predict","text":"predict(\n α::Vector{Float64},\n β::Vector{Float64},\n t::Float64,\n P::Matrix{Int64},\n X::Matrix{Float64}\n) -> Vector{Float64}\n\n\nResult\n\nthe prediction for the partitioned least squares problem with solution α, β, t over the dataset X and partition matrix P\n\n\n\n\n\npredict(\n model::PartLSFitResult,\n X::Matrix{Float64}\n) -> Vector{Float64}\n\n\nMake predictions for the datataset X using the PartialLS model model.\n\nArguments\n\nmodel: a PartLSFitResult\nX: the data containing the examples for which the predictions are sought\n\nReturn\n\nthe predictions of the given model on examples in X. \n\n\n\n\n\n","category":"function"},{"location":"#PartitionedLS.homogeneousCoords","page":"Documentation","title":"PartitionedLS.homogeneousCoords","text":"Rewrites X and P in homogeneous coordinates. 
The result is a tuple (Xo, Po) where Xo is the homogeneous version of X and Po is the homogeneous version of P.\n\nArguments\n\nX: the data matrix\nP: the partition matrix\n\nReturn\n\nXo: the homogeneous version of X\nPo: the homogeneous version of P\n\n\n\n\n\n","category":"function"},{"location":"#PartitionedLS.regularizeProblem","page":"Documentation","title":"PartitionedLS.regularizeProblem","text":"Adds regularization terms to the problem. The regularization terms are added to the objective function as a sum of squares of the α variables. The regularization parameter η controls the strength of the regularization.\n\nArguments\n\nX: the data matrix\ny: the target vector\nP: the partition matrix\nη: the regularization parameter\n\nReturn\n\nXn: the new data matrix\nyn: the new target vector\n\nMain idea\n\nK new rows are added to the data matrix X; row $k \\in \\{1, \\dots, K\\}$ is a vector of zeros except for the components that correspond to features belonging to the k-th partition, which are set to sqrt(η). The target vector y is extended with K zeros.\n\nThe point of this change is that when the objective function is evaluated as $\\|Xw - y\\|^2$, the new part of the matrix contributes to the loss with a factor of $η \\sum w_i^2$. 
This is equivalent to adding a regularization term to the objective function.\n\n\n\n\n\n","category":"function"}] } diff --git a/src/PartitionedLS.jl b/src/PartitionedLS.jl index 7429e6a..b925da4 100644 --- a/src/PartitionedLS.jl +++ b/src/PartitionedLS.jl @@ -1,12 +1,12 @@ module PartitionedLS export fit, predict, PartLS, PartLSFitResult, Opt, Alt, BnB, regularizeProblem, homogeneousCoords +import MLJModelInterface import Base.size using LinearAlgebra using NonNegLeastSquares using DocStringExtensions -using MLJModelInterface using Tables using Random @@ -14,6 +14,7 @@ import MLJModelInterface.fit import MLJModelInterface.fitted_params import MLJModelInterface.predict +const MMI = MLJModelInterface """ $(TYPEDEF) @@ -154,10 +155,31 @@ include("PartitionedLSOpt.jl") include("PartitionedLSBnB.jl") """ - $(TYPEDEF) + PartLS + +A model type for fitting a partitioned least squares model to data. + +From MLJ, the type can be imported using + +PartLS = @load PartLS pkg=PartitionedLS + +Construct an instance with default hyper-parameters using the syntax model = PartLS(). Provide keyword arguments to override hyper-parameter defaults, as in PartLS(P=...). + + +# Training data + +In MLJ or MLJBase, bind an instance `model` to data with + + mach = machine(model, X, y) + +where + +- `X`: any matrix or table with `Continuous` element scitype + +Train the machine using `fit!(mach)`. + +# Hyper-parameters -The PartLS struct represents a partitioned least squares model. -Fields are: - `Optimizer`: the optimization algorithm to use. It can be `Opt`, `Alt` or `BnB`. - `P`: the partition matrix. It is a binary matrix where each row corresponds to a partition and each column corresponds to a feature. The element `P_{k, i} = 1` if feature `i` belongs to partition `k`. @@ -169,12 +191,49 @@ Fields are: - If an integer, the global number generator `rand` is used after seeding it with the given integer. - If an object of type `AbstractRNG`, the given random number generator is used. 
-## Example +# Operations + +- `predict(mach, Xnew)`: return the predictions of the model on new data `Xnew` + + +# Fitted parameters + +The fields of `fitted_params(mach)` are: + +- `α`: the values of the α variables. For each partition `k`, it holds that the values of the α variables + are such that ``\\sum_{i \\in P_k} \\alpha_{i} = 1``. +- `β`: the values of the β variables. For each partition `k`, `β_k` is the coefficient that multiplies the features in the k-th partition. +- `t`: the intercept term of the model. +- `P`: the partition matrix. It is a binary matrix where each row corresponds to a feature and each column + corresponds to a partition. The element `P_{i, k} = 1` if feature `i` belongs to partition `k`. + +# Examples + ```julia -model = PartLS(P=P, Optimizer=Alt, rng=123) +PartLS = @load PartLS pkg=PartitionedLS + + +X = [[1. 2. 3.]; + [3. 3. 4.]; + [8. 1. 3.]; + [5. 3. 1.]] + +y = [1.; + 1.; + 2.; + 3.] + +P = [[1 0]; + [1 0]; + [0 1]] + + +# fit using the optimal algorithm +result = fit(Opt, X, y, P, η = 0.0) +y_hat = predict(result[1], X) ``` """ -MLJModelInterface.@mlj_model mutable struct PartLS <: MLJModelInterface.Deterministic +MMI.@mlj_model mutable struct PartLS <: MMI.Deterministic Optimizer::Union{Type{Opt},Type{Alt},Type{BnB}} = Opt P::Matrix{Int} = Array{Int}(undef, 0,0)::(all(_[i, j] == 0 || _[i, j] == 1 for i in range(1, size(_, 1)) for j in range(1, size(_, 2)))) η::Float64 = 0.0::(_ >= 0) @@ -197,8 +256,8 @@ It conforms to the MLJ interface. 
- `y`: the target vector """ -function MLJModelInterface.fit(m::PartLS, verbosity, X, y) - X = MLJModelInterface.matrix(X) +function MMI.fit(m::PartLS, verbosity, X, y) + X = MMI.matrix(X) y = vec(y) P = m.P @@ -214,7 +273,7 @@ function MLJModelInterface.fit(m::PartLS, verbosity, X, y) return PartitionedLS.fit(m.Optimizer, X, y, P, η=m.η) end -function MLJModelInterface.fitted_params(model::PartLS, fitresult) +function MMI.fitted_params(model::PartLS, fitresult) return fitresult end @@ -225,12 +284,12 @@ Make predictions for the datataset `X` using the PartitionedLS model `model`. It conforms to the MLJ interface. """ -function MLJModelInterface.predict(model::PartLS, fitresult, X) - X = MLJModelInterface.matrix(X) +function MMI.predict(model::PartLS, fitresult, X) + X = MMI.matrix(X) return PartitionedLS.predict(fitresult, X) end -MLJModelInterface.metadata_pkg.(PartLS, +MMI.metadata_pkg.(PartLS, name = "PartitionedLS", uuid = "19f41c5e-8610-11e9-2f2a-0d67e7c5027f", # see your Project.toml url = "https://github.com/ml-unito/PartitionedLS.jl.git", # URL to your package repo @@ -240,11 +299,85 @@ MLJModelInterface.metadata_pkg.(PartLS, ) # Then for each model, -MLJModelInterface.metadata_model(PartLS, - input_scitype = Union{Table{AbstractVector{Continuous}}, AbstractMatrix{Continuous}}, # what input data is supported? - target_scitype = AbstractVector{Continuous}, # for a supervised model, what target? +MMI.metadata_model(PartLS, + input_scitype = Union{MMI.Table{AbstractVector{MMI.Continuous}}, AbstractMatrix{MMI.Continuous}}, # what input data is supported? + target_scitype = AbstractVector{MMI.Continuous}, # for a supervised model, what target? supports_weights = false, # does the model support sample weights? load_path = "PartitionedLS.PartLS" ) - end + + +""" +#(MMI.doc_header(PartLS)) + +Use this model to fit a partitioned least squares model to data. 
+ +# Training data + +In MLJ or MLJBase, bind an instance `model` to data with + + mach = machine(model, X, y) + +where + +- `X`: any matrix or table with `Continuous` element scitype + +Train the machine using `fit!(mach)`. + +# Hyper-parameters + +- `Optimizer`: the optimization algorithm to use. It can be `Opt`, `Alt` or `BnB`. +- `P`: the partition matrix. It is a binary matrix where each row corresponds to a feature and each column + corresponds to a partition. The element `P_{i, k} = 1` if feature `i` belongs to partition `k`. +- `η`: the regularization parameter. It controls the strength of the regularization. +- `ϵ`: the tolerance parameter. It is used to determine when the Alt optimization algorithm has converged. Only used by the `Alt` algorithm. +- `T`: the maximum number of iterations. It is used to determine when to stop the Alt optimization algorithm if it has not converged. Only used by the `Alt` algorithm. +- `rng`: the random number generator to use. + - If `nothing`, the global random number generator `rand` is used. + - If an integer, the global random number generator `rand` is used after seeding it with the given integer. + - If an object of type `AbstractRNG`, the given random number generator is used. + +# Operations + +- `predict(mach, Xnew)`: return the predictions of the model on new data `Xnew` + + +# Fitted parameters + +The fields of `fitted_params(mach)` are: + +- `α`: the values of the α variables. For each partition `k`, it holds that the values of the α variables + are such that ``\\sum_{i \\in P_k} \\alpha_{i} = 1``. +- `β`: the values of the β variables. For each partition `k`, `β_k` is the coefficient that multiplies the features in the k-th partition. +- `t`: the intercept term of the model. +- `P`: the partition matrix. It is a binary matrix where each row corresponds to a feature and each column + corresponds to a partition. The element `P_{i, k} = 1` if feature `i` belongs to partition `k`. 
+ +# Examples + +```julia +PartLS = @load PartLS pkg=PartitionedLS + + +X = [[1. 2. 3.]; + [3. 3. 4.]; + [8. 1. 3.]; + [5. 3. 1.]] + +y = [1.; + 1.; + 2.; + 3.] + +P = [[1 0]; + [1 0]; + [0 1]] + + +# fit using the optimal algorithm +result = fit(Opt, X, y, P, η = 0.0) +y_hat = predict(result[1], X) +``` + +"""