Commit b3d2cfb

Merge pull request #375 from willow-ahrens/wma/leveldocs

Wma/leveldocs

willow-ahrens authored Jan 12, 2024
2 parents b25e928 + 0db70bd
Showing 17 changed files with 369 additions and 198 deletions.
7 changes: 3 additions & 4 deletions docs/make.jl
@@ -17,7 +17,6 @@ makedocs(;
-repo="https://github.com/willow-ahrens/Finch.jl/blob/{commit}{path}#{line}",
sitename="Finch.jl",
format=Documenter.HTML(;
repolink="https://github.com/willow-ahrens/Finch.jl/blob/{commit}{path}#{line}",
prettyurls=get(ENV, "CI", "false") == "true",
canonical="https://willow-ahrens.github.io/Finch.jl",
assets=["assets/favicon.ico"],
@@ -28,8 +27,8 @@ makedocs(;
#"Practical Tutorials and Use Cases" => "tutorials_use_cases/tutorials_use_cases.md",
"Comprehensive Guides" => [
"Calling Finch" => "guides/calling_finch.md",
"Tensor Formats" => "guides/tensor_formats.md",
"The Finch Language" => "guides/finch_language.md",
"Exploration of Tensor Formats" => "guides/tensor_formats.md",
#"Dimensionalization" => "guides/dimensionalization.md",
#"Tensor Lifecycles" => "guides/tensor_lifecycles.md",
#"Special Tensors" => [
@@ -49,13 +48,13 @@ makedocs(;
],
"Technical Reference" => [
# "Finch Core API" => "reference/core_api.md",
# "Function and Method Reference" => "reference/function_method_ref.md",
"Documentation Listing" => "reference/listing.md",
"Advanced Implementation Details" => [
"Internals" => "reference/advanced_implementation/internals.md",
# "Looplets and Coiteration" => "reference/advanced_implementation/looplets_coiteration.md",
# "Concordization" => "reference/advanced_implementation/concordization.md",
# "Local Variables and Constant Propagation" => "reference/advanced_implementation/local_variables.md",
# "Tensor Interface" => "reference/advanced_implementation/tensor_interface.md",
"Tensor Interface" => "reference/advanced_implementation/tensor_interface.md",
# "Looplet Interface" => "reference/advanced_implementation/looplet_interface.md",
],
],
315 changes: 137 additions & 178 deletions docs/src/guides/tensor_formats.md

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions docs/src/interactive.ipynb
@@ -49,11 +49,11 @@
"file_extension": ".jl",
"mimetype": "application/julia",
"name": "julia",
"version": "1.6.7"
"version": "1.10.0"
},
"kernelspec": {
"name": "julia-1.6",
"display_name": "Julia 1.6.7",
"name": "julia-1.10",
"display_name": "Julia 1.10.0",
"language": "julia"
}
},
180 changes: 179 additions & 1 deletion docs/src/reference/advanced_implementation/tensor_interface.md
@@ -1 +1,179 @@
-# TODO
# Advanced Tensor Storage Formats

```jldoctest example1; setup=:(using Finch)
julia> A = [0.0 0.0 4.4; 1.1 0.0 0.0; 2.2 0.0 5.5; 3.3 0.0 0.0]
4×3 Matrix{Float64}:
0.0 0.0 4.4
1.1 0.0 0.0
2.2 0.0 5.5
3.3 0.0 0.0

julia> A_fbr = Tensor(Dense(Dense(Element(0.0))), A)
Dense [:,1:3]
├─[:,1]: Dense [1:4]
│ ├─[1]: 0.0
│ ├─[2]: 1.1
│ ├─[3]: 2.2
│ ├─[4]: 3.3
├─[:,2]: Dense [1:4]
│ ├─[1]: 0.0
│ ├─[2]: 0.0
│ ├─[3]: 0.0
│ ├─[4]: 0.0
├─[:,3]: Dense [1:4]
│ ├─[1]: 4.4
│ ├─[2]: 0.0
│ ├─[3]: 5.5
│ ├─[4]: 0.0
```

We refer to a node in the tree as a subfiber. All of the nodes at the same level
are stored in the same data structure and disambiguated by an integer
`position`. In the above example, there are three levels: the rootmost level
contains only one subfiber, the root; the middle level has 3 subfibers, one for
each column; and the leafmost level has 12 subfibers, one for each element of
the array. For example, the middle level is `A_fbr.lvl.lvl`, and we can
represent its third position as `SubFiber(A_fbr.lvl.lvl, 3)`. The leaf level is
`A_fbr.lvl.lvl.lvl`, and we can access its 9th position as
`SubFiber(A_fbr.lvl.lvl.lvl, 9)`. For instructional purposes, a subfiber can
also be called on an index with parentheses to select among its children.

```jldoctest example1
julia> Finch.SubFiber(A_fbr.lvl.lvl, 3)
Dense [1:4]
├─[1]: 4.4
├─[2]: 0.0
├─[3]: 5.5
├─[4]: 0.0

julia> A_fbr[:, 3]
Dense [1:4]
├─[1]: 4.4
├─[2]: 0.0
├─[3]: 5.5
├─[4]: 0.0

julia> A_fbr(3)
Dense [1:4]
├─[1]: 4.4
├─[2]: 0.0
├─[3]: 5.5
├─[4]: 0.0

julia> Finch.SubFiber(A_fbr.lvl.lvl.lvl, 9)
4.4

julia> A_fbr[1, 3]
4.4

julia> A_fbr(3)(1)
4.4
```

When we print the tree in text, positions are numbered from top to bottom.
However, if we visualize our tree with the root at the top, positions range from
left to right:

![Dense Format Index Tree](../../assets/levels-A-d-d-e.png)

Because our array is sparse (mostly zero, or another fill value), it would be
more efficient to store only the nonzero values. In Finch, each level is
represented with a different format, and a sparse level stores only the
non-fill values. This time, we'll use a `SparseList` level (a list of stored
nonzeros) in place of the inner `Dense` level:

```jldoctest example1
julia> A_fbr = Tensor(Dense(SparseList(Element(0.0))), A)
Dense [:,1:3]
├─[:,1]: SparseList (0.0) [1:4]
│ ├─[2]: 1.1
│ ├─[3]: 2.2
│ ├─[4]: 3.3
├─[:,2]: SparseList (0.0) [1:4]
├─[:,3]: SparseList (0.0) [1:4]
│ ├─[1]: 4.4
│ ├─[3]: 5.5
```

![CSC Format Index Tree](../../assets/levels-A-d-sl-e.png)

Our `Dense(SparseList(Element(0.0)))` format is also known as
["CSC"](https://en.wikipedia.org/wiki/Sparse_matrix#Compressed_sparse_column_.28CSC_or_CCS.29)
and is equivalent to
[`SparseMatrixCSC`](https://sparsearrays.juliasparse.org/dev/#man-csc). The
[`Tensor`](@ref) function will perform a zero-cost copy between Finch fibers and
sparse matrices, when available. CSC is an excellent general-purpose
representation when we expect most of the columns to have a few nonzeros.
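
For instance, here is a minimal sketch (assuming the `SparseArrays` standard
library; printed output elided) of building the same format directly from a
`SparseMatrixCSC`:

```julia
using Finch, SparseArrays

# The example matrix, stored as a SparseMatrixCSC.
A_csc = sparse([0.0 0.0 4.4; 1.1 0.0 0.0; 2.2 0.0 5.5; 3.3 0.0 0.0])

# Dense(SparseList(Element(0.0))) lays out the same data as CSC, so this
# construction can reuse the matrix's buffers rather than deep-copying them.
A_csc_fbr = Tensor(Dense(SparseList(Element(0.0))), A_csc)
```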
However, when most of the columns are entirely fill (a situation known as
hypersparsity), it is better to compress the root level as well:

```jldoctest example1
julia> A_fbr = Tensor(SparseList(SparseList(Element(0.0))), A)
SparseList (0.0) [:,1:3]
├─[:,1]: SparseList (0.0) [1:4]
│ ├─[2]: 1.1
│ ├─[3]: 2.2
│ ├─[4]: 3.3
├─[:,3]: SparseList (0.0) [1:4]
│ ├─[1]: 4.4
│ ├─[3]: 5.5
```

![DCSC Format Index Tree](../../assets/levels-A-sl-sl-e.png)

Here we see that the entirely zero column has also been compressed. The
`SparseList(SparseList(Element(0.0)))` format is also known as
["DCSC"](https://ieeexplore.ieee.org/document/4536313).

The
["COO"](https://docs.scipy.org/doc/scipy/reference/generated/scipy.sparse.coo_matrix.html)
(or "Coordinate") format is often used in practice for ease of interchange
between libraries. In an `N`-dimensional array `A`, COO stores `N` lists of
indices `I_1, ..., I_N` where `A[I_1[p], ..., I_N[p]]` is the `p`-th stored
value in column-major numbering. In Finch, `COO` is represented as a multi-index
level, which can handle more than one index at once. We use curly brackets to
declare the number of indices handled by the level:

```jldoctest example1
julia> A_fbr = Tensor(SparseCOO{2}(Element(0.0)), A)
SparseCOO (0.0) [1:4,1:3]
├─├─[2, 1]: 1.1
├─├─[3, 1]: 2.2
├─├─[4, 1]: 3.3
├─├─[1, 3]: 4.4
├─├─[3, 3]: 5.5
```

![COO Format Index Tree](../../assets/levels-A-sc2-e.png)
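
Concretely, the coordinate lists for the matrix above look as follows (a
plain-Julia sketch of the stored data, with hypothetical variable names, not
Finch's internal field layout):

```julia
# COO representation of A in column-major order: the p-th stored value
# satisfies A[I_1[p], I_2[p]] == V[p].
I_1 = [2, 3, 4, 1, 3]            # row indices
I_2 = [1, 1, 1, 3, 3]            # column indices
V   = [1.1, 2.2, 3.3, 4.4, 5.5]  # stored values
```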

The COO format is compact and straightforward, but it doesn't support random
access. For random access, one should use the `SparseHash` format. A full
listing of supported formats follows a rough description of the internals that
all levels share, relating to types and storage.
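
For instance, a hedged sketch of a random-access-friendly format (mirroring the
`SparseCOO{2}` constructor pattern above; the printed tree is omitted):

```julia
using Finch

A = [0.0 0.0 4.4; 1.1 0.0 0.0; 2.2 0.0 5.5; 3.3 0.0 0.0]

# A two-dimensional hash-table level supports random access to stored entries.
A_fbr = Tensor(SparseHash{2}(Element(0.0)), A)
```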

## Types and Storage of Levels

All levels have a `postype`, typically denoted as `Tp` in the constructors; it
is used for internal pointer types and can be queried with the following function:

```@docs
postype
```

Additionally, many levels have a `Vp` or `Vi` in their constructors; these
stand for a vector of element type `Tp` or `Ti`, respectively. More generally,
levels are parameterized by the types that they use for storage. By default,
all levels use `Vector`, but a user could change any or all of the storage
types of a tensor so that it is stored on a GPU or a CPU, or some combination
thereof, or simply in a vector with a different allocation mechanism. The
storage type should behave like an `AbstractArray`: it needs to implement the
usual abstract array functions and `Base.resize!`. See the tests for an
example, and the sketch below.
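
For illustration, here is a minimal sketch of such a storage type (the wrapper
name and behavior are hypothetical, not part of Finch):

```julia
# A vector wrapper that satisfies the two requirements named above: it behaves
# like an AbstractArray and supports Base.resize!. A real use could place
# `data` in pinned, shared, or device memory instead.
struct TrackedVector{T} <: AbstractVector{T}
    data::Vector{T}
end
TrackedVector{T}(::UndefInitializer, n::Integer) where {T} =
    TrackedVector{T}(Vector{T}(undef, n))

Base.size(v::TrackedVector) = size(v.data)
Base.getindex(v::TrackedVector, i::Int) = v.data[i]
Base.setindex!(v::TrackedVector, x, i::Int) = (v.data[i] = x; v)
Base.resize!(v::TrackedVector, n::Integer) = (resize!(v.data, n); v)
```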

When levels are constructed in short form, as in the examples above, the index,
position, and storage types are inferred from the level below. All of the
levels at the bottom of a `Tensor` (`Element`, `Pattern`, `Repeater`) specify
an index type, position type, and storage type even if they don't need them;
these are used by the enclosing levels that take them as parameters.

### `moveto`: Copying Fibers to a New Storage Type

If one needs to copy a tensor to another tensor with a different storage type, one can use the `moveto` function, described below.

```@docs
moveto
```
1 change: 0 additions & 1 deletion docs/src/reference/function_method_ref.md

This file was deleted.

5 changes: 5 additions & 0 deletions docs/src/reference/listing.md
@@ -0,0 +1,5 @@
# Documentation Listing

```@autodocs; canonical=false
Modules = [Finch, Finch.FinchNotation]
```
2 changes: 1 addition & 1 deletion src/Finch.jl
@@ -132,7 +132,7 @@ export fsparse, fsparse!, fsprand, fspzeros, ffindnz, fread, fwrite, countstored
export bspread, bspwrite
export ftnsread, ftnswrite, fttread, fttwrite

-export moveto
+export moveto, postype

include("interface/abstractarrays.jl")
include("interface/abstractunitranges.jl")
8 changes: 8 additions & 0 deletions src/semantics.jl
@@ -134,6 +134,14 @@ function similar in spirit to `Base.axes`.
"""
function virtual_size end

"""
virtual_resize!(tns, ctx, dims...)
Resize `tns` in the context `ctx`. This is a
function similar in spirit to `Base.resize!`.
"""
function virtual_resize! end

"""
moveto(arr, device)
2 changes: 1 addition & 1 deletion src/tensors/levels/elementlevels.jl
@@ -1,5 +1,5 @@
"""
-ElementLevel{D, [Tv=typeof(D), Tp=Int, Val]}()
+ElementLevel{D, [Tv=typeof(D)], [Tp=Int], [Val]}()
A subfiber of an element level is a scalar of type `Tv`, initialized to `D`. `D`
may optionally be given as the first argument.
4 changes: 2 additions & 2 deletions src/tensors/levels/patternlevels.jl
@@ -1,9 +1,9 @@
"""
-PatternLevel{[Tp]}()
+PatternLevel{[Tp=Int]}()
A subfiber of a pattern level is the Boolean value true, but it's `default` is
false. PatternLevels are used to create tensors that represent which values
-are stored by other fibers. See [`pattern`](@ref) for usage examples.
+are stored by other fibers. See [`pattern!`](@ref) for usage examples.
```jldoctest
julia> Tensor(Dense(Pattern()), 3)
2 changes: 1 addition & 1 deletion src/tensors/levels/sparsebytemaplevels.jl
@@ -1,5 +1,5 @@
"""
-SparseByteMapLevel{[Ti=Tuple{Int...}], [Tp=Int], [Ptr] [Tbl]}(lvl, [dims])
+SparseByteMapLevel{[Ti=Int], [Ptr, Tbl]}(lvl, [dims])
Like the [`SparseListLevel`](@ref), but a dense bitmap is used to encode
which slices are stored. This allows the ByteMap level to support random access.
2 changes: 1 addition & 1 deletion src/tensors/levels/sparsecoolevels.jl
@@ -1,5 +1,5 @@
"""
-SparseCOOLevel{[N], [TI=Tuple{Int...}], [Ptr], [Tbl]}(lvl, [dims])
+SparseCOOLevel{[N], [TI=Tuple{Int...}], [Ptr, Tbl]}(lvl, [dims])
A subfiber of a sparse level does not need to represent slices which are
entirely [`default`](@ref). Instead, only potentially non-default slices are
2 changes: 1 addition & 1 deletion src/tensors/levels/sparsehashlevels.jl
@@ -1,5 +1,5 @@
"""
-SparseHashLevel{[N], [TI=Tuple{Int...}], [Ptr], [Tbl], [Srt]}(lvl, [dims])
+SparseHashLevel{[N], [TI=Tuple{Int...}], [Ptr, Tbl, Srt]}(lvl, [dims])
A subfiber of a sparse level does not need to represent slices which are
entirely [`default`](@ref). Instead, only potentially non-default slices are
4 changes: 2 additions & 2 deletions src/tensors/levels/sparselistlevels.jl
@@ -1,5 +1,5 @@
"""
-SparseListLevel{[Ti=Int], [Tp=Int], [Ptr=Vector{Tp}], [Idx=Vector{Ti}]}(lvl, [dim])
+SparseListLevel{[Ti=Int], [Ptr, Idx]}(lvl, [dim])
A subfiber of a sparse level does not need to represent slices `A[:, ..., :, i]`
which are entirely [`default`](@ref). Instead, only potentially non-default
@@ -99,7 +99,7 @@ function display_fiber(io::IO, mime::MIME"text/plain", fbr::SubFiber{<:SparseLis
p = fbr.pos
lvl = fbr.lvl
if p + 1 > length(lvl.ptr)
print(io, "SparseHash(undef...)")
print(io, "SparseList(undef...)")
return
end

23 changes: 23 additions & 0 deletions src/tensors/levels/sparserlelevels.jl
@@ -1,3 +1,26 @@
"""
SparseRLELevel{[Ti=Int], [Ptr, Left, Right]}(lvl, [dim])
The sparse RLE level represent runs of equivalent slices `A[:, ..., :, i]`
which are not entirely [`default`](@ref). A sorted list is used to record the
left and right endpoints of each run. Optionally, `dim` is the size of the last dimension.
`Ti` is the type of the last tensor index, and `Tp` is the type used for
positions in the level. The types `Ptr`, `Left`, and `Right` are the types of the
arrays used to store positions and endpoints.
```jldoctest
julia> Tensor(Dense(SparseRLELevel(Element(0.0))), [10 0 20; 30 0 0; 0 0 40])
Dense [:,1:3]
├─[:,1]: SparseRLE (0.0) [1:3]
│ ├─[1:1]: 10.0
│ ├─[2:2]: 30.0
├─[:,2]: SparseRLE (0.0) [1:3]
├─[:,3]: SparseRLE (0.0) [1:3]
│ ├─[1:1]: 20.0
│ ├─[3:3]: 40.0
```
"""
struct SparseRLELevel{Ti, Ptr<:AbstractVector, Left<:AbstractVector, Right<:AbstractVector, Lvl} <: AbstractLevel
lvl::Lvl
shape::Ti
2 changes: 1 addition & 1 deletion src/tensors/levels/sparsevbllevels.jl
@@ -1,5 +1,5 @@
"""
-SparseVBLLevel{[Ti=Int], [Tp=Int], [Ptr=Vector{Tp}], [Idx=Vector{Ti}], [Ofs=Vector{Ofs}]}(lvl, [dim])
+SparseVBLLevel{[Ti=Int], [Ptr, Idx, Ofs]}(lvl, [dim])
Like the [`SparseListLevel`](@ref), but contiguous subfibers are stored together in blocks.
2 changes: 1 addition & 1 deletion src/transforms/dimensionalize.jl
@@ -23,7 +23,7 @@ The dimensions are semantically evaluated just before the corresponding loop or
declaration statement. The program is assumed to be scoped, so that all loops
have unique index names.
-See also: [`virtual_size`](@ref), [`virtual_resize`](@ref), [`combinedim`](@ref)
+See also: [`virtual_size`](@ref), [`virtual_resize!`](@ref), [`combinedim`](@ref)
"""
function dimensionalize!(prgm, ctx)
prgm = DeclareDimensions(ctx=ctx)(prgm)
