a lot

BallisticLA · Aug 25, 2024 · 8706f06 · 8706f06
1 parent 02c3ac2
commit 8706f06
Show file tree

Hide file tree

Showing 14 changed files with 316 additions and 231 deletions.
diff --git a/RandBLAS/base.hh b/RandBLAS/base.hh
@@ -283,10 +283,33 @@ enum class MajorAxis : char {
 // =============================================================================
 /// @verbatim embed:rst:leading-slashes
 ///
-///  TODO: take the expectation properties in scaled partial isometries docs and move here.
-///  (Or, rather, move above.)
+/// **Mathematical description**
 ///
-/// Any object :math:`\ttt{D}` of type :math:`\ttt{SkDist}` has the following attributes.
+/// Matrices sampled from sketching distributions in RandBLAS are mean-zero
+/// and have covariance matrices that are proportional to the identity. That is, 
+/// if :math:`\D` is a distribution over :math:`r \times c` matrices and 
+/// :math:`\mtxS` is a sample from :math:`\D`,  then
+/// :math:`\mathbb{E}\mtxS = \mathbf{0}_{r \times c}` and
+///
+/// .. math::
+///    :nowrap:
+///     
+///     \begin{gather}
+///     \alpha^2 \cdot \mathbb{E}\left[ \mtxS^T\mtxS \right]=\mathbf{I}_{c \times c}& \nonumber \\
+///     \,\beta^2 \cdot \mathbb{E}\left[ \mtxS{\mtxS}^T\, \right]=\mathbf{I}_{r \times r}& \nonumber
+///     \end{gather}
+///
+/// hold for some :math:`\alpha > 0` and :math:`\beta > 0`.
+///
+/// The *isometry scale* of the distribution
+/// is :math:`\gamma := \alpha` if :math:`c \geq r` and :math:`\gamma := \beta` otherwise. If you want to
+/// sketch in a way that preserves squared norms in expectation, then you should sketch with 
+/// a scaled sample :math:`\gamma \mtxS` rather than the sample itself.
+///
+/// **Programmatic description**
+///
+/// A variable :math:`\ttt{D}` of a type that conforms to the 
+/// :math:`\ttt{SketchingDistribution}` concept has the following attributes.
 ///
 /// .. list-table::
 ///    :widths: 25 30 40
@@ -303,19 +326,28 @@ enum class MajorAxis : char {
 ///      - samples from :math:`\ttt{D}` have this many columns
 ///    * - :math:`\ttt{D.major_axis}`
 ///      - :math:`\ttt{const MajorAxis}`
-///      - Indicate the nature of statistical independence among a sample's entries.
+///      - Implementation-dependent; see MajorAxis documentation.
+///    * - :math:`\ttt{D.isometry_scale}`
+///      - :math:`\ttt{const double}`
+///      - See above.
 ///
-///
-/// Note that we only allow one major axis. Although it's not obvious, enforcing a single major axis
-/// is relevant even for distributions over matrices with i.i.d. entries.
+/// Note that the isometry scale is always stored in double precision; this has no bearing 
+/// on the precision of sketching operators that are sampled from a :math:`\ttt{SketchingDistribution}`.
 ///
 /// @endverbatim
 template<typename SkDist>
 concept SketchingDistribution = requires(SkDist D) {
     { D.n_rows }     -> std::same_as<const int64_t&>;
     { D.n_cols }     -> std::same_as<const int64_t&>;
     { D.major_axis } -> std::same_as<const MajorAxis&>;
+    { D.isometry_scale } -> std::same_as<const double&>;
 };
+#else
+///   .. math::
+///
+///     {\D}\texttt{.isometry\_scale} = \begin{cases} \alpha &\text{ if } r \leq c \\ \beta &\text{ if } r > c \end{cases}~.
+#define SketchingDistribution typename
+#endif
 
 // =============================================================================
 /// \fn isometry_scale_factor(SkDist D) 
@@ -348,9 +380,7 @@ concept SketchingDistribution = requires(SkDist D) {
 /// @endverbatim
 template <typename T, SketchingDistribution SkDist>
 inline T isometry_scale_factor(SkDist D);
-#else
-#define SketchingDistribution typename
-#endif
+
 
 #ifdef __cpp_concepts
 // =============================================================================
@@ -365,8 +395,9 @@ template<typename SKOP>
 concept SketchingOperator = requires(SKOP S) {
     { S.n_rows }     -> std::same_as<const int64_t&>;
     { S.n_cols }     -> std::same_as<const int64_t&>;
+    { S.dist   }     -> SketchingDistribution;
     { S.seed_state } -> std::same_as<const typename SKOP::state_t&>;
-    { S.seed_state } -> std::same_as<const typename SKOP::state_t&>;
+    { S.next_state } -> std::same_as<const typename SKOP::state_t&>;
 };
 #else
 #define SketchingOperator typename

diff --git a/RandBLAS/dense_skops.hh b/RandBLAS/dense_skops.hh
@@ -190,6 +190,11 @@ RNGState<RNG> compute_next_state(DD dist, RNGState<RNG> state) {
     return state;
 }
 
+template <typename DDN>
+inline double isometry_scale(DDN dn, int64_t n_rows, int64_t n_cols) {
+    return (dn == DDN::BlackBox) ? 1.0 : std::pow(std::min(n_rows, n_cols), -0.5);
+}
+
 } // end namespace RandBLAS::dense
 
 
@@ -228,45 +233,25 @@ struct DenseDist {
     ///  Matrices drawn from this distribution have this many columns.
     const int64_t n_cols;
 
-    // ---------------------------------------------------------------------------
-    ///  The distribution used for the entries of the sketching operator.
-    const DenseDistName family;
-
     // ---------------------------------------------------------------------------
     ///  This member affects whether samples from this distribution have their
     ///  entries filled row-wise or column-wise. While there is no statistical 
     ///  difference between these two filling orders, there are situations
     ///  where one order or the other might be preferred.
-    ///  
     /// @verbatim embed:rst:leading-slashes
-    /// .. dropdown:: *Notes for experts*
-    ///    :animate: fade-in-slide-down
-    ///
-    ///    Deciding the value of this member is only needed
-    ///    in algorithms where (1) there's a need to iteratively generate panels of
-    ///    a larger sketching operator and (2) one of larger operator's dimensions
-    ///    cannot be known before the  iterative process starts.
-    ///
-    ///    Essentially, a column-wise fill order lets us stack operators horizontally
-    ///    in a consistent way, while row-wise fill order lets us stack vertically
-    ///    in a consistent way. The mapping from major_axis to fill order is given below.
-    ///
-    ///    .. list-table::
-    ///       :widths: 34 33 33
-    ///       :header-rows: 1
-    ///
-    ///       * -  
-    ///         - :math:`\texttt{major_axis} = \texttt{Long}`
-    ///         - :math:`\texttt{major_axis} = \texttt{Short}`
-    ///       * - :math:`\texttt{n_rows} > \texttt{n_cols}`
-    ///         - column-wise
-    ///         - row-wise
-    ///       * - :math:`\texttt{n_rows} \leq \texttt{n_cols}`
-    ///         - row-wise
-    ///         - column-wise
+    /// See our tutorial section on :ref:`updating sketches <sketch_updates>` for more information.
     /// @endverbatim
     const MajorAxis major_axis;
 
+    // ---------------------------------------------------------------------------
+    ///  A sketching operator sampled from this distribution should be multiplied
+    ///  by this constant in order for sketching to preserve norms in expectation.
+    const double isometry_scale;
+
+    // ---------------------------------------------------------------------------
+    ///  The distribution used for the entries of the sketching operator.
+    const DenseDistName family;
+
     // ---------------------------------------------------------------------------
     ///  A distribution over matrices of shape (n_rows, n_cols) with entries drawn
     ///  iid from either the default choice of standard normal distribution, or from
@@ -276,14 +261,17 @@ struct DenseDist {
         int64_t n_rows,
         int64_t n_cols,
         DenseDistName dn = DenseDistName::Gaussian
-    ) : n_rows(n_rows), n_cols(n_cols), family(dn), major_axis( (dn == DenseDistName::BlackBox) ? MajorAxis::Undefined : MajorAxis::Long) { }
+    ) : n_rows(n_rows), n_cols(n_cols),
+        major_axis( (dn == DenseDistName::BlackBox) ? MajorAxis::Undefined : MajorAxis::Long),
+        isometry_scale(dense::isometry_scale(dn, n_rows, n_cols)), family(dn) { }
 
     DenseDist(
         int64_t n_rows,
         int64_t n_cols,
         DenseDistName dn,
         MajorAxis ma
-    ) : n_rows(n_rows), n_cols(n_cols), family(dn), major_axis(ma) {
+    ) : n_rows(n_rows), n_cols(n_cols), major_axis(ma),
+        isometry_scale(dense::isometry_scale(dn, n_rows, n_cols)), family(dn) {
         if (dn == DenseDistName::BlackBox) {
             randblas_require(ma == MajorAxis::Undefined);
         } else {
@@ -315,16 +303,6 @@ inline int64_t major_axis_length(const DenseDist &D) {
         std::max(D.n_rows, D.n_cols) : std::min(D.n_rows, D.n_cols);
 }
 
-template <typename T>
-inline T isometry_scale_factor(DenseDist D) {
-    if (D.family == DenseDistName::BlackBox) {
-        throw std::runtime_error("Unrecognized distribution.");
-    }
-    // When we sample from the scalar distributions we always
-    // scale things so they're variance-1. 
-    return std::pow((T) std::min(D.n_rows, D.n_cols), -0.5);
-}
-
 
 // =============================================================================
 /// A sample from a distribution over dense sketching operators.
@@ -341,7 +319,7 @@ struct DenseSkOp {
     // ---------------------------------------------------------------------------
     ///  The distribution from which this sketching operator is sampled.
     ///  This member specifies the number of rows and columns of the sketching
-    ///  operator.
+    ///  operator, as well as whether it is generated row-wise or column-wise.
     const DenseDist dist;
 
     // ---------------------------------------------------------------------------
@@ -418,6 +396,7 @@ struct DenseSkOp {
     }
 };
 
+static_assert(SketchingDistribution<DenseDist>);
 
 // =============================================================================
 /// @verbatim embed:rst:leading-slashes

diff --git a/RandBLAS/sparse_skops.hh b/RandBLAS/sparse_skops.hh
@@ -105,6 +105,18 @@ static RNGState<RNG> repeated_fisher_yates(
     return RNGState<RNG> {ctr, key};
 }
 
+inline double isometry_scale(MajorAxis ma, int64_t vec_nnz, int64_t n_rows, int64_t n_cols) {
+    if (ma == MajorAxis::Short) {
+        return std::pow(vec_nnz, -0.5); 
+    } else if (ma == MajorAxis::Long) {
+        double minor_ax_len = std::min(n_rows, n_cols);
+        double major_ax_len = std::max(n_rows, n_cols);
+        return std::sqrt( major_ax_len / (vec_nnz * minor_ax_len) );
+    } else {
+        throw std::invalid_argument("Cannot compute the isometry scale for a sparse operator with unspecified major axis.");
+    }
+}
+
 }
 
 namespace RandBLAS {
@@ -121,6 +133,20 @@ struct SparseDist {
     ///  Matrices drawn from this distribution have this many columns.
     const int64_t n_cols;
 
+    // ---------------------------------------------------------------------------
+    ///  Constrains the sparsity pattern of matrices drawn from this distribution. 
+    ///
+    ///  Having major_axis==Short results in sketches are more likely to contain
+    ///  useful geometric information, without making assumptions about the data
+    ///  being sketched.
+    ///
+    const MajorAxis major_axis = MajorAxis::Short;
+
+    // ---------------------------------------------------------------------------
+    ///  A sketching operator sampled from this distribution should be multiplied
+    ///  by this constant in order for sketching to preserve norms in expectation.
+    const double isometry_scale;
+
     // ---------------------------------------------------------------------------
     ///  If this distribution is short-axis major, then matrices sampled from
     ///  it will have exactly \math{\texttt{vec_nnz}} nonzeros per short-axis
@@ -134,14 +160,15 @@ struct SparseDist {
     ///
     const int64_t vec_nnz;
 
-    // ---------------------------------------------------------------------------
-    ///  Constrains the sparsity pattern of matrices drawn from this distribution. 
-    ///
-    ///  Having major_axis==Short results in sketches are more likely to contain
-    ///  useful geometric information, without making assumptions about the data
-    ///  being sketched.
-    ///
-    const MajorAxis major_axis = MajorAxis::Short;
+    SparseDist(
+        int64_t n_rows,
+        int64_t n_cols,
+        MajorAxis ma,
+        int64_t vec_nnz
+    ) : n_rows(n_rows), n_cols(n_cols), major_axis(ma),
+        isometry_scale(sparse::isometry_scale(ma, vec_nnz, n_rows, n_cols)),
+        vec_nnz(vec_nnz) {}
+
 };
 
 template <typename RNG, SignedInteger sint_t>
@@ -151,18 +178,6 @@ inline RNGState<RNG> repeated_fisher_yates(
     return sparse::repeated_fisher_yates(state, k, n, r, indices, (sint_t*) nullptr, (double*) nullptr);
 }
 
-template <typename T>
-inline T isometry_scale_factor(SparseDist D) {
-    T vec_nnz = (T) D.vec_nnz;
-    if (D.major_axis == MajorAxis::Short) {
-        return std::pow(vec_nnz, -0.5); 
-    } else {
-        T minor_ax_len = (T) std::min(D.n_rows, D.n_cols);
-        T major_ax_len = (T) std::max(D.n_rows, D.n_cols);
-        return std::sqrt( major_ax_len / (vec_nnz * minor_ax_len) );
-    }
-}
-
 template <typename RNG>
 RNGState<RNG> compute_next_state(SparseDist dist, RNGState<RNG> state) {
     int64_t minor_len;
@@ -176,7 +191,6 @@ RNGState<RNG> compute_next_state(SparseDist dist, RNGState<RNG> state) {
     return state;
 }
 
-
 // =============================================================================
 /// A sample from a prescribed distribution over sparse matrices.
 ///
@@ -193,7 +207,7 @@ struct SparseSkOp {
     // ---------------------------------------------------------------------------
     ///  The distribution from which this sketching operator is sampled.
     ///  This member specifies the number of rows and columns of the sketching
-    ///  operator.
+    ///  operator. It also controls the sparsity pattern and the sparsity level.
     const SparseDist dist;
 
     // ---------------------------------------------------------------------------
@@ -376,6 +390,8 @@ struct SparseSkOp {
     }
 };
 
+static_assert(SketchingDistribution<SparseDist>);
+
 // =============================================================================
 /// Performs the work in sampling S from its underlying distribution. This 
 /// entails populating S.rows, S.cols, and S.vals with COO-format sparse matrix
@@ -501,12 +517,9 @@ COOMatrix<T, sint_t> coo_view_of_skop(SparseSkOp &S) {
 template <typename SparseSkOp>
 static auto transpose(SparseSkOp const &S) {
     randblas_require(S.known_filled);
-    SparseDist dist = {
-        .n_rows = S.dist.n_cols,
-        .n_cols = S.dist.n_rows,
-        .vec_nnz = S.dist.vec_nnz,
-        .major_axis = S.dist.major_axis
-    };
+    SparseDist dist(
+        S.dist.n_cols, S.dist.n_rows, S.dist.major_axis, S.dist.vec_nnz
+    );
     SparseSkOp St(dist, S.seed_state, S.cols, S.rows, S.vals);
     St.next_state = S.next_state;
     return St;

diff --git a/examples/total-least-squares/tls_sparse_skop.cc b/examples/total-least-squares/tls_sparse_skop.cc
@@ -139,12 +139,12 @@ int main(int argc, char* argv[]){
 
     // Sample the sketching operator 
     auto time_constructsketch1 = high_resolution_clock::now();
-    RandBLAS::SparseDist Dist = {
-        .n_rows = sk_dim,                            // Number of rows of the sketching operator 
-        .n_cols = m,                                 // Number of columns of the sketching operator
-        .vec_nnz = 8,                                // Number of non-zero entires per major-axis vector
-        .major_axis = RandBLAS::MajorAxis::Short     // A "SASO" (aka SJLT, aka OSNAP, aka generalized CountSketch)
-    };
+    RandBLAS::SparseDist Dist(
+        sk_dim,                     // Number of rows of the sketching operator 
+        m,                          // Number of columns of the sketching operator
+        RandBLAS::MajorAxis::Short, // A "SASO" (aka SJLT, aka OSNAP, aka generalized CountSketch)
+        8                           // Number of non-zero entires per column
+    );
     uint32_t seed = 1997;
     RandBLAS::SparseSkOp<double> S(Dist, seed);  
     RandBLAS::fill_sparse(S);