diff --git a/RandBLAS/base.hh b/RandBLAS/base.hh
index fece0b4c..9206b13f 100644
--- a/RandBLAS/base.hh
+++ b/RandBLAS/base.hh
@@ -342,9 +342,6 @@ inline int64_t get_dim_major(Axis major_axis, int64_t n_rows, int64_t n_cols) {
 ///       * - :math:`\ttt{D.n_cols}`
 ///         - :math:`\ttt{const int64_t}`
 ///         - samples from :math:`\ttt{D}` have this many columns
-///       * - :math:`\ttt{D.major_axis}`
-///         - :math:`\ttt{const Axis}`
-///         - Implementation-dependent; see Axis documentation.
 ///       * - :math:`\ttt{D.isometry_scale}`
 ///         - :math:`\ttt{const double}`
 ///         - See above.
@@ -357,7 +354,6 @@ template
 concept SketchingDistribution = requires(SkDist D) {
     { D.n_rows } -> std::same_as;
     { D.n_cols } -> std::same_as;
-    { D.major_axis } -> std::same_as;
     { D.isometry_scale } -> std::same_as;
 };
 #else
diff --git a/RandBLAS/dense_skops.hh b/RandBLAS/dense_skops.hh
index b92b7efd..631dbaeb 100644
--- a/RandBLAS/dense_skops.hh
+++ b/RandBLAS/dense_skops.hh
@@ -296,7 +296,6 @@ struct DenseDist {
     ///
     const blas::Layout natural_layout;
 
-    // ---------------------------------------------------------------------------
     /// Arguments passed to this function are used to initialize members of the same names.
     /// The members \math{\ttt{dim_major},} \math{\ttt{dim_minor},} \math{\ttt{isometry_scale},}
@@ -386,13 +385,12 @@ struct DenseSkOp {
     T *buff = nullptr;
 
     // ---------------------------------------------------------------------------
-    /// The storage order that should be used for any read or write operations
-    /// with \math{\ttt{buff}.} The leading dimension when reading from \math{\ttt{buff}}
-    /// is assumed to be
+    /// The storage order for \math{\ttt{buff}.} The leading dimension of
+    /// \math{\mat(\ttt{buff})} when reading from \math{\ttt{buff}} is
     /// @verbatim embed:rst:leading-slashes
     ///       .. math::
     ///
-    ///           \ttt{lds} = \begin{cases} \ttt{n_rows} & \text{ if } ~~ \ttt{layout == ColMajor} \\ \ttt{n_cols} & \text{ if } ~~ \ttt{layout == RowMajor}.
+    ///           \ttt{lds} = \begin{cases} \ttt{n_rows} & \text{ if } ~~ \ttt{layout == ColMajor} \\ \ttt{n_cols} & \text{ if } ~~ \ttt{layout == RowMajor} \end{cases}\,.
     ///
     /// @endverbatim
     ///
@@ -406,8 +404,8 @@ struct DenseSkOp {
     /////////////////////////////////////////////////////////////////////
 
     // ---------------------------------------------------------------------------
-    /// **Standard constructor**. Arguments passed to this function are
-    /// used to initialize members of the same name. \math{\ttt{own_memory}} is initialized to true,
+    /// Arguments passed to this function are
+    /// used to initialize members of the same names. \math{\ttt{own_memory}} is initialized to true,
     /// \math{\ttt{buff}} is initialized to nullptr, and \math{\ttt{layout}} is initialized
     /// to \math{\ttt{dist.natural_layout}.} The \math{\ttt{next_state}} member is
     /// computed automatically from \math{\ttt{dist}} and \math{\ttt{next_state}.}
@@ -539,7 +537,7 @@ static_assert(SketchingOperator>);
 ///
 /// @endverbatim
 template
-RNGState fill_dense(blas::Layout layout, const DenseDist &D, int64_t n_rows, int64_t n_cols, int64_t ro_s, int64_t co_s, T* buff, const RNGState &seed) {
+RNGState fill_dense_unpacked(blas::Layout layout, const DenseDist &D, int64_t n_rows, int64_t n_cols, int64_t ro_s, int64_t co_s, T* buff, const RNGState &seed) {
     using RandBLAS::dense::fill_dense_submat_impl;
     randblas_require(D.n_rows >= n_rows + ro_s);
     randblas_require(D.n_cols >= n_cols + co_s);
@@ -600,7 +598,7 @@ RNGState fill_dense(blas::Layout layout, const DenseDist &D, int64_t n_rows
 ///
 template
 RNGState fill_dense(const DenseDist &D, T *buff, const RNGState &seed) {
-    return fill_dense(D.natural_layout, D, D.n_rows, D.n_cols, 0, 0, buff, seed);
+    return fill_dense_unpacked(D.natural_layout, D, D.n_rows, D.n_cols, 0, 0, buff, seed);
 }
 
 // =============================================================================
@@ -625,7 +623,7 @@ RNGState fill_dense(const DenseDist &D, T *buff, const RNGState &seed)
 ///
 ///       .. math::
 ///
-///           \ttt{lds} = \begin{cases} \ttt{n_rows} & \text{ if } ~~ \ttt{S.layout == ColMajor} \\ \ttt{n_cols} & \text{ if } ~~ \ttt{S.layout == RowMajor}.
+///           \ttt{lds} = \begin{cases} \ttt{n_rows} & \text{ if } ~~ \ttt{S.layout == ColMajor} \\ \ttt{n_cols} & \text{ if } ~~ \ttt{S.layout == RowMajor} \end{cases}\,.
 ///
 /// @endverbatim
 template
@@ -635,7 +633,7 @@ void fill_dense(DenseSkOp &S) {
         S.buff = new T[S.n_rows * S.n_cols];
     }
     randblas_require(S.buff != nullptr);
-    fill_dense(S.layout, S.dist, S.n_rows, S.n_cols, 0, 0, S.buff, S.seed_state);
+    fill_dense_unpacked(S.layout, S.dist, S.n_rows, S.n_cols, 0, 0, S.buff, S.seed_state);
     return;
 }
 
@@ -656,7 +654,6 @@ struct BLASFriendlyOperator {
     }
 };
 
-// NOTE: the returned operator satisfies submatrix.layout == S.dist.natural_layout even if this differs from S.layout.
 template
 BFO submatrix_as_blackbox(const DenseSkOp &S, int64_t n_rows, int64_t n_cols, int64_t ro_s, int64_t co_s) {
     randblas_require(ro_s + n_rows <= S.n_rows);
@@ -664,7 +661,7 @@ BFO submatrix_as_blackbox(const DenseSkOp &S, int64_t n_rows, int64_t n_cols, in
     using T = typename DenseSkOp::scalar_t;
     T *buff = new T[n_rows * n_cols];
     auto layout = S.layout;
-    fill_dense(layout, S.dist, n_rows, n_cols, ro_s, co_s, buff, S.seed_state);
+    fill_dense_unpacked(layout, S.dist, n_rows, n_cols, ro_s, co_s, buff, S.seed_state);
     int64_t dim_major = S.dist.dim_major;
     BFO submatrix{layout, n_rows, n_cols, buff, dim_major, true};
     return submatrix;
diff --git a/RandBLAS/sparse_skops.hh b/RandBLAS/sparse_skops.hh
index e3b204a7..88c75f1b 100644
--- a/RandBLAS/sparse_skops.hh
+++ b/RandBLAS/sparse_skops.hh
@@ -473,7 +473,7 @@ void laso_merge_long_axis_vector_coo_data(
 ///
 ///
 template
-state_t fill_sparse(
+state_t fill_sparse_unpacked_nosub(
     const SparseDist &D,
     int64_t &nnz, T* vals, sint_t* rows, sint_t *cols,
     const state_t &seed_state
@@ -559,7 +559,7 @@ void fill_sparse(SparseSkOp &S) {
     randblas_require(S.rows != nullptr);
     randblas_require(S.cols != nullptr);
     randblas_require(S.vals != nullptr);
-    fill_sparse(S.dist, S.nnz, S.vals, S.rows, S.cols, S.seed_state);
+    fill_sparse_unpacked_nosub(S.dist, S.nnz, S.vals, S.rows, S.cols, S.seed_state);
     return;
 }
diff --git a/rtd/source/Doxyfile b/rtd/source/Doxyfile
index 3cb6ad72..8a3a3e4e 100644
--- a/rtd/source/Doxyfile
+++ b/rtd/source/Doxyfile
@@ -877,7 +877,7 @@ WARN_LOGFILE           =
 
 INPUT                  = ../../README.md \
                          ../../ \
-                         ../../RandBLAS/ \
+                         ../../RandBLAS/*.hh \
                          ../../RandBLAS/sparse_data
 
 # This tag can be used to specify the character encoding of the source files
diff --git a/rtd/source/FAQ.rst b/rtd/source/FAQ.rst
index 2e4ceb7f..6050af78 100644
--- a/rtd/source/FAQ.rst
+++ b/rtd/source/FAQ.rst
@@ -2,8 +2,23 @@
 FAQ and Limitations
 ==============================
 
-(In)frequently asked questions about our design decisions
------------------------------------------------------------
+
+How do I do this and that?
+--------------------------
+
+How do I sketch a const symmetric matrix that's only stored in an upper or lower triangle?
+    You can only do this with dense sketching operators.
+    You'll have to prepare the plain buffer representation yourself with
+    :cpp:any:`RandBLAS::fill_dense_unpacked`
+    and then you'll have to use that buffer in your own SYMM function.
+
+How do I sketch a submatrix of a sparse matrix?
+    You can only do this if the sparse matrix is in COO format.
+    Take a look at the ``lsksp3`` and ``rsksp3`` functions in the source code (they aren't documented on this website).
+
+
+Why did you ... ?
+-----------------
 
 Why the name RandBLAS?
     RandBLAS derives its name from BLAS: the basic linear algebra subprograms.
     Its name evokes the *purpose* of BLAS rather
@@ -15,21 +30,13 @@ Why the name RandBLAS?
     are made up for by the flexibilty and portability of a BLAS-like API.
     It is also our hope that popular high-level libraries that already wrap BLAS
     might find it straightforward to define similar wrappers for RandBLAS.
 
-DenseDist and SparseDist are very simple immutable structs. Why bother having constructors for these classes, when you could just use initialization lists?
-    DenseDist and SparseDist have common members and type-specific members.
-    We wanted the common members to appear earlier initialization order, since that makes it easier to
-    preserve RandBLAS' polymorphic API when called from other languages.
-
-    Two important common members are major_axis and isometry_scale. Users rarely need to think about the former (since there are
-    sensible defaults for DenseDist and SparseDist) and they never need to make decisions for the latter
-    What's more, there are type-specific members that the user should be mindful of, and would be ill-suited to the trailing
-    positions in an initialization list.
-
-    Using constructors therefore has three benefits.
-
-    1. We can put the type-specific mebers earlier in the argument list,
-    2. We can include the default value for major_axis as a trailing constructor argument, and
-    3. We can ensure that isometry_scale is always set correctly.
+DenseDist and SparseDist are simple structs. Why bother having constructors for these classes, when you could just use initialization lists?
+    Both of these types only have four user-decidable parameters.
+    We tried to implement and document these structs only using four members each.
+    This was doable, but very cumbersome.
+    In order to write clearer documentation we introduced several additional members whose values are semantically meaningful
+    but ultimately dependent on the others.
+    Using constructors makes it possible for us to ensure all members are initialized consistently.
 
 Why don't you automatically scale your sketching operators to give partial isometries in expectation?
     There are a few factors that led us to this decision. None of these factors is a huge deal, alone,
     but they become significant when considered together.
@@ -48,8 +55,9 @@ Why are all dimensions 64-bit integers?
     We do allow 32-bit indexing for buffers underlying sparse matrix datastructures, but we recommend sticking with 64-bit.
 
 I looked at the source code and found weird function names like "lskge3," "rskges," and "lsksp3." What's that about?
-    Makes it easier to call from languages that don't support overloads.
-    Short and specific names make it possible to communicate efficiently and precisely (useful for test code and developer documentation).
+    There are two reasons for these kinds of names.
+    First, having these names makes it easier to call RandBLAS from languages that don't support function overloading.
+    Second, these short and specific names make it possible to communicate efficiently and precisely (useful for test code and developer documentation).
 
 Why does sketch_symmetric not use a "side" argument, like symm in BLAS?
     There are many BLAS functions that include a "side" argument.
     This argument always refers to the argument with the most notable structure.
@@ -64,29 +72,8 @@ Why does sketch_symmetric not use a "side" argument, like symm in BLAS?
     We chose the third option since that's more in line with modern APIs for BLAS-like functionality (namely, std::linalg).
 
-(In)frequently asked questions about RandBLAS' capabilities
------------------------------------------------------------
-
-How do I call RandBLAS from other languages?
-    First, this depends on whether your language supports overloads.
-
-    * To get an important thing out of the way: we use both formal C++ overloads and C++ templates. The latter mechanism might as well constitute an overload from the perspective of other languages.
-    * We do have canonical function names to address overloads in (1) the side of an operand in matrix-matrix products and (2) the family of sketching distribution.
-    * Our templating of numerical precision should be resolved by prepending "s" for single precision or "d" for double precision on any classes and function names.
-
-    This also depends on whether your language supports classes that can manage their own memory.
-
-    * The full API for DenseSkOp and SparseSkOp requires letting them manage their own memory. If you use the appropriate constructors then they'll let you manage all memory.
-
-Can functions of the form ``sketch_[xxx]`` do something other than sketching?
-    Absolutely. It can do lifting, which is needed in some algorithms. It can also apply a square submatrix of a sketching operator (useful for distributed applications), in which case the output matrix isn't any smaller than the input matrix.
-
-Can I sketch a const symmetric matrix that's only stored in an upper or lower triangle?
-    Yes, but there are caveats. First, you can only use dense sketching operators. Second, you'll have to call fill_dense(layout, dist, … buff …, rngstate) and then use buff in your own SYMM function.
-
-Unavoidable limitations
-------------------------
+Limitations
+-----------
 
 No complex domain support:
    BLAS' support for this is incomplete. You can't mix real and complex, you can't conjugate without transposing, etc…
@@ -106,9 +93,21 @@ No support for DenseSkOps with Rademachers:
    *is* the possibility of generating Rademachers far faster than uniform [-1, 1].
    The implementation of this method might be a little complicated.
 
+No support for negative values of "incx" or "incy" in sketch_vector.
+   The BLAS function GEMV supports negative strides between input and output vector elements.
+   It would be easy to extend sketch_vector to support this if we had a proper
+   SPMV implementation that supported negative increments. If someone wants to volunteer
+   to extend our SPMV kernels to support that, then we'd happily accept such a contribution.
+   (It shouldn't be hard! We just haven't gotten around to this.)
+
+Symmetric matrices have to be stored as general matrices.
+   This stems partly from a desire to have sketch_symmetric work equally well with DenseSkOp and SparseSkOp.
+   Another reason is that BLAS' SYMM function doesn't allow transposes, which is a key tool we use
+   in sketch_general to resolve layout discrepancies between the various arguments.
-Limitations of calling RandBLAS from other languages
-----------------------------------------------------
+
+Language interoperability
+-------------------------
 
 We routinely use function overloading, and that reduces portability across languages.
 See below for details on where we stand and where we plan to go to resolve this shortcoming.
@@ -142,7 +141,6 @@ Some discussion
     All of these overload-free function names have explicit row and column offset parameters to handle submatrices of linear operators.
     However, the overloaded versions of these functions have *additional* overloads based on setting the offset parameters to zero.
 
-We have no plans for consistent naming of overload-free sparse BLAS functions. The most we do in this regard is offer functions called
 [left/right]_spmm for SpMM where the sparse matrix operand appears on the left or on the right.
 
diff --git a/rtd/source/api_reference/skops_and_dists.rst b/rtd/source/api_reference/skops_and_dists.rst
index daf5296d..570cc223 100644
--- a/rtd/source/api_reference/skops_and_dists.rst
+++ b/rtd/source/api_reference/skops_and_dists.rst
@@ -86,7 +86,7 @@ Gaussians et al.
   .. doxygenfunction:: RandBLAS::fill_dense(DenseSkOp &S)
      :project: RandBLAS
 
-  .. doxygenfunction:: RandBLAS::fill_dense(blas::Layout layout, const DenseDist &D, int64_t n_rows, int64_t n_cols, int64_t S_ro, int64_t S_co, T *buff, const RNGState &seed)
+  .. doxygenfunction:: RandBLAS::fill_dense_unpacked(blas::Layout layout, const DenseDist &D, int64_t n_rows, int64_t n_cols, int64_t S_ro, int64_t S_co, T *buff, const RNGState &seed)
      :project: RandBLAS
 
@@ -112,7 +112,7 @@ CountSketch et al.
   .. doxygenfunction:: RandBLAS::fill_sparse(SparseSkOp &S)
      :project: RandBLAS
 
-  .. doxygenfunction:: RandBLAS::fill_sparse(const SparseDist &D, int64_t &nnz, T* vals, sint_t* rows, sint_t* cols, const state_t &seed_state)
+  .. doxygenfunction:: RandBLAS::fill_sparse_unpacked_nosub(const SparseDist &D, int64_t &nnz, T* vals, sint_t* rows, sint_t* cols, const state_t &seed_state)
      :project: RandBLAS
 
diff --git a/rtd/themes/randblas_rtd/static/theme_overrides.css b/rtd/themes/randblas_rtd/static/theme_overrides.css
index 71ff91a3..00746ad3 100644
--- a/rtd/themes/randblas_rtd/static/theme_overrides.css
+++ b/rtd/themes/randblas_rtd/static/theme_overrides.css
@@ -52,7 +52,8 @@
 
 .math {
     text-align: left;
-    overflow: auto;
+    overflow-y: hidden;
+    overflow-x: auto;
 }
 .eqno {
     float: right;
diff --git a/test/handrolled_lapack.hh b/test/handrolled_lapack.hh
index 8f2202bd..b096399f 100644
--- a/test/handrolled_lapack.hh
+++ b/test/handrolled_lapack.hh
@@ -231,7 +231,7 @@ inline int64_t required_powermethod_iters(int64_t n, T p_fail, T tol) {
 
 template
 std::pair> power_method(int64_t n, FUNC &A, T* v, T tol, T failure_prob, const RNGState &state) {
-    auto next_state = RandBLAS::fill_dense(blas::Layout::ColMajor, {n, 1}, n, 1, 0, 0, v, state);
+    auto next_state = RandBLAS::fill_dense_unpacked(blas::Layout::ColMajor, {n, 1}, n, 1, 0, 0, v, state);
     std::vector work(n, 0.0);
     T* u = work.data();
     T norm = blas::nrm2(n, v, 1);
diff --git a/test/test_basic_rng/test_continuous.cc b/test/test_basic_rng/test_continuous.cc
index a096cf31..15c2e493 100644
--- a/test/test_basic_rng/test_continuous.cc
+++ b/test/test_basic_rng/test_continuous.cc
@@ -113,7 +113,8 @@ class TestScalarDistributions : public ::testing::Test {
         auto critical_value = critical_value_rep_mutator(num_samples, significance);
         RNGState state(seed);
         std::vector samples(num_samples, -1);
-        RandBLAS::fill_dense({num_samples, 1, sd, RandBLAS::Axis::Long}, samples.data(), state);
+        RandBLAS::DenseDist D(num_samples, 1, sd, RandBLAS::Axis::Long);
+        RandBLAS::fill_dense(D, samples.data(), state);
         kolmogorov_smirnov_tester(samples, critical_value, sd);
         return;
     }
diff --git a/test/test_matmul_wrappers/test_sketch_symmetric.cc b/test/test_matmul_wrappers/test_sketch_symmetric.cc
index 3b4e7f09..7807b92c 100644
--- a/test/test_matmul_wrappers/test_sketch_symmetric.cc
+++ b/test/test_matmul_wrappers/test_sketch_symmetric.cc
@@ -53,7 +53,7 @@ void random_symmetric_mat(int64_t n, T* A, int64_t lda, STATE s) {
     // This function can be interpreted as first generating a random lda-by-lda symmetric matrix
     // whose entries in the upper triangle are iid, then symmetrizing that matrix, then
     // zeroing out all entries outside the leading principal submatrix of order n.
-    RandBLAS::fill_dense(Layout::ColMajor, {lda, lda}, n, n, 0, 0, A, s);
+    RandBLAS::fill_dense_unpacked(Layout::ColMajor, {lda, lda}, n, n, 0, 0, A, s);
     RandBLAS::symmetrize(Layout::ColMajor, Uplo::Upper, n, A, lda);
     return;
 }
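
The following usage sketch is not part of the patch; it only illustrates how the renamed ``fill_dense_unpacked`` supports the new FAQ entry about sketching a const symmetric matrix stored in one triangle. It assumes the ``RandBLAS.hh`` umbrella header, BLAS++'s ``blas::symm``, the two-argument ``DenseDist`` constructor (with its default scalar distribution), and the single-integer ``RNGState`` seed constructor; the helper function name and seed value are illustrative only::

    // Hypothetical helper (not RandBLAS API): B = A * S, where A is n-by-n symmetric
    // with only its Upper triangle stored, and S is an n-by-d dense sketching operator
    // materialized into a plain buffer, as the FAQ entry suggests.
    #include <RandBLAS.hh>   // assumed umbrella header
    #include <blas.hh>       // BLAS++
    #include <vector>

    template <typename T>
    void sketch_symmetric_from_right(int64_t n, int64_t d, const T* A, int64_t lda, T* B, int64_t ldb) {
        RandBLAS::DenseDist D(n, d);      // n-by-d operator; default scalar distribution assumed
        RandBLAS::RNGState state(0);      // illustrative seed
        std::vector<T> S(n * d);
        // Sample the whole operator into S in column-major order (leading dimension n).
        RandBLAS::fill_dense_unpacked(blas::Layout::ColMajor, D, n, d, 0, 0, S.data(), state);
        // Our own SYMM call: B = 1.0 * A * S + 0.0 * B, referencing only A's upper triangle.
        // Since A is symmetric, A * S carries the same information as S^T * A.
        blas::symm(blas::Layout::ColMajor, blas::Side::Left, blas::Uplo::Upper,
                   n, d, (T) 1.0, A, lda, S.data(), n, (T) 0.0, B, ldb);
    }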