diff --git a/NEWS.md b/NEWS.md
index b0d256e3..3dc64ba2 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,4 +1,4 @@
-# uwot 0.0.0.9008
+# uwot 0.0.0.9008 (December 23 2018)
 
 ## New features
 
@@ -11,6 +11,10 @@ the old, less consistent, but faster settings, set `n_sgd_threads = "auto"`.
   * `gamma` is now `repulsion_strength`.
 * Default spectral initialization now looks for disconnected components and
 initializes them separately (also applies to `laplacian` and `normlaplacian`).
+* New `init` options: `sspectral`, `snormlaplacian` and `slaplacian`. These are
+like `spectral`, `normlaplacian` and `laplacian`, respectively, but scaled so
+that each dimension has a standard deviation of 1e-4. This mirrors the
+relationship between the `pca` and `spca` options.
 
 ## Bug fixes and minor improvements
 
@@ -19,7 +23,7 @@ initializes them separately (also applies to `laplacian` and `normlaplacian`).
 number of threads used.
 * Anomalously long spectral intialization times should now be reduced.
 * Internal changes and fixes thanks to a code review by Aaron Lun 
-(https://github.com/ltla)
+(https://github.com/ltla).
 
 # uwot 0.0.0.9007 (December 9 2018)
 
diff --git a/R/uwot.R b/R/uwot.R
index f1ca1784..c89fe675 100644
--- a/R/uwot.R
+++ b/R/uwot.R
@@ -81,8 +81,35 @@
 #'     \item \code{"spca"}. Like \code{"pca"}, but each dimension is then scaled
 #'     so the standard deviation is 1e-4, to give a distribution similar to
 #'     that used in t-SNE.
+#'     \item \code{"sspectral"}. Like \code{"spectral"}, but each dimension is
+#'     then scaled so the standard deviation is 1e-4.
+#'     \item \code{"snormlaplacian"}. Like \code{"normlaplacian"}, but each
+#'     dimension is then scaled so the standard deviation is 1e-4.
+#'     \item \code{"slaplacian"}. Like \code{"laplacian"}, but each dimension is
+#'     then scaled so the standard deviation is 1e-4.
 #'     \item A matrix of initial coordinates.
 #'   }
+#'  For spectral initializations (\code{"spectral"}, \code{"normlaplacian"},
+#'  \code{"laplacian"}, \code{"sspectral"}, \code{"snormlaplacian"},
+#'  \code{"slaplacian"}), if more than one connected component is identified,
+#'  each connected component is initialized separately and the results are
+#'  merged. If \code{verbose = TRUE}, the number of connected components is
+#'  logged to the console. The existence of multiple connected components
+#'  implies that a global view of the data cannot be attained with this
+#'  initialization. Either a PCA-based initialization or increasing the value of
+#'  \code{n_neighbors} may be more appropriate.
+#'   
+#'  The scaled initializations (\code{"spca"}, \code{"sspectral"},
+#'  \code{"snormlaplacian"}, \code{"slaplacian"}) might be useful as
+#'  alternatives to the non-scaled equivalents if these result in initial
+#'  coordinates with large inter-point distances or outliers. This usually
+#'  results in small gradients during optimization and very little progress
+#'  being made to the layout. Shrinking the initial embedding by rescaling can
+#'  help under these circumstances. \code{"spca"} is usually recommended over
+#'  \code{"pca"}, but for the spectral initializations the scaled versions
+#'  usually aren't necessary unless you are using a large value of
+#'  \code{n_neighbors} (e.g. \code{n_neighbors = 150} or higher). 
+#'  
 #' @param spread The effective scale of embedded points. In combination with
 #'   \code{min_dist}, this determines how clustered/clumped the embedded points
 #'   are.
@@ -431,8 +458,34 @@ umap <- function(X, n_neighbors = 15, n_components = 2, metric = "euclidean",
 #'     \item \code{"spca"}. Like \code{"pca"}, but each dimension is then scaled
 #'     so the standard deviation is 1e-4, to give a distribution similar to
 #'     that used in t-SNE.
+#'     \item \code{"sspectral"}. Like \code{"spectral"}, but each dimension is
+#'     then scaled so the standard deviation is 1e-4.
+#'     \item \code{"snormlaplacian"}. Like \code{"normlaplacian"}, but each
+#'     dimension is then scaled so the standard deviation is 1e-4.
+#'     \item \code{"slaplacian"}. Like \code{"laplacian"}, but each dimension is
+#'     then scaled so the standard deviation is 1e-4.
 #'     \item A matrix of initial coordinates.
 #'   }
+#'  For spectral initializations (\code{"spectral"}, \code{"normlaplacian"},
+#'  \code{"laplacian"}, \code{"sspectral"}, \code{"snormlaplacian"},
+#'  \code{"slaplacian"}), if more than one connected component is identified,
+#'  each connected component is initialized separately and the results are
+#'  merged. If \code{verbose = TRUE}, the number of connected components is
+#'  logged to the console. The existence of multiple connected components
+#'  implies that a global view of the data cannot be attained with this
+#'  initialization. Either a PCA-based initialization or increasing the value of
+#'  \code{n_neighbors} may be more appropriate.
+#'   
+#'  The scaled initializations (\code{"spca"}, \code{"sspectral"},
+#'  \code{"snormlaplacian"}, \code{"slaplacian"}) might be useful as
+#'  alternatives to the non-scaled equivalents if these result in initial
+#'  coordinates with large inter-point distances or outliers. This usually
+#'  results in small gradients during optimization and very little progress
+#'  being made to the layout. Shrinking the initial embedding by rescaling can
+#'  help under these circumstances. \code{"spca"} is usually recommended over
+#'  \code{"pca"}, but for the spectral initializations the scaled versions
+#'  usually aren't necessary unless you are using a large value of
+#'  \code{n_neighbors} (e.g. \code{n_neighbors = 150} or higher).
 #' @param set_op_mix_ratio Interpolate between (fuzzy) union and intersection as
 #'   the set operation used to combine local fuzzy simplicial sets to obtain a
 #'   global fuzzy simplicial sets. Both fuzzy set operations use the product
@@ -723,8 +776,35 @@ tumap <- function(X, n_neighbors = 15, n_components = 2, metric = "euclidean",
 #'     \item \code{"spca"}. Like \code{"pca"}, but each dimension is then scaled
 #'     so the standard deviation is 1e-4, to give a distribution similar to
 #'     that used in t-SNE and LargeVis.
+#'     \item \code{"sspectral"}. Like \code{"spectral"}, but each dimension is
+#'     then scaled so the standard deviation is 1e-4.
+#'     \item \code{"snormlaplacian"}. Like \code{"normlaplacian"}, but each
+#'     dimension is then scaled so the standard deviation is 1e-4.
+#'     \item \code{"slaplacian"}. Like \code{"laplacian"}, but each dimension is
+#'     then scaled so the standard deviation is 1e-4.
 #'     \item A matrix of initial coordinates.
 #'   }
+#'  For spectral initializations (\code{"spectral"}, \code{"normlaplacian"},
+#'  \code{"laplacian"}, \code{"sspectral"}, \code{"snormlaplacian"},
+#'  \code{"slaplacian"}), if more than one connected component is identified,
+#'  each connected component is initialized separately and the results are
+#'  merged. If \code{verbose = TRUE}, the number of connected components is
+#'  logged to the console. The existence of multiple connected components
+#'  implies that a global view of the data cannot be attained with this
+#'  initialization. Either a PCA-based initialization or increasing the value of
+#'  \code{n_neighbors} may be more appropriate.
+#'   
+#'  The scaled initializations (\code{"spca"}, \code{"sspectral"},
+#'  \code{"snormlaplacian"}, \code{"slaplacian"}) might be useful as
+#'  alternatives to the non-scaled equivalents if these result in initial
+#'  coordinates with large inter-point distances or outliers. This usually
+#'  results in small gradients during optimization and very little progress
+#'  being made to the layout. Shrinking the initial embedding by rescaling can
+#'  help under these circumstances. \code{"spca"} is usually recommended over
+#'  \code{"pca"}, but for the spectral initializations the scaled versions
+#'  usually aren't necessary unless you are using a large value of
+#'  \code{n_neighbors} (e.g. \code{n_neighbors = 150} or higher).
+#'   
 #' @param repulsion_strength Weighting applied to negative samples in low
 #'   dimensional embedding optimization. Values higher than one will result in
 #'   greater weight being given to negative samples.
@@ -1128,15 +1208,20 @@ uwot <- function(X, n_neighbors = 15, n_components = 2, metric = "euclidean",
   else {
     init <- match.arg(tolower(init), c(
       "spectral", "random", "lvrandom", "normlaplacian",
-      "laplacian", "spca", "pca"
+      "laplacian", "spca", "pca", "sspectral", "snormlaplacian", "slaplacian"
     ))
     
+    do_shrink <- init %in% 
+      c("spca", "sspectral", "snormlaplacian", "slaplacian")
+    if (do_shrink) {
+      init <- substring(init, 2)
+    }
+    
     # Don't repeat PCA initialization if we've already done it once
     if (pca_shortcut && init %in% c("spca", "pca") && pca >= n_components) {
       embedding <- X[, 1:n_components]
       if (init == "spca") {
         tsmessage("Initializing from scaled PCA")
-        embedding <- scale(embedding, scale = apply(embedding, 2, stats::sd) / 1e-4)
       }
       else {
         tsmessage("Initializing from PCA")
@@ -1154,14 +1239,15 @@ uwot <- function(X, n_neighbors = 15, n_components = 2, metric = "euclidean",
         laplacian = laplacian_eigenmap(V, ndim = n_components, verbose = verbose),
         spca = scaled_pca(X, ndim = n_components, verbose = verbose),
         pca = pca_init(X, ndim = n_components, verbose = verbose),
-        sspectral = shrink_coords(spectral_init(V, ndim = n_components, 
-                                                verbose = verbose)),
         stop("Unknown initialization method: '", init, "'")
       )
     }
+    
+    if (do_shrink) {
+      embedding <- shrink_coords(embedding)
+    }
   }
-
-
+  
   if (is.null(n_epochs) || n_epochs <= 0) {
     if (method == "largevis") {
       n_epochs <- lvish_epochs(n_vertices, V)
diff --git a/man/lvish.Rd b/man/lvish.Rd
index b0cf423d..7518e339 100644
--- a/man/lvish.Rd
+++ b/man/lvish.Rd
@@ -83,26 +83,52 @@ coordinates.}
 For lvish, the default is \code{"maxabs"}, for consistency with LargeVis.}
 
 \item{init}{Type of initialization for the coordinates. Options are:
-\itemize{
-  \item \code{"spectral"} Spectral embedding using the normalized Laplacian
-  of the fuzzy 1-skeleton, with Gaussian noise added.
-  \item \code{"normlaplacian"}. Spectral embedding using the normalized
-  Laplacian of the fuzzy 1-skeleton, without noise.
-  \item \code{"random"}. Coordinates assigned using a uniform random
-  distribution between -10 and 10.
-  \item \code{"lvrandom"}. Coordinates assigned using a Gaussian
-  distribution with standard deviation 1e-4, as used in LargeVis
-  (Tang et al., 2016) and t-SNE.
-  \item \code{"laplacian"}. Spectral embedding using the Laplacian Eigenmap
-  (Belkin and Niyogi, 2002).
-  \item \code{"pca"}. The first two principal components from PCA of
-  \code{X} if \code{X} is a data frame, and from a 2-dimensional classical
-  MDS if \code{X} is of class \code{"dist"}.
-  \item \code{"spca"}. Like \code{"pca"}, but each dimension is then scaled
-  so the standard deviation is 1e-4, to give a distribution similar to
-  that used in t-SNE and LargeVis.
-  \item A matrix of initial coordinates.
-}}
+ \itemize{
+   \item \code{"spectral"} Spectral embedding using the normalized Laplacian
+   of the fuzzy 1-skeleton, with Gaussian noise added.
+   \item \code{"normlaplacian"}. Spectral embedding using the normalized
+   Laplacian of the fuzzy 1-skeleton, without noise.
+   \item \code{"random"}. Coordinates assigned using a uniform random
+   distribution between -10 and 10.
+   \item \code{"lvrandom"}. Coordinates assigned using a Gaussian
+   distribution with standard deviation 1e-4, as used in LargeVis
+   (Tang et al., 2016) and t-SNE.
+   \item \code{"laplacian"}. Spectral embedding using the Laplacian Eigenmap
+   (Belkin and Niyogi, 2002).
+   \item \code{"pca"}. The first two principal components from PCA of
+   \code{X} if \code{X} is a data frame, and from a 2-dimensional classical
+   MDS if \code{X} is of class \code{"dist"}.
+   \item \code{"spca"}. Like \code{"pca"}, but each dimension is then scaled
+   so the standard deviation is 1e-4, to give a distribution similar to
+   that used in t-SNE and LargeVis.
+   \item \code{"sspectral"}. Like \code{"spectral"}, but each dimension is
+   then scaled so the standard deviation is 1e-4.
+   \item \code{"snormlaplacian"}. Like \code{"normlaplacian"}, but each
+   dimension is then scaled so the standard deviation is 1e-4.
+   \item \code{"slaplacian"}. Like \code{"laplacian"}, but each dimension is
+   then scaled so the standard deviation is 1e-4.
+   \item A matrix of initial coordinates.
+ }
+For spectral initializations (\code{"spectral"}, \code{"normlaplacian"},
+\code{"laplacian"}, \code{"sspectral"}, \code{"snormlaplacian"},
+\code{"slaplacian"}), if more than one connected component is identified,
+each connected component is initialized separately and the results are
+merged. If \code{verbose = TRUE}, the number of connected components is
+logged to the console. The existence of multiple connected components
+implies that a global view of the data cannot be attained with this
+initialization. Either a PCA-based initialization or increasing the value of
+\code{n_neighbors} may be more appropriate.
+ 
+The scaled initializations (\code{"spca"}, \code{"sspectral"},
+\code{"snormlaplacian"}, \code{"slaplacian"}) might be useful as
+alternatives to the non-scaled equivalents if these result in initial
+coordinates with large inter-point distances or outliers. This usually
+results in small gradients during optimization and very little progress
+being made to the layout. Shrinking the initial embedding by rescaling can
+help under these circumstances. \code{"spca"} is usually recommended over
+\code{"pca"}, but for the spectral initializations the scaled versions
+usually aren't necessary unless you are using a large value of
+\code{n_neighbors} (e.g. \code{n_neighbors = 150} or higher).}
 
 \item{repulsion_strength}{Weighting applied to negative samples in low
 dimensional embedding optimization. Values higher than one will result in
diff --git a/man/tumap.Rd b/man/tumap.Rd
index df3ad8f4..971e0648 100644
--- a/man/tumap.Rd
+++ b/man/tumap.Rd
@@ -81,26 +81,52 @@ coordinates.}
 For t-UMAP, the default is \code{"none"}.}
 
 \item{init}{Type of initialization for the coordinates. Options are:
-\itemize{
-  \item \code{"spectral"} Spectral embedding using the normalized Laplacian
-  of the fuzzy 1-skeleton, with Gaussian noise added.
-  \item \code{"normlaplacian"}. Spectral embedding using the normalized
-  Laplacian of the fuzzy 1-skeleton, without noise.
-  \item \code{"random"}. Coordinates assigned using a uniform random
-  distribution between -10 and 10.
-  \item \code{"lvrandom"}. Coordinates assigned using a Gaussian
-  distribution with standard deviation 1e-4, as used in LargeVis
-  (Tang et al., 2016) and t-SNE.
-  \item \code{"laplacian"}. Spectral embedding using the Laplacian Eigenmap
-  (Belkin and Niyogi, 2002).
-  \item \code{"pca"}. The first two principal components from PCA of
-  \code{X} if \code{X} is a data frame, and from a 2-dimensional classical
-  MDS if \code{X} is of class \code{"dist"}.
-  \item \code{"spca"}. Like \code{"pca"}, but each dimension is then scaled
-  so the standard deviation is 1e-4, to give a distribution similar to
-  that used in t-SNE.
-  \item A matrix of initial coordinates.
-}}
+ \itemize{
+   \item \code{"spectral"} Spectral embedding using the normalized Laplacian
+   of the fuzzy 1-skeleton, with Gaussian noise added.
+   \item \code{"normlaplacian"}. Spectral embedding using the normalized
+   Laplacian of the fuzzy 1-skeleton, without noise.
+   \item \code{"random"}. Coordinates assigned using a uniform random
+   distribution between -10 and 10.
+   \item \code{"lvrandom"}. Coordinates assigned using a Gaussian
+   distribution with standard deviation 1e-4, as used in LargeVis
+   (Tang et al., 2016) and t-SNE.
+   \item \code{"laplacian"}. Spectral embedding using the Laplacian Eigenmap
+   (Belkin and Niyogi, 2002).
+   \item \code{"pca"}. The first two principal components from PCA of
+   \code{X} if \code{X} is a data frame, and from a 2-dimensional classical
+   MDS if \code{X} is of class \code{"dist"}.
+   \item \code{"spca"}. Like \code{"pca"}, but each dimension is then scaled
+   so the standard deviation is 1e-4, to give a distribution similar to
+   that used in t-SNE.
+   \item \code{"sspectral"}. Like \code{"spectral"}, but each dimension is
+   then scaled so the standard deviation is 1e-4.
+   \item \code{"snormlaplacian"}. Like \code{"normlaplacian"}, but each
+   dimension is then scaled so the standard deviation is 1e-4.
+   \item \code{"slaplacian"}. Like \code{"laplacian"}, but each dimension is
+   then scaled so the standard deviation is 1e-4.
+   \item A matrix of initial coordinates.
+ }
+For spectral initializations (\code{"spectral"}, \code{"normlaplacian"},
+\code{"laplacian"}, \code{"sspectral"}, \code{"snormlaplacian"},
+\code{"slaplacian"}), if more than one connected component is identified,
+each connected component is initialized separately and the results are
+merged. If \code{verbose = TRUE}, the number of connected components is
+logged to the console. The existence of multiple connected components
+implies that a global view of the data cannot be attained with this
+initialization. Either a PCA-based initialization or increasing the value of
+\code{n_neighbors} may be more appropriate.
+ 
+The scaled initializations (\code{"spca"}, \code{"sspectral"},
+\code{"snormlaplacian"}, \code{"slaplacian"}) might be useful as
+alternatives to the non-scaled equivalents if these result in initial
+coordinates with large inter-point distances or outliers. This usually
+results in small gradients during optimization and very little progress
+being made to the layout. Shrinking the initial embedding by rescaling can
+help under these circumstances. \code{"spca"} is usually recommended over
+\code{"pca"}, but for the spectral initializations the scaled versions
+usually aren't necessary unless you are using a large value of
+\code{n_neighbors} (e.g. \code{n_neighbors = 150} or higher).}
 
 \item{set_op_mix_ratio}{Interpolate between (fuzzy) union and intersection as
 the set operation used to combine local fuzzy simplicial sets to obtain a
diff --git a/man/umap.Rd b/man/umap.Rd
index 1afaea1a..8528f1d6 100644
--- a/man/umap.Rd
+++ b/man/umap.Rd
@@ -83,26 +83,52 @@ coordinates.}
 For UMAP, the default is \code{"none"}.}
 
 \item{init}{Type of initialization for the coordinates. Options are:
-\itemize{
-  \item \code{"spectral"} Spectral embedding using the normalized Laplacian
-  of the fuzzy 1-skeleton, with Gaussian noise added.
-  \item \code{"normlaplacian"}. Spectral embedding using the normalized
-  Laplacian of the fuzzy 1-skeleton, without noise.
-  \item \code{"random"}. Coordinates assigned using a uniform random
-  distribution between -10 and 10.
-  \item \code{"lvrandom"}. Coordinates assigned using a Gaussian
-  distribution with standard deviation 1e-4, as used in LargeVis
-  (Tang et al., 2016) and t-SNE.
-  \item \code{"laplacian"}. Spectral embedding using the Laplacian Eigenmap
-  (Belkin and Niyogi, 2002).
-  \item \code{"pca"}. The first two principal components from PCA of
-  \code{X} if \code{X} is a data frame, and from a 2-dimensional classical
-  MDS if \code{X} is of class \code{"dist"}.
-  \item \code{"spca"}. Like \code{"pca"}, but each dimension is then scaled
-  so the standard deviation is 1e-4, to give a distribution similar to
-  that used in t-SNE.
-  \item A matrix of initial coordinates.
-}}
+ \itemize{
+   \item \code{"spectral"} Spectral embedding using the normalized Laplacian
+   of the fuzzy 1-skeleton, with Gaussian noise added.
+   \item \code{"normlaplacian"}. Spectral embedding using the normalized
+   Laplacian of the fuzzy 1-skeleton, without noise.
+   \item \code{"random"}. Coordinates assigned using a uniform random
+   distribution between -10 and 10.
+   \item \code{"lvrandom"}. Coordinates assigned using a Gaussian
+   distribution with standard deviation 1e-4, as used in LargeVis
+   (Tang et al., 2016) and t-SNE.
+   \item \code{"laplacian"}. Spectral embedding using the Laplacian Eigenmap
+   (Belkin and Niyogi, 2002).
+   \item \code{"pca"}. The first two principal components from PCA of
+   \code{X} if \code{X} is a data frame, and from a 2-dimensional classical
+   MDS if \code{X} is of class \code{"dist"}.
+   \item \code{"spca"}. Like \code{"pca"}, but each dimension is then scaled
+   so the standard deviation is 1e-4, to give a distribution similar to
+   that used in t-SNE.
+   \item \code{"sspectral"}. Like \code{"spectral"}, but each dimension is
+   then scaled so the standard deviation is 1e-4.
+   \item \code{"snormlaplacian"}. Like \code{"normlaplacian"}, but each
+   dimension is then scaled so the standard deviation is 1e-4.
+   \item \code{"slaplacian"}. Like \code{"laplacian"}, but each dimension is
+   then scaled so the standard deviation is 1e-4.
+   \item A matrix of initial coordinates.
+ }
+For spectral initializations (\code{"spectral"}, \code{"normlaplacian"},
+\code{"laplacian"}, \code{"sspectral"}, \code{"snormlaplacian"},
+\code{"slaplacian"}), if more than one connected component is identified,
+each connected component is initialized separately and the results are
+merged. If \code{verbose = TRUE}, the number of connected components is
+logged to the console. The existence of multiple connected components
+implies that a global view of the data cannot be attained with this
+initialization. Either a PCA-based initialization or increasing the value of
+\code{n_neighbors} may be more appropriate.
+ 
+The scaled initializations (\code{"spca"}, \code{"sspectral"},
+\code{"snormlaplacian"}, \code{"slaplacian"}) might be useful as
+alternatives to the non-scaled equivalents if these result in initial
+coordinates with large inter-point distances or outliers. This usually
+results in small gradients during optimization and very little progress
+being made to the layout. Shrinking the initial embedding by rescaling can
+help under these circumstances. \code{"spca"} is usually recommended over
+\code{"pca"}, but for the spectral initializations the scaled versions
+usually aren't necessary unless you are using a large value of
+\code{n_neighbors} (e.g. \code{n_neighbors = 150} or higher).}
 
 \item{spread}{The effective scale of embedded points. In combination with
 \code{min_dist}, this determines how clustered/clumped the embedded points
diff --git a/tests/testthat/test_output.R b/tests/testthat/test_output.R
index 21af6fbe..67b69e8f 100644
--- a/tests/testthat/test_output.R
+++ b/tests/testthat/test_output.R
@@ -264,4 +264,24 @@ expect_equal(res$pca_models[["3"]]$center, c(1.45, 0.22),
 
 res_trans <- umap_transform(iris10, model = res, verbose = FALSE, n_threads = 0,
                             n_epochs = 2)
-expect_ok_matrix(res_trans)
\ No newline at end of file
+expect_ok_matrix(res_trans)
+
+
+# shrunk spectral initialization
+res <- umap(iris10,
+            n_neighbors = 4, n_epochs = 2, learning_rate = 0.5,
+            init = "snormlaplacian", verbose = FALSE, n_threads = 0
+)
+expect_ok_matrix(res)
+
+res <- umap(iris10,
+            n_neighbors = 4, n_epochs = 2, learning_rate = 0.5,
+            init = "slaplacian", verbose = FALSE, n_threads = 0
+)
+expect_ok_matrix(res)
+
+res <- umap(iris10,
+            n_neighbors = 4, n_epochs = 2, learning_rate = 0.5,
+            init = "sspectral", verbose = FALSE, n_threads = 0
+)
+expect_ok_matrix(res)
\ No newline at end of file