diff --git a/README.md b/README.md
index 12fb9b9..c5c7d08 100644
--- a/README.md
+++ b/README.md
@@ -14,6 +14,7 @@ ofxTSNE is very simple to run, containing only one function. The harder part is
 
 `example` demonstrates how to use ofxTSNE by constructing a toy 100-dim dataset. It contains comments explaining what the parameters do and how to set them.
 
+**clever hack**: try setting D=3 and, instead of clustering the points around 10 centers, generate random 3D points and map each point's color linearly from its 3D position.
 
 #### clustering images
 
diff --git a/example-images/example-images.xcodeproj/project.xcworkspace/xcshareddata/example-images.xcscmblueprint b/example-images/example-images.xcodeproj/project.xcworkspace/xcshareddata/example-images.xcscmblueprint
new file mode 100644
index 0000000..1b6c554
--- /dev/null
+++ b/example-images/example-images.xcodeproj/project.xcworkspace/xcshareddata/example-images.xcscmblueprint
@@ -0,0 +1,30 @@
+{
+  "DVTSourceControlWorkspaceBlueprintPrimaryRemoteRepositoryKey" : "E0DC71BFE67F75D80C24B658C9671C34FDB45F42",
+  "DVTSourceControlWorkspaceBlueprintWorkingCopyRepositoryLocationsKey" : {
+
+  },
+  "DVTSourceControlWorkspaceBlueprintWorkingCopyStatesKey" : {
+    "CE6DB5CAD37933655326CE4949F968A4A0799398" : 0,
+    "E0DC71BFE67F75D80C24B658C9671C34FDB45F42" : 0
+  },
+  "DVTSourceControlWorkspaceBlueprintIdentifierKey" : "66619A9E-9261-40DB-A457-E6E7339B3EAA",
+  "DVTSourceControlWorkspaceBlueprintWorkingCopyPathsKey" : {
+    "CE6DB5CAD37933655326CE4949F968A4A0799398" : "ofxCcv\/",
+    "E0DC71BFE67F75D80C24B658C9671C34FDB45F42" : "ofxTSNE\/"
+  },
+  "DVTSourceControlWorkspaceBlueprintNameKey" : "example-images",
+  "DVTSourceControlWorkspaceBlueprintVersion" : 204,
+  "DVTSourceControlWorkspaceBlueprintRelativePathToProjectKey" : "example-images\/example-images.xcodeproj",
+  "DVTSourceControlWorkspaceBlueprintRemoteRepositoriesKey" : [
+    {
+      "DVTSourceControlWorkspaceBlueprintRemoteRepositoryURLKey" : "https:\/\/github.com\/kylemcdonald\/ofxCcv.git",
+      "DVTSourceControlWorkspaceBlueprintRemoteRepositorySystemKey" : "com.apple.dt.Xcode.sourcecontrol.Git",
+      "DVTSourceControlWorkspaceBlueprintRemoteRepositoryIdentifierKey" : "CE6DB5CAD37933655326CE4949F968A4A0799398"
+    },
+    {
+      "DVTSourceControlWorkspaceBlueprintRemoteRepositoryURLKey" : "https:\/\/github.com\/genekogan\/ofxTSNE.git",
+      "DVTSourceControlWorkspaceBlueprintRemoteRepositorySystemKey" : "com.apple.dt.Xcode.sourcecontrol.Git",
+      "DVTSourceControlWorkspaceBlueprintRemoteRepositoryIdentifierKey" : "E0DC71BFE67F75D80C24B658C9671C34FDB45F42"
+    }
+  ]
+}
\ No newline at end of file
diff --git a/example-images/example-images.xcodeproj/project.xcworkspace/xcuserdata/gene.xcuserdatad/UserInterfaceState.xcuserstate b/example-images/example-images.xcodeproj/project.xcworkspace/xcuserdata/gene.xcuserdatad/UserInterfaceState.xcuserstate
index b50d009..7126961 100644
Binary files a/example-images/example-images.xcodeproj/project.xcworkspace/xcuserdata/gene.xcuserdatad/UserInterfaceState.xcuserstate and b/example-images/example-images.xcodeproj/project.xcworkspace/xcuserdata/gene.xcuserdatad/UserInterfaceState.xcuserstate differ
diff --git a/example-images/src/ofApp.h b/example-images/src/ofApp.h
index 16dfa58..f718b0c 100644
--- a/example-images/src/ofApp.h
+++ b/example-images/src/ofApp.h
@@ -28,10 +28,12 @@ class ofApp : public ofBaseApp{
     ofxTSNE tsne;
     
     vector<ofImage> images;
-    vector<vector<float> > imagePoints;
+    vector<vector<double> > imagePoints;
     vector<vector<float> > encodings;
     
     ofxPanel gui;
     ofParameter<float> scale;
     ofParameter<float> imageSize;
+    
+    int t;
 };
diff --git a/example/example.xcodeproj/project.xcworkspace/xcuserdata/gene.xcuserdatad/UserInterfaceState.xcuserstate b/example/example.xcodeproj/project.xcworkspace/xcuserdata/gene.xcuserdatad/UserInterfaceState.xcuserstate
index bf6081a..4ae9e4e 100644
Binary files a/example/example.xcodeproj/project.xcworkspace/xcuserdata/gene.xcuserdatad/UserInterfaceState.xcuserstate and b/example/example.xcodeproj/project.xcworkspace/xcuserdata/gene.xcuserdatad/UserInterfaceState.xcuserstate differ
diff --git a/example/example.xcodeproj/xcuserdata/gene.xcuserdatad/xcdebugger/Breakpoints_v2.xcbkptlist b/example/example.xcodeproj/xcuserdata/gene.xcuserdatad/xcdebugger/Breakpoints_v2.xcbkptlist
new file mode 100644
index 0000000..fe2b454
--- /dev/null
+++ b/example/example.xcodeproj/xcuserdata/gene.xcuserdatad/xcdebugger/Breakpoints_v2.xcbkptlist
@@ -0,0 +1,5 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<Bucket
+   type = "1"
+   version = "2.0">
+</Bucket>
diff --git a/example/src/ofApp.cpp b/example/src/ofApp.cpp
index 17a0546..b2f0732 100644
--- a/example/src/ofApp.cpp
+++ b/example/src/ofApp.cpp
@@ -3,6 +3,12 @@
 //--------------------------------------------------------------
 void ofApp::setup(){
     
+    // the runManually flag lets us step through each iteration of t-SNE manually,
+    // letting us watch the process take place. If set to false, the whole
+    // process will take place internally when you run ofxTSNE::run
+
+    runManually = true;
+    
     // first let's construct our toy dataset.
     // we will create N samples of dimension D, which will be distributed
     // into a number of classes, where a point belonging to a particular
@@ -13,8 +19,10 @@ void ofApp::setup(){
     // transforming them from high-dimensional to low-dimensional space, so
     // in this example, the classes are just for us to see this clearer.
     
+    
     // pick initial parameters
-    int N = 2000;               // number of points in our dataset
+    
+    int N = 1500;               // number of points in our dataset
     int D = 100;                // number of dimensions in our data
     int numClasses = 10;        // how many classes to create
     
@@ -100,28 +108,36 @@ void ofApp::setup(){
     // normalize = this will automatically remap all tsne points to range {0, 1}
     //   if false, you'll get the original points.
     
-    
     int dims = 2;
-    float perplexity = 30;
-    float theta = 0.5;
+    float perplexity = 40;
+    float theta = 0.2;
     bool normalize = true;
     
     // finally let's run ofxTSNE! this may take a while depending on your
     // data, and it will return a set of embedded points, structured as
     // a vector<vector<float> > where the inner vector contains (dims) elements.
     // We will unpack these points and assign them back to our testPoints dataset.
-    
-    vector<vector<float> > tsnePoints = tsne.run(data, dims, perplexity, theta, normalize);
 
-    // unpack the embedded points back into our testPoints
-    for (int i=0; i<N; i++) {
-        testPoints[i].tsnePoint = ofPoint(tsnePoints[i][0], tsnePoints[i][1]);
+    tsnePoints = tsne.run(data, dims, perplexity, theta, normalize, runManually);
+    
+    // if we didn't run manually, we can collect the points immediately
+    if (!runManually) {
+        for (int i=0; i<testPoints.size(); i++) {
+            testPoints[i].tsnePoint = ofPoint(tsnePoints[i][0], tsnePoints[i][1]);
+        }
     }
 }
 
 //--------------------------------------------------------------
 void ofApp::update(){
-
+    // if we are running our t-SNE manually, we need to run tsne.iterate() to
+    // go through each iteration and collect the points where they currently are
+    if (runManually) {
+        tsnePoints = tsne.iterate();
+        for (int i=0; i<testPoints.size(); i++) {
+            testPoints[i].tsnePoint = ofPoint(tsnePoints[i][0], tsnePoints[i][1]);
+        }
+    }
 }
 
 //--------------------------------------------------------------
@@ -130,7 +146,7 @@ void ofApp::draw(){
     for (int i=0; i<testPoints.size(); i++) {
         float x = ofGetWidth() * testPoints[i].tsnePoint.x;
         float y = ofGetHeight() * testPoints[i].tsnePoint.y;
-        ofSetColor(testPoints[i].color, 150);
+        ofSetColor(testPoints[i].color, 100);
         ofDrawEllipse(x, y, 8, 8);
     }
 }
diff --git a/example/src/ofApp.h b/example/src/ofApp.h
index 52f25a5..950cc37 100644
--- a/example/src/ofApp.h
+++ b/example/src/ofApp.h
@@ -31,4 +31,7 @@ class ofApp : public ofBaseApp{
     
     ofxTSNE tsne;
     vector<TestPoint> testPoints;
+    vector<vector<double> > tsnePoints;
+    
+    bool runManually;
 };
diff --git a/src/bhtsne/tsne.cpp b/src/bhtsne/tsne.cpp
index 11fa0a4..c1a87c2 100755
--- a/src/bhtsne/tsne.cpp
+++ b/src/bhtsne/tsne.cpp
@@ -46,24 +46,33 @@
 using namespace std;
 
 // Perform t-SNE
-void TSNE::run(double* X, int N, int D, double* Y, int no_dims, double perplexity, double theta) {
+void TSNE::run(double* X, int N, int D, double* Y, int no_dims, double perplexity, double theta, bool runManually) {
+    this->X = X;
+    this->N = N;
+    this->D = D;
+    this->Y = Y;
+    this->no_dims = no_dims;
+    this->perplexity = perplexity;
+    this->theta = theta;
     
     // Determine whether we are using an exact algorithm
     if(N - 1 < 3 * perplexity) { printf("Perplexity too large for the number of data points!\n"); exit(1); }
     printf("Using no_dims = %d, perplexity = %f, and theta = %f\n", no_dims, perplexity, theta);
-    bool exact = (theta == .0) ? true : false;
+    exact = (theta == .0) ? true : false;
     
     // Set learning parameters
-    float total_time = .0;
-    clock_t start, end;
-	int max_iter = 1000, stop_lying_iter = 250, mom_switch_iter = 250;
-	double momentum = .5, final_momentum = .8;
-	double eta = 200.0;
+    total_time = .0;
+    max_iter = 1000;
+    stop_lying_iter = 250;
+    mom_switch_iter = 250;
+    momentum = .5;
+    final_momentum = .8;
+	eta = 200.0;
     
     // Allocate some memory
-    double* dY    = (double*) malloc(N * no_dims * sizeof(double));
-    double* uY    = (double*) malloc(N * no_dims * sizeof(double));
-    double* gains = (double*) malloc(N * no_dims * sizeof(double));
+    dY    = (double*) malloc(N * no_dims * sizeof(double));
+    uY    = (double*) malloc(N * no_dims * sizeof(double));
+    gains = (double*) malloc(N * no_dims * sizeof(double));
     if(dY == NULL || uY == NULL || gains == NULL) { printf("Memory allocation failed!\n"); exit(1); }
     for(int i = 0; i < N * no_dims; i++)    uY[i] =  .0;
     for(int i = 0; i < N * no_dims; i++) gains[i] = 1.0;
@@ -79,7 +88,6 @@ void TSNE::run(double* X, int N, int D, double* Y, int no_dims, double perplexit
     for(int i = 0; i < N * D; i++) X[i] /= max_X;
     
     // Compute input similarities for exact t-SNE
-    double* P; unsigned int* row_P; unsigned int* col_P; double* val_P;
     if(exact) {
         
         // Compute similarities
@@ -130,45 +138,16 @@ void TSNE::run(double* X, int N, int D, double* Y, int no_dims, double perplexit
     if(exact) printf("Input similarities computed in %4.2f seconds!\nLearning embedding...\n", (float) (end - start) / CLOCKS_PER_SEC);
     else printf("Input similarities computed in %4.2f seconds (sparsity = %f)!\nLearning embedding...\n", (float) (end - start) / CLOCKS_PER_SEC, (double) row_P[N] / ((double) N * (double) N));
     start = clock();
-	for(int iter = 0; iter < max_iter; iter++) {
-        
-        // Compute (approximate) gradient
-        if(exact) computeExactGradient(P, Y, N, no_dims, dY);
-        else computeGradient(P, row_P, col_P, val_P, Y, N, no_dims, dY, theta);
-        
-        // Update gains
-        for(int i = 0; i < N * no_dims; i++) gains[i] = (sign(dY[i]) != sign(uY[i])) ? (gains[i] + .2) : (gains[i] * .8);
-        for(int i = 0; i < N * no_dims; i++) if(gains[i] < .01) gains[i] = .01;
-            
-        // Perform gradient update (with momentum and gains)
-        for(int i = 0; i < N * no_dims; i++) uY[i] = momentum * uY[i] - eta * gains[i] * dY[i];
-		for(int i = 0; i < N * no_dims; i++)  Y[i] = Y[i] + uY[i];
-        
-        // Make solution zero-mean
-		zeroMean(Y, N, no_dims);
-        
-        // Stop lying about the P-values after a while, and switch momentum
-        if(iter == stop_lying_iter) {
-            if(exact) { for(int i = 0; i < N * N; i++)        P[i] /= 12.0; }
-            else      { for(int i = 0; i < row_P[N]; i++) val_P[i] /= 12.0; }
-        }
-        if(iter == mom_switch_iter) momentum = final_momentum;
-        
-        // Print out progress
-        if(iter > 0 && (iter % 50 == 0 || iter == max_iter - 1)) {
-            end = clock();
-            double C = .0;
-            if(exact) C = evaluateError(P, Y, N, no_dims);
-            else      C = evaluateError(row_P, col_P, val_P, Y, N, no_dims, theta);  // doing approximate computation here!
-            if(iter == 0)
-                printf("Iteration %d: error is %f\n", iter + 1, C);
-            else {
-                total_time += (float) (end - start) / CLOCKS_PER_SEC;
-                printf("Iteration %d: error is %f (50 iterations in %4.2f seconds)\n", iter, C, (float) (end - start) / CLOCKS_PER_SEC);
-            }
-			start = clock();
+    
+    iter = 0;
+    if (!runManually) {
+        while(iter < max_iter) {
+            runIteration();
         }
     }
+}
+
+void TSNE::finish() {
     end = clock(); total_time += (float) (end - start) / CLOCKS_PER_SEC;
     
     // Clean up memory
@@ -184,6 +163,55 @@ void TSNE::run(double* X, int N, int D, double* Y, int no_dims, double perplexit
     printf("Fitting performed in %4.2f seconds.\n", total_time);
 }
 
+void TSNE::runIteration() {
+    // Performs a single optimisation step on Y using the member state prepared by run().
+    if (iter >= max_iter) {
+        return; // iteration budget used up (finish() is called below when iter reaches max_iter): no-op
+    }
+    
+    // Compute (approximate) gradient
+    if(exact) computeExactGradient(P, Y, N, no_dims, dY);
+    else computeGradient(P, row_P, col_P, val_P, Y, N, no_dims, dY, theta);
+    
+    // Update gains: +0.2 when the gradient's sign differs from the previous update's sign, otherwise scaled by 0.8; floored at 0.01
+    for(int i = 0; i < N * no_dims; i++) gains[i] = (sign(dY[i]) != sign(uY[i])) ? (gains[i] + .2) : (gains[i] * .8);
+    for(int i = 0; i < N * no_dims; i++) if(gains[i] < .01) gains[i] = .01;
+    
+    // Perform gradient update (with momentum and gains)
+    for(int i = 0; i < N * no_dims; i++) uY[i] = momentum * uY[i] - eta * gains[i] * dY[i];
+    for(int i = 0; i < N * no_dims; i++)  Y[i] = Y[i] + uY[i];
+    
+    // Make solution zero-mean
+    zeroMean(Y, N, no_dims);
+    
+    // Stop lying about the P-values after a while, and switch momentum
+    // NOTE(review): the division by 12 presumably undoes a scaling applied in run(); that code is not visible here - confirm
+    if(iter == stop_lying_iter) {
+        if(exact) { for(int i = 0; i < N * N; i++)        P[i] /= 12.0; }
+        else      { for(int i = 0; i < row_P[N]; i++) val_P[i] /= 12.0; }
+    }
+    if(iter == mom_switch_iter) momentum = final_momentum;
+    
+    // Print out progress (every 50th iteration and on the last one)
+    if(iter > 0 && (iter % 50 == 0 || iter == max_iter - 1)) {
+        end = clock();
+        double C = .0;
+        if(exact) C = evaluateError(P, Y, N, no_dims);
+        else      C = evaluateError(row_P, col_P, val_P, Y, N, no_dims, theta);  // doing approximate computation here!
+        if(iter == 0) // NOTE(review): unreachable - the enclosing condition already requires iter > 0
+            printf("Iteration %d: error is %f\n", iter + 1, C);
+        else {
+            total_time += (float) (end - start) / CLOCKS_PER_SEC;
+            printf("Iteration %d: error is %f (50 iterations in %4.2f seconds)\n", iter, C, (float) (end - start) / CLOCKS_PER_SEC);
+        }
+        start = clock(); // restart the timer for the next progress report
+    }
+    
+    iter++;
+    
+    if (iter == max_iter) {
+        finish(); // last iteration done: hand over to finish()
+    }
+}
 
 // Compute gradient of the t-SNE cost function (using Barnes-Hut algorithm)
 void TSNE::computeGradient(double* P, unsigned int* inp_row_P, unsigned int* inp_col_P, double* inp_val_P, double* Y, int N, int D, double* dC, double theta)
diff --git a/src/bhtsne/tsne.h b/src/bhtsne/tsne.h
index 4e6a800..7051c31 100755
--- a/src/bhtsne/tsne.h
+++ b/src/bhtsne/tsne.h
@@ -41,13 +41,15 @@ static inline double sign(double x) { return (x == .0 ? .0 : (x < .0 ? -1.0 : 1.
 class TSNE
 {    
 public:
-    void run(double* X, int N, int D, double* Y, int no_dims, double perplexity, double theta);
+    void run(double* X, int N, int D, double* Y, int no_dims, double perplexity, double theta, bool runManually=false);
+    void runIteration();
     bool load_data(double** data, int* n, int* d, int* no_dims, double* theta, double* perplexity, int* rand_seed);
     void save_data(double* data, int* landmarks, double* costs, int n, int d);
     void symmetrizeMatrix(unsigned int** row_P, unsigned int** col_P, double** val_P, int N); // should be static!
 
     
 private:
+    void finish();
     void computeGradient(double* P, unsigned int* inp_row_P, unsigned int* inp_col_P, double* inp_val_P, double* Y, int N, int D, double* dC, double theta);
     void computeExactGradient(double* P, double* Y, int N, int D, double* dC);
     double evaluateError(double* P, double* Y, int N, int D);
@@ -57,6 +59,20 @@ class TSNE
     void computeGaussianPerplexity(double* X, int N, int D, unsigned int** _row_P, unsigned int** _col_P, double** _val_P, double perplexity, int K);
     void computeSquaredEuclideanDistance(double* X, int N, int D, double* DD);
     double randn();
+    
+    bool exact;
+    float total_time;
+    clock_t start, end;
+    int iter, max_iter, stop_lying_iter, mom_switch_iter;
+    double momentum, final_momentum;
+    double eta;
+    double* dY, *uY, *gains;
+    double* P, *val_P;
+    unsigned int *row_P, *col_P;
+    
+    double *X, *Y;
+    int N, D, no_dims;
+    double perplexity, theta;
 };
 
 #endif
diff --git a/src/ofxTSNE.cpp b/src/ofxTSNE.cpp
index d307fa5..53c7c98 100644
--- a/src/ofxTSNE.cpp
+++ b/src/ofxTSNE.cpp
@@ -1,9 +1,16 @@
 #include "ofxTSNE.h"
 
 
-vector<vector<float> > ofxTSNE::run(vector<vector<float> > & data, int dims, double perplexity, double theta, bool normalize) {
-    int N, D;
-    double *X, *Y;
+vector<vector<double> > ofxTSNE::run(vector<vector<float> > & data, int dims, double perplexity, double theta, bool normalize, bool runManually) {
+    this->data = data;
+    this->dims = dims;
+    this->perplexity = perplexity;
+    this->theta = theta;
+    this->normalize = normalize;
+    this->runManually = runManually;
+    
+    max_iter = 1000;
+    iter = 0;
     
     N = data.size();
     D = data[0].size();
@@ -25,22 +32,37 @@ vector<vector<float> > ofxTSNE::run(vector<vector<float> > & data, int dims, dou
     }
 
     // t-SNE
-    tsne.run(X, N, D, Y, dims, perplexity, theta);
+    tsne.run(X, N, D, Y, dims, perplexity, theta, runManually);
+    
+    if (runManually) {
+        return tsnePoints;
+    }
+    return iterate();
+}
+
+vector<vector<double> > ofxTSNE::iterate(){ // advances the embedding by one step (manual mode) and returns the current points, normalized if requested
+    if (iter >= max_iter) { // run complete: finish() has released X/Y, so Y must not be read again
+        return tsnePoints;  // hand back the last embedding that was unpacked
+    }
+    // in manual mode every call performs exactly one t-SNE iteration
+    if (runManually) {
+        tsne.runIteration();
+    }
     
     // keep track of min for normalization
-    vector<float> min_, max_;
+    vector<double> min_, max_;
     min_.resize(dims);
     max_.resize(dims);
     for (int i=0; i<dims; i++) {
-        min_[i] = numeric_limits<float>::max();
-        max_[i] = numeric_limits<float>::min();
+        min_[i] = numeric_limits<double>::max();
+        max_[i] = numeric_limits<double>::lowest(); // min() is the smallest POSITIVE double; lowest() is the most negative value
     }
     
     // unpack Y into tsnePoints
     tsnePoints.clear();
     int idxY = 0;
     for (int i=0; i<N; i++) {
-        vector<float> tsnePoint;
+        vector<double> tsnePoint;
         tsnePoint.resize(dims);
         for (int j=0; j<dims; j++) {
             tsnePoint[j] = Y[idxY];
@@ -52,18 +74,25 @@ vector<vector<float> > ofxTSNE::run(vector<vector<float> > & data, int dims, dou
         }
         tsnePoints.push_back(tsnePoint);
     }
-
+    
     // normalize if requested
     if (normalize) {
         for (int i=0; i<tsnePoints.size(); i++) {
             for (int j=0; j<dims; j++) {
-                tsnePoints[i][j] = ofMap(tsnePoints[i][j], min_[j], max_[j], 0, 1);
+                tsnePoints[i][j] = (max_[j] > min_[j]) ? (tsnePoints[i][j] - min_[j]) / (max_[j] - min_[j]) : 0.0; // degenerate range maps to 0 instead of dividing by zero
             }
         }
     }
     
-    delete(X);
-    delete(Y);
-
+    iter = runManually ? iter + 1 : max_iter; // one-shot mode finished inside tsne.run(), so a single unpack completes the run
+    if (iter == max_iter) {
+        finish(); // release X and Y exactly once; the guard at the top blocks any later access
+    }
+    
     return tsnePoints;
 }
+
+void ofxTSNE::finish() { // deletes the X and Y pointers; called from iterate() when iter reaches max_iter
+    delete(X); // NOTE(review): the allocation of X/Y is not visible in this patch - confirm scalar new was used (arrays need delete[], malloc'd memory needs free())
+    delete(Y); // Y is read by iterate(), so iterate() must not touch it after this point
+}
\ No newline at end of file
diff --git a/src/ofxTSNE.h b/src/ofxTSNE.h
index 28ce3db..74498ee 100644
--- a/src/ofxTSNE.h
+++ b/src/ofxTSNE.h
@@ -6,8 +6,23 @@
 class ofxTSNE
 {
 public:
-    vector<vector<float> > run(vector<vector<float> > & data, int dims=2, double perplexity=30, double theta=0.5, bool normalize=true);
+    vector<vector<double> > run(vector<vector<float> > & data, int dims=2, double perplexity=30, double theta=0.5, bool normalize=true, bool runManually=false); // with runManually=true it returns tsnePoints without unpacking; call iterate() to step
+    vector<vector<double> > iterate(); // unpacks Y into tsnePoints (after one tsne.runIteration() in manual mode) and returns them
 private:
+    void finish(); // deletes X and Y
+    // state kept between run() and successive iterate() calls
     TSNE tsne;
-    vector<vector<float> > tsnePoints;
+    vector<vector<double> > tsnePoints; // most recently unpacked embedding, one inner vector of dims values per point
+    // parameters stored by run()
+    vector<vector<float> > data; // copy of the input passed to run()
+    int dims;
+    double perplexity;
+    double theta;
+    bool normalize; // when true, iterate() rescales each dimension by its min/max
+    bool runManually; // when true, iterate() calls tsne.runIteration() itself
+    // N = data.size(), D = data[0].size(); X and Y are the raw pointers handed to tsne.run()
+    int N, D;
+    double *X, *Y;
+
+    int iter, max_iter; // run() sets iter = 0 and max_iter = 1000
 };