added option to run iterations manually
genekogan committed Jan 31, 2016
1 parent c56ab31 commit dbe8462
Showing 12 changed files with 221 additions and 76 deletions.
1 change: 1 addition & 0 deletions README.md
@@ -14,6 +14,7 @@ ofxTSNE is very simple to run, containing only one function. The harder part is

`example` demonstrates how to use ofxTSNE by constructing a toy 100-dim dataset. It contains comments explaining what the parameters do and how to set them.

**clever hack**: try setting D=3 and, instead of clustering the points around 10 centers, make them random 3d points and map each point's color linearly from its 3d position.

#### clustering images

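The "clever hack" in the README hunk above lends itself to a short sketch. This is not part of the commit: it assumes the example's usual `ofxTSNE` setup, the six-argument `run()` call shown later in this diff, and a made-up helper name. Points are drawn uniformly at random in 3 dimensions and each point's color is a linear map of its position, so a faithful 2d embedding should show smoothly varying color.

```cpp
#include "ofMain.h"
#include "ofxTSNE.h"

// Hypothetical sketch (not in this commit): D = 3, random points, color taken
// linearly from position, as suggested by the README's "clever hack".
void buildColorToy(ofxTSNE& tsne) {
    int N = 1500;                                      // number of points
    int D = 3;                                         // 3 dimensions instead of 100
    vector<vector<float> > data(N, vector<float>(D));  // input type assumed from the example
    vector<ofColor> colors(N);
    for (int i = 0; i < N; i++) {
        for (int d = 0; d < D; d++) {
            data[i][d] = ofRandom(1.0);                // uniform random point in [0,1]^3
        }
        // one color channel per axis: color is a linear function of position
        colors[i] = ofColor(255 * data[i][0], 255 * data[i][1], 255 * data[i][2]);
    }
    // same parameters as the example; runManually = false runs all iterations at once
    vector<vector<double> > embedded = tsne.run(data, 2, 30, 0.5, true, false);
    // draw embedded[i] tinted with colors[i]; smooth color gradients across the
    // plane indicate the 3d positions were embedded faithfully
}
```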
@@ -0,0 +1,30 @@
{
"DVTSourceControlWorkspaceBlueprintPrimaryRemoteRepositoryKey" : "E0DC71BFE67F75D80C24B658C9671C34FDB45F42",
"DVTSourceControlWorkspaceBlueprintWorkingCopyRepositoryLocationsKey" : {

},
"DVTSourceControlWorkspaceBlueprintWorkingCopyStatesKey" : {
"CE6DB5CAD37933655326CE4949F968A4A0799398" : 0,
"E0DC71BFE67F75D80C24B658C9671C34FDB45F42" : 0
},
"DVTSourceControlWorkspaceBlueprintIdentifierKey" : "66619A9E-9261-40DB-A457-E6E7339B3EAA",
"DVTSourceControlWorkspaceBlueprintWorkingCopyPathsKey" : {
"CE6DB5CAD37933655326CE4949F968A4A0799398" : "ofxCcv\/",
"E0DC71BFE67F75D80C24B658C9671C34FDB45F42" : "ofxTSNE\/"
},
"DVTSourceControlWorkspaceBlueprintNameKey" : "example-images",
"DVTSourceControlWorkspaceBlueprintVersion" : 204,
"DVTSourceControlWorkspaceBlueprintRelativePathToProjectKey" : "example-images\/example-images.xcodeproj",
"DVTSourceControlWorkspaceBlueprintRemoteRepositoriesKey" : [
{
"DVTSourceControlWorkspaceBlueprintRemoteRepositoryURLKey" : "https:\/\/github.com\/kylemcdonald\/ofxCcv.git",
"DVTSourceControlWorkspaceBlueprintRemoteRepositorySystemKey" : "com.apple.dt.Xcode.sourcecontrol.Git",
"DVTSourceControlWorkspaceBlueprintRemoteRepositoryIdentifierKey" : "CE6DB5CAD37933655326CE4949F968A4A0799398"
},
{
"DVTSourceControlWorkspaceBlueprintRemoteRepositoryURLKey" : "https:\/\/github.com\/genekogan\/ofxTSNE.git",
"DVTSourceControlWorkspaceBlueprintRemoteRepositorySystemKey" : "com.apple.dt.Xcode.sourcecontrol.Git",
"DVTSourceControlWorkspaceBlueprintRemoteRepositoryIdentifierKey" : "E0DC71BFE67F75D80C24B658C9671C34FDB45F42"
}
]
}
Binary file not shown.
4 changes: 3 additions & 1 deletion example-images/src/ofApp.h
@@ -28,10 +28,12 @@ class ofApp : public ofBaseApp{
ofxTSNE tsne;

vector<ofImage> images;
vector<vector<float> > imagePoints;
vector<vector<double> > imagePoints;
vector<vector<float> > encodings;

ofxPanel gui;
ofParameter<float> scale;
ofParameter<float> imageSize;

int t;
};
Binary file not shown.
@@ -0,0 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
<Bucket
type = "1"
version = "2.0">
</Bucket>
38 changes: 27 additions & 11 deletions example/src/ofApp.cpp
@@ -3,6 +3,12 @@
//--------------------------------------------------------------
void ofApp::setup(){

// the runManually flag lets us step through each iteration of t-SNE manually,
// letting us watch the process take place. If set to false, the whole
// process will take place internally when you run ofxTSNE::run

runManually = true;

// first let's construct our toy dataset.
// we will create N samples of dimension D, which will be distributed
// into a number of classes, where a point belonging to a particular
@@ -13,8 +19,10 @@ void ofApp::setup(){
// transforming them from high-dimensional to low-dimensional space, so
// in this example, the classes are just for us to see this clearer.


// pick initial parameters
int N = 2000; // number of points in our dataset

int N = 1500; // number of points in our dataset
int D = 100; // number of dimensions in our data
int numClasses = 10; // how many classes to create
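(Not part of the diff.) The comments above describe the toy dataset: N points of dimension D, each attached to one of numClasses classes and scattered around that class's randomly chosen center. The commit's actual construction code lives in the collapsed portion of this file; purely as an illustration, a generator along those lines might look like the sketch below, where the `spread` parameter is an assumption.

```cpp
// Hypothetical illustration (not the commit's code): N points in D dimensions,
// each drawn near one of numClasses random class centers.
vector<vector<float> > makeToyData(int N, int D, int numClasses, float spread) {
    // pick a random center in [0,1]^D for every class
    vector<vector<float> > centers(numClasses, vector<float>(D));
    for (int c = 0; c < numClasses; c++) {
        for (int d = 0; d < D; d++) {
            centers[c][d] = ofRandom(1.0);
        }
    }
    // scatter each point a small distance around its class center
    vector<vector<float> > data(N, vector<float>(D));
    for (int i = 0; i < N; i++) {
        int c = i % numClasses;                                   // class assignment
        for (int d = 0; d < D; d++) {
            data[i][d] = centers[c][d] + ofRandom(-spread, spread);
        }
    }
    return data;
}
```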

@@ -100,28 +108,36 @@ void ofApp::setup(){
// normalize = this will automatically remap all tsne points to range {0, 1}
// if false, you'll get the original points.


int dims = 2;
float perplexity = 30;
float theta = 0.5;
float perplexity = 40;
float theta = 0.2;
bool normalize = true;

// finally let's run ofxTSNE! this may take a while depending on your
// data, and it will return a set of embedded points, structured as
// a vector<vector<float> > where the inner vector contains (dims) elements.
// We will unpack these points and assign them back to our testPoints dataset.

vector<vector<float> > tsnePoints = tsne.run(data, dims, perplexity, theta, normalize);

// unpack the embedded points back into our testPoints
for (int i=0; i<N; i++) {
testPoints[i].tsnePoint = ofPoint(tsnePoints[i][0], tsnePoints[i][1]);
tsnePoints = tsne.run(data, dims, perplexity, theta, normalize, runManually);

// if we didn't run manually, we can collect the points immediately
if (!runManually) {
for (int i=0; i<testPoints.size(); i++) {
testPoints[i].tsnePoint = ofPoint(tsnePoints[i][0], tsnePoints[i][1]);
}
}
}

//--------------------------------------------------------------
void ofApp::update(){

// if we are running our t-SNE manually, we need to run tsne.iterate() to
// go through each iteration and collect the points where they currently are
if (runManually) {
tsnePoints = tsne.iterate();
for (int i=0; i<testPoints.size(); i++) {
testPoints[i].tsnePoint = ofPoint(tsnePoints[i][0], tsnePoints[i][1]);
}
}
}

//--------------------------------------------------------------
@@ -130,7 +146,7 @@
for (int i=0; i<testPoints.size(); i++) {
float x = ofGetWidth() * testPoints[i].tsnePoint.x;
float y = ofGetHeight() * testPoints[i].tsnePoint.y;
ofSetColor(testPoints[i].color, 150);
ofSetColor(testPoints[i].color, 100);
ofDrawEllipse(x, y, 8, 8);
}
}
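Condensing the changes above, the new usage pattern looks roughly like the sketch below. It assumes only what this diff implies: `run()` now takes a trailing `runManually` argument and returns `vector<vector<double> >` (the older comment still says `float`), and `iterate()` advances one t-SNE step and returns the current points.

```cpp
// Condensed sketch of the manual-iteration pattern added in this commit;
// member variables (tsne, tsnePoints, testPoints, runManually) as in ofApp.h.
void ofApp::setup() {
    runManually = true;
    vector<vector<float> > data;   // fill exactly as in the original setup()
    // with runManually = true, run() only prepares t-SNE and returns without iterating
    tsnePoints = tsne.run(data, 2, 40, 0.2, true, runManually);
}

void ofApp::update() {
    if (runManually) {
        // advance one iteration and collect the points where they currently are
        tsnePoints = tsne.iterate();
        for (int i = 0; i < testPoints.size(); i++) {
            testPoints[i].tsnePoint = ofPoint(tsnePoints[i][0], tsnePoints[i][1]);
        }
    }
}
```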
3 changes: 3 additions & 0 deletions example/src/ofApp.h
@@ -31,4 +31,7 @@ class ofApp : public ofBaseApp{

ofxTSNE tsne;
vector<TestPoint> testPoints;
vector<vector<double> > tsnePoints;

bool runManually;
};
124 changes: 76 additions & 48 deletions src/bhtsne/tsne.cpp
@@ -46,24 +46,33 @@
using namespace std;

// Perform t-SNE
void TSNE::run(double* X, int N, int D, double* Y, int no_dims, double perplexity, double theta) {
void TSNE::run(double* X, int N, int D, double* Y, int no_dims, double perplexity, double theta, bool runManually) {
this->X = X;
this->N = N;
this->D = D;
this->Y = Y;
this->no_dims = no_dims;
this->perplexity = perplexity;
this->theta = theta;

// Determine whether we are using an exact algorithm
if(N - 1 < 3 * perplexity) { printf("Perplexity too large for the number of data points!\n"); exit(1); }
printf("Using no_dims = %d, perplexity = %f, and theta = %f\n", no_dims, perplexity, theta);
bool exact = (theta == .0) ? true : false;
exact = (theta == .0) ? true : false;

// Set learning parameters
float total_time = .0;
clock_t start, end;
int max_iter = 1000, stop_lying_iter = 250, mom_switch_iter = 250;
double momentum = .5, final_momentum = .8;
double eta = 200.0;
total_time = .0;
max_iter = 1000;
stop_lying_iter = 250;
mom_switch_iter = 250;
momentum = .5;
final_momentum = .8;
eta = 200.0;

// Allocate some memory
double* dY = (double*) malloc(N * no_dims * sizeof(double));
double* uY = (double*) malloc(N * no_dims * sizeof(double));
double* gains = (double*) malloc(N * no_dims * sizeof(double));
dY = (double*) malloc(N * no_dims * sizeof(double));
uY = (double*) malloc(N * no_dims * sizeof(double));
gains = (double*) malloc(N * no_dims * sizeof(double));
if(dY == NULL || uY == NULL || gains == NULL) { printf("Memory allocation failed!\n"); exit(1); }
for(int i = 0; i < N * no_dims; i++) uY[i] = .0;
for(int i = 0; i < N * no_dims; i++) gains[i] = 1.0;
@@ -79,7 +88,6 @@ void TSNE::run(double* X, int N, int D, double* Y, int no_dims, double perplexit
for(int i = 0; i < N * D; i++) X[i] /= max_X;

// Compute input similarities for exact t-SNE
double* P; unsigned int* row_P; unsigned int* col_P; double* val_P;
if(exact) {

// Compute similarities
@@ -130,45 +138,16 @@ void TSNE::run(double* X, int N, int D, double* Y, int no_dims, double perplexit
if(exact) printf("Input similarities computed in %4.2f seconds!\nLearning embedding...\n", (float) (end - start) / CLOCKS_PER_SEC);
else printf("Input similarities computed in %4.2f seconds (sparsity = %f)!\nLearning embedding...\n", (float) (end - start) / CLOCKS_PER_SEC, (double) row_P[N] / ((double) N * (double) N));
start = clock();
for(int iter = 0; iter < max_iter; iter++) {

// Compute (approximate) gradient
if(exact) computeExactGradient(P, Y, N, no_dims, dY);
else computeGradient(P, row_P, col_P, val_P, Y, N, no_dims, dY, theta);

// Update gains
for(int i = 0; i < N * no_dims; i++) gains[i] = (sign(dY[i]) != sign(uY[i])) ? (gains[i] + .2) : (gains[i] * .8);
for(int i = 0; i < N * no_dims; i++) if(gains[i] < .01) gains[i] = .01;

// Perform gradient update (with momentum and gains)
for(int i = 0; i < N * no_dims; i++) uY[i] = momentum * uY[i] - eta * gains[i] * dY[i];
for(int i = 0; i < N * no_dims; i++) Y[i] = Y[i] + uY[i];

// Make solution zero-mean
zeroMean(Y, N, no_dims);

// Stop lying about the P-values after a while, and switch momentum
if(iter == stop_lying_iter) {
if(exact) { for(int i = 0; i < N * N; i++) P[i] /= 12.0; }
else { for(int i = 0; i < row_P[N]; i++) val_P[i] /= 12.0; }
}
if(iter == mom_switch_iter) momentum = final_momentum;

// Print out progress
if(iter > 0 && (iter % 50 == 0 || iter == max_iter - 1)) {
end = clock();
double C = .0;
if(exact) C = evaluateError(P, Y, N, no_dims);
else C = evaluateError(row_P, col_P, val_P, Y, N, no_dims, theta); // doing approximate computation here!
if(iter == 0)
printf("Iteration %d: error is %f\n", iter + 1, C);
else {
total_time += (float) (end - start) / CLOCKS_PER_SEC;
printf("Iteration %d: error is %f (50 iterations in %4.2f seconds)\n", iter, C, (float) (end - start) / CLOCKS_PER_SEC);
}
start = clock();

iter = 0;
if (!runManually) {
while(iter < max_iter) {
runIteration();
}
}
}

void TSNE::finish() {
end = clock(); total_time += (float) (end - start) / CLOCKS_PER_SEC;

// Clean up memory
@@ -184,6 +163,55 @@ void TSNE::run(double* X, int N, int D, double* Y, int no_dims, double perplexit
printf("Fitting performed in %4.2f seconds.\n", total_time);
}

void TSNE::runIteration() {

if (iter >= max_iter) {
return;
}

// Compute (approximate) gradient
if(exact) computeExactGradient(P, Y, N, no_dims, dY);
else computeGradient(P, row_P, col_P, val_P, Y, N, no_dims, dY, theta);

// Update gains
for(int i = 0; i < N * no_dims; i++) gains[i] = (sign(dY[i]) != sign(uY[i])) ? (gains[i] + .2) : (gains[i] * .8);
for(int i = 0; i < N * no_dims; i++) if(gains[i] < .01) gains[i] = .01;

// Perform gradient update (with momentum and gains)
for(int i = 0; i < N * no_dims; i++) uY[i] = momentum * uY[i] - eta * gains[i] * dY[i];
for(int i = 0; i < N * no_dims; i++) Y[i] = Y[i] + uY[i];

// Make solution zero-mean
zeroMean(Y, N, no_dims);

// Stop lying about the P-values after a while, and switch momentum
if(iter == stop_lying_iter) {
if(exact) { for(int i = 0; i < N * N; i++) P[i] /= 12.0; }
else { for(int i = 0; i < row_P[N]; i++) val_P[i] /= 12.0; }
}
if(iter == mom_switch_iter) momentum = final_momentum;

// Print out progress
if(iter > 0 && (iter % 50 == 0 || iter == max_iter - 1)) {
end = clock();
double C = .0;
if(exact) C = evaluateError(P, Y, N, no_dims);
else C = evaluateError(row_P, col_P, val_P, Y, N, no_dims, theta); // doing approximate computation here!
if(iter == 0)
printf("Iteration %d: error is %f\n", iter + 1, C);
else {
total_time += (float) (end - start) / CLOCKS_PER_SEC;
printf("Iteration %d: error is %f (50 iterations in %4.2f seconds)\n", iter, C, (float) (end - start) / CLOCKS_PER_SEC);
}
start = clock();
}

iter++;

if (iter == max_iter) {
finish();
}
}

// Compute gradient of the t-SNE cost function (using Barnes-Hut algorithm)
void TSNE::computeGradient(double* P, unsigned int* inp_row_P, unsigned int* inp_col_P, double* inp_val_P, double* Y, int N, int D, double* dC, double theta)
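The reorganization above moves the former loop body into `runIteration()` and the per-run locals into member state, so a caller can step the optimization itself. Below is a minimal sketch of driving the bhtsne class manually, assuming only what this diff shows: the caller owns the `X` and `Y` buffers (as in the original bhtsne API), `max_iter` defaults to 1000, and `runIteration()` calls `finish()` after the last step.

```cpp
#include <vector>
#include "tsne.h"

// Minimal sketch (assumptions noted above): step bhtsne manually via runIteration().
void embedManually(std::vector<double>& X, int N, int D) {
    int no_dims = 2;
    std::vector<double> Y(N * no_dims);   // output buffer, written in place by t-SNE
    TSNE tsne;
    // runManually = true: run() computes similarities and allocates state, no iterations yet
    tsne.run(X.data(), N, D, Y.data(), no_dims, 30.0, 0.5, true);
    for (int i = 0; i < 1000; i++) {      // max_iter in this commit is 1000
        tsne.runIteration();              // one gradient step; Y holds the current embedding
    }
    // after the final step, runIteration() calls finish(), which frees the internal buffers
}
```

Moving `iter`, `dY`, `uY`, `gains`, `P`, and the timing variables into members (see the tsne.h diff below) is what lets this state persist between `runIteration()` calls.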
18 changes: 17 additions & 1 deletion src/bhtsne/tsne.h
@@ -41,13 +41,15 @@ static inline double sign(double x) { return (x == .0 ? .0 : (x < .0 ? -1.0 : 1.
class TSNE
{
public:
void run(double* X, int N, int D, double* Y, int no_dims, double perplexity, double theta);
void run(double* X, int N, int D, double* Y, int no_dims, double perplexity, double theta, bool runManually=false);
void runIteration();
bool load_data(double** data, int* n, int* d, int* no_dims, double* theta, double* perplexity, int* rand_seed);
void save_data(double* data, int* landmarks, double* costs, int n, int d);
void symmetrizeMatrix(unsigned int** row_P, unsigned int** col_P, double** val_P, int N); // should be static!


private:
void finish();
void computeGradient(double* P, unsigned int* inp_row_P, unsigned int* inp_col_P, double* inp_val_P, double* Y, int N, int D, double* dC, double theta);
void computeExactGradient(double* P, double* Y, int N, int D, double* dC);
double evaluateError(double* P, double* Y, int N, int D);
@@ -57,6 +59,20 @@ class TSNE
void computeGaussianPerplexity(double* X, int N, int D, unsigned int** _row_P, unsigned int** _col_P, double** _val_P, double perplexity, int K);
void computeSquaredEuclideanDistance(double* X, int N, int D, double* DD);
double randn();

bool exact;
float total_time;
clock_t start, end;
int iter, max_iter, stop_lying_iter, mom_switch_iter;
double momentum, final_momentum;
double eta;
double* dY, *uY, *gains;
double* P, *val_P;
unsigned int *row_P, *col_P;

double *X, *Y;
int N, D, no_dims;
double perplexity, theta;
};

#endif
