diff --git a/CMakeLists.txt b/CMakeLists.txt index 0d842e7f..53021628 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required (VERSION 3.24) project (Tapkee LANGUAGES CXX) # set paths -set (CMAKE_CXX_STANDARD 20) +set (CMAKE_CXX_STANDARD 23) set (TAPKEE_INCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/include") set (TAPKEE_SRC_DIR "${CMAKE_CURRENT_SOURCE_DIR}/src") set (TAPKEE_TESTS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/test/unit") diff --git a/README.md b/README.md index baf63d46..1dd27c12 100644 --- a/README.md +++ b/README.md @@ -43,10 +43,9 @@ some examples of usage Tapkee in Shogun as API --- -We provide an interface based on the method chaining technique. The chain starts from the call -of the `initialize()` method and followed with the `withParameters(const ParametersSet&)` call -which is used to provide parameters like the method to use and its settings. The provided -argument is formed with the following syntax: +We provide an interface based on the method chaining technique. The chain starts with the call +of the `with(const ParametersSet&)` method, which is used to provide parameters like the method +to use and its settings. The provided argument is formed with the following syntax: (keyword1=value1, keyword2=value2) @@ -63,24 +62,23 @@ are defined: `method`, `eigen_method`, `neighbors_method`, `num_neighbors`, `tar As an example of parameters setting, if you want to use the Isomap algorithm with the number of neighbors set to 15: - tapkee::initialize().withParameters((method=Isomap,num_neighbors=15)) + tapkee::with((method=Isomap,num_neighbors=15)) Please note that the inner parentheses are necessary as it uses the comma operator which appears to be ambiguous in this case. -Next, with initialized parameters you may either embed the provided matrix with: +Next, you may either embed the provided matrix with: - tapkee::initialize().withParameters((method=Isomap,num_neighbors=15)). 
- .embedUsing(matrix); + tapkee::with((method=Isomap,num_neighbors=15)).embedUsing(matrix); Or provide callbacks (kernel, distance and features) using any combination of the `withKernel(KernelCallback)`, `withDistance(DistanceCallback)` and `withFeatures(FeaturesCallback)` member functions: - tapkee::initialize().withParameters((method=Isomap,num_neighbors=15)) - .withKernel(kernel_callback) - .withDistance(distance_callback) - .withFeatures(features_callback) + tapkee::with((method=Isomap,num_neighbors=15)) + .withKernel(kernel_callback) + .withDistance(distance_callback) + .withFeatures(features_callback) Once callbacks are initialized you may either embed data using an STL-compatible sequence of indices or objects (that supports the @@ -92,17 +90,14 @@ member function. As a summary - a few examples: - TapkeeOutput output = initialize() - .withParameters((method=Isomap,num_neighbors=15)) + TapkeeOutput output = with((method=Isomap,num_neighbors=15)) .embedUsing(matrix); - TapkeeOutput output = initialize() - .withParameters((method=Isomap,num_neighbors=15)) + TapkeeOutput output = with((method=Isomap,num_neighbors=15)) .withDistance(distance_callback) .embedUsing(indices); - TapkeeOutput output = initialize() - .withParameters((method=Isomap,num_neighbors=15)) + TapkeeOutput output = with((method=Isomap,num_neighbors=15)) .withDistance(distance_callback) .embedRange(indices.begin(),indices.end()); @@ -130,8 +125,7 @@ A minimal working example of a program that uses the library is: MyDistanceCallback d; - TapkeeOutput output = tapkee::initialize() - .withParameters((method=MultidimensionalScaling,target_dimension=1)) + TapkeeOutput output = tapkee::with((method=MultidimensionalScaling,target_dimension=1)) .withDistance(d) .embedUsing(indices); diff --git a/examples/cbcl/cbcl.py b/examples/cbcl/cbcl.py index 428d945a..ffd93eca 100644 --- a/examples/cbcl/cbcl.py +++ b/examples/cbcl/cbcl.py @@ -1,30 +1,39 @@ -import numpy, datetime, json, subprocess, sys, os, glob 
+import numpy +import datetime +import json +import subprocess +import sys +import os +import glob +import tempfile + import scipy.misc +from PIL import Image def load(dir): images = [] vecs = [] for f in glob.glob(os.path.join(dir,'*.pgm')): - image = numpy.array(scipy.misc.imread(f)) + image = numpy.array(Image.open(f)) images.append((f,image)) vecs.append(image.ravel()) return numpy.vstack(vecs), images def embed(feature_matrix): - input_file = 'tmp_cbcl_input' - numpy.savetxt(input_file,feature_matrix) - output_file = 'tmp_cbcl_output.dat' - run_string = './bin/tapkee_cli -i %s -o %s -m ltsa -k 20 --transpose --verbose --benchmark' % (input_file,output_file) - output = subprocess.check_output(run_string, shell=True) - embedding = numpy.loadtxt(output_file) - os.remove(output_file) + input_file = tempfile.NamedTemporaryFile(prefix='cbcl_input') + output_file = tempfile.NamedTemporaryFile(prefix='cbcl_output') + numpy.savetxt(input_file.name, feature_matrix, delimiter=',') + runner_string = './bin/tapkee -i %s -o %s -m ltsa -k 80 --transpose-output --verbose --benchmark' % (input_file.name, output_file.name) + process = subprocess.run(runner_string, shell=True, capture_output=True, text=True) + print(process.stderr) + if process.returncode != 0: + raise Exception('Failed to embed') + embedding = numpy.loadtxt(output_file.name, delimiter=',') return embedding -def export_json(outfile,embedding,images): +def export_json(outfile, embedding, images): json_dict = {} N = embedding.shape[1] - print 'N', N - import scipy.misc - json_dict['data'] = [{'cx':embedding[0,i], 'cy':embedding[1,i], 'fname':images[i][0]} for i in xrange(N)] + json_dict['data'] = [{'cx':embedding[0,i], 'cy':embedding[1,i], 'fname':images[i][0]} for i in range(N)] json.dump(json_dict, open(outfile, 'w')) @@ -34,15 +43,15 @@ def plot_embedding(embedding,images): fig = plt.figure() ax = fig.add_subplot(111) ax.scatter(embedding[0],embedding[1],alpha=0.0) - for i in xrange(embedding.shape[1]): - img = numpy.zeros((images[i][1].shape[0],images[i][1].shape[1],4)) - img[:,:,0] = 255*images[i][1] - img[:,:,1] = 
255*images[i][1] - img[:,:,2] = 255*images[i][1] - img[:,:,3] = 1 - img[(images[i][1]==28),3] = 0 + for i in range(embedding.shape[1]): + img = numpy.zeros((images[i][1].shape[0], images[i][1].shape[1], 4)) + img[:,:,0] = images[i][1]/255.0 + img[:,:,1] = images[i][1]/255.0 + img[:,:,2] = images[i][1]/255.0 + img[:,:,3] = 1.0 + img[(images[i][1]==28), 3] = 0 imagebox = OffsetImage(img,cmap=plt.cm.gray,zoom=0.2) - ab = AnnotationBbox(imagebox, (embedding[0][i], embedding[1,i]),pad=0.001,frameon=False) + ab = AnnotationBbox(imagebox, (embedding[0][i], embedding[1,i]), pad=0.001, frameon=False) ax.add_artist(ab) plt.show() @@ -50,5 +59,5 @@ def plot_embedding(embedding,images): feature_matrix, images = load('data/cbcl') embedding = embed(feature_matrix) if len(sys.argv)==3: - export_json(sys.argv[2],embedding, images) + export_json(sys.argv[2], embedding, images) plot_embedding(embedding,images) diff --git a/examples/go.py b/examples/go.py index df1079bc..25ca7d8f 100755 --- a/examples/go.py +++ b/examples/go.py @@ -4,6 +4,8 @@ import sys import os import subprocess +import re +import tempfile import numpy as np from utils import generate_data, plot @@ -20,34 +22,40 @@ } def embed(data,method): - if method not in supported_methods: - raise Exception('Method is not supported by this script') - - input_file = 'tapkee_input_data' - output_file = 'tapkee_output_data' - np.savetxt(input_file, data.T,delimiter=',') + input_file = tempfile.NamedTemporaryFile(prefix='tapkee_input') + output_file = tempfile.NamedTemporaryFile(prefix='tapkee_output') + np.savetxt(input_file.name, data.T,delimiter=',') tapkee_binary = 'bin/tapkee' - runner_string = '%s -i %s -o %s -m %s -k 20 --precompute --verbose --transpose-output --benchmark' % (tapkee_binary, input_file, output_file, method) - print('-- To reproduce this use the following command', runner_string) - output = subprocess.check_output(runner_string, shell=True) + runner_string = '%s -i %s -o %s -m %s -k 20 --precompute --debug 
--verbose --transpose-output --benchmark' % ( + tapkee_binary, input_file.name, output_file.name, method + ) + print('-- To reproduce this use the following command `{}`'.format(runner_string)) + process = subprocess.run(runner_string, shell=True, capture_output=True, text=True) + print(process.stderr) + if process.returncode != 0: + raise Exception('Failed to embed') + + if match := re.search(r'Parameter dimension reduction method = \[([a-zA-Z0-9() ]+)\]', process.stderr): + used_method = match.group(1) + else: + used_method = '' + embedded_data = np.loadtxt(output_file, delimiter=',') - os.remove(input_file) - os.remove(output_file) - return embedded_data + return embedded_data, used_method if __name__ == "__main__": import argparse parser = argparse.ArgumentParser(description='Graphical example of dimension reduction with Tapkee.') - parser.add_argument('dataset', type=str, nargs=1, help='A dataset to embed. One of the following: %s' % str(['swissroll', 'scurve', 'helix'])) - parser.add_argument('method', type=str, nargs=1, help='A method to use. One of the following %s' % str(list(supported_methods.keys()))) + parser.add_argument('dataset', type=str, nargs=1, help='A dataset to embed. One of the following: %s' % str(['swissroll', 'scurve', 'helix', 'twinpeaks'])) + parser.add_argument('method', type=str, nargs=1, help='A method to use. 
Any of the methods supported by Tapkee') args = parser.parse_args() dataset = args.dataset[0] method = args.method[0] print('-- Loading %s data' % dataset) data, colors = generate_data(dataset) - print('-- Embedding %s data with %s' % (dataset,method)) - embedded_data = embed(data, method) + print('-- Embedding %s data with %s' % (dataset, method)) + embedded_data, used_method = embed(data, method) print('-- Plotting embedded data') - plot(data, embedded_data, colors, supported_methods[method]) + plot(data, embedded_data, colors, used_method) diff --git a/examples/minimal/minimal.cpp b/examples/minimal/minimal.cpp index 6725b4c4..fdb8a96d 100644 --- a/examples/minimal/minimal.cpp +++ b/examples/minimal/minimal.cpp @@ -20,8 +20,7 @@ int main(int argc, const char **argv) MyDistanceCallback distance; - TapkeeOutput output = initialize() - .withParameters((method = MultidimensionalScaling, target_dimension = 1)) + TapkeeOutput output = with((method = MultidimensionalScaling, target_dimension = 1)) .withDistance(distance) .embedUsing(indices); diff --git a/examples/precomputed/precomputed.cpp b/examples/precomputed/precomputed.cpp index 4086d7b4..854b3f92 100644 --- a/examples/precomputed/precomputed.cpp +++ b/examples/precomputed/precomputed.cpp @@ -7,7 +7,7 @@ using namespace tapkee; int main(int argc, const char **argv) { const int N = 100; - tapkee::DenseMatrix distances(N, N); + DenseMatrix distances(N, N); vector indices(N); for (int i = 0; i < N; i++) { @@ -19,8 +19,7 @@ int main(int argc, const char **argv) precomputed_distance_callback distance(distances); - TapkeeOutput output = initialize() - .withParameters((method = MultidimensionalScaling, target_dimension = 1)) + TapkeeOutput output = with((method = MultidimensionalScaling, target_dimension = 1)) .withDistance(distance) .embedUsing(indices); diff --git a/examples/rna/rna.cpp b/examples/rna/rna.cpp index e8b5d4e5..62b48ed0 100644 --- a/examples/rna/rna.cpp +++ b/examples/rna/rna.cpp @@ -33,8 +33,7 @@ int 
main(int argc, const char **argv) MatchKernelCallback kernel; - TapkeeOutput result = initialize() - .withParameters((method = KernelLocallyLinearEmbedding, num_neighbors = 30)) + TapkeeOutput result = with((method = KernelLocallyLinearEmbedding, num_neighbors = 30)) .withKernel(kernel) .embedUsing(rnas); diff --git a/examples/utils.py b/examples/utils.py index a67362b9..fe432baf 100644 --- a/examples/utils.py +++ b/examples/utils.py @@ -1,24 +1,43 @@ import matplotlib.pyplot as plt import numpy as np -def generate_data(type, N=1000): +def generate_data(type, N=1000, random_state=None): + rng = np.random.RandomState(random_state) if type=='swissroll': - tt = np.array((3*np.pi/2)*(1+2*np.random.rand(N))) - height = np.array((np.random.rand(N)-0.5)) + tt = np.array((3*np.pi/2)*(1+2*rng.rand(N))) + height = np.array((rng.rand(N)-0.5)) X = np.array([tt*np.cos(tt), 10*height, tt*np.sin(tt)]) - return X,tt + return X, tt if type=='scurve': - tt = np.array((3*np.pi*(np.random.rand(N)-0.5))) - height = np.array((np.random.rand(N)-0.5)) + tt = np.array((3*np.pi*(rng.rand(N)-0.5))) + height = np.array((rng.rand(N)-0.5)) X = np.array([np.sin(tt), 10*height, np.sign(tt)*(np.cos(tt)-1)]) - return X,tt + return X, tt if type=='helix': tt = np.linspace(1,N,N).T / N tt = tt*2*np.pi X = np.r_[[(2+np.cos(8*tt))*np.cos(tt)], - [(2+np.cos(8*tt))*np.sin(tt)], - [np.sin(8*tt)]] - return X,tt + [(2+np.cos(8*tt))*np.sin(tt)], + [np.sin(8*tt)]] + return X, tt + if type=='twinpeaks': + X = rng.uniform(-1, 1, size=(N, 2)) + tt = np.sin(np.pi * X[:, 0]) * np.tanh(X[:, 1]) + tt += 0.1 * rng.normal(size=tt.shape) + X = np.vstack([X.T, tt]) + return X, tt + if type=='klein': + u = rng.uniform(0, 2 * np.pi, N) + v = rng.uniform(0, 2 * np.pi, N) + x = (2 + np.cos(u / 2) * np.sin(v) - np.sin(u / 2) * np.sin(2 * v)) * np.cos(u) + y = (2 + np.cos(u / 2) * np.sin(v) - np.sin(u / 2) * np.sin(2 * v)) * np.sin(u) + z = np.sin(u / 2) * np.sin(v) + np.cos(u / 2) * np.sin(2 * v) + + noise = 0.01 + x += 
noise * rng.normal(size=x.shape) + y += noise * rng.normal(size=y.shape) + z += noise * rng.normal(size=z.shape) + return np.vstack((x, y, z)), u raise Exception('Dataset is not supported') @@ -26,17 +45,47 @@ def plot(data, embedded_data, colors='m', method=None): fig = plt.figure() fig.set_facecolor('white') - ax = fig.add_subplot(121, projection='3d') - ax.scatter(data[0], data[1], data[2], c=colors, cmap=plt.cm.Spectral, s=5) + ax_original = fig.add_subplot(121, projection='3d') + scatter_original = ax_original.scatter(data[0], data[1], data[2], c=colors, cmap=plt.cm.Spectral, s=5, picker=True) plt.axis('tight') plt.axis('off') plt.title('Original', fontsize=9) - ax = fig.add_subplot(122) - ax.scatter(embedded_data[0], embedded_data[1], c=colors, cmap=plt.cm.Spectral, s=5) + ax_embedding = fig.add_subplot(122) + scatter_embedding = ax_embedding.scatter(embedded_data[0], embedded_data[1], c=colors, cmap=plt.cm.Spectral, s=5, picker=True) plt.axis('tight') plt.axis('off') - plt.title('Embedding' + (' with ' + method) if method else '', fontsize=9) + plt.title('Embedding' + ((' with ' + method) if method else ''), fontsize=9, wrap=True) - plt.show() + highlighted_points = [] # To store highlighted points + + # Function to highlight points on both plots + def highlight(index): + # Reset previous highlighted points + for point in highlighted_points: + point.remove() + highlighted_points.clear() + + # Highlight the current point on both scatter plots + point1 = ax_original.scatter([data[0][index]], [data[1][index]], [data[2][index]], color='white', s=25, edgecolor='black', zorder=3) + point2 = ax_embedding.scatter([embedded_data[0][index]], [embedded_data[1][index]], color='white', s=25, edgecolor='black', zorder=3) + highlighted_points.append(point1) + highlighted_points.append(point2) + fig.canvas.draw_idle() + # Event handler for mouse motion + def on_hover(event): + if event.inaxes == ax_original: + cont, ind = scatter_original.contains(event) + elif event.inaxes 
== ax_embedding: + cont, ind = scatter_embedding.contains(event) + else: + return + + if cont: + index = ind['ind'][0] + highlight(index) + + fig.canvas.mpl_connect('motion_notify_event', on_hover) + + plt.show() diff --git a/include/tapkee/chain_interface.hpp b/include/tapkee/chain_interface.hpp index eef970af..87a56177 100644 --- a/include/tapkee/chain_interface.hpp +++ b/include/tapkee/chain_interface.hpp @@ -484,31 +484,19 @@ class ParametersInitializedState }; } /* End of namespace tapkee_internal */ -struct initialize +/** Returns an instance representing a state with initialized parameters. + * + * In the chain this method's call is followed by any of + * @ref tapkee_internal::ParametersInitializedState::embedUsing + * @ref tapkee_internal::ParametersInitializedState::withKernel + * @ref tapkee_internal::ParametersInitializedState::withDistance + * @ref tapkee_internal::ParametersInitializedState::withFeatures + * + * @param parameters a set of parameters formed by keywords assigned to values + */ +inline tapkee_internal::ParametersInitializedState with(const ParametersSet& parameters) { - /** Constructor that is the first required - * method in the call chain. - */ - initialize() - { - } - - /** The second required method in the call chain. Returns - * an instance representing a state with initialized parameters. 
- * - * In the chain this method's call is followed by any of - * * @ref tapkee_internal::ParametersInitializedState::embedUsing - * * @ref tapkee_internal::ParametersInitializedState::withKernel - * * @ref tapkee_internal::ParametersInitializedState::withDistance - * * @ref tapkee_internal::ParametersInitializedState::withFeatures - * - * @param parameters a set of parameters formed from keywords assigned - * to values - */ - tapkee_internal::ParametersInitializedState withParameters(const ParametersSet& parameters) const - { - return tapkee_internal::ParametersInitializedState(parameters); - } -}; + return tapkee_internal::ParametersInitializedState(parameters); +} } /* End of namespace tapkee */ diff --git a/include/tapkee/embed.hpp b/include/tapkee/embed.hpp index e268689d..8fc6ce3e 100644 --- a/include/tapkee/embed.hpp +++ b/include/tapkee/embed.hpp @@ -104,7 +104,7 @@ TapkeeOutput embed(RandomAccessIterator begin, RandomAccessIterator end, KernelC parameters.check(); parameters.merge(tapkee_internal::defaults); parameters.visit([] (const stichwort::Parameter& p) { - tapkee::LoggingSingleton::instance().message_debug(fmt::format("Parameter {} = [{}]", p.name(), p.repr())); + tapkee::Logging::instance().message_debug(fmt::format("Parameter {} = [{}]", p.name(), p.repr())); }); DimensionReductionMethod selected_method = parameters[method]; @@ -114,8 +114,7 @@ TapkeeOutput embed(RandomAccessIterator begin, RandomAccessIterator end, KernelC tapkee_internal::Context context(progress_function_ptr, cancel_function_ptr); - LoggingSingleton::instance().message_info( - fmt::format("Using the {} method.", get_method_name(selected_method))); + Logging::instance().message_info(fmt::format("Using the {} method.", get_method_name(selected_method))); output = tapkee_internal::initialize(begin, end, kernel_callback, distance_callback, features_callback, parameters, context) diff --git a/include/tapkee/external/barnes_hut_sne/tsne.hpp 
b/include/tapkee/external/barnes_hut_sne/tsne.hpp index 7507f44b..6cfae3b4 100644 --- a/include/tapkee/external/barnes_hut_sne/tsne.hpp +++ b/include/tapkee/external/barnes_hut_sne/tsne.hpp @@ -63,9 +63,9 @@ class TSNE // Determine whether we are using an exact algorithm bool exact = (theta == .0) ? true : false; if (exact) - tapkee::LoggingSingleton::instance().message_info("Using exact t-SNE algorithm"); + tapkee::Logging::instance().message_info("Using exact t-SNE algorithm"); else - tapkee::LoggingSingleton::instance().message_info("Using Barnes-Hut-SNE algorithm"); + tapkee::Logging::instance().message_info("Using Barnes-Hut-SNE algorithm"); // Set learning parameters int max_iter = 1000, stop_lying_iter = 250, mom_switch_iter = 250; @@ -188,8 +188,7 @@ class TSNE C = evaluateError(P.data(), Y, N); else C = evaluateError(row_P, col_P, val_P, Y, N, theta); // doing approximate computation here! - tapkee::LoggingSingleton::instance().message_info( - fmt::format("Iteration {}: error is {}", iter, C)); + tapkee::Logging::instance().message_info(fmt::format("Iteration {}: error is {}", iter, C)); } } // Clean up memory diff --git a/include/tapkee/methods.hpp b/include/tapkee/methods.hpp index d8498536..e0499529 100644 --- a/include/tapkee/methods.hpp +++ b/include/tapkee/methods.hpp @@ -48,6 +48,7 @@ class DynamicImplementation : public ImplementationBase(self); \ + implementation.validate(); \ return implementation.embed(); \ } tapkee_method_handle(KernelLocallyLinearEmbedding); diff --git a/include/tapkee/methods/all.hpp b/include/tapkee/methods/all.hpp index c19bc251..c90bfca8 100644 --- a/include/tapkee/methods/all.hpp +++ b/include/tapkee/methods/all.hpp @@ -33,6 +33,10 @@ namespace tapkee_internal { __TAPKEE_IMPLEMENTATION(PassThru) + void validate() + { + } + TapkeeOutput embed() { DenseMatrix feature_matrix = dense_matrix_from_features(this->features, this->current_dimension, this->begin, this->end); diff --git a/include/tapkee/methods/base.hpp 
b/include/tapkee/methods/base.hpp index 1a06a603..cb015d4e 100644 --- a/include/tapkee/methods/base.hpp +++ b/include/tapkee/methods/base.hpp @@ -62,7 +62,7 @@ class ImplementationBase } - public: + protected: ParametersSet parameters; Context context; KernelCallback kernel; @@ -78,20 +78,49 @@ class ImplementationBase IndexType current_dimension; protected: - template Neighbors findNeighborsWith(Distance d) + template + Neighbors find_neighbors_with(Distance d) { parameters[num_neighbors].checked().satisfies(InRange(3, n_vectors)).orThrow(); return find_neighbors(parameters[neighbors_method], begin, end, d, parameters[num_neighbors], parameters[check_connectivity]); } + template + EigendecompositionResult eigendecomposition_via(const EigendecompositionStrategy& eigen_strategy, const MatrixType& m, IndexType target_dimension) + { + return eigendecomposition( + parameters[eigen_method], + parameters[computation_strategy], + eigen_strategy, + m, + target_dimension + ); + } + + }; +// TODO can we avoid these using things? 
#define __TAPKEE_IMPLEMENTATION(Method) \ template \ class Method ## Implementation : public ImplementationBase \ { \ public: \ + typedef ImplementationBase Base; \ + using Base::parameters; \ + using Base::context; \ + using Base::kernel; \ + using Base::distance; \ + using Base::features; \ + using Base::plain_distance; \ + using Base::kernel_distance; \ + using Base::begin; \ + using Base::end; \ + using Base::n_vectors; \ + using Base::current_dimension; \ + using Base::find_neighbors_with; \ + using Base::eigendecomposition_via; \ Method ## Implementation(const ImplementationBase& other) : \ ImplementationBase(other) \ { \ diff --git a/include/tapkee/methods/diffusion_map.hpp b/include/tapkee/methods/diffusion_map.hpp index a1cf3caf..093c83c5 100644 --- a/include/tapkee/methods/diffusion_map.hpp +++ b/include/tapkee/methods/diffusion_map.hpp @@ -7,7 +7,6 @@ /* Tapkee includes */ #include #include -#include /* End of Tapkee includes */ namespace tapkee @@ -16,23 +15,25 @@ namespace tapkee_internal { __TAPKEE_IMPLEMENTATION(DiffusionMap) - TapkeeOutput embed() + void validate() { - this->parameters[diffusion_map_timesteps].checked().satisfies(Positivity()).orThrow(); - this->parameters[gaussian_kernel_width].checked().satisfies(Positivity()).orThrow(); + parameters[diffusion_map_timesteps].checked().satisfies(Positivity()).orThrow(); + parameters[gaussian_kernel_width].checked().satisfies(Positivity()).orThrow(); + } - IndexType target_dimension_value = static_cast(this->parameters[target_dimension]); + TapkeeOutput embed() + { + IndexType target_dimension_value = static_cast(parameters[target_dimension]); Parameter target_dimension_add = Parameter::create("target_dimension", target_dimension_value + 1); DenseSymmetricMatrix diffusion_matrix = - compute_diffusion_matrix(this->begin, this->end, this->distance, this->parameters[gaussian_kernel_width]); + compute_diffusion_matrix(begin, end, distance, parameters[gaussian_kernel_width]); EigendecompositionResult 
decomposition_result = - eigendecomposition(this->parameters[eigen_method], this->parameters[computation_strategy], LargestEigenvalues, - diffusion_matrix, target_dimension_add); + eigendecomposition_via(LargestEigenvalues, diffusion_matrix, target_dimension_add); DenseMatrix embedding = (decomposition_result.first).leftCols(target_dimension_value); // scaling with lambda_i^t for (IndexType i = 0; i < target_dimension_value; i++) embedding.col(i).array() *= - pow(decomposition_result.second(i), static_cast(this->parameters[diffusion_map_timesteps])); + pow(decomposition_result.second(i), static_cast(parameters[diffusion_map_timesteps])); // scaling by eigenvector to largest eigenvalue 1 for (IndexType i = 0; i < target_dimension_value; i++) embedding.col(i).array() /= decomposition_result.first.col(target_dimension_value).array(); diff --git a/include/tapkee/methods/factor_analysis.hpp b/include/tapkee/methods/factor_analysis.hpp index 4227667e..a9e25f70 100644 --- a/include/tapkee/methods/factor_analysis.hpp +++ b/include/tapkee/methods/factor_analysis.hpp @@ -15,13 +15,16 @@ namespace tapkee_internal { __TAPKEE_IMPLEMENTATION(FactorAnalysis) - TapkeeOutput embed() + void validate() { - this->parameters[fa_epsilon].checked().satisfies(NonNegativity()).orThrow(); + parameters[fa_epsilon].checked().satisfies(NonNegativity()).orThrow(); + } - DenseVector mean_vector = compute_mean(this->begin, this->end, this->features, this->current_dimension); - return TapkeeOutput(project(this->begin, this->end, this->features, this->current_dimension, this->parameters[max_iteration], - this->parameters[fa_epsilon], this->parameters[target_dimension], mean_vector), + TapkeeOutput embed() + { + DenseVector mean_vector = compute_mean(begin, end, features, current_dimension); + return TapkeeOutput(project(begin, end, features, current_dimension, parameters[max_iteration], + parameters[fa_epsilon], parameters[target_dimension], mean_vector), unimplementedProjectingFunction()); } 
__TAPKEE_END_IMPLEMENTATION() diff --git a/include/tapkee/methods/hessian_locally_linear_embedding.hpp b/include/tapkee/methods/hessian_locally_linear_embedding.hpp index d5954a5a..47f7f874 100644 --- a/include/tapkee/methods/hessian_locally_linear_embedding.hpp +++ b/include/tapkee/methods/hessian_locally_linear_embedding.hpp @@ -6,7 +6,6 @@ /* Tapkee includes */ #include -#include #include /* End of Tapkee includes */ @@ -16,13 +15,16 @@ namespace tapkee_internal { __TAPKEE_IMPLEMENTATION(HessianLocallyLinearEmbedding) + void validate() + { + } + TapkeeOutput embed() { - Neighbors neighbors = this->findNeighborsWith(this->kernel_distance); + Neighbors neighbors = find_neighbors_with(kernel_distance); SparseWeightMatrix weight_matrix = - hessian_weight_matrix(this->begin, this->end, neighbors, this->kernel, this->parameters[target_dimension]); - return TapkeeOutput(eigendecomposition(this->parameters[eigen_method], this->parameters[computation_strategy], - SmallestEigenvalues, weight_matrix, this->parameters[target_dimension]).first, + hessian_weight_matrix(begin, end, neighbors, kernel, parameters[target_dimension]); + return TapkeeOutput(eigendecomposition_via(SmallestEigenvalues, weight_matrix, parameters[target_dimension]).first, unimplementedProjectingFunction()); } __TAPKEE_END_IMPLEMENTATION() diff --git a/include/tapkee/methods/isomap.hpp b/include/tapkee/methods/isomap.hpp index 81ba37a7..e7b9e3df 100644 --- a/include/tapkee/methods/isomap.hpp +++ b/include/tapkee/methods/isomap.hpp @@ -6,7 +6,6 @@ /* Tapkee includes */ #include -#include #include #include /* End of Tapkee includes */ @@ -17,20 +16,23 @@ namespace tapkee_internal { __TAPKEE_IMPLEMENTATION(Isomap) + void validate() + { + } + TapkeeOutput embed() { - Neighbors neighbors = this->findNeighborsWith(this->plain_distance); + Neighbors neighbors = find_neighbors_with(plain_distance); DenseSymmetricMatrix shortest_distances_matrix = - compute_shortest_distances_matrix(this->begin, this->end, 
neighbors, this->distance); + compute_shortest_distances_matrix(begin, end, neighbors, distance); shortest_distances_matrix = shortest_distances_matrix.array().square(); centerMatrix(shortest_distances_matrix); shortest_distances_matrix.array() *= -0.5; EigendecompositionResult embedding = - eigendecomposition(this->parameters[eigen_method], this->parameters[computation_strategy], LargestEigenvalues, - shortest_distances_matrix, this->parameters[target_dimension]); + eigendecomposition_via(LargestEigenvalues, shortest_distances_matrix, parameters[target_dimension]); - for (IndexType i = 0; i < static_cast(this->parameters[target_dimension]); i++) + for (IndexType i = 0; i < static_cast(parameters[target_dimension]); i++) embedding.first.col(i).array() *= sqrt(embedding.second(i)); return TapkeeOutput(embedding.first, unimplementedProjectingFunction()); diff --git a/include/tapkee/methods/kernel_local_tangent_space_alignment.hpp b/include/tapkee/methods/kernel_local_tangent_space_alignment.hpp index 1d0e9c6c..479caed9 100644 --- a/include/tapkee/methods/kernel_local_tangent_space_alignment.hpp +++ b/include/tapkee/methods/kernel_local_tangent_space_alignment.hpp @@ -6,7 +6,6 @@ /* Tapkee includes */ #include -#include #include /* End of Tapkee includes */ @@ -16,13 +15,16 @@ namespace tapkee_internal { __TAPKEE_IMPLEMENTATION(KernelLocalTangentSpaceAlignment) + void validate() + { + } + TapkeeOutput embed() { - Neighbors neighbors = this->findNeighborsWith(this->kernel_distance); + Neighbors neighbors = find_neighbors_with(kernel_distance); SparseWeightMatrix weight_matrix = tangent_weight_matrix( - this->begin, this->end, neighbors, this->kernel, this->parameters[target_dimension], this->parameters[nullspace_shift]); - DenseMatrix embedding = eigendecomposition(this->parameters[eigen_method], this->parameters[computation_strategy], - SmallestEigenvalues, weight_matrix, this->parameters[target_dimension]).first; + begin, end, neighbors, kernel, 
parameters[target_dimension], parameters[nullspace_shift]); + DenseMatrix embedding = eigendecomposition_via(SmallestEigenvalues, weight_matrix, parameters[target_dimension]).first; return TapkeeOutput(embedding, unimplementedProjectingFunction()); } diff --git a/include/tapkee/methods/kernel_locally_linear_embedding.hpp b/include/tapkee/methods/kernel_locally_linear_embedding.hpp index adcf2046..51007e37 100644 --- a/include/tapkee/methods/kernel_locally_linear_embedding.hpp +++ b/include/tapkee/methods/kernel_locally_linear_embedding.hpp @@ -6,7 +6,6 @@ /* Tapkee includes */ #include -#include #include /* End of Tapkee includes */ @@ -16,13 +15,16 @@ namespace tapkee_internal { __TAPKEE_IMPLEMENTATION(KernelLocallyLinearEmbedding) + void validate() + { + } + TapkeeOutput embed() { - Neighbors neighbors = this->findNeighborsWith(this->kernel_distance); + Neighbors neighbors = find_neighbors_with(kernel_distance); SparseWeightMatrix weight_matrix = - linear_weight_matrix(this->begin, this->end, neighbors, this->kernel, this->parameters[nullspace_shift], this->parameters[klle_shift]); - DenseMatrix embedding = eigendecomposition(this->parameters[eigen_method], this->parameters[computation_strategy], - SmallestEigenvalues, weight_matrix, this->parameters[target_dimension]).first; + linear_weight_matrix(begin, end, neighbors, kernel, parameters[nullspace_shift], parameters[klle_shift]); + DenseMatrix embedding = eigendecomposition_via(SmallestEigenvalues, weight_matrix, parameters[target_dimension]).first; return TapkeeOutput(embedding, unimplementedProjectingFunction()); } diff --git a/include/tapkee/methods/kernel_pca.hpp b/include/tapkee/methods/kernel_pca.hpp index d9309996..50b14778 100644 --- a/include/tapkee/methods/kernel_pca.hpp +++ b/include/tapkee/methods/kernel_pca.hpp @@ -6,7 +6,6 @@ /* Tapkee includes */ #include -#include #include /* End of Tapkee includes */ @@ -16,13 +15,16 @@ namespace tapkee_internal { 
__TAPKEE_IMPLEMENTATION(KernelPrincipalComponentAnalysis) + void validate() + { + } + TapkeeOutput embed() { - DenseSymmetricMatrix centered_kernel_matrix = compute_centered_kernel_matrix(this->begin, this->end, this->kernel); + DenseSymmetricMatrix centered_kernel_matrix = compute_centered_kernel_matrix(begin, end, kernel); EigendecompositionResult embedding = - eigendecomposition(this->parameters[eigen_method], this->parameters[computation_strategy], LargestEigenvalues, - centered_kernel_matrix, this->parameters[target_dimension]); - for (IndexType i = 0; i < static_cast(this->parameters[target_dimension]); i++) + eigendecomposition_via(LargestEigenvalues, centered_kernel_matrix, parameters[target_dimension]); + for (IndexType i = 0; i < static_cast(parameters[target_dimension]); i++) embedding.first.col(i).array() *= sqrt(embedding.second(i)); return TapkeeOutput(embedding.first, unimplementedProjectingFunction()); } diff --git a/include/tapkee/methods/landmark_isomap.hpp b/include/tapkee/methods/landmark_isomap.hpp index be309eb4..d2499375 100644 --- a/include/tapkee/methods/landmark_isomap.hpp +++ b/include/tapkee/methods/landmark_isomap.hpp @@ -6,9 +6,9 @@ /* Tapkee includes */ #include -#include #include #include +#include /* End of Tapkee includes */ namespace tapkee @@ -17,13 +17,17 @@ namespace tapkee_internal { __TAPKEE_IMPLEMENTATION(LandmarkIsomap) + void validate() + { + parameters[landmark_ratio].checked().satisfies(InClosedRange(3.0 / n_vectors, 1.0)).orThrow(); + } + TapkeeOutput embed() { - this->parameters[landmark_ratio].checked().satisfies(InClosedRange(3.0 / this->n_vectors, 1.0)).orThrow(); - Neighbors neighbors = this->findNeighborsWith(this->plain_distance); - Landmarks landmarks = select_landmarks_random(this->begin, this->end, this->parameters[landmark_ratio]); - DenseMatrix distance_matrix = compute_shortest_distances_matrix(this->begin, this->end, landmarks, neighbors, this->distance); + Neighbors neighbors = 
find_neighbors_with(plain_distance); + Landmarks landmarks = select_landmarks_random(begin, end, parameters[landmark_ratio]); + DenseMatrix distance_matrix = compute_shortest_distances_matrix(begin, end, landmarks, neighbors, distance); distance_matrix = distance_matrix.array().square(); DenseVector col_means = distance_matrix.colwise().mean(); @@ -36,23 +40,21 @@ __TAPKEE_IMPLEMENTATION(LandmarkIsomap) EigendecompositionResult landmarks_embedding; - if (this->parameters[eigen_method].is(Dense)) + if (parameters[eigen_method].is(Dense)) { DenseMatrix distance_matrix_sym = distance_matrix * distance_matrix.transpose(); landmarks_embedding = - eigendecomposition(this->parameters[eigen_method], this->parameters[computation_strategy], LargestEigenvalues, - distance_matrix_sym, this->parameters[target_dimension]); + eigendecomposition_via(LargestEigenvalues, distance_matrix_sym, parameters[target_dimension]); } else { landmarks_embedding = - eigendecomposition(this->parameters[eigen_method], this->parameters[computation_strategy], - SquaredLargestEigenvalues, distance_matrix, this->parameters[target_dimension]); + eigendecomposition_via(SquaredLargestEigenvalues, distance_matrix, parameters[target_dimension]); } DenseMatrix embedding = distance_matrix.transpose() * landmarks_embedding.first; - for (IndexType i = 0; i < static_cast(this->parameters[target_dimension]); i++) + for (IndexType i = 0; i < static_cast(parameters[target_dimension]); i++) embedding.col(i).array() /= sqrt(sqrt(landmarks_embedding.second(i))); return TapkeeOutput(embedding, unimplementedProjectingFunction()); } diff --git a/include/tapkee/methods/landmark_multidimensional_scaling.hpp b/include/tapkee/methods/landmark_multidimensional_scaling.hpp index b7f97df6..9223d90e 100644 --- a/include/tapkee/methods/landmark_multidimensional_scaling.hpp +++ b/include/tapkee/methods/landmark_multidimensional_scaling.hpp @@ -6,8 +6,8 @@ /* Tapkee includes */ #include -#include #include +#include /* End of 
Tapkee includes */ namespace tapkee @@ -16,22 +16,25 @@ namespace tapkee_internal { __TAPKEE_IMPLEMENTATION(LandmarkMultidimensionalScaling) + void validate() + { + parameters[landmark_ratio].checked().satisfies(InClosedRange(3.0 / n_vectors, 1.0)).orThrow(); + } + TapkeeOutput embed() { - this->parameters[landmark_ratio].checked().satisfies(InClosedRange(3.0 / this->n_vectors, 1.0)).orThrow(); - Landmarks landmarks = select_landmarks_random(this->begin, this->end, this->parameters[landmark_ratio]); - DenseSymmetricMatrix distance_matrix = compute_distance_matrix(this->begin, this->end, landmarks, this->distance); + Landmarks landmarks = select_landmarks_random(begin, end, parameters[landmark_ratio]); + DenseSymmetricMatrix distance_matrix = compute_distance_matrix(begin, end, landmarks, distance); DenseVector landmark_distances_squared = distance_matrix.colwise().mean(); centerMatrix(distance_matrix); distance_matrix.array() *= -0.5; EigendecompositionResult landmarks_embedding = - eigendecomposition(this->parameters[eigen_method], this->parameters[computation_strategy], LargestEigenvalues, - distance_matrix, this->parameters[target_dimension]); - for (IndexType i = 0; i < static_cast(this->parameters[target_dimension]); i++) + eigendecomposition_via(LargestEigenvalues, distance_matrix, parameters[target_dimension]); + for (IndexType i = 0; i < static_cast(parameters[target_dimension]); i++) landmarks_embedding.first.col(i).array() *= sqrt(landmarks_embedding.second(i)); - return TapkeeOutput(triangulate(this->begin, this->end, this->distance, landmarks, landmark_distances_squared, - landmarks_embedding, this->parameters[target_dimension]), + return TapkeeOutput(triangulate(begin, end, distance, landmarks, landmark_distances_squared, + landmarks_embedding, parameters[target_dimension]), unimplementedProjectingFunction()); } __TAPKEE_END_IMPLEMENTATION() diff --git a/include/tapkee/methods/laplacian_eigenmaps.hpp b/include/tapkee/methods/laplacian_eigenmaps.hpp 
index 8d4d3c1c..41b4194f 100644 --- a/include/tapkee/methods/laplacian_eigenmaps.hpp +++ b/include/tapkee/methods/laplacian_eigenmaps.hpp @@ -16,15 +16,18 @@ namespace tapkee_internal { __TAPKEE_IMPLEMENTATION(LaplacianEigenmaps) - TapkeeOutput embed() + void validate() { - this->parameters[gaussian_kernel_width].checked().satisfies(Positivity()).orThrow(); + parameters[gaussian_kernel_width].checked().satisfies(Positivity()).orThrow(); + } - Neighbors neighbors = this->findNeighborsWith(this->plain_distance); - Laplacian laplacian = compute_laplacian(this->begin, this->end, neighbors, this->distance, this->parameters[gaussian_kernel_width]); - return TapkeeOutput(generalized_eigendecomposition(this->parameters[eigen_method], this->parameters[computation_strategy], + TapkeeOutput embed() + { + Neighbors neighbors = find_neighbors_with(plain_distance); + Laplacian laplacian = compute_laplacian(begin, end, neighbors, distance, parameters[gaussian_kernel_width]); + return TapkeeOutput(generalized_eigendecomposition(parameters[eigen_method], parameters[computation_strategy], SmallestEigenvalues, laplacian.first, laplacian.second, - this->parameters[target_dimension]).first, + parameters[target_dimension]).first, unimplementedProjectingFunction()); } __TAPKEE_END_IMPLEMENTATION() diff --git a/include/tapkee/methods/linear_local_tangent_space_alignment.hpp b/include/tapkee/methods/linear_local_tangent_space_alignment.hpp index f25fd70a..b566d780 100644 --- a/include/tapkee/methods/linear_local_tangent_space_alignment.hpp +++ b/include/tapkee/methods/linear_local_tangent_space_alignment.hpp @@ -16,20 +16,24 @@ namespace tapkee_internal { __TAPKEE_IMPLEMENTATION(LinearLocalTangentSpaceAlignment) + void validate() + { + } + TapkeeOutput embed() { - Neighbors neighbors = this->findNeighborsWith(this->kernel_distance); + Neighbors neighbors = find_neighbors_with(kernel_distance); SparseWeightMatrix weight_matrix = tangent_weight_matrix( - this->begin, this->end, neighbors, 
this->kernel, this->parameters[target_dimension], this->parameters[nullspace_shift]); + begin, end, neighbors, kernel, parameters[target_dimension], parameters[nullspace_shift]); DenseSymmetricMatrixPair eig_matrices = - construct_lltsa_eigenproblem(weight_matrix, this->begin, this->end, this->features, this->current_dimension); + construct_lltsa_eigenproblem(weight_matrix, begin, end, features, current_dimension); EigendecompositionResult projection_result = generalized_eigendecomposition( - this->parameters[eigen_method], this->parameters[computation_strategy], SmallestEigenvalues, eig_matrices.first, - eig_matrices.second, this->parameters[target_dimension]); - DenseVector mean_vector = compute_mean(this->begin, this->end, this->features, this->current_dimension); + parameters[eigen_method], parameters[computation_strategy], SmallestEigenvalues, eig_matrices.first, + eig_matrices.second, parameters[target_dimension]); + DenseVector mean_vector = compute_mean(begin, end, features, current_dimension); tapkee::ProjectingFunction projecting_function( new tapkee::MatrixProjectionImplementation(projection_result.first, mean_vector)); - return TapkeeOutput(project(projection_result.first, mean_vector, this->begin, this->end, this->features, this->current_dimension), + return TapkeeOutput(project(projection_result.first, mean_vector, begin, end, features, current_dimension), projecting_function); } __TAPKEE_END_IMPLEMENTATION() diff --git a/include/tapkee/methods/locality_preserving_projections.hpp b/include/tapkee/methods/locality_preserving_projections.hpp index c72b16b3..7c54893f 100644 --- a/include/tapkee/methods/locality_preserving_projections.hpp +++ b/include/tapkee/methods/locality_preserving_projections.hpp @@ -7,7 +7,6 @@ /* Tapkee includes */ #include #include -#include #include #include /* End of Tapkee includes */ @@ -18,21 +17,24 @@ namespace tapkee_internal { __TAPKEE_IMPLEMENTATION(LocalityPreservingProjections) - TapkeeOutput embed() + void validate() 
{ - this->parameters[gaussian_kernel_width].checked().satisfies(Positivity()).orThrow(); + parameters[gaussian_kernel_width].checked().satisfies(Positivity()).orThrow(); + } - Neighbors neighbors = this->findNeighborsWith(this->plain_distance); - Laplacian laplacian = compute_laplacian(this->begin, this->end, neighbors, this->distance, this->parameters[gaussian_kernel_width]); + TapkeeOutput embed() + { + Neighbors neighbors = find_neighbors_with(plain_distance); + Laplacian laplacian = compute_laplacian(begin, end, neighbors, distance, parameters[gaussian_kernel_width]); DenseSymmetricMatrixPair eigenproblem_matrices = construct_locality_preserving_eigenproblem( - laplacian.first, laplacian.second, this->begin, this->end, this->features, this->current_dimension); + laplacian.first, laplacian.second, begin, end, features, current_dimension); EigendecompositionResult projection_result = generalized_eigendecomposition( - this->parameters[eigen_method], this->parameters[computation_strategy], SmallestEigenvalues, - eigenproblem_matrices.first, eigenproblem_matrices.second, this->parameters[target_dimension]); - DenseVector mean_vector = compute_mean(this->begin, this->end, this->features, this->current_dimension); + parameters[eigen_method], parameters[computation_strategy], SmallestEigenvalues, + eigenproblem_matrices.first, eigenproblem_matrices.second, parameters[target_dimension]); + DenseVector mean_vector = compute_mean(begin, end, features, current_dimension); tapkee::ProjectingFunction projecting_function( new tapkee::MatrixProjectionImplementation(projection_result.first, mean_vector)); - return TapkeeOutput(project(projection_result.first, mean_vector, this->begin, this->end, this->features, this->current_dimension), + return TapkeeOutput(project(projection_result.first, mean_vector, begin, end, features, current_dimension), projecting_function); } __TAPKEE_END_IMPLEMENTATION() diff --git a/include/tapkee/methods/manifold_sculpting.hpp 
b/include/tapkee/methods/manifold_sculpting.hpp index 098d9c2d..576a5c89 100644 --- a/include/tapkee/methods/manifold_sculpting.hpp +++ b/include/tapkee/methods/manifold_sculpting.hpp @@ -15,16 +15,20 @@ namespace tapkee_internal { __TAPKEE_IMPLEMENTATION(ManifoldSculpting) + void validate() + { + parameters[squishing_rate].checked().satisfies(InRange(0.0, 1.0)).orThrow(); + } + TapkeeOutput embed() { - this->parameters[squishing_rate].checked().satisfies(InRange(0.0, 1.0)).orThrow(); - DenseMatrix embedding = dense_matrix_from_features(this->features, this->current_dimension, this->begin, this->end); + DenseMatrix embedding = dense_matrix_from_features(features, current_dimension, begin, end); - Neighbors neighbors = this->findNeighborsWith(this->plain_distance); + Neighbors neighbors = find_neighbors_with(plain_distance); - manifold_sculpting_embed(this->begin, this->end, embedding, this->parameters[target_dimension], neighbors, this->distance, - this->parameters[max_iteration], this->parameters[squishing_rate]); + manifold_sculpting_embed(begin, end, embedding, parameters[target_dimension], neighbors, distance, + parameters[max_iteration], parameters[squishing_rate]); return TapkeeOutput(embedding, unimplementedProjectingFunction()); } diff --git a/include/tapkee/methods/multidimensional_scaling.hpp b/include/tapkee/methods/multidimensional_scaling.hpp index b9c02473..bf1b754d 100644 --- a/include/tapkee/methods/multidimensional_scaling.hpp +++ b/include/tapkee/methods/multidimensional_scaling.hpp @@ -6,7 +6,6 @@ /* Tapkee includes */ #include -#include #include /* End of Tapkee includes */ @@ -16,16 +15,19 @@ namespace tapkee_internal { __TAPKEE_IMPLEMENTATION(MultidimensionalScaling) + void validate() + { + } + TapkeeOutput embed() { - DenseSymmetricMatrix distance_matrix = compute_distance_matrix(this->begin, this->end, this->distance); + DenseSymmetricMatrix distance_matrix = compute_distance_matrix(begin, end, distance); centerMatrix(distance_matrix); 
distance_matrix.array() *= -0.5; EigendecompositionResult embedding = - eigendecomposition(this->parameters[eigen_method], this->parameters[computation_strategy], LargestEigenvalues, - distance_matrix, this->parameters[target_dimension]); + eigendecomposition_via(LargestEigenvalues, distance_matrix, parameters[target_dimension]); - for (IndexType i = 0; i < static_cast(this->parameters[target_dimension]); i++) + for (IndexType i = 0; i < static_cast(parameters[target_dimension]); i++) embedding.first.col(i).array() *= sqrt(embedding.second(i)); return TapkeeOutput(embedding.first, unimplementedProjectingFunction()); } diff --git a/include/tapkee/methods/neighborhood_preserving_embedding.hpp b/include/tapkee/methods/neighborhood_preserving_embedding.hpp index 04e5523e..d287e1d5 100644 --- a/include/tapkee/methods/neighborhood_preserving_embedding.hpp +++ b/include/tapkee/methods/neighborhood_preserving_embedding.hpp @@ -6,7 +6,6 @@ /* Tapkee includes */ #include -#include #include #include #include @@ -18,20 +17,24 @@ namespace tapkee_internal { __TAPKEE_IMPLEMENTATION(NeighborhoodPreservingEmbedding) + void validate() + { + } + TapkeeOutput embed() { - Neighbors neighbors = this->findNeighborsWith(this->kernel_distance); + Neighbors neighbors = find_neighbors_with(kernel_distance); SparseWeightMatrix weight_matrix = - linear_weight_matrix(this->begin, this->end, neighbors, this->kernel, this->parameters[nullspace_shift], this->parameters[klle_shift]); + linear_weight_matrix(begin, end, neighbors, kernel, parameters[nullspace_shift], parameters[klle_shift]); DenseSymmetricMatrixPair eig_matrices = - construct_neighborhood_preserving_eigenproblem(weight_matrix, this->begin, this->end, this->features, this->current_dimension); + construct_neighborhood_preserving_eigenproblem(weight_matrix, begin, end, features, current_dimension); EigendecompositionResult projection_result = generalized_eigendecomposition( - this->parameters[eigen_method], 
this->parameters[computation_strategy], SmallestEigenvalues, eig_matrices.first, - eig_matrices.second, this->parameters[target_dimension]); - DenseVector mean_vector = compute_mean(this->begin, this->end, this->features, this->current_dimension); + parameters[eigen_method], parameters[computation_strategy], SmallestEigenvalues, eig_matrices.first, + eig_matrices.second, parameters[target_dimension]); + DenseVector mean_vector = compute_mean(begin, end, features, current_dimension); tapkee::ProjectingFunction projecting_function( new tapkee::MatrixProjectionImplementation(projection_result.first, mean_vector)); - return TapkeeOutput(project(projection_result.first, mean_vector, this->begin, this->end, this->features, this->current_dimension), + return TapkeeOutput(project(projection_result.first, mean_vector, begin, end, features, current_dimension), projecting_function); } __TAPKEE_END_IMPLEMENTATION() diff --git a/include/tapkee/methods/pca.hpp b/include/tapkee/methods/pca.hpp index c8117b63..92b05385 100644 --- a/include/tapkee/methods/pca.hpp +++ b/include/tapkee/methods/pca.hpp @@ -6,7 +6,6 @@ /* Tapkee includes */ #include -#include #include /* End of Tapkee includes */ @@ -16,17 +15,20 @@ namespace tapkee_internal { __TAPKEE_IMPLEMENTATION(PrincipalComponentAnalysis) + void validate() + { + } + TapkeeOutput embed() { - DenseVector mean_vector = compute_mean(this->begin, this->end, this->features, this->current_dimension); + DenseVector mean_vector = compute_mean(begin, end, features, current_dimension); DenseSymmetricMatrix centered_covariance_matrix = - compute_covariance_matrix(this->begin, this->end, mean_vector, this->features, this->current_dimension); + compute_covariance_matrix(begin, end, mean_vector, features, current_dimension); EigendecompositionResult projection_result = - eigendecomposition(this->parameters[eigen_method], this->parameters[computation_strategy], LargestEigenvalues, - centered_covariance_matrix, 
this->parameters[target_dimension]); + eigendecomposition_via(LargestEigenvalues, centered_covariance_matrix, parameters[target_dimension]); tapkee::ProjectingFunction projecting_function( new tapkee::MatrixProjectionImplementation(projection_result.first, mean_vector)); - return TapkeeOutput(project(projection_result.first, mean_vector, this->begin, this->end, this->features, this->current_dimension), + return TapkeeOutput(project(projection_result.first, mean_vector, begin, end, features, current_dimension), projecting_function); } __TAPKEE_END_IMPLEMENTATION() diff --git a/include/tapkee/methods/random_projection.hpp b/include/tapkee/methods/random_projection.hpp index 2f5bf0a5..c54f4ba0 100644 --- a/include/tapkee/methods/random_projection.hpp +++ b/include/tapkee/methods/random_projection.hpp @@ -6,7 +6,6 @@ /* Tapkee includes */ #include -#include #include /* End of Tapkee includes */ @@ -16,14 +15,18 @@ namespace tapkee_internal { __TAPKEE_IMPLEMENTATION(RandomProjection) + void validate() + { + } + TapkeeOutput embed() { - DenseMatrix projection_matrix = gaussian_projection_matrix(this->current_dimension, this->parameters[target_dimension]); - DenseVector mean_vector = compute_mean(this->begin, this->end, this->features, this->current_dimension); + DenseMatrix projection_matrix = gaussian_projection_matrix(current_dimension, parameters[target_dimension]); + DenseVector mean_vector = compute_mean(begin, end, features, current_dimension); tapkee::ProjectingFunction projecting_function( new tapkee::MatrixProjectionImplementation(projection_matrix, mean_vector)); - return TapkeeOutput(project(projection_matrix, mean_vector, this->begin, this->end, this->features, this->current_dimension), + return TapkeeOutput(project(projection_matrix, mean_vector, begin, end, features, current_dimension), projecting_function); } __TAPKEE_END_IMPLEMENTATION() diff --git a/include/tapkee/methods/stochastic_proximity_embedding.hpp 
b/include/tapkee/methods/stochastic_proximity_embedding.hpp index 6887fda0..a2cc4003 100644 --- a/include/tapkee/methods/stochastic_proximity_embedding.hpp +++ b/include/tapkee/methods/stochastic_proximity_embedding.hpp @@ -15,19 +15,23 @@ namespace tapkee_internal { __TAPKEE_IMPLEMENTATION(StochasticProximityEmbedding) + void validate() + { + parameters[spe_tolerance].checked().satisfies(Positivity()).orThrow(); + parameters[spe_num_updates].checked().satisfies(Positivity()).orThrow(); + } + TapkeeOutput embed() { - this->parameters[spe_tolerance].checked().satisfies(Positivity()).orThrow(); - this->parameters[spe_num_updates].checked().satisfies(Positivity()).orThrow(); Neighbors neighbors; - if (this->parameters[spe_global_strategy].is(false)) + if (parameters[spe_global_strategy].is(false)) { - neighbors = this->findNeighborsWith(this->plain_distance); + neighbors = find_neighbors_with(plain_distance); } - return TapkeeOutput(spe_embedding(this->begin, this->end, this->distance, neighbors, this->parameters[target_dimension], - this->parameters[spe_global_strategy], this->parameters[spe_tolerance], - this->parameters[spe_num_updates], this->parameters[max_iteration]), + return TapkeeOutput(spe_embedding(begin, end, distance, neighbors, parameters[target_dimension], + parameters[spe_global_strategy], parameters[spe_tolerance], + parameters[spe_num_updates], parameters[max_iteration]), unimplementedProjectingFunction()); } __TAPKEE_END_IMPLEMENTATION() diff --git a/include/tapkee/methods/tsne.hpp b/include/tapkee/methods/tsne.hpp index 1f146fe1..5b4288f9 100644 --- a/include/tapkee/methods/tsne.hpp +++ b/include/tapkee/methods/tsne.hpp @@ -15,17 +15,20 @@ namespace tapkee_internal { __TAPKEE_IMPLEMENTATION(tDistributedStochasticNeighborEmbedding) - TapkeeOutput embed() + void validate() { - this->parameters[sne_perplexity].checked().satisfies(InClosedRange(0.0, (this->n_vectors - 1) / 3.0)).orThrow(); - 
this->parameters[sne_theta].checked().satisfies(NonNegativity()).orThrow(); + parameters[sne_perplexity].checked().satisfies(InClosedRange(0.0, (n_vectors - 1) / 3.0)).orThrow(); + parameters[sne_theta].checked().satisfies(NonNegativity()).orThrow(); + } - DenseMatrix data = dense_matrix_from_features(this->features, this->current_dimension, this->begin, this->end); + TapkeeOutput embed() + { + DenseMatrix data = dense_matrix_from_features(features, current_dimension, begin, end); - DenseMatrix embedding(static_cast(this->parameters[target_dimension]), this->n_vectors); + DenseMatrix embedding(static_cast(parameters[target_dimension]), n_vectors); tsne::TSNE tsne; - tsne.run(data, data.cols(), data.rows(), embedding.data(), this->parameters[target_dimension], - this->parameters[sne_perplexity], this->parameters[sne_theta]); + tsne.run(data, data.cols(), data.rows(), embedding.data(), parameters[target_dimension], + parameters[sne_perplexity], parameters[sne_theta]); return TapkeeOutput(embedding.transpose(), unimplementedProjectingFunction()); } diff --git a/include/tapkee/neighbors/neighbors.hpp b/include/tapkee/neighbors/neighbors.hpp index 180b0474..b9da9464 100644 --- a/include/tapkee/neighbors/neighbors.hpp +++ b/include/tapkee/neighbors/neighbors.hpp @@ -176,11 +176,11 @@ Neighbors find_neighbors(NeighborsMethod method, const RandomAccessIterator& beg { if (k > static_cast(end - begin - 1)) { - LoggingSingleton::instance().message_warning("Number of neighbors is greater than number of objects to embed. " + Logging::instance().message_warning("Number of neighbors is greater than number of objects to embed. 
" "Using greatest possible number of neighbors."); k = static_cast(end - begin - 1); } - LoggingSingleton::instance().message_info("Using the " + get_neighbors_method_name(method) + + Logging::instance().message_info("Using the " + get_neighbors_method_name(method) + " neighbors computation method."); Neighbors neighbors; @@ -199,7 +199,7 @@ Neighbors find_neighbors(NeighborsMethod method, const RandomAccessIterator& beg "is not connected. Recomputing with a " "larger number of neighbors {}.", k, 2 * k); - LoggingSingleton::instance().message_warning(message); + Logging::instance().message_warning(message); neighbors = find_neighbors(method, begin, end, callback, 2 * k, check_connectivity); } else @@ -207,7 +207,7 @@ Neighbors find_neighbors(NeighborsMethod method, const RandomAccessIterator& beg const std::string message = fmt::format("The neighborhood graph with {} neighbors " "is connected.", k); - LoggingSingleton::instance().message_info(message); + Logging::instance().message_info(message); } return neighbors; } diff --git a/include/tapkee/routines/eigendecomposition.hpp b/include/tapkee/routines/eigendecomposition.hpp index 18ac8d27..fcd781d9 100644 --- a/include/tapkee/routines/eigendecomposition.hpp +++ b/include/tapkee/routines/eigendecomposition.hpp @@ -37,7 +37,7 @@ EigendecompositionResult eigendecomposition_impl_arpack(const MatrixType& wm, In if (arpack.info() == Eigen::Success) { std::string message = fmt::format("Took {} iterations.", arpack.getNbrIterations()); - LoggingSingleton::instance().message_info(message); + Logging::instance().message_info(message); DenseMatrix selected_eigenvectors = arpack.eigenvectors().rightCols(target_dimension); return EigendecompositionResult(selected_eigenvectors, arpack.eigenvalues().tail(target_dimension)); } @@ -329,8 +329,7 @@ EigendecompositionResult eigendecomposition(const EigenMethod& method, const Com const EigendecompositionStrategy& eigen_strategy, const MatrixType& m, IndexType target_dimension) { - 
LoggingSingleton::instance().message_info( - fmt::format("Using the {} eigendecomposition method.", get_eigen_method_name(method))); + Logging::instance().message_info(fmt::format("Using the {} eigendecomposition method.", get_eigen_method_name(method))); #ifdef TAPKEE_WITH_ARPACK if (method.is(Arpack)) return eigendecomposition_impl().arpack(m, strategy, eigen_strategy, target_dimension); diff --git a/include/tapkee/routines/generalized_eigendecomposition.hpp b/include/tapkee/routines/generalized_eigendecomposition.hpp index 9240552c..1b29b854 100644 --- a/include/tapkee/routines/generalized_eigendecomposition.hpp +++ b/include/tapkee/routines/generalized_eigendecomposition.hpp @@ -30,7 +30,7 @@ EigendecompositionResult generalized_eigendecomposition_impl_arpack(const LMatri if (arpack.info() == Eigen::Success) { std::string message = fmt::format("Took {} iterations.", arpack.getNbrIterations()); - LoggingSingleton::instance().message_info(message); + Logging::instance().message_info(message); DenseMatrix selected_eigenvectors = (arpack.eigenvectors()).rightCols(target_dimension); return EigendecompositionResult(selected_eigenvectors, arpack.eigenvalues().tail(target_dimension)); } @@ -169,8 +169,7 @@ EigendecompositionResult generalized_eigendecomposition(const EigenMethod& metho const LMatrixType& lhs, const RMatrixType& rhs, IndexType target_dimension) { - LoggingSingleton::instance().message_info( - fmt::format("Using the {} eigendecomposition method.", get_eigen_method_name(method))); + Logging::instance().message_info(fmt::format("Using the {} eigendecomposition method.", get_eigen_method_name(method))); #ifdef TAPKEE_WITH_ARPACK if (method.is(Arpack)) return generalized_eigendecomposition_impl().arpack(lhs, rhs, strategy, diff --git a/include/tapkee/routines/landmarks.hpp b/include/tapkee/routines/landmarks.hpp new file mode 100644 index 00000000..ecb822bf --- /dev/null +++ b/include/tapkee/routines/landmarks.hpp @@ -0,0 +1,78 @@ +/* This software is 
distributed under BSD 3-clause license (see LICENSE file). + * + * Copyright (c) 2012-2024 Sergey Lisitsyn + */ +#pragma once + +/* Tapkee includes */ +#include +#include +/* End of Tapkee includes */ + +namespace tapkee +{ +namespace tapkee_internal +{ + +template +Landmarks select_landmarks_random(RandomAccessIterator begin, RandomAccessIterator end, ScalarType ratio) +{ + Landmarks landmarks; + landmarks.reserve(end - begin); + for (RandomAccessIterator iter = begin; iter != end; ++iter) + landmarks.push_back(iter - begin); + tapkee::random_shuffle(landmarks.begin(), landmarks.end()); + landmarks.erase(landmarks.begin() + static_cast(landmarks.size() * ratio), landmarks.end()); + return landmarks; +} + +template +DenseMatrix triangulate(RandomAccessIterator begin, RandomAccessIterator end, PairwiseCallback distance_callback, + Landmarks& landmarks, DenseVector& landmark_distances_squared, + EigendecompositionResult& landmarks_embedding, IndexType target_dimension) +{ + timed_context context("Landmark triangulation"); + + const IndexType n_vectors = end - begin; + const IndexType n_landmarks = landmarks.size(); + + std::vector to_process(n_vectors, true); + + DenseMatrix embedding(n_vectors, target_dimension); + + for (IndexType index_iter = 0; index_iter < n_landmarks; ++index_iter) + { + to_process[landmarks[index_iter]] = false; + embedding.row(landmarks[index_iter]).noalias() = landmarks_embedding.first.row(index_iter); + } + + for (IndexType i = 0; i < target_dimension; ++i) + landmarks_embedding.first.col(i).array() /= landmarks_embedding.second(i); + +#pragma omp parallel + { + DenseVector distances_to_landmarks(n_landmarks); + IndexType index_iter; +#pragma omp for nowait + for (index_iter = 0; index_iter < n_vectors; ++index_iter) + { + if (!to_process[index_iter]) + continue; + + for (IndexType i = 0; i < n_landmarks; ++i) + { + ScalarType d = distance_callback.distance(begin[index_iter], begin[landmarks[i]]); + distances_to_landmarks(i) = d * d; + } + 
// distances_to_landmarks.array().square(); + + distances_to_landmarks -= landmark_distances_squared; + embedding.row(index_iter).noalias() = -0.5 * landmarks_embedding.first.transpose() * distances_to_landmarks; + } + } + + return embedding; +} + +} // End of namespace tapkee_internal +} // End of namespace tapkee diff --git a/include/tapkee/routines/multidimensional_scaling.hpp b/include/tapkee/routines/multidimensional_scaling.hpp index c90fa6b0..ec4a1155 100644 --- a/include/tapkee/routines/multidimensional_scaling.hpp +++ b/include/tapkee/routines/multidimensional_scaling.hpp @@ -14,18 +14,6 @@ namespace tapkee namespace tapkee_internal { -template -Landmarks select_landmarks_random(RandomAccessIterator begin, RandomAccessIterator end, ScalarType ratio) -{ - Landmarks landmarks; - landmarks.reserve(end - begin); - for (RandomAccessIterator iter = begin; iter != end; ++iter) - landmarks.push_back(iter - begin); - tapkee::random_shuffle(landmarks.begin(), landmarks.end()); - landmarks.erase(landmarks.begin() + static_cast(landmarks.size() * ratio), landmarks.end()); - return landmarks; -} - template DenseSymmetricMatrix compute_distance_matrix(RandomAccessIterator begin, RandomAccessIterator /*end*/, Landmarks& landmarks, PairwiseCallback callback) @@ -53,57 +41,6 @@ DenseSymmetricMatrix compute_distance_matrix(RandomAccessIterator begin, RandomA return distance_matrix; } -template -DenseMatrix triangulate(RandomAccessIterator begin, RandomAccessIterator end, PairwiseCallback distance_callback, - Landmarks& landmarks, DenseVector& landmark_distances_squared, - EigendecompositionResult& landmarks_embedding, IndexType target_dimension) -{ - timed_context context("Landmark triangulation"); - - const IndexType n_vectors = end - begin; - const IndexType n_landmarks = landmarks.size(); - - bool* to_process = new bool[n_vectors]; - std::fill(to_process, to_process + n_vectors, true); - - DenseMatrix embedding(n_vectors, target_dimension); - - for (IndexType index_iter 
= 0; index_iter < n_landmarks; ++index_iter) - { - to_process[landmarks[index_iter]] = false; - embedding.row(landmarks[index_iter]).noalias() = landmarks_embedding.first.row(index_iter); - } - - for (IndexType i = 0; i < target_dimension; ++i) - landmarks_embedding.first.col(i).array() /= landmarks_embedding.second(i); - -#pragma omp parallel - { - DenseVector distances_to_landmarks(n_landmarks); - IndexType index_iter; -#pragma omp for nowait - for (index_iter = 0; index_iter < n_vectors; ++index_iter) - { - if (!to_process[index_iter]) - continue; - - for (IndexType i = 0; i < n_landmarks; ++i) - { - ScalarType d = distance_callback.distance(begin[index_iter], begin[landmarks[i]]); - distances_to_landmarks(i) = d * d; - } - // distances_to_landmarks.array().square(); - - distances_to_landmarks -= landmark_distances_squared; - embedding.row(index_iter).noalias() = -0.5 * landmarks_embedding.first.transpose() * distances_to_landmarks; - } - } - - delete[] to_process; - - return embedding; -} - template DenseSymmetricMatrix compute_distance_matrix(RandomAccessIterator begin, RandomAccessIterator end, PairwiseCallback callback) diff --git a/include/tapkee/utils/logging.hpp b/include/tapkee/utils/logging.hpp index 07f8dec1..201a9f95 100644 --- a/include/tapkee/utils/logging.hpp +++ b/include/tapkee/utils/logging.hpp @@ -82,19 +82,19 @@ class DefaultLoggerImplementation : public LoggerImplementation //! Main logging singleton used by the library. Can use provided //! @ref LoggerImplementation if necessary. By default uses //! @ref DefaultLoggerImplementation. 
-class LoggingSingleton +class Logging { private: - LoggingSingleton() + Logging() : impl(new DefaultLoggerImplementation), LEVEL_ENABLED_FIELD_INITIALIZER(info, false), LEVEL_ENABLED_FIELD_INITIALIZER(warning, true), LEVEL_ENABLED_FIELD_INITIALIZER(debug, false), LEVEL_ENABLED_FIELD_INITIALIZER(error, true), LEVEL_ENABLED_FIELD_INITIALIZER(benchmark, false){}; - ~LoggingSingleton() + ~Logging() { delete impl; } - LoggingSingleton(const LoggingSingleton& ls); - void operator=(const LoggingSingleton& ls); + Logging(const Logging& ls) = delete; + void operator=(const Logging& ls) = delete; LoggerImplementation* impl; @@ -106,9 +106,9 @@ class LoggingSingleton public: //! @return instance of the singleton - static LoggingSingleton& instance() + static Logging& instance() { - static LoggingSingleton s; + static Logging s; return s; } diff --git a/include/tapkee/utils/time.hpp b/include/tapkee/utils/time.hpp index 9e70b341..c2ed0706 100644 --- a/include/tapkee/utils/time.hpp +++ b/include/tapkee/utils/time.hpp @@ -37,7 +37,7 @@ struct timed_context { std::string message = fmt::format("{} took {} seconds.", operation_name, double(CLOCK_GET - start_clock) / CLOCK_DIVISOR); - LoggingSingleton::instance().message_benchmark(message); + Logging::instance().message_benchmark(message); } }; } // namespace tapkee_internal diff --git a/src/cli/main.cpp b/src/cli/main.cpp index b22919d8..2c108f8b 100644 --- a/src/cli/main.cpp +++ b/src/cli/main.cpp @@ -45,42 +45,119 @@ template auto with_default(T defs) return cxxopts::value()->default_value(std::to_string(defs)); } +static const char* INPUT_FILE_KEYWORD_SHORT = "i"; static const char* INPUT_FILE_KEYWORD = "input-file"; +static const char* INPUT_FILE_DESCRIPTION = "Input filename to be used. Can be any file that can be opened for reading by the program. Expects delimiter-separated matrix of real values. 
See transposing options for more details on rows and columns."; + static const char* TRANSPOSE_INPUT_KEYWORD = "transpose-input"; +static const char* TRANSPOSE_INPUT_DESCRIPTION = "Whether input file should be considered transposed. By default a line means a row in a matrix (a single vector to be embedded)."; + static const char* TRANSPOSE_OUTPUT_KEYWORD = "transpose-output"; +static const char* TRANSPOSE_OUTPUT_DESCRIPTION = "Whether output file should be transposed. By default a line would be a row of embedding matrix (a single embedding vector)"; + +static const char* OUTPUT_FILE_KEYWORD_SHORT = "o"; static const char* OUTPUT_FILE_KEYWORD = "output-file"; +static const char* OUTPUT_FILE_DESCRIPTION = "Output filename to be used. Can be any file that can be opened for writing by the program"; + +static const char* OUTPUT_PROJECTION_MATRIX_FILE_KEYWORD_SHORT = "opmat"; static const char* OUTPUT_PROJECTION_MATRIX_FILE_KEYWORD = "output-projection-matrix-file"; +static const char* OUTPUT_PROJECTION_MATRIX_FILE_DESCRIPTION = "Filename to store the projection matrix calculated by the selected algorithm. Usually supported by linear algorithms such as PCA."; + +static const char* OUTPUT_PROJECTION_MEAN_FILE_KEYWORD_SHORT = "opmean"; static const char* OUTPUT_PROJECTION_MEAN_FILE_KEYWORD = "output-projection-mean-file"; +static const char* OUTPUT_PROJECTION_MEAN_FILE_DESCRIPTION = "Filename to store the mean vector calculated by the selected algorithm. 
Usually supported by linear algorithms such as PCA"; + +static const char* DELIMITER_KEYWORD_SHORT = "d"; static const char* DELIMITER_KEYWORD = "delimiter"; +static const char* DELIMITER_DESCRIPTION = "Delimiter to be used in reading and writing matrices"; + +static const char* HELP_KEYWORD_SHORT = "h"; static const char* HELP_KEYWORD = "help"; +static const char* HELP_DESCRIPTION = "Print usage of the program"; + static const char* BENCHMARK_KEYWORD = "benchmark"; +static const char* BENCHMARK_DESCRIPTION = "Output benchmarking information about the time of algorithm steps"; + static const char* VERBOSE_KEYWORD = "verbose"; +static const char* VERBOSE_DESCRIPTION = "Be more verbose in logging"; + static const char* DEBUG_KEYWORD = "debug"; +static const char* DEBUG_DESCRIPTION = "Output debugging information such as intermediary steps, parameters, and other internals"; + +static const char* METHOD_KEYWORD_SHORT = "m"; static const char* METHOD_KEYWORD = "method"; +static const std::string METHOD_DESCRIPTION = "Dimension reduction method. One of the following: " + + comma_separated_keys(DIMENSION_REDUCTION_METHODS.begin(), DIMENSION_REDUCTION_METHODS.end()); + +static const char* NEIGHBORS_METHOD_KEYWORD_SHORT = "nm"; static const char* NEIGHBORS_METHOD_KEYWORD = "neighbors-method"; +static const std::string NEIGHBORS_METHOD_DESCRIPTION = "Neighbors search method. One of the following: " + + comma_separated_keys(NEIGHBORS_METHODS.begin(), NEIGHBORS_METHODS.end()); + +static const char* EIGEN_METHOD_KEYWORD_SHORT = "em"; static const char* EIGEN_METHOD_KEYWORD = "eigen-method"; +static const std::string EIGEN_METHOD_DESCRIPTION = "Eigendecomposition method. 
One of the following: " + + comma_separated_keys(EIGEN_METHODS.begin(), EIGEN_METHODS.end()); + +static const char* COMPUTATION_STRATEGY_KEYWORD_SHORT = "cs"; static const char* COMPUTATION_STRATEGY_KEYWORD = "computation-strategy"; +static const std::string COMPUTATION_STRATEGY_DESCRIPTION = "Computation strategy. One of the following: " + + comma_separated_keys(COMPUTATION_STRATEGIES.begin(), COMPUTATION_STRATEGIES.end()); + +static const char* TARGET_DIMENSION_KEYWORD_SHORT = "td"; static const char* TARGET_DIMENSION_KEYWORD = "target-dimension"; +static const char* TARGET_DIMENSION_DESCRIPTION = "Target dimension"; + +static const char* NUM_NEIGHBORS_KEYWORD_SHORT = "k"; static const char* NUM_NEIGHBORS_KEYWORD = "num-neighbors"; +static const char* NUM_NEIGHBORS_DESCRIPTION = "Number of neighbors"; + +static const char* GAUSSIAN_WIDTH_KEYWORD_SHORT = "gw"; static const char* GAUSSIAN_WIDTH_KEYWORD = "gaussian-width"; +static const char* GAUSSIAN_WIDTH_DESCRIPTION = "Width of gaussian kernel"; + static const char* TIMESTEPS_KEYWORD = "timesteps"; +static const char* TIMESTEPS_DESCRIPTION = "Number of timesteps for diffusion map"; + static const char* SPE_LOCAL_KEYWORD = "spe-local"; -static const char* EIGENSHIFT_KEYWORD = "eigenshift";; +static const char* SPE_LOCAL_DESCRIPTION = "Local strategy in SPE (default is global)"; + +static const char* EIGENSHIFT_KEYWORD = "eigenshift"; +static const char* EIGENSHIFT_DESCRIPTION = "Regularization diagonal shift for weight matrix"; + static const char* LANDMARK_RATIO_KEYWORD = "landmark-ratio"; +static const char* LANDMARK_RATIO_DESCRIPTION = "Ratio of landmarks. 
Should be in (0,1) range (0.2 means 20%)"; + static const char* SPE_TOLERANCE_KEYWORD = "spe-tolerance"; +static const char* SPE_TOLERANCE_DESCRIPTION = "Tolerance for SPE"; + static const char* SPE_NUM_UPDATES_KEYWORD = "spe-num-updates"; +static const char* SPE_NUM_UPDATES_DESCRIPTION = "Number of SPE updates"; + static const char* MAX_ITERS_KEYWORD = "max-iters"; +static const char* MAX_ITERS_DESCRIPTION = "Maximum number of iterations"; + static const char* FA_EPSILON_KEYWORD = "fa-epsilon"; +static const char* FA_EPSILON_DESCRIPTION = "FA convergence threshold"; + static const char* SNE_PERPLEXITY_KEYWORD = "sne-perplexity"; +static const char* SNE_PERPLEXITY_DESCRIPTION = "Perplexity for the t-SNE algorithm"; + static const char* SNE_THETA_KEYWORD = "sne-theta"; +static const char* SNE_THETA_DESCRIPTION = "Theta for the t-SNE algorithm"; + static const char* MS_SQUISHING_RATE_KEYWORD = "squishing-rate"; +static const char* MS_SQUISHING_RATE_DESCRIPTION = "Squishing rate of the Manifold Sculpting algorithm"; + static const char* PRECOMPUTE_KEYWORD = "precompute"; +static const char* PRECOMPUTE_DESCRIPTION = "Whether distance and kernel matrices should be precomputed"; + int run(int argc, const char **argv) { srand(static_cast(time(NULL))); - cxxopts::Options options("tapkee", "Tapkee: a tool for dimension reduction"); + cxxopts::Options options("tapkee", "Tapkee: a tool for dimensionality reduction."); using namespace std::string_literals; @@ -89,74 +166,62 @@ int run(int argc, const char **argv) .set_tab_expansion() .add_options() ( - either("i", INPUT_FILE_KEYWORD), - "Input file", + either(INPUT_FILE_KEYWORD_SHORT, INPUT_FILE_KEYWORD), + INPUT_FILE_DESCRIPTION, with_default("/dev/stdin"s) ) ( TRANSPOSE_INPUT_KEYWORD, - "Transpose input file if set" + TRANSPOSE_INPUT_DESCRIPTION ) ( TRANSPOSE_OUTPUT_KEYWORD, - "Transpose output file if set" + TRANSPOSE_OUTPUT_DESCRIPTION ) ( - either("o", OUTPUT_FILE_KEYWORD), - "Output file", + 
either(OUTPUT_FILE_KEYWORD_SHORT, OUTPUT_FILE_KEYWORD), + OUTPUT_FILE_DESCRIPTION, with_default("/dev/stdout"s) ) ( - either("opmat", OUTPUT_PROJECTION_MATRIX_FILE_KEYWORD), - "Output file for the projection matrix", + either(OUTPUT_PROJECTION_MATRIX_FILE_KEYWORD_SHORT, OUTPUT_PROJECTION_MATRIX_FILE_KEYWORD), + OUTPUT_PROJECTION_MATRIX_FILE_DESCRIPTION, with_default("/dev/null"s) ) ( - either("opmean", OUTPUT_PROJECTION_MEAN_FILE_KEYWORD), - "Output file for the mean of data", + either(OUTPUT_PROJECTION_MEAN_FILE_KEYWORD_SHORT, OUTPUT_PROJECTION_MEAN_FILE_KEYWORD), + OUTPUT_PROJECTION_MEAN_FILE_DESCRIPTION, with_default("/dev/null"s) ) ( - either("d", DELIMITER_KEYWORD), - "Delimiter", + either(DELIMITER_KEYWORD_SHORT, DELIMITER_KEYWORD), + DELIMITER_DESCRIPTION, with_default(","s) ) ( - either("h", HELP_KEYWORD), - "Print usage" + either(HELP_KEYWORD_SHORT, HELP_KEYWORD), + HELP_DESCRIPTION ) ( BENCHMARK_KEYWORD, - "Output benchmark information" + BENCHMARK_DESCRIPTION ) ( VERBOSE_KEYWORD, - "Output more information" + VERBOSE_DESCRIPTION ) ( DEBUG_KEYWORD, - "Output debug information" + DEBUG_DESCRIPTION ) ( - either("m", METHOD_KEYWORD), - "Dimension reduction method (default locally_linear_embedding). 
\n One of the following: \n" - "locally_linear_embedding (lle), neighborhood_preserving_embedding (npe), \n" - "local_tangent_space_alignment (ltsa), linear_local_tangent_space_alignment (lltsa), \n" - "hessian_locally_linear_embedding (hlle), laplacian_eigenmaps (la), locality_preserving_projections (lpp), \n" - "diffusion_map (dm), isomap, landmark_isomap (l-isomap), multidimensional_scaling (mds), \n" - "landmark_multidimensional_scaling (l-mds), stochastic_proximity_embedding (spe), \n" - "kernel_pca (kpca), pca, random_projection (ra), factor_analysis (fa), \n" - "t-stochastic_neighborhood_embedding (t-sne), manifold_sculpting (ms).", + either(METHOD_KEYWORD_SHORT, METHOD_KEYWORD), + METHOD_DESCRIPTION, with_default("locally_linear_embedding"s) ) ( - either("nm", NEIGHBORS_METHOD_KEYWORD), - "Neighbors search method (default is 'covertree' if available, 'vptree' otherwise). One of the following: " - "brute,vptree" -#ifdef TAPKEE_USE_LGPL_COVERTREE - ",covertree" -#endif - ".", + either(NEIGHBORS_METHOD_KEYWORD_SHORT, NEIGHBORS_METHOD_KEYWORD), + NEIGHBORS_METHOD_DESCRIPTION, #ifdef TAPKEE_USE_LGPL_COVERTREE with_default("covertree"s) #else @@ -164,12 +229,8 @@ int run(int argc, const char **argv) #endif ) ( - either("em", EIGEN_METHOD_KEYWORD), - "Eigendecomposition method (default is 'arpack' if available, 'dense' otherwise). One of the following: " -#ifdef TAPKEE_WITH_ARPACK - "arpack, " -#endif - "randomized, dense.", + either(EIGEN_METHOD_KEYWORD_SHORT, EIGEN_METHOD_KEYWORD), + EIGEN_METHOD_DESCRIPTION, #ifdef TAPKEE_WITH_ARPACK with_default("arpack"s) #else @@ -177,86 +238,82 @@ int run(int argc, const char **argv) #endif ) ( - either("cs", COMPUTATION_STRATEGY_KEYWORD), - "Computation strategy (default is 'cpu'). 
One of the following: " -#ifdef TAPKEE_WITH_VIENNACL - "opencl, " -#endif - "cpu.", + either(COMPUTATION_STRATEGY_KEYWORD_SHORT, COMPUTATION_STRATEGY_KEYWORD), + COMPUTATION_STRATEGY_DESCRIPTION, with_default("cpu"s) ) ( - either("td", TARGET_DIMENSION_KEYWORD), - "Target dimension", + either(TARGET_DIMENSION_KEYWORD_SHORT, TARGET_DIMENSION_KEYWORD), + TARGET_DIMENSION_DESCRIPTION, with_default(2) ) ( - either("k", NUM_NEIGHBORS_KEYWORD), - "Number of neighbors", + either(NUM_NEIGHBORS_KEYWORD_SHORT, NUM_NEIGHBORS_KEYWORD), + NUM_NEIGHBORS_DESCRIPTION, with_default(10) ) ( - either("gw", GAUSSIAN_WIDTH_KEYWORD), - "Width of gaussian kernel", + either(GAUSSIAN_WIDTH_KEYWORD_SHORT, GAUSSIAN_WIDTH_KEYWORD), + GAUSSIAN_WIDTH_DESCRIPTION, with_default(1.0) ) ( TIMESTEPS_KEYWORD, - "Number of timesteps for diffusion map", + TIMESTEPS_DESCRIPTION, with_default(1) ) ( EIGENSHIFT_KEYWORD, - "Regularization diagonal shift for weight matrix", + EIGENSHIFT_DESCRIPTION, with_default(1e-9) ) ( LANDMARK_RATIO_KEYWORD, - "Ratio of landmarks. 
Should be in (0,1) range (0.2 means 20%)", + LANDMARK_RATIO_DESCRIPTION, with_default(0.2) ) ( SPE_LOCAL_KEYWORD, - "Local strategy in SPE (default is global)" + SPE_LOCAL_DESCRIPTION ) ( SPE_TOLERANCE_KEYWORD, - "Tolerance for SPE", + SPE_TOLERANCE_DESCRIPTION, with_default(1e-5) ) ( SPE_NUM_UPDATES_KEYWORD, - "Number of SPE updates", + SPE_NUM_UPDATES_DESCRIPTION, with_default(100) ) ( MAX_ITERS_KEYWORD, - "Maximum number of iterations", + MAX_ITERS_DESCRIPTION, with_default(1000) ) ( FA_EPSILON_KEYWORD, - "FA convergence threshold", + FA_EPSILON_DESCRIPTION, with_default(1e-5) ) ( SNE_PERPLEXITY_KEYWORD, - "Perplexity for the t-SNE algorithm", + SNE_PERPLEXITY_DESCRIPTION, with_default(30.0) ) ( SNE_THETA_KEYWORD, - "Theta for the t-SNE algorithm", + SNE_THETA_DESCRIPTION, with_default(0.5) ) ( MS_SQUISHING_RATE_KEYWORD, - "Squishing rate of the Manifold Sculpting algorithm", + MS_SQUISHING_RATE_DESCRIPTION, with_default(0.99) ) ( PRECOMPUTE_KEYWORD, - "Whether distance and kernel matrices should be precomputed (default false)" + PRECOMPUTE_DESCRIPTION ) ; @@ -279,18 +336,18 @@ int run(int argc, const char **argv) } if (opt.count(VERBOSE_KEYWORD)) { - tapkee::LoggingSingleton::instance().enable_info(); + tapkee::Logging::instance().enable_info(); } if (opt.count(DEBUG_KEYWORD)) { - tapkee::LoggingSingleton::instance().enable_debug(); - tapkee::LoggingSingleton::instance().message_info("Debug messages enabled"); + tapkee::Logging::instance().enable_debug(); + tapkee::Logging::instance().message_info("Debug messages enabled"); } if (opt.count(BENCHMARK_KEYWORD)) { - tapkee::LoggingSingleton::instance().enable_benchmark(); - tapkee::LoggingSingleton::instance().message_info("Benchmarking enabled"); + tapkee::Logging::instance().enable_benchmark(); + tapkee::Logging::instance().message_info("Benchmarking enabled"); } tapkee::DimensionReductionMethod tapkee_method = tapkee::PassThru; @@ -298,11 +355,11 @@ int run(int argc, const char **argv) string method = 
opt[METHOD_KEYWORD].as(); try { - tapkee_method = parse_reduction_method(method.c_str()); + tapkee_method = parse_multiple(DIMENSION_REDUCTION_METHODS, method); } - catch (const std::exception &) + catch (const std::exception & ex) { - tapkee::LoggingSingleton::instance().message_error(string("Unknown method ") + method); + tapkee::Logging::instance().message_error(string("Unknown method ") + method); return 1; } } @@ -312,11 +369,11 @@ int run(int argc, const char **argv) string method = opt[NEIGHBORS_METHOD_KEYWORD].as(); try { - tapkee_neighbors_method = parse_neighbors_method(method.c_str()); + tapkee_neighbors_method = parse_multiple(NEIGHBORS_METHODS, method); } catch (const std::exception &) { - tapkee::LoggingSingleton::instance().message_error(string("Unknown neighbors method ") + method); + tapkee::Logging::instance().message_error(string("Unknown neighbors method ") + method); return 1; } } @@ -325,11 +382,11 @@ int run(int argc, const char **argv) string method = opt[EIGEN_METHOD_KEYWORD].as(); try { - tapkee_eigen_method = parse_eigen_method(method.c_str()); + tapkee_eigen_method = parse_multiple(EIGEN_METHODS, method); } catch (const std::exception &) { - tapkee::LoggingSingleton::instance().message_error(string("Unknown eigendecomposition method ") + method); + tapkee::Logging::instance().message_error(string("Unknown eigendecomposition method ") + method); return 1; } } @@ -338,20 +395,20 @@ int run(int argc, const char **argv) string method = opt[COMPUTATION_STRATEGY_KEYWORD].as(); try { - tapkee_computation_strategy = parse_computation_strategy(method.c_str()); + tapkee_computation_strategy = parse_multiple(COMPUTATION_STRATEGIES, method); } catch (const std::exception &) { - tapkee::LoggingSingleton::instance().message_error(string("Unknown computation strategy ") + method); + tapkee::Logging::instance().message_error(string("Unknown computation strategy ") + method); return 1; } } int target_dim = opt[TARGET_DIMENSION_KEYWORD].as(); - if 
(target_dim < 0) + if (target_dim <= 0) { - tapkee::LoggingSingleton::instance().message_error( - "Negative target dimensionality is not possible in current circumstances. " + tapkee::Logging::instance().message_error( + "\"Only\" a positive target dimensionality larger than zero is possible in current circumstances. " "Please visit other universe"); return 1; } @@ -359,33 +416,22 @@ int run(int argc, const char **argv) int k = opt[NUM_NEIGHBORS_KEYWORD].as(); if (k < 3) { - tapkee::LoggingSingleton::instance().message_error( + tapkee::Logging::instance().message_error( "The provided number of neighbors is too small, consider at least 3."); return 1; } double width = opt[GAUSSIAN_WIDTH_KEYWORD].as(); if (width < 0.0) { - tapkee::LoggingSingleton::instance().message_error("Width of the gaussian kernel is negative."); + tapkee::Logging::instance().message_error("Width of the gaussian kernel is negative."); return 1; } int timesteps = opt[TIMESTEPS_KEYWORD].as(); if (timesteps < 0) { - tapkee::LoggingSingleton::instance().message_error("Number of timesteps is negative."); + tapkee::Logging::instance().message_error("Number of timesteps is negative."); return 1; } - double eigenshift = opt[EIGENSHIFT_KEYWORD].as(); - double landmark_rt = opt[LANDMARK_RATIO_KEYWORD].as(); - bool spe_global = opt.count(SPE_LOCAL_KEYWORD); - double spe_tol = opt[SPE_TOLERANCE_KEYWORD].as(); - int spe_num_upd = opt[SPE_NUM_UPDATES_KEYWORD].as(); - int max_iters = opt[MAX_ITERS_KEYWORD].as(); - double fa_eps = opt[FA_EPSILON_KEYWORD].as(); - double perplexity = opt[SNE_PERPLEXITY_KEYWORD].as(); - double theta = opt[SNE_THETA_KEYWORD].as(); - double squishing = opt[MS_SQUISHING_RATE_KEYWORD].as(); - // Load data string input_filename = opt[INPUT_FILE_KEYWORD].as(); string output_filename = opt[OUTPUT_FILE_KEYWORD].as(); @@ -412,23 +458,33 @@ int run(int argc, const char **argv) input_data.transposeInPlace(); } - std::stringstream ss; - ss << "Data contains " << input_data.cols() << " feature 
vectors with dimension of " << input_data.rows(); - tapkee::LoggingSingleton::instance().message_info(ss.str()); + tapkee::Logging::instance().message_info(fmt::format("Data contains {} feature vectors with dimension of {}", input_data.cols(), input_data.rows())); tapkee::TapkeeOutput output; tapkee::ParametersSet parameters = - tapkee::kwargs[(tapkee::method = tapkee_method, tapkee::computation_strategy = tapkee_computation_strategy, - tapkee::eigen_method = tapkee_eigen_method, tapkee::neighbors_method = tapkee_neighbors_method, - tapkee::num_neighbors = k, tapkee::target_dimension = target_dim, - tapkee::diffusion_map_timesteps = timesteps, tapkee::gaussian_kernel_width = width, - tapkee::max_iteration = max_iters, tapkee::spe_global_strategy = spe_global, - tapkee::spe_num_updates = spe_num_upd, tapkee::spe_tolerance = spe_tol, - tapkee::landmark_ratio = landmark_rt, tapkee::nullspace_shift = eigenshift, - tapkee::check_connectivity = true, tapkee::fa_epsilon = fa_eps, - tapkee::sne_perplexity = perplexity, tapkee::sne_theta = theta, - tapkee::squishing_rate = squishing)]; + tapkee::kwargs[( + tapkee::method = tapkee_method, + tapkee::computation_strategy = tapkee_computation_strategy, + tapkee::eigen_method = tapkee_eigen_method, + tapkee::neighbors_method = tapkee_neighbors_method, + tapkee::num_neighbors = k, + tapkee::target_dimension = target_dim, + tapkee::diffusion_map_timesteps = timesteps, + tapkee::gaussian_kernel_width = width, + tapkee::max_iteration = opt[MAX_ITERS_KEYWORD].as(), + tapkee::spe_global_strategy = opt.count(SPE_LOCAL_KEYWORD), + tapkee::spe_num_updates = opt[SPE_NUM_UPDATES_KEYWORD].as(), + tapkee::spe_tolerance = opt[SPE_TOLERANCE_KEYWORD].as(), + tapkee::landmark_ratio = opt[LANDMARK_RATIO_KEYWORD].as(), + tapkee::nullspace_shift = opt[EIGENSHIFT_KEYWORD].as(), + tapkee::check_connectivity = true, + tapkee::fa_epsilon = opt[FA_EPSILON_KEYWORD].as(), + tapkee::sne_perplexity = opt[SNE_PERPLEXITY_KEYWORD].as(), + tapkee::sne_theta = 
opt[SNE_THETA_KEYWORD].as(), + tapkee::squishing_rate = opt[MS_SQUISHING_RATE_KEYWORD].as() + )]; + if (opt.count(PRECOMPUTE_KEYWORD)) { @@ -443,21 +499,20 @@ int run(int argc, const char **argv) { tapkee::tapkee_internal::timed_context context("[+] Distance matrix computation"); distance_matrix = matrix_from_callback(static_cast(input_data.cols()), - tapkee::eigen_distance_callback(input_data)); + tapkee::eigen_distance_callback(input_data)); } if (tapkee_method.needs_kernel) { tapkee::tapkee_internal::timed_context context("[+] Kernel matrix computation"); kernel_matrix = matrix_from_callback(static_cast(input_data.cols()), - tapkee::eigen_kernel_callback(input_data)); + tapkee::eigen_kernel_callback(input_data)); } } tapkee::precomputed_distance_callback dcb(distance_matrix); tapkee::precomputed_kernel_callback kcb(kernel_matrix); tapkee::eigen_features_callback fcb(input_data); - output = tapkee::initialize() - .withParameters(parameters) + output = tapkee::with(parameters) .withKernel(kcb) .withDistance(dcb) .withFeatures(fcb) @@ -465,7 +520,7 @@ int run(int argc, const char **argv) } else { - output = tapkee::initialize().withParameters(parameters).embedUsing(input_data); + output = tapkee::with(parameters).embedUsing(input_data); } // Save obtained data if (opt.count(TRANSPOSE_OUTPUT_KEYWORD)) @@ -481,7 +536,7 @@ int run(int argc, const char **argv) dynamic_cast(output.projection.implementation.get()); if (!matrix_projection) { - tapkee::LoggingSingleton::instance().message_error("Projection function unavailable"); + tapkee::Logging::instance().message_error("Projection function unavailable"); return 1; } write_matrix(&matrix_projection->proj_mat, ofs_matrix, delimiter[0]); diff --git a/src/cli/util.hpp b/src/cli/util.hpp index 5a5531fb..69b5c521 100644 --- a/src/cli/util.hpp +++ b/src/cli/util.hpp @@ -10,6 +10,7 @@ #include #include +#include using namespace std; @@ -22,6 +23,52 @@ inline bool is_wrong_char(char c) return false; } +int 
levenshtein_distance(const std::string& s1, const std::string& s2) +{ + const auto len1 = s1.size(); + const auto len2 = s2.size(); + + std::vector> d(len1 + 1, std::vector(len2 + 1)); + + d[0][0] = 0; + for (unsigned int i = 1; i <= len1; ++i) + { + d[i][0] = i; + } + for (unsigned int j = 1; j <= len2; ++j) + { + d[0][j] = j; + } + + for (unsigned int i = 1; i <= len1; ++i) + { + for (unsigned int j = 1; j <= len2; ++j) + { + d[i][j] = std::min({ + d[i - 1][j] + 1, + d[i][j - 1] + 1, + d[i - 1][j - 1] + (s1[i - 1] == s2[j - 1] ? 0 : 1) + }); + } + } + + return d[len1][len2]; +} + +template +std::string comma_separated_keys(Iterator begin, Iterator end) { + std::ostringstream oss; + for (Iterator it = begin; it != end; ++it) + { + oss << it->first; + if (std::next(it) != end) + { + oss << ", "; + } + } + return oss.str(); +} + tapkee::DenseMatrix read_data(ifstream& ifs, char delimiter) { string str; @@ -95,92 +142,87 @@ void write_vector(tapkee::DenseVector* matrix, ofstream& of) } } -tapkee::DimensionReductionMethod parse_reduction_method(const char* str) -{ - if (!strcmp(str, "local_tangent_space_alignment") || !strcmp(str, "ltsa")) - return tapkee::KernelLocalTangentSpaceAlignment; - if (!strcmp(str, "locally_linear_embedding") || !strcmp(str, "lle")) - return tapkee::KernelLocallyLinearEmbedding; - if (!strcmp(str, "hessian_locally_linear_embedding") || !strcmp(str, "hlle")) - return tapkee::HessianLocallyLinearEmbedding; - if (!strcmp(str, "multidimensional_scaling") || !strcmp(str, "mds")) - return tapkee::MultidimensionalScaling; - if (!strcmp(str, "landmark_multidimensional_scaling") || !strcmp(str, "l-mds")) - return tapkee::LandmarkMultidimensionalScaling; - if (!strcmp(str, "isomap")) - return tapkee::Isomap; - if (!strcmp(str, "landmark_isomap") || !strcmp(str, "l-isomap")) - return tapkee::LandmarkIsomap; - if (!strcmp(str, "diffusion_map") || !strcmp(str, "dm")) - return tapkee::DiffusionMap; - if (!strcmp(str, "kernel_pca") || !strcmp(str, "kpca")) 
- return tapkee::KernelPrincipalComponentAnalysis; - if (!strcmp(str, "pca")) - return tapkee::PrincipalComponentAnalysis; - if (!strcmp(str, "random_projection") || !strcmp(str, "ra")) - return tapkee::RandomProjection; - if (!strcmp(str, "laplacian_eigenmaps") || !strcmp(str, "la")) - return tapkee::LaplacianEigenmaps; - if (!strcmp(str, "locality_preserving_projections") || !strcmp(str, "lpp")) - return tapkee::LocalityPreservingProjections; - if (!strcmp(str, "neighborhood_preserving_embedding") || !strcmp(str, "npe")) - return tapkee::NeighborhoodPreservingEmbedding; - if (!strcmp(str, "linear_local_tangent_space_alignment") || !strcmp(str, "lltsa")) - return tapkee::LinearLocalTangentSpaceAlignment; - if (!strcmp(str, "stochastic_proximity_embedding") || !strcmp(str, "spe")) - return tapkee::StochasticProximityEmbedding; - if (!strcmp(str, "passthru")) - return tapkee::PassThru; - if (!strcmp(str, "factor_analysis") || !strcmp(str, "fa")) - return tapkee::FactorAnalysis; - if (!strcmp(str, "t-stochastic_neighbor_embedding") || !strcmp(str, "t-sne")) - return tapkee::tDistributedStochasticNeighborEmbedding; - if (!strcmp(str, "manifold_sculpting") || !strcmp(str, "ms")) - return tapkee::ManifoldSculpting; - - throw std::exception(); - return tapkee::PassThru; -} - -tapkee::NeighborsMethod parse_neighbors_method(const char* str) -{ - if (!strcmp(str, "brute")) - return tapkee::Brute; - if (!strcmp(str, "vptree")) - return tapkee::VpTree; +static const std::map DIMENSION_REDUCTION_METHODS = { + {"local_tangent_space_alignment", tapkee::KernelLocalTangentSpaceAlignment}, + {"ltsa", tapkee::KernelLocalTangentSpaceAlignment}, + {"locally_linear_embedding", tapkee::KernelLocallyLinearEmbedding}, + {"lle", tapkee::KernelLocallyLinearEmbedding}, + {"hessian_locally_linear_embedding", tapkee::HessianLocallyLinearEmbedding}, + {"hlle", tapkee::HessianLocallyLinearEmbedding}, + {"multidimensional_scaling", tapkee::MultidimensionalScaling}, + {"mds", 
tapkee::MultidimensionalScaling}, + {"landmark_multidimensional_scaling", tapkee::LandmarkMultidimensionalScaling}, + {"l-mds", tapkee::LandmarkMultidimensionalScaling}, + {"isomap", tapkee::Isomap}, + {"landmark_isomap", tapkee::LandmarkIsomap}, + {"l-isomap", tapkee::LandmarkIsomap}, + {"diffusion_map", tapkee::DiffusionMap}, + {"dm", tapkee::DiffusionMap}, + {"kernel_pca", tapkee::KernelPrincipalComponentAnalysis}, + {"kpca", tapkee::KernelPrincipalComponentAnalysis}, + {"pca", tapkee::PrincipalComponentAnalysis}, + {"random_projection", tapkee::RandomProjection}, + {"ra", tapkee::RandomProjection}, + {"laplacian_eigenmaps", tapkee::LaplacianEigenmaps}, + {"la", tapkee::LaplacianEigenmaps}, + {"locality_preserving_projections", tapkee::LocalityPreservingProjections}, + {"lpp", tapkee::LocalityPreservingProjections}, + {"neighborhood_preserving_embedding", tapkee::NeighborhoodPreservingEmbedding}, + {"npe", tapkee::NeighborhoodPreservingEmbedding}, + {"linear_local_tangent_space_alignment", tapkee::LinearLocalTangentSpaceAlignment}, + {"lltsa", tapkee::LinearLocalTangentSpaceAlignment}, + {"stochastic_proximity_embedding", tapkee::StochasticProximityEmbedding}, + {"spe", tapkee::StochasticProximityEmbedding}, + {"passthru", tapkee::PassThru}, + {"factor_analysis", tapkee::FactorAnalysis}, + {"fa", tapkee::FactorAnalysis}, + {"t-stochastic_proximity_embedding", tapkee::tDistributedStochasticNeighborEmbedding}, + {"t-sne", tapkee::tDistributedStochasticNeighborEmbedding}, + {"manifold_sculpting", tapkee::ManifoldSculpting}, +}; + +static const std::map NEIGHBORS_METHODS = { + {"brute", tapkee::Brute}, + {"vptree", tapkee::VpTree}, #ifdef TAPKEE_USE_LGPL_COVERTREE - if (!strcmp(str, "covertree")) - return tapkee::CoverTree; + {"covertree", tapkee::CoverTree}, #endif +}; - throw std::exception(); - return tapkee::Brute; -} - -tapkee::EigenMethod parse_eigen_method(const char* str) -{ +static const std::map EIGEN_METHODS = { + {"dense", tapkee::Dense}, + 
{"randomized", tapkee::Randomized}, #ifdef TAPKEE_WITH_ARPACK - if (!strcmp(str, "arpack")) - return tapkee::Arpack; + {"arpack", tapkee::Arpack}, #endif - if (!strcmp(str, "randomized")) - return tapkee::Randomized; - if (!strcmp(str, "dense")) - return tapkee::Dense; - - throw std::exception(); - return tapkee::Dense; -} +}; -tapkee::ComputationStrategy parse_computation_strategy(const char* str) -{ - if (!strcmp(str, "cpu")) - return tapkee::HomogeneousCPUStrategy; +static const std::map COMPUTATION_STRATEGIES = { + {"cpu", tapkee::HomogeneousCPUStrategy}, #ifdef TAPKEE_WITH_VIENNACL - if (!strcmp(str, "opencl")) - return tapkee::HeterogeneousOpenCLStrategy; + {"opencl", tapkee::HeterogeneousOpenCLStrategy}, #endif - return tapkee::HomogeneousCPUStrategy; +}; + +/** + * Returns the value stored in `mapping` under the key `str`. + * When `str` is not a key, logs the key with the smallest + * levenshtein_distance to `str` as a suggestion and then throws + * std::logic_error carrying `str`. + * The mapping is taken by const reference so the lookup table is + * not copied on every call. + */ +template +typename Mapping::mapped_type parse_multiple(const Mapping& mapping, const std::string& str) +{ + auto it = mapping.find(str); + if (it != mapping.end()) + { + return it->second; + } + + auto closest = std::min_element(mapping.begin(), mapping.end(), + [&str] (const auto &a, const auto &b) { + return levenshtein_distance(str, a.first) < levenshtein_distance(str, b.first); + }); + if (closest != mapping.end()) + { + tapkee::Logging::instance().message_info(fmt::format("Unknown parameter value `{}`. 
Did you mean `{}`?", str, closest->first)); + } + + throw std::logic_error(str); } template diff --git a/test/unit/data.hpp b/test/unit/data.hpp index 9cdeb058..05a8c2e8 100644 --- a/test/unit/data.hpp +++ b/test/unit/data.hpp @@ -1,14 +1,11 @@ -#include +#include -#include - -#ifndef M_PI -#define M_PI (3.14159265358979323846) -#endif +#include tapkee::DenseMatrix swissroll(int N) { - tapkee::DenseVector tt = (3.0 * M_PI / 4.0) * (tapkee::DenseVector::Random(N).array() + 0.5); + tapkee::DenseVector tt = (3.0 * std::numbers::pi_v / 4.0) * + (tapkee::DenseVector::Random(N).array() + 0.5); tapkee::DenseVector height = tapkee::DenseVector::Random(N).array() - 0.5; tapkee::DenseMatrix X(N, 3); X.col(0) = tt.array() * tt.array().cos(); diff --git a/test/unit/interface.cpp b/test/unit/interface.cpp index d63acba3..f386776c 100644 --- a/test/unit/interface.cpp +++ b/test/unit/interface.cpp @@ -20,96 +20,81 @@ TEST(Interface, ChainInterfaceOrder) TapkeeOutput output; - ASSERT_NO_THROW(output = tapkee::initialize() - .withParameters((method = MultidimensionalScaling)) + ASSERT_NO_THROW(output = tapkee::with((method = MultidimensionalScaling)) .withKernel(kcb) .withFeatures(fcb) .withDistance(dcb) .embedRange(indices.begin(), indices.end())); - ASSERT_NO_THROW(output = tapkee::initialize() - .withParameters((method = MultidimensionalScaling)) + ASSERT_NO_THROW(output = tapkee::with((method = MultidimensionalScaling)) .withKernel(kcb) .withDistance(dcb) .withFeatures(fcb) .embedRange(indices.begin(), indices.end())); - ASSERT_NO_THROW(output = tapkee::initialize() - .withParameters((method = KernelPrincipalComponentAnalysis)) + ASSERT_NO_THROW(output = tapkee::with((method = KernelPrincipalComponentAnalysis)) .withDistance(dcb) .withKernel(kcb) .withFeatures(fcb) .embedRange(indices.begin(), indices.end())); - ASSERT_NO_THROW(output = tapkee::initialize() - .withParameters((method = PassThru)) + ASSERT_NO_THROW(output = tapkee::with((method = PassThru)) .withDistance(dcb)
.withFeatures(fcb) .withKernel(kcb) .embedRange(indices.begin(), indices.end())); - ASSERT_NO_THROW(output = tapkee::initialize() - .withParameters((method = MultidimensionalScaling)) + ASSERT_NO_THROW(output = tapkee::with((method = MultidimensionalScaling)) .withFeatures(fcb) .withDistance(dcb) .withKernel(kcb) .embedRange(indices.begin(), indices.end())); - ASSERT_NO_THROW(output = tapkee::initialize() - .withParameters((method = MultidimensionalScaling)) + ASSERT_NO_THROW(output = tapkee::with((method = MultidimensionalScaling)) .withFeatures(fcb) .withKernel(kcb) .withDistance(dcb) .embedRange(indices.begin(), indices.end())); - ASSERT_NO_THROW(output = tapkee::initialize() - .withParameters((method = PassThru)) + ASSERT_NO_THROW(output = tapkee::with((method = PassThru)) .withFeatures(fcb) .withKernel(kcb) .embedRange(indices.begin(), indices.end())); - ASSERT_NO_THROW(output = tapkee::initialize() - .withParameters((method = PassThru)) + ASSERT_NO_THROW(output = tapkee::with((method = PassThru)) .withFeatures(fcb) .withDistance(dcb) .embedRange(indices.begin(), indices.end())); - ASSERT_NO_THROW(output = tapkee::initialize() - .withParameters((method = KernelPrincipalComponentAnalysis)) + ASSERT_NO_THROW(output = tapkee::with((method = KernelPrincipalComponentAnalysis)) .withKernel(kcb) .withDistance(dcb) .embedRange(indices.begin(), indices.end())); - ASSERT_NO_THROW(output = tapkee::initialize() - .withParameters((method = KernelPrincipalComponentAnalysis)) + ASSERT_NO_THROW(output = tapkee::with((method = KernelPrincipalComponentAnalysis)) .withKernel(kcb) .withFeatures(fcb) .embedRange(indices.begin(), indices.end())); - ASSERT_NO_THROW(output = tapkee::initialize() - .withParameters((method = MultidimensionalScaling)) + ASSERT_NO_THROW(output = tapkee::with((method = MultidimensionalScaling)) .withDistance(dcb) .withFeatures(fcb) .embedRange(indices.begin(), indices.end())); - ASSERT_NO_THROW(output = tapkee::initialize() - .withParameters((method = 
MultidimensionalScaling)) + ASSERT_NO_THROW(output = tapkee::with((method = MultidimensionalScaling)) .withDistance(dcb) .withKernel(kcb) .embedRange(indices.begin(), indices.end())); - ASSERT_NO_THROW(output = tapkee::initialize() - .withParameters((method = KernelPrincipalComponentAnalysis)) + ASSERT_NO_THROW(output = tapkee::with((method = KernelPrincipalComponentAnalysis)) .withKernel(kcb) .embedRange(indices.begin(), indices.end())); - ASSERT_NO_THROW(output = tapkee::initialize() - .withParameters((method = MultidimensionalScaling)) + ASSERT_NO_THROW(output = tapkee::with((method = MultidimensionalScaling)) .withDistance(dcb) .embedRange(indices.begin(), indices.end())); - ASSERT_NO_THROW(output = tapkee::initialize() - .withParameters((method = PassThru)) + ASSERT_NO_THROW(output = tapkee::with((method = PassThru)) .withFeatures(fcb) .embedRange(indices.begin(), indices.end())); } diff --git a/test/unit/projecting.cpp b/test/unit/projecting.cpp index 1cddcf57..bd103e8d 100644 --- a/test/unit/projecting.cpp +++ b/test/unit/projecting.cpp @@ -13,7 +13,7 @@ TEST(Projecting, PrincipalComponentAnalysis) TapkeeOutput output; - ASSERT_NO_THROW(output = tapkee::initialize().withParameters((method = PrincipalComponentAnalysis, target_dimension = 2)).embedUsing(X)); + ASSERT_NO_THROW(output = tapkee::with((method = PrincipalComponentAnalysis, target_dimension = 2)).embedUsing(X)); auto projected = output.projection(X.col(0)); ASSERT_EQ(2, projected.size()); @@ -26,7 +26,7 @@ TEST(Projecting, RandomProjection) TapkeeOutput output; - ASSERT_NO_THROW(output = tapkee::initialize().withParameters((method = RandomProjection, target_dimension = 2)).embedUsing(X)); + ASSERT_NO_THROW(output = tapkee::with((method = RandomProjection, target_dimension = 2)).embedUsing(X)); auto projected = output.projection(X.col(0)); ASSERT_EQ(2, projected.size());