Skip to content
This repository has been archived by the owner on Jul 24, 2020. It is now read-only.

Made kmeans be multithreaded #12

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion milk/tests/test_kmeans.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,6 @@ def test_kmeans_return_partial():
assignments,centroids = milk.unsupervised.kmeans(features, 2, R=129)
centroids_ = milk.unsupervised.kmeans(features, 2, R=129, return_assignments=False)
assignments_ = milk.unsupervised.kmeans(features, 2, R=129, return_centroids=False)
assert np.all(centroids == centroids_)
assert np.all(assignments == assignments_)
assert np.allclose(centroids, centroids_)

122 changes: 110 additions & 12 deletions milk/unsupervised/_kmeans.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,79 @@ void assert_type_contiguous(PyArrayObject* array,int type) {
}

template <typename ftype>
int computecentroids(ftype* f, ftype* centroids, PyArrayObject* a_assignments, PyArrayObject* a_counts, const int N, const int Nf, const int k) {
bool assignclass_euclidian(ftype* f, ftype* centroids, PyArrayObject* a_assignments, const int N, const int Nf, const int k) {
Py_BEGIN_ALLOW_THREADS
npy_int32* assignments = static_cast<npy_int32*>(PyArray_DATA(a_assignments));

#pragma parallel for schedule(static)
for (int i=0; i < N; i++) {
int best_cluster = -1;
ftype min_distance = 0.0;
for (int c=0; c < k; c++) {
ftype cur_distance = 0.0;
for (int j=0; j < Nf; j++) {
ftype term = (f[i * Nf + j] - centroids[c * Nf + j]);
cur_distance += term * term;
}
if (best_cluster == -1 || cur_distance < min_distance) {
min_distance = cur_distance;
best_cluster = c;
}
}
assignments[i] = 1; //best_cluster;
}

return true;
Py_END_ALLOW_THREADS
}

PyObject* py_assignclass_euclidian(PyObject* self, PyObject* args) {
try {
PyArrayObject* fmatrix;
PyArrayObject* centroids;
PyArrayObject* assignments;
if (!PyArg_ParseTuple(args, "OOO", &fmatrix, &centroids, &assignments)) { throw Kmeans_Exception("Wrong number of arguments for assignclass_euclidian."); }
if (!PyArray_Check(fmatrix) || !PyArray_ISCONTIGUOUS(fmatrix)) throw Kmeans_Exception("fmatrix not what was expected.");
if (!PyArray_Check(centroids) || !PyArray_ISCONTIGUOUS(centroids)) throw Kmeans_Exception("centroids not what was expected.");
if (!PyArray_Check(assignments) || !PyArray_ISCONTIGUOUS(assignments)) throw Kmeans_Exception("assignments not what was expected.");
if (PyArray_TYPE(assignments) != NPY_INT32) throw Kmeans_Exception("assignments should be int32.");
if (PyArray_TYPE(fmatrix) != PyArray_TYPE(centroids)) throw Kmeans_Exception("centroids and fmatrix should have same type.");
if (PyArray_NDIM(fmatrix) != 2) throw Kmeans_Exception("fmatrix should be two dimensional");
if (PyArray_NDIM(centroids) != 2) throw Kmeans_Exception("centroids should be two dimensional");
if (PyArray_NDIM(assignments) != 1) throw Kmeans_Exception("assignments should be two dimensional");

const int N = PyArray_DIM(fmatrix, 0);
const int Nf = PyArray_DIM(fmatrix, 1);
const int k = PyArray_DIM(centroids, 0);
if (PyArray_DIM(centroids, 1) != Nf) throw Kmeans_Exception("centroids has wrong number of features.");
if (PyArray_DIM(assignments, 0) != N) throw Kmeans_Exception("assignments has wrong size.");
switch (PyArray_TYPE(fmatrix)) {
#define TRY_TYPE_ASSIGN(code, ftype) \
case code: \
if (assignclass_euclidian<ftype>( \
static_cast<ftype*>(PyArray_DATA(fmatrix)), \
static_cast<ftype*>(PyArray_DATA(centroids)), \
assignments, \
N, Nf, k)) { \
Py_RETURN_TRUE; \
} \
Py_RETURN_FALSE;

TRY_TYPE_ASSIGN(NPY_FLOAT, float);
TRY_TYPE_ASSIGN(NPY_DOUBLE, double);
}
throw Kmeans_Exception("Cannot handle this type.");
} catch (const Kmeans_Exception& exc) {
PyErr_SetString(PyExc_RuntimeError,exc.msg);
return 0;
} catch (...) {
PyErr_SetString(PyExc_RuntimeError,"Some sort of exception in assignclass_euclidian.");
return 0;
}
}

template <typename ftype>
int computecentroids(ftype* f, ftype* centroids, PyArrayObject* a_assignments, PyArrayObject* a_counts, const int N, const int Nf, const int k) {
int zero_counts = 0;
Py_BEGIN_ALLOW_THREADS
const npy_int32* assignments = static_cast<npy_int32*>(PyArray_DATA(a_assignments));
Expand All @@ -30,16 +101,42 @@ int computecentroids(ftype* f, ftype* centroids, PyArrayObject* a_assignments, P
std::fill(counts, counts + k, 0);
std::fill(centroids, centroids + k*Nf, 0.0);

for (int i = 0; i != N; i++){
const int c = assignments[i];
if (c >= k) continue; // throw Kmeans_Exception("wrong value in assignments");
ftype* ck = centroids + Nf*c;
for (int j = 0; j != Nf; ++j) {
*ck++ += *f++;
#pragma omp parallel shared(assignments, counts, centroids, f)
{
int *local_counts = (int*) malloc(k * sizeof(int));
ftype *local_ck = (ftype*) malloc(k * Nf * sizeof(ftype));
std::fill(local_counts, local_counts + k, 0);
std::fill(local_ck, local_ck + k*Nf, ftype());

#pragma omp for schedule(static)
for (int i = 0; i < N; i++){
const int c = assignments[i];
if (c >= k) continue; // throw Kmeans_Exception("wrong value in assignments");
ftype* lck = local_ck + Nf*c;
for (int j = 0; j != Nf; ++j) {
*lck++ += f[i*Nf + j];
}
++local_counts[c];
}
++counts[c];

ftype* ck = centroids;
ftype* lck = local_ck;
for (int c=0; c < k; c++) {
#pragma omp critical(reduce)
{
for (int j = 0; j != Nf; ++j) {
*ck++ += *lck++;
}
counts[c] += local_counts[c];
}
}

free(local_counts);
free(local_ck);
}
for (int i = 0; i != k; ++i) {

#pragma omp parallel for reduction(+:zero_counts) schedule(static) shared(counts,centroids)
for (int i = 0; i < k; ++i) {
ftype* ck = centroids + Nf*i;
const ftype c = counts[i];
if (c == 0) {
Expand Down Expand Up @@ -80,7 +177,7 @@ PyObject* py_computecentroids(PyObject* self, PyObject* args) {
if (PyArray_DIM(assignments, 0) != N) throw Kmeans_Exception("assignments has wrong size.");
if (PyArray_DIM(counts, 0) != k) throw Kmeans_Exception("counts has wrong size.");
switch (PyArray_TYPE(fmatrix)) {
#define TRY_TYPE(code, ftype) \
#define TRY_TYPE_CENTROIDS(code, ftype) \
case code: \
if (computecentroids<ftype>( \
static_cast<ftype*>(PyArray_DATA(fmatrix)), \
Expand All @@ -92,8 +189,8 @@ PyObject* py_computecentroids(PyObject* self, PyObject* args) {
} \
Py_RETURN_FALSE;

TRY_TYPE(NPY_FLOAT, float);
TRY_TYPE(NPY_DOUBLE, double);
TRY_TYPE_CENTROIDS(NPY_FLOAT, float);
TRY_TYPE_CENTROIDS(NPY_DOUBLE, double);
}
throw Kmeans_Exception("Cannot handle this type.");
} catch (const Kmeans_Exception& exc) {
Expand All @@ -107,6 +204,7 @@ PyObject* py_computecentroids(PyObject* self, PyObject* args) {

PyMethodDef methods[] = {
{"computecentroids", py_computecentroids, METH_VARARGS , "Do NOT call directly.\n" },
{"assignclass_euclidian", py_assignclass_euclidian, METH_VARARGS , "Do NOT call directly.\n" },
{NULL, NULL,0,NULL},
};

Expand Down
22 changes: 12 additions & 10 deletions milk/unsupervised/kmeans.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,13 +219,13 @@ def kmeans(fmatrix, k, distance='euclidean', max_iter=1000, R=None, return_assig
fmatrix = zscore(fmatrix)
distance = 'euclidean'
if distance == 'euclidean':
def distfunction(fmatrix, cs, dists):
dists = _dot3(fmatrix, (-2)*cs.T, dists)
def distfunction(fmatrix, cs, _):
dists = _dot3(fmatrix, (-2)*cs.T, None)
dists += np.array([np.dot(c,c) for c in cs])
# For a distance, we'd need to add the fmatrix**2 components, but
# it doesn't matter because we are going to perform an argmin() on
# the result.
return dists
return dists.argmin(1)
elif distance == 'mahalanobis':
icov = kwargs.get('icov', None)
if icov is None:
Expand All @@ -234,13 +234,18 @@ def distfunction(fmatrix, cs, dists):
covmat = np.cov(fmatrix.T)
icov = linalg.inv(covmat)
def distfunction(fmatrix, cs, _):
return np.array([_mahalanobis2(fmatrix, c, icov) for c in cs]).T
dists = np.array([_mahalanobis2(fmatrix, c, icov) for c in cs]).T
return dists.argmin(1)
else:
raise ValueError('milk.unsupervised.kmeans: `distance` argument unknown (%s)' % distance)
if k < 2:
raise ValueError('milk.unsupervised.kmeans `k` should be >= 2.')
if fmatrix.dtype in (np.float32, np.float64) and fmatrix.flags['C_CONTIGUOUS']:
computecentroids = _kmeans.computecentroids
if distance == 'euclidian':
def distfunction(fmatrix, centroids, assignments):
_kmeans.assignclass_euclidian(fmatrix, centroids, assignments)
return assignments
else:
computecentroids = _pycomputecentroids

Expand All @@ -256,19 +261,16 @@ def distfunction(fmatrix, cs, _):

prev = np.zeros(len(fmatrix), np.int32)
counts = np.empty(k, np.int32)
dists = None
assignments = np.empty((fmatrix.shape[0],), dtype=np.int32)
for i in xrange(max_iter):
dists = distfunction(fmatrix, centroids, dists)
assignments = dists.argmin(1)
assignments = distfunction(fmatrix, centroids, assignments)
if np.all(assignments == prev):
break
if computecentroids(fmatrix, centroids, assignments.astype(np.int32), counts):
if computecentroids(fmatrix, centroids, assignments.astype(np.int32, copy=False), counts):
(empty,) = np.where(counts == 0)
centroids = np.delete(centroids, empty, axis=0)
k = len(centroids)
counts = np.empty(k, np.int32)
# This will cause new matrices to be allocated in the next iteration
dists = None
prev[:] = assignments
if return_centroids and return_assignments:
return assignments, centroids
Expand Down
4 changes: 3 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,8 @@
'milk.supervised._lasso' : ['milk/supervised/_lasso.cpp'],
}

compiler_args = ['-std=c++0x']
compiler_args = ['-std=c++0x', '-fopenmp']
link_args=['-lgomp']
if platform.system() == 'Darwin':
compiler_args.append('-stdlib=libc++')

Expand All @@ -67,6 +68,7 @@
undef_macros=undef_macros,
define_macros=define_macros,
extra_compile_args=compiler_args,
extra_link_args=link_args,
)
for key,sources in _extensions.items()
]
Expand Down