-
Notifications
You must be signed in to change notification settings - Fork 62
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #250 from cquil11/cam-cleanup
Cleaning up clustering code for v1.2 branch
- Loading branch information
Showing
8 changed files
with
662 additions
and
253 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
%%MatrixMarket matrix coordinate integer general | ||
%%GraphBLAS type bool | ||
10 10 18 | ||
1 2 1 | ||
1 3 1 | ||
1 4 1 | ||
2 3 1 | ||
4 2 1 | ||
4 3 1 | ||
4 5 1 | ||
5 6 1 | ||
5 7 1 | ||
6 7 1 | ||
7 5 1 | ||
8 3 1 | ||
8 9 1 | ||
8 10 1 | ||
9 8 1 | ||
9 10 1 | ||
10 5 1 | ||
10 9 1 | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,21 +1,44 @@ | ||
#define LG_FREE_WORK \ | ||
{ \ | ||
GrB_free(&T); \ | ||
GrB_free(&T_temp); \ | ||
GrB_free(&w); \ | ||
GrB_free(&D); \ | ||
GrB_free(&ones); \ | ||
GrB_free(&MSE); \ | ||
GrB_free(&argmax_v); \ | ||
GrB_free(&argmax_p); \ | ||
GrB_free(&zero_FP32); \ | ||
GrB_free(&true_BOOL); \ | ||
//------------------------------------------------------------------------------ | ||
// LAGr_MarkovClustering: Graph clustering using the Markov cluster algorithm | ||
//------------------------------------------------------------------------------ | ||
|
||
// LAGraph, (c) 2019-2022 by The LAGraph Contributors, All Rights Reserved. | ||
// SPDX-License-Identifier: BSD-2-Clause | ||
// | ||
// For additional details (including references to third party source code and | ||
// other files) see the LICENSE file or contact [email protected]. See | ||
// Contributors.txt for a full list of contributors. Created, in part, with | ||
// funding and support from the U.S. Government (see Acknowledgments.txt file). | ||
// DM22-0790 | ||
|
||
// Contributed by Cameron Quilici, Texas A&M University | ||
|
||
//------------------------------------------------------------------------------ | ||
|
||
#define LG_FREE_WORK \ | ||
{ \ | ||
GrB_free(&A); \ | ||
GrB_free(&T); \ | ||
GrB_free(&T_temp); \ | ||
GrB_free(&w); \ | ||
GrB_free(&D); \ | ||
GrB_free(&CC); \ | ||
GrB_free(&ones); \ | ||
GrB_free(&MSE); \ | ||
GrB_free(&argmax_v); \ | ||
GrB_free(&argmax_p); \ | ||
GrB_free(&zero_FP32); \ | ||
GrB_free(&true_BOOL); \ | ||
LAGraph_Free((void *)&pi, NULL); \ | ||
LAGraph_Free((void *)&px, NULL); \ | ||
LAGraph_Free((void *)&pi_new, NULL); \ | ||
LAGraph_Free((void *)&px_new, NULL); \ | ||
} | ||
|
||
#define LG_FREE_ALL \ | ||
{ \ | ||
LG_FREE_WORK; \ | ||
GrB_free(c_f); \ | ||
#define LG_FREE_ALL \ | ||
{ \ | ||
LG_FREE_WORK; \ | ||
GrB_free(c_f); \ | ||
} | ||
|
||
#define DEBUG | ||
|
@@ -25,7 +48,7 @@ | |
|
||
int LAGr_MarkovClustering( | ||
// output: | ||
GrB_Vector *c_f, // output matrix C_f(i, j) == 1 means vertex j is in cluster i | ||
GrB_Vector *c_f, // output cluster vector | ||
// input | ||
int e, // expansion coefficient | ||
int i, // inflation coefficient | ||
|
@@ -35,57 +58,55 @@ int LAGr_MarkovClustering( | |
LAGraph_Graph G, // input graph | ||
char *msg) | ||
{ | ||
char MATRIX_TYPE[LAGRAPH_MSG_LEN]; | ||
|
||
GrB_Matrix T = NULL; // Transfer workspace matrix | ||
GrB_Matrix T_temp = NULL; // The newly computed cluster matrix at the end of each loop | ||
GrB_Matrix A = NULL; | ||
GrB_Matrix T = NULL; // transfer workspace matrix | ||
GrB_Matrix T_temp = NULL; // subsequent iteration transfer matrix | ||
GrB_Matrix CC = NULL; | ||
|
||
GrB_Vector w = NULL; // weight vector to normalize T matrix | ||
|
||
GrB_Matrix D = NULL; // Diagonal workspace matrix | ||
GrB_Vector ones = NULL; // Vector of all 1's, used primarily to create identity | ||
GrB_Matrix D = NULL; // diagonal workspace matrix | ||
GrB_Vector ones = NULL; // vector of all 1 | ||
|
||
GrB_Matrix MSE = NULL; // Mean squared error between T and T_temp (between subsequent iterations) | ||
GrB_Matrix MSE = NULL; // Mean squared error between T and T_temp (between | ||
// subsequent iterations) | ||
|
||
GrB_Vector argmax_v = NULL; | ||
GrB_Vector argmax_p = NULL; | ||
|
||
GrB_Scalar zero_FP32 = NULL; | ||
GrB_Scalar true_BOOL = NULL; | ||
|
||
GrB_Index *pi = NULL, *px = NULL; | ||
GrB_Index *pi_new = NULL, *px_new = NULL; | ||
|
||
//-------------------------------------------------------------------------- | ||
// check inputs | ||
//-------------------------------------------------------------------------- | ||
|
||
LG_CLEAR_MSG; | ||
|
||
GrB_Matrix A = G->A; // Adjacency matrix of G | ||
GrB_Index n, nrows, ncols; // Dimension of A | ||
GRB_TRY(GrB_Matrix_nrows(&nrows, A)); | ||
GRB_TRY(GrB_Matrix_nrows(&ncols, A)); | ||
|
||
LG_ASSERT(c_f != NULL, GrB_NULL_POINTER); | ||
(*c_f) = NULL; | ||
LG_TRY(LAGraph_CheckGraph(G, msg)); | ||
|
||
LG_ASSERT_MSG(G->out_degree != NULL, -106, | ||
"G->out_degree must be defined"); | ||
|
||
LG_ASSERT_MSG(nrows == ncols, -1002, | ||
"Input matrix must be square"); | ||
GrB_Index n, nrows, ncols; | ||
GRB_TRY(GrB_Matrix_nrows(&nrows, G->A)); | ||
GRB_TRY(GrB_Matrix_nrows(&ncols, G->A)); | ||
|
||
LG_ASSERT_MSG(nrows == ncols, -1002, "Input matrix must be square"); | ||
n = nrows; | ||
|
||
// All types of input matrices get cast to type FP32 for this algorithm | ||
GRB_TRY(GrB_Matrix_new(&A, GrB_FP32, n, n)); | ||
GRB_TRY(GrB_apply(A, NULL, NULL, GrB_IDENTITY_FP32, G->A, NULL)); | ||
|
||
//-------------------------------------------------------------------------- | ||
// initializations | ||
//-------------------------------------------------------------------------- | ||
|
||
GRB_TRY(GrB_Matrix_new(&T, GrB_FP32, n, n)); | ||
GRB_TRY(GrB_Matrix_new(&CC, GrB_BOOL, n, n)); | ||
GRB_TRY(GrB_Matrix_new(&T_temp, GrB_FP32, n, n)); | ||
GRB_TRY(GrB_Matrix_new(&MSE, GrB_FP32, n, n)); | ||
GRB_TRY(GrB_Matrix_new(&D, GrB_FP32, n, n)); | ||
GRB_TRY(GrB_Vector_new(&w, GrB_FP32, n)); | ||
GRB_TRY(GrB_Vector_new(&ones, GrB_FP32, n)); | ||
GRB_TRY(GrB_Vector_new(&argmax_v, GrB_FP32, n)); | ||
|
@@ -100,17 +121,8 @@ int LAGr_MarkovClustering( | |
GRB_TRY(GrB_assign(ones, NULL, NULL, 1, GrB_ALL, n, NULL)); | ||
GRB_TRY(GrB_Matrix_diag(&D, ones, 0)); | ||
|
||
// Add self-edge to each vertex | ||
if (G->nself_edges != n) | ||
{ | ||
GRB_TRY(GrB_assign(A, A, NULL, D, GrB_ALL, n, GrB_ALL, n, GrB_DESC_SC)); | ||
G->A = A; | ||
G->out_degree, G->in_degree = NULL; | ||
G->nself_edges = LAGRAPH_UNKNOWN; | ||
LAGRAPH_TRY(LAGraph_Cached_OutDegree(G, msg)); | ||
LAGRAPH_TRY(LAGraph_Cached_InDegree(G, msg)); | ||
LAGRAPH_TRY(LAGraph_Cached_NSelfEdges(G, msg)); | ||
} | ||
// Ensure all vertices have a self-edge | ||
GRB_TRY(GrB_eWiseAdd(A, NULL, NULL, GrB_ONEB_FP32, A, D, NULL)); | ||
|
||
GRB_TRY(GrB_Matrix_dup(&T_temp, A)); | ||
GRB_TRY(GrB_Matrix_dup(&T, T_temp)); | ||
|
@@ -124,81 +136,81 @@ int LAGr_MarkovClustering( | |
// Normalization step: Scale each column in T_temp to add up to 1 | ||
// w = 1 ./ sum(A(:j)) | ||
// D = diag(w) | ||
GRB_TRY(GrB_reduce(w, NULL, NULL, GrB_PLUS_MONOID_FP32, T_temp, GrB_DESC_RT0)); | ||
GRB_TRY(GrB_apply(w, NULL, NULL, GrB_MINV_FP32, w, GrB_DESC_R)); | ||
GRB_TRY(GrB_reduce(w, NULL, NULL, GrB_PLUS_MONOID_FP32, T_temp, | ||
GrB_DESC_T0)); | ||
GRB_TRY(GrB_apply(w, NULL, NULL, GrB_MINV_FP32, w, NULL)); | ||
if (D != NULL) GrB_free(&D); | ||
GRB_TRY(GrB_Matrix_diag(&D, w, 0)); | ||
GRB_TRY(GrB_mxm(T_temp, NULL, NULL, GrB_PLUS_TIMES_SEMIRING_FP32, T_temp, D, GrB_DESC_R)); | ||
GRB_TRY(GrB_mxm(T_temp, NULL, NULL, GrB_PLUS_TIMES_SEMIRING_FP32, | ||
T_temp, D, NULL)); | ||
|
||
// Compute mean squared error between subsequent iterations | ||
GRB_TRY(GxB_Matrix_eWiseUnion(MSE, NULL, NULL, GrB_MINUS_FP32, T_temp, zero_FP32, T, zero_FP32, NULL)); | ||
GRB_TRY(GxB_Matrix_eWiseUnion(MSE, NULL, NULL, GrB_MINUS_FP32, T_temp, | ||
zero_FP32, T, zero_FP32, NULL)); | ||
GRB_TRY(GrB_eWiseMult(MSE, NULL, NULL, GrB_TIMES_FP32, MSE, MSE, NULL)); | ||
GRB_TRY(GrB_reduce(&mse, NULL, GrB_PLUS_MONOID_FP32, MSE, NULL)); | ||
GRB_TRY(GrB_Matrix_nvals(&nvals, MSE)); | ||
mse /= nvals; | ||
|
||
#ifdef DEBUG | ||
printf("\tMSE at iteration %lu: %f\n", iter, mse); | ||
printf("\tCurrent size of cluster matrix (nvals): %lu\n", nvals); | ||
#endif | ||
|
||
bool res = NULL; | ||
LAGRAPH_TRY(LAGraph_Matrix_IsEqual(&res, T, T_temp, msg)); | ||
if (res || iter > max_iter || mse < convergence_threshold) | ||
{ | ||
#ifdef DEBUG | ||
printf("\nTerminated after %lu iterations\n\n", iter); | ||
#endif | ||
if (iter > max_iter || mse < convergence_threshold) | ||
break; | ||
} | ||
|
||
// Set T to the previous iteration | ||
if (T != NULL) GrB_free(&T); | ||
GRB_TRY(GrB_Matrix_dup(&T, T_temp)); | ||
|
||
// Expansion step | ||
for (int i = 0; i < e - 1; i++) | ||
{ | ||
GRB_TRY(GrB_mxm(T_temp, NULL, NULL, GrB_PLUS_TIMES_SEMIRING_FP32, T_temp, T_temp, NULL)); | ||
GRB_TRY(GrB_mxm(T_temp, NULL, NULL, GrB_PLUS_TIMES_SEMIRING_FP32, | ||
T_temp, T_temp, NULL)); | ||
} | ||
|
||
// Inflation step | ||
GRB_TRY(GrB_Matrix_apply_BinaryOp2nd_FP32(T_temp, NULL, NULL, GxB_POW_FP32, T_temp, (double)i, NULL)); | ||
GRB_TRY(GrB_Matrix_apply_BinaryOp2nd_FP32( | ||
T_temp, NULL, NULL, GxB_POW_FP32, T_temp, (double)i, NULL)); | ||
|
||
// Prune values less than some small threshold | ||
GRB_TRY(GrB_select(T_temp, NULL, NULL, GrB_VALUEGT_FP32, T_temp, pruning_threshold, NULL)); | ||
|
||
GRB_TRY(GrB_select(T_temp, NULL, NULL, GrB_VALUEGT_FP32, T_temp, | ||
pruning_threshold, NULL)); | ||
|
||
iter++; | ||
} | ||
|
||
// In order to interpret the final iteration of the transfer matrix (T_temp), let | ||
// an attractor vertex be a vertex with at least one positive value within their corresponding | ||
// row. Each attractor is attracting the vertices (columns) which have positive values within | ||
// its row. To compute the output cluster vector, compute the argmax across columns of the | ||
// steady-state T_temp matrix. Then argmax_p(i) = k means vertex i is in cluster k. | ||
// In order to interpret the final iteration of the transfer matrix | ||
// (T_temp), let an attractor vertex be a vertex with at least one positive | ||
// value within their corresponding row. Each attractor is attracting the | ||
// vertices (columns) which have positive values within its row. To compute | ||
// the output cluster vector, compute the argmax across columns of the | ||
// steady-state T_temp matrix. Then argmax_p(i) = k means vertex i is in | ||
// cluster k. | ||
|
||
// argmax_v = max (T_temp) where argmax_v(j) = max (T_temp (:,j)) | ||
GRB_TRY(GrB_mxv(argmax_v, NULL, NULL, GrB_MAX_FIRST_SEMIRING_FP32, T_temp, ones, GrB_DESC_T0)); | ||
GRB_TRY(GrB_mxv(argmax_v, NULL, NULL, GrB_MAX_FIRST_SEMIRING_FP32, T_temp, | ||
ones, GrB_DESC_T0)); | ||
if (D != NULL) GrB_free(&D); | ||
GRB_TRY(GrB_Matrix_diag(&D, argmax_v, 0)); | ||
GRB_TRY(GrB_mxm(CC, NULL, NULL, GxB_ANY_EQ_FP32, T_temp, D, NULL)); | ||
GRB_TRY(GrB_select(CC, NULL, NULL, GrB_VALUENE_BOOL, CC, 0, NULL)); | ||
GRB_TRY(GrB_mxv(argmax_p, NULL, NULL, GxB_MIN_SECONDI_INT64, CC, ones, GrB_DESC_T0)); | ||
GRB_TRY(GrB_mxv(argmax_p, NULL, NULL, GxB_MIN_SECONDI_INT64, CC, ones, | ||
GrB_DESC_T0)); | ||
|
||
// pi := array of argmax_p indices, px := array of argmax_p values | ||
GrB_Index *pi, *px, *pi_new, *px_new = NULL; | ||
GrB_Index p_nvals; | ||
GRB_TRY(GrB_Vector_nvals(&p_nvals, argmax_p)); | ||
LAGRAPH_TRY(LAGraph_Malloc((void **)&pi, p_nvals, sizeof(GrB_Index), msg)); | ||
LAGRAPH_TRY(LAGraph_Malloc((void **)&px, p_nvals, sizeof(GrB_Index), msg)); | ||
LG_TRY(LAGraph_Malloc((void **)&pi, p_nvals, sizeof(GrB_Index), msg)); | ||
LG_TRY(LAGraph_Malloc((void **)&px, p_nvals, sizeof(GrB_Index), msg)); | ||
GRB_TRY(GrB_Vector_extractTuples_INT64(pi, px, &p_nvals, argmax_p)); | ||
|
||
// Sometimes (particularly, when the pruning threshold is high), some columns in the | ||
// steady-state T_temp have no values, i.e., they are not attracted to any vertex. In this | ||
// case, fill in the missing values with the index of the vertex (these vertices will be | ||
// arbitrarily put in the cluster of their index). | ||
// Sometimes (particularly, when the pruning threshold is high), some | ||
// columns in the steady-state T_temp have no values, i.e., they are not | ||
// attracted to any vertex. In this case, fill in the missing values with | ||
// the index of the vertex (these vertices will be arbitrarily put in the | ||
// cluster of their index). | ||
if (p_nvals < n) | ||
{ | ||
LAGRAPH_TRY(LAGraph_Malloc((void **)&pi_new, n, sizeof(GrB_Index), msg)); | ||
LAGRAPH_TRY(LAGraph_Malloc((void **)&px_new, n, sizeof(GrB_Index), msg)); | ||
LG_TRY(LAGraph_Malloc((void **)&pi_new, n, sizeof(GrB_Index), msg)); | ||
LG_TRY(LAGraph_Malloc((void **)&px_new, n, sizeof(GrB_Index), msg)); | ||
|
||
GrB_Index j = 0; | ||
GrB_Index currentValue = pi[0]; | ||
|
@@ -208,7 +220,8 @@ int LAGr_MarkovClustering( | |
while (currentValue < pi[i] && j < n) | ||
{ | ||
pi_new[j] = currentValue; | ||
px_new[j] = currentValue; // Fill skipped px values with their index or another placeholder value | ||
px_new[j] = currentValue; // Fill skipped px values with their | ||
// index | ||
currentValue++; | ||
j++; | ||
} | ||
|
@@ -230,11 +243,13 @@ int LAGr_MarkovClustering( | |
j++; | ||
} | ||
|
||
LAGraph_Free((void *)&pi, NULL); | ||
LAGraph_Free((void *)&px, NULL); | ||
|
||
LAGraph_Free((void **)&pi, NULL); | ||
LAGraph_Free((void **)&px, NULL); | ||
pi = pi_new; | ||
px = px_new; | ||
// Avoid double free | ||
pi_new = NULL; | ||
px_new = NULL; | ||
} | ||
|
||
GrB_Vector c = NULL; | ||
|
Oops, something went wrong.