Skip to content

Commit

Permalink
Merge pull request #250 from cquil11/cam-cleanup
Browse files Browse the repository at this point in the history
Cleaning up clustering code for v1.2 branch
  • Loading branch information
DrTimothyAldenDavis authored Apr 17, 2024
2 parents d5b6e93 + fe2bdf5 commit 7ebd378
Show file tree
Hide file tree
Showing 8 changed files with 662 additions and 253 deletions.
23 changes: 23 additions & 0 deletions data/mcl.mtx
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
%%MatrixMarket matrix coordinate integer general
%%GraphBLAS type bool
10 10 18
1 2 1
1 3 1
1 4 1
2 3 1
4 2 1
4 3 1
4 5 1
5 6 1
5 7 1
6 7 1
7 5 1
8 3 1
8 9 1
8 10 1
9 8 1
9 10 1
10 5 1
10 9 1


193 changes: 104 additions & 89 deletions experimental/algorithm/LAGr_MarkovClustering.c
Original file line number Diff line number Diff line change
@@ -1,21 +1,44 @@
#define LG_FREE_WORK \
{ \
GrB_free(&T); \
GrB_free(&T_temp); \
GrB_free(&w); \
GrB_free(&D); \
GrB_free(&ones); \
GrB_free(&MSE); \
GrB_free(&argmax_v); \
GrB_free(&argmax_p); \
GrB_free(&zero_FP32); \
GrB_free(&true_BOOL); \
//------------------------------------------------------------------------------
// LAGr_MarkovClustering: Graph clustering using the Markov cluster algorithm
//------------------------------------------------------------------------------

// LAGraph, (c) 2019-2022 by The LAGraph Contributors, All Rights Reserved.
// SPDX-License-Identifier: BSD-2-Clause
//
// For additional details (including references to third party source code and
// other files) see the LICENSE file or contact [email protected]. See
// Contributors.txt for a full list of contributors. Created, in part, with
// funding and support from the U.S. Government (see Acknowledgments.txt file).
// DM22-0790

// Contributed by Cameron Quilici, Texas A&M University

//------------------------------------------------------------------------------

#define LG_FREE_WORK \
{ \
GrB_free(&A); \
GrB_free(&T); \
GrB_free(&T_temp); \
GrB_free(&w); \
GrB_free(&D); \
GrB_free(&CC); \
GrB_free(&ones); \
GrB_free(&MSE); \
GrB_free(&argmax_v); \
GrB_free(&argmax_p); \
GrB_free(&zero_FP32); \
GrB_free(&true_BOOL); \
LAGraph_Free((void *)&pi, NULL); \
LAGraph_Free((void *)&px, NULL); \
LAGraph_Free((void *)&pi_new, NULL); \
LAGraph_Free((void *)&px_new, NULL); \
}

#define LG_FREE_ALL \
{ \
LG_FREE_WORK; \
GrB_free(c_f); \
#define LG_FREE_ALL \
{ \
LG_FREE_WORK; \
GrB_free(c_f); \
}

#define DEBUG
Expand All @@ -25,7 +48,7 @@

int LAGr_MarkovClustering(
// output:
GrB_Vector *c_f, // output matrix C_f(i, j) == 1 means vertex j is in cluster i
GrB_Vector *c_f, // output cluster vector
// input
int e, // expansion coefficient
int i, // inflation coefficient
Expand All @@ -35,57 +58,55 @@ int LAGr_MarkovClustering(
LAGraph_Graph G, // input graph
char *msg)
{
char MATRIX_TYPE[LAGRAPH_MSG_LEN];

GrB_Matrix T = NULL; // Transfer workspace matrix
GrB_Matrix T_temp = NULL; // The newly computed cluster matrix at the end of each loop
GrB_Matrix A = NULL;
GrB_Matrix T = NULL; // transfer workspace matrix
GrB_Matrix T_temp = NULL; // subsequent iteration transfer matrix
GrB_Matrix CC = NULL;

GrB_Vector w = NULL; // weight vector to normalize T matrix

GrB_Matrix D = NULL; // Diagonal workspace matrix
GrB_Vector ones = NULL; // Vector of all 1's, used primarily to create identity
GrB_Matrix D = NULL; // diagonal workspace matrix
GrB_Vector ones = NULL; // vector of all 1

GrB_Matrix MSE = NULL; // Mean squared error between T and T_temp (between subsequent iterations)
GrB_Matrix MSE = NULL; // Mean squared error between T and T_temp (between
// subsequent iterations)

GrB_Vector argmax_v = NULL;
GrB_Vector argmax_p = NULL;

GrB_Scalar zero_FP32 = NULL;
GrB_Scalar true_BOOL = NULL;

GrB_Index *pi = NULL, *px = NULL;
GrB_Index *pi_new = NULL, *px_new = NULL;

//--------------------------------------------------------------------------
// check inputs
//--------------------------------------------------------------------------

LG_CLEAR_MSG;

GrB_Matrix A = G->A; // Adjacency matrix of G
GrB_Index n, nrows, ncols; // Dimension of A
GRB_TRY(GrB_Matrix_nrows(&nrows, A));
GRB_TRY(GrB_Matrix_nrows(&ncols, A));

LG_ASSERT(c_f != NULL, GrB_NULL_POINTER);
(*c_f) = NULL;
LG_TRY(LAGraph_CheckGraph(G, msg));

LG_ASSERT_MSG(G->out_degree != NULL, -106,
"G->out_degree must be defined");

LG_ASSERT_MSG(nrows == ncols, -1002,
"Input matrix must be square");
GrB_Index n, nrows, ncols;
GRB_TRY(GrB_Matrix_nrows(&nrows, G->A));
GRB_TRY(GrB_Matrix_nrows(&ncols, G->A));

LG_ASSERT_MSG(nrows == ncols, -1002, "Input matrix must be square");
n = nrows;

// All types of input matrices get cast to type FP32 for this algorithm
GRB_TRY(GrB_Matrix_new(&A, GrB_FP32, n, n));
GRB_TRY(GrB_apply(A, NULL, NULL, GrB_IDENTITY_FP32, G->A, NULL));

//--------------------------------------------------------------------------
// initializations
//--------------------------------------------------------------------------

GRB_TRY(GrB_Matrix_new(&T, GrB_FP32, n, n));
GRB_TRY(GrB_Matrix_new(&CC, GrB_BOOL, n, n));
GRB_TRY(GrB_Matrix_new(&T_temp, GrB_FP32, n, n));
GRB_TRY(GrB_Matrix_new(&MSE, GrB_FP32, n, n));
GRB_TRY(GrB_Matrix_new(&D, GrB_FP32, n, n));
GRB_TRY(GrB_Vector_new(&w, GrB_FP32, n));
GRB_TRY(GrB_Vector_new(&ones, GrB_FP32, n));
GRB_TRY(GrB_Vector_new(&argmax_v, GrB_FP32, n));
Expand All @@ -100,17 +121,8 @@ int LAGr_MarkovClustering(
GRB_TRY(GrB_assign(ones, NULL, NULL, 1, GrB_ALL, n, NULL));
GRB_TRY(GrB_Matrix_diag(&D, ones, 0));

// Add self-edge to each vertex
if (G->nself_edges != n)
{
GRB_TRY(GrB_assign(A, A, NULL, D, GrB_ALL, n, GrB_ALL, n, GrB_DESC_SC));
G->A = A;
G->out_degree, G->in_degree = NULL;
G->nself_edges = LAGRAPH_UNKNOWN;
LAGRAPH_TRY(LAGraph_Cached_OutDegree(G, msg));
LAGRAPH_TRY(LAGraph_Cached_InDegree(G, msg));
LAGRAPH_TRY(LAGraph_Cached_NSelfEdges(G, msg));
}
// Ensure all vertices have a self-edge
GRB_TRY(GrB_eWiseAdd(A, NULL, NULL, GrB_ONEB_FP32, A, D, NULL));

GRB_TRY(GrB_Matrix_dup(&T_temp, A));
GRB_TRY(GrB_Matrix_dup(&T, T_temp));
Expand All @@ -124,81 +136,81 @@ int LAGr_MarkovClustering(
// Normalization step: Scale each column in T_temp to add up to 1
// w = 1 ./ sum(A(:j))
// D = diag(w)
GRB_TRY(GrB_reduce(w, NULL, NULL, GrB_PLUS_MONOID_FP32, T_temp, GrB_DESC_RT0));
GRB_TRY(GrB_apply(w, NULL, NULL, GrB_MINV_FP32, w, GrB_DESC_R));
GRB_TRY(GrB_reduce(w, NULL, NULL, GrB_PLUS_MONOID_FP32, T_temp,
GrB_DESC_T0));
GRB_TRY(GrB_apply(w, NULL, NULL, GrB_MINV_FP32, w, NULL));
if (D != NULL) GrB_free(&D);
GRB_TRY(GrB_Matrix_diag(&D, w, 0));
GRB_TRY(GrB_mxm(T_temp, NULL, NULL, GrB_PLUS_TIMES_SEMIRING_FP32, T_temp, D, GrB_DESC_R));
GRB_TRY(GrB_mxm(T_temp, NULL, NULL, GrB_PLUS_TIMES_SEMIRING_FP32,
T_temp, D, NULL));

// Compute mean squared error between subsequent iterations
GRB_TRY(GxB_Matrix_eWiseUnion(MSE, NULL, NULL, GrB_MINUS_FP32, T_temp, zero_FP32, T, zero_FP32, NULL));
GRB_TRY(GxB_Matrix_eWiseUnion(MSE, NULL, NULL, GrB_MINUS_FP32, T_temp,
zero_FP32, T, zero_FP32, NULL));
GRB_TRY(GrB_eWiseMult(MSE, NULL, NULL, GrB_TIMES_FP32, MSE, MSE, NULL));
GRB_TRY(GrB_reduce(&mse, NULL, GrB_PLUS_MONOID_FP32, MSE, NULL));
GRB_TRY(GrB_Matrix_nvals(&nvals, MSE));
mse /= nvals;

#ifdef DEBUG
printf("\tMSE at iteration %lu: %f\n", iter, mse);
printf("\tCurrent size of cluster matrix (nvals): %lu\n", nvals);
#endif

bool res = NULL;
LAGRAPH_TRY(LAGraph_Matrix_IsEqual(&res, T, T_temp, msg));
if (res || iter > max_iter || mse < convergence_threshold)
{
#ifdef DEBUG
printf("\nTerminated after %lu iterations\n\n", iter);
#endif
if (iter > max_iter || mse < convergence_threshold)
break;
}

// Set T to the previous iteration
if (T != NULL) GrB_free(&T);
GRB_TRY(GrB_Matrix_dup(&T, T_temp));

// Expansion step
for (int i = 0; i < e - 1; i++)
{
GRB_TRY(GrB_mxm(T_temp, NULL, NULL, GrB_PLUS_TIMES_SEMIRING_FP32, T_temp, T_temp, NULL));
GRB_TRY(GrB_mxm(T_temp, NULL, NULL, GrB_PLUS_TIMES_SEMIRING_FP32,
T_temp, T_temp, NULL));
}

// Inflation step
GRB_TRY(GrB_Matrix_apply_BinaryOp2nd_FP32(T_temp, NULL, NULL, GxB_POW_FP32, T_temp, (double)i, NULL));
GRB_TRY(GrB_Matrix_apply_BinaryOp2nd_FP32(
T_temp, NULL, NULL, GxB_POW_FP32, T_temp, (double)i, NULL));

// Prune values less than some small threshold
GRB_TRY(GrB_select(T_temp, NULL, NULL, GrB_VALUEGT_FP32, T_temp, pruning_threshold, NULL));

GRB_TRY(GrB_select(T_temp, NULL, NULL, GrB_VALUEGT_FP32, T_temp,
pruning_threshold, NULL));

iter++;
}

// In order to interpret the final iteration of the transfer matrix (T_temp), let
// an attractor vertex be a vertex with at least one positive value within their corresponding
// row. Each attractor is attracting the vertices (columns) which have positive values within
// its row. To compute the output cluster vector, compute the argmax across columns of the
// steady-state T_temp matrix. Then argmax_p(i) = k means vertex i is in cluster k.
// In order to interpret the final iteration of the transfer matrix
// (T_temp), let an attractor vertex be a vertex with at least one positive
// value within their corresponding row. Each attractor is attracting the
// vertices (columns) which have positive values within its row. To compute
// the output cluster vector, compute the argmax across columns of the
// steady-state T_temp matrix. Then argmax_p(i) = k means vertex i is in
// cluster k.

// argmax_v = max (T_temp) where argmax_v(j) = max (T_temp (:,j))
GRB_TRY(GrB_mxv(argmax_v, NULL, NULL, GrB_MAX_FIRST_SEMIRING_FP32, T_temp, ones, GrB_DESC_T0));
GRB_TRY(GrB_mxv(argmax_v, NULL, NULL, GrB_MAX_FIRST_SEMIRING_FP32, T_temp,
ones, GrB_DESC_T0));
if (D != NULL) GrB_free(&D);
GRB_TRY(GrB_Matrix_diag(&D, argmax_v, 0));
GRB_TRY(GrB_mxm(CC, NULL, NULL, GxB_ANY_EQ_FP32, T_temp, D, NULL));
GRB_TRY(GrB_select(CC, NULL, NULL, GrB_VALUENE_BOOL, CC, 0, NULL));
GRB_TRY(GrB_mxv(argmax_p, NULL, NULL, GxB_MIN_SECONDI_INT64, CC, ones, GrB_DESC_T0));
GRB_TRY(GrB_mxv(argmax_p, NULL, NULL, GxB_MIN_SECONDI_INT64, CC, ones,
GrB_DESC_T0));

// pi := array of argmax_p indices, px := array of argmax_p values
GrB_Index *pi, *px, *pi_new, *px_new = NULL;
GrB_Index p_nvals;
GRB_TRY(GrB_Vector_nvals(&p_nvals, argmax_p));
LAGRAPH_TRY(LAGraph_Malloc((void **)&pi, p_nvals, sizeof(GrB_Index), msg));
LAGRAPH_TRY(LAGraph_Malloc((void **)&px, p_nvals, sizeof(GrB_Index), msg));
LG_TRY(LAGraph_Malloc((void **)&pi, p_nvals, sizeof(GrB_Index), msg));
LG_TRY(LAGraph_Malloc((void **)&px, p_nvals, sizeof(GrB_Index), msg));
GRB_TRY(GrB_Vector_extractTuples_INT64(pi, px, &p_nvals, argmax_p));

// Sometimes (particularly, when the pruning threshold is high), some columns in the
// steady-state T_temp have no values, i.e., they are not attracted to any vertex. In this
// case, fill in the missing values with the index of the vertex (these vertices will be
// arbitrarily put in the cluster of their index).
// Sometimes (particularly, when the pruning threshold is high), some
// columns in the steady-state T_temp have no values, i.e., they are not
// attracted to any vertex. In this case, fill in the missing values with
// the index of the vertex (these vertices will be arbitrarily put in the
// cluster of their index).
if (p_nvals < n)
{
LAGRAPH_TRY(LAGraph_Malloc((void **)&pi_new, n, sizeof(GrB_Index), msg));
LAGRAPH_TRY(LAGraph_Malloc((void **)&px_new, n, sizeof(GrB_Index), msg));
LG_TRY(LAGraph_Malloc((void **)&pi_new, n, sizeof(GrB_Index), msg));
LG_TRY(LAGraph_Malloc((void **)&px_new, n, sizeof(GrB_Index), msg));

GrB_Index j = 0;
GrB_Index currentValue = pi[0];
Expand All @@ -208,7 +220,8 @@ int LAGr_MarkovClustering(
while (currentValue < pi[i] && j < n)
{
pi_new[j] = currentValue;
px_new[j] = currentValue; // Fill skipped px values with their index or another placeholder value
px_new[j] = currentValue; // Fill skipped px values with their
// index
currentValue++;
j++;
}
Expand All @@ -230,11 +243,13 @@ int LAGr_MarkovClustering(
j++;
}

LAGraph_Free((void *)&pi, NULL);
LAGraph_Free((void *)&px, NULL);

LAGraph_Free((void **)&pi, NULL);
LAGraph_Free((void **)&px, NULL);
pi = pi_new;
px = px_new;
// Avoid double free
pi_new = NULL;
px_new = NULL;
}

GrB_Vector c = NULL;
Expand Down
Loading

0 comments on commit 7ebd378

Please sign in to comment.