Skip to content

Commit

Permalink
Updated MCL to fill in unclustered vertices upon termination
Browse files Browse the repository at this point in the history
  • Loading branch information
cquil11 committed Mar 12, 2024
1 parent bbd8ef6 commit 55f35af
Showing 1 changed file with 96 additions and 71 deletions.
167 changes: 96 additions & 71 deletions experimental/algorithm/LAGr_MarkovClustering.c
Original file line number Diff line number Diff line change
@@ -1,16 +1,15 @@
#define LG_FREE_WORK \
{ \
GrB_free(&C); \
GrB_free(&C_temp); \
GrB_free(&vpc); \
GrB_free(&w); \
GrB_free(&D); \
GrB_free(&ones); \
GrB_free(&MSE); \
GrB_free(&argmax_v); \
GrB_free(&argmax_p); \
GrB_free(&zero_INT64); \
GrB_free(&true_BOOL); \
#define LG_FREE_WORK \
{ \
GrB_free(&T); \
GrB_free(&T_temp); \
GrB_free(&w); \
GrB_free(&D); \
GrB_free(&ones); \
GrB_free(&MSE); \
GrB_free(&argmax_v); \
GrB_free(&argmax_p); \
GrB_free(&zero_FP32); \
GrB_free(&true_BOOL); \
}

#define LG_FREE_ALL \
Expand Down Expand Up @@ -38,22 +37,21 @@ int LAGr_MarkovClustering(
{
char MATRIX_TYPE[LAGRAPH_MSG_LEN];

GrB_Matrix C = NULL; // Cluster workspace matrix
GrB_Matrix C_temp = NULL; // The newly computed cluster matrix at the end of each loop
GrB_Matrix T = NULL; // Transfer workspace matrix
GrB_Matrix T_temp = NULL; // The newly computed cluster matrix at the end of each loop
GrB_Matrix CC = NULL;
GrB_Vector vpc = NULL; // vertices per cluster

GrB_Vector w = NULL; // weight vector to normalize C matrix
GrB_Vector w = NULL; // weight vector to normalize T matrix

GrB_Matrix D = NULL; // Diagonal workspace matrix
GrB_Vector ones = NULL; // Vector of all 1's, used primarily to create identity

GrB_Matrix MSE = NULL; // Mean squared error between C and C_temp (between subsequent iterations)
GrB_Matrix MSE = NULL; // Mean squared error between T and T_temp (between subsequent iterations)

GrB_Vector argmax_v = NULL;
GrB_Vector argmax_p = NULL;

GrB_Scalar zero_INT64 = NULL;
GrB_Scalar zero_FP32 = NULL;
GrB_Scalar true_BOOL = NULL;

//--------------------------------------------------------------------------
Expand Down Expand Up @@ -83,20 +81,19 @@ int LAGr_MarkovClustering(
// initializations
//--------------------------------------------------------------------------

GRB_TRY(GrB_Matrix_new(&C, GrB_FP32, n, n));
GRB_TRY(GrB_Matrix_new(&T, GrB_FP32, n, n));
GRB_TRY(GrB_Matrix_new(&CC, GrB_BOOL, n, n));
GRB_TRY(GrB_Matrix_new(&C_temp, GrB_FP32, n, n));
GRB_TRY(GrB_Vector_new(&vpc, GrB_INT64, n));
GRB_TRY(GrB_Matrix_new(&T_temp, GrB_FP32, n, n));
GRB_TRY(GrB_Matrix_new(&MSE, GrB_FP32, n, n));
GRB_TRY(GrB_Matrix_new(&D, GrB_FP32, n, n));
GRB_TRY(GrB_Vector_new(&w, GrB_FP32, n));
GRB_TRY(GrB_Vector_new(&ones, GrB_FP32, n));
GRB_TRY(GrB_Vector_new(&argmax_v, GrB_FP32, n));
GRB_TRY(GrB_Vector_new(&argmax_p, GrB_INT64, n));
GRB_TRY(GrB_Scalar_new(&zero_INT64, GrB_INT64));
GRB_TRY(GrB_Scalar_new(&zero_FP32, GrB_FP32));
GRB_TRY(GrB_Scalar_new(&true_BOOL, GrB_BOOL));

GRB_TRY(GrB_Scalar_setElement(zero_INT64, 0));
GRB_TRY(GrB_Scalar_setElement(zero_FP32, 0));
GRB_TRY(GrB_Scalar_setElement(true_BOOL, 1));

// Create identity
Expand All @@ -115,34 +112,28 @@ int LAGr_MarkovClustering(
LAGRAPH_TRY(LAGraph_Cached_NSelfEdges(G, msg));
}

GRB_TRY(GrB_Matrix_dup(&C_temp, A));
GRB_TRY(GrB_Matrix_dup(&C, C_temp));
GRB_TRY(GrB_Matrix_dup(&T_temp, A));
GRB_TRY(GrB_Matrix_dup(&T, T_temp));

double mse = 0;
GrB_Index iter = 0;
GrB_Index nvals;

while (true)
{
printf("Iteration %lu\n", iter);
GxB_print(C_temp, GxB_SUMMARY);

// Normalization step: Scale each column in C_temp to add up to 1
// Normalization step: Scale each column in T_temp to add up to 1
// w = 1 ./ sum(A(:j))
// D = diag(w)
GRB_TRY(GrB_reduce(w, NULL, NULL, GrB_PLUS_MONOID_FP32, C_temp, GrB_DESC_RT0));
GRB_TRY(GrB_reduce(w, NULL, NULL, GrB_PLUS_MONOID_FP32, T_temp, GrB_DESC_RT0));
GRB_TRY(GrB_apply(w, NULL, NULL, GrB_MINV_FP32, w, GrB_DESC_R));
GRB_TRY(GrB_Matrix_diag(&D, w, 0));
GRB_TRY(GrB_mxm(C_temp, NULL, NULL, GrB_PLUS_TIMES_SEMIRING_FP32, C_temp, D, GrB_DESC_R));

// Prune values less than some small threshold
GRB_TRY(GrB_select(C_temp, NULL, NULL, GrB_VALUEGT_FP32, C_temp, pruning_threshold, NULL));
GRB_TRY(GrB_mxm(T_temp, NULL, NULL, GrB_PLUS_TIMES_SEMIRING_FP32, T_temp, D, GrB_DESC_R));

// Compute mean squared error between subsequent iterations
GRB_TRY(GxB_Matrix_eWiseUnion(MSE, NULL, NULL, GrB_MINUS_FP32, C_temp, zero_INT64, C, zero_INT64, NULL));
GRB_TRY(GxB_Matrix_eWiseUnion(MSE, NULL, NULL, GrB_MINUS_FP32, T_temp, zero_FP32, T, zero_FP32, NULL));
GRB_TRY(GrB_eWiseMult(MSE, NULL, NULL, GrB_TIMES_FP32, MSE, MSE, NULL));
GRB_TRY(GrB_reduce(&mse, NULL, GrB_PLUS_MONOID_FP32, MSE, NULL));
GRB_TRY(GrB_Matrix_nvals(&nvals, C_temp));
GRB_TRY(GrB_Matrix_nvals(&nvals, MSE));
mse /= nvals;

#ifdef DEBUG
Expand All @@ -151,7 +142,7 @@ int LAGr_MarkovClustering(
#endif

bool res = NULL;
LAGRAPH_TRY(LAGraph_Matrix_IsEqual(&res, C, C_temp, msg));
LAGRAPH_TRY(LAGraph_Matrix_IsEqual(&res, T, T_temp, msg));
if (res || iter > max_iter || mse < convergence_threshold)
{
#ifdef DEBUG
Expand All @@ -160,67 +151,101 @@ int LAGr_MarkovClustering(
break;
}

// Set C to the previous iteration
GRB_TRY(GrB_Matrix_dup(&C, C_temp));
// Set T to the previous iteration
GRB_TRY(GrB_Matrix_dup(&T, T_temp));

// Expansion step
for (int i = 0; i < e - 1; i++)
{
GRB_TRY(GrB_mxm(C_temp, NULL, NULL, GrB_PLUS_TIMES_SEMIRING_FP32, C_temp, C_temp, NULL));
GRB_TRY(GrB_mxm(T_temp, NULL, NULL, GrB_PLUS_TIMES_SEMIRING_FP32, T_temp, T_temp, NULL));
}

// Inflation step
GRB_TRY(GrB_Matrix_apply_BinaryOp2nd_FP32(C_temp, NULL, NULL, GxB_POW_FP32, C_temp, (double)i, NULL));
GRB_TRY(GrB_Matrix_apply_BinaryOp2nd_FP32(T_temp, NULL, NULL, GxB_POW_FP32, T_temp, (double)i, NULL));

// Prune values less than some small threshold
GRB_TRY(GrB_select(T_temp, NULL, NULL, GrB_VALUEGT_FP32, T_temp, pruning_threshold, NULL));


iter++;
}

// argmax_v = max (C_temp) where argmax_v(j) = max (C_temp (:,j))
GRB_TRY(GrB_mxv(argmax_v, NULL, NULL, GrB_MAX_FIRST_SEMIRING_FP32, C_temp, ones, GrB_DESC_T0));
// D = daig (argmax_v)
// In order to interpret the final iteration of the transfer matrix (T_temp), let
// an attractor vertex be a vertex with at least one positive value within their corresponding
// row. Each attractor is attracting the vertices (columns) which have positive values within
// its row. To compute the output cluster vector, compute the argmax across columns of the
// steady-state T_temp matrix. Then argmax_p(i) = k means vertex i is in cluster k.

// argmax_v = max (T_temp) where argmax_v(j) = max (T_temp (:,j))
GRB_TRY(GrB_mxv(argmax_v, NULL, NULL, GrB_MAX_FIRST_SEMIRING_FP32, T_temp, ones, GrB_DESC_T0));
GRB_TRY(GrB_Matrix_diag(&D, argmax_v, 0));
GRB_TRY(GrB_mxm(CC, NULL, NULL, GxB_ANY_EQ_FP32, C_temp, D, NULL));
GRB_TRY(GrB_mxm(CC, NULL, NULL, GxB_ANY_EQ_FP32, T_temp, D, NULL));
GRB_TRY(GrB_select(CC, NULL, NULL, GrB_VALUENE_BOOL, CC, 0, NULL));
GRB_TRY(GrB_mxv(argmax_p, NULL, NULL, GxB_MIN_SECONDI_INT64, CC, ones, GrB_DESC_T0));

// pi := array of argmax_p indices, px := array of argmax_p values
GrB_Index *pi, *px = NULL;
GrB_Index *pi, *px, *pi_new, *px_new = NULL;
GrB_Index p_nvals;
GRB_TRY(GrB_Vector_nvals(&p_nvals, argmax_p));
LAGRAPH_TRY(LAGraph_Malloc((void **)&pi, n, sizeof(GrB_Index), msg));
LAGRAPH_TRY(LAGraph_Malloc((void **)&px, n, sizeof(GrB_Index), msg));
GRB_TRY(GrB_Vector_extractTuples_INT64(pi, px, &n, argmax_p));
// Can reuse CC matrix, this will hold the final clustering result
GRB_TRY(GrB_Matrix_clear(CC));
GRB_TRY(GxB_Matrix_build_Scalar(CC, px, pi, true_BOOL, p_nvals));
LAGraph_Free((void *)&pi, NULL);
LAGraph_Free((void *)&px, NULL);
LAGRAPH_TRY(LAGraph_Malloc((void **)&pi, p_nvals, sizeof(GrB_Index), msg));
LAGRAPH_TRY(LAGraph_Malloc((void **)&px, p_nvals, sizeof(GrB_Index), msg));
GRB_TRY(GrB_Vector_extractTuples_INT64(pi, px, &p_nvals, argmax_p));

// Sometimes (particularly, when the pruning threshold is high), some columns in the
// steady-state T_temp have no values, i.e., they are not attracted to any vertex. In this
// case, fill in the missing values with the index of the vertex (these vertices will be
// arbitrarily put in the cluster of their index).
if (p_nvals < n)
{
LAGRAPH_TRY(LAGraph_Malloc((void **)&pi_new, n, sizeof(GrB_Index), msg));
LAGRAPH_TRY(LAGraph_Malloc((void **)&px_new, n, sizeof(GrB_Index), msg));

GRB_TRY(GrB_reduce(vpc, NULL, NULL, GrB_PLUS_MONOID_INT64, CC, NULL));
GrB_Index j = 0;
GrB_Index currentValue = pi[0];

#ifdef DEBUG
printf("Vertices per cluster\n");
GxB_print(vpc, GxB_SHORT);
#endif
for (int i = 0; i < p_nvals && j < n; ++i)
{
while (currentValue < pi[i] && j < n)
{
pi_new[j] = currentValue;
px_new[j] = currentValue; // Fill skipped px values with their index or another placeholder value
currentValue++;
j++;
}
if (j < n)
{
pi_new[j] = pi[i];
px_new[j] = px[i];
currentValue++;
j++;
}
}

GrB_Index *CfI, *CfJ, ncc;
// Handle any skipped values at the end
while (j < n)
{
pi_new[j] = currentValue;
px_new[j] = currentValue; // Fill remaining px values
currentValue++;
j++;
}

GRB_TRY(GrB_Matrix_nvals(&ncc, CC));
LAGraph_Free((void *)&pi, NULL);
LAGraph_Free((void *)&px, NULL);

LAGRAPH_TRY(LAGraph_Malloc((void **)&CfI, ncc, sizeof(GrB_Index), msg));
LAGRAPH_TRY(LAGraph_Malloc((void **)&CfJ, ncc, sizeof(GrB_Index), msg));
GRB_TRY(GrB_Matrix_extractTuples_BOOL(CfI, CfJ, NULL, &ncc, CC));
pi = pi_new;
px = px_new;
}

GrB_Vector c = NULL;
GRB_TRY(GrB_Vector_new(&c, GrB_INT64, ncc));
GRB_TRY(GrB_Vector_build(c, CfJ, CfI, ncc, NULL));

GRB_TRY(GrB_Vector_new(&c, GrB_INT64, n));
GRB_TRY(GrB_Vector_build_INT64(c, pi, px, n, NULL));
GrB_Vector_wait(c, GrB_MATERIALIZE);

LAGraph_Free((void **)&CfI, NULL);
LAGraph_Free((void **)&CfJ, NULL);
LAGraph_Free((void *)&pi, NULL);
LAGraph_Free((void *)&px, NULL);

(*c_f) = c; // Set output matrix
(*c_f) = c; // Set output vector

LG_FREE_WORK;

Expand Down

0 comments on commit 55f35af

Please sign in to comment.