From 6816a6411c8f9f30aab2d3cd1da89dfabf69e09e Mon Sep 17 00:00:00 2001 From: Woosub-Kim Date: Mon, 8 Jan 2024 12:01:10 +0900 Subject: [PATCH] skip single chained complex for scorecomplex --- src/strucclustutils/scorecomplex.cpp | 70 +++++++++++++++++----------- 1 file changed, 42 insertions(+), 28 deletions(-) diff --git a/src/strucclustutils/scorecomplex.cpp b/src/strucclustutils/scorecomplex.cpp index c75ee5f1..f2fd4df2 100644 --- a/src/strucclustutils/scorecomplex.cpp +++ b/src/strucclustutils/scorecomplex.cpp @@ -299,11 +299,38 @@ class DBSCANCluster { fillDistMap(); } - unsigned int clusterAlns() { + unsigned int getAlnClusters() { + if (searchResult.alnVec.size() <= idealClusterSize) + return checkClusteringNecessity(); + + // TO skip single chained complex + if (idealClusterSize==1) + return UNCLUSTERED; + + return runDBSCAN(); + } + +private: + const double LEARNING_RATE = 0.1; + const double DEFAULT_EPS = 0.1; + SearchResult &searchResult; + double eps; + unsigned int cLabel; + unsigned int prevMaxClusterSize; + unsigned int maxClusterSize; + unsigned int recursiveNum; + unsigned int idealClusterSize; + unsigned int minClusterSize; + std::vector neighbors; + std::vector neighborsOfCurrNeighbor; + std::vector qFoundChainKeys; + std::vector dbFoundChainKeys; + distMap_t distMap; + cluster_t currClusters; + cluster_t bestClusters; + + unsigned int runDBSCAN() { initializeAlnLabels(); - if (recursiveNum==0 && searchResult.alnVec.size() <= idealClusterSize) { - checkClusteringNecessity(); - } if (++recursiveNum > MAX_RECURSIVE_NUM) return UNCLUSTERED; for (size_t centerAlnIdx=0; centerAlnIdx < searchResult.alnVec.size(); centerAlnIdx++) { ChainToChainAln &centerAln = searchResult.alnVec[centerAlnIdx]; @@ -335,7 +362,7 @@ class DBSCANCluster { continue; else if (neighbors.size() == maxClusterSize) currClusters.emplace_back(neighbors); - // new Biggest cluster + // new Biggest cluster else if (neighbors.size() > maxClusterSize) { maxClusterSize = neighbors.size(); 
currClusters.clear(); @@ -352,28 +379,9 @@ class DBSCANCluster { bestClusters = currClusters; prevMaxClusterSize = maxClusterSize; eps += LEARNING_RATE; - return clusterAlns(); + return runDBSCAN(); } -private: - const double LEARNING_RATE = 0.1; - const double DEFAULT_EPS = 0.1; - SearchResult &searchResult; - double eps; - unsigned int cLabel; - unsigned int prevMaxClusterSize; - unsigned int maxClusterSize; - unsigned int recursiveNum; - unsigned int idealClusterSize; - unsigned int minClusterSize; - std::vector neighbors; - std::vector neighborsOfCurrNeighbor; - std::vector qFoundChainKeys; - std::vector dbFoundChainKeys; - distMap_t distMap; - cluster_t currClusters; - cluster_t bestClusters; - void fillDistMap() { distMap.clear(); for (size_t i=0; i < searchResult.alnVec.size(); i++) { @@ -441,8 +449,13 @@ class DBSCANCluster { for (size_t alnIdx=0; alnIdx 1 searchResults.emplace_back(paredSearchResult); paredSearchResult.alnVec.clear(); } @@ -537,7 +550,7 @@ class ComplexScorer { } unsigned int currLabel; DBSCANCluster dbscanCluster = DBSCANCluster(searchResult, minAssignedChainsRatio); - currLabel = dbscanCluster.clusterAlns(); + currLabel = dbscanCluster.getAlnClusters(); if (currLabel == UNCLUSTERED) return; assignment = Assignment(searchResult.qResidueLen, searchResult.dbResidueLen); for (auto &currAln: searchResult.alnVec) { @@ -700,6 +713,7 @@ int scorecomplex(int argc, const char **argv, const Command &command) { for (size_t qCompIdx = 0; qCompIdx < qComplexIndices.size(); qCompIdx++) { unsigned int qComplexId = qComplexIndices[qCompIdx]; std::vector &qChainKeys = qComplexIdToChainKeysMap.at(qComplexId); +// if (qChainKeys.size()==1) continue; complexScorer.getSearchResults(qComplexId, qChainKeys, dbChainKeyToComplexIdMap, dbComplexIdToChainKeysMap, searchResults); // for each db complex for (size_t dbId = 0; dbId < searchResults.size(); dbId++) {