-
Notifications
You must be signed in to change notification settings - Fork 109
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Implement complexsearch and sanitize expandcomplex
- Loading branch information
1 parent
b220b5a
commit b156e06
Showing
9 changed files
with
233 additions
and
58 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
#!/bin/sh -e | ||
# Report a fatal workflow error and stop the script.
#   $1 - message printed after the "Error: " prefix
# Always terminates the shell with exit status 1.
fail() {
    echo "Error: ${1}"
    exit 1
}
|
||
# Succeeds (status 0) when $1 does not name an existing regular file,
# fails (status 1) when it does.
notExists() {
    test ! -f "${1}"
}
|
||
# Workflow body. Every step is skipped when the .dbtype file it is guarded by
# already exists, so an interrupted run can be resumed from TMP_PATH.
# Inputs (all provided by the caller): MMSEQS, QUERYDB, TARGETDB, OUTPUT,
# TMP_PATH, PREFMODE, COMPLEX_ALIGNMENT_ALGO, the *_PAR parameter strings,
# VERBOSITY and REMOVE_TMP.

# Step 1: search QUERYDB against TARGETDB.
if notExists "${TMP_PATH}/result.dbtype"; then
    # shellcheck disable=SC2086
    "$MMSEQS" search "${QUERYDB}" "${TARGETDB}" "${TMP_PATH}/result" "${TMP_PATH}/search_tmp" ${SEARCH_PAR} \
        || fail "Search died"
fi

RESULT="${TMP_PATH}/result"

# Step 2 (skipped for PREFMODE=EXHAUSTIVE): expand the search result with
# expandcomplex and re-align the expanded set with the selected aligner.
if [ "$PREFMODE" != "EXHAUSTIVE" ]; then
    if notExists "${TMP_PATH}/result_expand_pref.dbtype"; then
        # shellcheck disable=SC2086
        "$MMSEQS" expandcomplex "${QUERYDB}" "${TARGETDB}" "${RESULT}" "${TMP_PATH}/result_expand_pref" ${THREADS_PAR} \
            || fail "Expandcomplex died"
    fi
    if notExists "${TMP_PATH}/result_expand_aligned.dbtype"; then
        # The aligner name is a single word, so it is quoted; fail() prints
        # only its first argument, so the message is passed as one string.
        # shellcheck disable=SC2086
        "$MMSEQS" "${COMPLEX_ALIGNMENT_ALGO}" "${QUERYDB}" "${TARGETDB}" "${TMP_PATH}/result_expand_pref" "${TMP_PATH}/result_expand_aligned" ${COMPLEX_ALIGN_PAR} \
            || fail "${COMPLEX_ALIGNMENT_ALGO} died"
    fi
    RESULT="${TMP_PATH}/result_expand_aligned"
fi

# Step 3: score complexes and write the final result to OUTPUT.
# NOTE(review): the guard tests ${TMP_PATH}/complex_result.dbtype, but this
# script never creates that file (scorecomplex writes to ${OUTPUT}), so this
# step runs on every invocation. Confirm whether that is intended.
if notExists "${TMP_PATH}/complex_result.dbtype"; then
    # shellcheck disable=SC2086
    "$MMSEQS" scorecomplex "${QUERYDB}" "${TARGETDB}" "${RESULT}" "${OUTPUT}" ${SCORECOMPLEX_PAR} \
        || fail "ScoreComplex died"
fi

# Optional cleanup of every intermediate database created above.
if [ -n "${REMOVE_TMP}" ]; then
    # shellcheck disable=SC2086
    "$MMSEQS" rmdb "${TMP_PATH}/result" ${VERBOSITY}
    if [ "$PREFMODE" != "EXHAUSTIVE" ]; then
        # shellcheck disable=SC2086
        "$MMSEQS" rmdb "${TMP_PATH}/result_expand_pref" ${VERBOSITY}
        # shellcheck disable=SC2086
        "$MMSEQS" rmdb "${TMP_PATH}/result_expand_aligned" ${VERBOSITY}
    fi
    rm -rf "${TMP_PATH}/search_tmp"
    rm -f "${TMP_PATH}/complexsearch.sh"
fi
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -263,7 +263,26 @@ std::vector<Command> foldseekCommands = { | |
{"complexDB", DbType::ACCESS_MODE_OUTPUT, DbType::NEED_DATA, &DbValidator::alignmentDb} | ||
} | ||
}, | ||
{"easy-complexsearch", easycomplexsearch, &localPar.easyscorecomplexworkflow, COMMAND_EASY, | ||
{"complexsearch", complexsearch, &localPar.complexsearchworkflow, COMMAND_MAIN, | ||
"Complex level search", | ||
"# Search a single/multiple PDB file against a set of PDB files and get complex level alignments\n" | ||
"foldseek complexsearch queryDB targetDB result tmp\n" | ||
"# Format output differently\n" | ||
"foldseek easy-complexsearch queryDB targetDB result tmp --format-output query,target,qstart,tstart,cigar\n" | ||
"# Align with TMalign (global)\n" | ||
"foldseek complexsearch queryDB targetDB result tmp --alignment-type 1\n" | ||
"# Skip prefilter and perform an exhaustive alignment (slower but more sensitive)\n" | ||
"foldseek complexsearch queryDB targetDB result tmp --exhaustive-search 1\n\n", | ||
"Woosub Kim <[email protected]>", | ||
"<i:queryDB> <i:targetDB> <o:outputFileName> <tmpDir>", | ||
CITATION_FOLDSEEK, { | ||
{"queryDB", DbType::ACCESS_MODE_INPUT, DbType::NEED_DATA | DbType::NEED_HEADER, &DbValidator::sequenceDb}, | ||
{"targetDB", DbType::ACCESS_MODE_INPUT, DbType::NEED_DATA | DbType::NEED_HEADER, &DbValidator::sequenceDb}, | ||
{"complexDB", DbType::ACCESS_MODE_OUTPUT, DbType::NEED_DATA, &DbValidator::alignmentDb}, | ||
{"tempDir", DbType::ACCESS_MODE_OUTPUT, DbType::NEED_DATA, &DbValidator::directory} | ||
} | ||
}, | ||
{"easy-complexsearch", easycomplexsearch, &localPar.easyscomplexsearchworkflow, COMMAND_EASY, | ||
"Complex level search", | ||
"# Search a single/multiple PDB file against a set of PDB files and get complex level alignments\n" | ||
"foldseek easy-complexsearch example/1tim.pdb.gz example/8tim.pdb.gz result tmp\n" | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
#include <cassert> | ||
|
||
#include "LocalParameters.h" | ||
#include "FileUtil.h" | ||
#include "CommandCaller.h" | ||
#include "Util.h" | ||
#include "Debug.h" | ||
|
||
#include "complexsearch.sh.h" | ||
|
||
int complexsearch(int argc, const char **argv, const Command &command) { | ||
LocalParameters &par = LocalParameters::getLocalInstance(); | ||
par.PARAM_ADD_BACKTRACE.addCategory(MMseqsParameter::COMMAND_EXPERT); | ||
par.PARAM_MAX_REJECTED.addCategory(MMseqsParameter::COMMAND_EXPERT); | ||
par.PARAM_ZDROP.addCategory(MMseqsParameter::COMMAND_EXPERT); | ||
par.PARAM_DB_OUTPUT.addCategory(MMseqsParameter::COMMAND_EXPERT); | ||
par.PARAM_OVERLAP.addCategory(MMseqsParameter::COMMAND_EXPERT); | ||
par.PARAM_RESCORE_MODE.addCategory(MMseqsParameter::COMMAND_EXPERT); | ||
for (size_t i = 0; i < par.createdb.size(); i++){ | ||
par.createdb[i]->addCategory(MMseqsParameter::COMMAND_EXPERT); | ||
} | ||
|
||
par.PARAM_COMPRESSED.removeCategory(MMseqsParameter::COMMAND_EXPERT); | ||
par.PARAM_THREADS.removeCategory(MMseqsParameter::COMMAND_EXPERT); | ||
par.PARAM_V.removeCategory(MMseqsParameter::COMMAND_EXPERT); | ||
|
||
par.parseParameters(argc, argv, command, false, Parameters::PARSE_VARIADIC, 0); | ||
if(par.PARAM_FORMAT_OUTPUT.wasSet == false){ | ||
par.outfmt = "query,target,fident,alnlen,mismatch,gapopen,qstart,qend,tstart,tend,evalue,bits,complexassignid"; | ||
} | ||
par.addBacktrace = true; | ||
par.PARAM_ADD_BACKTRACE.wasSet = true; | ||
par.printParameters(command.cmd, argc, argv, *command.params); | ||
|
||
bool needBacktrace = false; | ||
bool needTaxonomy = false; | ||
bool needTaxonomyMapping = false; | ||
bool needLookup = false; | ||
{ | ||
bool needSequenceDB = false; | ||
bool needFullHeaders = false; | ||
bool needSource = false; | ||
bool needQCA = false; | ||
bool needTCA = false; | ||
bool needTMalign = false; | ||
bool needLDDT = false; | ||
LocalParameters::getOutputFormat(par.formatAlignmentMode, par.outfmt, needSequenceDB, needBacktrace, needFullHeaders, | ||
needLookup, needSource, needTaxonomyMapping, needTaxonomy, needQCA, needTCA, needTMalign, needLDDT); | ||
} | ||
|
||
if (par.formatAlignmentMode == Parameters::FORMAT_ALIGNMENT_SAM || | ||
par.formatAlignmentMode == LocalParameters::FORMAT_ALIGNMENT_PDB_SUPERPOSED || | ||
par.greedyBestHits) { | ||
needBacktrace = true; | ||
} | ||
if (needBacktrace) { | ||
Debug(Debug::INFO) << "Alignment backtraces will be computed, since they were requested by output format.\n"; | ||
par.addBacktrace = true; | ||
par.PARAM_ADD_BACKTRACE.wasSet = true; | ||
} | ||
if (needLookup) { | ||
par.writeLookup = true; | ||
} | ||
|
||
std::string tmpDir = par.filenames.back(); | ||
std::string hash = SSTR(par.hashParameter(command.databases, par.filenames, *command.params)); | ||
if (par.reuseLatest) { | ||
hash = FileUtil::getHashFromSymLink(tmpDir + "/latest"); | ||
} | ||
tmpDir = FileUtil::createTemporaryDirectory(tmpDir, hash); | ||
par.filenames.pop_back(); | ||
CommandCaller cmd; | ||
if(par.alignmentType == LocalParameters::ALIGNMENT_TYPE_TMALIGN){ | ||
cmd.addVariable("COMPLEX_ALIGNMENT_ALGO", "tmalign"); | ||
cmd.addVariable("COMPLEX_ALIGN_PAR", par.createParameterString(par.tmalign).c_str()); | ||
}else if(par.alignmentType == LocalParameters::ALIGNMENT_TYPE_3DI_AA || par.alignmentType == LocalParameters::ALIGNMENT_TYPE_3DI){ | ||
cmd.addVariable("COMPLEX_ALIGNMENT_ALGO", "structurealign"); | ||
cmd.addVariable("COMPLEX_ALIGN_PAR", par.createParameterString(par.structurealign).c_str()); | ||
} | ||
|
||
switch(par.prefMode){ | ||
case LocalParameters::PREF_MODE_KMER: | ||
cmd.addVariable("PREFMODE", "KMER"); | ||
break; | ||
case LocalParameters::PREF_MODE_UNGAPPED: | ||
cmd.addVariable("PREFMODE", "UNGAPPED"); | ||
break; | ||
case LocalParameters::PREF_MODE_EXHAUSTIVE: | ||
cmd.addVariable("PREFMODE", "EXHAUSTIVE"); | ||
break; | ||
} | ||
if(par.exhaustiveSearch){ | ||
cmd.addVariable("PREFMODE", "EXHAUSTIVE"); | ||
} | ||
cmd.addVariable("NO_REPORT", par.complexReportMode == 0 ? "TRUE" : NULL); | ||
cmd.addVariable("TMP_PATH", tmpDir.c_str()); | ||
cmd.addVariable("OUTPUT", par.filenames.back().c_str()); | ||
par.filenames.pop_back(); | ||
cmd.addVariable("TARGETDB", par.filenames.back().c_str()); | ||
par.filenames.pop_back(); | ||
cmd.addVariable("QUERYDB", par.filenames.back().c_str()); | ||
cmd.addVariable("LEAVE_INPUT", par.dbOut ? "TRUE" : NULL); | ||
par.filenames.pop_back(); | ||
cmd.addVariable("SEARCH_PAR", par.createParameterString(par.structuresearchworkflow, true).c_str()); | ||
cmd.addVariable("SCORECOMPLEX_PAR", par.createParameterString(par.scorecomplex).c_str()); | ||
cmd.addVariable("THREADS_PAR", par.createParameterString(par.onlythreads).c_str()); | ||
cmd.addVariable("REMOVE_TMP", par.removeTmpFiles ? "TRUE" : NULL); | ||
cmd.addVariable("VERBOSITY", par.createParameterString(par.onlyverbosity).c_str()); | ||
std::string program = tmpDir + "/complexsearch.sh"; | ||
FileUtil::writeFile(program, complexsearch_sh, complexsearch_sh_len); | ||
cmd.execProgram(program.c_str(), par.filenames); | ||
// Should never get here | ||
assert(false); | ||
return EXIT_FAILURE; | ||
} |