Skip to content

Commit

Permalink
Merge pull request #391 from rachelse/gpu_sooyoung
Browse files Browse the repository at this point in the history
initial GPU support for monomer search
  • Loading branch information
martin-steinegger authored Jan 5, 2025
2 parents ef927db + 2436244 commit fb2b4ac
Show file tree
Hide file tree
Showing 265 changed files with 50,782 additions and 10,946 deletions.
14 changes: 7 additions & 7 deletions azure-pipelines.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ jobs:
matrix:
avx2:
SIMD: 'AVX2'
STATIC: 1
STATIC: 0
MPI: 0
BUILD_TYPE: RelWithDebInfo
# sse41:
Expand All @@ -26,7 +26,7 @@ jobs:
# BUILD_TYPE: RelWithDebInfo
sse2:
SIMD: 'SSE2'
STATIC: 1
STATIC: 0
MPI: 0
BUILD_TYPE: RelWithDebInfo
avx2_mpi:
Expand Down Expand Up @@ -57,15 +57,15 @@ jobs:
export CC=gcc-11 ; export CXX=g++-11
if [ "${STATIC}" -eq "1" ]; then
cmake -DCMAKE_BUILD_TYPE=${BUILD_TYPE} -DBUILD_SHARED_LIBS=OFF \
-DCMAKE_EXE_LINKER_FLAGS="-static -static-libgcc \
-static-libstdc++" -DCMAKE_FIND_LIBRARY_SUFFIXES=".a" \
-DPREFER_STATIC=1 -DCMAKE_FIND_LIBRARY_SUFFIXES=".a" \
-DCMAKE_FIND_LIBRARY_SUFFIXES=".a" \
-DENABLE_WERROR=1 -DHAVE_${SIMD}=1 -DHAVE_MPI=${MPI} ..
else
cmake -DHAVE_SANITIZER=1 -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
-DENABLE_WERROR=1 -DHAVE_${SIMD}=1 -DHAVE_MPI=${MPI} ..
fi
make -j $(nproc --all)
make -j $(nproc --all) VERBOSE=1
displayName: Build foldseek
- script: |
if [ "${SIMD}" = "SSE2" ]; then
Expand Down Expand Up @@ -113,8 +113,8 @@ jobs:
CC=${CPREF}-linux-gnu-gcc CXX=${CPREF}-linux-gnu-g++ \
cmake -DCMAKE_BUILD_TYPE=${BUILD_TYPE} -DHAVE_TESTS=1 \
-DBUILD_SHARED_LIBS=OFF \
-DCMAKE_EXE_LINKER_FLAGS="-static -static-libgcc \
-static-libstdc++" -DCMAKE_FIND_LIBRARY_SUFFIXES=".a" \
-DCMAKE_EXE_LINKER_FLAGS="-static -static-libgcc -static-libstdc++" \
-DPREFER_STATIC=1 -DCMAKE_FIND_LIBRARY_SUFFIXES=".a" \
-DRust_CARGO_TARGET=${CPREF}-unknown-linux-gnu \
-DENABLE_WERROR=1 -DHAVE_${SIMD}=1 ..
make -j $(nproc --all)
Expand Down
3 changes: 3 additions & 0 deletions data/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ set(COMPILED_RESOURCES
evalue_nn.kerasify
main.js
vendor.js.zst
makepaddeddb.sh
result2structprofile.sh
createstructsubdb.sh
multimersearch.sh
easymultimersearch.sh
multimercluster.sh
Expand Down
33 changes: 33 additions & 0 deletions data/createstructsubdb.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#!/bin/sh -e
# Create a subset of a structure database together with its companion
# databases.
#
# Arguments:
#   $1  LIST  handed to "createsubdb" as its first argument
#             (presumably the list of entries to keep -- confirm with caller)
#   $2  IN    path prefix of the input database
#   $3  OUT   path prefix of the output database
#
# Environment:
#   MMSEQS                 command used to invoke the sub-tools
#   CREATESTRUCTSUBDB_PAR  extra options for createsubdb (word-split on purpose)
#
# "base:createsubdb" is run for "${IN}", "${IN}_ss" and "${IN}_ca", each only
# when the matching ".dbtype" file exists. Finally "${OUT}.sh" is removed if
# it is present.

# Print an error message and abort with status 1.
# Every error path below relies on this helper; without a definition the
# shell would report "fail: not found" instead of the intended message.
fail() {
    echo "Error: $1" >&2
    exit 1
}

# Run createsubdb for one companion database.
#   $1  suffix appended to both IN and OUT ("" for the main database)
# Does nothing when "${IN}<suffix>.dbtype" does not exist.
create_subdb() {
    if [ -e "${IN}$1.dbtype" ]; then
        # shellcheck disable=SC2086
        "$MMSEQS" base:createsubdb "${LIST}" "${IN}$1" "${OUT}$1" ${CREATESTRUCTSUBDB_PAR} \
            || fail "createsubdb died"
    fi
}

LIST="$1"
IN="$2"
OUT="$3"

create_subdb ""
create_subdb "_ss"
create_subdb "_ca"
# The "_h" database is intentionally not processed; the step is kept here
# disabled, exactly as it was before:
# create_subdb "_h"

if [ -e "${OUT}.sh" ]; then
    rm -f -- "${OUT}.sh"
fi
19 changes: 19 additions & 0 deletions data/easystructuresearch.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,15 @@ if notExists "${TARGET}.dbtype"; then
|| fail "target createdb died"
fi
TARGET="${TMP_PATH}/target"

if [ -n "${GPU}" ]; then
if notExists "${TMP_PATH}/target_pad"; then
# shellcheck disable=SC2086
"$MMSEQS" makepaddedseqdb "${TMP_PATH}/target" "${TMP_PATH}/target_pad" "${TMP_PATH}/pad_tmp" ${MAKEPADDEDSEQDB_PAR} \
|| fail "makepaddedseqdb died"
fi
TARGET="${TMP_PATH}/target_pad"
fi
fi


Expand Down Expand Up @@ -75,6 +84,16 @@ if [ -n "${REMOVE_TMP}" ]; then
# shellcheck disable=SC2086
"$MMSEQS" rmdb "${TMP_PATH}/target_ss" ${VERBOSITY}
fi
if [ -f "${TMP_PATH}/target_pad" ]; then
# shellcheck disable=SC2086
"$MMSEQS" rmdb "${TMP_PATH}/target_pad" ${VERBOSITY}
# shellcheck disable=SC2086
"$MMSEQS" rmdb "${TMP_PATH}/target_pad_h" ${VERBOSITY}
# shellcheck disable=SC2086
"$MMSEQS" rmdb "${TMP_PATH}/target_pad_ss" ${VERBOSITY}
# shellcheck disable=SC2086
"$MMSEQS" rmdb "${TMP_PATH}/target_pad_ss_h" ${VERBOSITY}
fi
if [ -f "${TMP_PATH}/query.dbtype" ]; then
# shellcheck disable=SC2086
"$MMSEQS" rmdb "${TMP_PATH}/query" ${VERBOSITY}
Expand Down
186 changes: 186 additions & 0 deletions data/makepaddeddb.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
#!/bin/sh -e
# Build padded copies of a structure database and re-key its companion
# databases so that they stay aligned with the padded "_ss" database
# (presumably for GPU search, judging by the "gpu_mapping" file names).
#
# Environment (none of these is assigned in this script, so they must be
# provided by the caller -- NOTE(review): confirm IN/OUT are exported):
#   MMSEQS               command used to invoke the sub-tools
#   IN                   path prefix of the input database
#   OUT                  path prefix of the output database
#   CLUSEARCH_PAR        "0" selects the plain-database branch, anything else
#                        the branch that also handles _seq/_clu/_aln databases
#   MAKEPADDEDSEQDB_PAR  extra options for makepaddedseqdb (word-split)
#   THREADS_PAR          thread options (word-split)
#   VERBOSITY            verbosity options (word-split)

# True when the given path is a regular file.
exists() {
    [ -f "$1" ]
}

# Print an error message and abort with status 1.
# Every error path below relies on this helper; without a definition the
# shell would report "fail: not found" instead of the intended message.
fail() {
    echo "Error: $1" >&2
    exit 1
}

# Pad "${IN}_ss" into "${OUT}_ss" via temporary links, then write
# "${OUT}_ss.gpu_mapping1" holding "<column 3>\t<column 1>" of the padded
# database's lookup file.
pad_ss_db() {
    # shellcheck disable=SC2086
    "$MMSEQS" lndb "${IN}_h" "${OUT}_tmp_ss_h" ${VERBOSITY} \
        || fail "lndb died"

    # shellcheck disable=SC2086
    "$MMSEQS" lndb "${IN}_ss" "${OUT}_tmp_ss" ${VERBOSITY} \
        || fail "lndb died"

    # shellcheck disable=SC2086
    "$MMSEQS" base:makepaddedseqdb "${OUT}_tmp_ss" "${OUT}_ss" ${MAKEPADDEDSEQDB_PAR} \
        || fail "mmseqs makepaddedseqdb died"

    # shellcheck disable=SC2086
    "$MMSEQS" rmdb "${OUT}_tmp_ss" ${VERBOSITY} \
        || fail "rmdb died"

    # shellcheck disable=SC2086
    "$MMSEQS" rmdb "${OUT}_tmp_ss_h" ${VERBOSITY} \
        || fail "rmdb died"

    awk '{ print $3"\t"$1; }' "${OUT}_ss.lookup" > "${OUT}_ss.gpu_mapping1"
}

# Write "${OUT}_ss.gpu_mapping2": first the same pairs as gpu_mapping1, then
# one line per first-column value of "${IN}.lookup" that did not occur as
# column 3 of "${OUT}_ss.lookup", numbered consecutively after those pairs.
write_cluster_mapping() {
    awk 'BEGIN{i=0} FNR==NR{name[$3]=1;print $3"\t"$1;i++; next} {if (!($1 in name)){print $1"\t"i; i++}}' \
        "${OUT}_ss.lookup" "${IN}.lookup" > "${OUT}_ss.gpu_mapping2"
}

# Re-key "${IN}<suffix>" into "${OUT}<suffix>" using gpu_mapping1.
#   $1  suffix ("" for the main database)
# Does nothing when "${IN}<suffix>.dbtype" does not exist.
rename_by_ss_keys() {
    if exists "${IN}$1.dbtype"; then
        # shellcheck disable=SC2086
        "$MMSEQS" renamedbkeys "${OUT}_ss.gpu_mapping1" "${IN}$1" "${OUT}$1" \
            --subdb-mode 1 ${THREADS_PAR} \
            || fail "renamedbkeys died"
    fi
}

# Re-key "${IN}_seq<suffix>" into "${OUT}_seq<suffix>" using gpu_mapping2.
#   $1  suffix ("" for the "_seq" database itself)
# Unconditional: no ".dbtype" check is made for these databases.
rename_seq_db() {
    # shellcheck disable=SC2086
    "$MMSEQS" renamedbkeys "${OUT}_ss.gpu_mapping2" "${IN}_seq$1" "${OUT}_seq$1" \
        --subdb-mode 1 ${THREADS_PAR} \
        || fail "renamedbkeys died"
}

# Translate a result database: filterdb with gpu_mapping2 as mapping file
# into a temporary database, renamedbkeys with gpu_mapping1 into the final
# one, then remove the temporary.
#   $1  database name without underscore ("clu" or "aln")
# Does nothing when "${IN}_<name>.dbtype" does not exist.
remap_result_db() {
    if exists "${IN}_$1.dbtype"; then
        # shellcheck disable=SC2086
        "$MMSEQS" filterdb "${IN}_$1" "${OUT}_${1}tmp" --mapping-file "${OUT}_ss.gpu_mapping2" ${VERBOSITY} ${THREADS_PAR} \
            || fail "filterdb died"
        # shellcheck disable=SC2086
        "$MMSEQS" renamedbkeys "${OUT}_ss.gpu_mapping1" "${OUT}_${1}tmp" "${OUT}_$1" \
            --subdb-mode 0 ${THREADS_PAR} \
            || fail "renamedbkeys died"
        # shellcheck disable=SC2086
        "$MMSEQS" rmdb "${OUT}_${1}tmp" ${VERBOSITY} \
            || fail "rmdb died"
    fi
}

# Pad "${IN}<suffix>" straight into "${OUT}<suffix>".
#   $1  suffix ("" for the main database)
# Does nothing when "${IN}<suffix>.dbtype" does not exist.
pad_if_present() {
    if exists "${IN}$1.dbtype"; then
        # shellcheck disable=SC2086
        "$MMSEQS" base:makepaddedseqdb "${IN}$1" "${OUT}$1" ${MAKEPADDEDSEQDB_PAR} \
            || fail "mmseqs makepaddedseqdb died"
    fi
}

# Replace "${OUT}.lookup" with a copy of "${OUT}_ss.lookup" whose third
# column is halved (integer division).
rebuild_lookup() {
    rm -f -- "${OUT}.lookup"
    awk '{print $1"\t"$2"\t"int($3/2)}' "${OUT}_ss.lookup" > "${OUT}.lookup"
}

if [ "${CLUSEARCH_PAR}" = 0 ]; then
    if exists "${IN}_ss.dbtype"; then
        pad_ss_db

        rename_by_ss_keys ""
        rename_by_ss_keys "_ca"
        rename_by_ss_keys "_h"

        rebuild_lookup
        rm -f -- "${OUT}_ss.gpu_mapping1"
    else
        pad_if_present ""
        pad_if_present "_ca"
    fi
else
    if exists "${IN}_ss.dbtype"; then
        pad_ss_db
        write_cluster_mapping

        rename_by_ss_keys ""
        rename_by_ss_keys "_ca"
        rename_by_ss_keys "_h"

        rename_seq_db ""
        rename_seq_db "_ca"
        rename_seq_db "_h"
        rename_seq_db "_ss"

        remap_result_db "clu"
        remap_result_db "aln"

        rebuild_lookup
        rm -f -- "${OUT}_ss.gpu_mapping1"
        rm -f -- "${OUT}_ss.gpu_mapping2"

        # -f and -- match every other removal in this script and keep a
        # missing file from aborting the run under "sh -e".
        rm -f -- "${OUT}_seq"
        rm -f -- "${OUT}_seq_ss"
        rm -f -- "${OUT}_seq_ca"
        rm -f -- "${OUT}_seq_h"
    else
        pad_if_present ""
    fi
fi

if [ -e "${OUT}.sh" ]; then
    rm -f -- "${OUT}.sh"
fi
34 changes: 34 additions & 0 deletions data/result2structprofile.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#!/bin/sh -e
# Turn a search result into profile databases for a structure database.
#
# Arguments:
#   $1  IN1     path prefix of the first input database
#   $2  IN2     path prefix of the second input database
#   $3  RESULT  result database handed to result2profile
#   $4  OUT     path prefix of the output database
#
# Environment:
#   MMSEQS          command used to invoke the sub-tools
#   PROFILE_PAR     options for the main result2profile call (word-split)
#   PROFILE_SS_PAR  options for the "_ss" result2profile call (word-split)
#   VERBOSITY       verbosity options for lndb (word-split)
#
# "base:result2profile" is run for the main and the "_ss" database, and the
# "_ca" and "_h" databases of IN1 are linked to OUT with "lndb"; each step is
# skipped when the matching "${IN1}<suffix>.dbtype" file does not exist.
# Finally "${OUT}.sh" is removed if it is present.

# Print an error message and abort with status 1.
# Every error path below relies on this helper; without a definition the
# shell would report "fail: not found" instead of the intended message.
fail() {
    echo "Error: $1" >&2
    exit 1
}

# Run result2profile for one database pair.
#   $1  suffix appended to IN1, IN2 and OUT ("" for the main database)
#   $2  option string, expanded unquoted so that it splits into words
build_profile() {
    if [ -e "${IN1}$1.dbtype" ]; then
        # shellcheck disable=SC2086
        "$MMSEQS" base:result2profile "${IN1}$1" "${IN2}$1" "${RESULT}" "${OUT}$1" $2 \
            || fail "result2profile died"
    fi
}

# Link "${IN1}<suffix>" to "${OUT}<suffix>" with lndb.
#   $1  suffix of the companion database
link_companion() {
    if [ -e "${IN1}$1.dbtype" ]; then
        # shellcheck disable=SC2086
        "$MMSEQS" lndb "${IN1}$1" "${OUT}$1" ${VERBOSITY} \
            || fail "Create lndb died"
    fi
}

IN1="$1"
IN2="$2"
RESULT="$3"
OUT="$4"

build_profile "" "${PROFILE_PAR}"
build_profile "_ss" "${PROFILE_SS_PAR}"

link_companion "_ca"
link_companion "_h"

if [ -e "${OUT}.sh" ]; then
    rm -f -- "${OUT}.sh"
fi
Loading

0 comments on commit fb2b4ac

Please sign in to comment.