-
Notifications
You must be signed in to change notification settings - Fork 26
/
Copy pathbuild_embeddings_run_zero_shot.sh
62 lines (51 loc) · 2.14 KB
/
build_embeddings_run_zero_shot.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
#!/bin/bash
# This script generates all encodings and then runs the zero-shot predictions.
# This first section declares the encoding groups, enters the `mlde` conda
# environment, and defines the paths shared by the later steps.

# Encoding names, grouped by how they are generated further down:
#   unlearned - passed straight to generate_encoding.py with --n_combined
#   tape      - TAPE embeddings
#   others    - remaining learned models
declare -a unlearned=("onehot" "georgiev")
declare -a tape=("resnet" "bepler" "unirep" "transformer" "lstm")
declare -a others=("esm1b_t33_650M_UR50S" "prot_bert_bfd")

# Activate the conda mlde environment. Stop immediately if conda cannot be
# loaded or the environment cannot be activated; otherwise every later python
# call would silently run under whatever interpreter happens to be on PATH.
source ~/anaconda3/etc/profile.d/conda.sh || {
  echo "ERROR: could not source ~/anaconda3/etc/profile.d/conda.sh" >&2
  exit 1
}
conda activate mlde || {
  echo "ERROR: could not activate the 'mlde' conda environment" >&2
  exit 1
}

# Define common arguments. The input paths are relative, so they are resolved
# against the directory the script is launched from.
fasta_loc="./code/validation/basic_test_data/2GI9.fasta"
alignment_loc="./code/validation/basic_test_data/GB1_Alignment.a2m"
evmut_mod_loc="./code/validation/basic_test_data/GB1_EVcouplingsModel.model"
output_loc="/mnt/Data/BJW/MLDE/RepeatEmbeddings2_ZeroShots"
#######################################
# Run generate_encoding.py for a single learned embedding on GB1_T2Q.
# Globals:   fasta_loc (read), output_loc (read)
# Arguments: $1 - encoding/model name forwarded to generate_encoding.py
# Returns:   exit status of the python call
#######################################
run_learned() {
  # Collect the full argument list first so the invocation itself stays short.
  local -a encoder_args=(
    "$1" GB1_T2Q
    --fasta "$fasta_loc"
    --positions V39 D40 G41 V54
    --output "$output_loc"
    --batch_size 16
  )
  python generate_encoding.py "${encoder_args[@]}"
}
# Unlearned encodings: generate_encoding.py is called directly (no fasta file
# or positions), with a two-second pause after each one.
unlearned_opts=(GB1_T2Q --n_combined 4 --output "$output_loc")
for encoding in "${unlearned[@]}"; do
  echo "$encoding"
  python generate_encoding.py "$encoding" "${unlearned_opts[@]}"
  sleep 2
done

# TAPE embeddings: every model name is handed to run_learned.
for encoding in "${tape[@]}"; do
  echo "$encoding"
  run_learned "$encoding"
done
# Switch from the current conda environment to mlde2. Abort if the switch
# fails so the models below are never run in the wrong environment.
conda deactivate
conda activate mlde2 || {
  echo "ERROR: could not activate the 'mlde2' conda environment" >&2
  exit 1
}

# Run generate_encoding.py for the other models
for encoding in "${others[@]}"; do
  echo "$encoding"
  run_learned "$encoding"
done

# esm_msa1_t12_100M_UR50S is run outside the loop: it is given the alignment
# file through --fasta and uses a batch size of 8 instead of run_learned's 16.
python generate_encoding.py esm_msa1_t12_100M_UR50S GB1_T2Q \
  --fasta "$alignment_loc" --positions V39 D40 G41 V54 \
  --output "$output_loc" --batch_size 8
# Zero-shot predictions with the non-DeepSequence models, all in a single
# predict_zero_shot.py call.
zero_shot_models=(
  EVmutation
  esm_msa1_t12_100M_UR50S
  esm1b_t33_650M_UR50S
  esm1_t34_670M_UR50S
  esm1_t34_670M_UR50D
  esm1_t34_670M_UR100
  esm1_t12_85M_UR50S
  esm1_t6_43M_UR50S
  prot_bert_bfd
  prot_bert
)
python predict_zero_shot.py \
  --positions V39 D40 G41 V54 \
  --models "${zero_shot_models[@]}" \
  --fasta "$fasta_loc" \
  --alignment "$alignment_loc" \
  --evmutation_model "$evmut_mod_loc" \
  --include_conditional \
  --output "$output_loc" \
  --batch_size 8
# Run zero-shot predictions with DeepSequence, which is run from its own
# conda environment. Abort if that environment cannot be activated rather
# than launching run_deepsequence.py under the previous one.
conda deactivate
conda activate deep_sequence || {
  echo "ERROR: could not activate the 'deep_sequence' conda environment" >&2
  exit 1
}
python run_deepsequence.py "$alignment_loc" --positions V39 D40 G41 V54 \
  --output "$output_loc" --no_cudnn