forked from FMInference/FlexLLMGen
-
Notifications
You must be signed in to change notification settings - Fork 3
/
bench_6.7b_4x1.sh
40 lines (34 loc) · 1.04 KB
/
bench_6.7b_4x1.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
#!/bin/bash
# Cluster layout for this benchmark: N_NODES machines with N_GPUS GPU(s) each
# and N_CORES_PER_GPU CPU cores reserved per GPU process.
N_GPUS=1
N_NODES=4
N_CORES_PER_GPU=16

# Address of the machine this script runs on, as printed by `hostname -i`.
MY_IPADDR=$(hostname -i)

# Worker addresses printed by `ray get-worker-ips` for the bootstrap config.
all_public_ips=$(ray get-worker-ips ~/ray_bootstrap_config.yaml)

# Each worker's `hostname -i` output is collected in a private scratch
# directory instead of predictable, world-writable /tmp/<ip>.ip paths.
ip_tmpdir=$(mktemp -d) || {
  echo "error: mktemp -d failed" >&2
  exit 1
}

# $all_public_ips is deliberately unquoted: it is a whitespace-separated list.
for s in $all_public_ips; do
  ssh -o StrictHostKeyChecking=no "$s" hostname -i > "$ip_tmpdir/$s.ip" &
done
wait # barrier: every background ssh lookup has finished

# Start from an empty array so a same-named variable inherited from the
# environment cannot leak into the host list.
OTHERS_IPADDR=()
for s in $all_public_ips; do
  if [[ ! -s "$ip_tmpdir/$s.ip" ]]; then
    # Best effort, as before: an unreachable worker contributes no address.
    echo "warning: worker $s returned no address" >&2
    continue
  fi
  # Word splitting is intended here: keep every address the worker printed.
  # shellcheck disable=SC2207
  OTHERS_IPADDR+=($(cat "$ip_tmpdir/$s.ip"))
done
rm -rf -- "$ip_tmpdir"

# This machine goes first in the list, followed by the workers.
ALL_IPADDR=("$MY_IPADDR" "${OTHERS_IPADDR[@]}")
if (( ${#ALL_IPADDR[@]} < N_NODES )); then
  echo "warning: found ${#ALL_IPADDR[@]} address(es), expected N_NODES=$N_NODES" >&2
fi

# Comma-separated list of the first N_NODES addresses (IFS is changed only
# inside the command-substitution subshell).
all_hosts=$(IFS=,; printf '%s' "${ALL_IPADDR[*]:0:$N_NODES}")

# Interpreter of the active conda environment.
PYTHON_EXEC=$CONDA_PREFIX/bin/python
# Python module executed on every rank (run with `python -m`).
PYTHON_SCRIPT=flexgen.dist_flex_opt

# Kill every process whose command line matches "python" but does NOT contain
# "dist_flex_opt.py" (awk prints the PID column of the non-matching lines).
# -r (GNU xargs) skips `sudo kill` entirely when there is nothing to kill,
# which otherwise fails with a usage error.
# NOTE(review): the negated filter spares dist_flex_opt.py processes and kills
# all other python processes on this machine - confirm this is intended.
pgrep -fl python | awk '!/dist_flex_opt\.py/{print $1}' | xargs -r sudo kill

# Build the command line as an array so every expansion stays a single word.
mpirun_args=(
  # Keep Open MPI's TCP transports off the loopback and docker interfaces.
  --mca btl_tcp_if_exclude lo,docker0
  --mca oob_tcp_if_exclude lo,docker0
  # N_GPUS processes per node, each given N_CORES_PER_GPU cores, on the hosts
  # listed in $all_hosts.
  --map-by "ppr:$N_GPUS:node:pe=$N_CORES_PER_GPU" --oversubscribe -H "$all_hosts"
  --bind-to core -x "OMP_NUM_THREADS=$N_CORES_PER_GPU"
  "$PYTHON_EXEC" -m "$PYTHON_SCRIPT"
  --head-ip "$MY_IPADDR"
  --port 7777
  --use-mpi
  --model facebook/opt-6.7b
  --gpu-batch-size 24
  --percent 100 0 100 0 100 0
  --comm-device gpu
  --cut-gen-len 5
  --path _DUMMY_
)

# Trace the fully expanded launch command; mpirun stays the last command so
# its exit status is the script's exit status.
set -x
mpirun "${mpirun_args[@]}"