forked from FMInference/FlexLLMGen
-
Notifications
You must be signed in to change notification settings - Fork 3
/
bench_30b_4x1.sh
42 lines (36 loc) · 1.11 KB
/
bench_30b_4x1.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
#!/bin/bash
# Discover the hosts for this benchmark and set the launch configuration
# (host list and Python interpreter/module) consumed later in the script.

# Cluster shape: N_NODES hosts, N_GPUS ranks per host, N_CORES_PER_GPU cores
# per rank. All three are read by the mpirun invocation.
N_GPUS=1
N_NODES=4
N_CORES_PER_GPU=16

# Address(es) of this machine as printed by `hostname -i`.
MY_IPADDR=$(hostname -i)

# Worker addresses reported by Ray for the bootstrap config in the home
# directory. A failure only warns here; the node-count check below decides
# whether enough hosts were found to continue.
all_public_ips=$(ray get-worker-ips ~/ray_bootstrap_config.yaml) \
  || echo "warning: 'ray get-worker-ips' failed; no worker hosts discovered" >&2

# Split the report on whitespace into an array without pathname expansion.
# `read -d ''` returns non-zero at end of input, hence the `|| true`.
worker_hosts=()
read -r -d '' -a worker_hosts <<< "$all_public_ips" || true

# Private scratch directory instead of predictable /tmp/<ip>.ip file names.
ip_dir=$(mktemp -d) || { echo "error: mktemp -d failed" >&2; exit 1; }

# Ask every worker for its own `hostname -i` output, all in parallel.
for host in "${worker_hosts[@]}"; do
  ssh -o StrictHostKeyChecking=no "$host" hostname -i > "$ip_dir/$host.ip" &
done
wait

# Start from an empty array so a value inherited from the environment cannot
# leak into the host list.
OTHERS_IPADDR=()
for host in "${worker_hosts[@]}"; do
  if [[ ! -s "$ip_dir/$host.ip" ]]; then
    echo "warning: no address returned by $host; skipping it" >&2
    continue
  fi
  # Whitespace splitting is deliberate: `hostname -i` may print more than one
  # address, and each one becomes its own entry (as the script always did).
  host_addrs=()
  read -r -d '' -a host_addrs < "$ip_dir/$host.ip" || true
  OTHERS_IPADDR+=("${host_addrs[@]}")
done
rm -rf -- "$ip_dir"

# This machine first, then the workers.
ALL_IPADDR=()
read -r -d '' -a ALL_IPADDR <<< "$MY_IPADDR" || true
ALL_IPADDR+=("${OTHERS_IPADDR[@]}")

# Refuse to continue with fewer hosts than the benchmark is configured for;
# otherwise mpirun would quietly run on a smaller cluster.
if (( ${#ALL_IPADDR[@]} < N_NODES )); then
  echo "error: found ${#ALL_IPADDR[@]} host address(es), need $N_NODES" >&2
  exit 1
fi

# Comma-separated list of the first N_NODES addresses, for `mpirun -H`.
selected_hosts=("${ALL_IPADDR[@]:0:N_NODES}")
all_hosts=$(IFS=,; printf '%s' "${selected_hosts[*]}")

# Interpreter from the active conda environment and the module to run.
PYTHON_EXEC=$CONDA_PREFIX/bin/python
PYTHON_SCRIPT=flexgen.dist_flex_opt
if [[ ! -x "$PYTHON_EXEC" ]]; then
  echo "warning: $PYTHON_EXEC is not executable (is CONDA_PREFIX set?)" >&2
fi
# Signal (default signal, via sudo) the processes that `pgrep -fl python`
# lists, except those whose listing line matches "dist_flex_opt.py"; awk
# prints the first column (the PID) of every remaining line.
# NOTE(review): this script starts its workload with `-m flexgen.dist_flex_opt`,
# which has no ".py" suffix on the command line, so the exclusion may never
# match such runs -- confirm the intended filter.
# `xargs -r` skips the command when no PID is produced; without it GNU xargs
# would still run a bare `sudo kill`, which fails with a usage error.
pgrep -fl python | awk '!/dist_flex_opt\.py/{print $1}' | xargs -r sudo kill
# Launch $PYTHON_SCRIPT with $PYTHON_EXEC under mpirun on the hosts listed in
# $all_hosts. Arguments are built as arrays and every expansion is quoted so
# an empty or whitespace-containing value cannot shift the option parsing.

# An empty host list would otherwise reach mpirun as `-H ""`.
if [[ -z "$all_hosts" ]]; then
  echo "error: all_hosts is empty; nothing to pass to 'mpirun -H'" >&2
  exit 1
fi

# Options consumed by mpirun itself: exclude the lo and docker0 interfaces,
# place $N_GPUS ranks per node with $N_CORES_PER_GPU cores each, and export
# OMP_NUM_THREADS to the launched processes.
mpirun_opts=(
  --mca btl_tcp_if_exclude lo,docker0
  --mca oob_tcp_if_exclude lo,docker0
  --map-by "ppr:$N_GPUS:node:pe=$N_CORES_PER_GPU" --oversubscribe -H "$all_hosts"
  --bind-to core -x "OMP_NUM_THREADS=$N_CORES_PER_GPU"
)

# Options passed through unchanged to the Python module.
flexgen_opts=(
  --head-ip "$MY_IPADDR"
  --port 7777
  --use-mpi
  --model facebook/opt-30b
  --num-inner-iterations 4
  --percent 20 80 0 100 0 100 --gpu-batch-size 64 --num-gpu-batches 3
  --comm-device cpu
  --path _DUMMY_
  --cut-gen-len 5
  --cpu
  --async-comm
)

# Trace from here on so the fully expanded launch command is shown.
set -x
mpirun "${mpirun_opts[@]}" \
  "$PYTHON_EXEC" -m "$PYTHON_SCRIPT" \
  "${flexgen_opts[@]}"