add PETR repo #1634

Open · wants to merge 1 commit into base: master
4 changes: 4 additions & 0 deletions .gitmodules
@@ -125,3 +125,7 @@
[submodule "OtherFrame/ocr/TensorFlow/models/EAST"]
path = OtherFrame/ocr/TensorFlow/models/EAST
url = https://github.com/argman/EAST.git
[submodule "OtherFrame/3d/PyTorch/petr/models/petr"]
path = OtherFrame/3d/PyTorch/petr/models/petr
url = https://github.com/wangna11BD/PETR.git
branch = main
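For reviewers checking out this branch locally, the new submodule can be fetched with the usual git commands (a minimal sketch; the path is the one registered in `.gitmodules` above):

```bash
# sync and fetch the newly added PETR submodule
git submodule sync
git submodule update --init --recursive OtherFrame/3d/PyTorch/petr/models/petr
```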
87 changes: 87 additions & 0 deletions OtherFrame/3d/PyTorch/petr/README.md
@@ -0,0 +1,87 @@
# PyTorch 3D Detection Model (PETR) Performance Reproduction
## Directory Structure

```
├── README.md                 # this document
├── run_PyTorch.sh            # entry point: sets up the environment and measures the training performance of the PETR model
├── scripts/PrepareEnv.sh     # sets up the PyTorch/PETR runtime environment and downloads the training data
├── scripts/analysis_log.py   # parses the training logs into performance numbers
├── scripts/run_benchmark.sh  # worker script: benchmarks the training performance of a single model
└── models                    # repos of the competing PyTorch implementations
```

## Environment
### Physical machine
- Single node (1, 4, or 8 GPUs)
- OS: CentOS release 7.5 (Final)
- GPU: Tesla V100-SXM2-32GB * 8
- CPU: Intel(R) Xeon(R) Gold 6271C CPU @ 2.60GHz * 80
- CUDA / cuDNN version: cuda10.2-cudnn7

### Docker Image

- **Image version**: `registry.baidubce.com/paddlepaddle/paddle:2.1.2-gpu-cuda10.2-cudnn7`
- **PyTorch version**: `1.9.1`
- **CUDA version**: `10.2`
- **cuDNN version**: `7`
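To confirm a running container matches this table, a quick check can be run inside it (a sketch; assumes the `python` alias created by `scripts/PrepareEnv.sh` is on the PATH):

```bash
# print the installed PyTorch, CUDA, and cuDNN versions
python -c "import torch; print(torch.__version__, torch.version.cuda, torch.backends.cudnn.version())"
```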

## Test Procedure

```bash
bash run_PyTorch.sh    # launch the container and run the model benchmarks in this standard environment
```

If installing torch and the other frameworks inside the Docker container is very slow, you can set an HTTP proxy, as sketched below. Disable the proxy before downloading the test data, otherwise the download itself will take a very long time.
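A minimal sketch (`${proxy_addr}` is a placeholder for whatever proxy is available in your environment):

```bash
# turn the proxy on for the framework installation steps ...
export http_proxy=${proxy_addr}
export https_proxy=${proxy_addr}
# ... and turn it off again before downloading the test data
unset http_proxy https_proxy
```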

The content of the script is as follows:

```bash
#!/usr/bin/env bash
ImageName="registry.baidubce.com/paddlepaddle/paddle:2.1.2-gpu-cuda10.2-cudnn7";
docker pull ${ImageName}
run_cmd="cp /workspace/scripts/PrepareEnv.sh ./;
bash PrepareEnv.sh;
cd /workspace/models/mmedi;
cp -r /workspace/mmedi_benchmark_configs ./;
cp /workspace/scripts/run_benchmark.sh ./;
cp /workspace/scripts/analysis_log.py ./;
CUDA_VISIBLE_DEVICES=0 bash run_benchmark.sh esrgan_sp_bs32 sp fp32 32 300 4;
CUDA_VISIBLE_DEVICES=0 bash run_benchmark.sh esrgan_sp_bs64 sp fp32 64 300 4;
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash run_benchmark.sh esrgan_mp_bs32 mp fp32 32 300 4;
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash run_benchmark.sh esrgan_mp_bs64 mp fp32 64 300 4;
CUDA_VISIBLE_DEVICES=0 bash run_benchmark.sh edvr_sp_bs4 sp fp32 4 300 3;
CUDA_VISIBLE_DEVICES=0 bash run_benchmark.sh edvr_sp_bs64 sp fp32 64 300 3;
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash run_benchmark.sh edvr_mp_bs4 mp fp32 4 300 3;
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash run_benchmark.sh edvr_mp_bs64 mp fp32 64 300 3;
CUDA_VISIBLE_DEVICES=0 bash run_benchmark.sh basicvsr_sp_bs2 sp fp32 2 300 4;
CUDA_VISIBLE_DEVICES=0 bash run_benchmark.sh basicvsr_sp_bs4 sp fp32 4 300 4;
CUDA_VISIBLE_DEVICES=0,1,2,3 bash run_benchmark.sh basicvsr_mp_bs2 mp fp32 2 300 4;
CUDA_VISIBLE_DEVICES=0,1,2,3 bash run_benchmark.sh basicvsr_mp_bs4 mp fp32 4 300 4;
"
nvidia-docker run --name test_torch_gan -i \
--net=host \
--shm-size=128g \
-v $PWD:/workspace \
${ImageName} /bin/bash -c "${run_cmd}"
nvidia-docker stop test_torch_gan
nvidia-docker rm test_torch_gan
```

## Output

After the run finishes, files containing the training-performance data of the PETR model are produced in the current directory, e.g. `petr_sp_bs1_fp32_1_speed`, with contents like the following.

```bash
{
"log_file": "/workspace/models/mmedi/petr_sp_bs1_fp32_1_speed", \ # log 目录,创建规范见PrepareEnv.sh
"model_name": "petr_sp_bs1", \ # 模型case名,创建规范:repoName_模型名_bs${bs_item}_${fp_item}
"mission_name": "3D检测", \ # 模型case所属任务名称,具体可参考scripts/config.ini
"direction_id": 0, \ # 模型case所属方向id,0:CV|1:NLP|2:Rec 具体可参考benchmark/scripts/config.ini
"run_mode": "sp", \ # 单卡:sp|多卡:mp
"index": 1, \ # 速度验证默认为1
"gpu_num": 1, \ # 1|8
"FINAL_RESULT": 75.655, \ # 速度计算后的平均值,需要skip掉不稳定的前几步值
"JOB_FAIL_FLAG": 0, \ # 该模型case运行0:成功|1:失败
"UNIT": "images/s" \ # 速度指标的单位
}
```
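The inline `#` annotations above are explanatory only; the file written by `scripts/analysis_log.py` is plain JSON, so it can be inspected directly, for example (a sketch using the example file name from above):

```bash
# pretty-print the result file and extract the measured throughput
python -m json.tool petr_sp_bs1_fp32_1_speed
python -c "import json; print(json.load(open('petr_sp_bs1_fp32_1_speed'))['FINAL_RESULT'])"
```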
24 changes: 24 additions & 0 deletions OtherFrame/3d/PyTorch/petr/run_PyTorch.sh
@@ -0,0 +1,24 @@
#!/usr/bin/env bash

ImageName="registry.baidubce.com/paddlepaddle/paddle:2.1.2-gpu-cuda10.2-cudnn7";
docker pull ${ImageName}

run_cmd="cp /workspace/scripts/PrepareEnv.sh ./;
bash PrepareEnv.sh;
cd /workspace/models/petr;
cp /workspace/scripts/run_benchmark.sh ./;
cp /workspace/scripts/analysis_log.py ./;
CUDA_VISIBLE_DEVICES=0 bash run_benchmark.sh petr_sp_fp32_bs1 sp fp32 1 300 4;
CUDA_VISIBLE_DEVICES=0 bash run_benchmark.sh petr_sp_fp16_bs1 sp fp16 1 300 4;
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash run_benchmark.sh petr_mp_fp32_bs1 mp fp32 8 300 4;
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash run_benchmark.sh petr_mp_fp16_bs1 mp fp16 8 300 4;
"

nvidia-docker run --name test_torch_3d -i \
--net=host \
--shm-size=128g \
-v $PWD:/workspace \
${ImageName} /bin/bash -c "${run_cmd}"

nvidia-docker stop test_torch_3d
nvidia-docker rm test_torch_3d
64 changes: 64 additions & 0 deletions OtherFrame/3d/PyTorch/petr/scripts/PrepareEnv.sh
@@ -0,0 +1,64 @@
#!/usr/bin/env bash

# Common setup: configures python and installs PyTorch. Working directory: /workspace (the host directory mapped into the container at launch: benchmark/OtherFrame/3d/PyTorch/petr)
echo "*******prepare benchmark***********"

################################# Create the log directories, e.g.:
export BENCHMARK_ROOT=/workspace
log_date=`date "+%Y.%m%d.%H%M%S"`
frame=pytorch1.9.1
cuda_version=10.2
save_log_dir=${BENCHMARK_ROOT}/logs/${frame}_${log_date}_${cuda_version}/

if [[ -d ${save_log_dir} ]]; then
rm -rf ${save_log_dir}
fi
# update the log path
export TRAIN_LOG_DIR=${save_log_dir}/train_log
mkdir -p ${TRAIN_LOG_DIR}

log_path=${TRAIN_LOG_DIR}

################################# Configure python, e.g.:
rm -rf run_env
mkdir run_env
ln -s $(which python3.7) run_env/python
ln -s $(which pip3.7) run_env/pip
export PATH=/workspace/run_env:${PATH}

################################# Install the frameworks, e.g.:
pip install -U pip
echo `pip --version`
pip install torch==1.9.1+cu102 -f https://download.pytorch.org/whl/torch_stable.html
pip install torchvision==0.10.1+cu102 -f https://download.pytorch.org/whl/torch_stable.html

pip install mmcv-full==1.4.0 -f https://download.openmmlab.com/mmcv/dist/cu102/torch1.9.0/index.html

cd /workspace/models
git clone https://github.com/open-mmlab/mmdetection.git
cd /workspace/models/mmdetection
git checkout v2.24.1
pip install -r requirements/build.txt
python setup.py develop

pip install mmsegmentation==0.20.2

cd /workspace/models
git clone https://github.com/open-mmlab/mmdetection3d.git
cd /workspace/models/mmdetection3d
git checkout v0.17.1
pip install -r requirements/build.txt
python setup.py develop

cd /workspace/models/petr
mkdir ckpts
ln -s /workspace/models/mmdetection3d /workspace/models/petr/mmdetection3d

################################# Prepare the training data, e.g.:
mkdir -p data
# The nuScenes dataset is very large; to avoid a lengthy download on every run,
# copy the dataset into the data directory
# and symlink it to /data/Dataset/nuScenes
# cp -r /nuscenes_dataset_root data/
# ln -s /nuscenes_dataset_root /data/Dataset/nuScenes
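# (a hedged sketch, not part of the original script) Once the raw nuScenes data is in place,
# the annotation info files expected by mmdetection3d-based configs are usually generated with
# the data-converter tool shipped in mmdetection3d; the exact flags can vary between versions:
#   cd /workspace/models/mmdetection3d
#   python tools/create_data.py nuscenes --root-path ./data/nuscenes --out-dir ./data/nuscenes --extra-tag nuscenes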

echo "*******prepare benchmark end***********"
53 changes: 53 additions & 0 deletions OtherFrame/3d/PyTorch/petr/scripts/analysis_log.py
@@ -0,0 +1,53 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# encoding=utf-8 vi:ts=4:sw=4:expandtab:ft=python

import re
import sys
import json

def analyze(model_name, batch_size, log_file, res_log_file):
    gpu_ids_pat = re.compile(r"GPU (.*):")
    time_pat = re.compile(r"time: (.*), data_time")

    logs = open(log_file).readlines()
    logs = ";".join(logs)
    gpu_ids_res = gpu_ids_pat.findall(logs)
    time_res = time_pat.findall(logs)

    fail_flag = 0
    run_mode = ""
    gpu_num = 0
    ips = 0

    if gpu_ids_res == [] or time_res == []:
        fail_flag = 1
    else:
        # the captured group is expected to look like "0" or "0,1,...,7"
        # (mmdetection-style environment log), so count the comma-separated ids
        gpu_num = len(gpu_ids_res[0].split(","))
        run_mode = "sp" if gpu_num == 1 else "mp"

        # skip the first (unstable) step and average the remaining iteration times
        skip_num = 1
        total_time = 0
        for i in range(skip_num, len(time_res)):
            total_time += float(time_res[i])
        avg_time = total_time / (len(time_res) - skip_num)
        ips = float(batch_size) * round(1 / avg_time, 3)

    info = {"log_file": log_file, "model_name": model_name, "mission_name": "3D检测",
            "direction_id": 0, "run_mode": run_mode, "index": 1, "gpu_num": gpu_num,
            "FINAL_RESULT": ips, "JOB_FAIL_FLAG": fail_flag, "UNIT": "images/s"}
    json_info = json.dumps(info)
    with open(res_log_file, "w") as of:
        of.write(json_info)

if __name__ == "__main__":
    if len(sys.argv) != 5:
        print("Usage: " + sys.argv[0] + " model_name batch_size path/to/log/file path/to/res/log/file")
        sys.exit(1)

    model_name = sys.argv[1]
    batch_size = sys.argv[2]
    log_file = sys.argv[3]
    res_log_file = sys.argv[4]

    analyze(model_name, batch_size, log_file, res_log_file)
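For reference, a standalone invocation of this parser looks like the following (a sketch; the log path is hypothetical and is normally derived from `TRAIN_LOG_DIR` by `run_benchmark.sh`):

```bash
# args: model_name batch_size path/to/training/log path/to/result/json
python analysis_log.py petr_sp_fp32_bs1 1 ./petr_sp_fp32_bs1_fp32_1 ./petr_sp_fp32_bs1_fp32_1_speed
```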
62 changes: 62 additions & 0 deletions OtherFrame/3d/PyTorch/petr/scripts/run_benchmark.sh
@@ -0,0 +1,62 @@
#!/usr/bin/env bash
set -xe

# Test training benchmark for a model.

# Usage: CUDA_VISIBLE_DEVICES=xxx bash run_benchmark.sh ${model_name} ${run_mode} ${fp_item} ${bs_item} ${max_iter} ${num_workers}

function _set_params(){
model_name=${1:-"model_name"}
run_mode=${2:-"sp"} # sp or mp
fp_item=${3:-"fp32"} # fp32 or fp16
batch_size=${4:-"2"}
max_iter=${5:-"100"}
num_workers=${6:-"3"}
run_log_path=${TRAIN_LOG_DIR:-$(pwd)}

device=${CUDA_VISIBLE_DEVICES//,/ }
arr=(${device})
num_gpu_devices=${#arr[*]}
log_file=${run_log_path}/${model_name}_${fp_item}_${num_gpu_devices}
res_log_file=${run_log_path}/${model_name}_${fp_item}_${num_gpu_devices}_speed
}

function _analysis_log(){
python analysis_log.py ${model_name} ${batch_size} ${log_file} ${res_log_file}
cp ${log_file} /workspace
cp ${res_log_file} /workspace
}

function _train(){
echo "Train ${model_name} on ${num_gpu_devices} GPUs"
echo "current CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES, gpus=$num_gpu_devices, batch_size=$batch_size"

train_config="projects/configs/petrv2/${model_name}.py"
train_options="work_dirs/${model_name}/"

case ${run_mode} in
sp) train_cmd="./tools/dist_train.sh ${train_config} 1 --work-dir ${train_options}" ;;
mp) train_cmd="./tools/dist_train.sh ${train_config} 8 --work-dir ${train_options}" ;;
*) echo "run_mode must be sp or mp"; exit 1;;
esac

timeout 15m ${train_cmd} > ${log_file} 2>&1
if [ $? -ne 0 ];then
echo -e "${model_name}, FAIL"
export job_fail_flag=1
else
echo -e "${model_name}, SUCCESS"
export job_fail_flag=0
fi
if [ $run_mode = "mp" -a -d mylog ]; then
rm ${log_file}
cp mylog/workerlog.0 ${log_file}
fi

_analysis_log

# clean up any lingering python worker processes (exclude the grep process itself)
kill -9 `ps -ef|grep 'python'|grep -v grep|awk '{print $2}'`
}

_set_params $@
_train
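A single configuration can also be benchmarked directly inside the container, without going through `run_PyTorch.sh` (a sketch; assumes `PrepareEnv.sh` has already been run and the working directory is `/workspace/models/petr`):

```bash
# model_name run_mode fp_item bs_item max_iter num_workers
CUDA_VISIBLE_DEVICES=0 bash run_benchmark.sh petr_sp_fp32_bs1 sp fp32 1 300 4
```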