From 766dfd8c68e41f1c33794477290567a3366bb185 Mon Sep 17 00:00:00 2001 From: tim-win Date: Sat, 31 Aug 2024 15:50:22 -0400 Subject: [PATCH 01/10] Basic --- Dockerfile | 16 ++++++++++++---- pyproject.toml | 10 +++++----- requirements/basic_requirements.txt | 8 ++++---- 3 files changed, 21 insertions(+), 13 deletions(-) diff --git a/Dockerfile b/Dockerfile index 4869c792..89009fa0 100644 --- a/Dockerfile +++ b/Dockerfile @@ -18,6 +18,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ python3-wheel RUN pip3 install --upgrade pip \ + && pip install torch==1.11.0+cu113 torchvision==0.12.0+cu113 torchaudio==0.11.0+cu113 --index-url https://download.pytorch.org/whl/cu113 \ && pip3 install \ gradio \ opencv-python \ @@ -25,7 +26,7 @@ RUN pip3 install --upgrade pip \ mmengine \ setuptools \ openmim \ - && mim install mmcv==2.0.0 \ + && mim install 'mmcv<=2.2.0' \ && pip3 install --no-cache-dir --index-url https://download.pytorch.org/whl/cu118 \ wheel \ torch \ @@ -35,9 +36,16 @@ RUN pip3 install --upgrade pip \ COPY . /yolo WORKDIR /yolo -RUN pip3 install -e . +RUN pip3 install -e .[demo] + +RUN pip3 install onnx + +RUN apt install -y curl +RUN mkdir weights RUN curl -o weights/$WEIGHT -L https://huggingface.co/wondervictor/YOLO-World/resolve/main/$WEIGHT -ENTRYPOINT [ "python3", "demo.py" ] -CMD ["configs/pretrain/$MODEL", "weights/$WEIGHT"] \ No newline at end of file +RUN pip3 install onnxsim + +ENTRYPOINT [ "python3", "demo/gradio_demo.py" ] +CMD ["configs/pretrain/$MODEL", "weights/$WEIGHT"] diff --git a/pyproject.toml b/pyproject.toml index 8351de59..42bd1838 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,13 +35,13 @@ dependencies = [ "tokenizers", "numpy", "opencv-python", - "supervision==0.19.0", + "supervision", "openmim", - "mmcv-lite>=2.0.0rc4", - "mmdet==3.0.0", + "mmcv-lite<2.2.0", + "mmdet>=3.0.0", "mmengine>=0.7.1", "openmim", - "mmcv", + "mmcv<2.2.0", 'mmyolo @ git+https://github.com/onuralpszr/mmyolo.git', ] @@ -54,4 +54,4 @@ zip-safe = true [tool.setuptools.packages.find] include = ["yolo_world*"] -exclude = ["docs*", "tests*","third_party*","assets*"] \ No newline at end of file +exclude = ["docs*", "tests*","third_party*","assets*"] diff --git a/requirements/basic_requirements.txt b/requirements/basic_requirements.txt index d9c56e20..4efa3d8d 100644 --- a/requirements/basic_requirements.txt +++ b/requirements/basic_requirements.txt @@ -1,9 +1,9 @@ opencv-python==4.9.0.80 opencv-python-headless==4.2.0.34 -mmcv==2.0.0 -mmdet==3.0.0 +mmcv +mmdet mmengine==0.10.3 -mmyolo==0.6.0 +mmyolo timm==0.6.13 transformers==4.36.2 -albumentations \ No newline at end of file +albumentations From d7bebb28107356bbe1f9210357153450ae4e1976 Mon Sep 17 00:00:00 2001 From: tim-win Date: Sat, 31 Aug 2024 18:09:06 -0400 Subject: [PATCH 02/10] Reasonable facimile of working dependencies --- .dockerignore | 3 +- Dockerfile | 45 +++++++++++++---------------- pyproject.toml | 21 +++++++------- requirements/basic_requirements.txt | 17 +++++++---- requirements/demo_requirements.txt | 2 +- 5 files changed, 45 insertions(+), 43 deletions(-) diff --git a/.dockerignore b/.dockerignore index 1aefdd17..94c2aed7 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,2 +1,3 @@ docs -Dockerfile \ No newline at end of file +Dockerfile +.idea \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 89009fa0..fbffea49 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 +FROM nvidia/cuda:12.1.0-devel-ubuntu22.04 ARG 
MODEL="yolo_world_l_dual_vlpan_l2norm_2e-3_100e_4x8gpus_obj365v1_goldg_train_lvis_minival.py" ARG WEIGHT="yolo_world_l_clip_base_dual_vlpan_2e-3adamw_32xb16_100e_o365_goldg_train_pretrained-0e566235.pth" @@ -15,37 +15,32 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ libglib2.0-0 \ git \ python3-dev \ - python3-wheel + python3-wheel \ + curl + +RUN mkdir weights +RUN curl -o weights/$WEIGHT -L https://huggingface.co/wondervictor/YOLO-World/resolve/main/$WEIGHT RUN pip3 install --upgrade pip \ - && pip install torch==1.11.0+cu113 torchvision==0.12.0+cu113 torchaudio==0.11.0+cu113 --index-url https://download.pytorch.org/whl/cu113 \ + && pip3 install wheel \ + && pip3 install torch==2.1.2 torchvision==0.16.2 torchaudio==2.1.2 --index-url https://download.pytorch.org/whl/cu121 \ && pip3 install \ - gradio \ - opencv-python \ - supervision \ - mmengine \ - setuptools \ - openmim \ - && mim install 'mmcv<=2.2.0' \ - && pip3 install --no-cache-dir --index-url https://download.pytorch.org/whl/cu118 \ - wheel \ - torch \ - torchvision \ - torchaudio + gradio==4.16.0 \ + opencv-python==4.9.0.80 \ + supervision \ + mmengine==0.10.4 \ + setuptools \ + openmim \ + && mim install mmcv==2.1.0 \ + && mim install mmdet==3.3.0 \ + && pip install git+https://github.com/onuralpszr/mmyolo.git COPY . /yolo WORKDIR /yolo RUN pip3 install -e .[demo] -RUN pip3 install onnx - -RUN apt install -y curl -RUN mkdir weights - -RUN curl -o weights/$WEIGHT -L https://huggingface.co/wondervictor/YOLO-World/resolve/main/$WEIGHT - -RUN pip3 install onnxsim +RUN pip3 install onnx onnxsim -ENTRYPOINT [ "python3", "demo/gradio_demo.py" ] -CMD ["configs/pretrain/$MODEL", "weights/$WEIGHT"] +CMD [ "python3", "demo/gradio_demo.py" ] +# CMD ["configs/pretrain/$MODEL", "weights/$WEIGHT"] \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 42bd1838..3ffdf156 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,21 +29,20 @@ requires-python = ">= 3.7" dependencies = [ "wheel", - "torch>=1.11.0", - "torchvision>=0.16.2", - "transformers", + "torch==2.1.2", + "torchvision==0.16.2", + "transformers==4.36.2", "tokenizers", "numpy", - "opencv-python", + "opencv-python==4.9.0.80", "supervision", "openmim", - "mmcv-lite<2.2.0", - "mmdet>=3.0.0", - "mmengine>=0.7.1", - "openmim", "mmcv<2.2.0", - 'mmyolo @ git+https://github.com/onuralpszr/mmyolo.git', - + "mmdet>=3.0.0", + "mmengine==0.10.4", + "mmyolo @ git+https://github.com/onuralpszr/mmyolo.git", + "timm==0.6.13", + "albumentations", ] [tool.setuptools] @@ -54,4 +53,4 @@ zip-safe = true [tool.setuptools.packages.find] include = ["yolo_world*"] -exclude = ["docs*", "tests*","third_party*","assets*"] +exclude = ["docs*", "tests*","third_party*","assets*"] \ No newline at end of file diff --git a/requirements/basic_requirements.txt b/requirements/basic_requirements.txt index 4efa3d8d..d05e66b2 100644 --- a/requirements/basic_requirements.txt +++ b/requirements/basic_requirements.txt @@ -1,9 +1,16 @@ +torch==2.1.2 +torchvision==0.16.2 +torchaudio==2.1.2 opencv-python==4.9.0.80 -opencv-python-headless==4.2.0.34 -mmcv -mmdet -mmengine==0.10.3 -mmyolo +mmcv<2.2.0 +mmdet>=3.0.0 +mmengine==0.10.4 +git+https://github.com/onuralpszr/mmyolo.git timm==0.6.13 transformers==4.36.2 albumentations +gradio==4.16.0 +supervision +onnx +onnxruntime +onnxsim \ No newline at end of file diff --git a/requirements/demo_requirements.txt b/requirements/demo_requirements.txt index 0268ad3c..30d26ba1 100644 --- a/requirements/demo_requirements.txt +++ 
b/requirements/demo_requirements.txt @@ -1,2 +1,2 @@ -gradio==4.16.0 +gradio supervision \ No newline at end of file From 91543e4077f09db9819fb3f25c22d6651aa7cbb4 Mon Sep 17 00:00:00 2001 From: tim-win Date: Sat, 31 Aug 2024 18:23:43 -0400 Subject: [PATCH 03/10] Use off the shelf clip --- ...3_100e_4x8gpus_obj365v1_goldg_train_1280ft_lvis_minival.py | 1 + ...3_100e_4x8gpus_obj365v1_goldg_train_1280ft_lvis_minival.py | 4 ++-- ...eg_l_dual_vlpan_2e-4_80e_8gpus_allmodules_finetune_lvis.py | 2 +- ..._v2_seg_m_vlpan_bn_2e-4_80e_8gpus_seghead_finetune_lvis.py | 2 +- 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/configs/pretrain/yolo_world_v2_m_vlpan_bn_2e-3_100e_4x8gpus_obj365v1_goldg_train_1280ft_lvis_minival.py b/configs/pretrain/yolo_world_v2_m_vlpan_bn_2e-3_100e_4x8gpus_obj365v1_goldg_train_1280ft_lvis_minival.py index 59507204..a1ad7aac 100644 --- a/configs/pretrain/yolo_world_v2_m_vlpan_bn_2e-3_100e_4x8gpus_obj365v1_goldg_train_1280ft_lvis_minival.py +++ b/configs/pretrain/yolo_world_v2_m_vlpan_bn_2e-3_100e_4x8gpus_obj365v1_goldg_train_1280ft_lvis_minival.py @@ -16,6 +16,7 @@ weight_decay = 0.05 / 2 train_batch_size_per_gpu = 16 text_model_name = '../pretrained_models/clip-vit-base-patch32-projection' +text_model_name = 'openai/clip-vit-base-patch32' img_scale = (1280, 1280) text_model_name = 'openai/clip-vit-base-patch32' diff --git a/configs/pretrain/yolo_world_v2_x_vlpan_bn_2e-3_100e_4x8gpus_obj365v1_goldg_train_1280ft_lvis_minival.py b/configs/pretrain/yolo_world_v2_x_vlpan_bn_2e-3_100e_4x8gpus_obj365v1_goldg_train_1280ft_lvis_minival.py index ab4cd23f..40c2e5c1 100644 --- a/configs/pretrain/yolo_world_v2_x_vlpan_bn_2e-3_100e_4x8gpus_obj365v1_goldg_train_1280ft_lvis_minival.py +++ b/configs/pretrain/yolo_world_v2_x_vlpan_bn_2e-3_100e_4x8gpus_obj365v1_goldg_train_1280ft_lvis_minival.py @@ -15,8 +15,8 @@ base_lr = 2e-3 weight_decay = 0.05 / 2 train_batch_size_per_gpu = 16 -text_model_name = '../pretrained_models/clip-vit-base-patch32-projection' -# text_model_name = 'openai/clip-vit-base-patch32' +# text_model_name = '../pretrained_models/clip-vit-base-patch32-projection' +text_model_name = 'openai/clip-vit-base-patch32' img_scale = (1280, 1280) # model settings diff --git a/configs/segmentation/yolo_world_seg_l_dual_vlpan_2e-4_80e_8gpus_allmodules_finetune_lvis.py b/configs/segmentation/yolo_world_seg_l_dual_vlpan_2e-4_80e_8gpus_allmodules_finetune_lvis.py index 01885dd5..21e25797 100644 --- a/configs/segmentation/yolo_world_seg_l_dual_vlpan_2e-4_80e_8gpus_allmodules_finetune_lvis.py +++ b/configs/segmentation/yolo_world_seg_l_dual_vlpan_2e-4_80e_8gpus_allmodules_finetune_lvis.py @@ -18,7 +18,7 @@ load_from = 'pretrained_models/yolo_world_l_clip_base_dual_vlpan_2e-3adamw_32xb16_100e_o365_goldg_train_pretrained-0e566235.pth' persistent_workers = False text_model_name = '../pretrained_models/clip-vit-base-patch32-projection' -# text_model_name = 'openai/clip-vit-base-patch32' +text_model_name = 'openai/clip-vit-base-patch32' # Polygon2Mask downsample_ratio = 4 mask_overlap = False diff --git a/configs/segmentation/yolo_world_v2_seg_m_vlpan_bn_2e-4_80e_8gpus_seghead_finetune_lvis.py b/configs/segmentation/yolo_world_v2_seg_m_vlpan_bn_2e-4_80e_8gpus_seghead_finetune_lvis.py index d196d4ee..f8ce6c96 100644 --- a/configs/segmentation/yolo_world_v2_seg_m_vlpan_bn_2e-4_80e_8gpus_seghead_finetune_lvis.py +++ b/configs/segmentation/yolo_world_v2_seg_m_vlpan_bn_2e-4_80e_8gpus_seghead_finetune_lvis.py @@ -17,7 +17,7 @@ train_batch_size_per_gpu = 8 load_from = 
'pretrained_models/yolo_world_m_clip_t2i_bn_2e-3adamw_32xb16-100e_obj365v1_goldg_train-c6237d5b.pth' text_model_name = '../pretrained_models/clip-vit-base-patch32-projection' -# text_model_name = 'openai/clip-vit-base-patch32' +text_model_name = 'openai/clip-vit-base-patch32' persistent_workers = False # Polygon2Mask From 76ffdc77816937c4fb5e1a48beb3e86ab954f471 Mon Sep 17 00:00:00 2001 From: tim-win Date: Sat, 31 Aug 2024 19:20:30 -0400 Subject: [PATCH 04/10] Latest working dockerfile --- Dockerfile | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index fbffea49..1df674d5 100644 --- a/Dockerfile +++ b/Dockerfile @@ -35,12 +35,19 @@ RUN pip3 install --upgrade pip \ && mim install mmdet==3.3.0 \ && pip install git+https://github.com/onuralpszr/mmyolo.git -COPY . /yolo +RUN git clone https://github.com/tim-win/YOLO-World /yolo/ + WORKDIR /yolo RUN pip3 install -e .[demo] RUN pip3 install onnx onnxsim +RUN cd third_party && git clone https://github.com/onuralpszr/mmyolo.git ./mmyolo/ + +RUN chmod a+rwx /weights/ +RUN chmod a+rwx /yolo/configs/*/* + + CMD [ "python3", "demo/gradio_demo.py" ] # CMD ["configs/pretrain/$MODEL", "weights/$WEIGHT"] \ No newline at end of file From a34bae56221f9f7493ed034226b547478f2906a8 Mon Sep 17 00:00:00 2001 From: tim-win Date: Sat, 31 Aug 2024 20:29:39 -0400 Subject: [PATCH 05/10] Experimental update to libraries --- Dockerfile | 6 +++--- build_and_run.sh | 10 ++++++++++ 2 files changed, 13 insertions(+), 3 deletions(-) create mode 100755 build_and_run.sh diff --git a/Dockerfile b/Dockerfile index 1df674d5..3b39ae4a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -35,14 +35,14 @@ RUN pip3 install --upgrade pip \ && mim install mmdet==3.3.0 \ && pip install git+https://github.com/onuralpszr/mmyolo.git -RUN git clone https://github.com/tim-win/YOLO-World /yolo/ - +# RUN git clone --recursive https://github.com/tim-win/YOLO-World /yolo/ +COPY . /yolo WORKDIR /yolo RUN pip3 install -e .[demo] RUN pip3 install onnx onnxsim -RUN cd third_party && git clone https://github.com/onuralpszr/mmyolo.git ./mmyolo/ +# RUN cd third_party/ && rm -rf ./mmyolo && git clone https://github.com/onuralpszr/mmyolo.git . RUN chmod a+rwx /weights/ RUN chmod a+rwx /yolo/configs/*/* diff --git a/build_and_run.sh b/build_and_run.sh new file mode 100755 index 00000000..b8677d48 --- /dev/null +++ b/build_and_run.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash +set -e + +export MODEL=yolo_world_v2_seg_l_vlpan_bn_2e-4_80e_8gpus_seghead_finetune_lvis.py +export WEIGHT=yolo_world_seg_l_dual_vlpan_2e-4_80e_8gpus_allmodules_finetune_lvis-8c58c916.pth + +export MODEL=yolo_world_v2_x_vlpan_bn_2e-3_100e_4x8gpus_obj365v1_goldg_cc3mlite_train_lvis_minival.py +export WEIGHT=yolo_world_v2_x_obj365v1_goldg_cc3mlite_pretrain-8698fbfa.pth + +docker build -f ./Dockerfile --build-arg="MODEL=$MODEL" --build-arg="WEIGHT=$WEIGHT" -t yolo-demo . 
&& docker run --runtime nvidia -p 8080:8080 yolo-demo python3 demo/gradio_demo.py "configs/pretrain/$MODEL" "/weights/$WEIGHT" \ No newline at end of file From 57b9244adc8dba3340669ab4cff608ee28fde66b Mon Sep 17 00:00:00 2001 From: tim-win Date: Sat, 31 Aug 2024 20:31:56 -0400 Subject: [PATCH 06/10] Fully featured build and run script --- build_and_run.sh | 44 +++++++++++++++++++++++++++++++++++++++----- 1 file changed, 39 insertions(+), 5 deletions(-) diff --git a/build_and_run.sh b/build_and_run.sh index b8677d48..1202ce78 100755 --- a/build_and_run.sh +++ b/build_and_run.sh @@ -1,10 +1,44 @@ #!/usr/bin/env bash set -e -export MODEL=yolo_world_v2_seg_l_vlpan_bn_2e-4_80e_8gpus_seghead_finetune_lvis.py -export WEIGHT=yolo_world_seg_l_dual_vlpan_2e-4_80e_8gpus_allmodules_finetune_lvis-8c58c916.pth +declare -A models +models["seg-l"]="yolo_world_v2_seg_l_vlpan_bn_2e-4_80e_8gpus_seghead_finetune_lvis.py yolo_world_seg_l_dual_vlpan_2e-4_80e_8gpus_allmodules_finetune_lvis-8c58c916.pth" +models["pretrain-l-clip-800ft"]="yolo_world_v2_l_clip_large_vlpan_bn_2e-3_100e_4x8gpus_obj365v1_goldg_train_800ft_lvis_minival.py yolo_world_v2_l_clip_large_o365v1_goldg_pretrain_800ft-9df82e55.pth" +models["pretrain-l-clip"]="yolo_world_v2_l_clip_large_vlpan_bn_2e-3_100e_4x8gpus_obj365v1_goldg_train_lvis_minival.py yolo_world_v2_l_clip_large_o365v1_goldg_pretrain-8ff2e744.pth" +models["pretrain-l-1280ft"]="yolo_world_v2_l_vlpan_bn_2e-3_100e_4x8gpus_obj365v1_goldg_train_1280ft_lvis_minival.py yolo_world_v2_l_obj365v1_goldg_pretrain_1280ft-9babe3f6.pth" +models["pretrain-l"]="yolo_world_v2_l_vlpan_bn_2e-3_100e_4x8gpus_obj365v1_goldg_train_lvis_minival.py yolo_world_v2_l_obj365v1_goldg_pretrain-a82b1fe3.pth" +models["pretrain-m-1280ft"]="yolo_world_v2_m_vlpan_bn_2e-3_100e_4x8gpus_obj365v1_goldg_train_1280ft_lvis_minival.py yolo_world_v2_m_obj365v1_goldg_pretrain_1280ft-77d0346d.pth" +models["pretrain-m"]="yolo_world_v2_m_vlpan_bn_2e-3_100e_4x8gpus_obj365v1_goldg_train_lvis_minival.py yolo_world_v2_m_obj365v1_goldg_pretrain-c6237d5b.pth" +models["pretrain-s-1280ft"]="yolo_world_v2_s_vlpan_bn_2e-3_100e_4x8gpus_obj365v1_goldg_train_1280ft_lvis_minival.py yolo_world_v2_s_obj365v1_goldg_pretrain_1280ft-fc4ff4f7.pth" +models["pretrain-s"]="yolo_world_v2_s_vlpan_bn_2e-3_100e_4x8gpus_obj365v1_goldg_train_lvis_minival.py yolo_world_v2_s_obj365v1_goldg_pretrain-55b943ea.pth" +models["pretrain-x-cc3mlite"]="yolo_world_v2_x_vlpan_bn_2e-3_100e_4x8gpus_obj365v1_goldg_cc3mlite_train_lvis_minival.py yolo_world_v2_x_obj365v1_goldg_cc3mlite_pretrain-8698fbfa.pth" +models["pretrain-x-1280ft"]="yolo_world_v2_x_vlpan_bn_2e-3_100e_4x8gpus_obj365v1_goldg_train_1280ft_lvis_minival.py yolo_world_v2_x_obj365v1_goldg_cc3mlite_pretrain_1280ft-14996a36.pth" -export MODEL=yolo_world_v2_x_vlpan_bn_2e-3_100e_4x8gpus_obj365v1_goldg_cc3mlite_train_lvis_minival.py -export WEIGHT=yolo_world_v2_x_obj365v1_goldg_cc3mlite_pretrain-8698fbfa.pth +if [ $# -eq 0 ]; then + echo "Available model keys:" + for key in "${!models[@]}"; do + echo " $key" + done + echo "Usage: $0 " + exit 1 +fi -docker build -f ./Dockerfile --build-arg="MODEL=$MODEL" --build-arg="WEIGHT=$WEIGHT" -t yolo-demo . && docker run --runtime nvidia -p 8080:8080 yolo-demo python3 demo/gradio_demo.py "configs/pretrain/$MODEL" "/weights/$WEIGHT" \ No newline at end of file +model_key=$1 + +if [ -z "${models[$model_key]}" ]; then + echo "Invalid model key. 
Available keys are:" + for key in "${!models[@]}"; do + echo " $key" + done + exit 1 +fi + +read MODEL WEIGHT <<< "${models[$model_key]}" + +config_dir="configs/pretrain" +if [[ $model_key == seg-* ]]; then + config_dir="configs/segmentation" +fi + +docker build -f ./Dockerfile --build-arg="MODEL=$MODEL" --build-arg="WEIGHT=$WEIGHT" -t "yolo-demo:$model_key" . && \ +docker run --runtime nvidia -p 8080:8080 "yolo-demo:$model_key" python3 demo/gradio_demo.py "$config_dir/$MODEL" "/weights/$WEIGHT" \ No newline at end of file From 512e9a1264823c85eff46a3a891fefc6f7bfad6f Mon Sep 17 00:00:00 2001 From: tim-win Date: Sat, 31 Aug 2024 20:53:26 -0400 Subject: [PATCH 07/10] Add basic segmentation demo support --- Dockerfile | 9 +- build_and_run.sh | 6 +- demo/segmentation_demo.py | 167 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 176 insertions(+), 6 deletions(-) create mode 100644 demo/segmentation_demo.py diff --git a/Dockerfile b/Dockerfile index 3b39ae4a..4a733f9a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -18,8 +18,9 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ python3-wheel \ curl -RUN mkdir weights -RUN curl -o weights/$WEIGHT -L https://huggingface.co/wondervictor/YOLO-World/resolve/main/$WEIGHT +# Uncomment the following if you want to download a specific set of weights +# RUN mkdir weights +# RUN curl -o weights/$WEIGHT -L https://huggingface.co/wondervictor/YOLO-World/resolve/main/$WEIGHT RUN pip3 install --upgrade pip \ && pip3 install wheel \ @@ -47,7 +48,5 @@ RUN pip3 install onnx onnxsim RUN chmod a+rwx /weights/ RUN chmod a+rwx /yolo/configs/*/* - - -CMD [ "python3", "demo/gradio_demo.py" ] +CMD [ "python3", "demo/gradio_demo.py", "", ""] # CMD ["configs/pretrain/$MODEL", "weights/$WEIGHT"] \ No newline at end of file diff --git a/build_and_run.sh b/build_and_run.sh index 1202ce78..2aa5ad68 100755 --- a/build_and_run.sh +++ b/build_and_run.sh @@ -1,6 +1,8 @@ #!/usr/bin/env bash set -e +MODEL_DIR="../models/models-yoloworld" + declare -A models models["seg-l"]="yolo_world_v2_seg_l_vlpan_bn_2e-4_80e_8gpus_seghead_finetune_lvis.py yolo_world_seg_l_dual_vlpan_2e-4_80e_8gpus_allmodules_finetune_lvis-8c58c916.pth" models["pretrain-l-clip-800ft"]="yolo_world_v2_l_clip_large_vlpan_bn_2e-3_100e_4x8gpus_obj365v1_goldg_train_800ft_lvis_minival.py yolo_world_v2_l_clip_large_o365v1_goldg_pretrain_800ft-9df82e55.pth" @@ -36,9 +38,11 @@ fi read MODEL WEIGHT <<< "${models[$model_key]}" config_dir="configs/pretrain" +demo_file=demo/gradio_demo.py if [[ $model_key == seg-* ]]; then config_dir="configs/segmentation" + demo_file="demo/segmentation_demo.py" fi docker build -f ./Dockerfile --build-arg="MODEL=$MODEL" --build-arg="WEIGHT=$WEIGHT" -t "yolo-demo:$model_key" . && \ -docker run --runtime nvidia -p 8080:8080 "yolo-demo:$model_key" python3 demo/gradio_demo.py "$config_dir/$MODEL" "/weights/$WEIGHT" \ No newline at end of file +docker run -it -v "$MODEL_DIR:/weights/" --runtime nvidia -p 8080:8080 "yolo-demo:$model_key" bash # python3 demo/gradio_demo.py "$config_dir/$MODEL" "/weights/$WEIGHT" \ No newline at end of file diff --git a/demo/segmentation_demo.py b/demo/segmentation_demo.py new file mode 100644 index 00000000..4ab7a465 --- /dev/null +++ b/demo/segmentation_demo.py @@ -0,0 +1,167 @@ +# Copyright (c) Tencent Inc. All rights reserved. 
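+# Gradio-based demo for YOLO-World instance segmentation. The script loads a
+# config and checkpoint through mmengine's Runner, runs open-vocabulary
+# inference for a comma-separated list of class names, filters predictions by
+# score and NMS thresholds, and renders boxes, masks and labels with
+# supervision annotators before returning the annotated image.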
+import os +import sys +import argparse +import os.path as osp +from functools import partial + +import cv2 +import torch +import numpy as np +import gradio as gr +from PIL import Image +import supervision as sv +from torchvision.ops import nms +from mmengine.runner import Runner +from mmengine.dataset import Compose +from mmengine.runner.amp import autocast +from mmengine.config import Config, DictAction +from mmdet.datasets import CocoDataset +from mmyolo.registry import RUNNERS + +sys.path.append('./deploy') + +BOUNDING_BOX_ANNOTATOR = sv.BoundingBoxAnnotator(thickness=1) +MASK_ANNOTATOR = sv.MaskAnnotator() +LABEL_ANNOTATOR = sv.LabelAnnotator(text_padding=4, text_scale=0.5, text_thickness=1) + +def parse_args(): + parser = argparse.ArgumentParser(description='YOLO-World Segmentation Demo') + parser.add_argument('config', help='test config file path') + parser.add_argument('checkpoint', help='checkpoint file') + parser.add_argument( + '--work-dir', + help='the directory to save the file containing evaluation metrics', + default='output') + parser.add_argument( + '--cfg-options', + nargs='+', + action=DictAction, + help='override some settings in the used config, the key-value pair ' + 'in xxx=yyy format will be merged into config file. If the value to ' + 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' + 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" ' + 'Note that the quotation marks are necessary and that no white space ' + 'is allowed.') + args = parser.parse_args() + return args + +def run_segmentation(runner, + image, + text, + max_num_boxes, + score_thr, + nms_thr): + texts = [[t.strip()] for t in text.split(',')] + [[' ']] + data_info = dict(img_id=0, img=np.array(image), texts=texts) + data_info = runner.pipeline(data_info) + data_batch = dict(inputs=data_info['inputs'].unsqueeze(0), + data_samples=[data_info['data_samples']]) + + with autocast(enabled=False), torch.no_grad(): + output = runner.model.test_step(data_batch)[0] + pred_instances = output.pred_instances + + keep = nms(pred_instances.bboxes, + pred_instances.scores, + iou_threshold=nms_thr) + pred_instances = pred_instances[keep] + pred_instances = pred_instances[pred_instances.scores.float() > score_thr] + + if len(pred_instances.scores) > max_num_boxes: + indices = pred_instances.scores.float().topk(max_num_boxes)[1] + pred_instances = pred_instances[indices] + + pred_instances = pred_instances.cpu().numpy() + masks = pred_instances['masks'] if 'masks' in pred_instances else None + detections = sv.Detections(xyxy=pred_instances['bboxes'], + class_id=pred_instances['labels'], + confidence=pred_instances['scores'], + mask=masks) + labels = [ + f"{texts[class_id][0]} {confidence:0.2f}" for class_id, confidence in + zip(detections.class_id, detections.confidence) + ] + + image = np.array(image) + image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) + image = BOUNDING_BOX_ANNOTATOR.annotate(image, detections) + image = LABEL_ANNOTATOR.annotate(image, detections, labels=labels) + if masks is not None: + image = MASK_ANNOTATOR.annotate(image, detections) + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + image = Image.fromarray(image) + return image + +def demo(runner, args): + with gr.Blocks(title="YOLO-World Segmentation") as demo: + gr.Markdown('
YOLO-World: Real-Time Open-Vocabulary ' + 'Object Detector and Segmentation
') + with gr.Row(): + with gr.Column(scale=0.5): + image = gr.Image(type='pil', label='Input Image') + input_text = gr.Textbox( + lines=3, + label='Enter the classes to be detected and segmented, ' + 'separated by comma', + value=', '.join(CocoDataset.METAINFO['classes'][:5]), + elem_id='textbox') + max_num_boxes = gr.Slider(minimum=1, + maximum=300, + value=100, + step=1, + interactive=True, + label='Maximum Number of Boxes') + score_thr = gr.Slider(minimum=0, + maximum=1, + value=0.3, + step=0.01, + interactive=True, + label='Score Threshold') + nms_thr = gr.Slider(minimum=0, + maximum=1, + value=0.5, + step=0.01, + interactive=True, + label='NMS Threshold') + submit = gr.Button('Submit') + clear = gr.Button('Clear') + with gr.Column(scale=0.5): + output_image = gr.Image(type='pil', label='Output Image') + + submit.click(partial(run_segmentation, runner), + [image, input_text, max_num_boxes, score_thr, nms_thr], + [output_image]) + clear.click(lambda: [None, '', None], None, + [image, input_text, output_image]) + + demo.launch(server_name='0.0.0.0', server_port=8080) + +if __name__ == '__main__': + args = parse_args() + + # load config + cfg = Config.fromfile(args.config) + if args.cfg_options is not None: + cfg.merge_from_dict(args.cfg_options) + + if args.work_dir is not None: + cfg.work_dir = args.work_dir + elif cfg.get('work_dir', None) is None: + cfg.work_dir = osp.join('./work_dirs', + osp.splitext(osp.basename(args.config))[0]) + + cfg.load_from = args.checkpoint + + if 'runner_type' not in cfg: + runner = Runner.from_cfg(cfg) + else: + runner = RUNNERS.build(cfg) + + runner.call_hook('before_run') + runner.load_or_resume() + pipeline = cfg.test_dataloader.dataset.pipeline + pipeline[0].type = 'mmdet.LoadImageFromNDArray' + runner.pipeline = Compose(pipeline) + runner.model.eval() + demo(runner, args) \ No newline at end of file From 797b29389d1ba599e8ffc1c80ca027cdbffe65f7 Mon Sep 17 00:00:00 2001 From: tim-win Date: Sat, 31 Aug 2024 21:00:13 -0400 Subject: [PATCH 08/10] Reference remote code to avoid duplicating build steps --- Dockerfile | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Dockerfile b/Dockerfile index 4a733f9a..1f7a3bbd 100644 --- a/Dockerfile +++ b/Dockerfile @@ -36,17 +36,17 @@ RUN pip3 install --upgrade pip \ && mim install mmdet==3.3.0 \ && pip install git+https://github.com/onuralpszr/mmyolo.git -# RUN git clone --recursive https://github.com/tim-win/YOLO-World /yolo/ -COPY . /yolo -WORKDIR /yolo +RUN git clone --recursive https://github.com/tim-win/YOLO-World /yolo/ +#COPY . /yolo +#WORKDIR /yolo RUN pip3 install -e .[demo] RUN pip3 install onnx onnxsim # RUN cd third_party/ && rm -rf ./mmyolo && git clone https://github.com/onuralpszr/mmyolo.git . 
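+# /weights is created empty in the image; model checkpoints are mounted into
+# it at run time by build_and_run.sh via -v "$MODEL_DIR:/weights/".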
-RUN chmod a+rwx /weights/ +RUN mkdir /weights/ RUN chmod a+rwx /yolo/configs/*/* CMD [ "python3", "demo/gradio_demo.py", "", ""] -# CMD ["configs/pretrain/$MODEL", "weights/$WEIGHT"] \ No newline at end of file +# CMD ["configs/pretrain/$MODEL", "weights/$WEIGHT"] From 40419490611fd80d83e9eeb5b40061d34fcdcc7f Mon Sep 17 00:00:00 2001 From: tim-win Date: Sat, 31 Aug 2024 21:31:33 -0400 Subject: [PATCH 09/10] Make it sort of work all together now --- Dockerfile | 11 ++++++----- build_and_run.sh | 11 +++++++---- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/Dockerfile b/Dockerfile index 1f7a3bbd..e53efcbd 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM nvidia/cuda:12.1.0-devel-ubuntu22.04 +FROM nvidia/cuda:12.1.0-devel-ubuntu22.04 AS DEPENDENCIES ARG MODEL="yolo_world_l_dual_vlpan_l2norm_2e-3_100e_4x8gpus_obj365v1_goldg_train_lvis_minival.py" ARG WEIGHT="yolo_world_l_clip_base_dual_vlpan_2e-3adamw_32xb16_100e_o365_goldg_train_pretrained-0e566235.pth" @@ -36,17 +36,18 @@ RUN pip3 install --upgrade pip \ && mim install mmdet==3.3.0 \ && pip install git+https://github.com/onuralpszr/mmyolo.git +FROM DEPENDENCIES as INSTALLING_YOLO RUN git clone --recursive https://github.com/tim-win/YOLO-World /yolo/ #COPY . /yolo -#WORKDIR /yolo +WORKDIR /yolo RUN pip3 install -e .[demo] RUN pip3 install onnx onnxsim -# RUN cd third_party/ && rm -rf ./mmyolo && git clone https://github.com/onuralpszr/mmyolo.git . + +FROM INSTALLING_YOLO as OK_THIS_PART_IS_TRICKY_DONT_HATE RUN mkdir /weights/ RUN chmod a+rwx /yolo/configs/*/* -CMD [ "python3", "demo/gradio_demo.py", "", ""] -# CMD ["configs/pretrain/$MODEL", "weights/$WEIGHT"] +CMD [ "bash" ] diff --git a/build_and_run.sh b/build_and_run.sh index 2aa5ad68..edb73942 100755 --- a/build_and_run.sh +++ b/build_and_run.sh @@ -5,6 +5,9 @@ MODEL_DIR="../models/models-yoloworld" declare -A models models["seg-l"]="yolo_world_v2_seg_l_vlpan_bn_2e-4_80e_8gpus_seghead_finetune_lvis.py yolo_world_seg_l_dual_vlpan_2e-4_80e_8gpus_allmodules_finetune_lvis-8c58c916.pth" +models["seg-l-seghead"]="yolo_world_v2_seg_l_vlpan_bn_2e-4_80e_8gpus_seghead_finetune_lvis.py yolo_world_seg_l_dual_vlpan_2e-4_80e_8gpus_seghead_finetune_lvis-5a642d30.pth" +models["seg-m"]="yolo_world_v2_seg_m_vlpan_bn_2e-4_80e_8gpus_seghead_finetune_lvis.py yolo_world_seg_m_dual_vlpan_2e-4_80e_8gpus_allmodules_finetune_lvis-ca465825.pth" +models["seg-m-seghead"]="yolo_world_v2_seg_m_vlpan_bn_2e-4_80e_8gpus_seghead_finetune_lvis.py yolo_world_seg_m_dual_vlpan_2e-4_80e_8gpus_seghead_finetune_lvis-7bca59a7.pth" models["pretrain-l-clip-800ft"]="yolo_world_v2_l_clip_large_vlpan_bn_2e-3_100e_4x8gpus_obj365v1_goldg_train_800ft_lvis_minival.py yolo_world_v2_l_clip_large_o365v1_goldg_pretrain_800ft-9df82e55.pth" models["pretrain-l-clip"]="yolo_world_v2_l_clip_large_vlpan_bn_2e-3_100e_4x8gpus_obj365v1_goldg_train_lvis_minival.py yolo_world_v2_l_clip_large_o365v1_goldg_pretrain-8ff2e744.pth" models["pretrain-l-1280ft"]="yolo_world_v2_l_vlpan_bn_2e-3_100e_4x8gpus_obj365v1_goldg_train_1280ft_lvis_minival.py yolo_world_v2_l_obj365v1_goldg_pretrain_1280ft-9babe3f6.pth" @@ -40,9 +43,9 @@ read MODEL WEIGHT <<< "${models[$model_key]}" config_dir="configs/pretrain" demo_file=demo/gradio_demo.py if [[ $model_key == seg-* ]]; then - config_dir="configs/segmentation" - demo_file="demo/segmentation_demo.py" + export config_dir="configs/segmentation" + export demo_file="demo/segmentation_demo.py" fi -docker build -f ./Dockerfile --build-arg="MODEL=$MODEL" --build-arg="WEIGHT=$WEIGHT" -t 
"yolo-demo:$model_key" . && \ -docker run -it -v "$MODEL_DIR:/weights/" --runtime nvidia -p 8080:8080 "yolo-demo:$model_key" bash # python3 demo/gradio_demo.py "$config_dir/$MODEL" "/weights/$WEIGHT" \ No newline at end of file +# docker build -f ./Dockerfile --build-arg="MODEL=$MODEL" --build-arg="WEIGHT=$WEIGHT" -t "yolo-demo:latest" . && \ +docker run -it -v "$(readlink -f $MODEL_DIR):/weights/" --runtime nvidia -p 8080:8080 "yolo-demo:latest" python3 $demo_file "$config_dir/$MODEL" "/weights/$WEIGHT" \ No newline at end of file From bf71d2bfba1329949ee56acbcf76cfc5329c7aba Mon Sep 17 00:00:00 2001 From: tim-win Date: Sat, 31 Aug 2024 21:47:15 -0400 Subject: [PATCH 10/10] Cleanup MR so its a little more professional --- Dockerfile | 76 +++++++++++-------- README.md | 1 + build_and_run.sh | 70 +++++++++++++---- ...e-4_80e_8gpus_mask-refine_finetune_coco.py | 1 - ...bj365v1_goldg_train_1280ft_lvis_minival.py | 1 - ...8gpus_obj365v1_goldg_train_lvis_minival.py | 1 - ...e_4x8gpus_obj365v1_goldg_train_lvis_val.py | 1 - ...365v1_goldg_cc3mlite_train_lvis_minival.py | 1 - ...bj365v1_goldg_train_1280ft_lvis_minival.py | 1 - ...8gpus_obj365v1_goldg_train_lvis_minival.py | 1 - ...bn_2e-4_80e_8gpus_seghead_finetune_lvis.py | 1 - demo/README.md | 19 ++++- 12 files changed, 117 insertions(+), 57 deletions(-) diff --git a/Dockerfile b/Dockerfile index e53efcbd..7c43e743 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,53 +1,63 @@ -FROM nvidia/cuda:12.1.0-devel-ubuntu22.04 AS DEPENDENCIES +# Base image with CUDA support +FROM nvidia/cuda:12.1.0-devel-ubuntu22.04 AS base -ARG MODEL="yolo_world_l_dual_vlpan_l2norm_2e-3_100e_4x8gpus_obj365v1_goldg_train_lvis_minival.py" -ARG WEIGHT="yolo_world_l_clip_base_dual_vlpan_2e-3adamw_32xb16_100e_o365_goldg_train_pretrained-0e566235.pth" - -ENV FORCE_CUDA="1" -ENV MMCV_WITH_OPS=1 +# Set environment variables +ENV FORCE_CUDA="1" \ + MMCV_WITH_OPS=1 \ + DEBIAN_FRONTEND=noninteractive +# Install system dependencies RUN apt-get update && apt-get install -y --no-install-recommends \ - python3-pip \ + python3-pip \ libgl1-mesa-glx \ - libsm6 \ - libxext6 \ - libxrender-dev \ - libglib2.0-0 \ - git \ - python3-dev \ - python3-wheel \ - curl - -# Uncomment the following if you want to download a specific set of weights -# RUN mkdir weights -# RUN curl -o weights/$WEIGHT -L https://huggingface.co/wondervictor/YOLO-World/resolve/main/$WEIGHT - -RUN pip3 install --upgrade pip \ - && pip3 install wheel \ - && pip3 install torch==2.1.2 torchvision==0.16.2 torchaudio==2.1.2 --index-url https://download.pytorch.org/whl/cu121 \ - && pip3 install \ + libsm6 \ + libxext6 \ + libxrender-dev \ + libglib2.0-0 \ + git \ + python3-dev \ + python3-wheel \ + curl \ + && rm -rf /var/lib/apt/lists/* + +# Install Python dependencies +FROM base AS python_deps + +RUN pip3 install --upgrade pip wheel \ + && pip3 install --no-cache-dir torch==2.1.2 torchvision==0.16.2 torchaudio==2.1.2 --index-url https://download.pytorch.org/whl/cu121 \ + && pip3 install --no-cache-dir \ gradio==4.16.0 \ opencv-python==4.9.0.80 \ supervision \ mmengine==0.10.4 \ setuptools \ openmim \ + onnx \ + onnxsim \ && mim install mmcv==2.1.0 \ && mim install mmdet==3.3.0 \ - && pip install git+https://github.com/onuralpszr/mmyolo.git + && pip3 install --no-cache-dir git+https://github.com/onuralpszr/mmyolo.git + +# Clone and install YOLO-World +FROM python_deps AS yolo_world -FROM DEPENDENCIES as INSTALLING_YOLO -RUN git clone --recursive https://github.com/tim-win/YOLO-World /yolo/ -#COPY . 
/yolo +RUN git clone --recursive https://github.com/AILab-CVC/YOLO-World /yolo/ WORKDIR /yolo RUN pip3 install -e .[demo] -RUN pip3 install onnx onnxsim +# Final stage +FROM yolo_world AS final + +ARG MODEL="yolo_world_l_dual_vlpan_l2norm_2e-3_100e_4x8gpus_obj365v1_goldg_train_lvis_minival.py" +ARG WEIGHT="yolo_world_l_clip_base_dual_vlpan_2e-3adamw_32xb16_100e_o365_goldg_train_pretrained-0e566235.pth" -FROM INSTALLING_YOLO as OK_THIS_PART_IS_TRICKY_DONT_HATE +# Create weights directory and set permissions +RUN mkdir /weights/ \ + && chmod a+rwx /yolo/configs/*/* -RUN mkdir /weights/ -RUN chmod a+rwx /yolo/configs/*/* +# Optionally download weights (commented out by default) +# RUN curl -o /weights/$WEIGHT -L https://huggingface.co/wondervictor/YOLO-World/resolve/main/$WEIGHT -CMD [ "bash" ] +# Set the default command +CMD ["bash"] \ No newline at end of file diff --git a/README.md b/README.md index d89e9a4f..c213924d 100644 --- a/README.md +++ b/README.md @@ -37,6 +37,7 @@ We recommend that everyone **use English to communicate on issues**, as this hel For business licensing and other related inquiries, don't hesitate to contact `yixiaoge@tencent.com`. ## 🔥 Updates +`[2024-8-31]`: Segmentation demo added to the demo/ folder. Try it out in docker with `./build_and_run.sh seg-l`! `[2024-7-8]`: YOLO-World now has been integrated into [ComfyUI](https://github.com/StevenGrove/ComfyUI-YOLOWorld)! Come and try adding YOLO-World to your workflow now! You can access it at [StevenGrove/ComfyUI-YOLOWorld](https://github.com/StevenGrove/ComfyUI-YOLOWorld)! `[2024-5-18]:` YOLO-World models have been [integrated with the FiftyOne computer vision toolkit](https://docs.voxel51.com/integrations/ultralytics.html#open-vocabulary-detection) for streamlined open-vocabulary inference across image and video datasets. `[2024-5-16]:` Hey guys! Long time no see! This update contains (1) [fine-tuning guide](https://github.com/AILab-CVC/YOLO-World?#highlights--introduction) and (2) [TFLite Export](./docs/tflite_deploy.md) with INT8 Quantization. diff --git a/build_and_run.sh b/build_and_run.sh index edb73942..2ee55e8d 100755 --- a/build_and_run.sh +++ b/build_and_run.sh @@ -1,8 +1,33 @@ #!/usr/bin/env bash + +# Exit immediately if a command exits with a non-zero status. set -e -MODEL_DIR="../models/models-yoloworld" +# Set MODEL_DIR if not already set in the environment +: "${MODEL_DIR:="../models/models-yoloworld"}" + +# DocString for the script +: ' +This script builds and runs a Docker container for YOLO-World demos. +It supports various pre-trained models and configurations for object detection and segmentation. 
+ +Usage: + ./build_and_run.sh + +Environment Variables: + MODEL_DIR: Path to the directory containing model weights (default: "../models/models-yoloworld") +Arguments: + : Key for the desired model configuration (see available keys below) + +Available model keys: + seg-l, seg-l-seghead, seg-m, seg-m-seghead, + pretrain-l-clip-800ft, pretrain-l-clip, pretrain-l-1280ft, pretrain-l, + pretrain-m-1280ft, pretrain-m, pretrain-s-1280ft, pretrain-s, + pretrain-x-cc3mlite, pretrain-x-1280ft +' + +# Define associative array for model configurations declare -A models models["seg-l"]="yolo_world_v2_seg_l_vlpan_bn_2e-4_80e_8gpus_seghead_finetune_lvis.py yolo_world_seg_l_dual_vlpan_2e-4_80e_8gpus_allmodules_finetune_lvis-8c58c916.pth" models["seg-l-seghead"]="yolo_world_v2_seg_l_vlpan_bn_2e-4_80e_8gpus_seghead_finetune_lvis.py yolo_world_seg_l_dual_vlpan_2e-4_80e_8gpus_seghead_finetune_lvis-5a642d30.pth" @@ -19,33 +44,52 @@ models["pretrain-s"]="yolo_world_v2_s_vlpan_bn_2e-3_100e_4x8gpus_obj365v1_goldg_ models["pretrain-x-cc3mlite"]="yolo_world_v2_x_vlpan_bn_2e-3_100e_4x8gpus_obj365v1_goldg_cc3mlite_train_lvis_minival.py yolo_world_v2_x_obj365v1_goldg_cc3mlite_pretrain-8698fbfa.pth" models["pretrain-x-1280ft"]="yolo_world_v2_x_vlpan_bn_2e-3_100e_4x8gpus_obj365v1_goldg_train_1280ft_lvis_minival.py yolo_world_v2_x_obj365v1_goldg_cc3mlite_pretrain_1280ft-14996a36.pth" -if [ $# -eq 0 ]; then +# Function to display usage information +show_usage() { + echo "Usage: $0 " echo "Available model keys:" for key in "${!models[@]}"; do echo " $key" done - echo "Usage: $0 " +} + +# Check if a model key is provided +if [ $# -eq 0 ]; then + show_usage exit 1 fi model_key=$1 +# Validate the model key if [ -z "${models[$model_key]}" ]; then - echo "Invalid model key. Available keys are:" - for key in "${!models[@]}"; do - echo " $key" - done + echo "Invalid model key." + show_usage exit 1 fi -read MODEL WEIGHT <<< "${models[$model_key]}" +# Extract model and weight information +read -r MODEL WEIGHT <<< "${models[$model_key]}" +# Set configuration directory and demo file based on model type config_dir="configs/pretrain" -demo_file=demo/gradio_demo.py +demo_file="demo/gradio_demo.py" if [[ $model_key == seg-* ]]; then - export config_dir="configs/segmentation" - export demo_file="demo/segmentation_demo.py" + config_dir="configs/segmentation" + demo_file="demo/segmentation_demo.py" fi -# docker build -f ./Dockerfile --build-arg="MODEL=$MODEL" --build-arg="WEIGHT=$WEIGHT" -t "yolo-demo:latest" . && \ -docker run -it -v "$(readlink -f $MODEL_DIR):/weights/" --runtime nvidia -p 8080:8080 "yolo-demo:latest" python3 $demo_file "$config_dir/$MODEL" "/weights/$WEIGHT" \ No newline at end of file +# Build Docker image and run container +echo "Building Docker image..." +docker build -f ./Dockerfile --no-cache \ + --build-arg="MODEL=$MODEL" \ + --build-arg="WEIGHT=$WEIGHT" \ + -t "yolo-demo:latest" . + +echo "Running Docker container..." 
+docker run -it \ + -v "$(readlink -f "$MODEL_DIR"):/weights/" \ + --runtime nvidia \ + -p 8080:8080 \ + "yolo-demo:latest" \ + python3 "$demo_file" "$config_dir/$MODEL" "/weights/$WEIGHT" diff --git a/configs/finetune_coco/yolo_world_v2_m_vlpan_bn_2e-4_80e_8gpus_mask-refine_finetune_coco.py b/configs/finetune_coco/yolo_world_v2_m_vlpan_bn_2e-4_80e_8gpus_mask-refine_finetune_coco.py index 32fcc51c..714e1492 100644 --- a/configs/finetune_coco/yolo_world_v2_m_vlpan_bn_2e-4_80e_8gpus_mask-refine_finetune_coco.py +++ b/configs/finetune_coco/yolo_world_v2_m_vlpan_bn_2e-4_80e_8gpus_mask-refine_finetune_coco.py @@ -18,7 +18,6 @@ weight_decay = 0.05 train_batch_size_per_gpu = 16 load_from = 'pretrained_models/yolo_world_m_clip_t2i_bn_2e-3adamw_32xb16-100e_obj365v1_goldg_train-c6237d5b.pth' -# text_model_name = '../pretrained_models/clip-vit-base-patch32-projection' text_model_name = 'openai/clip-vit-base-patch32' persistent_workers = False diff --git a/configs/pretrain/yolo_world_v2_l_vlpan_bn_2e-3_100e_4x8gpus_obj365v1_goldg_train_1280ft_lvis_minival.py b/configs/pretrain/yolo_world_v2_l_vlpan_bn_2e-3_100e_4x8gpus_obj365v1_goldg_train_1280ft_lvis_minival.py index 1c34f3a4..630f5710 100644 --- a/configs/pretrain/yolo_world_v2_l_vlpan_bn_2e-3_100e_4x8gpus_obj365v1_goldg_train_1280ft_lvis_minival.py +++ b/configs/pretrain/yolo_world_v2_l_vlpan_bn_2e-3_100e_4x8gpus_obj365v1_goldg_train_1280ft_lvis_minival.py @@ -16,7 +16,6 @@ weight_decay = 0.025 train_batch_size_per_gpu = 4 load_from = "pretrained_models/yolo_world_v2_l_obj365v1_goldg_pretrain-a82b1fe3.pth" -# text_model_name = '../pretrained_models/clip-vit-base-patch32-projection' text_model_name = 'openai/clip-vit-base-patch32' img_scale = (1280, 1280) diff --git a/configs/pretrain/yolo_world_v2_l_vlpan_bn_2e-3_100e_4x8gpus_obj365v1_goldg_train_lvis_minival.py b/configs/pretrain/yolo_world_v2_l_vlpan_bn_2e-3_100e_4x8gpus_obj365v1_goldg_train_lvis_minival.py index cb8beec0..5a770bce 100644 --- a/configs/pretrain/yolo_world_v2_l_vlpan_bn_2e-3_100e_4x8gpus_obj365v1_goldg_train_lvis_minival.py +++ b/configs/pretrain/yolo_world_v2_l_vlpan_bn_2e-3_100e_4x8gpus_obj365v1_goldg_train_lvis_minival.py @@ -15,7 +15,6 @@ base_lr = 2e-3 weight_decay = 0.05 / 2 train_batch_size_per_gpu = 16 -# text_model_name = '../pretrained_models/clip-vit-base-patch32-projection' text_model_name = 'openai/clip-vit-base-patch32' # model settings model = dict( diff --git a/configs/pretrain/yolo_world_v2_l_vlpan_bn_2e-3_100e_4x8gpus_obj365v1_goldg_train_lvis_val.py b/configs/pretrain/yolo_world_v2_l_vlpan_bn_2e-3_100e_4x8gpus_obj365v1_goldg_train_lvis_val.py index 70b19b28..197289bb 100644 --- a/configs/pretrain/yolo_world_v2_l_vlpan_bn_2e-3_100e_4x8gpus_obj365v1_goldg_train_lvis_val.py +++ b/configs/pretrain/yolo_world_v2_l_vlpan_bn_2e-3_100e_4x8gpus_obj365v1_goldg_train_lvis_val.py @@ -15,7 +15,6 @@ base_lr = 2e-3 weight_decay = 0.05 / 2 train_batch_size_per_gpu = 16 -# text_model_name = '../pretrained_models/clip-vit-base-patch32-projection' text_model_name = 'openai/clip-vit-base-patch32' # model settings model = dict( diff --git a/configs/pretrain/yolo_world_v2_x_vlpan_bn_2e-3_100e_4x8gpus_obj365v1_goldg_cc3mlite_train_lvis_minival.py b/configs/pretrain/yolo_world_v2_x_vlpan_bn_2e-3_100e_4x8gpus_obj365v1_goldg_cc3mlite_train_lvis_minival.py index a2ba421e..4d8ff3aa 100644 --- a/configs/pretrain/yolo_world_v2_x_vlpan_bn_2e-3_100e_4x8gpus_obj365v1_goldg_cc3mlite_train_lvis_minival.py +++ 
b/configs/pretrain/yolo_world_v2_x_vlpan_bn_2e-3_100e_4x8gpus_obj365v1_goldg_cc3mlite_train_lvis_minival.py @@ -15,7 +15,6 @@ base_lr = 2e-3 weight_decay = 0.05 / 2 train_batch_size_per_gpu = 16 -# text_model_name = '../pretrained_models/clip-vit-base-patch32-projection' text_model_name = 'openai/clip-vit-base-patch32' # model settings model = dict( diff --git a/configs/pretrain/yolo_world_v2_x_vlpan_bn_2e-3_100e_4x8gpus_obj365v1_goldg_train_1280ft_lvis_minival.py b/configs/pretrain/yolo_world_v2_x_vlpan_bn_2e-3_100e_4x8gpus_obj365v1_goldg_train_1280ft_lvis_minival.py index 40c2e5c1..35050ecc 100644 --- a/configs/pretrain/yolo_world_v2_x_vlpan_bn_2e-3_100e_4x8gpus_obj365v1_goldg_train_1280ft_lvis_minival.py +++ b/configs/pretrain/yolo_world_v2_x_vlpan_bn_2e-3_100e_4x8gpus_obj365v1_goldg_train_1280ft_lvis_minival.py @@ -15,7 +15,6 @@ base_lr = 2e-3 weight_decay = 0.05 / 2 train_batch_size_per_gpu = 16 -# text_model_name = '../pretrained_models/clip-vit-base-patch32-projection' text_model_name = 'openai/clip-vit-base-patch32' img_scale = (1280, 1280) diff --git a/configs/pretrain/yolo_world_v2_x_vlpan_bn_2e-3_100e_4x8gpus_obj365v1_goldg_train_lvis_minival.py b/configs/pretrain/yolo_world_v2_x_vlpan_bn_2e-3_100e_4x8gpus_obj365v1_goldg_train_lvis_minival.py index e3c1226d..92afae3b 100644 --- a/configs/pretrain/yolo_world_v2_x_vlpan_bn_2e-3_100e_4x8gpus_obj365v1_goldg_train_lvis_minival.py +++ b/configs/pretrain/yolo_world_v2_x_vlpan_bn_2e-3_100e_4x8gpus_obj365v1_goldg_train_lvis_minival.py @@ -15,7 +15,6 @@ base_lr = 2e-3 weight_decay = 0.05 / 2 train_batch_size_per_gpu = 16 -# text_model_name = '../pretrained_models/clip-vit-base-patch32-projection' text_model_name = 'openai/clip-vit-base-patch32' # model settings model = dict( diff --git a/configs/segmentation/yolo_world_v2_seg_l_vlpan_bn_2e-4_80e_8gpus_seghead_finetune_lvis.py b/configs/segmentation/yolo_world_v2_seg_l_vlpan_bn_2e-4_80e_8gpus_seghead_finetune_lvis.py index 062c9e31..d2006659 100644 --- a/configs/segmentation/yolo_world_v2_seg_l_vlpan_bn_2e-4_80e_8gpus_seghead_finetune_lvis.py +++ b/configs/segmentation/yolo_world_v2_seg_l_vlpan_bn_2e-4_80e_8gpus_seghead_finetune_lvis.py @@ -16,7 +16,6 @@ weight_decay = 0.05 train_batch_size_per_gpu = 8 load_from = 'pretrained_models/yolo_world_l_clip_t2i_bn_2e-3adamw_32xb16-100e_obj365v1_goldg_cc3mlite_train-ca93cd1f.pth' -# text_model_name = '../pretrained_models/clip-vit-base-patch32-projection' text_model_name = 'openai/clip-vit-base-patch32' persistent_workers = False diff --git a/demo/README.md b/demo/README.md index c6f607c5..9fe600d9 100644 --- a/demo/README.md +++ b/demo/README.md @@ -19,11 +19,24 @@ pip install gradio==4.16.0 python demo/demo.py path/to/config path/to/weights ``` -Additionaly, you can use a Dockerfile to build an image with gradio. As a prerequisite, make sure you have respective drivers installed alongside [nvidia-container-runtime](https://stackoverflow.com/questions/59691207/docker-build-with-nvidia-runtime). Replace MODEL_NAME and WEIGHT_NAME with the respective values or ommit this and use default values from the [Dockerfile](Dockerfile#3) +Additionally, you can use our Docker build system for an easier setup: ```bash -docker build --build-arg="MODEL=MODEL_NAME" --build-arg="WEIGHT=WEIGHT_NAME" -t yolo_demo . 
-docker run --runtime nvidia -p 8080:8080 +./build_and_run.sh +``` + +Available model keys include: +- seg-l, seg-l-seghead, seg-m, seg-m-seghead +- pretrain-l-clip-800ft, pretrain-l-clip, pretrain-l-1280ft, pretrain-l +- pretrain-m-1280ft, pretrain-m, pretrain-s-1280ft, pretrain-s +- pretrain-x-cc3mlite, pretrain-x-1280ft + +This script will build the Docker image and run the container with the specified model configuration. The Gradio interface will be accessible at `http://localhost:8080`. + +You can also customize the model weights directory by setting the `MODEL_DIR` environment variable: + +```bash +MODEL_DIR=/path/to/your/weights ./build_and_run.sh ``` #### Image Demo