diff --git a/demo/demo.ipynb b/demo/demo.ipynb index ebcf2ff538..9d5e958864 100644 --- a/demo/demo.ipynb +++ b/demo/demo.ipynb @@ -70,7 +70,7 @@ "label = '../tools/data/kinetics/label_map_k400.txt'\n", "results = inference_recognizer(model, video)\n", "\n", - "pred_scores = results.pred_scores.item.tolist()\n", + "pred_scores = results.pred_score.tolist()\n", "score_tuples = tuple(zip(range(len(pred_scores)), pred_scores))\n", "score_sorted = sorted(score_tuples, key=itemgetter(1), reverse=True)\n", "top5_label = score_sorted[:5]\n", diff --git a/demo/demo.py b/demo/demo.py index 6c9b5db5a5..d2ec044a04 100644 --- a/demo/demo.py +++ b/demo/demo.py @@ -119,7 +119,7 @@ def main(): model = init_recognizer(cfg, args.checkpoint, device=args.device) pred_result = inference_recognizer(model, args.video) - pred_scores = pred_result.pred_scores.item.tolist() + pred_scores = pred_result.pred_score.tolist() score_tuples = tuple(zip(range(len(pred_scores)), pred_scores)) score_sorted = sorted(score_tuples, key=itemgetter(1), reverse=True) top5_label = score_sorted[:5] diff --git a/demo/demo_audio.py b/demo/demo_audio.py index 2da446a2da..c874813f1f 100644 --- a/demo/demo_audio.py +++ b/demo/demo_audio.py @@ -39,7 +39,7 @@ def main(): raise NotImplementedError('Demo works on extracted audio features') pred_result = inference_recognizer(model, args.audio) - pred_scores = pred_result.pred_scores.item.tolist() + pred_scores = pred_result.pred_score.tolist() score_tuples = tuple(zip(range(len(pred_scores)), pred_scores)) score_sorted = sorted(score_tuples, key=itemgetter(1), reverse=True) top5_label = score_sorted[:5] diff --git a/demo/demo_skeleton.py b/demo/demo_skeleton.py index 7a162ef468..19245b6540 100644 --- a/demo/demo_skeleton.py +++ b/demo/demo_skeleton.py @@ -152,7 +152,7 @@ def main(): model = init_recognizer(config, args.checkpoint, args.device) result = inference_skeleton(model, pose_results, (h, w)) - max_pred_index = result.pred_scores.item.argmax().item() + 
max_pred_index = result.pred_score.argmax().item() label_map = [x.strip() for x in open(args.label_map).readlines()] action_label = label_map[max_pred_index] diff --git a/demo/demo_video_structuralize.py b/demo/demo_video_structuralize.py index 805dda7e14..85784efbf5 100644 --- a/demo/demo_video_structuralize.py +++ b/demo/demo_video_structuralize.py @@ -373,7 +373,7 @@ def skeleton_based_action_recognition(args, pose_results, h, w): skeleton_model = init_recognizer( skeleton_config, args.skeleton_checkpoint, device=args.device) result = inference_skeleton(skeleton_model, pose_results, (h, w)) - action_idx = result.pred_scores.item.argmax().item() + action_idx = result.pred_score.argmax().item() return label_map[action_idx] @@ -382,7 +382,7 @@ def rgb_based_action_recognition(args): rgb_config.model.backbone.pretrained = None rgb_model = init_recognizer(rgb_config, args.rgb_checkpoint, args.device) action_results = inference_recognizer(rgb_model, args.video) - rgb_action_result = action_results.pred_scores.item.argmax().item() + rgb_action_result = action_results.pred_score.argmax().item() label_map = [x.strip() for x in open(args.label_map).readlines()] return label_map[rgb_action_result] @@ -460,7 +460,7 @@ def skeleton_based_stdet(args, label_map, human_detections, pose_results, output = inference_recognizer(skeleton_stdet_model, fake_anno) # for multi-label recognition - score = output.pred_scores.item.tolist() + score = output.pred_score.tolist() for k in range(len(score)): # 81 if k not in label_map: continue diff --git a/demo/fuse/bone.pkl b/demo/fuse/bone.pkl index a5cc72b3a1..21d311924c 100644 Binary files a/demo/fuse/bone.pkl and b/demo/fuse/bone.pkl differ diff --git a/demo/fuse/joint.pkl b/demo/fuse/joint.pkl index 1259a508ce..96d023b336 100644 Binary files a/demo/fuse/joint.pkl and b/demo/fuse/joint.pkl differ diff --git a/demo/long_video_demo.py b/demo/long_video_demo.py index bb7e51a234..eea03348ff 100644 --- a/demo/long_video_demo.py +++ 
b/demo/long_video_demo.py @@ -216,7 +216,7 @@ def inference(model, data, args, frame_queue): result = inference_recognizer( model, cur_data, test_pipeline=args.test_pipeline) - scores = result.pred_scores.item.tolist() + scores = result.pred_score.tolist() if args.stride > 0: pred_stride = int(args.sample_length * args.stride) diff --git a/demo/mmaction2_tutorial.ipynb b/demo/mmaction2_tutorial.ipynb index 1a9d6ec70e..4d24a04d5e 100644 --- a/demo/mmaction2_tutorial.ipynb +++ b/demo/mmaction2_tutorial.ipynb @@ -1,1936 +1,1936 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "VcjSRFELVbNk" - }, - "source": [ - "# MMAction2 Tutorial\n", - "\n", - "Welcome to MMAction2! This is the official colab tutorial for using MMAction2. In this tutorial, you will learn\n", - "- Perform inference with a MMAction2 recognizer.\n", - "- Train a new recognizer with a new dataset.\n", - "\n", - "\n", - "Let's start!" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "7LqHGkGEVqpm" - }, - "source": [ - "## Install MMAction2" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "Bf8PpPXtVvmg", - "outputId": "9d3f4594-f151-4ee9-a19b-09f8a439ac04" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "nvcc: NVIDIA (R) Cuda compiler driver\n", - "Copyright (c) 2005-2022 NVIDIA Corporation\n", - "Built on Wed_Sep_21_10:33:58_PDT_2022\n", - "Cuda compilation tools, release 11.8, V11.8.89\n", - "Build cuda_11.8.r11.8/compiler.31833905_0\n", - "gcc (Ubuntu 9.4.0-1ubuntu1~20.04.1) 9.4.0\n", - "Copyright (C) 2019 Free Software Foundation, Inc.\n", - "This is free software; see the source for copying conditions. 
There is NO\n", - "warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n", - "\n" - ] - } - ], - "source": [ - "# Check nvcc version\n", - "!nvcc -V\n", - "# Check GCC version\n", - "!gcc --version" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "id": "ZPwKGzqydnb2", - "outputId": "27506fa7-48a2-4fe0-d377-56f940dafec4", - "colab": { - "base_uri": "https://localhost:8080/" - } - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Looking in indexes: https://download.pytorch.org/whl/cu118, https://us-python.pkg.dev/colab-wheels/public/simple/\n", - "Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (2.0.0+cu118)\n", - "Requirement already satisfied: torchvision in /usr/local/lib/python3.10/dist-packages (0.15.1+cu118)\n", - "Requirement already satisfied: torchaudio in /usr/local/lib/python3.10/dist-packages (2.0.1+cu118)\n", - "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch) (3.12.0)\n", - "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch) (4.5.0)\n", - "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch) (1.11.1)\n", - "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch) (3.1)\n", - "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch) (3.1.2)\n", - "Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.10/dist-packages (from torch) (2.0.0)\n", - "Requirement already satisfied: cmake in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch) (3.25.2)\n", - "Requirement already satisfied: lit in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch) (16.0.3)\n", - "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from torchvision) (1.22.4)\n", - 
"Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from torchvision) (2.27.1)\n", - "Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in /usr/local/lib/python3.10/dist-packages (from torchvision) (8.4.0)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch) (2.1.2)\n", - "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->torchvision) (1.26.15)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->torchvision) (2022.12.7)\n", - "Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.10/dist-packages (from requests->torchvision) (2.0.12)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->torchvision) (3.4)\n", - "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch) (1.3.0)\n" - ] - } - ], - "source": [ - "# install dependencies: (if your colab has CUDA 11.8)\n", - "%pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "5PAJ4ArzV5Ry", - "outputId": "eb8539a0-9524-4c48-f3e1-0b013ce0d344" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", - "Collecting openmim\n", - " Downloading openmim-0.3.7-py2.py3-none-any.whl (51 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m51.3/51.3 kB\u001b[0m \u001b[31m4.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: Click in /usr/local/lib/python3.10/dist-packages (from 
openmim) (8.1.3)\n", - "Collecting colorama (from openmim)\n", - " Downloading colorama-0.4.6-py2.py3-none-any.whl (25 kB)\n", - "Collecting model-index (from openmim)\n", - " Downloading model_index-0.1.11-py3-none-any.whl (34 kB)\n", - "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from openmim) (1.5.3)\n", - "Requirement already satisfied: pip>=19.3 in /usr/local/lib/python3.10/dist-packages (from openmim) (23.1.2)\n", - "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from openmim) (2.27.1)\n", - "Requirement already satisfied: rich in /usr/local/lib/python3.10/dist-packages (from openmim) (13.3.4)\n", - "Requirement already satisfied: tabulate in /usr/local/lib/python3.10/dist-packages (from openmim) (0.8.10)\n", - "Requirement already satisfied: pyyaml in /usr/local/lib/python3.10/dist-packages (from model-index->openmim) (6.0)\n", - "Requirement already satisfied: markdown in /usr/local/lib/python3.10/dist-packages (from model-index->openmim) (3.4.3)\n", - "Collecting ordered-set (from model-index->openmim)\n", - " Downloading ordered_set-4.1.0-py3-none-any.whl (7.6 kB)\n", - "Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas->openmim) (2.8.2)\n", - "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->openmim) (2022.7.1)\n", - "Requirement already satisfied: numpy>=1.21.0 in /usr/local/lib/python3.10/dist-packages (from pandas->openmim) (1.22.4)\n", - "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->openmim) (1.26.15)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->openmim) (2022.12.7)\n", - "Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.10/dist-packages (from requests->openmim) (2.0.12)\n", - "Requirement already 
satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->openmim) (3.4)\n", - "Requirement already satisfied: markdown-it-py<3.0.0,>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich->openmim) (2.2.0)\n", - "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from rich->openmim) (2.14.0)\n", - "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py<3.0.0,>=2.2.0->rich->openmim) (0.1.2)\n", - "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.1->pandas->openmim) (1.16.0)\n", - "Installing collected packages: ordered-set, colorama, model-index, openmim\n", - "Successfully installed colorama-0.4.6 model-index-0.1.11 openmim-0.3.7 ordered-set-4.1.0\n", - "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", - "Looking in links: https://download.openmmlab.com/mmcv/dist/cu118/torch2.0.0/index.html\n", - "Collecting mmengine\n", - " Downloading mmengine-0.7.3-py3-none-any.whl (372 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m372.1/372.1 kB\u001b[0m \u001b[31m20.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting addict (from mmengine)\n", - " Downloading addict-2.4.0-py3-none-any.whl (3.8 kB)\n", - "Requirement already satisfied: matplotlib in /usr/local/lib/python3.10/dist-packages (from mmengine) (3.7.1)\n", - "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from mmengine) (1.22.4)\n", - "Requirement already satisfied: pyyaml in /usr/local/lib/python3.10/dist-packages (from mmengine) (6.0)\n", - "Requirement already satisfied: rich in /usr/local/lib/python3.10/dist-packages (from mmengine) (13.3.4)\n", - "Requirement already satisfied: termcolor in /usr/local/lib/python3.10/dist-packages (from mmengine) (2.3.0)\n", - "Collecting 
yapf (from mmengine)\n", - " Downloading yapf-0.33.0-py2.py3-none-any.whl (200 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m200.9/200.9 kB\u001b[0m \u001b[31m21.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: opencv-python>=3 in /usr/local/lib/python3.10/dist-packages (from mmengine) (4.7.0.72)\n", - "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmengine) (1.0.7)\n", - "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmengine) (0.11.0)\n", - "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmengine) (4.39.3)\n", - "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmengine) (1.4.4)\n", - "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmengine) (23.1)\n", - "Requirement already satisfied: pillow>=6.2.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmengine) (8.4.0)\n", - "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmengine) (3.0.9)\n", - "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmengine) (2.8.2)\n", - "Requirement already satisfied: markdown-it-py<3.0.0,>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich->mmengine) (2.2.0)\n", - "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from rich->mmengine) (2.14.0)\n", - "Requirement already satisfied: tomli>=2.0.1 in /usr/local/lib/python3.10/dist-packages (from yapf->mmengine) (2.0.1)\n", - "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from 
markdown-it-py<3.0.0,>=2.2.0->rich->mmengine) (0.1.2)\n", - "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.7->matplotlib->mmengine) (1.16.0)\n", - "Installing collected packages: addict, yapf, mmengine\n", - "Successfully installed addict-2.4.0 mmengine-0.7.3 yapf-0.33.0\n", - "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", - "Looking in links: https://download.openmmlab.com/mmcv/dist/cu118/torch2.0.0/index.html\n", - "Collecting mmcv>=2.0.0\n", - " Downloading https://download.openmmlab.com/mmcv/dist/cu118/torch2.0.0/mmcv-2.0.0-cp310-cp310-manylinux1_x86_64.whl (74.4 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m74.4/74.4 MB\u001b[0m \u001b[31m9.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: addict in /usr/local/lib/python3.10/dist-packages (from mmcv>=2.0.0) (2.4.0)\n", - "Requirement already satisfied: mmengine>=0.2.0 in /usr/local/lib/python3.10/dist-packages (from mmcv>=2.0.0) (0.7.3)\n", - "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from mmcv>=2.0.0) (1.22.4)\n", - "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from mmcv>=2.0.0) (23.1)\n", - "Requirement already satisfied: Pillow in /usr/local/lib/python3.10/dist-packages (from mmcv>=2.0.0) (8.4.0)\n", - "Requirement already satisfied: pyyaml in /usr/local/lib/python3.10/dist-packages (from mmcv>=2.0.0) (6.0)\n", - "Requirement already satisfied: yapf in /usr/local/lib/python3.10/dist-packages (from mmcv>=2.0.0) (0.33.0)\n", - "Requirement already satisfied: opencv-python>=3 in /usr/local/lib/python3.10/dist-packages (from mmcv>=2.0.0) (4.7.0.72)\n", - "Requirement already satisfied: matplotlib in /usr/local/lib/python3.10/dist-packages (from mmengine>=0.2.0->mmcv>=2.0.0) (3.7.1)\n", - "Requirement already satisfied: rich in 
/usr/local/lib/python3.10/dist-packages (from mmengine>=0.2.0->mmcv>=2.0.0) (13.3.4)\n", - "Requirement already satisfied: termcolor in /usr/local/lib/python3.10/dist-packages (from mmengine>=0.2.0->mmcv>=2.0.0) (2.3.0)\n", - "Requirement already satisfied: tomli>=2.0.1 in /usr/local/lib/python3.10/dist-packages (from yapf->mmcv>=2.0.0) (2.0.1)\n", - "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmengine>=0.2.0->mmcv>=2.0.0) (1.0.7)\n", - "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmengine>=0.2.0->mmcv>=2.0.0) (0.11.0)\n", - "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmengine>=0.2.0->mmcv>=2.0.0) (4.39.3)\n", - "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmengine>=0.2.0->mmcv>=2.0.0) (1.4.4)\n", - "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmengine>=0.2.0->mmcv>=2.0.0) (3.0.9)\n", - "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmengine>=0.2.0->mmcv>=2.0.0) (2.8.2)\n", - "Requirement already satisfied: markdown-it-py<3.0.0,>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich->mmengine>=0.2.0->mmcv>=2.0.0) (2.2.0)\n", - "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from rich->mmengine>=0.2.0->mmcv>=2.0.0) (2.14.0)\n", - "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py<3.0.0,>=2.2.0->rich->mmengine>=0.2.0->mmcv>=2.0.0) (0.1.2)\n", - "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.7->matplotlib->mmengine>=0.2.0->mmcv>=2.0.0) (1.16.0)\n", - "Installing collected packages: mmcv\n", - "Successfully 
installed mmcv-2.0.0\n", - "Cloning into 'mmaction2'...\n", - "remote: Enumerating objects: 21284, done.\u001b[K\n", - "remote: Counting objects: 100% (394/394), done.\u001b[K\n", - "remote: Compressing objects: 100% (287/287), done.\u001b[K\n", - "remote: Total 21284 (delta 175), reused 248 (delta 103), pack-reused 20890\u001b[K\n", - "Receiving objects: 100% (21284/21284), 68.63 MiB | 16.59 MiB/s, done.\n", - "Resolving deltas: 100% (14990/14990), done.\n", - "/content/mmaction2\n", - "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", - "Obtaining file:///content/mmaction2\n", - " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - "Collecting decord>=0.4.1 (from mmaction2==1.0.0)\n", - " Downloading decord-0.6.0-py3-none-manylinux2010_x86_64.whl (13.6 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.6/13.6 MB\u001b[0m \u001b[31m76.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting einops (from mmaction2==1.0.0)\n", - " Downloading einops-0.6.1-py3-none-any.whl (42 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m42.2/42.2 kB\u001b[0m \u001b[31m4.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: matplotlib in /usr/local/lib/python3.10/dist-packages (from mmaction2==1.0.0) (3.7.1)\n", - "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from mmaction2==1.0.0) (1.22.4)\n", - "Requirement already satisfied: opencv-contrib-python in /usr/local/lib/python3.10/dist-packages (from mmaction2==1.0.0) (4.7.0.72)\n", - "Requirement already satisfied: Pillow in /usr/local/lib/python3.10/dist-packages (from mmaction2==1.0.0) (8.4.0)\n", - "Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from mmaction2==1.0.0) (1.10.1)\n", - "Requirement already satisfied: torch>=1.3 in 
/usr/local/lib/python3.10/dist-packages (from mmaction2==1.0.0) (2.0.0+cu118)\n", - "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch>=1.3->mmaction2==1.0.0) (3.12.0)\n", - "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch>=1.3->mmaction2==1.0.0) (4.5.0)\n", - "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=1.3->mmaction2==1.0.0) (1.11.1)\n", - "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.3->mmaction2==1.0.0) (3.1)\n", - "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.3->mmaction2==1.0.0) (3.1.2)\n", - "Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.3->mmaction2==1.0.0) (2.0.0)\n", - "Requirement already satisfied: cmake in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch>=1.3->mmaction2==1.0.0) (3.25.2)\n", - "Requirement already satisfied: lit in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch>=1.3->mmaction2==1.0.0) (16.0.3)\n", - "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmaction2==1.0.0) (1.0.7)\n", - "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmaction2==1.0.0) (0.11.0)\n", - "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmaction2==1.0.0) (4.39.3)\n", - "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmaction2==1.0.0) (1.4.4)\n", - "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmaction2==1.0.0) (23.1)\n", - "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from 
matplotlib->mmaction2==1.0.0) (3.0.9)\n", - "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmaction2==1.0.0) (2.8.2)\n", - "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.7->matplotlib->mmaction2==1.0.0) (1.16.0)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=1.3->mmaction2==1.0.0) (2.1.2)\n", - "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.3->mmaction2==1.0.0) (1.3.0)\n", - "Installing collected packages: einops, decord, mmaction2\n", - " Running setup.py develop for mmaction2\n", - "Successfully installed decord-0.6.0 einops-0.6.1 mmaction2-1.0.0\n", - "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", - "Collecting av>=9.0 (from -r requirements/optional.txt (line 1))\n", - " Downloading av-10.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (31.0 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m31.0/31.0 MB\u001b[0m \u001b[31m38.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: future in /usr/local/lib/python3.10/dist-packages (from -r requirements/optional.txt (line 2)) (0.18.3)\n", - "Collecting fvcore (from -r requirements/optional.txt (line 3))\n", - " Downloading fvcore-0.1.5.post20221221.tar.gz (50 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m50.2/50.2 kB\u001b[0m \u001b[31m6.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25h Preparing metadata (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", - "Requirement already satisfied: imgaug in /usr/local/lib/python3.10/dist-packages (from -r requirements/optional.txt (line 4)) (0.4.0)\n", - "Requirement already satisfied: librosa in /usr/local/lib/python3.10/dist-packages (from -r requirements/optional.txt (line 5)) (0.10.0.post2)\n", - "Collecting lmdb (from -r requirements/optional.txt (line 6))\n", - " Downloading lmdb-1.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (299 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m299.2/299.2 kB\u001b[0m \u001b[31m30.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: moviepy in /usr/local/lib/python3.10/dist-packages (from -r requirements/optional.txt (line 7)) (1.0.3)\n", - "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from -r requirements/optional.txt (line 8)) (23.1)\n", - "Collecting pims (from -r requirements/optional.txt (line 9))\n", - " Downloading PIMS-0.6.1.tar.gz (86 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.0/86.0 kB\u001b[0m \u001b[31m12.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - "Collecting PyTurboJPEG (from -r requirements/optional.txt (line 10))\n", - " Downloading PyTurboJPEG-1.7.1.tar.gz (11 kB)\n", - " Preparing metadata (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", - "Requirement already satisfied: soundfile in /usr/local/lib/python3.10/dist-packages (from -r requirements/optional.txt (line 11)) (0.12.1)\n", - "Requirement already satisfied: tensorboard in /usr/local/lib/python3.10/dist-packages (from -r requirements/optional.txt (line 12)) (2.12.2)\n", - "Collecting wandb (from -r requirements/optional.txt (line 13))\n", - " Downloading wandb-0.15.2-py3-none-any.whl (2.0 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m79.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from fvcore->-r requirements/optional.txt (line 3)) (1.22.4)\n", - "Collecting yacs>=0.1.6 (from fvcore->-r requirements/optional.txt (line 3))\n", - " Downloading yacs-0.1.8-py3-none-any.whl (14 kB)\n", - "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from fvcore->-r requirements/optional.txt (line 3)) (6.0)\n", - "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from fvcore->-r requirements/optional.txt (line 3)) (4.65.0)\n", - "Requirement already satisfied: termcolor>=1.1 in /usr/local/lib/python3.10/dist-packages (from fvcore->-r requirements/optional.txt (line 3)) (2.3.0)\n", - "Requirement already satisfied: Pillow in /usr/local/lib/python3.10/dist-packages (from fvcore->-r requirements/optional.txt (line 3)) (8.4.0)\n", - "Requirement already satisfied: tabulate in /usr/local/lib/python3.10/dist-packages (from fvcore->-r requirements/optional.txt (line 3)) (0.8.10)\n", - "Collecting iopath>=0.1.7 (from fvcore->-r requirements/optional.txt (line 3))\n", - " Downloading iopath-0.1.10.tar.gz (42 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m42.2/42.2 kB\u001b[0m \u001b[31m4.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25h 
Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - "Requirement already satisfied: six in /usr/local/lib/python3.10/dist-packages (from imgaug->-r requirements/optional.txt (line 4)) (1.16.0)\n", - "Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from imgaug->-r requirements/optional.txt (line 4)) (1.10.1)\n", - "Requirement already satisfied: matplotlib in /usr/local/lib/python3.10/dist-packages (from imgaug->-r requirements/optional.txt (line 4)) (3.7.1)\n", - "Requirement already satisfied: scikit-image>=0.14.2 in /usr/local/lib/python3.10/dist-packages (from imgaug->-r requirements/optional.txt (line 4)) (0.19.3)\n", - "Requirement already satisfied: opencv-python in /usr/local/lib/python3.10/dist-packages (from imgaug->-r requirements/optional.txt (line 4)) (4.7.0.72)\n", - "Requirement already satisfied: imageio in /usr/local/lib/python3.10/dist-packages (from imgaug->-r requirements/optional.txt (line 4)) (2.25.1)\n", - "Requirement already satisfied: Shapely in /usr/local/lib/python3.10/dist-packages (from imgaug->-r requirements/optional.txt (line 4)) (2.0.1)\n", - "Requirement already satisfied: audioread>=2.1.9 in /usr/local/lib/python3.10/dist-packages (from librosa->-r requirements/optional.txt (line 5)) (3.0.0)\n", - "Requirement already satisfied: scikit-learn>=0.20.0 in /usr/local/lib/python3.10/dist-packages (from librosa->-r requirements/optional.txt (line 5)) (1.2.2)\n", - "Requirement already satisfied: joblib>=0.14 in /usr/local/lib/python3.10/dist-packages (from librosa->-r requirements/optional.txt (line 5)) (1.2.0)\n", - "Requirement already satisfied: decorator>=4.3.0 in /usr/local/lib/python3.10/dist-packages (from librosa->-r requirements/optional.txt (line 5)) (4.4.2)\n", - "Requirement already satisfied: numba>=0.51.0 in /usr/local/lib/python3.10/dist-packages (from librosa->-r requirements/optional.txt (line 5)) (0.56.4)\n", - "Requirement already satisfied: pooch<1.7,>=1.0 in 
/usr/local/lib/python3.10/dist-packages (from librosa->-r requirements/optional.txt (line 5)) (1.6.0)\n", - "Requirement already satisfied: soxr>=0.3.2 in /usr/local/lib/python3.10/dist-packages (from librosa->-r requirements/optional.txt (line 5)) (0.3.5)\n", - "Requirement already satisfied: typing-extensions>=4.1.1 in /usr/local/lib/python3.10/dist-packages (from librosa->-r requirements/optional.txt (line 5)) (4.5.0)\n", - "Requirement already satisfied: lazy-loader>=0.1 in /usr/local/lib/python3.10/dist-packages (from librosa->-r requirements/optional.txt (line 5)) (0.2)\n", - "Requirement already satisfied: msgpack>=1.0 in /usr/local/lib/python3.10/dist-packages (from librosa->-r requirements/optional.txt (line 5)) (1.0.5)\n", - "Requirement already satisfied: requests<3.0,>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from moviepy->-r requirements/optional.txt (line 7)) (2.27.1)\n", - "Requirement already satisfied: proglog<=1.0.0 in /usr/local/lib/python3.10/dist-packages (from moviepy->-r requirements/optional.txt (line 7)) (0.1.10)\n", - "Requirement already satisfied: imageio-ffmpeg>=0.2.0 in /usr/local/lib/python3.10/dist-packages (from moviepy->-r requirements/optional.txt (line 7)) (0.4.8)\n", - "Collecting slicerator>=0.9.8 (from pims->-r requirements/optional.txt (line 9))\n", - " Downloading slicerator-1.1.0-py3-none-any.whl (10 kB)\n", - "Requirement already satisfied: cffi>=1.0 in /usr/local/lib/python3.10/dist-packages (from soundfile->-r requirements/optional.txt (line 11)) (1.15.1)\n", - "Requirement already satisfied: absl-py>=0.4 in /usr/local/lib/python3.10/dist-packages (from tensorboard->-r requirements/optional.txt (line 12)) (1.4.0)\n", - "Requirement already satisfied: grpcio>=1.48.2 in /usr/local/lib/python3.10/dist-packages (from tensorboard->-r requirements/optional.txt (line 12)) (1.54.0)\n", - "Requirement already satisfied: google-auth<3,>=1.6.3 in /usr/local/lib/python3.10/dist-packages (from tensorboard->-r 
requirements/optional.txt (line 12)) (2.17.3)\n", - "Requirement already satisfied: google-auth-oauthlib<1.1,>=0.5 in /usr/local/lib/python3.10/dist-packages (from tensorboard->-r requirements/optional.txt (line 12)) (1.0.0)\n", - "Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.10/dist-packages (from tensorboard->-r requirements/optional.txt (line 12)) (3.4.3)\n", - "Requirement already satisfied: protobuf>=3.19.6 in /usr/local/lib/python3.10/dist-packages (from tensorboard->-r requirements/optional.txt (line 12)) (3.20.3)\n", - "Requirement already satisfied: setuptools>=41.0.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard->-r requirements/optional.txt (line 12)) (67.7.2)\n", - "Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard->-r requirements/optional.txt (line 12)) (0.7.0)\n", - "Requirement already satisfied: tensorboard-plugin-wit>=1.6.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard->-r requirements/optional.txt (line 12)) (1.8.1)\n", - "Requirement already satisfied: werkzeug>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from tensorboard->-r requirements/optional.txt (line 12)) (2.3.0)\n", - "Requirement already satisfied: wheel>=0.26 in /usr/local/lib/python3.10/dist-packages (from tensorboard->-r requirements/optional.txt (line 12)) (0.40.0)\n", - "Requirement already satisfied: Click!=8.0.0,>=7.0 in /usr/local/lib/python3.10/dist-packages (from wandb->-r requirements/optional.txt (line 13)) (8.1.3)\n", - "Collecting GitPython!=3.1.29,>=1.0.0 (from wandb->-r requirements/optional.txt (line 13))\n", - " Downloading GitPython-3.1.31-py3-none-any.whl (184 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m184.3/184.3 kB\u001b[0m \u001b[31m22.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: psutil>=5.0.0 in 
/usr/local/lib/python3.10/dist-packages (from wandb->-r requirements/optional.txt (line 13)) (5.9.5)\n", - "Collecting sentry-sdk>=1.0.0 (from wandb->-r requirements/optional.txt (line 13))\n", - " Downloading sentry_sdk-1.22.2-py2.py3-none-any.whl (203 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m203.3/203.3 kB\u001b[0m \u001b[31m25.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting docker-pycreds>=0.4.0 (from wandb->-r requirements/optional.txt (line 13))\n", - " Downloading docker_pycreds-0.4.0-py2.py3-none-any.whl (9.0 kB)\n", - "Collecting pathtools (from wandb->-r requirements/optional.txt (line 13))\n", - " Downloading pathtools-0.1.2.tar.gz (11 kB)\n", - " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - "Collecting setproctitle (from wandb->-r requirements/optional.txt (line 13))\n", - " Downloading setproctitle-1.3.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (30 kB)\n", - "Requirement already satisfied: appdirs>=1.4.3 in /usr/local/lib/python3.10/dist-packages (from wandb->-r requirements/optional.txt (line 13)) (1.4.4)\n", - "Requirement already satisfied: pycparser in /usr/local/lib/python3.10/dist-packages (from cffi>=1.0->soundfile->-r requirements/optional.txt (line 11)) (2.21)\n", - "Collecting gitdb<5,>=4.0.1 (from GitPython!=3.1.29,>=1.0.0->wandb->-r requirements/optional.txt (line 13))\n", - " Downloading gitdb-4.0.10-py3-none-any.whl (62 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.7/62.7 kB\u001b[0m \u001b[31m9.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: cachetools<6.0,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard->-r requirements/optional.txt (line 12)) (5.3.0)\n", - "Requirement already satisfied: pyasn1-modules>=0.2.1 in 
/usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard->-r requirements/optional.txt (line 12)) (0.3.0)\n", - "Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard->-r requirements/optional.txt (line 12)) (4.9)\n", - "Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from google-auth-oauthlib<1.1,>=0.5->tensorboard->-r requirements/optional.txt (line 12)) (1.3.1)\n", - "Collecting portalocker (from iopath>=0.1.7->fvcore->-r requirements/optional.txt (line 3))\n", - " Downloading portalocker-2.7.0-py2.py3-none-any.whl (15 kB)\n", - "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.10/dist-packages (from numba>=0.51.0->librosa->-r requirements/optional.txt (line 5)) (0.39.1)\n", - "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3.0,>=2.8.1->moviepy->-r requirements/optional.txt (line 7)) (1.26.15)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3.0,>=2.8.1->moviepy->-r requirements/optional.txt (line 7)) (2022.12.7)\n", - "Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.10/dist-packages (from requests<3.0,>=2.8.1->moviepy->-r requirements/optional.txt (line 7)) (2.0.12)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3.0,>=2.8.1->moviepy->-r requirements/optional.txt (line 7)) (3.4)\n", - "Requirement already satisfied: networkx>=2.2 in /usr/local/lib/python3.10/dist-packages (from scikit-image>=0.14.2->imgaug->-r requirements/optional.txt (line 4)) (3.1)\n", - "Requirement already satisfied: tifffile>=2019.7.26 in /usr/local/lib/python3.10/dist-packages (from scikit-image>=0.14.2->imgaug->-r requirements/optional.txt (line 4)) (2023.4.12)\n", - "Requirement 
already satisfied: PyWavelets>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from scikit-image>=0.14.2->imgaug->-r requirements/optional.txt (line 4)) (1.4.1)\n", - "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=0.20.0->librosa->-r requirements/optional.txt (line 5)) (3.1.0)\n", - "Requirement already satisfied: MarkupSafe>=2.1.1 in /usr/local/lib/python3.10/dist-packages (from werkzeug>=1.0.1->tensorboard->-r requirements/optional.txt (line 12)) (2.1.2)\n", - "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->imgaug->-r requirements/optional.txt (line 4)) (1.0.7)\n", - "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib->imgaug->-r requirements/optional.txt (line 4)) (0.11.0)\n", - "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->imgaug->-r requirements/optional.txt (line 4)) (4.39.3)\n", - "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->imgaug->-r requirements/optional.txt (line 4)) (1.4.4)\n", - "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->imgaug->-r requirements/optional.txt (line 4)) (3.0.9)\n", - "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib->imgaug->-r requirements/optional.txt (line 4)) (2.8.2)\n", - "Collecting smmap<6,>=3.0.1 (from gitdb<5,>=4.0.1->GitPython!=3.1.29,>=1.0.0->wandb->-r requirements/optional.txt (line 13))\n", - " Downloading smmap-5.0.0-py3-none-any.whl (24 kB)\n", - "Requirement already satisfied: pyasn1<0.6.0,>=0.4.6 in /usr/local/lib/python3.10/dist-packages (from pyasn1-modules>=0.2.1->google-auth<3,>=1.6.3->tensorboard->-r requirements/optional.txt (line 12)) (0.5.0)\n", - "Requirement already 
satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.10/dist-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<1.1,>=0.5->tensorboard->-r requirements/optional.txt (line 12)) (3.2.2)\n", - "Building wheels for collected packages: fvcore, pims, PyTurboJPEG, iopath, pathtools\n", - " Building wheel for fvcore (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - " Created wheel for fvcore: filename=fvcore-0.1.5.post20221221-py3-none-any.whl size=61405 sha256=25c1e50155c8788d00eec898793c96133a746a8bb076ffc5c01f5a4dc256751e\n", - " Stored in directory: /root/.cache/pip/wheels/01/c0/af/77c1cf53a1be9e42a52b48e5af2169d40ec2e89f7362489dd0\n", - " Building wheel for pims (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - " Created wheel for pims: filename=PIMS-0.6.1-py3-none-any.whl size=82619 sha256=59a328dc88a438c60cfb6e937e04c8a7dd55ad2a2905034cd41ff80cdbba6497\n", - " Stored in directory: /root/.cache/pip/wheels/cc/bf/3e/bfa77232d942f8244145f9c713b6b38f6ef04b6fb5c021c114\n", - " Building wheel for PyTurboJPEG (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - " Created wheel for PyTurboJPEG: filename=PyTurboJPEG-1.7.1-py3-none-any.whl size=12243 sha256=ddf6424c85ac533335abd96dd9e98b014ea1dd4f143c88cd35ecb08d6128f411\n", - " Stored in directory: /root/.cache/pip/wheels/de/6e/b1/e7ba70c328c3395555cb92ca8820babb32950d867858b1948b\n", - " Building wheel for iopath (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - " Created wheel for iopath: filename=iopath-0.1.10-py3-none-any.whl size=31531 sha256=db977a4344bebbdd710665e767caab4fbcf53cc6aea0707cd38d26c45718331e\n", - " Stored in directory: /root/.cache/pip/wheels/9a/a3/b6/ac0fcd1b4ed5cfeb3db92e6a0e476cfd48ed0df92b91080c1d\n", - " Building wheel for pathtools (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", - " Created wheel for pathtools: filename=pathtools-0.1.2-py3-none-any.whl size=8791 sha256=08bb5753ce029aef01f25c3e81882d93c0e040e5932e90a02a062ad058756b52\n", - " Stored in directory: /root/.cache/pip/wheels/e7/f3/22/152153d6eb222ee7a56ff8617d80ee5207207a8c00a7aab794\n", - "Successfully built fvcore pims PyTurboJPEG iopath pathtools\n", - "Installing collected packages: slicerator, pathtools, lmdb, av, yacs, smmap, setproctitle, sentry-sdk, PyTurboJPEG, portalocker, docker-pycreds, pims, iopath, gitdb, GitPython, fvcore, wandb\n", - "Successfully installed GitPython-3.1.31 PyTurboJPEG-1.7.1 av-10.0.0 docker-pycreds-0.4.0 fvcore-0.1.5.post20221221 gitdb-4.0.10 iopath-0.1.10 lmdb-1.4.1 pathtools-0.1.2 pims-0.6.1 portalocker-2.7.0 sentry-sdk-1.22.2 setproctitle-1.3.2 slicerator-1.1.0 smmap-5.0.0 wandb-0.15.2 yacs-0.1.8\n" - ] - } - ], - "source": [ - "# install MMEngine, MMCV and MMDetection using MIM\n", - "%pip install -U openmim\n", - "!mim install mmengine\n", - "!mim install \"mmcv>=2.0.0\"\n", - "\n", - "# Install mmaction2\n", - "!rm -rf mmaction2\n", - "!git clone https://github.com/open-mmlab/mmaction2.git -b main\n", - "%cd mmaction2\n", - "\n", - "!pip install -e .\n", - "\n", - "# Install some optional requirements\n", - "!pip install -r requirements/optional.txt" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "No_zZAFpWC-a", - "outputId": "9386dd81-2308-4adb-d3cb-798de11c035e" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "2.0.0+cu118 True\n", - "1.0.0\n", - "11.8\n", - "GCC 9.3\n", - "OrderedDict([('sys.platform', 'linux'), ('Python', '3.10.11 (main, Apr 5 2023, 14:15:10) [GCC 9.4.0]'), ('CUDA available', True), ('numpy_random_seed', 2147483648), ('GPU 0', 'Tesla T4'), ('CUDA_HOME', '/usr/local/cuda'), ('NVCC', 'Cuda compilation tools, release 11.8, V11.8.89'), ('GCC', 'x86_64-linux-gnu-gcc 
(Ubuntu 9.4.0-1ubuntu1~20.04.1) 9.4.0'), ('PyTorch', '2.0.0+cu118'), ('PyTorch compiling details', 'PyTorch built with:\\n - GCC 9.3\\n - C++ Version: 201703\\n - Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications\\n - Intel(R) MKL-DNN v2.7.3 (Git Hash 6dbeffbae1f23cbbeae17adb7b5b13f1f37c080e)\\n - OpenMP 201511 (a.k.a. OpenMP 4.5)\\n - LAPACK is enabled (usually provided by MKL)\\n - NNPACK is enabled\\n - CPU capability usage: AVX2\\n - CUDA Runtime 11.8\\n - NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_90,code=sm_90\\n - CuDNN 8.7\\n - Magma 2.6.1\\n - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=11.8, CUDNN_VERSION=8.7.0, CXX_COMPILER=/opt/rh/devtoolset-9/root/usr/bin/c++, CXX_FLAGS= -D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wunused-local-typedefs -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, 
PERF_WITH_AVX512=1, TORCH_DISABLE_GPU_ASSERTS=ON, TORCH_VERSION=2.0.0, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=1, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, \\n'), ('TorchVision', '0.15.1+cu118'), ('OpenCV', '4.7.0'), ('MMEngine', '0.7.3')])\n" - ] - } - ], - "source": [ - "# Check Pytorch installation\n", - "import torch, torchvision\n", - "print(torch.__version__, torch.cuda.is_available())\n", - "\n", - "# Check MMAction2 installation\n", - "import mmaction\n", - "print(mmaction.__version__)\n", - "\n", - "# Check MMCV installation\n", - "from mmcv.ops import get_compiling_cuda_version, get_compiler_version\n", - "print(get_compiling_cuda_version())\n", - "print(get_compiler_version())\n", - "\n", - "# Check MMEngine installation\n", - "from mmengine.utils.dl_utils import collect_env\n", - "print(collect_env())" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "pXf7oV5DWdab" - }, - "source": [ - "## Perform inference with a MMAction2 recognizer\n", - "MMAction2 already provides high level APIs to do inference and training." - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "64CW6d_AaT-Q", - "outputId": "ea330d8c-2e20-4dbd-d046-51d7c9ec4f7a" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "--2023-05-15 03:33:08-- https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_1x1x3_100e_kinetics400_rgb/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth\n", - "Resolving download.openmmlab.com (download.openmmlab.com)... 163.181.82.216, 163.181.82.218, 163.181.82.213, ...\n", - "Connecting to download.openmmlab.com (download.openmmlab.com)|163.181.82.216|:443... connected.\n", - "HTTP request sent, awaiting response... 
200 OK\n", - "Length: 97579339 (93M) [application/octet-stream]\n", - "Saving to: ‘checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth’\n", - "\n", - "checkpoints/tsn_r50 100%[===================>] 93.06M 26.1MB/s in 3.6s \n", - "\n", - "2023-05-15 03:33:12 (26.2 MB/s) - ‘checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth’ saved [97579339/97579339]\n", - "\n" - ] - } - ], - "source": [ - "!mkdir checkpoints\n", - "!wget -c https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_1x1x3_100e_kinetics400_rgb/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth \\\n", - " -O checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth" - ] + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "VcjSRFELVbNk" + }, + "source": [ + "# MMAction2 Tutorial\n", + "\n", + "Welcome to MMAction2! This is the official colab tutorial for using MMAction2. In this tutorial, you will learn\n", + "- Perform inference with a MMAction2 recognizer.\n", + "- Train a new recognizer with a new dataset.\n", + "\n", + "\n", + "Let's start!" 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7LqHGkGEVqpm" + }, + "source": [ + "## Install MMAction2" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "Bf8PpPXtVvmg", + "outputId": "9d3f4594-f151-4ee9-a19b-09f8a439ac04" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "HNZB7NoSabzj", - "outputId": "c0c2ba71-72ff-4cac-a5b8-65590f5a6bb0" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Loads checkpoint by local backend from path: checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth\n" - ] - } - ], - "source": [ - "from mmaction.apis import inference_recognizer, init_recognizer\n", - "from mmengine import Config\n", - "\n", - "\n", - "# Choose to use a config and initialize the recognizer\n", - "config = 'configs/recognition/tsn/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py'\n", - "config = Config.fromfile(config)\n", - "# Setup a checkpoint file to load\n", - "checkpoint = 'checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth'\n", - "# Initialize the recognizer\n", - "model = init_recognizer(config, checkpoint, device='cuda:0')" - ] - }, + "output_type": "stream", + "name": "stdout", + "text": [ + "nvcc: NVIDIA (R) Cuda compiler driver\n", + "Copyright (c) 2005-2022 NVIDIA Corporation\n", + "Built on Wed_Sep_21_10:33:58_PDT_2022\n", + "Cuda compilation tools, release 11.8, V11.8.89\n", + "Build cuda_11.8.r11.8/compiler.31833905_0\n", + "gcc (Ubuntu 9.4.0-1ubuntu1~20.04.1) 9.4.0\n", + "Copyright (C) 2019 Free Software Foundation, Inc.\n", + "This is free software; see the source for copying conditions. 
There is NO\n", + "warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n", + "\n" + ] + } + ], + "source": [ + "# Check nvcc version\n", + "!nvcc -V\n", + "# Check GCC version\n", + "!gcc --version" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "id": "ZPwKGzqydnb2", + "outputId": "27506fa7-48a2-4fe0-d377-56f940dafec4", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 8, - "metadata": { - "id": "rEMsBnpHapAn", - "outputId": "ec05049e-7289-4798-94fa-2b773cb23634", - "colab": { - "base_uri": "https://localhost:8080/" - } - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "05/15 03:33:18 - mmengine - WARNING - \"FileClient\" will be deprecated in future. Please use io functions in https://mmengine.readthedocs.io/en/latest/api/fileio.html#file-io\n", - "05/15 03:33:18 - mmengine - WARNING - \"HardDiskBackend\" is the alias of \"LocalBackend\" and the former will be deprecated in future.\n" - ] - } - ], - "source": [ - "# Use the recognizer to do inference\n", - "from operator import itemgetter\n", - "video = 'demo/demo.mp4'\n", - "label = 'tools/data/kinetics/label_map_k400.txt'\n", - "results = inference_recognizer(model, video)\n", - "\n", - "pred_scores = results.pred_scores.item.tolist()\n", - "score_tuples = tuple(zip(range(len(pred_scores)), pred_scores))\n", - "score_sorted = sorted(score_tuples, key=itemgetter(1), reverse=True)\n", - "top5_label = score_sorted[:5]\n", - "\n", - "labels = open(label).readlines()\n", - "labels = [x.strip() for x in labels]\n", - "results = [(labels[k[0]], k[1]) for k in top5_label]\n" - ] + "output_type": "stream", + "name": "stdout", + "text": [ + "Looking in indexes: https://download.pytorch.org/whl/cu118, https://us-python.pkg.dev/colab-wheels/public/simple/\n", + "Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (2.0.0+cu118)\n", + 
"Requirement already satisfied: torchvision in /usr/local/lib/python3.10/dist-packages (0.15.1+cu118)\n", + "Requirement already satisfied: torchaudio in /usr/local/lib/python3.10/dist-packages (2.0.1+cu118)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch) (3.12.0)\n", + "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch) (4.5.0)\n", + "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch) (1.11.1)\n", + "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch) (3.1)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch) (3.1.2)\n", + "Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.10/dist-packages (from torch) (2.0.0)\n", + "Requirement already satisfied: cmake in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch) (3.25.2)\n", + "Requirement already satisfied: lit in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch) (16.0.3)\n", + "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from torchvision) (1.22.4)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from torchvision) (2.27.1)\n", + "Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in /usr/local/lib/python3.10/dist-packages (from torchvision) (8.4.0)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch) (2.1.2)\n", + "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->torchvision) (1.26.15)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->torchvision) (2022.12.7)\n", + "Requirement already satisfied: charset-normalizer~=2.0.0 in 
/usr/local/lib/python3.10/dist-packages (from requests->torchvision) (2.0.12)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->torchvision) (3.4)\n", + "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch) (1.3.0)\n" + ] + } + ], + "source": [ + "# install dependencies: (if your colab has CUDA 11.8)\n", + "%pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "5PAJ4ArzV5Ry", + "outputId": "eb8539a0-9524-4c48-f3e1-0b013ce0d344" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "NIyJXqfWathq", - "outputId": "cb25aca9-e72d-4c54-f295-4c889713cb3a" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "The top-5 labels with corresponding scores are:\n", - "arm wrestling: 1.0\n", - "rock scissors paper: 6.434453414527752e-09\n", - "shaking hands: 2.7599860175087088e-09\n", - "clapping: 1.3454612979302283e-09\n", - "massaging feet: 5.555100823784187e-10\n" - ] - } - ], - "source": [ - "print('The top-5 labels with corresponding scores are:')\n", - "for result in results:\n", - " print(f'{result[0]}: ', result[1])" - ] + "output_type": "stream", + "name": "stdout", + "text": [ + "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", + "Collecting openmim\n", + " Downloading openmim-0.3.7-py2.py3-none-any.whl (51 kB)\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m51.3/51.3 kB\u001B[0m \u001B[31m4.0 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25hRequirement already satisfied: Click in /usr/local/lib/python3.10/dist-packages (from openmim) (8.1.3)\n", + 
"Collecting colorama (from openmim)\n", + " Downloading colorama-0.4.6-py2.py3-none-any.whl (25 kB)\n", + "Collecting model-index (from openmim)\n", + " Downloading model_index-0.1.11-py3-none-any.whl (34 kB)\n", + "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from openmim) (1.5.3)\n", + "Requirement already satisfied: pip>=19.3 in /usr/local/lib/python3.10/dist-packages (from openmim) (23.1.2)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from openmim) (2.27.1)\n", + "Requirement already satisfied: rich in /usr/local/lib/python3.10/dist-packages (from openmim) (13.3.4)\n", + "Requirement already satisfied: tabulate in /usr/local/lib/python3.10/dist-packages (from openmim) (0.8.10)\n", + "Requirement already satisfied: pyyaml in /usr/local/lib/python3.10/dist-packages (from model-index->openmim) (6.0)\n", + "Requirement already satisfied: markdown in /usr/local/lib/python3.10/dist-packages (from model-index->openmim) (3.4.3)\n", + "Collecting ordered-set (from model-index->openmim)\n", + " Downloading ordered_set-4.1.0-py3-none-any.whl (7.6 kB)\n", + "Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas->openmim) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->openmim) (2022.7.1)\n", + "Requirement already satisfied: numpy>=1.21.0 in /usr/local/lib/python3.10/dist-packages (from pandas->openmim) (1.22.4)\n", + "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->openmim) (1.26.15)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->openmim) (2022.12.7)\n", + "Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.10/dist-packages (from requests->openmim) (2.0.12)\n", + "Requirement already satisfied: idna<4,>=2.5 
in /usr/local/lib/python3.10/dist-packages (from requests->openmim) (3.4)\n", + "Requirement already satisfied: markdown-it-py<3.0.0,>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich->openmim) (2.2.0)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from rich->openmim) (2.14.0)\n", + "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py<3.0.0,>=2.2.0->rich->openmim) (0.1.2)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.1->pandas->openmim) (1.16.0)\n", + "Installing collected packages: ordered-set, colorama, model-index, openmim\n", + "Successfully installed colorama-0.4.6 model-index-0.1.11 openmim-0.3.7 ordered-set-4.1.0\n", + "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", + "Looking in links: https://download.openmmlab.com/mmcv/dist/cu118/torch2.0.0/index.html\n", + "Collecting mmengine\n", + " Downloading mmengine-0.7.3-py3-none-any.whl (372 kB)\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m372.1/372.1 kB\u001B[0m \u001B[31m20.5 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25hCollecting addict (from mmengine)\n", + " Downloading addict-2.4.0-py3-none-any.whl (3.8 kB)\n", + "Requirement already satisfied: matplotlib in /usr/local/lib/python3.10/dist-packages (from mmengine) (3.7.1)\n", + "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from mmengine) (1.22.4)\n", + "Requirement already satisfied: pyyaml in /usr/local/lib/python3.10/dist-packages (from mmengine) (6.0)\n", + "Requirement already satisfied: rich in /usr/local/lib/python3.10/dist-packages (from mmengine) (13.3.4)\n", + "Requirement already satisfied: termcolor in /usr/local/lib/python3.10/dist-packages (from mmengine) (2.3.0)\n", + "Collecting yapf (from mmengine)\n", + 
" Downloading yapf-0.33.0-py2.py3-none-any.whl (200 kB)\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m200.9/200.9 kB\u001B[0m \u001B[31m21.0 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25hRequirement already satisfied: opencv-python>=3 in /usr/local/lib/python3.10/dist-packages (from mmengine) (4.7.0.72)\n", + "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmengine) (1.0.7)\n", + "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmengine) (0.11.0)\n", + "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmengine) (4.39.3)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmengine) (1.4.4)\n", + "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmengine) (23.1)\n", + "Requirement already satisfied: pillow>=6.2.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmengine) (8.4.0)\n", + "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmengine) (3.0.9)\n", + "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmengine) (2.8.2)\n", + "Requirement already satisfied: markdown-it-py<3.0.0,>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich->mmengine) (2.2.0)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from rich->mmengine) (2.14.0)\n", + "Requirement already satisfied: tomli>=2.0.1 in /usr/local/lib/python3.10/dist-packages (from yapf->mmengine) (2.0.1)\n", + "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py<3.0.0,>=2.2.0->rich->mmengine) (0.1.2)\n", + 
"Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.7->matplotlib->mmengine) (1.16.0)\n", + "Installing collected packages: addict, yapf, mmengine\n", + "Successfully installed addict-2.4.0 mmengine-0.7.3 yapf-0.33.0\n", + "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", + "Looking in links: https://download.openmmlab.com/mmcv/dist/cu118/torch2.0.0/index.html\n", + "Collecting mmcv>=2.0.0\n", + " Downloading https://download.openmmlab.com/mmcv/dist/cu118/torch2.0.0/mmcv-2.0.0-cp310-cp310-manylinux1_x86_64.whl (74.4 MB)\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m74.4/74.4 MB\u001B[0m \u001B[31m9.7 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25hRequirement already satisfied: addict in /usr/local/lib/python3.10/dist-packages (from mmcv>=2.0.0) (2.4.0)\n", + "Requirement already satisfied: mmengine>=0.2.0 in /usr/local/lib/python3.10/dist-packages (from mmcv>=2.0.0) (0.7.3)\n", + "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from mmcv>=2.0.0) (1.22.4)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from mmcv>=2.0.0) (23.1)\n", + "Requirement already satisfied: Pillow in /usr/local/lib/python3.10/dist-packages (from mmcv>=2.0.0) (8.4.0)\n", + "Requirement already satisfied: pyyaml in /usr/local/lib/python3.10/dist-packages (from mmcv>=2.0.0) (6.0)\n", + "Requirement already satisfied: yapf in /usr/local/lib/python3.10/dist-packages (from mmcv>=2.0.0) (0.33.0)\n", + "Requirement already satisfied: opencv-python>=3 in /usr/local/lib/python3.10/dist-packages (from mmcv>=2.0.0) (4.7.0.72)\n", + "Requirement already satisfied: matplotlib in /usr/local/lib/python3.10/dist-packages (from mmengine>=0.2.0->mmcv>=2.0.0) (3.7.1)\n", + "Requirement already satisfied: rich in /usr/local/lib/python3.10/dist-packages (from 
mmengine>=0.2.0->mmcv>=2.0.0) (13.3.4)\n", + "Requirement already satisfied: termcolor in /usr/local/lib/python3.10/dist-packages (from mmengine>=0.2.0->mmcv>=2.0.0) (2.3.0)\n", + "Requirement already satisfied: tomli>=2.0.1 in /usr/local/lib/python3.10/dist-packages (from yapf->mmcv>=2.0.0) (2.0.1)\n", + "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmengine>=0.2.0->mmcv>=2.0.0) (1.0.7)\n", + "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmengine>=0.2.0->mmcv>=2.0.0) (0.11.0)\n", + "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmengine>=0.2.0->mmcv>=2.0.0) (4.39.3)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmengine>=0.2.0->mmcv>=2.0.0) (1.4.4)\n", + "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmengine>=0.2.0->mmcv>=2.0.0) (3.0.9)\n", + "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmengine>=0.2.0->mmcv>=2.0.0) (2.8.2)\n", + "Requirement already satisfied: markdown-it-py<3.0.0,>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich->mmengine>=0.2.0->mmcv>=2.0.0) (2.2.0)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from rich->mmengine>=0.2.0->mmcv>=2.0.0) (2.14.0)\n", + "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py<3.0.0,>=2.2.0->rich->mmengine>=0.2.0->mmcv>=2.0.0) (0.1.2)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.7->matplotlib->mmengine>=0.2.0->mmcv>=2.0.0) (1.16.0)\n", + "Installing collected packages: mmcv\n", + "Successfully installed mmcv-2.0.0\n", + "Cloning into 
'mmaction2'...\n", + "remote: Enumerating objects: 21284, done.\u001B[K\n", + "remote: Counting objects: 100% (394/394), done.\u001B[K\n", + "remote: Compressing objects: 100% (287/287), done.\u001B[K\n", + "remote: Total 21284 (delta 175), reused 248 (delta 103), pack-reused 20890\u001B[K\n", + "Receiving objects: 100% (21284/21284), 68.63 MiB | 16.59 MiB/s, done.\n", + "Resolving deltas: 100% (14990/14990), done.\n", + "/content/mmaction2\n", + "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", + "Obtaining file:///content/mmaction2\n", + " Preparing metadata (setup.py) ... \u001B[?25l\u001B[?25hdone\n", + "Collecting decord>=0.4.1 (from mmaction2==1.0.0)\n", + " Downloading decord-0.6.0-py3-none-manylinux2010_x86_64.whl (13.6 MB)\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m13.6/13.6 MB\u001B[0m \u001B[31m76.9 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25hCollecting einops (from mmaction2==1.0.0)\n", + " Downloading einops-0.6.1-py3-none-any.whl (42 kB)\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m42.2/42.2 kB\u001B[0m \u001B[31m4.5 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25hRequirement already satisfied: matplotlib in /usr/local/lib/python3.10/dist-packages (from mmaction2==1.0.0) (3.7.1)\n", + "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from mmaction2==1.0.0) (1.22.4)\n", + "Requirement already satisfied: opencv-contrib-python in /usr/local/lib/python3.10/dist-packages (from mmaction2==1.0.0) (4.7.0.72)\n", + "Requirement already satisfied: Pillow in /usr/local/lib/python3.10/dist-packages (from mmaction2==1.0.0) (8.4.0)\n", + "Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from mmaction2==1.0.0) (1.10.1)\n", + "Requirement already satisfied: torch>=1.3 in /usr/local/lib/python3.10/dist-packages (from mmaction2==1.0.0) 
(2.0.0+cu118)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch>=1.3->mmaction2==1.0.0) (3.12.0)\n", + "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch>=1.3->mmaction2==1.0.0) (4.5.0)\n", + "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=1.3->mmaction2==1.0.0) (1.11.1)\n", + "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.3->mmaction2==1.0.0) (3.1)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.3->mmaction2==1.0.0) (3.1.2)\n", + "Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.3->mmaction2==1.0.0) (2.0.0)\n", + "Requirement already satisfied: cmake in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch>=1.3->mmaction2==1.0.0) (3.25.2)\n", + "Requirement already satisfied: lit in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch>=1.3->mmaction2==1.0.0) (16.0.3)\n", + "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmaction2==1.0.0) (1.0.7)\n", + "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmaction2==1.0.0) (0.11.0)\n", + "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmaction2==1.0.0) (4.39.3)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmaction2==1.0.0) (1.4.4)\n", + "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmaction2==1.0.0) (23.1)\n", + "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmaction2==1.0.0) (3.0.9)\n", + "Requirement already 
satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmaction2==1.0.0) (2.8.2)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.7->matplotlib->mmaction2==1.0.0) (1.16.0)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=1.3->mmaction2==1.0.0) (2.1.2)\n", + "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.3->mmaction2==1.0.0) (1.3.0)\n", + "Installing collected packages: einops, decord, mmaction2\n", + " Running setup.py develop for mmaction2\n", + "Successfully installed decord-0.6.0 einops-0.6.1 mmaction2-1.0.0\n", + "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", + "Collecting av>=9.0 (from -r requirements/optional.txt (line 1))\n", + " Downloading av-10.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (31.0 MB)\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m31.0/31.0 MB\u001B[0m \u001B[31m38.3 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25hRequirement already satisfied: future in /usr/local/lib/python3.10/dist-packages (from -r requirements/optional.txt (line 2)) (0.18.3)\n", + "Collecting fvcore (from -r requirements/optional.txt (line 3))\n", + " Downloading fvcore-0.1.5.post20221221.tar.gz (50 kB)\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m50.2/50.2 kB\u001B[0m \u001B[31m6.7 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25h Preparing metadata (setup.py) ... 
\u001B[?25l\u001B[?25hdone\n", + "Requirement already satisfied: imgaug in /usr/local/lib/python3.10/dist-packages (from -r requirements/optional.txt (line 4)) (0.4.0)\n", + "Requirement already satisfied: librosa in /usr/local/lib/python3.10/dist-packages (from -r requirements/optional.txt (line 5)) (0.10.0.post2)\n", + "Collecting lmdb (from -r requirements/optional.txt (line 6))\n", + " Downloading lmdb-1.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (299 kB)\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m299.2/299.2 kB\u001B[0m \u001B[31m30.2 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25hRequirement already satisfied: moviepy in /usr/local/lib/python3.10/dist-packages (from -r requirements/optional.txt (line 7)) (1.0.3)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from -r requirements/optional.txt (line 8)) (23.1)\n", + "Collecting pims (from -r requirements/optional.txt (line 9))\n", + " Downloading PIMS-0.6.1.tar.gz (86 kB)\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m86.0/86.0 kB\u001B[0m \u001B[31m12.2 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25h Preparing metadata (setup.py) ... \u001B[?25l\u001B[?25hdone\n", + "Collecting PyTurboJPEG (from -r requirements/optional.txt (line 10))\n", + " Downloading PyTurboJPEG-1.7.1.tar.gz (11 kB)\n", + " Preparing metadata (setup.py) ... 
\u001B[?25l\u001B[?25hdone\n", + "Requirement already satisfied: soundfile in /usr/local/lib/python3.10/dist-packages (from -r requirements/optional.txt (line 11)) (0.12.1)\n", + "Requirement already satisfied: tensorboard in /usr/local/lib/python3.10/dist-packages (from -r requirements/optional.txt (line 12)) (2.12.2)\n", + "Collecting wandb (from -r requirements/optional.txt (line 13))\n", + " Downloading wandb-0.15.2-py3-none-any.whl (2.0 MB)\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m2.0/2.0 MB\u001B[0m \u001B[31m79.1 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25hRequirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from fvcore->-r requirements/optional.txt (line 3)) (1.22.4)\n", + "Collecting yacs>=0.1.6 (from fvcore->-r requirements/optional.txt (line 3))\n", + " Downloading yacs-0.1.8-py3-none-any.whl (14 kB)\n", + "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from fvcore->-r requirements/optional.txt (line 3)) (6.0)\n", + "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from fvcore->-r requirements/optional.txt (line 3)) (4.65.0)\n", + "Requirement already satisfied: termcolor>=1.1 in /usr/local/lib/python3.10/dist-packages (from fvcore->-r requirements/optional.txt (line 3)) (2.3.0)\n", + "Requirement already satisfied: Pillow in /usr/local/lib/python3.10/dist-packages (from fvcore->-r requirements/optional.txt (line 3)) (8.4.0)\n", + "Requirement already satisfied: tabulate in /usr/local/lib/python3.10/dist-packages (from fvcore->-r requirements/optional.txt (line 3)) (0.8.10)\n", + "Collecting iopath>=0.1.7 (from fvcore->-r requirements/optional.txt (line 3))\n", + " Downloading iopath-0.1.10.tar.gz (42 kB)\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m42.2/42.2 kB\u001B[0m \u001B[31m4.8 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25h 
Preparing metadata (setup.py) ... \u001B[?25l\u001B[?25hdone\n", + "Requirement already satisfied: six in /usr/local/lib/python3.10/dist-packages (from imgaug->-r requirements/optional.txt (line 4)) (1.16.0)\n", + "Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from imgaug->-r requirements/optional.txt (line 4)) (1.10.1)\n", + "Requirement already satisfied: matplotlib in /usr/local/lib/python3.10/dist-packages (from imgaug->-r requirements/optional.txt (line 4)) (3.7.1)\n", + "Requirement already satisfied: scikit-image>=0.14.2 in /usr/local/lib/python3.10/dist-packages (from imgaug->-r requirements/optional.txt (line 4)) (0.19.3)\n", + "Requirement already satisfied: opencv-python in /usr/local/lib/python3.10/dist-packages (from imgaug->-r requirements/optional.txt (line 4)) (4.7.0.72)\n", + "Requirement already satisfied: imageio in /usr/local/lib/python3.10/dist-packages (from imgaug->-r requirements/optional.txt (line 4)) (2.25.1)\n", + "Requirement already satisfied: Shapely in /usr/local/lib/python3.10/dist-packages (from imgaug->-r requirements/optional.txt (line 4)) (2.0.1)\n", + "Requirement already satisfied: audioread>=2.1.9 in /usr/local/lib/python3.10/dist-packages (from librosa->-r requirements/optional.txt (line 5)) (3.0.0)\n", + "Requirement already satisfied: scikit-learn>=0.20.0 in /usr/local/lib/python3.10/dist-packages (from librosa->-r requirements/optional.txt (line 5)) (1.2.2)\n", + "Requirement already satisfied: joblib>=0.14 in /usr/local/lib/python3.10/dist-packages (from librosa->-r requirements/optional.txt (line 5)) (1.2.0)\n", + "Requirement already satisfied: decorator>=4.3.0 in /usr/local/lib/python3.10/dist-packages (from librosa->-r requirements/optional.txt (line 5)) (4.4.2)\n", + "Requirement already satisfied: numba>=0.51.0 in /usr/local/lib/python3.10/dist-packages (from librosa->-r requirements/optional.txt (line 5)) (0.56.4)\n", + "Requirement already satisfied: pooch<1.7,>=1.0 in 
/usr/local/lib/python3.10/dist-packages (from librosa->-r requirements/optional.txt (line 5)) (1.6.0)\n", + "Requirement already satisfied: soxr>=0.3.2 in /usr/local/lib/python3.10/dist-packages (from librosa->-r requirements/optional.txt (line 5)) (0.3.5)\n", + "Requirement already satisfied: typing-extensions>=4.1.1 in /usr/local/lib/python3.10/dist-packages (from librosa->-r requirements/optional.txt (line 5)) (4.5.0)\n", + "Requirement already satisfied: lazy-loader>=0.1 in /usr/local/lib/python3.10/dist-packages (from librosa->-r requirements/optional.txt (line 5)) (0.2)\n", + "Requirement already satisfied: msgpack>=1.0 in /usr/local/lib/python3.10/dist-packages (from librosa->-r requirements/optional.txt (line 5)) (1.0.5)\n", + "Requirement already satisfied: requests<3.0,>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from moviepy->-r requirements/optional.txt (line 7)) (2.27.1)\n", + "Requirement already satisfied: proglog<=1.0.0 in /usr/local/lib/python3.10/dist-packages (from moviepy->-r requirements/optional.txt (line 7)) (0.1.10)\n", + "Requirement already satisfied: imageio-ffmpeg>=0.2.0 in /usr/local/lib/python3.10/dist-packages (from moviepy->-r requirements/optional.txt (line 7)) (0.4.8)\n", + "Collecting slicerator>=0.9.8 (from pims->-r requirements/optional.txt (line 9))\n", + " Downloading slicerator-1.1.0-py3-none-any.whl (10 kB)\n", + "Requirement already satisfied: cffi>=1.0 in /usr/local/lib/python3.10/dist-packages (from soundfile->-r requirements/optional.txt (line 11)) (1.15.1)\n", + "Requirement already satisfied: absl-py>=0.4 in /usr/local/lib/python3.10/dist-packages (from tensorboard->-r requirements/optional.txt (line 12)) (1.4.0)\n", + "Requirement already satisfied: grpcio>=1.48.2 in /usr/local/lib/python3.10/dist-packages (from tensorboard->-r requirements/optional.txt (line 12)) (1.54.0)\n", + "Requirement already satisfied: google-auth<3,>=1.6.3 in /usr/local/lib/python3.10/dist-packages (from tensorboard->-r 
requirements/optional.txt (line 12)) (2.17.3)\n", + "Requirement already satisfied: google-auth-oauthlib<1.1,>=0.5 in /usr/local/lib/python3.10/dist-packages (from tensorboard->-r requirements/optional.txt (line 12)) (1.0.0)\n", + "Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.10/dist-packages (from tensorboard->-r requirements/optional.txt (line 12)) (3.4.3)\n", + "Requirement already satisfied: protobuf>=3.19.6 in /usr/local/lib/python3.10/dist-packages (from tensorboard->-r requirements/optional.txt (line 12)) (3.20.3)\n", + "Requirement already satisfied: setuptools>=41.0.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard->-r requirements/optional.txt (line 12)) (67.7.2)\n", + "Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard->-r requirements/optional.txt (line 12)) (0.7.0)\n", + "Requirement already satisfied: tensorboard-plugin-wit>=1.6.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard->-r requirements/optional.txt (line 12)) (1.8.1)\n", + "Requirement already satisfied: werkzeug>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from tensorboard->-r requirements/optional.txt (line 12)) (2.3.0)\n", + "Requirement already satisfied: wheel>=0.26 in /usr/local/lib/python3.10/dist-packages (from tensorboard->-r requirements/optional.txt (line 12)) (0.40.0)\n", + "Requirement already satisfied: Click!=8.0.0,>=7.0 in /usr/local/lib/python3.10/dist-packages (from wandb->-r requirements/optional.txt (line 13)) (8.1.3)\n", + "Collecting GitPython!=3.1.29,>=1.0.0 (from wandb->-r requirements/optional.txt (line 13))\n", + " Downloading GitPython-3.1.31-py3-none-any.whl (184 kB)\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m184.3/184.3 kB\u001B[0m \u001B[31m22.9 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25hRequirement already satisfied: psutil>=5.0.0 in 
/usr/local/lib/python3.10/dist-packages (from wandb->-r requirements/optional.txt (line 13)) (5.9.5)\n", + "Collecting sentry-sdk>=1.0.0 (from wandb->-r requirements/optional.txt (line 13))\n", + " Downloading sentry_sdk-1.22.2-py2.py3-none-any.whl (203 kB)\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m203.3/203.3 kB\u001B[0m \u001B[31m25.7 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25hCollecting docker-pycreds>=0.4.0 (from wandb->-r requirements/optional.txt (line 13))\n", + " Downloading docker_pycreds-0.4.0-py2.py3-none-any.whl (9.0 kB)\n", + "Collecting pathtools (from wandb->-r requirements/optional.txt (line 13))\n", + " Downloading pathtools-0.1.2.tar.gz (11 kB)\n", + " Preparing metadata (setup.py) ... \u001B[?25l\u001B[?25hdone\n", + "Collecting setproctitle (from wandb->-r requirements/optional.txt (line 13))\n", + " Downloading setproctitle-1.3.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (30 kB)\n", + "Requirement already satisfied: appdirs>=1.4.3 in /usr/local/lib/python3.10/dist-packages (from wandb->-r requirements/optional.txt (line 13)) (1.4.4)\n", + "Requirement already satisfied: pycparser in /usr/local/lib/python3.10/dist-packages (from cffi>=1.0->soundfile->-r requirements/optional.txt (line 11)) (2.21)\n", + "Collecting gitdb<5,>=4.0.1 (from GitPython!=3.1.29,>=1.0.0->wandb->-r requirements/optional.txt (line 13))\n", + " Downloading gitdb-4.0.10-py3-none-any.whl (62 kB)\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m62.7/62.7 kB\u001B[0m \u001B[31m9.0 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25hRequirement already satisfied: cachetools<6.0,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard->-r requirements/optional.txt (line 12)) (5.3.0)\n", + "Requirement already satisfied: pyasn1-modules>=0.2.1 in 
/usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard->-r requirements/optional.txt (line 12)) (0.3.0)\n", + "Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard->-r requirements/optional.txt (line 12)) (4.9)\n", + "Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from google-auth-oauthlib<1.1,>=0.5->tensorboard->-r requirements/optional.txt (line 12)) (1.3.1)\n", + "Collecting portalocker (from iopath>=0.1.7->fvcore->-r requirements/optional.txt (line 3))\n", + " Downloading portalocker-2.7.0-py2.py3-none-any.whl (15 kB)\n", + "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.10/dist-packages (from numba>=0.51.0->librosa->-r requirements/optional.txt (line 5)) (0.39.1)\n", + "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3.0,>=2.8.1->moviepy->-r requirements/optional.txt (line 7)) (1.26.15)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3.0,>=2.8.1->moviepy->-r requirements/optional.txt (line 7)) (2022.12.7)\n", + "Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.10/dist-packages (from requests<3.0,>=2.8.1->moviepy->-r requirements/optional.txt (line 7)) (2.0.12)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3.0,>=2.8.1->moviepy->-r requirements/optional.txt (line 7)) (3.4)\n", + "Requirement already satisfied: networkx>=2.2 in /usr/local/lib/python3.10/dist-packages (from scikit-image>=0.14.2->imgaug->-r requirements/optional.txt (line 4)) (3.1)\n", + "Requirement already satisfied: tifffile>=2019.7.26 in /usr/local/lib/python3.10/dist-packages (from scikit-image>=0.14.2->imgaug->-r requirements/optional.txt (line 4)) (2023.4.12)\n", + "Requirement 
already satisfied: PyWavelets>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from scikit-image>=0.14.2->imgaug->-r requirements/optional.txt (line 4)) (1.4.1)\n", + "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=0.20.0->librosa->-r requirements/optional.txt (line 5)) (3.1.0)\n", + "Requirement already satisfied: MarkupSafe>=2.1.1 in /usr/local/lib/python3.10/dist-packages (from werkzeug>=1.0.1->tensorboard->-r requirements/optional.txt (line 12)) (2.1.2)\n", + "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->imgaug->-r requirements/optional.txt (line 4)) (1.0.7)\n", + "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib->imgaug->-r requirements/optional.txt (line 4)) (0.11.0)\n", + "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->imgaug->-r requirements/optional.txt (line 4)) (4.39.3)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->imgaug->-r requirements/optional.txt (line 4)) (1.4.4)\n", + "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->imgaug->-r requirements/optional.txt (line 4)) (3.0.9)\n", + "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib->imgaug->-r requirements/optional.txt (line 4)) (2.8.2)\n", + "Collecting smmap<6,>=3.0.1 (from gitdb<5,>=4.0.1->GitPython!=3.1.29,>=1.0.0->wandb->-r requirements/optional.txt (line 13))\n", + " Downloading smmap-5.0.0-py3-none-any.whl (24 kB)\n", + "Requirement already satisfied: pyasn1<0.6.0,>=0.4.6 in /usr/local/lib/python3.10/dist-packages (from pyasn1-modules>=0.2.1->google-auth<3,>=1.6.3->tensorboard->-r requirements/optional.txt (line 12)) (0.5.0)\n", + "Requirement already 
satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.10/dist-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<1.1,>=0.5->tensorboard->-r requirements/optional.txt (line 12)) (3.2.2)\n", + "Building wheels for collected packages: fvcore, pims, PyTurboJPEG, iopath, pathtools\n", + " Building wheel for fvcore (setup.py) ... \u001B[?25l\u001B[?25hdone\n", + " Created wheel for fvcore: filename=fvcore-0.1.5.post20221221-py3-none-any.whl size=61405 sha256=25c1e50155c8788d00eec898793c96133a746a8bb076ffc5c01f5a4dc256751e\n", + " Stored in directory: /root/.cache/pip/wheels/01/c0/af/77c1cf53a1be9e42a52b48e5af2169d40ec2e89f7362489dd0\n", + " Building wheel for pims (setup.py) ... \u001B[?25l\u001B[?25hdone\n", + " Created wheel for pims: filename=PIMS-0.6.1-py3-none-any.whl size=82619 sha256=59a328dc88a438c60cfb6e937e04c8a7dd55ad2a2905034cd41ff80cdbba6497\n", + " Stored in directory: /root/.cache/pip/wheels/cc/bf/3e/bfa77232d942f8244145f9c713b6b38f6ef04b6fb5c021c114\n", + " Building wheel for PyTurboJPEG (setup.py) ... \u001B[?25l\u001B[?25hdone\n", + " Created wheel for PyTurboJPEG: filename=PyTurboJPEG-1.7.1-py3-none-any.whl size=12243 sha256=ddf6424c85ac533335abd96dd9e98b014ea1dd4f143c88cd35ecb08d6128f411\n", + " Stored in directory: /root/.cache/pip/wheels/de/6e/b1/e7ba70c328c3395555cb92ca8820babb32950d867858b1948b\n", + " Building wheel for iopath (setup.py) ... \u001B[?25l\u001B[?25hdone\n", + " Created wheel for iopath: filename=iopath-0.1.10-py3-none-any.whl size=31531 sha256=db977a4344bebbdd710665e767caab4fbcf53cc6aea0707cd38d26c45718331e\n", + " Stored in directory: /root/.cache/pip/wheels/9a/a3/b6/ac0fcd1b4ed5cfeb3db92e6a0e476cfd48ed0df92b91080c1d\n", + " Building wheel for pathtools (setup.py) ... 
\u001B[?25l\u001B[?25hdone\n", + " Created wheel for pathtools: filename=pathtools-0.1.2-py3-none-any.whl size=8791 sha256=08bb5753ce029aef01f25c3e81882d93c0e040e5932e90a02a062ad058756b52\n", + " Stored in directory: /root/.cache/pip/wheels/e7/f3/22/152153d6eb222ee7a56ff8617d80ee5207207a8c00a7aab794\n", + "Successfully built fvcore pims PyTurboJPEG iopath pathtools\n", + "Installing collected packages: slicerator, pathtools, lmdb, av, yacs, smmap, setproctitle, sentry-sdk, PyTurboJPEG, portalocker, docker-pycreds, pims, iopath, gitdb, GitPython, fvcore, wandb\n", + "Successfully installed GitPython-3.1.31 PyTurboJPEG-1.7.1 av-10.0.0 docker-pycreds-0.4.0 fvcore-0.1.5.post20221221 gitdb-4.0.10 iopath-0.1.10 lmdb-1.4.1 pathtools-0.1.2 pims-0.6.1 portalocker-2.7.0 sentry-sdk-1.22.2 setproctitle-1.3.2 slicerator-1.1.0 smmap-5.0.0 wandb-0.15.2 yacs-0.1.8\n" + ] + } + ], + "source": [ + "# install MMEngine, MMCV and MMDetection using MIM\n", + "%pip install -U openmim\n", + "!mim install mmengine\n", + "!mim install \"mmcv>=2.0.0\"\n", + "\n", + "# Install mmaction2\n", + "!rm -rf mmaction2\n", + "!git clone https://github.com/open-mmlab/mmaction2.git -b main\n", + "%cd mmaction2\n", + "\n", + "!pip install -e .\n", + "\n", + "# Install some optional requirements\n", + "!pip install -r requirements/optional.txt" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "No_zZAFpWC-a", + "outputId": "9386dd81-2308-4adb-d3cb-798de11c035e" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "QuZG8kZ2fJ5d" - }, - "source": [ - "## Train a recognizer on customized dataset\n", - "\n", - "To train a new recognizer, there are usually three things to do:\n", - "1. Support a new dataset\n", - "2. Modify the config\n", - "3. 
Train a new recognizer" - ] + "output_type": "stream", + "name": "stdout", + "text": [ + "2.0.0+cu118 True\n", + "1.0.0\n", + "11.8\n", + "GCC 9.3\n", + "OrderedDict([('sys.platform', 'linux'), ('Python', '3.10.11 (main, Apr 5 2023, 14:15:10) [GCC 9.4.0]'), ('CUDA available', True), ('numpy_random_seed', 2147483648), ('GPU 0', 'Tesla T4'), ('CUDA_HOME', '/usr/local/cuda'), ('NVCC', 'Cuda compilation tools, release 11.8, V11.8.89'), ('GCC', 'x86_64-linux-gnu-gcc (Ubuntu 9.4.0-1ubuntu1~20.04.1) 9.4.0'), ('PyTorch', '2.0.0+cu118'), ('PyTorch compiling details', 'PyTorch built with:\\n - GCC 9.3\\n - C++ Version: 201703\\n - Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications\\n - Intel(R) MKL-DNN v2.7.3 (Git Hash 6dbeffbae1f23cbbeae17adb7b5b13f1f37c080e)\\n - OpenMP 201511 (a.k.a. OpenMP 4.5)\\n - LAPACK is enabled (usually provided by MKL)\\n - NNPACK is enabled\\n - CPU capability usage: AVX2\\n - CUDA Runtime 11.8\\n - NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_90,code=sm_90\\n - CuDNN 8.7\\n - Magma 2.6.1\\n - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=11.8, CUDNN_VERSION=8.7.0, CXX_COMPILER=/opt/rh/devtoolset-9/root/usr/bin/c++, CXX_FLAGS= -D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wunused-local-typedefs -Wno-unused-parameter 
-Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_DISABLE_GPU_ASSERTS=ON, TORCH_VERSION=2.0.0, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=1, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, \\n'), ('TorchVision', '0.15.1+cu118'), ('OpenCV', '4.7.0'), ('MMEngine', '0.7.3')])\n" + ] + } + ], + "source": [ + "# Check Pytorch installation\n", + "import torch, torchvision\n", + "print(torch.__version__, torch.cuda.is_available())\n", + "\n", + "# Check MMAction2 installation\n", + "import mmaction\n", + "print(mmaction.__version__)\n", + "\n", + "# Check MMCV installation\n", + "from mmcv.ops import get_compiling_cuda_version, get_compiler_version\n", + "print(get_compiling_cuda_version())\n", + "print(get_compiler_version())\n", + "\n", + "# Check MMEngine installation\n", + "from mmengine.utils.dl_utils import collect_env\n", + "print(collect_env())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pXf7oV5DWdab" + }, + "source": [ + "## Perform inference with a MMAction2 recognizer\n", + "MMAction2 already provides high level APIs to do inference and training." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "64CW6d_AaT-Q", + "outputId": "ea330d8c-2e20-4dbd-d046-51d7c9ec4f7a" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "neEFyxChfgiJ" - }, - "source": [ - "### Support a new dataset\n", - "\n", - "In this tutorial, we gives an example to convert the data into the format of existing datasets. Other methods and more advanced usages can be found in the [doc](/docs/tutorials/new_dataset.md)\n", - "\n", - "Firstly, let's download a tiny dataset obtained from [Kinetics-400](https://deepmind.com/research/open-source/open-source-datasets/kinetics/). We select 30 videos with their labels as train dataset and 10 videos with their labels as test dataset." - ] + "output_type": "stream", + "name": "stdout", + "text": [ + "--2023-05-15 03:33:08-- https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_1x1x3_100e_kinetics400_rgb/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth\n", + "Resolving download.openmmlab.com (download.openmmlab.com)... 163.181.82.216, 163.181.82.218, 163.181.82.213, ...\n", + "Connecting to download.openmmlab.com (download.openmmlab.com)|163.181.82.216|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 OK\n", + "Length: 97579339 (93M) [application/octet-stream]\n", + "Saving to: ‘checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth’\n", + "\n", + "checkpoints/tsn_r50 100%[===================>] 93.06M 26.1MB/s in 3.6s \n", + "\n", + "2023-05-15 03:33:12 (26.2 MB/s) - ‘checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth’ saved [97579339/97579339]\n", + "\n" + ] + } + ], + "source": [ + "!mkdir checkpoints\n", + "!wget -c https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_1x1x3_100e_kinetics400_rgb/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth \\\n", + " -O checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "HNZB7NoSabzj", + "outputId": "c0c2ba71-72ff-4cac-a5b8-65590f5a6bb0" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "gjsUj9JzgUlJ", - "outputId": "96a0e6e9-0dd8-4c07-9fed-22b93d5c1318" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "rm: cannot remove 'kinetics400_tiny.zip*': No such file or directory\n", - "--2023-05-15 03:33:27-- https://download.openmmlab.com/mmaction/kinetics400_tiny.zip\n", - "Resolving download.openmmlab.com (download.openmmlab.com)... 163.181.82.216, 163.181.82.218, 163.181.82.213, ...\n", - "Connecting to download.openmmlab.com (download.openmmlab.com)|163.181.82.216|:443... connected.\n", - "HTTP request sent, awaiting response... 
200 OK\n", - "Length: 18308682 (17M) [application/zip]\n", - "Saving to: ‘kinetics400_tiny.zip’\n", - "\n", - "kinetics400_tiny.zi 100%[===================>] 17.46M 32.7MB/s in 0.5s \n", - "\n", - "2023-05-15 03:33:28 (32.7 MB/s) - ‘kinetics400_tiny.zip’ saved [18308682/18308682]\n", - "\n" - ] - } - ], - "source": [ - "# download, decompress the data\n", - "!rm kinetics400_tiny.zip*\n", - "!rm -rf kinetics400_tiny\n", - "!wget https://download.openmmlab.com/mmaction/kinetics400_tiny.zip\n", - "!unzip kinetics400_tiny.zip > /dev/null" - ] - }, + "output_type": "stream", + "name": "stdout", + "text": [ + "Loads checkpoint by local backend from path: checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth\n" + ] + } + ], + "source": [ + "from mmaction.apis import inference_recognizer, init_recognizer\n", + "from mmengine import Config\n", + "\n", + "\n", + "# Choose to use a config and initialize the recognizer\n", + "config = 'configs/recognition/tsn/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py'\n", + "config = Config.fromfile(config)\n", + "# Setup a checkpoint file to load\n", + "checkpoint = 'checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth'\n", + "# Initialize the recognizer\n", + "model = init_recognizer(config, checkpoint, device='cuda:0')" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "id": "rEMsBnpHapAn", + "outputId": "ec05049e-7289-4798-94fa-2b773cb23634", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 11, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "AbZ-o7V6hNw4", - "outputId": "f229f352-1b43-41b7-a374-21404f618581" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Reading package lists...\n", - "Building dependency tree...\n", - "Reading state information...\n", - "The following NEW packages will be installed:\n", - " 
tree\n", - "0 upgraded, 1 newly installed, 0 to remove and 24 not upgraded.\n", - "Need to get 43.0 kB of archives.\n", - "After this operation, 115 kB of additional disk space will be used.\n", - "Get:1 http://archive.ubuntu.com/ubuntu focal/universe amd64 tree amd64 1.8.0-1 [43.0 kB]\n", - "Fetched 43.0 kB in 1s (48.9 kB/s)\n", - "Selecting previously unselected package tree.\n", - "(Reading database ... 122519 files and directories currently installed.)\n", - "Preparing to unpack .../tree_1.8.0-1_amd64.deb ...\n", - "Unpacking tree (1.8.0-1) ...\n", - "Setting up tree (1.8.0-1) ...\n", - "Processing triggers for man-db (2.9.1-1) ...\n", - "\u001b[01;34mkinetics400_tiny\u001b[00m\n", - "├── kinetics_tiny_train_video.txt\n", - "├── kinetics_tiny_val_video.txt\n", - "├── \u001b[01;34mtrain\u001b[00m\n", - "│   ├── 27_CSXByd3s.mp4\n", - "│   ├── 34XczvTaRiI.mp4\n", - "│   ├── A-wiliK50Zw.mp4\n", - "│   ├── D32_1gwq35E.mp4\n", - "│   ├── D92m0HsHjcQ.mp4\n", - "│   ├── DbX8mPslRXg.mp4\n", - "│   ├── FMlSTTpN3VY.mp4\n", - "│   ├── h10B9SVE-nk.mp4\n", - "│   ├── h2YqqUhnR34.mp4\n", - "│   ├── iRuyZSKhHRg.mp4\n", - "│   ├── IyfILH9lBRo.mp4\n", - "│   ├── kFC3KY2bOP8.mp4\n", - "│   ├── LvcFDgCAXQs.mp4\n", - "│   ├── O46YA8tI530.mp4\n", - "│   ├── oMrZaozOvdQ.mp4\n", - "│   ├── oXy-e_P_cAI.mp4\n", - "│   ├── P5M-hAts7MQ.mp4\n", - "│   ├── phDqGd0NKoo.mp4\n", - "│   ├── PnOe3GZRVX8.mp4\n", - "│   ├── R8HXQkdgKWA.mp4\n", - "│   ├── RqnKtCEoEcA.mp4\n", - "│   ├── soEcZZsBmDs.mp4\n", - "│   ├── TkkZPZHbAKA.mp4\n", - "│   ├── T_TMNGzVrDk.mp4\n", - "│   ├── WaS0qwP46Us.mp4\n", - "│   ├── Wh_YPQdH1Zg.mp4\n", - "│   ├── WWP5HZJsg-o.mp4\n", - "│   ├── xGY2dP0YUjA.mp4\n", - "│   ├── yLC9CtWU5ws.mp4\n", - "│   └── ZQV4U2KQ370.mp4\n", - "└── \u001b[01;34mval\u001b[00m\n", - " ├── 0pVGiAU6XEA.mp4\n", - " ├── AQrbRSnRt8M.mp4\n", - " ├── b6Q_b7vgc7Q.mp4\n", - " ├── ddvJ6-faICE.mp4\n", - " ├── IcLztCtvhb8.mp4\n", - " ├── ik4BW3-SCts.mp4\n", - " ├── jqRrH30V0k4.mp4\n", - " ├── 
SU_x2LQqSLs.mp4\n", - " ├── u4Rm6srmIS8.mp4\n", - " └── y5Iu7XkTqV0.mp4\n", - "\n", - "2 directories, 42 files\n" - ] - } - ], - "source": [ - "# Check the directory structure of the tiny data\n", - "\n", - "# Install tree first\n", - "!apt-get -q install tree\n", - "!tree kinetics400_tiny" - ] + "output_type": "stream", + "name": "stdout", + "text": [ + "05/15 03:33:18 - mmengine - WARNING - \"FileClient\" will be deprecated in future. Please use io functions in https://mmengine.readthedocs.io/en/latest/api/fileio.html#file-io\n", + "05/15 03:33:18 - mmengine - WARNING - \"HardDiskBackend\" is the alias of \"LocalBackend\" and the former will be deprecated in future.\n" + ] + } + ], + "source": [ + "# Use the recognizer to do inference\n", + "from operator import itemgetter\n", + "video = 'demo/demo.mp4'\n", + "label = 'tools/data/kinetics/label_map_k400.txt'\n", + "results = inference_recognizer(model, video)\n", + "\n", + "pred_scores = results.pred_score.tolist()\n", + "score_tuples = tuple(zip(range(len(pred_scores)), pred_scores))\n", + "score_sorted = sorted(score_tuples, key=itemgetter(1), reverse=True)\n", + "top5_label = score_sorted[:5]\n", + "\n", + "labels = open(label).readlines()\n", + "labels = [x.strip() for x in labels]\n", + "results = [(labels[k[0]], k[1]) for k in top5_label]\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "NIyJXqfWathq", + "outputId": "cb25aca9-e72d-4c54-f295-4c889713cb3a" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 12, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "fTdi6dI0hY3g", - "outputId": "95f22438-566c-4496-fe0c-50e128b47b5e" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "D32_1gwq35E.mp4 0\n", - "iRuyZSKhHRg.mp4 1\n", - "oXy-e_P_cAI.mp4 0\n", - "34XczvTaRiI.mp4 1\n", - "h2YqqUhnR34.mp4 0\n", - "O46YA8tI530.mp4 0\n", - 
"kFC3KY2bOP8.mp4 1\n", - "WWP5HZJsg-o.mp4 1\n", - "phDqGd0NKoo.mp4 1\n", - "yLC9CtWU5ws.mp4 0\n", - "27_CSXByd3s.mp4 1\n", - "IyfILH9lBRo.mp4 1\n", - "T_TMNGzVrDk.mp4 1\n", - "TkkZPZHbAKA.mp4 0\n", - "PnOe3GZRVX8.mp4 1\n", - "soEcZZsBmDs.mp4 1\n", - "FMlSTTpN3VY.mp4 1\n", - "WaS0qwP46Us.mp4 0\n", - "A-wiliK50Zw.mp4 1\n", - "oMrZaozOvdQ.mp4 1\n", - "ZQV4U2KQ370.mp4 0\n", - "DbX8mPslRXg.mp4 1\n", - "h10B9SVE-nk.mp4 1\n", - "P5M-hAts7MQ.mp4 0\n", - "R8HXQkdgKWA.mp4 0\n", - "D92m0HsHjcQ.mp4 0\n", - "RqnKtCEoEcA.mp4 0\n", - "LvcFDgCAXQs.mp4 0\n", - "xGY2dP0YUjA.mp4 0\n", - "Wh_YPQdH1Zg.mp4 0\n" - ] - } - ], - "source": [ - "# After downloading the data, we need to check the annotation format\n", - "!cat kinetics400_tiny/kinetics_tiny_train_video.txt" - ] + "output_type": "stream", + "name": "stdout", + "text": [ + "The top-5 labels with corresponding scores are:\n", + "arm wrestling: 1.0\n", + "rock scissors paper: 6.434453414527752e-09\n", + "shaking hands: 2.7599860175087088e-09\n", + "clapping: 1.3454612979302283e-09\n", + "massaging feet: 5.555100823784187e-10\n" + ] + } + ], + "source": [ + "print('The top-5 labels with corresponding scores are:')\n", + "for result in results:\n", + " print(f'{result[0]}: ', result[1])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QuZG8kZ2fJ5d" + }, + "source": [ + "## Train a recognizer on customized dataset\n", + "\n", + "To train a new recognizer, there are usually three things to do:\n", + "1. Support a new dataset\n", + "2. Modify the config\n", + "3. Train a new recognizer" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "neEFyxChfgiJ" + }, + "source": [ + "### Support a new dataset\n", + "\n", + "In this tutorial, we gives an example to convert the data into the format of existing datasets. 
Other methods and more advanced usages can be found in the [doc](/docs/tutorials/new_dataset.md)\n", + "\n", + "Firstly, let's download a tiny dataset obtained from [Kinetics-400](https://deepmind.com/research/open-source/open-source-datasets/kinetics/). We select 30 videos with their labels as train dataset and 10 videos with their labels as test dataset." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "gjsUj9JzgUlJ", + "outputId": "96a0e6e9-0dd8-4c07-9fed-22b93d5c1318" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "0bq0mxmEi29H" - }, - "source": [ - "According to the format defined in [`VideoDataset`](./datasets/video_dataset.py), each line indicates a sample video with the filepath and label, which are split with a whitespace." - ] + "output_type": "stream", + "name": "stdout", + "text": [ + "rm: cannot remove 'kinetics400_tiny.zip*': No such file or directory\n", + "--2023-05-15 03:33:27-- https://download.openmmlab.com/mmaction/kinetics400_tiny.zip\n", + "Resolving download.openmmlab.com (download.openmmlab.com)... 163.181.82.216, 163.181.82.218, 163.181.82.213, ...\n", + "Connecting to download.openmmlab.com (download.openmmlab.com)|163.181.82.216|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 OK\n", + "Length: 18308682 (17M) [application/zip]\n", + "Saving to: ‘kinetics400_tiny.zip’\n", + "\n", + "kinetics400_tiny.zi 100%[===================>] 17.46M 32.7MB/s in 0.5s \n", + "\n", + "2023-05-15 03:33:28 (32.7 MB/s) - ‘kinetics400_tiny.zip’ saved [18308682/18308682]\n", + "\n" + ] + } + ], + "source": [ + "# download, decompress the data\n", + "!rm kinetics400_tiny.zip*\n", + "!rm -rf kinetics400_tiny\n", + "!wget https://download.openmmlab.com/mmaction/kinetics400_tiny.zip\n", + "!unzip kinetics400_tiny.zip > /dev/null" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "AbZ-o7V6hNw4", + "outputId": "f229f352-1b43-41b7-a374-21404f618581" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "Ht_DGJA9jQar" - }, - "source": [ - "### Modify the config\n", - "\n", - "In the next step, we need to modify the config for the training.\n", - "To accelerate the process, we finetune a recognizer using a pre-trained recognizer." - ] + "output_type": "stream", + "name": "stdout", + "text": [ + "Reading package lists...\n", + "Building dependency tree...\n", + "Reading state information...\n", + "The following NEW packages will be installed:\n", + " tree\n", + "0 upgraded, 1 newly installed, 0 to remove and 24 not upgraded.\n", + "Need to get 43.0 kB of archives.\n", + "After this operation, 115 kB of additional disk space will be used.\n", + "Get:1 http://archive.ubuntu.com/ubuntu focal/universe amd64 tree amd64 1.8.0-1 [43.0 kB]\n", + "Fetched 43.0 kB in 1s (48.9 kB/s)\n", + "Selecting previously unselected package tree.\n", + "(Reading database ... 
122519 files and directories currently installed.)\n", + "Preparing to unpack .../tree_1.8.0-1_amd64.deb ...\n", + "Unpacking tree (1.8.0-1) ...\n", + "Setting up tree (1.8.0-1) ...\n", + "Processing triggers for man-db (2.9.1-1) ...\n", + "\u001B[01;34mkinetics400_tiny\u001B[00m\n", + "├── kinetics_tiny_train_video.txt\n", + "├── kinetics_tiny_val_video.txt\n", + "├── \u001B[01;34mtrain\u001B[00m\n", + "│   ├── 27_CSXByd3s.mp4\n", + "│   ├── 34XczvTaRiI.mp4\n", + "│   ├── A-wiliK50Zw.mp4\n", + "│   ├── D32_1gwq35E.mp4\n", + "│   ├── D92m0HsHjcQ.mp4\n", + "│   ├── DbX8mPslRXg.mp4\n", + "│   ├── FMlSTTpN3VY.mp4\n", + "│   ├── h10B9SVE-nk.mp4\n", + "│   ├── h2YqqUhnR34.mp4\n", + "│   ├── iRuyZSKhHRg.mp4\n", + "│   ├── IyfILH9lBRo.mp4\n", + "│   ├── kFC3KY2bOP8.mp4\n", + "│   ├── LvcFDgCAXQs.mp4\n", + "│   ├── O46YA8tI530.mp4\n", + "│   ├── oMrZaozOvdQ.mp4\n", + "│   ├── oXy-e_P_cAI.mp4\n", + "│   ├── P5M-hAts7MQ.mp4\n", + "│   ├── phDqGd0NKoo.mp4\n", + "│   ├── PnOe3GZRVX8.mp4\n", + "│   ├── R8HXQkdgKWA.mp4\n", + "│   ├── RqnKtCEoEcA.mp4\n", + "│   ├── soEcZZsBmDs.mp4\n", + "│   ├── TkkZPZHbAKA.mp4\n", + "│   ├── T_TMNGzVrDk.mp4\n", + "│   ├── WaS0qwP46Us.mp4\n", + "│   ├── Wh_YPQdH1Zg.mp4\n", + "│   ├── WWP5HZJsg-o.mp4\n", + "│   ├── xGY2dP0YUjA.mp4\n", + "│   ├── yLC9CtWU5ws.mp4\n", + "│   └── ZQV4U2KQ370.mp4\n", + "└── \u001B[01;34mval\u001B[00m\n", + " ├── 0pVGiAU6XEA.mp4\n", + " ├── AQrbRSnRt8M.mp4\n", + " ├── b6Q_b7vgc7Q.mp4\n", + " ├── ddvJ6-faICE.mp4\n", + " ├── IcLztCtvhb8.mp4\n", + " ├── ik4BW3-SCts.mp4\n", + " ├── jqRrH30V0k4.mp4\n", + " ├── SU_x2LQqSLs.mp4\n", + " ├── u4Rm6srmIS8.mp4\n", + " └── y5Iu7XkTqV0.mp4\n", + "\n", + "2 directories, 42 files\n" + ] + } + ], + "source": [ + "# Check the directory structure of the tiny data\n", + "\n", + "# Install tree first\n", + "!apt-get -q install tree\n", + "!tree kinetics400_tiny" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" 
}, + "id": "fTdi6dI0hY3g", + "outputId": "95f22438-566c-4496-fe0c-50e128b47b5e" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 13, - "metadata": { - "id": "LjCcmCKOjktc" - }, - "outputs": [], - "source": [ - "cfg = Config.fromfile('./configs/recognition/tsn/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py')" - ] + "output_type": "stream", + "name": "stdout", + "text": [ + "D32_1gwq35E.mp4 0\n", + "iRuyZSKhHRg.mp4 1\n", + "oXy-e_P_cAI.mp4 0\n", + "34XczvTaRiI.mp4 1\n", + "h2YqqUhnR34.mp4 0\n", + "O46YA8tI530.mp4 0\n", + "kFC3KY2bOP8.mp4 1\n", + "WWP5HZJsg-o.mp4 1\n", + "phDqGd0NKoo.mp4 1\n", + "yLC9CtWU5ws.mp4 0\n", + "27_CSXByd3s.mp4 1\n", + "IyfILH9lBRo.mp4 1\n", + "T_TMNGzVrDk.mp4 1\n", + "TkkZPZHbAKA.mp4 0\n", + "PnOe3GZRVX8.mp4 1\n", + "soEcZZsBmDs.mp4 1\n", + "FMlSTTpN3VY.mp4 1\n", + "WaS0qwP46Us.mp4 0\n", + "A-wiliK50Zw.mp4 1\n", + "oMrZaozOvdQ.mp4 1\n", + "ZQV4U2KQ370.mp4 0\n", + "DbX8mPslRXg.mp4 1\n", + "h10B9SVE-nk.mp4 1\n", + "P5M-hAts7MQ.mp4 0\n", + "R8HXQkdgKWA.mp4 0\n", + "D92m0HsHjcQ.mp4 0\n", + "RqnKtCEoEcA.mp4 0\n", + "LvcFDgCAXQs.mp4 0\n", + "xGY2dP0YUjA.mp4 0\n", + "Wh_YPQdH1Zg.mp4 0\n" + ] + } + ], + "source": [ + "# After downloading the data, we need to check the annotation format\n", + "!cat kinetics400_tiny/kinetics_tiny_train_video.txt" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0bq0mxmEi29H" + }, + "source": [ + "According to the format defined in [`VideoDataset`](./datasets/video_dataset.py), each line indicates a sample video with the filepath and label, which are split with a whitespace." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ht_DGJA9jQar" + }, + "source": [ + "### Modify the config\n", + "\n", + "In the next step, we need to modify the config for the training.\n", + "To accelerate the process, we finetune a recognizer using a pre-trained recognizer." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "id": "LjCcmCKOjktc" + }, + "outputs": [], + "source": [ + "cfg = Config.fromfile('./configs/recognition/tsn/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tc8YhFFGjp3e" + }, + "source": [ + "Given a config that trains a TSN model on kinetics400-full dataset, we need to modify some values to use it for training TSN on Kinetics400-tiny dataset.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "tlhu9byjjt-K", + "outputId": "2d984a1d-93f7-493f-fd77-e19af8285f38" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "tc8YhFFGjp3e" - }, - "source": [ - "Given a config that trains a TSN model on kinetics400-full dataset, we need to modify some values to use it for training TSN on Kinetics400-tiny dataset.\n" - ] + "output_type": "stream", + "name": "stdout", + "text": [ + "Config:\n", + "model = dict(\n", + " type='Recognizer2D',\n", + " backbone=dict(\n", + " type='ResNet',\n", + " pretrained='https://download.pytorch.org/models/resnet50-11ad3fa6.pth',\n", + " depth=50,\n", + " norm_eval=False),\n", + " cls_head=dict(\n", + " type='TSNHead',\n", + " num_classes=2,\n", + " in_channels=2048,\n", + " spatial_type='avg',\n", + " consensus=dict(type='AvgConsensus', dim=1),\n", + " dropout_ratio=0.4,\n", + " init_std=0.01,\n", + " average_clips='prob'),\n", + " data_preprocessor=dict(\n", + " type='ActionDataPreprocessor',\n", + " mean=[123.675, 116.28, 103.53],\n", + " std=[58.395, 57.12, 57.375],\n", + " format_shape='NCHW'),\n", + " train_cfg=None,\n", + " test_cfg=None)\n", + "train_cfg = dict(\n", + " type='EpochBasedTrainLoop', max_epochs=10, val_begin=1, val_interval=1)\n", + "val_cfg = dict(type='ValLoop')\n", + "test_cfg = dict(type='TestLoop')\n", + "param_scheduler = [\n", + " dict(\n", + " 
type='MultiStepLR',\n", + " begin=0,\n", + " end=100,\n", + " by_epoch=True,\n", + " milestones=[40, 80],\n", + " gamma=0.1)\n", + "]\n", + "optim_wrapper = dict(\n", + " optimizer=dict(\n", + " type='SGD', lr=7.8125e-05, momentum=0.9, weight_decay=0.0001),\n", + " clip_grad=dict(max_norm=40, norm_type=2))\n", + "default_scope = 'mmaction'\n", + "default_hooks = dict(\n", + " runtime_info=dict(type='RuntimeInfoHook'),\n", + " timer=dict(type='IterTimerHook'),\n", + " logger=dict(type='LoggerHook', interval=20, ignore_last=False),\n", + " param_scheduler=dict(type='ParamSchedulerHook'),\n", + " checkpoint=dict(\n", + " type='CheckpointHook', interval=3, save_best='auto', max_keep_ckpts=3),\n", + " sampler_seed=dict(type='DistSamplerSeedHook'),\n", + " sync_buffers=dict(type='SyncBuffersHook'))\n", + "env_cfg = dict(\n", + " cudnn_benchmark=False,\n", + " mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),\n", + " dist_cfg=dict(backend='nccl'))\n", + "log_processor = dict(type='LogProcessor', window_size=20, by_epoch=True)\n", + "vis_backends = [dict(type='LocalVisBackend')]\n", + "visualizer = dict(\n", + " type='ActionVisualizer', vis_backends=[dict(type='LocalVisBackend')])\n", + "log_level = 'INFO'\n", + "load_from = './checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth'\n", + "resume = False\n", + "dataset_type = 'VideoDataset'\n", + "data_root = 'kinetics400_tiny/train/'\n", + "data_root_val = 'kinetics400_tiny/val/'\n", + "ann_file_train = 'kinetics400_tiny/kinetics_tiny_train_video.txt'\n", + "ann_file_val = 'kinetics400_tiny/kinetics_tiny_val_video.txt'\n", + "file_client_args = dict(io_backend='disk')\n", + "train_pipeline = [\n", + " dict(type='DecordInit', io_backend='disk'),\n", + " dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=3),\n", + " dict(type='DecordDecode'),\n", + " dict(type='Resize', scale=(-1, 256)),\n", + " dict(\n", + " type='MultiScaleCrop',\n", + " input_size=224,\n", + " scales=(1, 0.875, 0.75, 
0.66),\n", + " random_crop=False,\n", + " max_wh_scale_gap=1),\n", + " dict(type='Resize', scale=(224, 224), keep_ratio=False),\n", + " dict(type='Flip', flip_ratio=0.5),\n", + " dict(type='FormatShape', input_format='NCHW'),\n", + " dict(type='PackActionInputs')\n", + "]\n", + "val_pipeline = [\n", + " dict(type='DecordInit', io_backend='disk'),\n", + " dict(\n", + " type='SampleFrames',\n", + " clip_len=1,\n", + " frame_interval=1,\n", + " num_clips=3,\n", + " test_mode=True),\n", + " dict(type='DecordDecode'),\n", + " dict(type='Resize', scale=(-1, 256)),\n", + " dict(type='CenterCrop', crop_size=224),\n", + " dict(type='FormatShape', input_format='NCHW'),\n", + " dict(type='PackActionInputs')\n", + "]\n", + "test_pipeline = [\n", + " dict(type='DecordInit', io_backend='disk'),\n", + " dict(\n", + " type='SampleFrames',\n", + " clip_len=1,\n", + " frame_interval=1,\n", + " num_clips=25,\n", + " test_mode=True),\n", + " dict(type='DecordDecode'),\n", + " dict(type='Resize', scale=(-1, 256)),\n", + " dict(type='TenCrop', crop_size=224),\n", + " dict(type='FormatShape', input_format='NCHW'),\n", + " dict(type='PackActionInputs')\n", + "]\n", + "train_dataloader = dict(\n", + " batch_size=2,\n", + " num_workers=2,\n", + " persistent_workers=True,\n", + " sampler=dict(type='DefaultSampler', shuffle=True),\n", + " dataset=dict(\n", + " type='VideoDataset',\n", + " ann_file='kinetics400_tiny/kinetics_tiny_train_video.txt',\n", + " data_prefix=dict(video='kinetics400_tiny/train/'),\n", + " pipeline=[\n", + " dict(type='DecordInit', io_backend='disk'),\n", + " dict(\n", + " type='SampleFrames', clip_len=1, frame_interval=1,\n", + " num_clips=3),\n", + " dict(type='DecordDecode'),\n", + " dict(type='Resize', scale=(-1, 256)),\n", + " dict(\n", + " type='MultiScaleCrop',\n", + " input_size=224,\n", + " scales=(1, 0.875, 0.75, 0.66),\n", + " random_crop=False,\n", + " max_wh_scale_gap=1),\n", + " dict(type='Resize', scale=(224, 224), keep_ratio=False),\n", + " 
dict(type='Flip', flip_ratio=0.5),\n", + " dict(type='FormatShape', input_format='NCHW'),\n", + " dict(type='PackActionInputs')\n", + " ]))\n", + "val_dataloader = dict(\n", + " batch_size=2,\n", + " num_workers=2,\n", + " persistent_workers=True,\n", + " sampler=dict(type='DefaultSampler', shuffle=False),\n", + " dataset=dict(\n", + " type='VideoDataset',\n", + " ann_file='kinetics400_tiny/kinetics_tiny_val_video.txt',\n", + " data_prefix=dict(video='kinetics400_tiny/val/'),\n", + " pipeline=[\n", + " dict(type='DecordInit', io_backend='disk'),\n", + " dict(\n", + " type='SampleFrames',\n", + " clip_len=1,\n", + " frame_interval=1,\n", + " num_clips=3,\n", + " test_mode=True),\n", + " dict(type='DecordDecode'),\n", + " dict(type='Resize', scale=(-1, 256)),\n", + " dict(type='CenterCrop', crop_size=224),\n", + " dict(type='FormatShape', input_format='NCHW'),\n", + " dict(type='PackActionInputs')\n", + " ],\n", + " test_mode=True))\n", + "test_dataloader = dict(\n", + " batch_size=1,\n", + " num_workers=2,\n", + " persistent_workers=True,\n", + " sampler=dict(type='DefaultSampler', shuffle=False),\n", + " dataset=dict(\n", + " type='VideoDataset',\n", + " ann_file='kinetics400_tiny/kinetics_tiny_val_video.txt',\n", + " data_prefix=dict(video='kinetics400_tiny/val/'),\n", + " pipeline=[\n", + " dict(type='DecordInit', io_backend='disk'),\n", + " dict(\n", + " type='SampleFrames',\n", + " clip_len=1,\n", + " frame_interval=1,\n", + " num_clips=25,\n", + " test_mode=True),\n", + " dict(type='DecordDecode'),\n", + " dict(type='Resize', scale=(-1, 256)),\n", + " dict(type='TenCrop', crop_size=224),\n", + " dict(type='FormatShape', input_format='NCHW'),\n", + " dict(type='PackActionInputs')\n", + " ],\n", + " test_mode=True))\n", + "val_evaluator = dict(type='AccMetric')\n", + "test_evaluator = dict(type='AccMetric')\n", + "auto_scale_lr = dict(enable=False, base_batch_size=256)\n", + "work_dir = './tutorial_exps'\n", + "\n" + ] + } + ], + "source": [ + "from 
mmengine.runner import set_random_seed\n", + "\n", + "# Modify dataset type and path\n", + "cfg.data_root = 'kinetics400_tiny/train/'\n", + "cfg.data_root_val = 'kinetics400_tiny/val/'\n", + "cfg.ann_file_train = 'kinetics400_tiny/kinetics_tiny_train_video.txt'\n", + "cfg.ann_file_val = 'kinetics400_tiny/kinetics_tiny_val_video.txt'\n", + "\n", + "\n", + "cfg.test_dataloader.dataset.ann_file = 'kinetics400_tiny/kinetics_tiny_val_video.txt'\n", + "cfg.test_dataloader.dataset.data_prefix.video = 'kinetics400_tiny/val/'\n", + "\n", + "cfg.train_dataloader.dataset.ann_file = 'kinetics400_tiny/kinetics_tiny_train_video.txt'\n", + "cfg.train_dataloader.dataset.data_prefix.video = 'kinetics400_tiny/train/'\n", + "\n", + "cfg.val_dataloader.dataset.ann_file = 'kinetics400_tiny/kinetics_tiny_val_video.txt'\n", + "cfg.val_dataloader.dataset.data_prefix.video = 'kinetics400_tiny/val/'\n", + "\n", + "\n", + "# Modify num classes of the model in cls_head\n", + "cfg.model.cls_head.num_classes = 2\n", + "# We can use the pre-trained TSN model\n", + "cfg.load_from = './checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth'\n", + "\n", + "# Set up working dir to save files and logs.\n", + "cfg.work_dir = './tutorial_exps'\n", + "\n", + "# The original learning rate (LR) is set for 8-GPU training.\n", + "# We divide it by 8 since we only use one GPU.\n", + "cfg.train_dataloader.batch_size = cfg.train_dataloader.batch_size // 16\n", + "cfg.val_dataloader.batch_size = cfg.val_dataloader.batch_size // 16\n", + "cfg.optim_wrapper.optimizer.lr = cfg.optim_wrapper.optimizer.lr / 8 / 16\n", + "cfg.train_cfg.max_epochs = 10\n", + "\n", + "cfg.train_dataloader.num_workers = 2\n", + "cfg.val_dataloader.num_workers = 2\n", + "cfg.test_dataloader.num_workers = 2\n", + "\n", + "# We can initialize the logger for training and have a look\n", + "# at the final config used for training\n", + "print(f'Config:\\n{cfg.pretty_text}')\n" + ] + }, + { + "cell_type": "markdown", + 
"metadata": { + "id": "tES-qnZ3k38Z" + }, + "source": [ + "### Train a new recognizer\n", + "\n", + "Finally, lets initialize the dataset and recognizer, then train a new recognizer!" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "dDBWkdDRk6oz", + "outputId": "044b9e09-2038-41c9-d5a3-8a74ae11ade2" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 14, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "tlhu9byjjt-K", - "outputId": "2d984a1d-93f7-493f-fd77-e19af8285f38" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Config:\n", - "model = dict(\n", - " type='Recognizer2D',\n", - " backbone=dict(\n", - " type='ResNet',\n", - " pretrained='https://download.pytorch.org/models/resnet50-11ad3fa6.pth',\n", - " depth=50,\n", - " norm_eval=False),\n", - " cls_head=dict(\n", - " type='TSNHead',\n", - " num_classes=2,\n", - " in_channels=2048,\n", - " spatial_type='avg',\n", - " consensus=dict(type='AvgConsensus', dim=1),\n", - " dropout_ratio=0.4,\n", - " init_std=0.01,\n", - " average_clips='prob'),\n", - " data_preprocessor=dict(\n", - " type='ActionDataPreprocessor',\n", - " mean=[123.675, 116.28, 103.53],\n", - " std=[58.395, 57.12, 57.375],\n", - " format_shape='NCHW'),\n", - " train_cfg=None,\n", - " test_cfg=None)\n", - "train_cfg = dict(\n", - " type='EpochBasedTrainLoop', max_epochs=10, val_begin=1, val_interval=1)\n", - "val_cfg = dict(type='ValLoop')\n", - "test_cfg = dict(type='TestLoop')\n", - "param_scheduler = [\n", - " dict(\n", - " type='MultiStepLR',\n", - " begin=0,\n", - " end=100,\n", - " by_epoch=True,\n", - " milestones=[40, 80],\n", - " gamma=0.1)\n", - "]\n", - "optim_wrapper = dict(\n", - " optimizer=dict(\n", - " type='SGD', lr=7.8125e-05, momentum=0.9, weight_decay=0.0001),\n", - " clip_grad=dict(max_norm=40, norm_type=2))\n", - "default_scope = 'mmaction'\n", - 
"default_hooks = dict(\n", - " runtime_info=dict(type='RuntimeInfoHook'),\n", - " timer=dict(type='IterTimerHook'),\n", - " logger=dict(type='LoggerHook', interval=20, ignore_last=False),\n", - " param_scheduler=dict(type='ParamSchedulerHook'),\n", - " checkpoint=dict(\n", - " type='CheckpointHook', interval=3, save_best='auto', max_keep_ckpts=3),\n", - " sampler_seed=dict(type='DistSamplerSeedHook'),\n", - " sync_buffers=dict(type='SyncBuffersHook'))\n", - "env_cfg = dict(\n", - " cudnn_benchmark=False,\n", - " mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),\n", - " dist_cfg=dict(backend='nccl'))\n", - "log_processor = dict(type='LogProcessor', window_size=20, by_epoch=True)\n", - "vis_backends = [dict(type='LocalVisBackend')]\n", - "visualizer = dict(\n", - " type='ActionVisualizer', vis_backends=[dict(type='LocalVisBackend')])\n", - "log_level = 'INFO'\n", - "load_from = './checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth'\n", - "resume = False\n", - "dataset_type = 'VideoDataset'\n", - "data_root = 'kinetics400_tiny/train/'\n", - "data_root_val = 'kinetics400_tiny/val/'\n", - "ann_file_train = 'kinetics400_tiny/kinetics_tiny_train_video.txt'\n", - "ann_file_val = 'kinetics400_tiny/kinetics_tiny_val_video.txt'\n", - "file_client_args = dict(io_backend='disk')\n", - "train_pipeline = [\n", - " dict(type='DecordInit', io_backend='disk'),\n", - " dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=3),\n", - " dict(type='DecordDecode'),\n", - " dict(type='Resize', scale=(-1, 256)),\n", - " dict(\n", - " type='MultiScaleCrop',\n", - " input_size=224,\n", - " scales=(1, 0.875, 0.75, 0.66),\n", - " random_crop=False,\n", - " max_wh_scale_gap=1),\n", - " dict(type='Resize', scale=(224, 224), keep_ratio=False),\n", - " dict(type='Flip', flip_ratio=0.5),\n", - " dict(type='FormatShape', input_format='NCHW'),\n", - " dict(type='PackActionInputs')\n", - "]\n", - "val_pipeline = [\n", - " dict(type='DecordInit', 
io_backend='disk'),\n", - " dict(\n", - " type='SampleFrames',\n", - " clip_len=1,\n", - " frame_interval=1,\n", - " num_clips=3,\n", - " test_mode=True),\n", - " dict(type='DecordDecode'),\n", - " dict(type='Resize', scale=(-1, 256)),\n", - " dict(type='CenterCrop', crop_size=224),\n", - " dict(type='FormatShape', input_format='NCHW'),\n", - " dict(type='PackActionInputs')\n", - "]\n", - "test_pipeline = [\n", - " dict(type='DecordInit', io_backend='disk'),\n", - " dict(\n", - " type='SampleFrames',\n", - " clip_len=1,\n", - " frame_interval=1,\n", - " num_clips=25,\n", - " test_mode=True),\n", - " dict(type='DecordDecode'),\n", - " dict(type='Resize', scale=(-1, 256)),\n", - " dict(type='TenCrop', crop_size=224),\n", - " dict(type='FormatShape', input_format='NCHW'),\n", - " dict(type='PackActionInputs')\n", - "]\n", - "train_dataloader = dict(\n", - " batch_size=2,\n", - " num_workers=2,\n", - " persistent_workers=True,\n", - " sampler=dict(type='DefaultSampler', shuffle=True),\n", - " dataset=dict(\n", - " type='VideoDataset',\n", - " ann_file='kinetics400_tiny/kinetics_tiny_train_video.txt',\n", - " data_prefix=dict(video='kinetics400_tiny/train/'),\n", - " pipeline=[\n", - " dict(type='DecordInit', io_backend='disk'),\n", - " dict(\n", - " type='SampleFrames', clip_len=1, frame_interval=1,\n", - " num_clips=3),\n", - " dict(type='DecordDecode'),\n", - " dict(type='Resize', scale=(-1, 256)),\n", - " dict(\n", - " type='MultiScaleCrop',\n", - " input_size=224,\n", - " scales=(1, 0.875, 0.75, 0.66),\n", - " random_crop=False,\n", - " max_wh_scale_gap=1),\n", - " dict(type='Resize', scale=(224, 224), keep_ratio=False),\n", - " dict(type='Flip', flip_ratio=0.5),\n", - " dict(type='FormatShape', input_format='NCHW'),\n", - " dict(type='PackActionInputs')\n", - " ]))\n", - "val_dataloader = dict(\n", - " batch_size=2,\n", - " num_workers=2,\n", - " persistent_workers=True,\n", - " sampler=dict(type='DefaultSampler', shuffle=False),\n", - " dataset=dict(\n", - " 
type='VideoDataset',\n", - " ann_file='kinetics400_tiny/kinetics_tiny_val_video.txt',\n", - " data_prefix=dict(video='kinetics400_tiny/val/'),\n", - " pipeline=[\n", - " dict(type='DecordInit', io_backend='disk'),\n", - " dict(\n", - " type='SampleFrames',\n", - " clip_len=1,\n", - " frame_interval=1,\n", - " num_clips=3,\n", - " test_mode=True),\n", - " dict(type='DecordDecode'),\n", - " dict(type='Resize', scale=(-1, 256)),\n", - " dict(type='CenterCrop', crop_size=224),\n", - " dict(type='FormatShape', input_format='NCHW'),\n", - " dict(type='PackActionInputs')\n", - " ],\n", - " test_mode=True))\n", - "test_dataloader = dict(\n", - " batch_size=1,\n", - " num_workers=2,\n", - " persistent_workers=True,\n", - " sampler=dict(type='DefaultSampler', shuffle=False),\n", - " dataset=dict(\n", - " type='VideoDataset',\n", - " ann_file='kinetics400_tiny/kinetics_tiny_val_video.txt',\n", - " data_prefix=dict(video='kinetics400_tiny/val/'),\n", - " pipeline=[\n", - " dict(type='DecordInit', io_backend='disk'),\n", - " dict(\n", - " type='SampleFrames',\n", - " clip_len=1,\n", - " frame_interval=1,\n", - " num_clips=25,\n", - " test_mode=True),\n", - " dict(type='DecordDecode'),\n", - " dict(type='Resize', scale=(-1, 256)),\n", - " dict(type='TenCrop', crop_size=224),\n", - " dict(type='FormatShape', input_format='NCHW'),\n", - " dict(type='PackActionInputs')\n", - " ],\n", - " test_mode=True))\n", - "val_evaluator = dict(type='AccMetric')\n", - "test_evaluator = dict(type='AccMetric')\n", - "auto_scale_lr = dict(enable=False, base_batch_size=256)\n", - "work_dir = './tutorial_exps'\n", - "\n" - ] - } - ], - "source": [ - "from mmengine.runner import set_random_seed\n", - "\n", - "# Modify dataset type and path\n", - "cfg.data_root = 'kinetics400_tiny/train/'\n", - "cfg.data_root_val = 'kinetics400_tiny/val/'\n", - "cfg.ann_file_train = 'kinetics400_tiny/kinetics_tiny_train_video.txt'\n", - "cfg.ann_file_val = 'kinetics400_tiny/kinetics_tiny_val_video.txt'\n", - "\n", - 
"\n", - "cfg.test_dataloader.dataset.ann_file = 'kinetics400_tiny/kinetics_tiny_val_video.txt'\n", - "cfg.test_dataloader.dataset.data_prefix.video = 'kinetics400_tiny/val/'\n", - "\n", - "cfg.train_dataloader.dataset.ann_file = 'kinetics400_tiny/kinetics_tiny_train_video.txt'\n", - "cfg.train_dataloader.dataset.data_prefix.video = 'kinetics400_tiny/train/'\n", - "\n", - "cfg.val_dataloader.dataset.ann_file = 'kinetics400_tiny/kinetics_tiny_val_video.txt'\n", - "cfg.val_dataloader.dataset.data_prefix.video = 'kinetics400_tiny/val/'\n", - "\n", - "\n", - "# Modify num classes of the model in cls_head\n", - "cfg.model.cls_head.num_classes = 2\n", - "# We can use the pre-trained TSN model\n", - "cfg.load_from = './checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth'\n", - "\n", - "# Set up working dir to save files and logs.\n", - "cfg.work_dir = './tutorial_exps'\n", - "\n", - "# The original learning rate (LR) is set for 8-GPU training.\n", - "# We divide it by 8 since we only use one GPU.\n", - "cfg.train_dataloader.batch_size = cfg.train_dataloader.batch_size // 16\n", - "cfg.val_dataloader.batch_size = cfg.val_dataloader.batch_size // 16\n", - "cfg.optim_wrapper.optimizer.lr = cfg.optim_wrapper.optimizer.lr / 8 / 16\n", - "cfg.train_cfg.max_epochs = 10\n", - "\n", - "cfg.train_dataloader.num_workers = 2\n", - "cfg.val_dataloader.num_workers = 2\n", - "cfg.test_dataloader.num_workers = 2\n", - "\n", - "# We can initialize the logger for training and have a look\n", - "# at the final config used for training\n", - "print(f'Config:\\n{cfg.pretty_text}')\n" - ] + "output_type": "stream", + "name": "stdout", + "text": [ + "05/15 03:33:34 - mmengine - INFO - \n", + "------------------------------------------------------------\n", + "System environment:\n", + " sys.platform: linux\n", + " Python: 3.10.11 (main, Apr 5 2023, 14:15:10) [GCC 9.4.0]\n", + " CUDA available: True\n", + " numpy_random_seed: 1853452922\n", + " GPU 0: Tesla T4\n", + " CUDA_HOME: 
/usr/local/cuda\n", + " NVCC: Cuda compilation tools, release 11.8, V11.8.89\n", + " GCC: x86_64-linux-gnu-gcc (Ubuntu 9.4.0-1ubuntu1~20.04.1) 9.4.0\n", + " PyTorch: 2.0.0+cu118\n", + " PyTorch compiling details: PyTorch built with:\n", + " - GCC 9.3\n", + " - C++ Version: 201703\n", + " - Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications\n", + " - Intel(R) MKL-DNN v2.7.3 (Git Hash 6dbeffbae1f23cbbeae17adb7b5b13f1f37c080e)\n", + " - OpenMP 201511 (a.k.a. OpenMP 4.5)\n", + " - LAPACK is enabled (usually provided by MKL)\n", + " - NNPACK is enabled\n", + " - CPU capability usage: AVX2\n", + " - CUDA Runtime 11.8\n", + " - NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_90,code=sm_90\n", + " - CuDNN 8.7\n", + " - Magma 2.6.1\n", + " - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=11.8, CUDNN_VERSION=8.7.0, CXX_COMPILER=/opt/rh/devtoolset-9/root/usr/bin/c++, CXX_FLAGS= -D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wunused-local-typedefs -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable 
-Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_DISABLE_GPU_ASSERTS=ON, TORCH_VERSION=2.0.0, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=1, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, \n", + "\n", + " TorchVision: 0.15.1+cu118\n", + " OpenCV: 4.7.0\n", + " MMEngine: 0.7.3\n", + "\n", + "Runtime environment:\n", + " cudnn_benchmark: False\n", + " mp_cfg: {'mp_start_method': 'fork', 'opencv_num_threads': 0}\n", + " dist_cfg: {'backend': 'nccl'}\n", + " seed: None\n", + " Distributed launcher: none\n", + " Distributed training: False\n", + " GPU number: 1\n", + "------------------------------------------------------------\n", + "\n", + "05/15 03:33:34 - mmengine - INFO - Config:\n", + "model = dict(\n", + " type='Recognizer2D',\n", + " backbone=dict(\n", + " type='ResNet',\n", + " pretrained='https://download.pytorch.org/models/resnet50-11ad3fa6.pth',\n", + " depth=50,\n", + " norm_eval=False),\n", + " cls_head=dict(\n", + " type='TSNHead',\n", + " num_classes=2,\n", + " in_channels=2048,\n", + " spatial_type='avg',\n", + " consensus=dict(type='AvgConsensus', dim=1),\n", + " dropout_ratio=0.4,\n", + " init_std=0.01,\n", + " average_clips='prob'),\n", + " data_preprocessor=dict(\n", + " type='ActionDataPreprocessor',\n", + " mean=[123.675, 116.28, 103.53],\n", + " std=[58.395, 57.12, 57.375],\n", + " format_shape='NCHW'),\n", + " train_cfg=None,\n", + " test_cfg=None)\n", + "train_cfg = dict(\n", + " type='EpochBasedTrainLoop', max_epochs=10, val_begin=1, val_interval=1)\n", + "val_cfg = dict(type='ValLoop')\n", + "test_cfg = dict(type='TestLoop')\n", + "param_scheduler = [\n", + " dict(\n", + " type='MultiStepLR',\n", + " begin=0,\n", + " end=100,\n", + " by_epoch=True,\n", + " milestones=[40, 80],\n", + " gamma=0.1)\n", + "]\n", + 
"optim_wrapper = dict(\n", + " optimizer=dict(\n", + " type='SGD', lr=7.8125e-05, momentum=0.9, weight_decay=0.0001),\n", + " clip_grad=dict(max_norm=40, norm_type=2))\n", + "default_scope = 'mmaction'\n", + "default_hooks = dict(\n", + " runtime_info=dict(type='RuntimeInfoHook'),\n", + " timer=dict(type='IterTimerHook'),\n", + " logger=dict(type='LoggerHook', interval=20, ignore_last=False),\n", + " param_scheduler=dict(type='ParamSchedulerHook'),\n", + " checkpoint=dict(\n", + " type='CheckpointHook', interval=3, save_best='auto', max_keep_ckpts=3),\n", + " sampler_seed=dict(type='DistSamplerSeedHook'),\n", + " sync_buffers=dict(type='SyncBuffersHook'))\n", + "env_cfg = dict(\n", + " cudnn_benchmark=False,\n", + " mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),\n", + " dist_cfg=dict(backend='nccl'))\n", + "log_processor = dict(type='LogProcessor', window_size=20, by_epoch=True)\n", + "vis_backends = [dict(type='LocalVisBackend')]\n", + "visualizer = dict(\n", + " type='ActionVisualizer', vis_backends=[dict(type='LocalVisBackend')])\n", + "log_level = 'INFO'\n", + "load_from = './checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth'\n", + "resume = False\n", + "dataset_type = 'VideoDataset'\n", + "data_root = 'kinetics400_tiny/train/'\n", + "data_root_val = 'kinetics400_tiny/val/'\n", + "ann_file_train = 'kinetics400_tiny/kinetics_tiny_train_video.txt'\n", + "ann_file_val = 'kinetics400_tiny/kinetics_tiny_val_video.txt'\n", + "file_client_args = dict(io_backend='disk')\n", + "train_pipeline = [\n", + " dict(type='DecordInit', io_backend='disk'),\n", + " dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=3),\n", + " dict(type='DecordDecode'),\n", + " dict(type='Resize', scale=(-1, 256)),\n", + " dict(\n", + " type='MultiScaleCrop',\n", + " input_size=224,\n", + " scales=(1, 0.875, 0.75, 0.66),\n", + " random_crop=False,\n", + " max_wh_scale_gap=1),\n", + " dict(type='Resize', scale=(224, 224), keep_ratio=False),\n", + " 
dict(type='Flip', flip_ratio=0.5),\n", + " dict(type='FormatShape', input_format='NCHW'),\n", + " dict(type='PackActionInputs')\n", + "]\n", + "val_pipeline = [\n", + " dict(type='DecordInit', io_backend='disk'),\n", + " dict(\n", + " type='SampleFrames',\n", + " clip_len=1,\n", + " frame_interval=1,\n", + " num_clips=3,\n", + " test_mode=True),\n", + " dict(type='DecordDecode'),\n", + " dict(type='Resize', scale=(-1, 256)),\n", + " dict(type='CenterCrop', crop_size=224),\n", + " dict(type='FormatShape', input_format='NCHW'),\n", + " dict(type='PackActionInputs')\n", + "]\n", + "test_pipeline = [\n", + " dict(type='DecordInit', io_backend='disk'),\n", + " dict(\n", + " type='SampleFrames',\n", + " clip_len=1,\n", + " frame_interval=1,\n", + " num_clips=25,\n", + " test_mode=True),\n", + " dict(type='DecordDecode'),\n", + " dict(type='Resize', scale=(-1, 256)),\n", + " dict(type='TenCrop', crop_size=224),\n", + " dict(type='FormatShape', input_format='NCHW'),\n", + " dict(type='PackActionInputs')\n", + "]\n", + "train_dataloader = dict(\n", + " batch_size=2,\n", + " num_workers=2,\n", + " persistent_workers=True,\n", + " sampler=dict(type='DefaultSampler', shuffle=True),\n", + " dataset=dict(\n", + " type='VideoDataset',\n", + " ann_file='kinetics400_tiny/kinetics_tiny_train_video.txt',\n", + " data_prefix=dict(video='kinetics400_tiny/train/'),\n", + " pipeline=[\n", + " dict(type='DecordInit', io_backend='disk'),\n", + " dict(\n", + " type='SampleFrames', clip_len=1, frame_interval=1,\n", + " num_clips=3),\n", + " dict(type='DecordDecode'),\n", + " dict(type='Resize', scale=(-1, 256)),\n", + " dict(\n", + " type='MultiScaleCrop',\n", + " input_size=224,\n", + " scales=(1, 0.875, 0.75, 0.66),\n", + " random_crop=False,\n", + " max_wh_scale_gap=1),\n", + " dict(type='Resize', scale=(224, 224), keep_ratio=False),\n", + " dict(type='Flip', flip_ratio=0.5),\n", + " dict(type='FormatShape', input_format='NCHW'),\n", + " dict(type='PackActionInputs')\n", + " ]))\n", + 
"val_dataloader = dict(\n", + " batch_size=2,\n", + " num_workers=2,\n", + " persistent_workers=True,\n", + " sampler=dict(type='DefaultSampler', shuffle=False),\n", + " dataset=dict(\n", + " type='VideoDataset',\n", + " ann_file='kinetics400_tiny/kinetics_tiny_val_video.txt',\n", + " data_prefix=dict(video='kinetics400_tiny/val/'),\n", + " pipeline=[\n", + " dict(type='DecordInit', io_backend='disk'),\n", + " dict(\n", + " type='SampleFrames',\n", + " clip_len=1,\n", + " frame_interval=1,\n", + " num_clips=3,\n", + " test_mode=True),\n", + " dict(type='DecordDecode'),\n", + " dict(type='Resize', scale=(-1, 256)),\n", + " dict(type='CenterCrop', crop_size=224),\n", + " dict(type='FormatShape', input_format='NCHW'),\n", + " dict(type='PackActionInputs')\n", + " ],\n", + " test_mode=True))\n", + "test_dataloader = dict(\n", + " batch_size=1,\n", + " num_workers=2,\n", + " persistent_workers=True,\n", + " sampler=dict(type='DefaultSampler', shuffle=False),\n", + " dataset=dict(\n", + " type='VideoDataset',\n", + " ann_file='kinetics400_tiny/kinetics_tiny_val_video.txt',\n", + " data_prefix=dict(video='kinetics400_tiny/val/'),\n", + " pipeline=[\n", + " dict(type='DecordInit', io_backend='disk'),\n", + " dict(\n", + " type='SampleFrames',\n", + " clip_len=1,\n", + " frame_interval=1,\n", + " num_clips=25,\n", + " test_mode=True),\n", + " dict(type='DecordDecode'),\n", + " dict(type='Resize', scale=(-1, 256)),\n", + " dict(type='TenCrop', crop_size=224),\n", + " dict(type='FormatShape', input_format='NCHW'),\n", + " dict(type='PackActionInputs')\n", + " ],\n", + " test_mode=True))\n", + "val_evaluator = dict(type='AccMetric')\n", + "test_evaluator = dict(type='AccMetric')\n", + "auto_scale_lr = dict(enable=False, base_batch_size=256)\n", + "work_dir = './tutorial_exps'\n", + "\n", + "05/15 03:33:35 - mmengine - INFO - Distributed training is not used, all SyncBatchNorm (SyncBN) layers in the model will be automatically reverted to BatchNormXd layers if they are 
used.\n", + "05/15 03:33:35 - mmengine - INFO - Hooks will be executed in the following order:\n", + "before_run:\n", + "(VERY_HIGH ) RuntimeInfoHook \n", + "(BELOW_NORMAL) LoggerHook \n", + " -------------------- \n", + "before_train:\n", + "(VERY_HIGH ) RuntimeInfoHook \n", + "(NORMAL ) IterTimerHook \n", + "(VERY_LOW ) CheckpointHook \n", + " -------------------- \n", + "before_train_epoch:\n", + "(VERY_HIGH ) RuntimeInfoHook \n", + "(NORMAL ) IterTimerHook \n", + "(NORMAL ) DistSamplerSeedHook \n", + " -------------------- \n", + "before_train_iter:\n", + "(VERY_HIGH ) RuntimeInfoHook \n", + "(NORMAL ) IterTimerHook \n", + " -------------------- \n", + "after_train_iter:\n", + "(VERY_HIGH ) RuntimeInfoHook \n", + "(NORMAL ) IterTimerHook \n", + "(BELOW_NORMAL) LoggerHook \n", + "(LOW ) ParamSchedulerHook \n", + "(VERY_LOW ) CheckpointHook \n", + " -------------------- \n", + "after_train_epoch:\n", + "(NORMAL ) IterTimerHook \n", + "(NORMAL ) SyncBuffersHook \n", + "(LOW ) ParamSchedulerHook \n", + "(VERY_LOW ) CheckpointHook \n", + " -------------------- \n", + "before_val_epoch:\n", + "(NORMAL ) IterTimerHook \n", + "(NORMAL ) SyncBuffersHook \n", + " -------------------- \n", + "before_val_iter:\n", + "(NORMAL ) IterTimerHook \n", + " -------------------- \n", + "after_val_iter:\n", + "(NORMAL ) IterTimerHook \n", + "(BELOW_NORMAL) LoggerHook \n", + " -------------------- \n", + "after_val_epoch:\n", + "(VERY_HIGH ) RuntimeInfoHook \n", + "(NORMAL ) IterTimerHook \n", + "(BELOW_NORMAL) LoggerHook \n", + "(LOW ) ParamSchedulerHook \n", + "(VERY_LOW ) CheckpointHook \n", + " -------------------- \n", + "after_train:\n", + "(VERY_LOW ) CheckpointHook \n", + " -------------------- \n", + "before_test_epoch:\n", + "(NORMAL ) IterTimerHook \n", + " -------------------- \n", + "before_test_iter:\n", + "(NORMAL ) IterTimerHook \n", + " -------------------- \n", + "after_test_iter:\n", + "(NORMAL ) IterTimerHook \n", + "(BELOW_NORMAL) LoggerHook \n", + " 
-------------------- \n", + "after_test_epoch:\n", + "(VERY_HIGH ) RuntimeInfoHook \n", + "(NORMAL ) IterTimerHook \n", + "(BELOW_NORMAL) LoggerHook \n", + " -------------------- \n", + "after_run:\n", + "(BELOW_NORMAL) LoggerHook \n", + " -------------------- \n", + "Loads checkpoint by http backend from path: https://download.pytorch.org/models/resnet50-11ad3fa6.pth\n" + ] }, { - "cell_type": "markdown", - "metadata": { - "id": "tES-qnZ3k38Z" - }, - "source": [ - "### Train a new recognizer\n", - "\n", - "Finally, lets initialize the dataset and recognizer, then train a new recognizer!" - ] + "output_type": "stream", + "name": "stderr", + "text": [ + "Downloading: \"https://download.pytorch.org/models/resnet50-11ad3fa6.pth\" to /root/.cache/torch/hub/checkpoints/resnet50-11ad3fa6.pth\n" + ] }, { - "cell_type": "code", - "execution_count": 15, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "dDBWkdDRk6oz", - "outputId": "044b9e09-2038-41c9-d5a3-8a74ae11ade2" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "05/15 03:33:34 - mmengine - INFO - \n", - "------------------------------------------------------------\n", - "System environment:\n", - " sys.platform: linux\n", - " Python: 3.10.11 (main, Apr 5 2023, 14:15:10) [GCC 9.4.0]\n", - " CUDA available: True\n", - " numpy_random_seed: 1853452922\n", - " GPU 0: Tesla T4\n", - " CUDA_HOME: /usr/local/cuda\n", - " NVCC: Cuda compilation tools, release 11.8, V11.8.89\n", - " GCC: x86_64-linux-gnu-gcc (Ubuntu 9.4.0-1ubuntu1~20.04.1) 9.4.0\n", - " PyTorch: 2.0.0+cu118\n", - " PyTorch compiling details: PyTorch built with:\n", - " - GCC 9.3\n", - " - C++ Version: 201703\n", - " - Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications\n", - " - Intel(R) MKL-DNN v2.7.3 (Git Hash 6dbeffbae1f23cbbeae17adb7b5b13f1f37c080e)\n", - " - OpenMP 201511 (a.k.a. 
OpenMP 4.5)\n", - " - LAPACK is enabled (usually provided by MKL)\n", - " - NNPACK is enabled\n", - " - CPU capability usage: AVX2\n", - " - CUDA Runtime 11.8\n", - " - NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_90,code=sm_90\n", - " - CuDNN 8.7\n", - " - Magma 2.6.1\n", - " - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=11.8, CUDNN_VERSION=8.7.0, CXX_COMPILER=/opt/rh/devtoolset-9/root/usr/bin/c++, CXX_FLAGS= -D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wunused-local-typedefs -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_DISABLE_GPU_ASSERTS=ON, TORCH_VERSION=2.0.0, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=1, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, \n", - "\n", - " TorchVision: 0.15.1+cu118\n", - " OpenCV: 4.7.0\n", - " MMEngine: 0.7.3\n", - "\n", - 
"Runtime environment:\n", - " cudnn_benchmark: False\n", - " mp_cfg: {'mp_start_method': 'fork', 'opencv_num_threads': 0}\n", - " dist_cfg: {'backend': 'nccl'}\n", - " seed: None\n", - " Distributed launcher: none\n", - " Distributed training: False\n", - " GPU number: 1\n", - "------------------------------------------------------------\n", - "\n", - "05/15 03:33:34 - mmengine - INFO - Config:\n", - "model = dict(\n", - " type='Recognizer2D',\n", - " backbone=dict(\n", - " type='ResNet',\n", - " pretrained='https://download.pytorch.org/models/resnet50-11ad3fa6.pth',\n", - " depth=50,\n", - " norm_eval=False),\n", - " cls_head=dict(\n", - " type='TSNHead',\n", - " num_classes=2,\n", - " in_channels=2048,\n", - " spatial_type='avg',\n", - " consensus=dict(type='AvgConsensus', dim=1),\n", - " dropout_ratio=0.4,\n", - " init_std=0.01,\n", - " average_clips='prob'),\n", - " data_preprocessor=dict(\n", - " type='ActionDataPreprocessor',\n", - " mean=[123.675, 116.28, 103.53],\n", - " std=[58.395, 57.12, 57.375],\n", - " format_shape='NCHW'),\n", - " train_cfg=None,\n", - " test_cfg=None)\n", - "train_cfg = dict(\n", - " type='EpochBasedTrainLoop', max_epochs=10, val_begin=1, val_interval=1)\n", - "val_cfg = dict(type='ValLoop')\n", - "test_cfg = dict(type='TestLoop')\n", - "param_scheduler = [\n", - " dict(\n", - " type='MultiStepLR',\n", - " begin=0,\n", - " end=100,\n", - " by_epoch=True,\n", - " milestones=[40, 80],\n", - " gamma=0.1)\n", - "]\n", - "optim_wrapper = dict(\n", - " optimizer=dict(\n", - " type='SGD', lr=7.8125e-05, momentum=0.9, weight_decay=0.0001),\n", - " clip_grad=dict(max_norm=40, norm_type=2))\n", - "default_scope = 'mmaction'\n", - "default_hooks = dict(\n", - " runtime_info=dict(type='RuntimeInfoHook'),\n", - " timer=dict(type='IterTimerHook'),\n", - " logger=dict(type='LoggerHook', interval=20, ignore_last=False),\n", - " param_scheduler=dict(type='ParamSchedulerHook'),\n", - " checkpoint=dict(\n", - " type='CheckpointHook', interval=3, 
save_best='auto', max_keep_ckpts=3),\n", - " sampler_seed=dict(type='DistSamplerSeedHook'),\n", - " sync_buffers=dict(type='SyncBuffersHook'))\n", - "env_cfg = dict(\n", - " cudnn_benchmark=False,\n", - " mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),\n", - " dist_cfg=dict(backend='nccl'))\n", - "log_processor = dict(type='LogProcessor', window_size=20, by_epoch=True)\n", - "vis_backends = [dict(type='LocalVisBackend')]\n", - "visualizer = dict(\n", - " type='ActionVisualizer', vis_backends=[dict(type='LocalVisBackend')])\n", - "log_level = 'INFO'\n", - "load_from = './checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth'\n", - "resume = False\n", - "dataset_type = 'VideoDataset'\n", - "data_root = 'kinetics400_tiny/train/'\n", - "data_root_val = 'kinetics400_tiny/val/'\n", - "ann_file_train = 'kinetics400_tiny/kinetics_tiny_train_video.txt'\n", - "ann_file_val = 'kinetics400_tiny/kinetics_tiny_val_video.txt'\n", - "file_client_args = dict(io_backend='disk')\n", - "train_pipeline = [\n", - " dict(type='DecordInit', io_backend='disk'),\n", - " dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=3),\n", - " dict(type='DecordDecode'),\n", - " dict(type='Resize', scale=(-1, 256)),\n", - " dict(\n", - " type='MultiScaleCrop',\n", - " input_size=224,\n", - " scales=(1, 0.875, 0.75, 0.66),\n", - " random_crop=False,\n", - " max_wh_scale_gap=1),\n", - " dict(type='Resize', scale=(224, 224), keep_ratio=False),\n", - " dict(type='Flip', flip_ratio=0.5),\n", - " dict(type='FormatShape', input_format='NCHW'),\n", - " dict(type='PackActionInputs')\n", - "]\n", - "val_pipeline = [\n", - " dict(type='DecordInit', io_backend='disk'),\n", - " dict(\n", - " type='SampleFrames',\n", - " clip_len=1,\n", - " frame_interval=1,\n", - " num_clips=3,\n", - " test_mode=True),\n", - " dict(type='DecordDecode'),\n", - " dict(type='Resize', scale=(-1, 256)),\n", - " dict(type='CenterCrop', crop_size=224),\n", - " dict(type='FormatShape', 
input_format='NCHW'),\n", - " dict(type='PackActionInputs')\n", - "]\n", - "test_pipeline = [\n", - " dict(type='DecordInit', io_backend='disk'),\n", - " dict(\n", - " type='SampleFrames',\n", - " clip_len=1,\n", - " frame_interval=1,\n", - " num_clips=25,\n", - " test_mode=True),\n", - " dict(type='DecordDecode'),\n", - " dict(type='Resize', scale=(-1, 256)),\n", - " dict(type='TenCrop', crop_size=224),\n", - " dict(type='FormatShape', input_format='NCHW'),\n", - " dict(type='PackActionInputs')\n", - "]\n", - "train_dataloader = dict(\n", - " batch_size=2,\n", - " num_workers=2,\n", - " persistent_workers=True,\n", - " sampler=dict(type='DefaultSampler', shuffle=True),\n", - " dataset=dict(\n", - " type='VideoDataset',\n", - " ann_file='kinetics400_tiny/kinetics_tiny_train_video.txt',\n", - " data_prefix=dict(video='kinetics400_tiny/train/'),\n", - " pipeline=[\n", - " dict(type='DecordInit', io_backend='disk'),\n", - " dict(\n", - " type='SampleFrames', clip_len=1, frame_interval=1,\n", - " num_clips=3),\n", - " dict(type='DecordDecode'),\n", - " dict(type='Resize', scale=(-1, 256)),\n", - " dict(\n", - " type='MultiScaleCrop',\n", - " input_size=224,\n", - " scales=(1, 0.875, 0.75, 0.66),\n", - " random_crop=False,\n", - " max_wh_scale_gap=1),\n", - " dict(type='Resize', scale=(224, 224), keep_ratio=False),\n", - " dict(type='Flip', flip_ratio=0.5),\n", - " dict(type='FormatShape', input_format='NCHW'),\n", - " dict(type='PackActionInputs')\n", - " ]))\n", - "val_dataloader = dict(\n", - " batch_size=2,\n", - " num_workers=2,\n", - " persistent_workers=True,\n", - " sampler=dict(type='DefaultSampler', shuffle=False),\n", - " dataset=dict(\n", - " type='VideoDataset',\n", - " ann_file='kinetics400_tiny/kinetics_tiny_val_video.txt',\n", - " data_prefix=dict(video='kinetics400_tiny/val/'),\n", - " pipeline=[\n", - " dict(type='DecordInit', io_backend='disk'),\n", - " dict(\n", - " type='SampleFrames',\n", - " clip_len=1,\n", - " frame_interval=1,\n", - " 
num_clips=3,\n", - " test_mode=True),\n", - " dict(type='DecordDecode'),\n", - " dict(type='Resize', scale=(-1, 256)),\n", - " dict(type='CenterCrop', crop_size=224),\n", - " dict(type='FormatShape', input_format='NCHW'),\n", - " dict(type='PackActionInputs')\n", - " ],\n", - " test_mode=True))\n", - "test_dataloader = dict(\n", - " batch_size=1,\n", - " num_workers=2,\n", - " persistent_workers=True,\n", - " sampler=dict(type='DefaultSampler', shuffle=False),\n", - " dataset=dict(\n", - " type='VideoDataset',\n", - " ann_file='kinetics400_tiny/kinetics_tiny_val_video.txt',\n", - " data_prefix=dict(video='kinetics400_tiny/val/'),\n", - " pipeline=[\n", - " dict(type='DecordInit', io_backend='disk'),\n", - " dict(\n", - " type='SampleFrames',\n", - " clip_len=1,\n", - " frame_interval=1,\n", - " num_clips=25,\n", - " test_mode=True),\n", - " dict(type='DecordDecode'),\n", - " dict(type='Resize', scale=(-1, 256)),\n", - " dict(type='TenCrop', crop_size=224),\n", - " dict(type='FormatShape', input_format='NCHW'),\n", - " dict(type='PackActionInputs')\n", - " ],\n", - " test_mode=True))\n", - "val_evaluator = dict(type='AccMetric')\n", - "test_evaluator = dict(type='AccMetric')\n", - "auto_scale_lr = dict(enable=False, base_batch_size=256)\n", - "work_dir = './tutorial_exps'\n", - "\n", - "05/15 03:33:35 - mmengine - INFO - Distributed training is not used, all SyncBatchNorm (SyncBN) layers in the model will be automatically reverted to BatchNormXd layers if they are used.\n", - "05/15 03:33:35 - mmengine - INFO - Hooks will be executed in the following order:\n", - "before_run:\n", - "(VERY_HIGH ) RuntimeInfoHook \n", - "(BELOW_NORMAL) LoggerHook \n", - " -------------------- \n", - "before_train:\n", - "(VERY_HIGH ) RuntimeInfoHook \n", - "(NORMAL ) IterTimerHook \n", - "(VERY_LOW ) CheckpointHook \n", - " -------------------- \n", - "before_train_epoch:\n", - "(VERY_HIGH ) RuntimeInfoHook \n", - "(NORMAL ) IterTimerHook \n", - "(NORMAL ) DistSamplerSeedHook \n", - " 
-------------------- \n", - "before_train_iter:\n", - "(VERY_HIGH ) RuntimeInfoHook \n", - "(NORMAL ) IterTimerHook \n", - " -------------------- \n", - "after_train_iter:\n", - "(VERY_HIGH ) RuntimeInfoHook \n", - "(NORMAL ) IterTimerHook \n", - "(BELOW_NORMAL) LoggerHook \n", - "(LOW ) ParamSchedulerHook \n", - "(VERY_LOW ) CheckpointHook \n", - " -------------------- \n", - "after_train_epoch:\n", - "(NORMAL ) IterTimerHook \n", - "(NORMAL ) SyncBuffersHook \n", - "(LOW ) ParamSchedulerHook \n", - "(VERY_LOW ) CheckpointHook \n", - " -------------------- \n", - "before_val_epoch:\n", - "(NORMAL ) IterTimerHook \n", - "(NORMAL ) SyncBuffersHook \n", - " -------------------- \n", - "before_val_iter:\n", - "(NORMAL ) IterTimerHook \n", - " -------------------- \n", - "after_val_iter:\n", - "(NORMAL ) IterTimerHook \n", - "(BELOW_NORMAL) LoggerHook \n", - " -------------------- \n", - "after_val_epoch:\n", - "(VERY_HIGH ) RuntimeInfoHook \n", - "(NORMAL ) IterTimerHook \n", - "(BELOW_NORMAL) LoggerHook \n", - "(LOW ) ParamSchedulerHook \n", - "(VERY_LOW ) CheckpointHook \n", - " -------------------- \n", - "after_train:\n", - "(VERY_LOW ) CheckpointHook \n", - " -------------------- \n", - "before_test_epoch:\n", - "(NORMAL ) IterTimerHook \n", - " -------------------- \n", - "before_test_iter:\n", - "(NORMAL ) IterTimerHook \n", - " -------------------- \n", - "after_test_iter:\n", - "(NORMAL ) IterTimerHook \n", - "(BELOW_NORMAL) LoggerHook \n", - " -------------------- \n", - "after_test_epoch:\n", - "(VERY_HIGH ) RuntimeInfoHook \n", - "(NORMAL ) IterTimerHook \n", - "(BELOW_NORMAL) LoggerHook \n", - " -------------------- \n", - "after_run:\n", - "(BELOW_NORMAL) LoggerHook \n", - " -------------------- \n", - "Loads checkpoint by http backend from path: https://download.pytorch.org/models/resnet50-11ad3fa6.pth\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "Downloading: \"https://download.pytorch.org/models/resnet50-11ad3fa6.pth\" 
to /root/.cache/torch/hub/checkpoints/resnet50-11ad3fa6.pth\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "05/15 03:33:37 - mmengine - INFO - These parameters in pretrained checkpoint are not loaded: {'fc.weight', 'fc.bias'}\n", - "Loads checkpoint by local backend from path: ./checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth\n", - "The model and loaded state dict do not match exactly\n", - "\n", - "size mismatch for cls_head.fc_cls.weight: copying a param with shape torch.Size([400, 2048]) from checkpoint, the shape in current model is torch.Size([2, 2048]).\n", - "size mismatch for cls_head.fc_cls.bias: copying a param with shape torch.Size([400]) from checkpoint, the shape in current model is torch.Size([2]).\n", - "05/15 03:33:37 - mmengine - INFO - Load checkpoint from ./checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth\n", - "05/15 03:33:37 - mmengine - WARNING - \"FileClient\" will be deprecated in future. Please use io functions in https://mmengine.readthedocs.io/en/latest/api/fileio.html#file-io\n", - "05/15 03:33:37 - mmengine - INFO - Checkpoints will be saved to /content/mmaction2/tutorial_exps.\n", - "05/15 03:33:41 - mmengine - INFO - Exp name: tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb_20230515_033334\n", - "05/15 03:33:41 - mmengine - INFO - Epoch(train) [1][15/15] lr: 7.8125e-05 eta: 0:00:31 time: 0.2334 data_time: 0.0793 memory: 2917 grad_norm: 11.9900 loss: 0.6971 top1_acc: 1.0000 top5_acc: 1.0000 loss_cls: 0.6971\n", - "05/15 03:33:42 - mmengine - INFO - Epoch(val) [1][5/5] acc/top1: 0.3000 acc/top5: 1.0000 acc/mean1: 0.3000 data_time: 0.1994 time: 0.2254\n", - "05/15 03:33:42 - mmengine - INFO - The best checkpoint with 0.3000 acc/top1 at 1 epoch is saved to best_acc_top1_epoch_1.pth.\n", - "05/15 03:33:46 - mmengine - INFO - Exp name: tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb_20230515_033334\n", - "05/15 03:33:46 - mmengine - INFO - 
Epoch(train) [2][15/15] lr: 7.8125e-05 eta: 0:00:29 time: 0.2373 data_time: 0.1369 memory: 961 grad_norm: 12.4935 loss: 0.7158 top1_acc: 0.5000 top5_acc: 1.0000 loss_cls: 0.7158\n", - "05/15 03:33:48 - mmengine - INFO - Epoch(val) [2][5/5] acc/top1: 0.7000 acc/top5: 1.0000 acc/mean1: 0.7000 data_time: 0.2692 time: 0.3006\n", - "05/15 03:33:48 - mmengine - INFO - The previous best checkpoint /content/mmaction2/tutorial_exps/best_acc_top1_epoch_1.pth is removed\n", - "05/15 03:33:48 - mmengine - INFO - The best checkpoint with 0.7000 acc/top1 at 2 epoch is saved to best_acc_top1_epoch_2.pth.\n", - "05/15 03:33:51 - mmengine - INFO - Exp name: tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb_20230515_033334\n", - "05/15 03:33:51 - mmengine - INFO - Epoch(train) [3][15/15] lr: 7.8125e-05 eta: 0:00:24 time: 0.2112 data_time: 0.1163 memory: 961 grad_norm: 13.4063 loss: 0.7338 top1_acc: 0.0000 top5_acc: 1.0000 loss_cls: 0.7338\n", - "05/15 03:33:51 - mmengine - INFO - Saving checkpoint at 3 epochs\n", - "05/15 03:33:53 - mmengine - INFO - Epoch(val) [3][5/5] acc/top1: 0.4000 acc/top5: 1.0000 acc/mean1: 0.4000 data_time: 0.1669 time: 0.1906\n", - "05/15 03:33:56 - mmengine - INFO - Exp name: tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb_20230515_033334\n", - "05/15 03:33:56 - mmengine - INFO - Epoch(train) [4][15/15] lr: 7.8125e-05 eta: 0:00:19 time: 0.1750 data_time: 0.0907 memory: 961 grad_norm: 12.4322 loss: 0.6894 top1_acc: 0.0000 top5_acc: 1.0000 loss_cls: 0.6894\n", - "05/15 03:33:57 - mmengine - INFO - Epoch(val) [4][5/5] acc/top1: 0.7000 acc/top5: 1.0000 acc/mean1: 0.7000 data_time: 0.1791 time: 0.2030\n", - "05/15 03:34:00 - mmengine - INFO - Exp name: tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb_20230515_033334\n", - "05/15 03:34:00 - mmengine - INFO - Epoch(train) [5][15/15] lr: 7.8125e-05 eta: 0:00:16 time: 0.2016 data_time: 0.1155 memory: 961 grad_norm: 11.5982 loss: 0.6940 top1_acc: 0.0000 top5_acc: 1.0000 loss_cls: 
0.6940\n", - "05/15 03:34:02 - mmengine - INFO - Epoch(val) [5][5/5] acc/top1: 0.7000 acc/top5: 1.0000 acc/mean1: 0.7000 data_time: 0.3145 time: 0.3455\n", - "05/15 03:34:05 - mmengine - INFO - Exp name: tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb_20230515_033334\n", - "05/15 03:34:05 - mmengine - INFO - Epoch(train) [6][15/15] lr: 7.8125e-05 eta: 0:00:13 time: 0.2366 data_time: 0.1440 memory: 961 grad_norm: 12.0952 loss: 0.6667 top1_acc: 0.0000 top5_acc: 1.0000 loss_cls: 0.6667\n", - "05/15 03:34:05 - mmengine - INFO - Saving checkpoint at 6 epochs\n", - "05/15 03:34:08 - mmengine - INFO - Epoch(val) [6][5/5] acc/top1: 0.6000 acc/top5: 1.0000 acc/mean1: 0.6000 data_time: 0.2172 time: 0.2403\n", - "05/15 03:34:10 - mmengine - INFO - Exp name: tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb_20230515_033334\n", - "05/15 03:34:10 - mmengine - INFO - Epoch(train) [7][15/15] lr: 7.8125e-05 eta: 0:00:09 time: 0.1784 data_time: 0.0942 memory: 961 grad_norm: 12.4209 loss: 0.6570 top1_acc: 1.0000 top5_acc: 1.0000 loss_cls: 0.6570\n", - "05/15 03:34:11 - mmengine - INFO - Epoch(val) [7][5/5] acc/top1: 0.9000 acc/top5: 1.0000 acc/mean1: 0.9000 data_time: 0.1898 time: 0.2118\n", - "05/15 03:34:11 - mmengine - INFO - The previous best checkpoint /content/mmaction2/tutorial_exps/best_acc_top1_epoch_2.pth is removed\n", - "05/15 03:34:12 - mmengine - INFO - The best checkpoint with 0.9000 acc/top1 at 7 epoch is saved to best_acc_top1_epoch_7.pth.\n", - "05/15 03:34:15 - mmengine - INFO - Exp name: tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb_20230515_033334\n", - "05/15 03:34:15 - mmengine - INFO - Epoch(train) [8][15/15] lr: 7.8125e-05 eta: 0:00:06 time: 0.2073 data_time: 0.1220 memory: 961 grad_norm: 11.4271 loss: 0.6241 top1_acc: 1.0000 top5_acc: 1.0000 loss_cls: 0.6241\n", - "05/15 03:34:17 - mmengine - INFO - Epoch(val) [8][5/5] acc/top1: 1.0000 acc/top5: 1.0000 acc/mean1: 1.0000 data_time: 0.3497 time: 0.3890\n", - "05/15 
03:34:17 - mmengine - INFO - The previous best checkpoint /content/mmaction2/tutorial_exps/best_acc_top1_epoch_7.pth is removed\n", - "05/15 03:34:18 - mmengine - INFO - The best checkpoint with 1.0000 acc/top1 at 8 epoch is saved to best_acc_top1_epoch_8.pth.\n", - "05/15 03:34:21 - mmengine - INFO - Exp name: tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb_20230515_033334\n", - "05/15 03:34:21 - mmengine - INFO - Epoch(train) [9][15/15] lr: 7.8125e-05 eta: 0:00:03 time: 0.2309 data_time: 0.1390 memory: 961 grad_norm: 12.3066 loss: 0.6451 top1_acc: 0.5000 top5_acc: 1.0000 loss_cls: 0.6451\n", - "05/15 03:34:21 - mmengine - INFO - Saving checkpoint at 9 epochs\n", - "05/15 03:34:23 - mmengine - INFO - Epoch(val) [9][5/5] acc/top1: 1.0000 acc/top5: 1.0000 acc/mean1: 1.0000 data_time: 0.2023 time: 0.2256\n", - "05/15 03:34:26 - mmengine - INFO - Exp name: tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb_20230515_033334\n", - "05/15 03:34:26 - mmengine - INFO - Epoch(train) [10][15/15] lr: 7.8125e-05 eta: 0:00:00 time: 0.1733 data_time: 0.0951 memory: 961 grad_norm: 11.1461 loss: 0.5931 top1_acc: 1.0000 top5_acc: 1.0000 loss_cls: 0.5931\n", - "05/15 03:34:26 - mmengine - INFO - Saving checkpoint at 10 epochs\n", - "05/15 03:34:27 - mmengine - INFO - Epoch(val) [10][5/5] acc/top1: 1.0000 acc/top5: 1.0000 acc/mean1: 1.0000 data_time: 0.1836 time: 0.2048\n" - ] - }, - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "Recognizer2D(\n", - " (data_preprocessor): ActionDataPreprocessor()\n", - " (backbone): ResNet(\n", - " (conv1): ConvModule(\n", - " (conv): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)\n", - " (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (activate): ReLU(inplace=True)\n", - " )\n", - " (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)\n", - " (layer1): Sequential(\n", - " (0): Bottleneck(\n", - " 
(conv1): ConvModule(\n", - " (conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (activate): ReLU(inplace=True)\n", - " )\n", - " (conv2): ConvModule(\n", - " (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", - " (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (activate): ReLU(inplace=True)\n", - " )\n", - " (conv3): ConvModule(\n", - " (conv): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " )\n", - " (relu): ReLU(inplace=True)\n", - " (downsample): ConvModule(\n", - " (conv): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " )\n", - " )\n", - " (1): Bottleneck(\n", - " (conv1): ConvModule(\n", - " (conv): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (activate): ReLU(inplace=True)\n", - " )\n", - " (conv2): ConvModule(\n", - " (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", - " (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (activate): ReLU(inplace=True)\n", - " )\n", - " (conv3): ConvModule(\n", - " (conv): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " )\n", - " (relu): ReLU(inplace=True)\n", - " )\n", - " (2): Bottleneck(\n", - " (conv1): ConvModule(\n", - " (conv): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, 
track_running_stats=True)\n", - " (activate): ReLU(inplace=True)\n", - " )\n", - " (conv2): ConvModule(\n", - " (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", - " (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (activate): ReLU(inplace=True)\n", - " )\n", - " (conv3): ConvModule(\n", - " (conv): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " )\n", - " (relu): ReLU(inplace=True)\n", - " )\n", - " )\n", - " (layer2): Sequential(\n", - " (0): Bottleneck(\n", - " (conv1): ConvModule(\n", - " (conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (activate): ReLU(inplace=True)\n", - " )\n", - " (conv2): ConvModule(\n", - " (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n", - " (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (activate): ReLU(inplace=True)\n", - " )\n", - " (conv3): ConvModule(\n", - " (conv): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " )\n", - " (relu): ReLU(inplace=True)\n", - " (downsample): ConvModule(\n", - " (conv): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)\n", - " (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " )\n", - " )\n", - " (1): Bottleneck(\n", - " (conv1): ConvModule(\n", - " (conv): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (activate): ReLU(inplace=True)\n", - " )\n", - " (conv2): ConvModule(\n", - " (conv): Conv2d(128, 
128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", - " (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (activate): ReLU(inplace=True)\n", - " )\n", - " (conv3): ConvModule(\n", - " (conv): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " )\n", - " (relu): ReLU(inplace=True)\n", - " )\n", - " (2): Bottleneck(\n", - " (conv1): ConvModule(\n", - " (conv): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (activate): ReLU(inplace=True)\n", - " )\n", - " (conv2): ConvModule(\n", - " (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", - " (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (activate): ReLU(inplace=True)\n", - " )\n", - " (conv3): ConvModule(\n", - " (conv): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " )\n", - " (relu): ReLU(inplace=True)\n", - " )\n", - " (3): Bottleneck(\n", - " (conv1): ConvModule(\n", - " (conv): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (activate): ReLU(inplace=True)\n", - " )\n", - " (conv2): ConvModule(\n", - " (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", - " (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (activate): ReLU(inplace=True)\n", - " )\n", - " (conv3): ConvModule(\n", - " (conv): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, 
affine=True, track_running_stats=True)\n", - " )\n", - " (relu): ReLU(inplace=True)\n", - " )\n", - " )\n", - " (layer3): Sequential(\n", - " (0): Bottleneck(\n", - " (conv1): ConvModule(\n", - " (conv): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (activate): ReLU(inplace=True)\n", - " )\n", - " (conv2): ConvModule(\n", - " (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n", - " (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (activate): ReLU(inplace=True)\n", - " )\n", - " (conv3): ConvModule(\n", - " (conv): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " )\n", - " (relu): ReLU(inplace=True)\n", - " (downsample): ConvModule(\n", - " (conv): Conv2d(512, 1024, kernel_size=(1, 1), stride=(2, 2), bias=False)\n", - " (bn): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " )\n", - " )\n", - " (1): Bottleneck(\n", - " (conv1): ConvModule(\n", - " (conv): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (activate): ReLU(inplace=True)\n", - " )\n", - " (conv2): ConvModule(\n", - " (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", - " (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (activate): ReLU(inplace=True)\n", - " )\n", - " (conv3): ConvModule(\n", - " (conv): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " )\n", - " (relu): ReLU(inplace=True)\n", - " )\n", - " (2): 
Bottleneck(\n", - " (conv1): ConvModule(\n", - " (conv): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (activate): ReLU(inplace=True)\n", - " )\n", - " (conv2): ConvModule(\n", - " (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", - " (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (activate): ReLU(inplace=True)\n", - " )\n", - " (conv3): ConvModule(\n", - " (conv): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " )\n", - " (relu): ReLU(inplace=True)\n", - " )\n", - " (3): Bottleneck(\n", - " (conv1): ConvModule(\n", - " (conv): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (activate): ReLU(inplace=True)\n", - " )\n", - " (conv2): ConvModule(\n", - " (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", - " (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (activate): ReLU(inplace=True)\n", - " )\n", - " (conv3): ConvModule(\n", - " (conv): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " )\n", - " (relu): ReLU(inplace=True)\n", - " )\n", - " (4): Bottleneck(\n", - " (conv1): ConvModule(\n", - " (conv): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (activate): ReLU(inplace=True)\n", - " )\n", - " (conv2): ConvModule(\n", - " (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), 
bias=False)\n", - " (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (activate): ReLU(inplace=True)\n", - " )\n", - " (conv3): ConvModule(\n", - " (conv): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " )\n", - " (relu): ReLU(inplace=True)\n", - " )\n", - " (5): Bottleneck(\n", - " (conv1): ConvModule(\n", - " (conv): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (activate): ReLU(inplace=True)\n", - " )\n", - " (conv2): ConvModule(\n", - " (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", - " (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (activate): ReLU(inplace=True)\n", - " )\n", - " (conv3): ConvModule(\n", - " (conv): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " )\n", - " (relu): ReLU(inplace=True)\n", - " )\n", - " )\n", - " (layer4): Sequential(\n", - " (0): Bottleneck(\n", - " (conv1): ConvModule(\n", - " (conv): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (activate): ReLU(inplace=True)\n", - " )\n", - " (conv2): ConvModule(\n", - " (conv): Conv2d(512, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n", - " (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (activate): ReLU(inplace=True)\n", - " )\n", - " (conv3): ConvModule(\n", - " (conv): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, 
track_running_stats=True)\n", - " )\n", - " (relu): ReLU(inplace=True)\n", - " (downsample): ConvModule(\n", - " (conv): Conv2d(1024, 2048, kernel_size=(1, 1), stride=(2, 2), bias=False)\n", - " (bn): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " )\n", - " )\n", - " (1): Bottleneck(\n", - " (conv1): ConvModule(\n", - " (conv): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (activate): ReLU(inplace=True)\n", - " )\n", - " (conv2): ConvModule(\n", - " (conv): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", - " (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (activate): ReLU(inplace=True)\n", - " )\n", - " (conv3): ConvModule(\n", - " (conv): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " )\n", - " (relu): ReLU(inplace=True)\n", - " )\n", - " (2): Bottleneck(\n", - " (conv1): ConvModule(\n", - " (conv): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (activate): ReLU(inplace=True)\n", - " )\n", - " (conv2): ConvModule(\n", - " (conv): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", - " (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (activate): ReLU(inplace=True)\n", - " )\n", - " (conv3): ConvModule(\n", - " (conv): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " )\n", - " (relu): ReLU(inplace=True)\n", - " )\n", - " )\n", - " )\n", - " (cls_head): TSNHead(\n", - " (loss_cls): 
CrossEntropyLoss()\n", - " (consensus): AvgConsensus()\n", - " (avg_pool): AdaptiveAvgPool2d(output_size=(1, 1))\n", - " (dropout): Dropout(p=0.4, inplace=False)\n", - " (fc_cls): Linear(in_features=2048, out_features=2, bias=True)\n", - " )\n", - ")" - ] - }, - "metadata": {}, - "execution_count": 15 - } - ], - "source": [ - "import os.path as osp\n", - "import mmengine\n", - "from mmengine.runner import Runner\n", - "\n", - "# Create work_dir\n", - "mmengine.mkdir_or_exist(osp.abspath(cfg.work_dir))\n", - "\n", - "# build the runner from config\n", - "runner = Runner.from_cfg(cfg)\n", - "\n", - "# start training\n", - "runner.train()" - ] + "output_type": "stream", + "name": "stdout", + "text": [ + "05/15 03:33:37 - mmengine - INFO - These parameters in pretrained checkpoint are not loaded: {'fc.weight', 'fc.bias'}\n", + "Loads checkpoint by local backend from path: ./checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth\n", + "The model and loaded state dict do not match exactly\n", + "\n", + "size mismatch for cls_head.fc_cls.weight: copying a param with shape torch.Size([400, 2048]) from checkpoint, the shape in current model is torch.Size([2, 2048]).\n", + "size mismatch for cls_head.fc_cls.bias: copying a param with shape torch.Size([400]) from checkpoint, the shape in current model is torch.Size([2]).\n", + "05/15 03:33:37 - mmengine - INFO - Load checkpoint from ./checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth\n", + "05/15 03:33:37 - mmengine - WARNING - \"FileClient\" will be deprecated in future. 
Please use io functions in https://mmengine.readthedocs.io/en/latest/api/fileio.html#file-io\n", + "05/15 03:33:37 - mmengine - INFO - Checkpoints will be saved to /content/mmaction2/tutorial_exps.\n", + "05/15 03:33:41 - mmengine - INFO - Exp name: tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb_20230515_033334\n", + "05/15 03:33:41 - mmengine - INFO - Epoch(train) [1][15/15] lr: 7.8125e-05 eta: 0:00:31 time: 0.2334 data_time: 0.0793 memory: 2917 grad_norm: 11.9900 loss: 0.6971 top1_acc: 1.0000 top5_acc: 1.0000 loss_cls: 0.6971\n", + "05/15 03:33:42 - mmengine - INFO - Epoch(val) [1][5/5] acc/top1: 0.3000 acc/top5: 1.0000 acc/mean1: 0.3000 data_time: 0.1994 time: 0.2254\n", + "05/15 03:33:42 - mmengine - INFO - The best checkpoint with 0.3000 acc/top1 at 1 epoch is saved to best_acc_top1_epoch_1.pth.\n", + "05/15 03:33:46 - mmengine - INFO - Exp name: tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb_20230515_033334\n", + "05/15 03:33:46 - mmengine - INFO - Epoch(train) [2][15/15] lr: 7.8125e-05 eta: 0:00:29 time: 0.2373 data_time: 0.1369 memory: 961 grad_norm: 12.4935 loss: 0.7158 top1_acc: 0.5000 top5_acc: 1.0000 loss_cls: 0.7158\n", + "05/15 03:33:48 - mmengine - INFO - Epoch(val) [2][5/5] acc/top1: 0.7000 acc/top5: 1.0000 acc/mean1: 0.7000 data_time: 0.2692 time: 0.3006\n", + "05/15 03:33:48 - mmengine - INFO - The previous best checkpoint /content/mmaction2/tutorial_exps/best_acc_top1_epoch_1.pth is removed\n", + "05/15 03:33:48 - mmengine - INFO - The best checkpoint with 0.7000 acc/top1 at 2 epoch is saved to best_acc_top1_epoch_2.pth.\n", + "05/15 03:33:51 - mmengine - INFO - Exp name: tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb_20230515_033334\n", + "05/15 03:33:51 - mmengine - INFO - Epoch(train) [3][15/15] lr: 7.8125e-05 eta: 0:00:24 time: 0.2112 data_time: 0.1163 memory: 961 grad_norm: 13.4063 loss: 0.7338 top1_acc: 0.0000 top5_acc: 1.0000 loss_cls: 0.7338\n", + "05/15 03:33:51 - mmengine - INFO - Saving 
checkpoint at 3 epochs\n", + "05/15 03:33:53 - mmengine - INFO - Epoch(val) [3][5/5] acc/top1: 0.4000 acc/top5: 1.0000 acc/mean1: 0.4000 data_time: 0.1669 time: 0.1906\n", + "05/15 03:33:56 - mmengine - INFO - Exp name: tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb_20230515_033334\n", + "05/15 03:33:56 - mmengine - INFO - Epoch(train) [4][15/15] lr: 7.8125e-05 eta: 0:00:19 time: 0.1750 data_time: 0.0907 memory: 961 grad_norm: 12.4322 loss: 0.6894 top1_acc: 0.0000 top5_acc: 1.0000 loss_cls: 0.6894\n", + "05/15 03:33:57 - mmengine - INFO - Epoch(val) [4][5/5] acc/top1: 0.7000 acc/top5: 1.0000 acc/mean1: 0.7000 data_time: 0.1791 time: 0.2030\n", + "05/15 03:34:00 - mmengine - INFO - Exp name: tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb_20230515_033334\n", + "05/15 03:34:00 - mmengine - INFO - Epoch(train) [5][15/15] lr: 7.8125e-05 eta: 0:00:16 time: 0.2016 data_time: 0.1155 memory: 961 grad_norm: 11.5982 loss: 0.6940 top1_acc: 0.0000 top5_acc: 1.0000 loss_cls: 0.6940\n", + "05/15 03:34:02 - mmengine - INFO - Epoch(val) [5][5/5] acc/top1: 0.7000 acc/top5: 1.0000 acc/mean1: 0.7000 data_time: 0.3145 time: 0.3455\n", + "05/15 03:34:05 - mmengine - INFO - Exp name: tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb_20230515_033334\n", + "05/15 03:34:05 - mmengine - INFO - Epoch(train) [6][15/15] lr: 7.8125e-05 eta: 0:00:13 time: 0.2366 data_time: 0.1440 memory: 961 grad_norm: 12.0952 loss: 0.6667 top1_acc: 0.0000 top5_acc: 1.0000 loss_cls: 0.6667\n", + "05/15 03:34:05 - mmengine - INFO - Saving checkpoint at 6 epochs\n", + "05/15 03:34:08 - mmengine - INFO - Epoch(val) [6][5/5] acc/top1: 0.6000 acc/top5: 1.0000 acc/mean1: 0.6000 data_time: 0.2172 time: 0.2403\n", + "05/15 03:34:10 - mmengine - INFO - Exp name: tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb_20230515_033334\n", + "05/15 03:34:10 - mmengine - INFO - Epoch(train) [7][15/15] lr: 7.8125e-05 eta: 0:00:09 time: 0.1784 data_time: 0.0942 memory: 961 grad_norm: 
12.4209 loss: 0.6570 top1_acc: 1.0000 top5_acc: 1.0000 loss_cls: 0.6570\n", + "05/15 03:34:11 - mmengine - INFO - Epoch(val) [7][5/5] acc/top1: 0.9000 acc/top5: 1.0000 acc/mean1: 0.9000 data_time: 0.1898 time: 0.2118\n", + "05/15 03:34:11 - mmengine - INFO - The previous best checkpoint /content/mmaction2/tutorial_exps/best_acc_top1_epoch_2.pth is removed\n", + "05/15 03:34:12 - mmengine - INFO - The best checkpoint with 0.9000 acc/top1 at 7 epoch is saved to best_acc_top1_epoch_7.pth.\n", + "05/15 03:34:15 - mmengine - INFO - Exp name: tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb_20230515_033334\n", + "05/15 03:34:15 - mmengine - INFO - Epoch(train) [8][15/15] lr: 7.8125e-05 eta: 0:00:06 time: 0.2073 data_time: 0.1220 memory: 961 grad_norm: 11.4271 loss: 0.6241 top1_acc: 1.0000 top5_acc: 1.0000 loss_cls: 0.6241\n", + "05/15 03:34:17 - mmengine - INFO - Epoch(val) [8][5/5] acc/top1: 1.0000 acc/top5: 1.0000 acc/mean1: 1.0000 data_time: 0.3497 time: 0.3890\n", + "05/15 03:34:17 - mmengine - INFO - The previous best checkpoint /content/mmaction2/tutorial_exps/best_acc_top1_epoch_7.pth is removed\n", + "05/15 03:34:18 - mmengine - INFO - The best checkpoint with 1.0000 acc/top1 at 8 epoch is saved to best_acc_top1_epoch_8.pth.\n", + "05/15 03:34:21 - mmengine - INFO - Exp name: tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb_20230515_033334\n", + "05/15 03:34:21 - mmengine - INFO - Epoch(train) [9][15/15] lr: 7.8125e-05 eta: 0:00:03 time: 0.2309 data_time: 0.1390 memory: 961 grad_norm: 12.3066 loss: 0.6451 top1_acc: 0.5000 top5_acc: 1.0000 loss_cls: 0.6451\n", + "05/15 03:34:21 - mmengine - INFO - Saving checkpoint at 9 epochs\n", + "05/15 03:34:23 - mmengine - INFO - Epoch(val) [9][5/5] acc/top1: 1.0000 acc/top5: 1.0000 acc/mean1: 1.0000 data_time: 0.2023 time: 0.2256\n", + "05/15 03:34:26 - mmengine - INFO - Exp name: tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb_20230515_033334\n", + "05/15 03:34:26 - mmengine - INFO - 
Epoch(train) [10][15/15] lr: 7.8125e-05 eta: 0:00:00 time: 0.1733 data_time: 0.0951 memory: 961 grad_norm: 11.1461 loss: 0.5931 top1_acc: 1.0000 top5_acc: 1.0000 loss_cls: 0.5931\n", + "05/15 03:34:26 - mmengine - INFO - Saving checkpoint at 10 epochs\n", + "05/15 03:34:27 - mmengine - INFO - Epoch(val) [10][5/5] acc/top1: 1.0000 acc/top5: 1.0000 acc/mean1: 1.0000 data_time: 0.1836 time: 0.2048\n" + ] }, { - "cell_type": "markdown", - "metadata": { - "id": "zdSd7oTLlxIf" - }, - "source": [ - "### Understand the log\n", - "From the log, we can have a basic understanding the training process and know how well the recognizer is trained.\n", - "\n", - "Firstly, the ResNet-50 backbone pre-trained on ImageNet is loaded, this is a common practice since training from scratch is more cost. The log shows that all the weights of the ResNet-50 backbone are loaded except the `fc.bias` and `fc.weight`.\n", - "\n", - "Second, since the dataset we are using is small, we loaded a TSN model and finetune it for action recognition.\n", - "The original TSN is trained on original Kinetics-400 dataset which contains 400 classes but Kinetics-400 Tiny dataset only have 2 classes. Therefore, the last FC layer of the pre-trained TSN for classification has different weight shape and is not used.\n", - "\n", - "Third, after training, the recognizer is evaluated by the default evaluation. The results show that the recognizer achieves 100% top1 accuracy and 100% top5 accuracy on the val dataset,\n", - " \n", - "Not bad!" 
+ "output_type": "execute_result", + "data": { + "text/plain": [ + "Recognizer2D(\n", + " (data_preprocessor): ActionDataPreprocessor()\n", + " (backbone): ResNet(\n", + " (conv1): ConvModule(\n", + " (conv): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)\n", + " (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (activate): ReLU(inplace=True)\n", + " )\n", + " (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)\n", + " (layer1): Sequential(\n", + " (0): Bottleneck(\n", + " (conv1): ConvModule(\n", + " (conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (activate): ReLU(inplace=True)\n", + " )\n", + " (conv2): ConvModule(\n", + " (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", + " (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (activate): ReLU(inplace=True)\n", + " )\n", + " (conv3): ConvModule(\n", + " (conv): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " )\n", + " (relu): ReLU(inplace=True)\n", + " (downsample): ConvModule(\n", + " (conv): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " )\n", + " )\n", + " (1): Bottleneck(\n", + " (conv1): ConvModule(\n", + " (conv): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (activate): ReLU(inplace=True)\n", + " )\n", + " (conv2): ConvModule(\n", + " (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", + " (bn): BatchNorm2d(64, 
eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (activate): ReLU(inplace=True)\n", + " )\n", + " (conv3): ConvModule(\n", + " (conv): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " )\n", + " (relu): ReLU(inplace=True)\n", + " )\n", + " (2): Bottleneck(\n", + " (conv1): ConvModule(\n", + " (conv): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (activate): ReLU(inplace=True)\n", + " )\n", + " (conv2): ConvModule(\n", + " (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", + " (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (activate): ReLU(inplace=True)\n", + " )\n", + " (conv3): ConvModule(\n", + " (conv): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " )\n", + " (relu): ReLU(inplace=True)\n", + " )\n", + " )\n", + " (layer2): Sequential(\n", + " (0): Bottleneck(\n", + " (conv1): ConvModule(\n", + " (conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (activate): ReLU(inplace=True)\n", + " )\n", + " (conv2): ConvModule(\n", + " (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n", + " (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (activate): ReLU(inplace=True)\n", + " )\n", + " (conv3): ConvModule(\n", + " (conv): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " )\n", + " (relu): 
ReLU(inplace=True)\n", + " (downsample): ConvModule(\n", + " (conv): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)\n", + " (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " )\n", + " )\n", + " (1): Bottleneck(\n", + " (conv1): ConvModule(\n", + " (conv): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (activate): ReLU(inplace=True)\n", + " )\n", + " (conv2): ConvModule(\n", + " (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", + " (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (activate): ReLU(inplace=True)\n", + " )\n", + " (conv3): ConvModule(\n", + " (conv): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " )\n", + " (relu): ReLU(inplace=True)\n", + " )\n", + " (2): Bottleneck(\n", + " (conv1): ConvModule(\n", + " (conv): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (activate): ReLU(inplace=True)\n", + " )\n", + " (conv2): ConvModule(\n", + " (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", + " (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (activate): ReLU(inplace=True)\n", + " )\n", + " (conv3): ConvModule(\n", + " (conv): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " )\n", + " (relu): ReLU(inplace=True)\n", + " )\n", + " (3): Bottleneck(\n", + " (conv1): ConvModule(\n", + " (conv): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + 
" (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (activate): ReLU(inplace=True)\n", + " )\n", + " (conv2): ConvModule(\n", + " (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", + " (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (activate): ReLU(inplace=True)\n", + " )\n", + " (conv3): ConvModule(\n", + " (conv): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " )\n", + " (relu): ReLU(inplace=True)\n", + " )\n", + " )\n", + " (layer3): Sequential(\n", + " (0): Bottleneck(\n", + " (conv1): ConvModule(\n", + " (conv): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (activate): ReLU(inplace=True)\n", + " )\n", + " (conv2): ConvModule(\n", + " (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n", + " (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (activate): ReLU(inplace=True)\n", + " )\n", + " (conv3): ConvModule(\n", + " (conv): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " (bn): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " )\n", + " (relu): ReLU(inplace=True)\n", + " (downsample): ConvModule(\n", + " (conv): Conv2d(512, 1024, kernel_size=(1, 1), stride=(2, 2), bias=False)\n", + " (bn): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " )\n", + " )\n", + " (1): Bottleneck(\n", + " (conv1): ConvModule(\n", + " (conv): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (activate): 
ReLU(inplace=True)\n", + " )\n", + " (conv2): ConvModule(\n", + " (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", + " (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (activate): ReLU(inplace=True)\n", + " )\n", + " (conv3): ConvModule(\n", + " (conv): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " (bn): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " )\n", + " (relu): ReLU(inplace=True)\n", + " )\n", + " (2): Bottleneck(\n", + " (conv1): ConvModule(\n", + " (conv): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (activate): ReLU(inplace=True)\n", + " )\n", + " (conv2): ConvModule(\n", + " (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", + " (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (activate): ReLU(inplace=True)\n", + " )\n", + " (conv3): ConvModule(\n", + " (conv): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " (bn): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " )\n", + " (relu): ReLU(inplace=True)\n", + " )\n", + " (3): Bottleneck(\n", + " (conv1): ConvModule(\n", + " (conv): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (activate): ReLU(inplace=True)\n", + " )\n", + " (conv2): ConvModule(\n", + " (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", + " (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (activate): ReLU(inplace=True)\n", + " )\n", + " (conv3): ConvModule(\n", + " (conv): Conv2d(256, 1024, kernel_size=(1, 1), 
stride=(1, 1), bias=False)\n", + " (bn): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " )\n", + " (relu): ReLU(inplace=True)\n", + " )\n", + " (4): Bottleneck(\n", + " (conv1): ConvModule(\n", + " (conv): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (activate): ReLU(inplace=True)\n", + " )\n", + " (conv2): ConvModule(\n", + " (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", + " (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (activate): ReLU(inplace=True)\n", + " )\n", + " (conv3): ConvModule(\n", + " (conv): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " (bn): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " )\n", + " (relu): ReLU(inplace=True)\n", + " )\n", + " (5): Bottleneck(\n", + " (conv1): ConvModule(\n", + " (conv): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (activate): ReLU(inplace=True)\n", + " )\n", + " (conv2): ConvModule(\n", + " (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", + " (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (activate): ReLU(inplace=True)\n", + " )\n", + " (conv3): ConvModule(\n", + " (conv): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " (bn): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " )\n", + " (relu): ReLU(inplace=True)\n", + " )\n", + " )\n", + " (layer4): Sequential(\n", + " (0): Bottleneck(\n", + " (conv1): ConvModule(\n", + " (conv): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " (bn): 
BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (activate): ReLU(inplace=True)\n", + " )\n", + " (conv2): ConvModule(\n", + " (conv): Conv2d(512, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n", + " (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (activate): ReLU(inplace=True)\n", + " )\n", + " (conv3): ConvModule(\n", + " (conv): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " (bn): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " )\n", + " (relu): ReLU(inplace=True)\n", + " (downsample): ConvModule(\n", + " (conv): Conv2d(1024, 2048, kernel_size=(1, 1), stride=(2, 2), bias=False)\n", + " (bn): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " )\n", + " )\n", + " (1): Bottleneck(\n", + " (conv1): ConvModule(\n", + " (conv): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (activate): ReLU(inplace=True)\n", + " )\n", + " (conv2): ConvModule(\n", + " (conv): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", + " (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (activate): ReLU(inplace=True)\n", + " )\n", + " (conv3): ConvModule(\n", + " (conv): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " (bn): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " )\n", + " (relu): ReLU(inplace=True)\n", + " )\n", + " (2): Bottleneck(\n", + " (conv1): ConvModule(\n", + " (conv): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (activate): ReLU(inplace=True)\n", + " )\n", + " (conv2): 
ConvModule(\n", + " (conv): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", + " (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (activate): ReLU(inplace=True)\n", + " )\n", + " (conv3): ConvModule(\n", + " (conv): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " (bn): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " )\n", + " (relu): ReLU(inplace=True)\n", + " )\n", + " )\n", + " )\n", + " (cls_head): TSNHead(\n", + " (loss_cls): CrossEntropyLoss()\n", + " (consensus): AvgConsensus()\n", + " (avg_pool): AdaptiveAvgPool2d(output_size=(1, 1))\n", + " (dropout): Dropout(p=0.4, inplace=False)\n", + " (fc_cls): Linear(in_features=2048, out_features=2, bias=True)\n", + " )\n", + ")" ] + }, + "metadata": {}, + "execution_count": 15 + } + ], + "source": [ + "import os.path as osp\n", + "import mmengine\n", + "from mmengine.runner import Runner\n", + "\n", + "# Create work_dir\n", + "mmengine.mkdir_or_exist(osp.abspath(cfg.work_dir))\n", + "\n", + "# build the runner from config\n", + "runner = Runner.from_cfg(cfg)\n", + "\n", + "# start training\n", + "runner.train()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zdSd7oTLlxIf" + }, + "source": [ + "### Understand the log\n", + "From the log, we can have a basic understanding the training process and know how well the recognizer is trained.\n", + "\n", + "Firstly, the ResNet-50 backbone pre-trained on ImageNet is loaded, this is a common practice since training from scratch is more cost. 
The log shows that all the weights of the ResNet-50 backbone are loaded except the `fc.bias` and `fc.weight`.\n", + "\n", + "Second, since the dataset we are using is small, we load a TSN model and finetune it for action recognition.\n", + "The original TSN is trained on the original Kinetics-400 dataset, which contains 400 classes, but the Kinetics-400 Tiny dataset only has 2 classes. Therefore, the last FC layer of the pre-trained TSN for classification has a different weight shape and is not used.\n", + "\n", + "Third, after training, the recognizer is evaluated by the default evaluation. The results show that the recognizer achieves 100% top1 accuracy and 100% top5 accuracy on the val dataset.\n", + " \n", + "Not bad!" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ryVoSfZVmogw" + }, + "source": [ + "## Test the trained recognizer\n", + "\n", + "After finetuning the recognizer, let's check the prediction results!" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "eyY3hCMwyTct", + "outputId": "34fbbdc5-b9fd-4fd2-8030-3ba56b10adbf" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "ryVoSfZVmogw" - }, - "source": [ - "## Test the trained recognizer\n", - "\n", - "After finetuning the recognizer, let's check the prediction results!" 
- ] + "output_type": "stream", + "name": "stdout", + "text": [ + "05/15 03:34:36 - mmengine - INFO - Epoch(test) [10/10] acc/top1: 0.9000 acc/top5: 1.0000 acc/mean1: 0.9000 data_time: 0.0586 time: 0.7817\n" + ] }, { - "cell_type": "code", - "execution_count": 16, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "eyY3hCMwyTct", - "outputId": "34fbbdc5-b9fd-4fd2-8030-3ba56b10adbf" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "05/15 03:34:36 - mmengine - INFO - Epoch(test) [10/10] acc/top1: 0.9000 acc/top5: 1.0000 acc/mean1: 0.9000 data_time: 0.0586 time: 0.7817\n" - ] - }, - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "{'acc/top1': 0.9, 'acc/top5': 1.0, 'acc/mean1': 0.9}" - ] - }, - "metadata": {}, - "execution_count": 16 - } - ], - "source": [ - "runner.test()" + "output_type": "execute_result", + "data": { + "text/plain": [ + "{'acc/top1': 0.9, 'acc/top5': 1.0, 'acc/mean1': 0.9}" ] + }, + "metadata": {}, + "execution_count": 16 } - ], - "metadata": { - "accelerator": "GPU", - "colab": { - "provenance": [], - "toc_visible": true - }, - "kernelspec": { - "display_name": "mmact_dev", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.12" - }, - "vscode": { - "interpreter": { - "hash": "189c342a4747645665e89db23000ac4d4edb7a87c4cd0b2f881610f468fb778d" - } - } + ], + "source": [ + "runner.test()" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "provenance": [], + "toc_visible": true + }, + "kernelspec": { + "display_name": "mmact_dev", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": 
"text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.12" }, - "nbformat": 4, - "nbformat_minor": 0 + "vscode": { + "interpreter": { + "hash": "189c342a4747645665e89db23000ac4d4edb7a87c4cd0b2f881610f468fb778d" + } + } + }, + "nbformat": 4, + "nbformat_minor": 0 } diff --git a/demo/webcam_demo.py b/demo/webcam_demo.py index cdd8585540..de87c8aa32 100644 --- a/demo/webcam_demo.py +++ b/demo/webcam_demo.py @@ -139,7 +139,7 @@ def inference(): # Forward the model with torch.no_grad(): result = model.test_step(cur_data)[0] - scores = result.pred_scores.item.tolist() + scores = result.pred_score.tolist() scores = np.array(scores) score_cache.append(scores) scores_sum += scores diff --git a/docs/en/get_started/guide_to_framework.md b/docs/en/get_started/guide_to_framework.md index c65d65331b..3dc1c2314b 100644 --- a/docs/en/get_started/guide_to_framework.md +++ b/docs/en/get_started/guide_to_framework.md @@ -179,7 +179,8 @@ class VideoPack(BaseTransform): def transform(self, results): packed_results = dict() inputs = to_tensor(results['imgs']) - data_sample = ActionDataSample().set_gt_labels(results['label']) + data_sample = ActionDataSample() + data_sample.set_gt_label(results['label']) metainfo = {k: results[k] for k in self.meta_keys if k in results} data_sample.set_metainfo(metainfo) packed_results['inputs'] = inputs @@ -219,7 +220,7 @@ print('num_clips: ', data_sample.num_clips) print('clip_len: ', data_sample.clip_len) # Get label of the inputs -print('label: ', data_sample.gt_labels.item) +print('label: ', data_sample.gt_label) ``` ``` @@ -321,7 +322,7 @@ print('num_clips: ', data_sample.num_clips) print('clip_len: ', data_sample.clip_len) # Get label of the inputs -print('label: ', data_sample.gt_labels.item) +print('label: ', data_sample.gt_label) from mmengine.runner import Runner @@ -481,7 +482,7 @@ class ClsHeadZelda(BaseModule): def loss(self, feats, data_samples): cls_scores = self(feats) - 
labels = torch.stack([x.gt_labels.item for x in data_samples]) + labels = torch.stack([x.gt_label for x in data_samples]) labels = labels.squeeze() if labels.shape == torch.Size([]): @@ -589,8 +590,8 @@ with torch.no_grad(): data_batch_test = copy.deepcopy(batched_packed_results) data = model.data_preprocessor(data_batch_test, training=False) predictions = model(**data, mode='predict') -print('Label of Sample[0]', predictions[0].gt_labels.item) -print('Scores of Sample[0]', predictions[0].pred_scores.item) +print('Label of Sample[0]', predictions[0].gt_label) +print('Scores of Sample[0]', predictions[0].pred_score) ``` ```shell @@ -661,8 +662,8 @@ class AccuracyMetric(BaseMetric): data_samples = copy.deepcopy(data_samples) for data_sample in data_samples: result = dict() - scores = data_sample['pred_scores']['item'].cpu().numpy() - label = data_sample['gt_labels']['item'].item() + scores = data_sample['pred_score'].cpu().numpy() + label = data_sample['gt_label'].item() result['scores'] = scores result['label'] = label self.results.append(result) diff --git a/docs/en/get_started/installation.md b/docs/en/get_started/installation.md index 8cc64b7798..1685f97478 100644 --- a/docs/en/get_started/installation.md +++ b/docs/en/get_started/installation.md @@ -121,7 +121,7 @@ label_file = 'tools/data/kinetics/label_map_k400.txt' model = init_recognizer(config_file, checkpoint_file, device='cpu') # or device='cuda:0' pred_result = inference_recognizer(model, video_file) -pred_scores = pred_result.pred_scores.item.tolist() +pred_scores = pred_result.pred_score.tolist() score_tuples = tuple(zip(range(len(pred_scores)), pred_scores)) score_sorted = sorted(score_tuples, key=itemgetter(1), reverse=True) top5_label = score_sorted[:5] diff --git a/docs/zh_cn/get_started/guide_to_framework.md b/docs/zh_cn/get_started/guide_to_framework.md index b92c376b5d..0dc6462195 100644 --- a/docs/zh_cn/get_started/guide_to_framework.md +++ b/docs/zh_cn/get_started/guide_to_framework.md @@ 
-180,7 +180,7 @@ class VideoPack(BaseTransform): def transform(self, results): packed_results = dict() inputs = to_tensor(results['imgs']) - data_sample = ActionDataSample().set_gt_labels(results['label']) + data_sample = ActionDataSample().set_gt_label(results['label']) metainfo = {k: results[k] for k in self.meta_keys if k in results} data_sample.set_metainfo(metainfo) packed_results['inputs'] = inputs @@ -220,7 +220,7 @@ print('num_clips: ', data_sample.num_clips) print('clip_len: ', data_sample.clip_len) # 获取输入的标签 -print('label: ', data_sample.gt_labels.item) +print('label: ', data_sample.gt_label) ``` ``` @@ -322,7 +322,7 @@ print('num_clips: ', data_sample.num_clips) print('clip_len: ', data_sample.clip_len) # 获取输入的标签 -print('label: ', data_sample.gt_labels.item) +print('label: ', data_sample.gt_label) from mmengine.runner import Runner @@ -482,7 +482,7 @@ class ClsHeadZelda(BaseModule): def loss(self, feats, data_samples): cls_scores = self(feats) - labels = torch.stack([x.gt_labels.item for x in data_samples]) + labels = torch.stack([x.gt_label for x in data_samples]) labels = labels.squeeze() if labels.shape == torch.Size([]): @@ -590,8 +590,8 @@ with torch.no_grad(): data_batch_test = copy.deepcopy(batched_packed_results) data = model.data_preprocessor(data_batch_test, training=False) predictions = model(**data, mode='predict') -print('Label of Sample[0]', predictions[0].gt_labels.item) -print('Scores of Sample[0]', predictions[0].pred_scores.item) +print('Label of Sample[0]', predictions[0].gt_label) +print('Scores of Sample[0]', predictions[0].pred_score) ``` ```shell @@ -662,8 +662,8 @@ class AccuracyMetric(BaseMetric): data_samples = copy.deepcopy(data_samples) for data_sample in data_samples: result = dict() - scores = data_sample['pred_scores']['item'].cpu().numpy() - label = data_sample['gt_labels']['item'].item() + scores = data_sample['pred_score'].cpu().numpy() + label = data_sample['gt_label'].item() result['scores'] = scores result['label'] = 
label self.results.append(result) diff --git a/docs/zh_cn/get_started/installation.md b/docs/zh_cn/get_started/installation.md index 0e144ce6eb..091a8a5e03 100644 --- a/docs/zh_cn/get_started/installation.md +++ b/docs/zh_cn/get_started/installation.md @@ -120,7 +120,7 @@ label_file = 'tools/data/kinetics/label_map_k400.txt' model = init_recognizer(config_file, checkpoint_file, device='cpu') # or device='cuda:0' pred_result = inference_recognizer(model, video_file) -pred_scores = pred_result.pred_scores.item.tolist() +pred_scores = pred_result.pred_score.tolist() score_tuples = tuple(zip(range(len(pred_scores)), pred_scores)) score_sorted = sorted(score_tuples, key=itemgetter(1), reverse=True) top5_label = score_sorted[:5] diff --git a/mmaction/apis/inference.py b/mmaction/apis/inference.py index 749395099e..4b2b4f8c4b 100644 --- a/mmaction/apis/inference.py +++ b/mmaction/apis/inference.py @@ -70,7 +70,7 @@ def inference_recognizer(model: nn.Module, Returns: :obj:`ActionDataSample`: The inference results. Specifically, the - predicted scores are saved at ``result.pred_scores.item``. + predicted scores are saved at ``result.pred_score``. """ if test_pipeline is None: @@ -131,7 +131,7 @@ def inference_skeleton(model: nn.Module, Returns: :obj:`ActionDataSample`: The inference results. Specifically, the - predicted scores are saved at ``result.pred_scores.item``. + predicted scores are saved at ``result.pred_score``. """ if test_pipeline is None: cfg = model.cfg diff --git a/mmaction/apis/inferencers/actionrecog_inferencer.py b/mmaction/apis/inferencers/actionrecog_inferencer.py index f45f137b59..cc6e60b0da 100644 --- a/mmaction/apis/inferencers/actionrecog_inferencer.py +++ b/mmaction/apis/inferencers/actionrecog_inferencer.py @@ -356,6 +356,6 @@ def pred2dict(self, data_sample: ActionDataSample) -> Dict: dict: The output dictionary. 
""" result = {} - result['pred_labels'] = data_sample.pred_labels.item.tolist() - result['pred_scores'] = data_sample.pred_scores.item.tolist() + result['pred_labels'] = data_sample.pred_label.tolist() + result['pred_scores'] = data_sample.pred_score.tolist() return result diff --git a/mmaction/datasets/transforms/formatting.py b/mmaction/datasets/transforms/formatting.py index 168509be30..fb67e10c0e 100644 --- a/mmaction/datasets/transforms/formatting.py +++ b/mmaction/datasets/transforms/formatting.py @@ -4,7 +4,7 @@ import numpy as np import torch from mmcv.transforms import BaseTransform, to_tensor -from mmengine.structures import InstanceData, LabelData +from mmengine.structures import InstanceData from mmaction.registry import TRANSFORMS from mmaction.structures import ActionDataSample @@ -12,20 +12,11 @@ @TRANSFORMS.register_module() class PackActionInputs(BaseTransform): - """Pack the input data for the recognition. - - PackActionInputs first packs one of 'imgs', 'keypoint' and 'audios' into - the `packed_results['inputs']`, which are the three basic input modalities - for the task of rgb-based, skeleton-based and audio-based action - recognition, as well as spatio-temporal action detection in the case - of 'img'. Next, it prepares a `data_sample` for the task of action - recognition (only a single label of `torch.LongTensor` format, which is - saved in the `data_sample.gt_labels.item`) or spatio-temporal action - detection respectively. Then, it saves the meta keys defined in - the `meta_keys` in `data_sample.metainfo`, and packs the `data_sample` - into the `packed_results['data_samples']`. + """Pack the inputs data. Args: + collect_keys (tuple[str], optional): The keys to be collected + to ``packed_results['inputs']``. Defaults to `` meta_keys (Sequence[str]): The meta keys to saved in the `metainfo` of the `data_sample`. Defaults to ``('img_shape', 'img_key', 'video_id', 'timestamp')``. 
@@ -95,9 +86,7 @@ def transform(self, results: Dict) -> Dict: bboxes=to_tensor(results['proposals'])) if 'label' in results: - label_data = LabelData() - label_data.item = to_tensor(results['label']) - data_sample.gt_labels = label_data + data_sample.set_gt_label(results['label']) img_meta = {k: results[k] for k in self.meta_keys if k in results} data_sample.set_metainfo(img_meta) diff --git a/mmaction/evaluation/metrics/acc_metric.py b/mmaction/evaluation/metrics/acc_metric.py index 9abc20fa6c..04985e5938 100644 --- a/mmaction/evaluation/metrics/acc_metric.py +++ b/mmaction/evaluation/metrics/acc_metric.py @@ -75,17 +75,23 @@ def process(self, data_batch: Sequence[Tuple[Any, Dict]], data_samples = copy.deepcopy(data_samples) for data_sample in data_samples: result = dict() - pred = data_sample['pred_scores'] - label = data_sample['gt_labels'] - for item_name, score in pred.items(): - pred[item_name] = score.cpu().numpy() + pred = data_sample['pred_score'] + label = data_sample['gt_label'] + + # Ad-hoc for RGBPoseConv3D + if isinstance(pred, dict): + for item_name, score in pred.items(): + pred[item_name] = score.cpu().numpy() + else: + pred = pred.cpu().numpy() + result['pred'] = pred - if label['item'].size(0) == 1: + if label.size(0) == 1: # single-label - result['label'] = label['item'].item() + result['label'] = label.item() else: # multi-label - result['label'] = label['item'].cpu().numpy() + result['label'] = label.cpu().numpy() self.results.append(result) def compute_metrics(self, results: List) -> Dict: @@ -100,39 +106,41 @@ def compute_metrics(self, results: List) -> Dict: """ labels = [x['label'] for x in results] - if len(results[0]['pred']) == 1: - preds = [x['pred']['item'] for x in results] - return self.calculate(preds, labels) - eval_results = dict() - for item_name in results[0]['pred'].keys(): - preds = [x['pred'][item_name] for x in results] - eval_result = self.calculate(preds, labels) - eval_results.update( - {f'{item_name}_{k}': v - for k, v 
in eval_result.items()}) - # Ad-hoc for RGBPoseConv3D - if len(results[0]['pred']) == 2 and \ - 'rgb' in results[0]['pred'] and \ - 'pose' in results[0]['pred']: - - rgb = [x['pred']['rgb'] for x in results] - pose = [x['pred']['pose'] for x in results] - - preds = { - '1:1': get_weighted_score([rgb, pose], [1, 1]), - '2:1': get_weighted_score([rgb, pose], [2, 1]), - '1:2': get_weighted_score([rgb, pose], [1, 2]) - } - for k in preds: - eval_result = self.calculate(preds[k], labels) - eval_results.update({ - f'RGBPose_{k}_{key}': v - for key, v in eval_result.items() - }) - - return eval_results + if isinstance(results[0]['pred'], dict): + + for item_name in results[0]['pred'].keys(): + preds = [x['pred'][item_name] for x in results] + eval_result = self.calculate(preds, labels) + eval_results.update( + {f'{item_name}_{k}': v + for k, v in eval_result.items()}) + + if len(results[0]['pred']) == 2 and \ + 'rgb' in results[0]['pred'] and \ + 'pose' in results[0]['pred']: + + rgb = [x['pred']['rgb'] for x in results] + pose = [x['pred']['pose'] for x in results] + + preds = { + '1:1': get_weighted_score([rgb, pose], [1, 1]), + '2:1': get_weighted_score([rgb, pose], [2, 1]), + '1:2': get_weighted_score([rgb, pose], [1, 2]) + } + for k in preds: + eval_result = self.calculate(preds[k], labels) + eval_results.update({ + f'RGBPose_{k}_{key}': v + for key, v in eval_result.items() + }) + return eval_results + + # Simple Acc Calculation + else: + preds = [x['pred'] for x in results] + return self.calculate(preds, labels) def calculate(self, preds: List[np.ndarray], labels: List[Union[int, np.ndarray]]) -> Dict: @@ -238,13 +246,13 @@ def __init__(self, def process(self, data_batch, data_samples: Sequence[dict]) -> None: for data_sample in data_samples: - pred_scores = data_sample.get('pred_scores') - gt_label = data_sample['gt_labels']['item'] + pred_scores = data_sample.get('pred_score') + gt_label = data_sample['gt_label'] if pred_scores is not None: - pred_label = 
pred_scores['item'].argmax(dim=0, keepdim=True) - self.num_classes = pred_scores['item'].size(0) + pred_label = pred_scores.argmax(dim=0, keepdim=True) + self.num_classes = pred_scores.size(0) else: - pred_label = data_sample['pred_labels']['item'] + pred_label = data_sample['pred_label'] self.results.append({ 'pred_label': pred_label, diff --git a/mmaction/models/data_preprocessors/data_preprocessor.py b/mmaction/models/data_preprocessors/data_preprocessor.py index 891cb8f386..0376318ff7 100644 --- a/mmaction/models/data_preprocessors/data_preprocessor.py +++ b/mmaction/models/data_preprocessors/data_preprocessor.py @@ -84,7 +84,7 @@ def forward(self, data = self.cast_data(data) if isinstance(data, dict): return self.forward_onesample(data, training=training) - elif isinstance(data, tuple): + elif isinstance(data, (tuple, list)): outputs = [] for data_sample in data: output = self.forward_onesample(data_sample, training=training) diff --git a/mmaction/models/heads/base.py b/mmaction/models/heads/base.py index c39da5aa9a..8febe1df5b 100644 --- a/mmaction/models/heads/base.py +++ b/mmaction/models/heads/base.py @@ -6,7 +6,6 @@ import torch.nn as nn import torch.nn.functional as F from mmengine.model import BaseModule -from mmengine.structures import LabelData from mmaction.evaluation import top_k_accuracy from mmaction.registry import MODELS @@ -112,7 +111,7 @@ def loss_by_feat(self, cls_scores: torch.Tensor, Returns: dict: A dictionary of loss components. """ - labels = [x.gt_labels.item for x in data_samples] + labels = [x.gt_label for x in data_samples] labels = torch.stack(labels).to(cls_scores.device) labels = labels.squeeze() @@ -175,7 +174,7 @@ def predict_by_feat(self, cls_scores: torch.Tensor, (B*num_segs, num_classes) data_samples (list[:obj:`ActionDataSample`]): The annotation data of every samples. It usually includes - information such as `gt_labels`. + information such as `gt_label`. 
Returns: List[:obj:`ActionDataSample`]: Recognition results wrapped @@ -187,10 +186,8 @@ def predict_by_feat(self, cls_scores: torch.Tensor, for data_sample, score, pred_label in zip(data_samples, cls_scores, pred_labels): - prediction = LabelData(item=score) - pred_label = LabelData(item=pred_label) - data_sample.pred_scores = prediction - data_sample.pred_labels = pred_label + data_sample.set_pred_score(score) + data_sample.set_pred_label(pred_label) return data_samples def average_clip(self, diff --git a/mmaction/models/heads/omni_head.py b/mmaction/models/heads/omni_head.py index f5084dde06..7a62cf56da 100644 --- a/mmaction/models/heads/omni_head.py +++ b/mmaction/models/heads/omni_head.py @@ -87,10 +87,7 @@ def loss_by_feat(self, cls_scores: Union[Tensor, Tuple[Tensor]], Returns: dict: A dictionary of loss components. """ - if hasattr(data_samples[0], 'gt_labels'): - labels = [x.gt_labels.item for x in data_samples] - else: - labels = [x.gt_label.label for x in data_samples] + labels = [x.gt_label for x in data_samples] labels = torch.stack(labels).to(cls_scores.device) labels = labels.squeeze() diff --git a/mmaction/models/heads/rgbpose_head.py b/mmaction/models/heads/rgbpose_head.py index 69da4efed9..880e37f084 100644 --- a/mmaction/models/heads/rgbpose_head.py +++ b/mmaction/models/heads/rgbpose_head.py @@ -5,7 +5,6 @@ import torch.nn as nn import torch.nn.functional as F from mmengine.model.weight_init import normal_init -from mmengine.structures import LabelData from mmaction.evaluation import top_k_accuracy from mmaction.registry import MODELS @@ -110,7 +109,7 @@ def loss_by_feat(self, cls_scores: Dict[str, torch.Tensor], Returns: dict: A dictionary of loss components. 
""" - labels = torch.stack([x.gt_labels.item for x in data_samples]) + labels = torch.stack([x.gt_label for x in data_samples]) labels = labels.squeeze() if labels.shape == torch.Size([]): @@ -192,34 +191,26 @@ def predict_by_feat(self, cls_scores: Dict[str, torch.Tensor], classification scores, data_samples (list[:obj:`ActionDataSample`]): The annotation data of every samples. It usually includes - information such as `gt_labels`. + information such as `gt_label`. Returns: list[:obj:`ActionDataSample`]: Recognition results wrapped by :obj:`ActionDataSample`. """ - pred_scores = [LabelData() for _ in range(len(data_samples))] - pred_labels = [LabelData() for _ in range(len(data_samples))] + pred_scores = [dict() for _ in range(len(data_samples))] for name in self.loss_components: cls_score = cls_scores[name] - cls_score, pred_label = \ - self.predict_by_scores(cls_score, data_samples) - for pred_score, pred_label, score, label in zip( - pred_scores, pred_labels, cls_score, pred_label): - pred_score.set_data({f'{name}': score}) - pred_label.set_data({f'{name}': label}) - - for data_sample, pred_score, pred_label in zip(data_samples, - pred_scores, - pred_labels): - data_sample.pred_scores = pred_score - data_sample.pred_labels = pred_label + cls_score = self.predict_by_scores(cls_score, data_samples) + for pred_score, score in zip(pred_scores, cls_score): + pred_score[f'{name}'] = score + for data_sample, pred_score, in zip(data_samples, pred_scores): + data_sample.set_pred_score(pred_score) return data_samples def predict_by_scores(self, cls_scores: torch.Tensor, - data_samples: SampleList) -> Tuple: + data_samples: SampleList) -> torch.Tensor: """Transform a batch of output features extracted from the head into prediction results. @@ -230,11 +221,9 @@ def predict_by_scores(self, cls_scores: torch.Tensor, data of every samples. Returns: - tuple: A tuple of the averaged classification scores and - prediction labels. 
+ torch.Tensor: The averaged classification scores. """ num_segs = cls_scores.shape[0] // len(data_samples) cls_scores = self.average_clip(cls_scores, num_segs=num_segs) - pred_labels = cls_scores.argmax(dim=-1, keepdim=True).detach() - return cls_scores, pred_labels + return cls_scores diff --git a/mmaction/models/necks/tpn.py b/mmaction/models/necks/tpn.py index b3cdc92ff9..c04dde4123 100644 --- a/mmaction/models/necks/tpn.py +++ b/mmaction/models/necks/tpn.py @@ -254,7 +254,7 @@ def loss(self, x: torch.Tensor, data_samples: Optional[SampleList]) -> dict: """Calculate auxiliary loss.""" x = self(x) - labels = [x.gt_labels.item for x in data_samples] + labels = [x.gt_label for x in data_samples] labels = torch.stack(labels).to(x.device) labels = labels.squeeze() if labels.shape == torch.Size([]): diff --git a/mmaction/models/recognizers/base.py b/mmaction/models/recognizers/base.py index 7ce2a51b1f..ced45380cf 100644 --- a/mmaction/models/recognizers/base.py +++ b/mmaction/models/recognizers/base.py @@ -162,7 +162,7 @@ def loss(self, inputs: torch.Tensor, data_samples: SampleList, These should usually be mean centered and std scaled. data_samples (List[``ActionDataSample``]): The batch data samples. It usually includes information such - as ``gt_labels``. + as ``gt_label``. Returns: dict: A dictionary of loss components. @@ -187,7 +187,7 @@ def predict(self, inputs: torch.Tensor, data_samples: SampleList, These should usually be mean centered and std scaled. data_samples (List[``ActionDataSample``]): The batch data samples. It usually includes information such - as ``gt_labels``. + as ``gt_label``. Returns: List[``ActionDataSample``]: Return the recognition results. 
diff --git a/mmaction/models/utils/blending_utils.py b/mmaction/models/utils/blending_utils.py index 2d3732eeb1..855ca226b1 100644 --- a/mmaction/models/utils/blending_utils.py +++ b/mmaction/models/utils/blending_utils.py @@ -55,18 +55,18 @@ def __call__(self, imgs: torch.Tensor, batch_data_samples: SampleList, shape of (B, N, C, H, W) or (B, N, C, T, H, W). batch_data_samples (List[:obj:`ActionDataSample`]): The batch data samples. It usually includes information such - as `gt_labels`. + as `gt_label`. Returns: mixed_imgs (torch.Tensor): Blending images, float tensor with the same shape of the input imgs. batch_data_samples (List[:obj:`ActionDataSample`]): The modified - batch data samples. ``gt_labels`` in each data sample are + batch data samples. ``gt_label`` in each data sample are converted from a hard label to a blended soft label, float tensor with the shape of (num_classes, ) and all elements are in range [0, 1]. """ - label = [x.gt_labels.item for x in batch_data_samples] + label = [x.gt_label for x in batch_data_samples] # single-label classification if label[0].size(0) == 1: label = torch.tensor(label, dtype=torch.long).to(imgs.device) @@ -79,7 +79,7 @@ def __call__(self, imgs: torch.Tensor, batch_data_samples: SampleList, **kwargs) for label_item, sample in zip(mixed_label, batch_data_samples): - sample.gt_labels.item = label_item + sample.set_gt_label(label_item) return mixed_imgs, batch_data_samples diff --git a/mmaction/structures/action_data_sample.py b/mmaction/structures/action_data_sample.py index 6ea146cba2..79bec540a0 100644 --- a/mmaction/structures/action_data_sample.py +++ b/mmaction/structures/action_data_sample.py @@ -1,15 +1,16 @@ # Copyright (c) OpenMMLab. All rights reserved. 
-from numbers import Number -from typing import Sequence, Union +from typing import Dict, Sequence, Union import numpy as np import torch -from mmengine.structures import BaseDataElement, InstanceData, LabelData +from mmengine.structures import BaseDataElement, InstanceData from mmengine.utils import is_str +LABEL_TYPE = Union[torch.Tensor, np.ndarray, Sequence, int] +SCORE_TYPE = Union[torch.Tensor, np.ndarray, Sequence, Dict] -def format_label(value: Union[torch.Tensor, np.ndarray, Sequence, - int]) -> torch.Tensor: + +def format_label(value: LABEL_TYPE) -> torch.Tensor: """Convert various python types to label-format tensor. Supported types are: :class:`numpy.ndarray`, :class:`torch.Tensor`, @@ -19,7 +20,7 @@ def format_label(value: Union[torch.Tensor, np.ndarray, Sequence, value (torch.Tensor | numpy.ndarray | Sequence | int): Label value. Returns: - :obj:`torch.Tensor`: The foramtted label tensor. + :obj:`torch.Tensor`: The formatted label tensor. """ # Handle single number @@ -34,119 +35,62 @@ def format_label(value: Union[torch.Tensor, np.ndarray, Sequence, value = torch.LongTensor([value]) elif not isinstance(value, torch.Tensor): raise TypeError(f'Type {type(value)} is not an available label type.') - assert value.ndim == 1, \ - f'The dims of value should be 1, but got {value.ndim}.' return value -def format_score(value: Union[torch.Tensor, np.ndarray, - Sequence]) -> torch.Tensor: +def format_score(value: SCORE_TYPE) -> Union[torch.Tensor, Dict]: """Convert various python types to score-format tensor. Supported types are: :class:`numpy.ndarray`, :class:`torch.Tensor`, :class:`Sequence`. Args: - value (torch.Tensor | numpy.ndarray | Sequence): Score values. + value (torch.Tensor | numpy.ndarray | Sequence | dict): + Score values or dict of scores values. Returns: - :obj:`torch.Tensor`: The foramtted score tensor. + :obj:`torch.Tensor` | dict: The formatted scores. 
""" if isinstance(value, np.ndarray): value = torch.from_numpy(value).float() elif isinstance(value, Sequence) and not is_str(value): value = torch.tensor(value).float() + elif isinstance(value, dict): + for k, v in value.items(): + value[k] = format_score(v) elif not isinstance(value, torch.Tensor): raise TypeError(f'Type {type(value)} is not an available label type.') - assert value.ndim == 1, \ - f'The dims of value should be 1, but got {value.ndim}.' return value class ActionDataSample(BaseDataElement): - def set_gt_labels( - self, value: Union[np.ndarray, torch.Tensor, Sequence[Number], Number] - ) -> 'ActionDataSample': - """Set label of ``gt_labels``.""" - label_data = getattr(self, '_gt_label', LabelData()) - label_data.item = format_label(value) - self.gt_labels = label_data + def set_gt_label(self, value: LABEL_TYPE) -> 'ActionDataSample': + """Set `gt_label``.""" + self.set_field(format_label(value), 'gt_label', dtype=torch.Tensor) return self - def set_pred_label( - self, value: Union[np.ndarray, torch.Tensor, Sequence[Number], Number] - ) -> 'ActionDataSample': - """Set label of ``pred_label``.""" - label_data = getattr(self, '_pred_label', LabelData()) - label_data.item = format_label(value) - self.pred_labels = label_data + def set_pred_label(self, value: LABEL_TYPE) -> 'ActionDataSample': + """Set ``pred_label``.""" + self.set_field(format_label(value), 'pred_label', dtype=torch.Tensor) return self - def set_pred_score(self, value: torch.Tensor) -> 'ActionDataSample': + def set_pred_score(self, value: SCORE_TYPE) -> 'ActionDataSample': """Set score of ``pred_label``.""" - label_data = getattr(self, '_pred_label', LabelData()) - label_data.item = format_score(value) + score = format_score(value) + self.set_field(score, 'pred_score') if hasattr(self, 'num_classes'): - assert len(label_data.item) == self.num_classes, \ - f'The length of score {len(label_data.item)} should be '\ + assert len(score) == self.num_classes, \ + f'The length of score 
{len(score)} should be '\ f'equal to the num_classes {self.num_classes}.' else: self.set_field( - name='num_classes', - value=len(label_data.item), - field_type='metainfo') - self.pred_scores = label_data + name='num_classes', value=len(score), field_type='metainfo') return self - @property - def gt_labels(self): - """Property of `gt_labels`""" - return self._gt_labels - - @gt_labels.setter - def gt_labels(self, value): - """Setter of `gt_labels`""" - self.set_field(value, '_gt_labels', LabelData) - - @gt_labels.deleter - def gt_labels(self): - """Deleter of `gt_labels`""" - del self._gt_labels - - @property - def pred_scores(self): - """Property of `pred_scores`""" - return self._pred_scores - - @pred_scores.setter - def pred_scores(self, value): - """Setter of `pred_scores`""" - self.set_field(value, '_pred_scores', LabelData) - - @pred_scores.deleter - def pred_scores(self): - """Deleter of `pred_scores`""" - del self._pred_scores - - @property - def pred_labels(self): - """Property of `pred_labels`""" - return self._pred_labels - - @pred_labels.setter - def pred_labels(self, value): - """Setter of `pred_labels`""" - self.set_field(value, '_pred_labels', LabelData) - - @pred_labels.deleter - def pred_labels(self): - """Deleter of `pred_labels`""" - del self._pred_labels - @property def proposals(self): """Property of `proposals`""" diff --git a/mmaction/utils/gradcam_utils.py b/mmaction/utils/gradcam_utils.py index 23f124f554..3d1a7f8f47 100644 --- a/mmaction/utils/gradcam_utils.py +++ b/mmaction/utils/gradcam_utils.py @@ -94,11 +94,11 @@ def _calculate_localization_map(self, self.model.cls_head.average_clips = 'score' # model forward & backward results = self.model.test_step(data) - preds = [result.pred_scores.item for result in results] + preds = [result.pred_score for result in results] preds = torch.stack(preds) if use_labels: - labels = [result.gt_labels.item for result in results] + labels = [result.gt_label for result in results] labels = 
torch.stack(labels) score = torch.gather(preds, dim=1, index=labels) else: diff --git a/mmaction/visualization/action_visualizer.py b/mmaction/visualization/action_visualizer.py index 5924669c83..7a3bfab85e 100644 --- a/mmaction/visualization/action_visualizer.py +++ b/mmaction/visualization/action_visualizer.py @@ -63,7 +63,7 @@ class ActionVisualizer(Visualizer): >>> video = video.get_batch(range(32)).asnumpy() >>> # Example annotation >>> data_sample = ActionDataSample() - >>> data_sample.gt_labels = LabelData(item=torch.tensor([2])) + >>> data_sample.gt_label = LabelData(item=torch.tensor([2])) >>> # Setup the visualizer >>> vis = ActionVisualizer( ... save_dir="./outputs", @@ -215,8 +215,8 @@ def add_datasample(self, self.set_image(frame) if draw_gt and 'gt_labels' in data_sample: - gt_labels = data_sample.gt_labels - idx = gt_labels.item.tolist() + gt_labels = data_sample.gt_label + idx = gt_labels.tolist() class_labels = [''] * len(idx) if classes is not None: class_labels = [f' ({classes[i]})' for i in idx] diff --git a/projects/actionclip/README.md b/projects/actionclip/README.md index cfaf0e3f2b..df694fd538 100644 --- a/projects/actionclip/README.md +++ b/projects/actionclip/README.md @@ -140,7 +140,7 @@ device = "cuda" if torch.cuda.is_available() else "cpu" model = init_recognizer(config=config, checkpoint=checkpoint_path, device=device) pred_result = inference_recognizer(model, 'test.mp4') -probs = pred_result.pred_scores.item.cpu().numpy() +probs = pred_result.pred_score.cpu().numpy() print("Label probs:", probs) # [9.995e-01 5.364e-07 6.666e-04] ``` diff --git a/tests/apis/test_inference.py b/tests/apis/test_inference.py index 1b004943f7..749c3af01b 100644 --- a/tests/apis/test_inference.py +++ b/tests/apis/test_inference.py @@ -66,7 +66,7 @@ def test_inference_recognizer(self, config, video_path, devices): result = inference_recognizer(model, video_path) self.assertIsInstance(result, ActionDataSample) - self.assertTrue(result.pred_scores.item.shape, 
(400, )) + self.assertTrue(result.pred_score.shape, (400, )) def test_detection_inference(self): from mmdet.apis import init_detector diff --git a/tests/datasets/transforms/test_formating.py b/tests/datasets/transforms/test_formating.py index e12a1a95d7..93e32249b5 100644 --- a/tests/datasets/transforms/test_formating.py +++ b/tests/datasets/transforms/test_formating.py @@ -34,7 +34,7 @@ def test_transform(self): self.assertIn('data_samples', results) self.assertIsInstance(results['inputs'], torch.Tensor) self.assertEqual(results['inputs'].shape, (2, 300, 17, 3)) - self.assertEqual(results['data_samples'].gt_labels.item, + self.assertEqual(results['data_samples'].gt_label, torch.LongTensor([1])) # heatmap_imgs input @@ -45,7 +45,7 @@ def test_transform(self): self.assertIn('data_samples', results) self.assertIsInstance(results['inputs'], torch.Tensor) self.assertEqual(results['inputs'].shape, (2, 17, 56, 56)) - self.assertEqual(results['data_samples'].gt_labels.item, + self.assertEqual(results['data_samples'].gt_label, torch.LongTensor([1])) # audios input @@ -82,7 +82,7 @@ def test_transform(self): self.assertIsInstance(results['inputs'], torch.Tensor) self.assertIsInstance(results['data_samples'], ActionDataSample) self.assertEqual(results['data_samples'].img_shape, (256, 256, 3)) - self.assertEqual(results['data_samples'].gt_labels.item, + self.assertEqual(results['data_samples'].gt_label, torch.LongTensor([1])) # Test grayscale image diff --git a/tests/evaluation/metrics/test_acc_metric.py b/tests/evaluation/metrics/test_acc_metric.py index aeb6fb2cb0..b0e966933e 100644 --- a/tests/evaluation/metrics/test_acc_metric.py +++ b/tests/evaluation/metrics/test_acc_metric.py @@ -26,8 +26,7 @@ def generate_data(num_classes=5, random_label=False, multi_label=False): label = torch.randint(num_classes, size=[1]) else: label = torch.LongTensor([scores.argmax().item()]) - data_sample = dict( - pred_scores=dict(item=scores), gt_labels=dict(item=label)) + data_sample = 
dict(pred_score=scores, gt_label=label) data_samples.append(data_sample) return data_batch, data_samples @@ -97,7 +96,7 @@ def test_evaluate(self): """Test using the metric in the same way as Evalutor.""" pred = [ ActionDataSample().set_pred_score(i).set_pred_label( - j).set_gt_labels(k).to_dict() for i, j, k in zip([ + j).set_gt_label(k).to_dict() for i, j, k in zip([ torch.tensor([0.7, 0.0, 0.3]), torch.tensor([0.5, 0.2, 0.3]), torch.tensor([0.4, 0.5, 0.1]), @@ -122,7 +121,7 @@ def test_evaluate(self): # Test with label for sample in pred: - del sample['pred_scores'] + del sample['pred_score'] metric = METRICS.build(dict(type='ConfusionMatrix')) metric.process(None, pred) with self.assertRaisesRegex(AssertionError, diff --git a/tests/models/data_preprocessors/test_data_preprocessor.py b/tests/models/data_preprocessors/test_data_preprocessor.py index 5fe3e8f663..9591305691 100644 --- a/tests/models/data_preprocessors/test_data_preprocessor.py +++ b/tests/models/data_preprocessors/test_data_preprocessor.py @@ -15,7 +15,7 @@ def generate_dummy_data(batch_size, input_shape): 'inputs': [torch.randint(0, 255, input_shape) for _ in range(batch_size)], 'data_samples': - [ActionDataSample().set_gt_labels(2) for _ in range(batch_size)] + [ActionDataSample().set_gt_label(2) for _ in range(batch_size)] } return data @@ -53,8 +53,8 @@ def test_data_preprocessor(): format_shape='NCTHW', blending=dict(type='MixupBlending', num_classes=5)) data = psr(deepcopy(raw_data), training=True) - assert data['data_samples'][0].gt_labels.item.shape == (5, ) - assert data['data_samples'][1].gt_labels.item.shape == (5, ) + assert data['data_samples'][0].gt_label.shape == (5, ) + assert data['data_samples'][1].gt_label.shape == (5, ) raw_data = generate_dummy_data(2, (1, 3, 224, 224)) psr = ActionDataPreprocessor( diff --git a/tests/models/data_preprocessors/test_multimodal_data_preprocessor.py b/tests/models/data_preprocessors/test_multimodal_data_preprocessor.py index 35483bd5d9..671d2c1c96 
100644 --- a/tests/models/data_preprocessors/test_multimodal_data_preprocessor.py +++ b/tests/models/data_preprocessors/test_multimodal_data_preprocessor.py @@ -13,7 +13,7 @@ def generate_dummy_data(batch_size, input_keys, input_shapes): data = dict() data['data_samples'] = [ - ActionDataSample().set_gt_labels(2) for _ in range(batch_size) + ActionDataSample().set_gt_label(2) for _ in range(batch_size) ] data['inputs'] = dict() for key, shape in zip(input_keys, input_shapes): diff --git a/tests/models/heads/test_feature_head.py b/tests/models/heads/test_feature_head.py index 932ed87133..424016bc8d 100644 --- a/tests/models/heads/test_feature_head.py +++ b/tests/models/heads/test_feature_head.py @@ -27,7 +27,7 @@ def test_2d_recognizer(self): input_shape = [3, 3, 32, 32] data_batch = { 'inputs': [torch.randint(0, 256, input_shape)], - 'data_samples': [ActionDataSample().set_gt_labels(2)] + 'data_samples': [ActionDataSample().set_gt_label(2)] } feat = recognizer.test_step(data_batch) assert isinstance(feat, torch.Tensor) @@ -46,7 +46,7 @@ def test_3d_recognizer(self): input_shape = [1, 3, 4, 32, 32] data_batch = { 'inputs': [torch.randint(0, 256, input_shape)], - 'data_samples': [ActionDataSample().set_gt_labels(2)] + 'data_samples': [ActionDataSample().set_gt_label(2)] } feat = recognizer.test_step(data_batch) assert isinstance(feat, torch.Tensor) diff --git a/tests/models/heads/test_omni_head.py b/tests/models/heads/test_omni_head.py index f9181893af..2724830353 100644 --- a/tests/models/heads/test_omni_head.py +++ b/tests/models/heads/test_omni_head.py @@ -31,9 +31,7 @@ def testOmniHead(): video_feat = torch.randn(2, 400, 8, 8, 8) video_score = head(video_feat) assert video_score.shape == torch.Size([2, 200]) - data_samples = [ - obj('gt_label', obj('label', torch.tensor(1))) for _ in range(2) - ] + data_samples = [obj('gt_label', torch.tensor(1)) for _ in range(2)] losses = head.loss_by_feat(video_score, data_samples) assert 'loss_cls' in losses @@ -41,6 +39,6 @@ 
def testOmniHead(): head.eval() image_score = head(image_feat) assert image_score.shape == torch.Size([1, 100]) - data_samples = [obj('gt_labels', obj('item', torch.tensor(1)))] + data_samples = [obj('gt_label', torch.tensor(1))] losses = head.loss_by_feat(image_score, data_samples) assert 'loss_cls' in losses diff --git a/tests/models/necks/test_tpn.py b/tests/models/necks/test_tpn.py index 1e9387aa39..08cc17dedc 100644 --- a/tests/models/necks/test_tpn.py +++ b/tests/models/necks/test_tpn.py @@ -3,7 +3,6 @@ import pytest import torch -from mmengine.structures import LabelData from mmaction.models import TPN from mmaction.structures import ActionDataSample @@ -14,7 +13,7 @@ def get_label(label_): label = [] for idx, one_label in enumerate(label_): data_sample = ActionDataSample() - data_sample.gt_labels = LabelData(item=label_[idx]) + data_sample.set_gt_label(label_[idx]) label.append(data_sample) return label diff --git a/tests/models/recognizers/recognizer_omni.py b/tests/models/recognizers/recognizer_omni.py index 23c58748de..e06cd5c03f 100644 --- a/tests/models/recognizers/recognizer_omni.py +++ b/tests/models/recognizers/recognizer_omni.py @@ -12,8 +12,7 @@ def test_omni_resnet(): register_all_modules() config = get_recognizer_cfg( - 'omnisource/slowonly_r50_16xb16-8x8x1-256e_imagenet-kinetics400-rgb.py' - ) + 'omnisource/slowonly_r50_8xb16-8x8x1-256e_imagenet-kinetics400-rgb.py') recognizer = MODELS.build(config.model) # test train_step @@ -24,8 +23,8 @@ def test_omni_resnet(): torch.randint(0, 255, (1, 3, 8, 224, 224)) ], 'data_samples': [ - ActionDataSample().set_gt_labels(2), - ActionDataSample().set_gt_labels(2) + ActionDataSample().set_gt_label(2), + ActionDataSample().set_gt_label(2) ] } @@ -35,8 +34,8 @@ def test_omni_resnet(): torch.randint(0, 255, (1, 3, 224, 224)) ], 'data_samples': [ - ActionDataSample().set_gt_labels(2), - ActionDataSample().set_gt_labels(2) + ActionDataSample().set_gt_label(2), + ActionDataSample().set_gt_label(2) ] } @@ -54,7 
+53,7 @@ def test_omni_resnet(): # test test_step with torch.no_grad(): predictions = recognizer.test_step(video_sample) - score = predictions[0].pred_scores.item - assert len(predictions) == 1 + score = predictions[0].pred_score + assert len(predictions) == 2 assert torch.min(score) >= 0 assert torch.max(score) <= 1 diff --git a/tests/models/recognizers/test_recognizer2d.py b/tests/models/recognizers/test_recognizer2d.py index b40398755b..3a13b0ef37 100644 --- a/tests/models/recognizers/test_recognizer2d.py +++ b/tests/models/recognizers/test_recognizer2d.py @@ -21,7 +21,7 @@ def train_test_step(cfg, input_shape): 'inputs': [torch.randint(0, 256, input_shape) for i in range(batch_size)], 'data_samples': - [ActionDataSample().set_gt_labels(2) for i in range(batch_size)] + [ActionDataSample().set_gt_label(2) for i in range(batch_size)] } # test train_step @@ -34,7 +34,7 @@ def train_test_step(cfg, input_shape): # test test_step with torch.no_grad(): predictions = recognizer.test_step(data_batch) - score = predictions[0].pred_scores.item + score = predictions[0].pred_score assert len(predictions) == batch_size assert score.shape == torch.Size([num_classes]) assert torch.min(score) >= 0 @@ -46,7 +46,7 @@ def train_test_step(cfg, input_shape): data_batch['inputs'] = [torch.randint(0, 256, input_shape)] with torch.no_grad(): predictions = recognizer.test_step(data_batch) - score = predictions[0].pred_scores.item + score = predictions[0].pred_score assert len(predictions) == batch_size assert score.shape == torch.Size([num_classes]) diff --git a/tests/models/recognizers/test_recognizer3d.py b/tests/models/recognizers/test_recognizer3d.py index 7d80de00fb..c9f73d1a10 100644 --- a/tests/models/recognizers/test_recognizer3d.py +++ b/tests/models/recognizers/test_recognizer3d.py @@ -14,7 +14,7 @@ def train_test_step(cfg, input_shape): num_classes = cfg.model.cls_head.num_classes data_batch = { 'inputs': [torch.randint(0, 256, input_shape)], - 'data_samples': 
[ActionDataSample().set_gt_labels(2)] + 'data_samples': [ActionDataSample().set_gt_label(2)] } # test train_step @@ -27,7 +27,7 @@ def train_test_step(cfg, input_shape): # test test_step with torch.no_grad(): predictions = recognizer.test_step(data_batch) - score = predictions[0].pred_scores.item + score = predictions[0].pred_score assert len(predictions) == 1 assert score.shape == torch.Size([num_classes]) assert torch.min(score) >= 0 @@ -40,7 +40,7 @@ def train_test_step(cfg, input_shape): data_batch['inputs'] = [torch.randint(0, 256, input_shape)] with torch.no_grad(): predictions = recognizer.test_step(data_batch) - score = predictions[0].pred_scores.item + score = predictions[0].pred_score assert len(predictions) == 1 assert score.shape == torch.Size([num_views, num_classes]) diff --git a/tests/models/recognizers/test_recognizer_gcn.py b/tests/models/recognizers/test_recognizer_gcn.py index 7ae1441a6b..723c77d595 100644 --- a/tests/models/recognizers/test_recognizer_gcn.py +++ b/tests/models/recognizers/test_recognizer_gcn.py @@ -14,7 +14,7 @@ def train_test_step(cfg, input_shape): num_classes = cfg.model.cls_head.num_classes data_batch = { 'inputs': [torch.randn(input_shape)], - 'data_samples': [ActionDataSample().set_gt_labels(2)] + 'data_samples': [ActionDataSample().set_gt_label(2)] } # test train_step @@ -27,7 +27,7 @@ def train_test_step(cfg, input_shape): # test test_step with torch.no_grad(): predictions = recognizer.test_step(data_batch) - score = predictions[0].pred_scores.item + score = predictions[0].pred_score assert len(predictions) == 1 assert score.shape == torch.Size([num_classes]) assert torch.min(score) >= 0 @@ -40,7 +40,7 @@ def train_test_step(cfg, input_shape): data_batch['inputs'] = [torch.randn(input_shape)] with torch.no_grad(): predictions = recognizer.test_step(data_batch) - score = predictions[0].pred_scores.item + score = predictions[0].pred_score assert len(predictions) == 1 assert score.shape == torch.Size([num_clips, 
num_classes]) diff --git a/tests/models/utils/test_blending_utils.py b/tests/models/utils/test_blending_utils.py index 993b331093..e2eba9de47 100644 --- a/tests/models/utils/test_blending_utils.py +++ b/tests/models/utils/test_blending_utils.py @@ -4,7 +4,6 @@ import torch import torch.nn.functional as F from mmcv.transforms import to_tensor -from mmengine.structures import LabelData from mmaction.models import CutmixBlending, MixupBlending, RandomBatchAugment from mmaction.structures import ActionDataSample @@ -14,7 +13,7 @@ def get_label(label_): label = [] for idx, one_label in enumerate(label_): data_sample = ActionDataSample() - data_sample.gt_labels = LabelData(item=label_[idx]) + data_sample.set_gt_label(label_[idx]) label.append(data_sample) return label diff --git a/tests/models/utils/test_gradcam.py b/tests/models/utils/test_gradcam.py index e9568531c5..3982907bcb 100644 --- a/tests/models/utils/test_gradcam.py +++ b/tests/models/utils/test_gradcam.py @@ -41,7 +41,7 @@ def _do_test_2D_models(recognizer, device='cpu'): demo_data = { 'inputs': [torch.randint(0, 256, input_shape[1:])], - 'data_samples': [ActionDataSample().set_gt_labels(2)] + 'data_samples': [ActionDataSample().set_gt_label(2)] } recognizer = recognizer.to(device) @@ -67,7 +67,7 @@ def _do_test_3D_models(recognizer, input_shape, num_classes=num_classes, model_type='3D') demo_data = { 'inputs': [torch.randint(0, 256, input_shape[1:])], - 'data_samples': [ActionDataSample().set_gt_labels(2)] + 'data_samples': [ActionDataSample().set_gt_label(2)] } gradcam = GradCAM(recognizer, target_layer_name) diff --git a/tests/visualization/test_action_visualizer.py b/tests/visualization/test_action_visualizer.py index c86b324af9..298b59a842 100644 --- a/tests/visualization/test_action_visualizer.py +++ b/tests/visualization/test_action_visualizer.py @@ -3,8 +3,6 @@ import decord import pytest -import torch -from mmengine.structures import LabelData from mmaction.structures import ActionDataSample from 
mmaction.visualization import ActionVisualizer @@ -16,7 +14,7 @@ def test_visualizer(): video = video.get_batch(range(32)).asnumpy() data_sample = ActionDataSample() - data_sample.gt_labels = LabelData(item=torch.tensor([2])) + data_sample.set_gt_label(2) vis = ActionVisualizer() vis.add_datasample('demo', video) diff --git a/tests/visualization/test_video_backend.py b/tests/visualization/test_video_backend.py index c5153d812d..591646eb7a 100644 --- a/tests/visualization/test_video_backend.py +++ b/tests/visualization/test_video_backend.py @@ -8,8 +8,6 @@ import decord import pytest -import torch -from mmengine.structures import LabelData from mmaction.structures import ActionDataSample from mmaction.utils import register_all_modules @@ -24,7 +22,7 @@ def test_local_visbackend(): video = video.get_batch(range(32)).asnumpy() data_sample = ActionDataSample() - data_sample.gt_labels = LabelData(item=torch.tensor([2])) + data_sample.set_gt_label(2) with TemporaryDirectory() as tmp_dir: vis = ActionVisualizer( save_dir=tmp_dir, vis_backends=[dict(type='LocalVisBackend')]) @@ -46,7 +44,7 @@ def test_tensorboard_visbackend(): video = video.get_batch(range(32)).asnumpy() data_sample = ActionDataSample() - data_sample.gt_labels = LabelData(item=torch.tensor([2])) + data_sample.set_gt_label(2) with TemporaryDirectory() as tmp_dir: vis = ActionVisualizer( save_dir=tmp_dir, @@ -63,29 +61,3 @@ def test_tensorboard_visbackend(): # wait tensorboard store asynchronously time.sleep(1) return - - -""" -def test_wandb_visbackend(): - video = decord.VideoReader('./demo/demo.mp4') - video = video.get_batch(range(32)).asnumpy() - - data_sample = ActionDataSample() - data_sample.gt_labels = LabelData(item=torch.tensor([2])) - - vis = ActionVisualizer( - save_dir='./outputs', vis_backends=[dict(type='WandbVisBackend')]) - vis.add_datasample('demo', video, data_sample, step=1) - - wandb_dir = 'outputs/vis_data/wandb/' - assert Path(wandb_dir).exists() - - flag = False - for item in 
os.listdir(wandb_dir): - if item.startswith('run-') and os.path.isdir('%s/%s' % - (wandb_dir, item)): - flag = True - break - assert flag, 'Cannot find wandb folder!' - return -""" diff --git a/tools/analysis_tools/report_accuracy.py b/tools/analysis_tools/report_accuracy.py index c361f644de..d5c529dfe1 100644 --- a/tools/analysis_tools/report_accuracy.py +++ b/tools/analysis_tools/report_accuracy.py @@ -39,20 +39,13 @@ def main(): data_sample_list = [load(f) for f in args.preds] score_list = [] for data_samples in data_sample_list: - scores = [ - sample['pred_scores']['item'].numpy() for sample in data_samples - ] + scores = [sample['pred_score'].numpy() for sample in data_samples] score_list.append(scores) if args.multi_label: - labels = [ - sample['gt_labels']['item'] for sample in data_sample_list[0] - ] + labels = [sample['gt_label'] for sample in data_sample_list[0]] else: - labels = [ - sample['gt_labels']['item'].item() - for sample in data_sample_list[0] - ] + labels = [sample['gt_label'].item() for sample in data_sample_list[0]] if args.apply_softmax: diff --git a/tools/deployment/export_onnx_gcn.py b/tools/deployment/export_onnx_gcn.py index a4fd237a59..b9cb8423a6 100644 --- a/tools/deployment/export_onnx_gcn.py +++ b/tools/deployment/export_onnx_gcn.py @@ -122,7 +122,7 @@ def main(): base_output = base_model( input_tensor.unsqueeze(0), data_samples=[data_sample], mode='predict')[0] - base_output = base_output.pred_scores.item.detach().cpu().numpy() + base_output = base_output.pred_score.detach().cpu().numpy() model = GCNNet(base_model).to(args.device) model.eval() diff --git a/tools/deployment/export_onnx_posec3d.py b/tools/deployment/export_onnx_posec3d.py index 014096b48e..f8950dd8c8 100644 --- a/tools/deployment/export_onnx_posec3d.py +++ b/tools/deployment/export_onnx_posec3d.py @@ -118,7 +118,7 @@ def main(): base_output = base_model( input_tensor.unsqueeze(0), data_samples=[data_sample], mode='predict')[0] - base_output = 
base_output.pred_scores.item.detach().cpu().numpy() + base_output = base_output.pred_score.detach().cpu().numpy() model = GCNNet(base_model).to(args.device) model.eval()