diff --git a/demo/demo.ipynb b/demo/demo.ipynb
index ebcf2ff538..9d5e958864 100644
--- a/demo/demo.ipynb
+++ b/demo/demo.ipynb
@@ -70,7 +70,7 @@
     "label = '../tools/data/kinetics/label_map_k400.txt'\n",
     "results = inference_recognizer(model, video)\n",
     "\n",
-    "pred_scores = results.pred_scores.item.tolist()\n",
+    "pred_scores = results.pred_score.tolist()\n",
     "score_tuples = tuple(zip(range(len(pred_scores)), pred_scores))\n",
     "score_sorted = sorted(score_tuples, key=itemgetter(1), reverse=True)\n",
     "top5_label = score_sorted[:5]\n",
diff --git a/demo/demo.py b/demo/demo.py
index 6c9b5db5a5..d2ec044a04 100644
--- a/demo/demo.py
+++ b/demo/demo.py
@@ -119,7 +119,7 @@ def main():
     model = init_recognizer(cfg, args.checkpoint, device=args.device)
     pred_result = inference_recognizer(model, args.video)
 
-    pred_scores = pred_result.pred_scores.item.tolist()
+    pred_scores = pred_result.pred_score.tolist()
     score_tuples = tuple(zip(range(len(pred_scores)), pred_scores))
     score_sorted = sorted(score_tuples, key=itemgetter(1), reverse=True)
     top5_label = score_sorted[:5]
diff --git a/demo/demo_audio.py b/demo/demo_audio.py
index 2da446a2da..c874813f1f 100644
--- a/demo/demo_audio.py
+++ b/demo/demo_audio.py
@@ -39,7 +39,7 @@ def main():
         raise NotImplementedError('Demo works on extracted audio features')
     pred_result = inference_recognizer(model, args.audio)
 
-    pred_scores = pred_result.pred_scores.item.tolist()
+    pred_scores = pred_result.pred_score.tolist()
     score_tuples = tuple(zip(range(len(pred_scores)), pred_scores))
     score_sorted = sorted(score_tuples, key=itemgetter(1), reverse=True)
     top5_label = score_sorted[:5]
diff --git a/demo/demo_skeleton.py b/demo/demo_skeleton.py
index 7a162ef468..19245b6540 100644
--- a/demo/demo_skeleton.py
+++ b/demo/demo_skeleton.py
@@ -152,7 +152,7 @@ def main():
     model = init_recognizer(config, args.checkpoint, args.device)
     result = inference_skeleton(model, pose_results, (h, w))
 
-    max_pred_index = result.pred_scores.item.argmax().item()
+    max_pred_index = result.pred_score.argmax().item()
     label_map = [x.strip() for x in open(args.label_map).readlines()]
     action_label = label_map[max_pred_index]
 
diff --git a/demo/demo_video_structuralize.py b/demo/demo_video_structuralize.py
index 805dda7e14..85784efbf5 100644
--- a/demo/demo_video_structuralize.py
+++ b/demo/demo_video_structuralize.py
@@ -373,7 +373,7 @@ def skeleton_based_action_recognition(args, pose_results, h, w):
     skeleton_model = init_recognizer(
         skeleton_config, args.skeleton_checkpoint, device=args.device)
     result = inference_skeleton(skeleton_model, pose_results, (h, w))
-    action_idx = result.pred_scores.item.argmax().item()
+    action_idx = result.pred_score.argmax().item()
     return label_map[action_idx]
 
 
@@ -382,7 +382,7 @@ def rgb_based_action_recognition(args):
     rgb_config.model.backbone.pretrained = None
     rgb_model = init_recognizer(rgb_config, args.rgb_checkpoint, args.device)
     action_results = inference_recognizer(rgb_model, args.video)
-    rgb_action_result = action_results.pred_scores.item.argmax().item()
+    rgb_action_result = action_results.pred_score.argmax().item()
     label_map = [x.strip() for x in open(args.label_map).readlines()]
     return label_map[rgb_action_result]
 
@@ -460,7 +460,7 @@ def skeleton_based_stdet(args, label_map, human_detections, pose_results,
 
             output = inference_recognizer(skeleton_stdet_model, fake_anno)
             # for multi-label recognition
-            score = output.pred_scores.item.tolist()
+            score = output.pred_score.tolist()
             for k in range(len(score)):  # 81
                 if k not in label_map:
                     continue
diff --git a/demo/fuse/bone.pkl b/demo/fuse/bone.pkl
index a5cc72b3a1..21d311924c 100644
Binary files a/demo/fuse/bone.pkl and b/demo/fuse/bone.pkl differ
diff --git a/demo/fuse/joint.pkl b/demo/fuse/joint.pkl
index 1259a508ce..96d023b336 100644
Binary files a/demo/fuse/joint.pkl and b/demo/fuse/joint.pkl differ
diff --git a/demo/long_video_demo.py b/demo/long_video_demo.py
index bb7e51a234..eea03348ff 100644
--- a/demo/long_video_demo.py
+++ b/demo/long_video_demo.py
@@ -216,7 +216,7 @@ def inference(model, data, args, frame_queue):
 
     result = inference_recognizer(
         model, cur_data, test_pipeline=args.test_pipeline)
-    scores = result.pred_scores.item.tolist()
+    scores = result.pred_score.tolist()
 
     if args.stride > 0:
         pred_stride = int(args.sample_length * args.stride)
diff --git a/demo/mmaction2_tutorial.ipynb b/demo/mmaction2_tutorial.ipynb
index 1a9d6ec70e..4d24a04d5e 100644
--- a/demo/mmaction2_tutorial.ipynb
+++ b/demo/mmaction2_tutorial.ipynb
@@ -1,1936 +1,1936 @@
 {
-  "cells": [
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "VcjSRFELVbNk"
-      },
-      "source": [
-        "# MMAction2 Tutorial\n",
-        "\n",
-        "Welcome to MMAction2! This is the official colab tutorial for using MMAction2. In this tutorial, you will learn\n",
-        "- Perform inference with a MMAction2 recognizer.\n",
-        "- Train a new recognizer with a new dataset.\n",
-        "\n",
-        "\n",
-        "Let's start!"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "7LqHGkGEVqpm"
-      },
-      "source": [
-        "## Install MMAction2"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": 2,
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "Bf8PpPXtVvmg",
-        "outputId": "9d3f4594-f151-4ee9-a19b-09f8a439ac04"
-      },
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "nvcc: NVIDIA (R) Cuda compiler driver\n",
-            "Copyright (c) 2005-2022 NVIDIA Corporation\n",
-            "Built on Wed_Sep_21_10:33:58_PDT_2022\n",
-            "Cuda compilation tools, release 11.8, V11.8.89\n",
-            "Build cuda_11.8.r11.8/compiler.31833905_0\n",
-            "gcc (Ubuntu 9.4.0-1ubuntu1~20.04.1) 9.4.0\n",
-            "Copyright (C) 2019 Free Software Foundation, Inc.\n",
-            "This is free software; see the source for copying conditions.  There is NO\n",
-            "warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n",
-            "\n"
-          ]
-        }
-      ],
-      "source": [
-        "# Check nvcc version\n",
-        "!nvcc -V\n",
-        "# Check GCC version\n",
-        "!gcc --version"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": 3,
-      "metadata": {
-        "id": "ZPwKGzqydnb2",
-        "outputId": "27506fa7-48a2-4fe0-d377-56f940dafec4",
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        }
-      },
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "Looking in indexes: https://download.pytorch.org/whl/cu118, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
-            "Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (2.0.0+cu118)\n",
-            "Requirement already satisfied: torchvision in /usr/local/lib/python3.10/dist-packages (0.15.1+cu118)\n",
-            "Requirement already satisfied: torchaudio in /usr/local/lib/python3.10/dist-packages (2.0.1+cu118)\n",
-            "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch) (3.12.0)\n",
-            "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch) (4.5.0)\n",
-            "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch) (1.11.1)\n",
-            "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch) (3.1)\n",
-            "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch) (3.1.2)\n",
-            "Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.10/dist-packages (from torch) (2.0.0)\n",
-            "Requirement already satisfied: cmake in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch) (3.25.2)\n",
-            "Requirement already satisfied: lit in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch) (16.0.3)\n",
-            "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from torchvision) (1.22.4)\n",
-            "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from torchvision) (2.27.1)\n",
-            "Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in /usr/local/lib/python3.10/dist-packages (from torchvision) (8.4.0)\n",
-            "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch) (2.1.2)\n",
-            "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->torchvision) (1.26.15)\n",
-            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->torchvision) (2022.12.7)\n",
-            "Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.10/dist-packages (from requests->torchvision) (2.0.12)\n",
-            "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->torchvision) (3.4)\n",
-            "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch) (1.3.0)\n"
-          ]
-        }
-      ],
-      "source": [
-        "# install dependencies: (if your colab has CUDA 11.8)\n",
-        "%pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": 4,
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "5PAJ4ArzV5Ry",
-        "outputId": "eb8539a0-9524-4c48-f3e1-0b013ce0d344"
-      },
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
-            "Collecting openmim\n",
-            "  Downloading openmim-0.3.7-py2.py3-none-any.whl (51 kB)\n",
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m51.3/51.3 kB\u001b[0m \u001b[31m4.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[?25hRequirement already satisfied: Click in /usr/local/lib/python3.10/dist-packages (from openmim) (8.1.3)\n",
-            "Collecting colorama (from openmim)\n",
-            "  Downloading colorama-0.4.6-py2.py3-none-any.whl (25 kB)\n",
-            "Collecting model-index (from openmim)\n",
-            "  Downloading model_index-0.1.11-py3-none-any.whl (34 kB)\n",
-            "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from openmim) (1.5.3)\n",
-            "Requirement already satisfied: pip>=19.3 in /usr/local/lib/python3.10/dist-packages (from openmim) (23.1.2)\n",
-            "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from openmim) (2.27.1)\n",
-            "Requirement already satisfied: rich in /usr/local/lib/python3.10/dist-packages (from openmim) (13.3.4)\n",
-            "Requirement already satisfied: tabulate in /usr/local/lib/python3.10/dist-packages (from openmim) (0.8.10)\n",
-            "Requirement already satisfied: pyyaml in /usr/local/lib/python3.10/dist-packages (from model-index->openmim) (6.0)\n",
-            "Requirement already satisfied: markdown in /usr/local/lib/python3.10/dist-packages (from model-index->openmim) (3.4.3)\n",
-            "Collecting ordered-set (from model-index->openmim)\n",
-            "  Downloading ordered_set-4.1.0-py3-none-any.whl (7.6 kB)\n",
-            "Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas->openmim) (2.8.2)\n",
-            "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->openmim) (2022.7.1)\n",
-            "Requirement already satisfied: numpy>=1.21.0 in /usr/local/lib/python3.10/dist-packages (from pandas->openmim) (1.22.4)\n",
-            "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->openmim) (1.26.15)\n",
-            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->openmim) (2022.12.7)\n",
-            "Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.10/dist-packages (from requests->openmim) (2.0.12)\n",
-            "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->openmim) (3.4)\n",
-            "Requirement already satisfied: markdown-it-py<3.0.0,>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich->openmim) (2.2.0)\n",
-            "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from rich->openmim) (2.14.0)\n",
-            "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py<3.0.0,>=2.2.0->rich->openmim) (0.1.2)\n",
-            "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.1->pandas->openmim) (1.16.0)\n",
-            "Installing collected packages: ordered-set, colorama, model-index, openmim\n",
-            "Successfully installed colorama-0.4.6 model-index-0.1.11 openmim-0.3.7 ordered-set-4.1.0\n",
-            "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
-            "Looking in links: https://download.openmmlab.com/mmcv/dist/cu118/torch2.0.0/index.html\n",
-            "Collecting mmengine\n",
-            "  Downloading mmengine-0.7.3-py3-none-any.whl (372 kB)\n",
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m372.1/372.1 kB\u001b[0m \u001b[31m20.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[?25hCollecting addict (from mmengine)\n",
-            "  Downloading addict-2.4.0-py3-none-any.whl (3.8 kB)\n",
-            "Requirement already satisfied: matplotlib in /usr/local/lib/python3.10/dist-packages (from mmengine) (3.7.1)\n",
-            "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from mmengine) (1.22.4)\n",
-            "Requirement already satisfied: pyyaml in /usr/local/lib/python3.10/dist-packages (from mmengine) (6.0)\n",
-            "Requirement already satisfied: rich in /usr/local/lib/python3.10/dist-packages (from mmengine) (13.3.4)\n",
-            "Requirement already satisfied: termcolor in /usr/local/lib/python3.10/dist-packages (from mmengine) (2.3.0)\n",
-            "Collecting yapf (from mmengine)\n",
-            "  Downloading yapf-0.33.0-py2.py3-none-any.whl (200 kB)\n",
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m200.9/200.9 kB\u001b[0m \u001b[31m21.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[?25hRequirement already satisfied: opencv-python>=3 in /usr/local/lib/python3.10/dist-packages (from mmengine) (4.7.0.72)\n",
-            "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmengine) (1.0.7)\n",
-            "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmengine) (0.11.0)\n",
-            "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmengine) (4.39.3)\n",
-            "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmengine) (1.4.4)\n",
-            "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmengine) (23.1)\n",
-            "Requirement already satisfied: pillow>=6.2.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmengine) (8.4.0)\n",
-            "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmengine) (3.0.9)\n",
-            "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmengine) (2.8.2)\n",
-            "Requirement already satisfied: markdown-it-py<3.0.0,>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich->mmengine) (2.2.0)\n",
-            "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from rich->mmengine) (2.14.0)\n",
-            "Requirement already satisfied: tomli>=2.0.1 in /usr/local/lib/python3.10/dist-packages (from yapf->mmengine) (2.0.1)\n",
-            "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py<3.0.0,>=2.2.0->rich->mmengine) (0.1.2)\n",
-            "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.7->matplotlib->mmengine) (1.16.0)\n",
-            "Installing collected packages: addict, yapf, mmengine\n",
-            "Successfully installed addict-2.4.0 mmengine-0.7.3 yapf-0.33.0\n",
-            "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
-            "Looking in links: https://download.openmmlab.com/mmcv/dist/cu118/torch2.0.0/index.html\n",
-            "Collecting mmcv>=2.0.0\n",
-            "  Downloading https://download.openmmlab.com/mmcv/dist/cu118/torch2.0.0/mmcv-2.0.0-cp310-cp310-manylinux1_x86_64.whl (74.4 MB)\n",
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m74.4/74.4 MB\u001b[0m \u001b[31m9.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[?25hRequirement already satisfied: addict in /usr/local/lib/python3.10/dist-packages (from mmcv>=2.0.0) (2.4.0)\n",
-            "Requirement already satisfied: mmengine>=0.2.0 in /usr/local/lib/python3.10/dist-packages (from mmcv>=2.0.0) (0.7.3)\n",
-            "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from mmcv>=2.0.0) (1.22.4)\n",
-            "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from mmcv>=2.0.0) (23.1)\n",
-            "Requirement already satisfied: Pillow in /usr/local/lib/python3.10/dist-packages (from mmcv>=2.0.0) (8.4.0)\n",
-            "Requirement already satisfied: pyyaml in /usr/local/lib/python3.10/dist-packages (from mmcv>=2.0.0) (6.0)\n",
-            "Requirement already satisfied: yapf in /usr/local/lib/python3.10/dist-packages (from mmcv>=2.0.0) (0.33.0)\n",
-            "Requirement already satisfied: opencv-python>=3 in /usr/local/lib/python3.10/dist-packages (from mmcv>=2.0.0) (4.7.0.72)\n",
-            "Requirement already satisfied: matplotlib in /usr/local/lib/python3.10/dist-packages (from mmengine>=0.2.0->mmcv>=2.0.0) (3.7.1)\n",
-            "Requirement already satisfied: rich in /usr/local/lib/python3.10/dist-packages (from mmengine>=0.2.0->mmcv>=2.0.0) (13.3.4)\n",
-            "Requirement already satisfied: termcolor in /usr/local/lib/python3.10/dist-packages (from mmengine>=0.2.0->mmcv>=2.0.0) (2.3.0)\n",
-            "Requirement already satisfied: tomli>=2.0.1 in /usr/local/lib/python3.10/dist-packages (from yapf->mmcv>=2.0.0) (2.0.1)\n",
-            "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmengine>=0.2.0->mmcv>=2.0.0) (1.0.7)\n",
-            "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmengine>=0.2.0->mmcv>=2.0.0) (0.11.0)\n",
-            "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmengine>=0.2.0->mmcv>=2.0.0) (4.39.3)\n",
-            "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmengine>=0.2.0->mmcv>=2.0.0) (1.4.4)\n",
-            "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmengine>=0.2.0->mmcv>=2.0.0) (3.0.9)\n",
-            "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmengine>=0.2.0->mmcv>=2.0.0) (2.8.2)\n",
-            "Requirement already satisfied: markdown-it-py<3.0.0,>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich->mmengine>=0.2.0->mmcv>=2.0.0) (2.2.0)\n",
-            "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from rich->mmengine>=0.2.0->mmcv>=2.0.0) (2.14.0)\n",
-            "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py<3.0.0,>=2.2.0->rich->mmengine>=0.2.0->mmcv>=2.0.0) (0.1.2)\n",
-            "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.7->matplotlib->mmengine>=0.2.0->mmcv>=2.0.0) (1.16.0)\n",
-            "Installing collected packages: mmcv\n",
-            "Successfully installed mmcv-2.0.0\n",
-            "Cloning into 'mmaction2'...\n",
-            "remote: Enumerating objects: 21284, done.\u001b[K\n",
-            "remote: Counting objects: 100% (394/394), done.\u001b[K\n",
-            "remote: Compressing objects: 100% (287/287), done.\u001b[K\n",
-            "remote: Total 21284 (delta 175), reused 248 (delta 103), pack-reused 20890\u001b[K\n",
-            "Receiving objects: 100% (21284/21284), 68.63 MiB | 16.59 MiB/s, done.\n",
-            "Resolving deltas: 100% (14990/14990), done.\n",
-            "/content/mmaction2\n",
-            "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
-            "Obtaining file:///content/mmaction2\n",
-            "  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
-            "Collecting decord>=0.4.1 (from mmaction2==1.0.0)\n",
-            "  Downloading decord-0.6.0-py3-none-manylinux2010_x86_64.whl (13.6 MB)\n",
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.6/13.6 MB\u001b[0m \u001b[31m76.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[?25hCollecting einops (from mmaction2==1.0.0)\n",
-            "  Downloading einops-0.6.1-py3-none-any.whl (42 kB)\n",
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m42.2/42.2 kB\u001b[0m \u001b[31m4.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[?25hRequirement already satisfied: matplotlib in /usr/local/lib/python3.10/dist-packages (from mmaction2==1.0.0) (3.7.1)\n",
-            "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from mmaction2==1.0.0) (1.22.4)\n",
-            "Requirement already satisfied: opencv-contrib-python in /usr/local/lib/python3.10/dist-packages (from mmaction2==1.0.0) (4.7.0.72)\n",
-            "Requirement already satisfied: Pillow in /usr/local/lib/python3.10/dist-packages (from mmaction2==1.0.0) (8.4.0)\n",
-            "Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from mmaction2==1.0.0) (1.10.1)\n",
-            "Requirement already satisfied: torch>=1.3 in /usr/local/lib/python3.10/dist-packages (from mmaction2==1.0.0) (2.0.0+cu118)\n",
-            "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch>=1.3->mmaction2==1.0.0) (3.12.0)\n",
-            "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch>=1.3->mmaction2==1.0.0) (4.5.0)\n",
-            "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=1.3->mmaction2==1.0.0) (1.11.1)\n",
-            "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.3->mmaction2==1.0.0) (3.1)\n",
-            "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.3->mmaction2==1.0.0) (3.1.2)\n",
-            "Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.3->mmaction2==1.0.0) (2.0.0)\n",
-            "Requirement already satisfied: cmake in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch>=1.3->mmaction2==1.0.0) (3.25.2)\n",
-            "Requirement already satisfied: lit in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch>=1.3->mmaction2==1.0.0) (16.0.3)\n",
-            "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmaction2==1.0.0) (1.0.7)\n",
-            "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmaction2==1.0.0) (0.11.0)\n",
-            "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmaction2==1.0.0) (4.39.3)\n",
-            "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmaction2==1.0.0) (1.4.4)\n",
-            "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmaction2==1.0.0) (23.1)\n",
-            "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmaction2==1.0.0) (3.0.9)\n",
-            "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmaction2==1.0.0) (2.8.2)\n",
-            "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.7->matplotlib->mmaction2==1.0.0) (1.16.0)\n",
-            "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=1.3->mmaction2==1.0.0) (2.1.2)\n",
-            "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.3->mmaction2==1.0.0) (1.3.0)\n",
-            "Installing collected packages: einops, decord, mmaction2\n",
-            "  Running setup.py develop for mmaction2\n",
-            "Successfully installed decord-0.6.0 einops-0.6.1 mmaction2-1.0.0\n",
-            "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
-            "Collecting av>=9.0 (from -r requirements/optional.txt (line 1))\n",
-            "  Downloading av-10.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (31.0 MB)\n",
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m31.0/31.0 MB\u001b[0m \u001b[31m38.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[?25hRequirement already satisfied: future in /usr/local/lib/python3.10/dist-packages (from -r requirements/optional.txt (line 2)) (0.18.3)\n",
-            "Collecting fvcore (from -r requirements/optional.txt (line 3))\n",
-            "  Downloading fvcore-0.1.5.post20221221.tar.gz (50 kB)\n",
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m50.2/50.2 kB\u001b[0m \u001b[31m6.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[?25h  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
-            "Requirement already satisfied: imgaug in /usr/local/lib/python3.10/dist-packages (from -r requirements/optional.txt (line 4)) (0.4.0)\n",
-            "Requirement already satisfied: librosa in /usr/local/lib/python3.10/dist-packages (from -r requirements/optional.txt (line 5)) (0.10.0.post2)\n",
-            "Collecting lmdb (from -r requirements/optional.txt (line 6))\n",
-            "  Downloading lmdb-1.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (299 kB)\n",
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m299.2/299.2 kB\u001b[0m \u001b[31m30.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[?25hRequirement already satisfied: moviepy in /usr/local/lib/python3.10/dist-packages (from -r requirements/optional.txt (line 7)) (1.0.3)\n",
-            "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from -r requirements/optional.txt (line 8)) (23.1)\n",
-            "Collecting pims (from -r requirements/optional.txt (line 9))\n",
-            "  Downloading PIMS-0.6.1.tar.gz (86 kB)\n",
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.0/86.0 kB\u001b[0m \u001b[31m12.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[?25h  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
-            "Collecting PyTurboJPEG (from -r requirements/optional.txt (line 10))\n",
-            "  Downloading PyTurboJPEG-1.7.1.tar.gz (11 kB)\n",
-            "  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
-            "Requirement already satisfied: soundfile in /usr/local/lib/python3.10/dist-packages (from -r requirements/optional.txt (line 11)) (0.12.1)\n",
-            "Requirement already satisfied: tensorboard in /usr/local/lib/python3.10/dist-packages (from -r requirements/optional.txt (line 12)) (2.12.2)\n",
-            "Collecting wandb (from -r requirements/optional.txt (line 13))\n",
-            "  Downloading wandb-0.15.2-py3-none-any.whl (2.0 MB)\n",
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m79.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[?25hRequirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from fvcore->-r requirements/optional.txt (line 3)) (1.22.4)\n",
-            "Collecting yacs>=0.1.6 (from fvcore->-r requirements/optional.txt (line 3))\n",
-            "  Downloading yacs-0.1.8-py3-none-any.whl (14 kB)\n",
-            "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from fvcore->-r requirements/optional.txt (line 3)) (6.0)\n",
-            "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from fvcore->-r requirements/optional.txt (line 3)) (4.65.0)\n",
-            "Requirement already satisfied: termcolor>=1.1 in /usr/local/lib/python3.10/dist-packages (from fvcore->-r requirements/optional.txt (line 3)) (2.3.0)\n",
-            "Requirement already satisfied: Pillow in /usr/local/lib/python3.10/dist-packages (from fvcore->-r requirements/optional.txt (line 3)) (8.4.0)\n",
-            "Requirement already satisfied: tabulate in /usr/local/lib/python3.10/dist-packages (from fvcore->-r requirements/optional.txt (line 3)) (0.8.10)\n",
-            "Collecting iopath>=0.1.7 (from fvcore->-r requirements/optional.txt (line 3))\n",
-            "  Downloading iopath-0.1.10.tar.gz (42 kB)\n",
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m42.2/42.2 kB\u001b[0m \u001b[31m4.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[?25h  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
-            "Requirement already satisfied: six in /usr/local/lib/python3.10/dist-packages (from imgaug->-r requirements/optional.txt (line 4)) (1.16.0)\n",
-            "Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from imgaug->-r requirements/optional.txt (line 4)) (1.10.1)\n",
-            "Requirement already satisfied: matplotlib in /usr/local/lib/python3.10/dist-packages (from imgaug->-r requirements/optional.txt (line 4)) (3.7.1)\n",
-            "Requirement already satisfied: scikit-image>=0.14.2 in /usr/local/lib/python3.10/dist-packages (from imgaug->-r requirements/optional.txt (line 4)) (0.19.3)\n",
-            "Requirement already satisfied: opencv-python in /usr/local/lib/python3.10/dist-packages (from imgaug->-r requirements/optional.txt (line 4)) (4.7.0.72)\n",
-            "Requirement already satisfied: imageio in /usr/local/lib/python3.10/dist-packages (from imgaug->-r requirements/optional.txt (line 4)) (2.25.1)\n",
-            "Requirement already satisfied: Shapely in /usr/local/lib/python3.10/dist-packages (from imgaug->-r requirements/optional.txt (line 4)) (2.0.1)\n",
-            "Requirement already satisfied: audioread>=2.1.9 in /usr/local/lib/python3.10/dist-packages (from librosa->-r requirements/optional.txt (line 5)) (3.0.0)\n",
-            "Requirement already satisfied: scikit-learn>=0.20.0 in /usr/local/lib/python3.10/dist-packages (from librosa->-r requirements/optional.txt (line 5)) (1.2.2)\n",
-            "Requirement already satisfied: joblib>=0.14 in /usr/local/lib/python3.10/dist-packages (from librosa->-r requirements/optional.txt (line 5)) (1.2.0)\n",
-            "Requirement already satisfied: decorator>=4.3.0 in /usr/local/lib/python3.10/dist-packages (from librosa->-r requirements/optional.txt (line 5)) (4.4.2)\n",
-            "Requirement already satisfied: numba>=0.51.0 in /usr/local/lib/python3.10/dist-packages (from librosa->-r requirements/optional.txt (line 5)) (0.56.4)\n",
-            "Requirement already satisfied: pooch<1.7,>=1.0 in /usr/local/lib/python3.10/dist-packages (from librosa->-r requirements/optional.txt (line 5)) (1.6.0)\n",
-            "Requirement already satisfied: soxr>=0.3.2 in /usr/local/lib/python3.10/dist-packages (from librosa->-r requirements/optional.txt (line 5)) (0.3.5)\n",
-            "Requirement already satisfied: typing-extensions>=4.1.1 in /usr/local/lib/python3.10/dist-packages (from librosa->-r requirements/optional.txt (line 5)) (4.5.0)\n",
-            "Requirement already satisfied: lazy-loader>=0.1 in /usr/local/lib/python3.10/dist-packages (from librosa->-r requirements/optional.txt (line 5)) (0.2)\n",
-            "Requirement already satisfied: msgpack>=1.0 in /usr/local/lib/python3.10/dist-packages (from librosa->-r requirements/optional.txt (line 5)) (1.0.5)\n",
-            "Requirement already satisfied: requests<3.0,>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from moviepy->-r requirements/optional.txt (line 7)) (2.27.1)\n",
-            "Requirement already satisfied: proglog<=1.0.0 in /usr/local/lib/python3.10/dist-packages (from moviepy->-r requirements/optional.txt (line 7)) (0.1.10)\n",
-            "Requirement already satisfied: imageio-ffmpeg>=0.2.0 in /usr/local/lib/python3.10/dist-packages (from moviepy->-r requirements/optional.txt (line 7)) (0.4.8)\n",
-            "Collecting slicerator>=0.9.8 (from pims->-r requirements/optional.txt (line 9))\n",
-            "  Downloading slicerator-1.1.0-py3-none-any.whl (10 kB)\n",
-            "Requirement already satisfied: cffi>=1.0 in /usr/local/lib/python3.10/dist-packages (from soundfile->-r requirements/optional.txt (line 11)) (1.15.1)\n",
-            "Requirement already satisfied: absl-py>=0.4 in /usr/local/lib/python3.10/dist-packages (from tensorboard->-r requirements/optional.txt (line 12)) (1.4.0)\n",
-            "Requirement already satisfied: grpcio>=1.48.2 in /usr/local/lib/python3.10/dist-packages (from tensorboard->-r requirements/optional.txt (line 12)) (1.54.0)\n",
-            "Requirement already satisfied: google-auth<3,>=1.6.3 in /usr/local/lib/python3.10/dist-packages (from tensorboard->-r requirements/optional.txt (line 12)) (2.17.3)\n",
-            "Requirement already satisfied: google-auth-oauthlib<1.1,>=0.5 in /usr/local/lib/python3.10/dist-packages (from tensorboard->-r requirements/optional.txt (line 12)) (1.0.0)\n",
-            "Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.10/dist-packages (from tensorboard->-r requirements/optional.txt (line 12)) (3.4.3)\n",
-            "Requirement already satisfied: protobuf>=3.19.6 in /usr/local/lib/python3.10/dist-packages (from tensorboard->-r requirements/optional.txt (line 12)) (3.20.3)\n",
-            "Requirement already satisfied: setuptools>=41.0.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard->-r requirements/optional.txt (line 12)) (67.7.2)\n",
-            "Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard->-r requirements/optional.txt (line 12)) (0.7.0)\n",
-            "Requirement already satisfied: tensorboard-plugin-wit>=1.6.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard->-r requirements/optional.txt (line 12)) (1.8.1)\n",
-            "Requirement already satisfied: werkzeug>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from tensorboard->-r requirements/optional.txt (line 12)) (2.3.0)\n",
-            "Requirement already satisfied: wheel>=0.26 in /usr/local/lib/python3.10/dist-packages (from tensorboard->-r requirements/optional.txt (line 12)) (0.40.0)\n",
-            "Requirement already satisfied: Click!=8.0.0,>=7.0 in /usr/local/lib/python3.10/dist-packages (from wandb->-r requirements/optional.txt (line 13)) (8.1.3)\n",
-            "Collecting GitPython!=3.1.29,>=1.0.0 (from wandb->-r requirements/optional.txt (line 13))\n",
-            "  Downloading GitPython-3.1.31-py3-none-any.whl (184 kB)\n",
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m184.3/184.3 kB\u001b[0m \u001b[31m22.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[?25hRequirement already satisfied: psutil>=5.0.0 in /usr/local/lib/python3.10/dist-packages (from wandb->-r requirements/optional.txt (line 13)) (5.9.5)\n",
-            "Collecting sentry-sdk>=1.0.0 (from wandb->-r requirements/optional.txt (line 13))\n",
-            "  Downloading sentry_sdk-1.22.2-py2.py3-none-any.whl (203 kB)\n",
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m203.3/203.3 kB\u001b[0m \u001b[31m25.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[?25hCollecting docker-pycreds>=0.4.0 (from wandb->-r requirements/optional.txt (line 13))\n",
-            "  Downloading docker_pycreds-0.4.0-py2.py3-none-any.whl (9.0 kB)\n",
-            "Collecting pathtools (from wandb->-r requirements/optional.txt (line 13))\n",
-            "  Downloading pathtools-0.1.2.tar.gz (11 kB)\n",
-            "  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
-            "Collecting setproctitle (from wandb->-r requirements/optional.txt (line 13))\n",
-            "  Downloading setproctitle-1.3.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (30 kB)\n",
-            "Requirement already satisfied: appdirs>=1.4.3 in /usr/local/lib/python3.10/dist-packages (from wandb->-r requirements/optional.txt (line 13)) (1.4.4)\n",
-            "Requirement already satisfied: pycparser in /usr/local/lib/python3.10/dist-packages (from cffi>=1.0->soundfile->-r requirements/optional.txt (line 11)) (2.21)\n",
-            "Collecting gitdb<5,>=4.0.1 (from GitPython!=3.1.29,>=1.0.0->wandb->-r requirements/optional.txt (line 13))\n",
-            "  Downloading gitdb-4.0.10-py3-none-any.whl (62 kB)\n",
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.7/62.7 kB\u001b[0m \u001b[31m9.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[?25hRequirement already satisfied: cachetools<6.0,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard->-r requirements/optional.txt (line 12)) (5.3.0)\n",
-            "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard->-r requirements/optional.txt (line 12)) (0.3.0)\n",
-            "Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard->-r requirements/optional.txt (line 12)) (4.9)\n",
-            "Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from google-auth-oauthlib<1.1,>=0.5->tensorboard->-r requirements/optional.txt (line 12)) (1.3.1)\n",
-            "Collecting portalocker (from iopath>=0.1.7->fvcore->-r requirements/optional.txt (line 3))\n",
-            "  Downloading portalocker-2.7.0-py2.py3-none-any.whl (15 kB)\n",
-            "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.10/dist-packages (from numba>=0.51.0->librosa->-r requirements/optional.txt (line 5)) (0.39.1)\n",
-            "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3.0,>=2.8.1->moviepy->-r requirements/optional.txt (line 7)) (1.26.15)\n",
-            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3.0,>=2.8.1->moviepy->-r requirements/optional.txt (line 7)) (2022.12.7)\n",
-            "Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.10/dist-packages (from requests<3.0,>=2.8.1->moviepy->-r requirements/optional.txt (line 7)) (2.0.12)\n",
-            "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3.0,>=2.8.1->moviepy->-r requirements/optional.txt (line 7)) (3.4)\n",
-            "Requirement already satisfied: networkx>=2.2 in /usr/local/lib/python3.10/dist-packages (from scikit-image>=0.14.2->imgaug->-r requirements/optional.txt (line 4)) (3.1)\n",
-            "Requirement already satisfied: tifffile>=2019.7.26 in /usr/local/lib/python3.10/dist-packages (from scikit-image>=0.14.2->imgaug->-r requirements/optional.txt (line 4)) (2023.4.12)\n",
-            "Requirement already satisfied: PyWavelets>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from scikit-image>=0.14.2->imgaug->-r requirements/optional.txt (line 4)) (1.4.1)\n",
-            "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=0.20.0->librosa->-r requirements/optional.txt (line 5)) (3.1.0)\n",
-            "Requirement already satisfied: MarkupSafe>=2.1.1 in /usr/local/lib/python3.10/dist-packages (from werkzeug>=1.0.1->tensorboard->-r requirements/optional.txt (line 12)) (2.1.2)\n",
-            "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->imgaug->-r requirements/optional.txt (line 4)) (1.0.7)\n",
-            "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib->imgaug->-r requirements/optional.txt (line 4)) (0.11.0)\n",
-            "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->imgaug->-r requirements/optional.txt (line 4)) (4.39.3)\n",
-            "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->imgaug->-r requirements/optional.txt (line 4)) (1.4.4)\n",
-            "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->imgaug->-r requirements/optional.txt (line 4)) (3.0.9)\n",
-            "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib->imgaug->-r requirements/optional.txt (line 4)) (2.8.2)\n",
-            "Collecting smmap<6,>=3.0.1 (from gitdb<5,>=4.0.1->GitPython!=3.1.29,>=1.0.0->wandb->-r requirements/optional.txt (line 13))\n",
-            "  Downloading smmap-5.0.0-py3-none-any.whl (24 kB)\n",
-            "Requirement already satisfied: pyasn1<0.6.0,>=0.4.6 in /usr/local/lib/python3.10/dist-packages (from pyasn1-modules>=0.2.1->google-auth<3,>=1.6.3->tensorboard->-r requirements/optional.txt (line 12)) (0.5.0)\n",
-            "Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.10/dist-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<1.1,>=0.5->tensorboard->-r requirements/optional.txt (line 12)) (3.2.2)\n",
-            "Building wheels for collected packages: fvcore, pims, PyTurboJPEG, iopath, pathtools\n",
-            "  Building wheel for fvcore (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
-            "  Created wheel for fvcore: filename=fvcore-0.1.5.post20221221-py3-none-any.whl size=61405 sha256=25c1e50155c8788d00eec898793c96133a746a8bb076ffc5c01f5a4dc256751e\n",
-            "  Stored in directory: /root/.cache/pip/wheels/01/c0/af/77c1cf53a1be9e42a52b48e5af2169d40ec2e89f7362489dd0\n",
-            "  Building wheel for pims (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
-            "  Created wheel for pims: filename=PIMS-0.6.1-py3-none-any.whl size=82619 sha256=59a328dc88a438c60cfb6e937e04c8a7dd55ad2a2905034cd41ff80cdbba6497\n",
-            "  Stored in directory: /root/.cache/pip/wheels/cc/bf/3e/bfa77232d942f8244145f9c713b6b38f6ef04b6fb5c021c114\n",
-            "  Building wheel for PyTurboJPEG (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
-            "  Created wheel for PyTurboJPEG: filename=PyTurboJPEG-1.7.1-py3-none-any.whl size=12243 sha256=ddf6424c85ac533335abd96dd9e98b014ea1dd4f143c88cd35ecb08d6128f411\n",
-            "  Stored in directory: /root/.cache/pip/wheels/de/6e/b1/e7ba70c328c3395555cb92ca8820babb32950d867858b1948b\n",
-            "  Building wheel for iopath (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
-            "  Created wheel for iopath: filename=iopath-0.1.10-py3-none-any.whl size=31531 sha256=db977a4344bebbdd710665e767caab4fbcf53cc6aea0707cd38d26c45718331e\n",
-            "  Stored in directory: /root/.cache/pip/wheels/9a/a3/b6/ac0fcd1b4ed5cfeb3db92e6a0e476cfd48ed0df92b91080c1d\n",
-            "  Building wheel for pathtools (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
-            "  Created wheel for pathtools: filename=pathtools-0.1.2-py3-none-any.whl size=8791 sha256=08bb5753ce029aef01f25c3e81882d93c0e040e5932e90a02a062ad058756b52\n",
-            "  Stored in directory: /root/.cache/pip/wheels/e7/f3/22/152153d6eb222ee7a56ff8617d80ee5207207a8c00a7aab794\n",
-            "Successfully built fvcore pims PyTurboJPEG iopath pathtools\n",
-            "Installing collected packages: slicerator, pathtools, lmdb, av, yacs, smmap, setproctitle, sentry-sdk, PyTurboJPEG, portalocker, docker-pycreds, pims, iopath, gitdb, GitPython, fvcore, wandb\n",
-            "Successfully installed GitPython-3.1.31 PyTurboJPEG-1.7.1 av-10.0.0 docker-pycreds-0.4.0 fvcore-0.1.5.post20221221 gitdb-4.0.10 iopath-0.1.10 lmdb-1.4.1 pathtools-0.1.2 pims-0.6.1 portalocker-2.7.0 sentry-sdk-1.22.2 setproctitle-1.3.2 slicerator-1.1.0 smmap-5.0.0 wandb-0.15.2 yacs-0.1.8\n"
-          ]
-        }
-      ],
-      "source": [
-        "# install MMEngine, MMCV and MMDetection using MIM\n",
-        "%pip install -U openmim\n",
-        "!mim install mmengine\n",
-        "!mim install \"mmcv>=2.0.0\"\n",
-        "\n",
-        "# Install mmaction2\n",
-        "!rm -rf mmaction2\n",
-        "!git clone https://github.com/open-mmlab/mmaction2.git -b main\n",
-        "%cd mmaction2\n",
-        "\n",
-        "!pip install -e .\n",
-        "\n",
-        "# Install some optional requirements\n",
-        "!pip install -r requirements/optional.txt"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": 5,
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "No_zZAFpWC-a",
-        "outputId": "9386dd81-2308-4adb-d3cb-798de11c035e"
-      },
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "2.0.0+cu118 True\n",
-            "1.0.0\n",
-            "11.8\n",
-            "GCC 9.3\n",
-            "OrderedDict([('sys.platform', 'linux'), ('Python', '3.10.11 (main, Apr  5 2023, 14:15:10) [GCC 9.4.0]'), ('CUDA available', True), ('numpy_random_seed', 2147483648), ('GPU 0', 'Tesla T4'), ('CUDA_HOME', '/usr/local/cuda'), ('NVCC', 'Cuda compilation tools, release 11.8, V11.8.89'), ('GCC', 'x86_64-linux-gnu-gcc (Ubuntu 9.4.0-1ubuntu1~20.04.1) 9.4.0'), ('PyTorch', '2.0.0+cu118'), ('PyTorch compiling details', 'PyTorch built with:\\n  - GCC 9.3\\n  - C++ Version: 201703\\n  - Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications\\n  - Intel(R) MKL-DNN v2.7.3 (Git Hash 6dbeffbae1f23cbbeae17adb7b5b13f1f37c080e)\\n  - OpenMP 201511 (a.k.a. OpenMP 4.5)\\n  - LAPACK is enabled (usually provided by MKL)\\n  - NNPACK is enabled\\n  - CPU capability usage: AVX2\\n  - CUDA Runtime 11.8\\n  - NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_90,code=sm_90\\n  - CuDNN 8.7\\n  - Magma 2.6.1\\n  - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=11.8, CUDNN_VERSION=8.7.0, CXX_COMPILER=/opt/rh/devtoolset-9/root/usr/bin/c++, CXX_FLAGS= -D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wunused-local-typedefs -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations 
-Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_DISABLE_GPU_ASSERTS=ON, TORCH_VERSION=2.0.0, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=1, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, \\n'), ('TorchVision', '0.15.1+cu118'), ('OpenCV', '4.7.0'), ('MMEngine', '0.7.3')])\n"
-          ]
-        }
-      ],
-      "source": [
-        "# Check Pytorch installation\n",
-        "import torch, torchvision\n",
-        "print(torch.__version__, torch.cuda.is_available())\n",
-        "\n",
-        "# Check MMAction2 installation\n",
-        "import mmaction\n",
-        "print(mmaction.__version__)\n",
-        "\n",
-        "# Check MMCV installation\n",
-        "from mmcv.ops import get_compiling_cuda_version, get_compiler_version\n",
-        "print(get_compiling_cuda_version())\n",
-        "print(get_compiler_version())\n",
-        "\n",
-        "# Check MMEngine installation\n",
-        "from mmengine.utils.dl_utils import collect_env\n",
-        "print(collect_env())"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "pXf7oV5DWdab"
-      },
-      "source": [
-        "## Perform inference with a MMAction2 recognizer\n",
-        "MMAction2 already provides high level APIs to do inference and training."
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": 6,
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "64CW6d_AaT-Q",
-        "outputId": "ea330d8c-2e20-4dbd-d046-51d7c9ec4f7a"
-      },
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "--2023-05-15 03:33:08--  https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_1x1x3_100e_kinetics400_rgb/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth\n",
-            "Resolving download.openmmlab.com (download.openmmlab.com)... 163.181.82.216, 163.181.82.218, 163.181.82.213, ...\n",
-            "Connecting to download.openmmlab.com (download.openmmlab.com)|163.181.82.216|:443... connected.\n",
-            "HTTP request sent, awaiting response... 200 OK\n",
-            "Length: 97579339 (93M) [application/octet-stream]\n",
-            "Saving to: ‘checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth’\n",
-            "\n",
-            "checkpoints/tsn_r50 100%[===================>]  93.06M  26.1MB/s    in 3.6s    \n",
-            "\n",
-            "2023-05-15 03:33:12 (26.2 MB/s) - ‘checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth’ saved [97579339/97579339]\n",
-            "\n"
-          ]
-        }
-      ],
-      "source": [
-        "!mkdir checkpoints\n",
-        "!wget -c https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_1x1x3_100e_kinetics400_rgb/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth \\\n",
-        "      -O checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth"
-      ]
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "VcjSRFELVbNk"
+   },
+   "source": [
+    "# MMAction2 Tutorial\n",
+    "\n",
+    "Welcome to MMAction2! This is the official Colab tutorial for using MMAction2. In this tutorial, you will learn how to:\n",
+    "- Perform inference with an MMAction2 recognizer.\n",
+    "- Train a new recognizer with a new dataset.\n",
+    "\n",
+    "\n",
+    "Let's start!"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "7LqHGkGEVqpm"
+   },
+   "source": [
+    "## Install MMAction2"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
     },
+    "id": "Bf8PpPXtVvmg",
+    "outputId": "9d3f4594-f151-4ee9-a19b-09f8a439ac04"
+   },
+   "outputs": [
     {
-      "cell_type": "code",
-      "execution_count": 7,
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "HNZB7NoSabzj",
-        "outputId": "c0c2ba71-72ff-4cac-a5b8-65590f5a6bb0"
-      },
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "Loads checkpoint by local backend from path: checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth\n"
-          ]
-        }
-      ],
-      "source": [
-        "from mmaction.apis import inference_recognizer, init_recognizer\n",
-        "from mmengine import Config\n",
-        "\n",
-        "\n",
-        "# Choose to use a config and initialize the recognizer\n",
-        "config = 'configs/recognition/tsn/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py'\n",
-        "config = Config.fromfile(config)\n",
-        "# Setup a checkpoint file to load\n",
-        "checkpoint = 'checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth'\n",
-        "# Initialize the recognizer\n",
-        "model = init_recognizer(config, checkpoint, device='cuda:0')"
-      ]
-    },
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "nvcc: NVIDIA (R) Cuda compiler driver\n",
+      "Copyright (c) 2005-2022 NVIDIA Corporation\n",
+      "Built on Wed_Sep_21_10:33:58_PDT_2022\n",
+      "Cuda compilation tools, release 11.8, V11.8.89\n",
+      "Build cuda_11.8.r11.8/compiler.31833905_0\n",
+      "gcc (Ubuntu 9.4.0-1ubuntu1~20.04.1) 9.4.0\n",
+      "Copyright (C) 2019 Free Software Foundation, Inc.\n",
+      "This is free software; see the source for copying conditions.  There is NO\n",
+      "warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Check nvcc version\n",
+    "!nvcc -V\n",
+    "# Check GCC version\n",
+    "!gcc --version"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {
+    "id": "ZPwKGzqydnb2",
+    "outputId": "27506fa7-48a2-4fe0-d377-56f940dafec4",
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    }
+   },
+   "outputs": [
     {
-      "cell_type": "code",
-      "execution_count": 8,
-      "metadata": {
-        "id": "rEMsBnpHapAn",
-        "outputId": "ec05049e-7289-4798-94fa-2b773cb23634",
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        }
-      },
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "05/15 03:33:18 - mmengine - WARNING - \"FileClient\" will be deprecated in future. Please use io functions in https://mmengine.readthedocs.io/en/latest/api/fileio.html#file-io\n",
-            "05/15 03:33:18 - mmengine - WARNING - \"HardDiskBackend\" is the alias of \"LocalBackend\" and the former will be deprecated in future.\n"
-          ]
-        }
-      ],
-      "source": [
-        "# Use the recognizer to do inference\n",
-        "from operator import itemgetter\n",
-        "video = 'demo/demo.mp4'\n",
-        "label = 'tools/data/kinetics/label_map_k400.txt'\n",
-        "results = inference_recognizer(model, video)\n",
-        "\n",
-        "pred_scores = results.pred_scores.item.tolist()\n",
-        "score_tuples = tuple(zip(range(len(pred_scores)), pred_scores))\n",
-        "score_sorted = sorted(score_tuples, key=itemgetter(1), reverse=True)\n",
-        "top5_label = score_sorted[:5]\n",
-        "\n",
-        "labels = open(label).readlines()\n",
-        "labels = [x.strip() for x in labels]\n",
-        "results = [(labels[k[0]], k[1]) for k in top5_label]\n"
-      ]
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "Looking in indexes: https://download.pytorch.org/whl/cu118, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
+      "Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (2.0.0+cu118)\n",
+      "Requirement already satisfied: torchvision in /usr/local/lib/python3.10/dist-packages (0.15.1+cu118)\n",
+      "Requirement already satisfied: torchaudio in /usr/local/lib/python3.10/dist-packages (2.0.1+cu118)\n",
+      "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch) (3.12.0)\n",
+      "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch) (4.5.0)\n",
+      "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch) (1.11.1)\n",
+      "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch) (3.1)\n",
+      "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch) (3.1.2)\n",
+      "Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.10/dist-packages (from torch) (2.0.0)\n",
+      "Requirement already satisfied: cmake in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch) (3.25.2)\n",
+      "Requirement already satisfied: lit in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch) (16.0.3)\n",
+      "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from torchvision) (1.22.4)\n",
+      "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from torchvision) (2.27.1)\n",
+      "Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in /usr/local/lib/python3.10/dist-packages (from torchvision) (8.4.0)\n",
+      "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch) (2.1.2)\n",
+      "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->torchvision) (1.26.15)\n",
+      "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->torchvision) (2022.12.7)\n",
+      "Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.10/dist-packages (from requests->torchvision) (2.0.12)\n",
+      "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->torchvision) (3.4)\n",
+      "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch) (1.3.0)\n"
+     ]
+    }
+   ],
+   "source": [
+    "# install dependencies: (if your colab has CUDA 11.8)\n",
+    "%pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
     },
+    "id": "5PAJ4ArzV5Ry",
+    "outputId": "eb8539a0-9524-4c48-f3e1-0b013ce0d344"
+   },
+   "outputs": [
     {
-      "cell_type": "code",
-      "execution_count": 9,
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "NIyJXqfWathq",
-        "outputId": "cb25aca9-e72d-4c54-f295-4c889713cb3a"
-      },
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "The top-5 labels with corresponding scores are:\n",
-            "arm wrestling:  1.0\n",
-            "rock scissors paper:  6.434453414527752e-09\n",
-            "shaking hands:  2.7599860175087088e-09\n",
-            "clapping:  1.3454612979302283e-09\n",
-            "massaging feet:  5.555100823784187e-10\n"
-          ]
-        }
-      ],
-      "source": [
-        "print('The top-5 labels with corresponding scores are:')\n",
-        "for result in results:\n",
-        "    print(f'{result[0]}: ', result[1])"
-      ]
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
+      "Collecting openmim\n",
+      "  Downloading openmim-0.3.7-py2.py3-none-any.whl (51 kB)\n",
+      "\u001B[2K     \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m51.3/51.3 kB\u001B[0m \u001B[31m4.0 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n",
+      "\u001B[?25hRequirement already satisfied: Click in /usr/local/lib/python3.10/dist-packages (from openmim) (8.1.3)\n",
+      "Collecting colorama (from openmim)\n",
+      "  Downloading colorama-0.4.6-py2.py3-none-any.whl (25 kB)\n",
+      "Collecting model-index (from openmim)\n",
+      "  Downloading model_index-0.1.11-py3-none-any.whl (34 kB)\n",
+      "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from openmim) (1.5.3)\n",
+      "Requirement already satisfied: pip>=19.3 in /usr/local/lib/python3.10/dist-packages (from openmim) (23.1.2)\n",
+      "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from openmim) (2.27.1)\n",
+      "Requirement already satisfied: rich in /usr/local/lib/python3.10/dist-packages (from openmim) (13.3.4)\n",
+      "Requirement already satisfied: tabulate in /usr/local/lib/python3.10/dist-packages (from openmim) (0.8.10)\n",
+      "Requirement already satisfied: pyyaml in /usr/local/lib/python3.10/dist-packages (from model-index->openmim) (6.0)\n",
+      "Requirement already satisfied: markdown in /usr/local/lib/python3.10/dist-packages (from model-index->openmim) (3.4.3)\n",
+      "Collecting ordered-set (from model-index->openmim)\n",
+      "  Downloading ordered_set-4.1.0-py3-none-any.whl (7.6 kB)\n",
+      "Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas->openmim) (2.8.2)\n",
+      "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->openmim) (2022.7.1)\n",
+      "Requirement already satisfied: numpy>=1.21.0 in /usr/local/lib/python3.10/dist-packages (from pandas->openmim) (1.22.4)\n",
+      "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->openmim) (1.26.15)\n",
+      "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->openmim) (2022.12.7)\n",
+      "Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.10/dist-packages (from requests->openmim) (2.0.12)\n",
+      "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->openmim) (3.4)\n",
+      "Requirement already satisfied: markdown-it-py<3.0.0,>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich->openmim) (2.2.0)\n",
+      "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from rich->openmim) (2.14.0)\n",
+      "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py<3.0.0,>=2.2.0->rich->openmim) (0.1.2)\n",
+      "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.1->pandas->openmim) (1.16.0)\n",
+      "Installing collected packages: ordered-set, colorama, model-index, openmim\n",
+      "Successfully installed colorama-0.4.6 model-index-0.1.11 openmim-0.3.7 ordered-set-4.1.0\n",
+      "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
+      "Looking in links: https://download.openmmlab.com/mmcv/dist/cu118/torch2.0.0/index.html\n",
+      "Collecting mmengine\n",
+      "  Downloading mmengine-0.7.3-py3-none-any.whl (372 kB)\n",
+      "\u001B[2K     \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m372.1/372.1 kB\u001B[0m \u001B[31m20.5 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n",
+      "\u001B[?25hCollecting addict (from mmengine)\n",
+      "  Downloading addict-2.4.0-py3-none-any.whl (3.8 kB)\n",
+      "Requirement already satisfied: matplotlib in /usr/local/lib/python3.10/dist-packages (from mmengine) (3.7.1)\n",
+      "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from mmengine) (1.22.4)\n",
+      "Requirement already satisfied: pyyaml in /usr/local/lib/python3.10/dist-packages (from mmengine) (6.0)\n",
+      "Requirement already satisfied: rich in /usr/local/lib/python3.10/dist-packages (from mmengine) (13.3.4)\n",
+      "Requirement already satisfied: termcolor in /usr/local/lib/python3.10/dist-packages (from mmengine) (2.3.0)\n",
+      "Collecting yapf (from mmengine)\n",
+      "  Downloading yapf-0.33.0-py2.py3-none-any.whl (200 kB)\n",
+      "\u001B[2K     \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m200.9/200.9 kB\u001B[0m \u001B[31m21.0 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n",
+      "\u001B[?25hRequirement already satisfied: opencv-python>=3 in /usr/local/lib/python3.10/dist-packages (from mmengine) (4.7.0.72)\n",
+      "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmengine) (1.0.7)\n",
+      "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmengine) (0.11.0)\n",
+      "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmengine) (4.39.3)\n",
+      "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmengine) (1.4.4)\n",
+      "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmengine) (23.1)\n",
+      "Requirement already satisfied: pillow>=6.2.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmengine) (8.4.0)\n",
+      "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmengine) (3.0.9)\n",
+      "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmengine) (2.8.2)\n",
+      "Requirement already satisfied: markdown-it-py<3.0.0,>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich->mmengine) (2.2.0)\n",
+      "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from rich->mmengine) (2.14.0)\n",
+      "Requirement already satisfied: tomli>=2.0.1 in /usr/local/lib/python3.10/dist-packages (from yapf->mmengine) (2.0.1)\n",
+      "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py<3.0.0,>=2.2.0->rich->mmengine) (0.1.2)\n",
+      "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.7->matplotlib->mmengine) (1.16.0)\n",
+      "Installing collected packages: addict, yapf, mmengine\n",
+      "Successfully installed addict-2.4.0 mmengine-0.7.3 yapf-0.33.0\n",
+      "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
+      "Looking in links: https://download.openmmlab.com/mmcv/dist/cu118/torch2.0.0/index.html\n",
+      "Collecting mmcv>=2.0.0\n",
+      "  Downloading https://download.openmmlab.com/mmcv/dist/cu118/torch2.0.0/mmcv-2.0.0-cp310-cp310-manylinux1_x86_64.whl (74.4 MB)\n",
+      "\u001B[2K     \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m74.4/74.4 MB\u001B[0m \u001B[31m9.7 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n",
+      "\u001B[?25hRequirement already satisfied: addict in /usr/local/lib/python3.10/dist-packages (from mmcv>=2.0.0) (2.4.0)\n",
+      "Requirement already satisfied: mmengine>=0.2.0 in /usr/local/lib/python3.10/dist-packages (from mmcv>=2.0.0) (0.7.3)\n",
+      "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from mmcv>=2.0.0) (1.22.4)\n",
+      "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from mmcv>=2.0.0) (23.1)\n",
+      "Requirement already satisfied: Pillow in /usr/local/lib/python3.10/dist-packages (from mmcv>=2.0.0) (8.4.0)\n",
+      "Requirement already satisfied: pyyaml in /usr/local/lib/python3.10/dist-packages (from mmcv>=2.0.0) (6.0)\n",
+      "Requirement already satisfied: yapf in /usr/local/lib/python3.10/dist-packages (from mmcv>=2.0.0) (0.33.0)\n",
+      "Requirement already satisfied: opencv-python>=3 in /usr/local/lib/python3.10/dist-packages (from mmcv>=2.0.0) (4.7.0.72)\n",
+      "Requirement already satisfied: matplotlib in /usr/local/lib/python3.10/dist-packages (from mmengine>=0.2.0->mmcv>=2.0.0) (3.7.1)\n",
+      "Requirement already satisfied: rich in /usr/local/lib/python3.10/dist-packages (from mmengine>=0.2.0->mmcv>=2.0.0) (13.3.4)\n",
+      "Requirement already satisfied: termcolor in /usr/local/lib/python3.10/dist-packages (from mmengine>=0.2.0->mmcv>=2.0.0) (2.3.0)\n",
+      "Requirement already satisfied: tomli>=2.0.1 in /usr/local/lib/python3.10/dist-packages (from yapf->mmcv>=2.0.0) (2.0.1)\n",
+      "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmengine>=0.2.0->mmcv>=2.0.0) (1.0.7)\n",
+      "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmengine>=0.2.0->mmcv>=2.0.0) (0.11.0)\n",
+      "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmengine>=0.2.0->mmcv>=2.0.0) (4.39.3)\n",
+      "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmengine>=0.2.0->mmcv>=2.0.0) (1.4.4)\n",
+      "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmengine>=0.2.0->mmcv>=2.0.0) (3.0.9)\n",
+      "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmengine>=0.2.0->mmcv>=2.0.0) (2.8.2)\n",
+      "Requirement already satisfied: markdown-it-py<3.0.0,>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich->mmengine>=0.2.0->mmcv>=2.0.0) (2.2.0)\n",
+      "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from rich->mmengine>=0.2.0->mmcv>=2.0.0) (2.14.0)\n",
+      "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py<3.0.0,>=2.2.0->rich->mmengine>=0.2.0->mmcv>=2.0.0) (0.1.2)\n",
+      "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.7->matplotlib->mmengine>=0.2.0->mmcv>=2.0.0) (1.16.0)\n",
+      "Installing collected packages: mmcv\n",
+      "Successfully installed mmcv-2.0.0\n",
+      "Cloning into 'mmaction2'...\n",
+      "remote: Enumerating objects: 21284, done.\u001B[K\n",
+      "remote: Counting objects: 100% (394/394), done.\u001B[K\n",
+      "remote: Compressing objects: 100% (287/287), done.\u001B[K\n",
+      "remote: Total 21284 (delta 175), reused 248 (delta 103), pack-reused 20890\u001B[K\n",
+      "Receiving objects: 100% (21284/21284), 68.63 MiB | 16.59 MiB/s, done.\n",
+      "Resolving deltas: 100% (14990/14990), done.\n",
+      "/content/mmaction2\n",
+      "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
+      "Obtaining file:///content/mmaction2\n",
+      "  Preparing metadata (setup.py) ... \u001B[?25l\u001B[?25hdone\n",
+      "Collecting decord>=0.4.1 (from mmaction2==1.0.0)\n",
+      "  Downloading decord-0.6.0-py3-none-manylinux2010_x86_64.whl (13.6 MB)\n",
+      "\u001B[2K     \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m13.6/13.6 MB\u001B[0m \u001B[31m76.9 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n",
+      "\u001B[?25hCollecting einops (from mmaction2==1.0.0)\n",
+      "  Downloading einops-0.6.1-py3-none-any.whl (42 kB)\n",
+      "\u001B[2K     \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m42.2/42.2 kB\u001B[0m \u001B[31m4.5 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n",
+      "\u001B[?25hRequirement already satisfied: matplotlib in /usr/local/lib/python3.10/dist-packages (from mmaction2==1.0.0) (3.7.1)\n",
+      "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from mmaction2==1.0.0) (1.22.4)\n",
+      "Requirement already satisfied: opencv-contrib-python in /usr/local/lib/python3.10/dist-packages (from mmaction2==1.0.0) (4.7.0.72)\n",
+      "Requirement already satisfied: Pillow in /usr/local/lib/python3.10/dist-packages (from mmaction2==1.0.0) (8.4.0)\n",
+      "Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from mmaction2==1.0.0) (1.10.1)\n",
+      "Requirement already satisfied: torch>=1.3 in /usr/local/lib/python3.10/dist-packages (from mmaction2==1.0.0) (2.0.0+cu118)\n",
+      "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch>=1.3->mmaction2==1.0.0) (3.12.0)\n",
+      "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch>=1.3->mmaction2==1.0.0) (4.5.0)\n",
+      "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=1.3->mmaction2==1.0.0) (1.11.1)\n",
+      "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.3->mmaction2==1.0.0) (3.1)\n",
+      "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.3->mmaction2==1.0.0) (3.1.2)\n",
+      "Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.3->mmaction2==1.0.0) (2.0.0)\n",
+      "Requirement already satisfied: cmake in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch>=1.3->mmaction2==1.0.0) (3.25.2)\n",
+      "Requirement already satisfied: lit in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch>=1.3->mmaction2==1.0.0) (16.0.3)\n",
+      "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmaction2==1.0.0) (1.0.7)\n",
+      "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmaction2==1.0.0) (0.11.0)\n",
+      "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmaction2==1.0.0) (4.39.3)\n",
+      "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmaction2==1.0.0) (1.4.4)\n",
+      "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmaction2==1.0.0) (23.1)\n",
+      "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmaction2==1.0.0) (3.0.9)\n",
+      "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib->mmaction2==1.0.0) (2.8.2)\n",
+      "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.7->matplotlib->mmaction2==1.0.0) (1.16.0)\n",
+      "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=1.3->mmaction2==1.0.0) (2.1.2)\n",
+      "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.3->mmaction2==1.0.0) (1.3.0)\n",
+      "Installing collected packages: einops, decord, mmaction2\n",
+      "  Running setup.py develop for mmaction2\n",
+      "Successfully installed decord-0.6.0 einops-0.6.1 mmaction2-1.0.0\n",
+      "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
+      "Collecting av>=9.0 (from -r requirements/optional.txt (line 1))\n",
+      "  Downloading av-10.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (31.0 MB)\n",
+      "\u001B[2K     \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m31.0/31.0 MB\u001B[0m \u001B[31m38.3 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n",
+      "\u001B[?25hRequirement already satisfied: future in /usr/local/lib/python3.10/dist-packages (from -r requirements/optional.txt (line 2)) (0.18.3)\n",
+      "Collecting fvcore (from -r requirements/optional.txt (line 3))\n",
+      "  Downloading fvcore-0.1.5.post20221221.tar.gz (50 kB)\n",
+      "\u001B[2K     \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m50.2/50.2 kB\u001B[0m \u001B[31m6.7 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n",
+      "\u001B[?25h  Preparing metadata (setup.py) ... \u001B[?25l\u001B[?25hdone\n",
+      "Requirement already satisfied: imgaug in /usr/local/lib/python3.10/dist-packages (from -r requirements/optional.txt (line 4)) (0.4.0)\n",
+      "Requirement already satisfied: librosa in /usr/local/lib/python3.10/dist-packages (from -r requirements/optional.txt (line 5)) (0.10.0.post2)\n",
+      "Collecting lmdb (from -r requirements/optional.txt (line 6))\n",
+      "  Downloading lmdb-1.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (299 kB)\n",
+      "\u001B[2K     \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m299.2/299.2 kB\u001B[0m \u001B[31m30.2 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n",
+      "\u001B[?25hRequirement already satisfied: moviepy in /usr/local/lib/python3.10/dist-packages (from -r requirements/optional.txt (line 7)) (1.0.3)\n",
+      "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from -r requirements/optional.txt (line 8)) (23.1)\n",
+      "Collecting pims (from -r requirements/optional.txt (line 9))\n",
+      "  Downloading PIMS-0.6.1.tar.gz (86 kB)\n",
+      "\u001B[2K     \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m86.0/86.0 kB\u001B[0m \u001B[31m12.2 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n",
+      "\u001B[?25h  Preparing metadata (setup.py) ... \u001B[?25l\u001B[?25hdone\n",
+      "Collecting PyTurboJPEG (from -r requirements/optional.txt (line 10))\n",
+      "  Downloading PyTurboJPEG-1.7.1.tar.gz (11 kB)\n",
+      "  Preparing metadata (setup.py) ... \u001B[?25l\u001B[?25hdone\n",
+      "Requirement already satisfied: soundfile in /usr/local/lib/python3.10/dist-packages (from -r requirements/optional.txt (line 11)) (0.12.1)\n",
+      "Requirement already satisfied: tensorboard in /usr/local/lib/python3.10/dist-packages (from -r requirements/optional.txt (line 12)) (2.12.2)\n",
+      "Collecting wandb (from -r requirements/optional.txt (line 13))\n",
+      "  Downloading wandb-0.15.2-py3-none-any.whl (2.0 MB)\n",
+      "\u001B[2K     \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m2.0/2.0 MB\u001B[0m \u001B[31m79.1 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n",
+      "\u001B[?25hRequirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from fvcore->-r requirements/optional.txt (line 3)) (1.22.4)\n",
+      "Collecting yacs>=0.1.6 (from fvcore->-r requirements/optional.txt (line 3))\n",
+      "  Downloading yacs-0.1.8-py3-none-any.whl (14 kB)\n",
+      "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from fvcore->-r requirements/optional.txt (line 3)) (6.0)\n",
+      "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from fvcore->-r requirements/optional.txt (line 3)) (4.65.0)\n",
+      "Requirement already satisfied: termcolor>=1.1 in /usr/local/lib/python3.10/dist-packages (from fvcore->-r requirements/optional.txt (line 3)) (2.3.0)\n",
+      "Requirement already satisfied: Pillow in /usr/local/lib/python3.10/dist-packages (from fvcore->-r requirements/optional.txt (line 3)) (8.4.0)\n",
+      "Requirement already satisfied: tabulate in /usr/local/lib/python3.10/dist-packages (from fvcore->-r requirements/optional.txt (line 3)) (0.8.10)\n",
+      "Collecting iopath>=0.1.7 (from fvcore->-r requirements/optional.txt (line 3))\n",
+      "  Downloading iopath-0.1.10.tar.gz (42 kB)\n",
+      "\u001B[2K     \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m42.2/42.2 kB\u001B[0m \u001B[31m4.8 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n",
+      "\u001B[?25h  Preparing metadata (setup.py) ... \u001B[?25l\u001B[?25hdone\n",
+      "Requirement already satisfied: six in /usr/local/lib/python3.10/dist-packages (from imgaug->-r requirements/optional.txt (line 4)) (1.16.0)\n",
+      "Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from imgaug->-r requirements/optional.txt (line 4)) (1.10.1)\n",
+      "Requirement already satisfied: matplotlib in /usr/local/lib/python3.10/dist-packages (from imgaug->-r requirements/optional.txt (line 4)) (3.7.1)\n",
+      "Requirement already satisfied: scikit-image>=0.14.2 in /usr/local/lib/python3.10/dist-packages (from imgaug->-r requirements/optional.txt (line 4)) (0.19.3)\n",
+      "Requirement already satisfied: opencv-python in /usr/local/lib/python3.10/dist-packages (from imgaug->-r requirements/optional.txt (line 4)) (4.7.0.72)\n",
+      "Requirement already satisfied: imageio in /usr/local/lib/python3.10/dist-packages (from imgaug->-r requirements/optional.txt (line 4)) (2.25.1)\n",
+      "Requirement already satisfied: Shapely in /usr/local/lib/python3.10/dist-packages (from imgaug->-r requirements/optional.txt (line 4)) (2.0.1)\n",
+      "Requirement already satisfied: audioread>=2.1.9 in /usr/local/lib/python3.10/dist-packages (from librosa->-r requirements/optional.txt (line 5)) (3.0.0)\n",
+      "Requirement already satisfied: scikit-learn>=0.20.0 in /usr/local/lib/python3.10/dist-packages (from librosa->-r requirements/optional.txt (line 5)) (1.2.2)\n",
+      "Requirement already satisfied: joblib>=0.14 in /usr/local/lib/python3.10/dist-packages (from librosa->-r requirements/optional.txt (line 5)) (1.2.0)\n",
+      "Requirement already satisfied: decorator>=4.3.0 in /usr/local/lib/python3.10/dist-packages (from librosa->-r requirements/optional.txt (line 5)) (4.4.2)\n",
+      "Requirement already satisfied: numba>=0.51.0 in /usr/local/lib/python3.10/dist-packages (from librosa->-r requirements/optional.txt (line 5)) (0.56.4)\n",
+      "Requirement already satisfied: pooch<1.7,>=1.0 in /usr/local/lib/python3.10/dist-packages (from librosa->-r requirements/optional.txt (line 5)) (1.6.0)\n",
+      "Requirement already satisfied: soxr>=0.3.2 in /usr/local/lib/python3.10/dist-packages (from librosa->-r requirements/optional.txt (line 5)) (0.3.5)\n",
+      "Requirement already satisfied: typing-extensions>=4.1.1 in /usr/local/lib/python3.10/dist-packages (from librosa->-r requirements/optional.txt (line 5)) (4.5.0)\n",
+      "Requirement already satisfied: lazy-loader>=0.1 in /usr/local/lib/python3.10/dist-packages (from librosa->-r requirements/optional.txt (line 5)) (0.2)\n",
+      "Requirement already satisfied: msgpack>=1.0 in /usr/local/lib/python3.10/dist-packages (from librosa->-r requirements/optional.txt (line 5)) (1.0.5)\n",
+      "Requirement already satisfied: requests<3.0,>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from moviepy->-r requirements/optional.txt (line 7)) (2.27.1)\n",
+      "Requirement already satisfied: proglog<=1.0.0 in /usr/local/lib/python3.10/dist-packages (from moviepy->-r requirements/optional.txt (line 7)) (0.1.10)\n",
+      "Requirement already satisfied: imageio-ffmpeg>=0.2.0 in /usr/local/lib/python3.10/dist-packages (from moviepy->-r requirements/optional.txt (line 7)) (0.4.8)\n",
+      "Collecting slicerator>=0.9.8 (from pims->-r requirements/optional.txt (line 9))\n",
+      "  Downloading slicerator-1.1.0-py3-none-any.whl (10 kB)\n",
+      "Requirement already satisfied: cffi>=1.0 in /usr/local/lib/python3.10/dist-packages (from soundfile->-r requirements/optional.txt (line 11)) (1.15.1)\n",
+      "Requirement already satisfied: absl-py>=0.4 in /usr/local/lib/python3.10/dist-packages (from tensorboard->-r requirements/optional.txt (line 12)) (1.4.0)\n",
+      "Requirement already satisfied: grpcio>=1.48.2 in /usr/local/lib/python3.10/dist-packages (from tensorboard->-r requirements/optional.txt (line 12)) (1.54.0)\n",
+      "Requirement already satisfied: google-auth<3,>=1.6.3 in /usr/local/lib/python3.10/dist-packages (from tensorboard->-r requirements/optional.txt (line 12)) (2.17.3)\n",
+      "Requirement already satisfied: google-auth-oauthlib<1.1,>=0.5 in /usr/local/lib/python3.10/dist-packages (from tensorboard->-r requirements/optional.txt (line 12)) (1.0.0)\n",
+      "Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.10/dist-packages (from tensorboard->-r requirements/optional.txt (line 12)) (3.4.3)\n",
+      "Requirement already satisfied: protobuf>=3.19.6 in /usr/local/lib/python3.10/dist-packages (from tensorboard->-r requirements/optional.txt (line 12)) (3.20.3)\n",
+      "Requirement already satisfied: setuptools>=41.0.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard->-r requirements/optional.txt (line 12)) (67.7.2)\n",
+      "Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard->-r requirements/optional.txt (line 12)) (0.7.0)\n",
+      "Requirement already satisfied: tensorboard-plugin-wit>=1.6.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard->-r requirements/optional.txt (line 12)) (1.8.1)\n",
+      "Requirement already satisfied: werkzeug>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from tensorboard->-r requirements/optional.txt (line 12)) (2.3.0)\n",
+      "Requirement already satisfied: wheel>=0.26 in /usr/local/lib/python3.10/dist-packages (from tensorboard->-r requirements/optional.txt (line 12)) (0.40.0)\n",
+      "Requirement already satisfied: Click!=8.0.0,>=7.0 in /usr/local/lib/python3.10/dist-packages (from wandb->-r requirements/optional.txt (line 13)) (8.1.3)\n",
+      "Collecting GitPython!=3.1.29,>=1.0.0 (from wandb->-r requirements/optional.txt (line 13))\n",
+      "  Downloading GitPython-3.1.31-py3-none-any.whl (184 kB)\n",
+      "\u001B[2K     \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m184.3/184.3 kB\u001B[0m \u001B[31m22.9 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n",
+      "\u001B[?25hRequirement already satisfied: psutil>=5.0.0 in /usr/local/lib/python3.10/dist-packages (from wandb->-r requirements/optional.txt (line 13)) (5.9.5)\n",
+      "Collecting sentry-sdk>=1.0.0 (from wandb->-r requirements/optional.txt (line 13))\n",
+      "  Downloading sentry_sdk-1.22.2-py2.py3-none-any.whl (203 kB)\n",
+      "\u001B[2K     \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m203.3/203.3 kB\u001B[0m \u001B[31m25.7 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n",
+      "\u001B[?25hCollecting docker-pycreds>=0.4.0 (from wandb->-r requirements/optional.txt (line 13))\n",
+      "  Downloading docker_pycreds-0.4.0-py2.py3-none-any.whl (9.0 kB)\n",
+      "Collecting pathtools (from wandb->-r requirements/optional.txt (line 13))\n",
+      "  Downloading pathtools-0.1.2.tar.gz (11 kB)\n",
+      "  Preparing metadata (setup.py) ... \u001B[?25l\u001B[?25hdone\n",
+      "Collecting setproctitle (from wandb->-r requirements/optional.txt (line 13))\n",
+      "  Downloading setproctitle-1.3.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (30 kB)\n",
+      "Requirement already satisfied: appdirs>=1.4.3 in /usr/local/lib/python3.10/dist-packages (from wandb->-r requirements/optional.txt (line 13)) (1.4.4)\n",
+      "Requirement already satisfied: pycparser in /usr/local/lib/python3.10/dist-packages (from cffi>=1.0->soundfile->-r requirements/optional.txt (line 11)) (2.21)\n",
+      "Collecting gitdb<5,>=4.0.1 (from GitPython!=3.1.29,>=1.0.0->wandb->-r requirements/optional.txt (line 13))\n",
+      "  Downloading gitdb-4.0.10-py3-none-any.whl (62 kB)\n",
+      "\u001B[2K     \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m62.7/62.7 kB\u001B[0m \u001B[31m9.0 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n",
+      "\u001B[?25hRequirement already satisfied: cachetools<6.0,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard->-r requirements/optional.txt (line 12)) (5.3.0)\n",
+      "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard->-r requirements/optional.txt (line 12)) (0.3.0)\n",
+      "Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard->-r requirements/optional.txt (line 12)) (4.9)\n",
+      "Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from google-auth-oauthlib<1.1,>=0.5->tensorboard->-r requirements/optional.txt (line 12)) (1.3.1)\n",
+      "Collecting portalocker (from iopath>=0.1.7->fvcore->-r requirements/optional.txt (line 3))\n",
+      "  Downloading portalocker-2.7.0-py2.py3-none-any.whl (15 kB)\n",
+      "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.10/dist-packages (from numba>=0.51.0->librosa->-r requirements/optional.txt (line 5)) (0.39.1)\n",
+      "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3.0,>=2.8.1->moviepy->-r requirements/optional.txt (line 7)) (1.26.15)\n",
+      "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3.0,>=2.8.1->moviepy->-r requirements/optional.txt (line 7)) (2022.12.7)\n",
+      "Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.10/dist-packages (from requests<3.0,>=2.8.1->moviepy->-r requirements/optional.txt (line 7)) (2.0.12)\n",
+      "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3.0,>=2.8.1->moviepy->-r requirements/optional.txt (line 7)) (3.4)\n",
+      "Requirement already satisfied: networkx>=2.2 in /usr/local/lib/python3.10/dist-packages (from scikit-image>=0.14.2->imgaug->-r requirements/optional.txt (line 4)) (3.1)\n",
+      "Requirement already satisfied: tifffile>=2019.7.26 in /usr/local/lib/python3.10/dist-packages (from scikit-image>=0.14.2->imgaug->-r requirements/optional.txt (line 4)) (2023.4.12)\n",
+      "Requirement already satisfied: PyWavelets>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from scikit-image>=0.14.2->imgaug->-r requirements/optional.txt (line 4)) (1.4.1)\n",
+      "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=0.20.0->librosa->-r requirements/optional.txt (line 5)) (3.1.0)\n",
+      "Requirement already satisfied: MarkupSafe>=2.1.1 in /usr/local/lib/python3.10/dist-packages (from werkzeug>=1.0.1->tensorboard->-r requirements/optional.txt (line 12)) (2.1.2)\n",
+      "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->imgaug->-r requirements/optional.txt (line 4)) (1.0.7)\n",
+      "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib->imgaug->-r requirements/optional.txt (line 4)) (0.11.0)\n",
+      "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->imgaug->-r requirements/optional.txt (line 4)) (4.39.3)\n",
+      "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->imgaug->-r requirements/optional.txt (line 4)) (1.4.4)\n",
+      "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->imgaug->-r requirements/optional.txt (line 4)) (3.0.9)\n",
+      "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib->imgaug->-r requirements/optional.txt (line 4)) (2.8.2)\n",
+      "Collecting smmap<6,>=3.0.1 (from gitdb<5,>=4.0.1->GitPython!=3.1.29,>=1.0.0->wandb->-r requirements/optional.txt (line 13))\n",
+      "  Downloading smmap-5.0.0-py3-none-any.whl (24 kB)\n",
+      "Requirement already satisfied: pyasn1<0.6.0,>=0.4.6 in /usr/local/lib/python3.10/dist-packages (from pyasn1-modules>=0.2.1->google-auth<3,>=1.6.3->tensorboard->-r requirements/optional.txt (line 12)) (0.5.0)\n",
+      "Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.10/dist-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<1.1,>=0.5->tensorboard->-r requirements/optional.txt (line 12)) (3.2.2)\n",
+      "Building wheels for collected packages: fvcore, pims, PyTurboJPEG, iopath, pathtools\n",
+      "  Building wheel for fvcore (setup.py) ... \u001B[?25l\u001B[?25hdone\n",
+      "  Created wheel for fvcore: filename=fvcore-0.1.5.post20221221-py3-none-any.whl size=61405 sha256=25c1e50155c8788d00eec898793c96133a746a8bb076ffc5c01f5a4dc256751e\n",
+      "  Stored in directory: /root/.cache/pip/wheels/01/c0/af/77c1cf53a1be9e42a52b48e5af2169d40ec2e89f7362489dd0\n",
+      "  Building wheel for pims (setup.py) ... \u001B[?25l\u001B[?25hdone\n",
+      "  Created wheel for pims: filename=PIMS-0.6.1-py3-none-any.whl size=82619 sha256=59a328dc88a438c60cfb6e937e04c8a7dd55ad2a2905034cd41ff80cdbba6497\n",
+      "  Stored in directory: /root/.cache/pip/wheels/cc/bf/3e/bfa77232d942f8244145f9c713b6b38f6ef04b6fb5c021c114\n",
+      "  Building wheel for PyTurboJPEG (setup.py) ... \u001B[?25l\u001B[?25hdone\n",
+      "  Created wheel for PyTurboJPEG: filename=PyTurboJPEG-1.7.1-py3-none-any.whl size=12243 sha256=ddf6424c85ac533335abd96dd9e98b014ea1dd4f143c88cd35ecb08d6128f411\n",
+      "  Stored in directory: /root/.cache/pip/wheels/de/6e/b1/e7ba70c328c3395555cb92ca8820babb32950d867858b1948b\n",
+      "  Building wheel for iopath (setup.py) ... \u001B[?25l\u001B[?25hdone\n",
+      "  Created wheel for iopath: filename=iopath-0.1.10-py3-none-any.whl size=31531 sha256=db977a4344bebbdd710665e767caab4fbcf53cc6aea0707cd38d26c45718331e\n",
+      "  Stored in directory: /root/.cache/pip/wheels/9a/a3/b6/ac0fcd1b4ed5cfeb3db92e6a0e476cfd48ed0df92b91080c1d\n",
+      "  Building wheel for pathtools (setup.py) ... \u001B[?25l\u001B[?25hdone\n",
+      "  Created wheel for pathtools: filename=pathtools-0.1.2-py3-none-any.whl size=8791 sha256=08bb5753ce029aef01f25c3e81882d93c0e040e5932e90a02a062ad058756b52\n",
+      "  Stored in directory: /root/.cache/pip/wheels/e7/f3/22/152153d6eb222ee7a56ff8617d80ee5207207a8c00a7aab794\n",
+      "Successfully built fvcore pims PyTurboJPEG iopath pathtools\n",
+      "Installing collected packages: slicerator, pathtools, lmdb, av, yacs, smmap, setproctitle, sentry-sdk, PyTurboJPEG, portalocker, docker-pycreds, pims, iopath, gitdb, GitPython, fvcore, wandb\n",
+      "Successfully installed GitPython-3.1.31 PyTurboJPEG-1.7.1 av-10.0.0 docker-pycreds-0.4.0 fvcore-0.1.5.post20221221 gitdb-4.0.10 iopath-0.1.10 lmdb-1.4.1 pathtools-0.1.2 pims-0.6.1 portalocker-2.7.0 sentry-sdk-1.22.2 setproctitle-1.3.2 slicerator-1.1.0 smmap-5.0.0 wandb-0.15.2 yacs-0.1.8\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Install MMEngine and MMCV using MIM\n",
+    "%pip install -U openmim\n",
+    "!mim install mmengine\n",
+    "!mim install \"mmcv>=2.0.0\"\n",
+    "\n",
+    "# Install mmaction2\n",
+    "!rm -rf mmaction2\n",
+    "!git clone https://github.com/open-mmlab/mmaction2.git -b main\n",
+    "%cd mmaction2\n",
+    "\n",
+    "!pip install -e .\n",
+    "\n",
+    "# Install some optional requirements\n",
+    "!pip install -r requirements/optional.txt"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
     },
+    "id": "No_zZAFpWC-a",
+    "outputId": "9386dd81-2308-4adb-d3cb-798de11c035e"
+   },
+   "outputs": [
     {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "QuZG8kZ2fJ5d"
-      },
-      "source": [
-        "## Train a recognizer on customized dataset\n",
-        "\n",
-        "To train a new recognizer, there are usually three things to do:\n",
-        "1. Support a new dataset\n",
-        "2. Modify the config\n",
-        "3. Train a new recognizer"
-      ]
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "2.0.0+cu118 True\n",
+      "1.0.0\n",
+      "11.8\n",
+      "GCC 9.3\n",
+      "OrderedDict([('sys.platform', 'linux'), ('Python', '3.10.11 (main, Apr  5 2023, 14:15:10) [GCC 9.4.0]'), ('CUDA available', True), ('numpy_random_seed', 2147483648), ('GPU 0', 'Tesla T4'), ('CUDA_HOME', '/usr/local/cuda'), ('NVCC', 'Cuda compilation tools, release 11.8, V11.8.89'), ('GCC', 'x86_64-linux-gnu-gcc (Ubuntu 9.4.0-1ubuntu1~20.04.1) 9.4.0'), ('PyTorch', '2.0.0+cu118'), ('PyTorch compiling details', 'PyTorch built with:\\n  - GCC 9.3\\n  - C++ Version: 201703\\n  - Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications\\n  - Intel(R) MKL-DNN v2.7.3 (Git Hash 6dbeffbae1f23cbbeae17adb7b5b13f1f37c080e)\\n  - OpenMP 201511 (a.k.a. OpenMP 4.5)\\n  - LAPACK is enabled (usually provided by MKL)\\n  - NNPACK is enabled\\n  - CPU capability usage: AVX2\\n  - CUDA Runtime 11.8\\n  - NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_90,code=sm_90\\n  - CuDNN 8.7\\n  - Magma 2.6.1\\n  - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=11.8, CUDNN_VERSION=8.7.0, CXX_COMPILER=/opt/rh/devtoolset-9/root/usr/bin/c++, CXX_FLAGS= -D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wunused-local-typedefs -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow 
-Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_DISABLE_GPU_ASSERTS=ON, TORCH_VERSION=2.0.0, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=1, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, \\n'), ('TorchVision', '0.15.1+cu118'), ('OpenCV', '4.7.0'), ('MMEngine', '0.7.3')])\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Check PyTorch installation\n",
+    "import torch, torchvision\n",
+    "print(torch.__version__, torch.cuda.is_available())\n",
+    "\n",
+    "# Check MMAction2 installation\n",
+    "import mmaction\n",
+    "print(mmaction.__version__)\n",
+    "\n",
+    "# Check MMCV installation\n",
+    "from mmcv.ops import get_compiling_cuda_version, get_compiler_version\n",
+    "print(get_compiling_cuda_version())\n",
+    "print(get_compiler_version())\n",
+    "\n",
+    "# Check MMEngine installation\n",
+    "from mmengine.utils.dl_utils import collect_env\n",
+    "print(collect_env())"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "pXf7oV5DWdab"
+   },
+   "source": [
+    "## Perform inference with a MMAction2 recognizer\n",
+    "MMAction2 already provides high-level APIs for inference and training."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
     },
+    "id": "64CW6d_AaT-Q",
+    "outputId": "ea330d8c-2e20-4dbd-d046-51d7c9ec4f7a"
+   },
+   "outputs": [
     {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "neEFyxChfgiJ"
-      },
-      "source": [
-        "### Support a new dataset\n",
-        "\n",
-        "In this tutorial, we gives an example to convert the data into the format of existing datasets. Other methods and more advanced usages can be found in the [doc](/docs/tutorials/new_dataset.md)\n",
-        "\n",
-        "Firstly, let's download a tiny dataset obtained from [Kinetics-400](https://deepmind.com/research/open-source/open-source-datasets/kinetics/). We select 30 videos with their labels as train dataset and 10 videos with their labels as test dataset."
-      ]
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "--2023-05-15 03:33:08--  https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_1x1x3_100e_kinetics400_rgb/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth\n",
+      "Resolving download.openmmlab.com (download.openmmlab.com)... 163.181.82.216, 163.181.82.218, 163.181.82.213, ...\n",
+      "Connecting to download.openmmlab.com (download.openmmlab.com)|163.181.82.216|:443... connected.\n",
+      "HTTP request sent, awaiting response... 200 OK\n",
+      "Length: 97579339 (93M) [application/octet-stream]\n",
+      "Saving to: ‘checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth’\n",
+      "\n",
+      "checkpoints/tsn_r50 100%[===================>]  93.06M  26.1MB/s    in 3.6s    \n",
+      "\n",
+      "2023-05-15 03:33:12 (26.2 MB/s) - ‘checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth’ saved [97579339/97579339]\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "!mkdir checkpoints\n",
+    "!wget -c https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_1x1x3_100e_kinetics400_rgb/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth \\\n",
+    "      -O checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
     },
+    "id": "HNZB7NoSabzj",
+    "outputId": "c0c2ba71-72ff-4cac-a5b8-65590f5a6bb0"
+   },
+   "outputs": [
     {
-      "cell_type": "code",
-      "execution_count": 10,
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "gjsUj9JzgUlJ",
-        "outputId": "96a0e6e9-0dd8-4c07-9fed-22b93d5c1318"
-      },
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "rm: cannot remove 'kinetics400_tiny.zip*': No such file or directory\n",
-            "--2023-05-15 03:33:27--  https://download.openmmlab.com/mmaction/kinetics400_tiny.zip\n",
-            "Resolving download.openmmlab.com (download.openmmlab.com)... 163.181.82.216, 163.181.82.218, 163.181.82.213, ...\n",
-            "Connecting to download.openmmlab.com (download.openmmlab.com)|163.181.82.216|:443... connected.\n",
-            "HTTP request sent, awaiting response... 200 OK\n",
-            "Length: 18308682 (17M) [application/zip]\n",
-            "Saving to: ‘kinetics400_tiny.zip’\n",
-            "\n",
-            "kinetics400_tiny.zi 100%[===================>]  17.46M  32.7MB/s    in 0.5s    \n",
-            "\n",
-            "2023-05-15 03:33:28 (32.7 MB/s) - ‘kinetics400_tiny.zip’ saved [18308682/18308682]\n",
-            "\n"
-          ]
-        }
-      ],
-      "source": [
-        "# download, decompress the data\n",
-        "!rm kinetics400_tiny.zip*\n",
-        "!rm -rf kinetics400_tiny\n",
-        "!wget https://download.openmmlab.com/mmaction/kinetics400_tiny.zip\n",
-        "!unzip kinetics400_tiny.zip > /dev/null"
-      ]
-    },
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "Loads checkpoint by local backend from path: checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth\n"
+     ]
+    }
+   ],
+   "source": [
+    "from mmaction.apis import inference_recognizer, init_recognizer\n",
+    "from mmengine import Config\n",
+    "\n",
+    "\n",
+    "# Choose to use a config and initialize the recognizer\n",
+    "config = 'configs/recognition/tsn/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py'\n",
+    "config = Config.fromfile(config)\n",
+    "# Setup a checkpoint file to load\n",
+    "checkpoint = 'checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth'\n",
+    "# Initialize the recognizer\n",
+    "model = init_recognizer(config, checkpoint, device='cuda:0')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {
+    "id": "rEMsBnpHapAn",
+    "outputId": "ec05049e-7289-4798-94fa-2b773cb23634",
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    }
+   },
+   "outputs": [
     {
-      "cell_type": "code",
-      "execution_count": 11,
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "AbZ-o7V6hNw4",
-        "outputId": "f229f352-1b43-41b7-a374-21404f618581"
-      },
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "Reading package lists...\n",
-            "Building dependency tree...\n",
-            "Reading state information...\n",
-            "The following NEW packages will be installed:\n",
-            "  tree\n",
-            "0 upgraded, 1 newly installed, 0 to remove and 24 not upgraded.\n",
-            "Need to get 43.0 kB of archives.\n",
-            "After this operation, 115 kB of additional disk space will be used.\n",
-            "Get:1 http://archive.ubuntu.com/ubuntu focal/universe amd64 tree amd64 1.8.0-1 [43.0 kB]\n",
-            "Fetched 43.0 kB in 1s (48.9 kB/s)\n",
-            "Selecting previously unselected package tree.\n",
-            "(Reading database ... 122519 files and directories currently installed.)\n",
-            "Preparing to unpack .../tree_1.8.0-1_amd64.deb ...\n",
-            "Unpacking tree (1.8.0-1) ...\n",
-            "Setting up tree (1.8.0-1) ...\n",
-            "Processing triggers for man-db (2.9.1-1) ...\n",
-            "\u001b[01;34mkinetics400_tiny\u001b[00m\n",
-            "├── kinetics_tiny_train_video.txt\n",
-            "├── kinetics_tiny_val_video.txt\n",
-            "├── \u001b[01;34mtrain\u001b[00m\n",
-            "│   ├── 27_CSXByd3s.mp4\n",
-            "│   ├── 34XczvTaRiI.mp4\n",
-            "│   ├── A-wiliK50Zw.mp4\n",
-            "│   ├── D32_1gwq35E.mp4\n",
-            "│   ├── D92m0HsHjcQ.mp4\n",
-            "│   ├── DbX8mPslRXg.mp4\n",
-            "│   ├── FMlSTTpN3VY.mp4\n",
-            "│   ├── h10B9SVE-nk.mp4\n",
-            "│   ├── h2YqqUhnR34.mp4\n",
-            "│   ├── iRuyZSKhHRg.mp4\n",
-            "│   ├── IyfILH9lBRo.mp4\n",
-            "│   ├── kFC3KY2bOP8.mp4\n",
-            "│   ├── LvcFDgCAXQs.mp4\n",
-            "│   ├── O46YA8tI530.mp4\n",
-            "│   ├── oMrZaozOvdQ.mp4\n",
-            "│   ├── oXy-e_P_cAI.mp4\n",
-            "│   ├── P5M-hAts7MQ.mp4\n",
-            "│   ├── phDqGd0NKoo.mp4\n",
-            "│   ├── PnOe3GZRVX8.mp4\n",
-            "│   ├── R8HXQkdgKWA.mp4\n",
-            "│   ├── RqnKtCEoEcA.mp4\n",
-            "│   ├── soEcZZsBmDs.mp4\n",
-            "│   ├── TkkZPZHbAKA.mp4\n",
-            "│   ├── T_TMNGzVrDk.mp4\n",
-            "│   ├── WaS0qwP46Us.mp4\n",
-            "│   ├── Wh_YPQdH1Zg.mp4\n",
-            "│   ├── WWP5HZJsg-o.mp4\n",
-            "│   ├── xGY2dP0YUjA.mp4\n",
-            "│   ├── yLC9CtWU5ws.mp4\n",
-            "│   └── ZQV4U2KQ370.mp4\n",
-            "└── \u001b[01;34mval\u001b[00m\n",
-            "    ├── 0pVGiAU6XEA.mp4\n",
-            "    ├── AQrbRSnRt8M.mp4\n",
-            "    ├── b6Q_b7vgc7Q.mp4\n",
-            "    ├── ddvJ6-faICE.mp4\n",
-            "    ├── IcLztCtvhb8.mp4\n",
-            "    ├── ik4BW3-SCts.mp4\n",
-            "    ├── jqRrH30V0k4.mp4\n",
-            "    ├── SU_x2LQqSLs.mp4\n",
-            "    ├── u4Rm6srmIS8.mp4\n",
-            "    └── y5Iu7XkTqV0.mp4\n",
-            "\n",
-            "2 directories, 42 files\n"
-          ]
-        }
-      ],
-      "source": [
-        "# Check the directory structure of the tiny data\n",
-        "\n",
-        "# Install tree first\n",
-        "!apt-get -q install tree\n",
-        "!tree kinetics400_tiny"
-      ]
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "05/15 03:33:18 - mmengine - WARNING - \"FileClient\" will be deprecated in future. Please use io functions in https://mmengine.readthedocs.io/en/latest/api/fileio.html#file-io\n",
+      "05/15 03:33:18 - mmengine - WARNING - \"HardDiskBackend\" is the alias of \"LocalBackend\" and the former will be deprecated in future.\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Use the recognizer to do inference\n",
+    "from operator import itemgetter\n",
+    "video = 'demo/demo.mp4'\n",
+    "label = 'tools/data/kinetics/label_map_k400.txt'\n",
+    "results = inference_recognizer(model, video)\n",
+    "\n",
+    "pred_scores = results.pred_score.tolist()\n",
+    "score_tuples = tuple(zip(range(len(pred_scores)), pred_scores))\n",
+    "score_sorted = sorted(score_tuples, key=itemgetter(1), reverse=True)\n",
+    "top5_label = score_sorted[:5]\n",
+    "\n",
+    "labels = open(label).readlines()\n",
+    "labels = [x.strip() for x in labels]\n",
+    "results = [(labels[k[0]], k[1]) for k in top5_label]\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
     },
+    "id": "NIyJXqfWathq",
+    "outputId": "cb25aca9-e72d-4c54-f295-4c889713cb3a"
+   },
+   "outputs": [
     {
-      "cell_type": "code",
-      "execution_count": 12,
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "fTdi6dI0hY3g",
-        "outputId": "95f22438-566c-4496-fe0c-50e128b47b5e"
-      },
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "D32_1gwq35E.mp4 0\n",
-            "iRuyZSKhHRg.mp4 1\n",
-            "oXy-e_P_cAI.mp4 0\n",
-            "34XczvTaRiI.mp4 1\n",
-            "h2YqqUhnR34.mp4 0\n",
-            "O46YA8tI530.mp4 0\n",
-            "kFC3KY2bOP8.mp4 1\n",
-            "WWP5HZJsg-o.mp4 1\n",
-            "phDqGd0NKoo.mp4 1\n",
-            "yLC9CtWU5ws.mp4 0\n",
-            "27_CSXByd3s.mp4 1\n",
-            "IyfILH9lBRo.mp4 1\n",
-            "T_TMNGzVrDk.mp4 1\n",
-            "TkkZPZHbAKA.mp4 0\n",
-            "PnOe3GZRVX8.mp4 1\n",
-            "soEcZZsBmDs.mp4 1\n",
-            "FMlSTTpN3VY.mp4 1\n",
-            "WaS0qwP46Us.mp4 0\n",
-            "A-wiliK50Zw.mp4 1\n",
-            "oMrZaozOvdQ.mp4 1\n",
-            "ZQV4U2KQ370.mp4 0\n",
-            "DbX8mPslRXg.mp4 1\n",
-            "h10B9SVE-nk.mp4 1\n",
-            "P5M-hAts7MQ.mp4 0\n",
-            "R8HXQkdgKWA.mp4 0\n",
-            "D92m0HsHjcQ.mp4 0\n",
-            "RqnKtCEoEcA.mp4 0\n",
-            "LvcFDgCAXQs.mp4 0\n",
-            "xGY2dP0YUjA.mp4 0\n",
-            "Wh_YPQdH1Zg.mp4 0\n"
-          ]
-        }
-      ],
-      "source": [
-        "# After downloading the data, we need to check the annotation format\n",
-        "!cat kinetics400_tiny/kinetics_tiny_train_video.txt"
-      ]
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "The top-5 labels with corresponding scores are:\n",
+      "arm wrestling:  1.0\n",
+      "rock scissors paper:  6.434453414527752e-09\n",
+      "shaking hands:  2.7599860175087088e-09\n",
+      "clapping:  1.3454612979302283e-09\n",
+      "massaging feet:  5.555100823784187e-10\n"
+     ]
+    }
+   ],
+   "source": [
+    "print('The top-5 labels with corresponding scores are:')\n",
+    "for result in results:\n",
+    "    print(f'{result[0]}: ', result[1])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "QuZG8kZ2fJ5d"
+   },
+   "source": [
+    "## Train a recognizer on customized dataset\n",
+    "\n",
+    "To train a new recognizer, there are usually three things to do:\n",
+    "1. Support a new dataset\n",
+    "2. Modify the config\n",
+    "3. Train a new recognizer"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "neEFyxChfgiJ"
+   },
+   "source": [
+    "### Support a new dataset\n",
+    "\n",
+    "In this tutorial, we give an example of converting the data into the format of existing datasets. Other methods and more advanced usage can be found in the [doc](/docs/tutorials/new_dataset.md).\n",
+    "\n",
+    "Firstly, let's download a tiny dataset obtained from [Kinetics-400](https://deepmind.com/research/open-source/open-source-datasets/kinetics/). We select 30 videos with their labels as the training dataset and 10 videos with their labels as the test dataset."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
     },
+    "id": "gjsUj9JzgUlJ",
+    "outputId": "96a0e6e9-0dd8-4c07-9fed-22b93d5c1318"
+   },
+   "outputs": [
     {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "0bq0mxmEi29H"
-      },
-      "source": [
-        "According to the format defined in [`VideoDataset`](./datasets/video_dataset.py), each line indicates a sample video with the filepath and label, which are split with a whitespace."
-      ]
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "rm: cannot remove 'kinetics400_tiny.zip*': No such file or directory\n",
+      "--2023-05-15 03:33:27--  https://download.openmmlab.com/mmaction/kinetics400_tiny.zip\n",
+      "Resolving download.openmmlab.com (download.openmmlab.com)... 163.181.82.216, 163.181.82.218, 163.181.82.213, ...\n",
+      "Connecting to download.openmmlab.com (download.openmmlab.com)|163.181.82.216|:443... connected.\n",
+      "HTTP request sent, awaiting response... 200 OK\n",
+      "Length: 18308682 (17M) [application/zip]\n",
+      "Saving to: ‘kinetics400_tiny.zip’\n",
+      "\n",
+      "kinetics400_tiny.zi 100%[===================>]  17.46M  32.7MB/s    in 0.5s    \n",
+      "\n",
+      "2023-05-15 03:33:28 (32.7 MB/s) - ‘kinetics400_tiny.zip’ saved [18308682/18308682]\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "# download, decompress the data\n",
+    "!rm kinetics400_tiny.zip*\n",
+    "!rm -rf kinetics400_tiny\n",
+    "!wget https://download.openmmlab.com/mmaction/kinetics400_tiny.zip\n",
+    "!unzip kinetics400_tiny.zip > /dev/null"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
     },
+    "id": "AbZ-o7V6hNw4",
+    "outputId": "f229f352-1b43-41b7-a374-21404f618581"
+   },
+   "outputs": [
     {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "Ht_DGJA9jQar"
-      },
-      "source": [
-        "### Modify the config\n",
-        "\n",
-        "In the next step, we need to modify the config for the training.\n",
-        "To accelerate the process, we finetune a recognizer using a pre-trained recognizer."
-      ]
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "Reading package lists...\n",
+      "Building dependency tree...\n",
+      "Reading state information...\n",
+      "The following NEW packages will be installed:\n",
+      "  tree\n",
+      "0 upgraded, 1 newly installed, 0 to remove and 24 not upgraded.\n",
+      "Need to get 43.0 kB of archives.\n",
+      "After this operation, 115 kB of additional disk space will be used.\n",
+      "Get:1 http://archive.ubuntu.com/ubuntu focal/universe amd64 tree amd64 1.8.0-1 [43.0 kB]\n",
+      "Fetched 43.0 kB in 1s (48.9 kB/s)\n",
+      "Selecting previously unselected package tree.\n",
+      "(Reading database ... 122519 files and directories currently installed.)\n",
+      "Preparing to unpack .../tree_1.8.0-1_amd64.deb ...\n",
+      "Unpacking tree (1.8.0-1) ...\n",
+      "Setting up tree (1.8.0-1) ...\n",
+      "Processing triggers for man-db (2.9.1-1) ...\n",
+      "\u001B[01;34mkinetics400_tiny\u001B[00m\n",
+      "├── kinetics_tiny_train_video.txt\n",
+      "├── kinetics_tiny_val_video.txt\n",
+      "├── \u001B[01;34mtrain\u001B[00m\n",
+      "│   ├── 27_CSXByd3s.mp4\n",
+      "│   ├── 34XczvTaRiI.mp4\n",
+      "│   ├── A-wiliK50Zw.mp4\n",
+      "│   ├── D32_1gwq35E.mp4\n",
+      "│   ├── D92m0HsHjcQ.mp4\n",
+      "│   ├── DbX8mPslRXg.mp4\n",
+      "│   ├── FMlSTTpN3VY.mp4\n",
+      "│   ├── h10B9SVE-nk.mp4\n",
+      "│   ├── h2YqqUhnR34.mp4\n",
+      "│   ├── iRuyZSKhHRg.mp4\n",
+      "│   ├── IyfILH9lBRo.mp4\n",
+      "│   ├── kFC3KY2bOP8.mp4\n",
+      "│   ├── LvcFDgCAXQs.mp4\n",
+      "│   ├── O46YA8tI530.mp4\n",
+      "│   ├── oMrZaozOvdQ.mp4\n",
+      "│   ├── oXy-e_P_cAI.mp4\n",
+      "│   ├── P5M-hAts7MQ.mp4\n",
+      "│   ├── phDqGd0NKoo.mp4\n",
+      "│   ├── PnOe3GZRVX8.mp4\n",
+      "│   ├── R8HXQkdgKWA.mp4\n",
+      "│   ├── RqnKtCEoEcA.mp4\n",
+      "│   ├── soEcZZsBmDs.mp4\n",
+      "│   ├── TkkZPZHbAKA.mp4\n",
+      "│   ├── T_TMNGzVrDk.mp4\n",
+      "│   ├── WaS0qwP46Us.mp4\n",
+      "│   ├── Wh_YPQdH1Zg.mp4\n",
+      "│   ├── WWP5HZJsg-o.mp4\n",
+      "│   ├── xGY2dP0YUjA.mp4\n",
+      "│   ├── yLC9CtWU5ws.mp4\n",
+      "│   └── ZQV4U2KQ370.mp4\n",
+      "└── \u001B[01;34mval\u001B[00m\n",
+      "    ├── 0pVGiAU6XEA.mp4\n",
+      "    ├── AQrbRSnRt8M.mp4\n",
+      "    ├── b6Q_b7vgc7Q.mp4\n",
+      "    ├── ddvJ6-faICE.mp4\n",
+      "    ├── IcLztCtvhb8.mp4\n",
+      "    ├── ik4BW3-SCts.mp4\n",
+      "    ├── jqRrH30V0k4.mp4\n",
+      "    ├── SU_x2LQqSLs.mp4\n",
+      "    ├── u4Rm6srmIS8.mp4\n",
+      "    └── y5Iu7XkTqV0.mp4\n",
+      "\n",
+      "2 directories, 42 files\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Check the directory structure of the tiny data\n",
+    "\n",
+    "# Install tree first\n",
+    "!apt-get -q install tree\n",
+    "!tree kinetics400_tiny"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
     },
+    "id": "fTdi6dI0hY3g",
+    "outputId": "95f22438-566c-4496-fe0c-50e128b47b5e"
+   },
+   "outputs": [
     {
-      "cell_type": "code",
-      "execution_count": 13,
-      "metadata": {
-        "id": "LjCcmCKOjktc"
-      },
-      "outputs": [],
-      "source": [
-        "cfg = Config.fromfile('./configs/recognition/tsn/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py')"
-      ]
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "D32_1gwq35E.mp4 0\n",
+      "iRuyZSKhHRg.mp4 1\n",
+      "oXy-e_P_cAI.mp4 0\n",
+      "34XczvTaRiI.mp4 1\n",
+      "h2YqqUhnR34.mp4 0\n",
+      "O46YA8tI530.mp4 0\n",
+      "kFC3KY2bOP8.mp4 1\n",
+      "WWP5HZJsg-o.mp4 1\n",
+      "phDqGd0NKoo.mp4 1\n",
+      "yLC9CtWU5ws.mp4 0\n",
+      "27_CSXByd3s.mp4 1\n",
+      "IyfILH9lBRo.mp4 1\n",
+      "T_TMNGzVrDk.mp4 1\n",
+      "TkkZPZHbAKA.mp4 0\n",
+      "PnOe3GZRVX8.mp4 1\n",
+      "soEcZZsBmDs.mp4 1\n",
+      "FMlSTTpN3VY.mp4 1\n",
+      "WaS0qwP46Us.mp4 0\n",
+      "A-wiliK50Zw.mp4 1\n",
+      "oMrZaozOvdQ.mp4 1\n",
+      "ZQV4U2KQ370.mp4 0\n",
+      "DbX8mPslRXg.mp4 1\n",
+      "h10B9SVE-nk.mp4 1\n",
+      "P5M-hAts7MQ.mp4 0\n",
+      "R8HXQkdgKWA.mp4 0\n",
+      "D92m0HsHjcQ.mp4 0\n",
+      "RqnKtCEoEcA.mp4 0\n",
+      "LvcFDgCAXQs.mp4 0\n",
+      "xGY2dP0YUjA.mp4 0\n",
+      "Wh_YPQdH1Zg.mp4 0\n"
+     ]
+    }
+   ],
+   "source": [
+    "# After downloading the data, we need to check the annotation format\n",
+    "!cat kinetics400_tiny/kinetics_tiny_train_video.txt"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "0bq0mxmEi29H"
+   },
+   "source": [
+    "According to the format defined in [`VideoDataset`](./datasets/video_dataset.py), each line describes one sample video by its file path and label, separated by whitespace."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "Ht_DGJA9jQar"
+   },
+   "source": [
+    "### Modify the config\n",
+    "\n",
+    "In the next step, we need to modify the config for the training.\n",
+    "To speed up the process, we fine-tune the recognizer starting from a pre-trained checkpoint."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {
+    "id": "LjCcmCKOjktc"
+   },
+   "outputs": [],
+   "source": [
+    "cfg = Config.fromfile('./configs/recognition/tsn/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "tc8YhFFGjp3e"
+   },
+   "source": [
+    "Given a config that trains a TSN model on the full Kinetics-400 dataset, we need to modify some values to use it for training TSN on the Kinetics400-tiny dataset.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
     },
+    "id": "tlhu9byjjt-K",
+    "outputId": "2d984a1d-93f7-493f-fd77-e19af8285f38"
+   },
+   "outputs": [
     {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "tc8YhFFGjp3e"
-      },
-      "source": [
-        "Given a config that trains a TSN model on kinetics400-full dataset, we need to modify some values to use it for training TSN on Kinetics400-tiny dataset.\n"
-      ]
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "Config:\n",
+      "model = dict(\n",
+      "    type='Recognizer2D',\n",
+      "    backbone=dict(\n",
+      "        type='ResNet',\n",
+      "        pretrained='https://download.pytorch.org/models/resnet50-11ad3fa6.pth',\n",
+      "        depth=50,\n",
+      "        norm_eval=False),\n",
+      "    cls_head=dict(\n",
+      "        type='TSNHead',\n",
+      "        num_classes=2,\n",
+      "        in_channels=2048,\n",
+      "        spatial_type='avg',\n",
+      "        consensus=dict(type='AvgConsensus', dim=1),\n",
+      "        dropout_ratio=0.4,\n",
+      "        init_std=0.01,\n",
+      "        average_clips='prob'),\n",
+      "    data_preprocessor=dict(\n",
+      "        type='ActionDataPreprocessor',\n",
+      "        mean=[123.675, 116.28, 103.53],\n",
+      "        std=[58.395, 57.12, 57.375],\n",
+      "        format_shape='NCHW'),\n",
+      "    train_cfg=None,\n",
+      "    test_cfg=None)\n",
+      "train_cfg = dict(\n",
+      "    type='EpochBasedTrainLoop', max_epochs=10, val_begin=1, val_interval=1)\n",
+      "val_cfg = dict(type='ValLoop')\n",
+      "test_cfg = dict(type='TestLoop')\n",
+      "param_scheduler = [\n",
+      "    dict(\n",
+      "        type='MultiStepLR',\n",
+      "        begin=0,\n",
+      "        end=100,\n",
+      "        by_epoch=True,\n",
+      "        milestones=[40, 80],\n",
+      "        gamma=0.1)\n",
+      "]\n",
+      "optim_wrapper = dict(\n",
+      "    optimizer=dict(\n",
+      "        type='SGD', lr=7.8125e-05, momentum=0.9, weight_decay=0.0001),\n",
+      "    clip_grad=dict(max_norm=40, norm_type=2))\n",
+      "default_scope = 'mmaction'\n",
+      "default_hooks = dict(\n",
+      "    runtime_info=dict(type='RuntimeInfoHook'),\n",
+      "    timer=dict(type='IterTimerHook'),\n",
+      "    logger=dict(type='LoggerHook', interval=20, ignore_last=False),\n",
+      "    param_scheduler=dict(type='ParamSchedulerHook'),\n",
+      "    checkpoint=dict(\n",
+      "        type='CheckpointHook', interval=3, save_best='auto', max_keep_ckpts=3),\n",
+      "    sampler_seed=dict(type='DistSamplerSeedHook'),\n",
+      "    sync_buffers=dict(type='SyncBuffersHook'))\n",
+      "env_cfg = dict(\n",
+      "    cudnn_benchmark=False,\n",
+      "    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),\n",
+      "    dist_cfg=dict(backend='nccl'))\n",
+      "log_processor = dict(type='LogProcessor', window_size=20, by_epoch=True)\n",
+      "vis_backends = [dict(type='LocalVisBackend')]\n",
+      "visualizer = dict(\n",
+      "    type='ActionVisualizer', vis_backends=[dict(type='LocalVisBackend')])\n",
+      "log_level = 'INFO'\n",
+      "load_from = './checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth'\n",
+      "resume = False\n",
+      "dataset_type = 'VideoDataset'\n",
+      "data_root = 'kinetics400_tiny/train/'\n",
+      "data_root_val = 'kinetics400_tiny/val/'\n",
+      "ann_file_train = 'kinetics400_tiny/kinetics_tiny_train_video.txt'\n",
+      "ann_file_val = 'kinetics400_tiny/kinetics_tiny_val_video.txt'\n",
+      "file_client_args = dict(io_backend='disk')\n",
+      "train_pipeline = [\n",
+      "    dict(type='DecordInit', io_backend='disk'),\n",
+      "    dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=3),\n",
+      "    dict(type='DecordDecode'),\n",
+      "    dict(type='Resize', scale=(-1, 256)),\n",
+      "    dict(\n",
+      "        type='MultiScaleCrop',\n",
+      "        input_size=224,\n",
+      "        scales=(1, 0.875, 0.75, 0.66),\n",
+      "        random_crop=False,\n",
+      "        max_wh_scale_gap=1),\n",
+      "    dict(type='Resize', scale=(224, 224), keep_ratio=False),\n",
+      "    dict(type='Flip', flip_ratio=0.5),\n",
+      "    dict(type='FormatShape', input_format='NCHW'),\n",
+      "    dict(type='PackActionInputs')\n",
+      "]\n",
+      "val_pipeline = [\n",
+      "    dict(type='DecordInit', io_backend='disk'),\n",
+      "    dict(\n",
+      "        type='SampleFrames',\n",
+      "        clip_len=1,\n",
+      "        frame_interval=1,\n",
+      "        num_clips=3,\n",
+      "        test_mode=True),\n",
+      "    dict(type='DecordDecode'),\n",
+      "    dict(type='Resize', scale=(-1, 256)),\n",
+      "    dict(type='CenterCrop', crop_size=224),\n",
+      "    dict(type='FormatShape', input_format='NCHW'),\n",
+      "    dict(type='PackActionInputs')\n",
+      "]\n",
+      "test_pipeline = [\n",
+      "    dict(type='DecordInit', io_backend='disk'),\n",
+      "    dict(\n",
+      "        type='SampleFrames',\n",
+      "        clip_len=1,\n",
+      "        frame_interval=1,\n",
+      "        num_clips=25,\n",
+      "        test_mode=True),\n",
+      "    dict(type='DecordDecode'),\n",
+      "    dict(type='Resize', scale=(-1, 256)),\n",
+      "    dict(type='TenCrop', crop_size=224),\n",
+      "    dict(type='FormatShape', input_format='NCHW'),\n",
+      "    dict(type='PackActionInputs')\n",
+      "]\n",
+      "train_dataloader = dict(\n",
+      "    batch_size=2,\n",
+      "    num_workers=2,\n",
+      "    persistent_workers=True,\n",
+      "    sampler=dict(type='DefaultSampler', shuffle=True),\n",
+      "    dataset=dict(\n",
+      "        type='VideoDataset',\n",
+      "        ann_file='kinetics400_tiny/kinetics_tiny_train_video.txt',\n",
+      "        data_prefix=dict(video='kinetics400_tiny/train/'),\n",
+      "        pipeline=[\n",
+      "            dict(type='DecordInit', io_backend='disk'),\n",
+      "            dict(\n",
+      "                type='SampleFrames', clip_len=1, frame_interval=1,\n",
+      "                num_clips=3),\n",
+      "            dict(type='DecordDecode'),\n",
+      "            dict(type='Resize', scale=(-1, 256)),\n",
+      "            dict(\n",
+      "                type='MultiScaleCrop',\n",
+      "                input_size=224,\n",
+      "                scales=(1, 0.875, 0.75, 0.66),\n",
+      "                random_crop=False,\n",
+      "                max_wh_scale_gap=1),\n",
+      "            dict(type='Resize', scale=(224, 224), keep_ratio=False),\n",
+      "            dict(type='Flip', flip_ratio=0.5),\n",
+      "            dict(type='FormatShape', input_format='NCHW'),\n",
+      "            dict(type='PackActionInputs')\n",
+      "        ]))\n",
+      "val_dataloader = dict(\n",
+      "    batch_size=2,\n",
+      "    num_workers=2,\n",
+      "    persistent_workers=True,\n",
+      "    sampler=dict(type='DefaultSampler', shuffle=False),\n",
+      "    dataset=dict(\n",
+      "        type='VideoDataset',\n",
+      "        ann_file='kinetics400_tiny/kinetics_tiny_val_video.txt',\n",
+      "        data_prefix=dict(video='kinetics400_tiny/val/'),\n",
+      "        pipeline=[\n",
+      "            dict(type='DecordInit', io_backend='disk'),\n",
+      "            dict(\n",
+      "                type='SampleFrames',\n",
+      "                clip_len=1,\n",
+      "                frame_interval=1,\n",
+      "                num_clips=3,\n",
+      "                test_mode=True),\n",
+      "            dict(type='DecordDecode'),\n",
+      "            dict(type='Resize', scale=(-1, 256)),\n",
+      "            dict(type='CenterCrop', crop_size=224),\n",
+      "            dict(type='FormatShape', input_format='NCHW'),\n",
+      "            dict(type='PackActionInputs')\n",
+      "        ],\n",
+      "        test_mode=True))\n",
+      "test_dataloader = dict(\n",
+      "    batch_size=1,\n",
+      "    num_workers=2,\n",
+      "    persistent_workers=True,\n",
+      "    sampler=dict(type='DefaultSampler', shuffle=False),\n",
+      "    dataset=dict(\n",
+      "        type='VideoDataset',\n",
+      "        ann_file='kinetics400_tiny/kinetics_tiny_val_video.txt',\n",
+      "        data_prefix=dict(video='kinetics400_tiny/val/'),\n",
+      "        pipeline=[\n",
+      "            dict(type='DecordInit', io_backend='disk'),\n",
+      "            dict(\n",
+      "                type='SampleFrames',\n",
+      "                clip_len=1,\n",
+      "                frame_interval=1,\n",
+      "                num_clips=25,\n",
+      "                test_mode=True),\n",
+      "            dict(type='DecordDecode'),\n",
+      "            dict(type='Resize', scale=(-1, 256)),\n",
+      "            dict(type='TenCrop', crop_size=224),\n",
+      "            dict(type='FormatShape', input_format='NCHW'),\n",
+      "            dict(type='PackActionInputs')\n",
+      "        ],\n",
+      "        test_mode=True))\n",
+      "val_evaluator = dict(type='AccMetric')\n",
+      "test_evaluator = dict(type='AccMetric')\n",
+      "auto_scale_lr = dict(enable=False, base_batch_size=256)\n",
+      "work_dir = './tutorial_exps'\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "from mmengine.runner import set_random_seed\n",
+    "\n",
+    "# Modify dataset paths and annotation files\n",
+    "cfg.data_root = 'kinetics400_tiny/train/'\n",
+    "cfg.data_root_val = 'kinetics400_tiny/val/'\n",
+    "cfg.ann_file_train = 'kinetics400_tiny/kinetics_tiny_train_video.txt'\n",
+    "cfg.ann_file_val = 'kinetics400_tiny/kinetics_tiny_val_video.txt'\n",
+    "\n",
+    "\n",
+    "cfg.test_dataloader.dataset.ann_file = 'kinetics400_tiny/kinetics_tiny_val_video.txt'\n",
+    "cfg.test_dataloader.dataset.data_prefix.video = 'kinetics400_tiny/val/'\n",
+    "\n",
+    "cfg.train_dataloader.dataset.ann_file = 'kinetics400_tiny/kinetics_tiny_train_video.txt'\n",
+    "cfg.train_dataloader.dataset.data_prefix.video = 'kinetics400_tiny/train/'\n",
+    "\n",
+    "cfg.val_dataloader.dataset.ann_file = 'kinetics400_tiny/kinetics_tiny_val_video.txt'\n",
+    "cfg.val_dataloader.dataset.data_prefix.video  = 'kinetics400_tiny/val/'\n",
+    "\n",
+    "\n",
+    "# Modify num classes of the model in cls_head\n",
+    "cfg.model.cls_head.num_classes = 2\n",
+    "# We can use the pre-trained TSN model\n",
+    "cfg.load_from = './checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth'\n",
+    "\n",
+    "# Set up working dir to save files and logs.\n",
+    "cfg.work_dir = './tutorial_exps'\n",
+    "\n",
+    "# The original learning rate (LR) is set for 8-GPU training.\n",
+    "# We use one GPU and a 16x smaller batch size, so divide the LR by 8 and by 16.\n",
+    "cfg.train_dataloader.batch_size = cfg.train_dataloader.batch_size // 16\n",
+    "cfg.val_dataloader.batch_size = cfg.val_dataloader.batch_size // 16\n",
+    "cfg.optim_wrapper.optimizer.lr = cfg.optim_wrapper.optimizer.lr / 8 / 16\n",
+    "cfg.train_cfg.max_epochs = 10\n",
+    "\n",
+    "cfg.train_dataloader.num_workers = 2\n",
+    "cfg.val_dataloader.num_workers = 2\n",
+    "cfg.test_dataloader.num_workers = 2\n",
+    "\n",
+    "# Print the final config used for training\n",
+    "# so we can have a look at it\n",
+    "print(f'Config:\\n{cfg.pretty_text}')\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "tES-qnZ3k38Z"
+   },
+   "source": [
+    "### Train a new recognizer\n",
+    "\n",
+    "Finally, let's initialize the dataset and recognizer, then train a new recognizer!"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
     },
+    "id": "dDBWkdDRk6oz",
+    "outputId": "044b9e09-2038-41c9-d5a3-8a74ae11ade2"
+   },
+   "outputs": [
     {
-      "cell_type": "code",
-      "execution_count": 14,
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "tlhu9byjjt-K",
-        "outputId": "2d984a1d-93f7-493f-fd77-e19af8285f38"
-      },
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "Config:\n",
-            "model = dict(\n",
-            "    type='Recognizer2D',\n",
-            "    backbone=dict(\n",
-            "        type='ResNet',\n",
-            "        pretrained='https://download.pytorch.org/models/resnet50-11ad3fa6.pth',\n",
-            "        depth=50,\n",
-            "        norm_eval=False),\n",
-            "    cls_head=dict(\n",
-            "        type='TSNHead',\n",
-            "        num_classes=2,\n",
-            "        in_channels=2048,\n",
-            "        spatial_type='avg',\n",
-            "        consensus=dict(type='AvgConsensus', dim=1),\n",
-            "        dropout_ratio=0.4,\n",
-            "        init_std=0.01,\n",
-            "        average_clips='prob'),\n",
-            "    data_preprocessor=dict(\n",
-            "        type='ActionDataPreprocessor',\n",
-            "        mean=[123.675, 116.28, 103.53],\n",
-            "        std=[58.395, 57.12, 57.375],\n",
-            "        format_shape='NCHW'),\n",
-            "    train_cfg=None,\n",
-            "    test_cfg=None)\n",
-            "train_cfg = dict(\n",
-            "    type='EpochBasedTrainLoop', max_epochs=10, val_begin=1, val_interval=1)\n",
-            "val_cfg = dict(type='ValLoop')\n",
-            "test_cfg = dict(type='TestLoop')\n",
-            "param_scheduler = [\n",
-            "    dict(\n",
-            "        type='MultiStepLR',\n",
-            "        begin=0,\n",
-            "        end=100,\n",
-            "        by_epoch=True,\n",
-            "        milestones=[40, 80],\n",
-            "        gamma=0.1)\n",
-            "]\n",
-            "optim_wrapper = dict(\n",
-            "    optimizer=dict(\n",
-            "        type='SGD', lr=7.8125e-05, momentum=0.9, weight_decay=0.0001),\n",
-            "    clip_grad=dict(max_norm=40, norm_type=2))\n",
-            "default_scope = 'mmaction'\n",
-            "default_hooks = dict(\n",
-            "    runtime_info=dict(type='RuntimeInfoHook'),\n",
-            "    timer=dict(type='IterTimerHook'),\n",
-            "    logger=dict(type='LoggerHook', interval=20, ignore_last=False),\n",
-            "    param_scheduler=dict(type='ParamSchedulerHook'),\n",
-            "    checkpoint=dict(\n",
-            "        type='CheckpointHook', interval=3, save_best='auto', max_keep_ckpts=3),\n",
-            "    sampler_seed=dict(type='DistSamplerSeedHook'),\n",
-            "    sync_buffers=dict(type='SyncBuffersHook'))\n",
-            "env_cfg = dict(\n",
-            "    cudnn_benchmark=False,\n",
-            "    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),\n",
-            "    dist_cfg=dict(backend='nccl'))\n",
-            "log_processor = dict(type='LogProcessor', window_size=20, by_epoch=True)\n",
-            "vis_backends = [dict(type='LocalVisBackend')]\n",
-            "visualizer = dict(\n",
-            "    type='ActionVisualizer', vis_backends=[dict(type='LocalVisBackend')])\n",
-            "log_level = 'INFO'\n",
-            "load_from = './checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth'\n",
-            "resume = False\n",
-            "dataset_type = 'VideoDataset'\n",
-            "data_root = 'kinetics400_tiny/train/'\n",
-            "data_root_val = 'kinetics400_tiny/val/'\n",
-            "ann_file_train = 'kinetics400_tiny/kinetics_tiny_train_video.txt'\n",
-            "ann_file_val = 'kinetics400_tiny/kinetics_tiny_val_video.txt'\n",
-            "file_client_args = dict(io_backend='disk')\n",
-            "train_pipeline = [\n",
-            "    dict(type='DecordInit', io_backend='disk'),\n",
-            "    dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=3),\n",
-            "    dict(type='DecordDecode'),\n",
-            "    dict(type='Resize', scale=(-1, 256)),\n",
-            "    dict(\n",
-            "        type='MultiScaleCrop',\n",
-            "        input_size=224,\n",
-            "        scales=(1, 0.875, 0.75, 0.66),\n",
-            "        random_crop=False,\n",
-            "        max_wh_scale_gap=1),\n",
-            "    dict(type='Resize', scale=(224, 224), keep_ratio=False),\n",
-            "    dict(type='Flip', flip_ratio=0.5),\n",
-            "    dict(type='FormatShape', input_format='NCHW'),\n",
-            "    dict(type='PackActionInputs')\n",
-            "]\n",
-            "val_pipeline = [\n",
-            "    dict(type='DecordInit', io_backend='disk'),\n",
-            "    dict(\n",
-            "        type='SampleFrames',\n",
-            "        clip_len=1,\n",
-            "        frame_interval=1,\n",
-            "        num_clips=3,\n",
-            "        test_mode=True),\n",
-            "    dict(type='DecordDecode'),\n",
-            "    dict(type='Resize', scale=(-1, 256)),\n",
-            "    dict(type='CenterCrop', crop_size=224),\n",
-            "    dict(type='FormatShape', input_format='NCHW'),\n",
-            "    dict(type='PackActionInputs')\n",
-            "]\n",
-            "test_pipeline = [\n",
-            "    dict(type='DecordInit', io_backend='disk'),\n",
-            "    dict(\n",
-            "        type='SampleFrames',\n",
-            "        clip_len=1,\n",
-            "        frame_interval=1,\n",
-            "        num_clips=25,\n",
-            "        test_mode=True),\n",
-            "    dict(type='DecordDecode'),\n",
-            "    dict(type='Resize', scale=(-1, 256)),\n",
-            "    dict(type='TenCrop', crop_size=224),\n",
-            "    dict(type='FormatShape', input_format='NCHW'),\n",
-            "    dict(type='PackActionInputs')\n",
-            "]\n",
-            "train_dataloader = dict(\n",
-            "    batch_size=2,\n",
-            "    num_workers=2,\n",
-            "    persistent_workers=True,\n",
-            "    sampler=dict(type='DefaultSampler', shuffle=True),\n",
-            "    dataset=dict(\n",
-            "        type='VideoDataset',\n",
-            "        ann_file='kinetics400_tiny/kinetics_tiny_train_video.txt',\n",
-            "        data_prefix=dict(video='kinetics400_tiny/train/'),\n",
-            "        pipeline=[\n",
-            "            dict(type='DecordInit', io_backend='disk'),\n",
-            "            dict(\n",
-            "                type='SampleFrames', clip_len=1, frame_interval=1,\n",
-            "                num_clips=3),\n",
-            "            dict(type='DecordDecode'),\n",
-            "            dict(type='Resize', scale=(-1, 256)),\n",
-            "            dict(\n",
-            "                type='MultiScaleCrop',\n",
-            "                input_size=224,\n",
-            "                scales=(1, 0.875, 0.75, 0.66),\n",
-            "                random_crop=False,\n",
-            "                max_wh_scale_gap=1),\n",
-            "            dict(type='Resize', scale=(224, 224), keep_ratio=False),\n",
-            "            dict(type='Flip', flip_ratio=0.5),\n",
-            "            dict(type='FormatShape', input_format='NCHW'),\n",
-            "            dict(type='PackActionInputs')\n",
-            "        ]))\n",
-            "val_dataloader = dict(\n",
-            "    batch_size=2,\n",
-            "    num_workers=2,\n",
-            "    persistent_workers=True,\n",
-            "    sampler=dict(type='DefaultSampler', shuffle=False),\n",
-            "    dataset=dict(\n",
-            "        type='VideoDataset',\n",
-            "        ann_file='kinetics400_tiny/kinetics_tiny_val_video.txt',\n",
-            "        data_prefix=dict(video='kinetics400_tiny/val/'),\n",
-            "        pipeline=[\n",
-            "            dict(type='DecordInit', io_backend='disk'),\n",
-            "            dict(\n",
-            "                type='SampleFrames',\n",
-            "                clip_len=1,\n",
-            "                frame_interval=1,\n",
-            "                num_clips=3,\n",
-            "                test_mode=True),\n",
-            "            dict(type='DecordDecode'),\n",
-            "            dict(type='Resize', scale=(-1, 256)),\n",
-            "            dict(type='CenterCrop', crop_size=224),\n",
-            "            dict(type='FormatShape', input_format='NCHW'),\n",
-            "            dict(type='PackActionInputs')\n",
-            "        ],\n",
-            "        test_mode=True))\n",
-            "test_dataloader = dict(\n",
-            "    batch_size=1,\n",
-            "    num_workers=2,\n",
-            "    persistent_workers=True,\n",
-            "    sampler=dict(type='DefaultSampler', shuffle=False),\n",
-            "    dataset=dict(\n",
-            "        type='VideoDataset',\n",
-            "        ann_file='kinetics400_tiny/kinetics_tiny_val_video.txt',\n",
-            "        data_prefix=dict(video='kinetics400_tiny/val/'),\n",
-            "        pipeline=[\n",
-            "            dict(type='DecordInit', io_backend='disk'),\n",
-            "            dict(\n",
-            "                type='SampleFrames',\n",
-            "                clip_len=1,\n",
-            "                frame_interval=1,\n",
-            "                num_clips=25,\n",
-            "                test_mode=True),\n",
-            "            dict(type='DecordDecode'),\n",
-            "            dict(type='Resize', scale=(-1, 256)),\n",
-            "            dict(type='TenCrop', crop_size=224),\n",
-            "            dict(type='FormatShape', input_format='NCHW'),\n",
-            "            dict(type='PackActionInputs')\n",
-            "        ],\n",
-            "        test_mode=True))\n",
-            "val_evaluator = dict(type='AccMetric')\n",
-            "test_evaluator = dict(type='AccMetric')\n",
-            "auto_scale_lr = dict(enable=False, base_batch_size=256)\n",
-            "work_dir = './tutorial_exps'\n",
-            "\n"
-          ]
-        }
-      ],
-      "source": [
-        "from mmengine.runner import set_random_seed\n",
-        "\n",
-        "# Modify dataset type and path\n",
-        "cfg.data_root = 'kinetics400_tiny/train/'\n",
-        "cfg.data_root_val = 'kinetics400_tiny/val/'\n",
-        "cfg.ann_file_train = 'kinetics400_tiny/kinetics_tiny_train_video.txt'\n",
-        "cfg.ann_file_val = 'kinetics400_tiny/kinetics_tiny_val_video.txt'\n",
-        "\n",
-        "\n",
-        "cfg.test_dataloader.dataset.ann_file = 'kinetics400_tiny/kinetics_tiny_val_video.txt'\n",
-        "cfg.test_dataloader.dataset.data_prefix.video = 'kinetics400_tiny/val/'\n",
-        "\n",
-        "cfg.train_dataloader.dataset.ann_file = 'kinetics400_tiny/kinetics_tiny_train_video.txt'\n",
-        "cfg.train_dataloader.dataset.data_prefix.video = 'kinetics400_tiny/train/'\n",
-        "\n",
-        "cfg.val_dataloader.dataset.ann_file = 'kinetics400_tiny/kinetics_tiny_val_video.txt'\n",
-        "cfg.val_dataloader.dataset.data_prefix.video  = 'kinetics400_tiny/val/'\n",
-        "\n",
-        "\n",
-        "# Modify num classes of the model in cls_head\n",
-        "cfg.model.cls_head.num_classes = 2\n",
-        "# We can use the pre-trained TSN model\n",
-        "cfg.load_from = './checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth'\n",
-        "\n",
-        "# Set up working dir to save files and logs.\n",
-        "cfg.work_dir = './tutorial_exps'\n",
-        "\n",
-        "# The original learning rate (LR) is set for 8-GPU training.\n",
-        "# We divide it by 8 since we only use one GPU.\n",
-        "cfg.train_dataloader.batch_size = cfg.train_dataloader.batch_size // 16\n",
-        "cfg.val_dataloader.batch_size = cfg.val_dataloader.batch_size // 16\n",
-        "cfg.optim_wrapper.optimizer.lr = cfg.optim_wrapper.optimizer.lr / 8 / 16\n",
-        "cfg.train_cfg.max_epochs = 10\n",
-        "\n",
-        "cfg.train_dataloader.num_workers = 2\n",
-        "cfg.val_dataloader.num_workers = 2\n",
-        "cfg.test_dataloader.num_workers = 2\n",
-        "\n",
-        "# We can initialize the logger for training and have a look\n",
-        "# at the final config used for training\n",
-        "print(f'Config:\\n{cfg.pretty_text}')\n"
-      ]
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "05/15 03:33:34 - mmengine - INFO - \n",
+      "------------------------------------------------------------\n",
+      "System environment:\n",
+      "    sys.platform: linux\n",
+      "    Python: 3.10.11 (main, Apr  5 2023, 14:15:10) [GCC 9.4.0]\n",
+      "    CUDA available: True\n",
+      "    numpy_random_seed: 1853452922\n",
+      "    GPU 0: Tesla T4\n",
+      "    CUDA_HOME: /usr/local/cuda\n",
+      "    NVCC: Cuda compilation tools, release 11.8, V11.8.89\n",
+      "    GCC: x86_64-linux-gnu-gcc (Ubuntu 9.4.0-1ubuntu1~20.04.1) 9.4.0\n",
+      "    PyTorch: 2.0.0+cu118\n",
+      "    PyTorch compiling details: PyTorch built with:\n",
+      "  - GCC 9.3\n",
+      "  - C++ Version: 201703\n",
+      "  - Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications\n",
+      "  - Intel(R) MKL-DNN v2.7.3 (Git Hash 6dbeffbae1f23cbbeae17adb7b5b13f1f37c080e)\n",
+      "  - OpenMP 201511 (a.k.a. OpenMP 4.5)\n",
+      "  - LAPACK is enabled (usually provided by MKL)\n",
+      "  - NNPACK is enabled\n",
+      "  - CPU capability usage: AVX2\n",
+      "  - CUDA Runtime 11.8\n",
+      "  - NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_90,code=sm_90\n",
+      "  - CuDNN 8.7\n",
+      "  - Magma 2.6.1\n",
+      "  - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=11.8, CUDNN_VERSION=8.7.0, CXX_COMPILER=/opt/rh/devtoolset-9/root/usr/bin/c++, CXX_FLAGS= -D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wunused-local-typedefs -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_DISABLE_GPU_ASSERTS=ON, TORCH_VERSION=2.0.0, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=1, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, \n",
+      "\n",
+      "    TorchVision: 0.15.1+cu118\n",
+      "    OpenCV: 4.7.0\n",
+      "    MMEngine: 0.7.3\n",
+      "\n",
+      "Runtime environment:\n",
+      "    cudnn_benchmark: False\n",
+      "    mp_cfg: {'mp_start_method': 'fork', 'opencv_num_threads': 0}\n",
+      "    dist_cfg: {'backend': 'nccl'}\n",
+      "    seed: None\n",
+      "    Distributed launcher: none\n",
+      "    Distributed training: False\n",
+      "    GPU number: 1\n",
+      "------------------------------------------------------------\n",
+      "\n",
+      "05/15 03:33:34 - mmengine - INFO - Config:\n",
+      "model = dict(\n",
+      "    type='Recognizer2D',\n",
+      "    backbone=dict(\n",
+      "        type='ResNet',\n",
+      "        pretrained='https://download.pytorch.org/models/resnet50-11ad3fa6.pth',\n",
+      "        depth=50,\n",
+      "        norm_eval=False),\n",
+      "    cls_head=dict(\n",
+      "        type='TSNHead',\n",
+      "        num_classes=2,\n",
+      "        in_channels=2048,\n",
+      "        spatial_type='avg',\n",
+      "        consensus=dict(type='AvgConsensus', dim=1),\n",
+      "        dropout_ratio=0.4,\n",
+      "        init_std=0.01,\n",
+      "        average_clips='prob'),\n",
+      "    data_preprocessor=dict(\n",
+      "        type='ActionDataPreprocessor',\n",
+      "        mean=[123.675, 116.28, 103.53],\n",
+      "        std=[58.395, 57.12, 57.375],\n",
+      "        format_shape='NCHW'),\n",
+      "    train_cfg=None,\n",
+      "    test_cfg=None)\n",
+      "train_cfg = dict(\n",
+      "    type='EpochBasedTrainLoop', max_epochs=10, val_begin=1, val_interval=1)\n",
+      "val_cfg = dict(type='ValLoop')\n",
+      "test_cfg = dict(type='TestLoop')\n",
+      "param_scheduler = [\n",
+      "    dict(\n",
+      "        type='MultiStepLR',\n",
+      "        begin=0,\n",
+      "        end=100,\n",
+      "        by_epoch=True,\n",
+      "        milestones=[40, 80],\n",
+      "        gamma=0.1)\n",
+      "]\n",
+      "optim_wrapper = dict(\n",
+      "    optimizer=dict(\n",
+      "        type='SGD', lr=7.8125e-05, momentum=0.9, weight_decay=0.0001),\n",
+      "    clip_grad=dict(max_norm=40, norm_type=2))\n",
+      "default_scope = 'mmaction'\n",
+      "default_hooks = dict(\n",
+      "    runtime_info=dict(type='RuntimeInfoHook'),\n",
+      "    timer=dict(type='IterTimerHook'),\n",
+      "    logger=dict(type='LoggerHook', interval=20, ignore_last=False),\n",
+      "    param_scheduler=dict(type='ParamSchedulerHook'),\n",
+      "    checkpoint=dict(\n",
+      "        type='CheckpointHook', interval=3, save_best='auto', max_keep_ckpts=3),\n",
+      "    sampler_seed=dict(type='DistSamplerSeedHook'),\n",
+      "    sync_buffers=dict(type='SyncBuffersHook'))\n",
+      "env_cfg = dict(\n",
+      "    cudnn_benchmark=False,\n",
+      "    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),\n",
+      "    dist_cfg=dict(backend='nccl'))\n",
+      "log_processor = dict(type='LogProcessor', window_size=20, by_epoch=True)\n",
+      "vis_backends = [dict(type='LocalVisBackend')]\n",
+      "visualizer = dict(\n",
+      "    type='ActionVisualizer', vis_backends=[dict(type='LocalVisBackend')])\n",
+      "log_level = 'INFO'\n",
+      "load_from = './checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth'\n",
+      "resume = False\n",
+      "dataset_type = 'VideoDataset'\n",
+      "data_root = 'kinetics400_tiny/train/'\n",
+      "data_root_val = 'kinetics400_tiny/val/'\n",
+      "ann_file_train = 'kinetics400_tiny/kinetics_tiny_train_video.txt'\n",
+      "ann_file_val = 'kinetics400_tiny/kinetics_tiny_val_video.txt'\n",
+      "file_client_args = dict(io_backend='disk')\n",
+      "train_pipeline = [\n",
+      "    dict(type='DecordInit', io_backend='disk'),\n",
+      "    dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=3),\n",
+      "    dict(type='DecordDecode'),\n",
+      "    dict(type='Resize', scale=(-1, 256)),\n",
+      "    dict(\n",
+      "        type='MultiScaleCrop',\n",
+      "        input_size=224,\n",
+      "        scales=(1, 0.875, 0.75, 0.66),\n",
+      "        random_crop=False,\n",
+      "        max_wh_scale_gap=1),\n",
+      "    dict(type='Resize', scale=(224, 224), keep_ratio=False),\n",
+      "    dict(type='Flip', flip_ratio=0.5),\n",
+      "    dict(type='FormatShape', input_format='NCHW'),\n",
+      "    dict(type='PackActionInputs')\n",
+      "]\n",
+      "val_pipeline = [\n",
+      "    dict(type='DecordInit', io_backend='disk'),\n",
+      "    dict(\n",
+      "        type='SampleFrames',\n",
+      "        clip_len=1,\n",
+      "        frame_interval=1,\n",
+      "        num_clips=3,\n",
+      "        test_mode=True),\n",
+      "    dict(type='DecordDecode'),\n",
+      "    dict(type='Resize', scale=(-1, 256)),\n",
+      "    dict(type='CenterCrop', crop_size=224),\n",
+      "    dict(type='FormatShape', input_format='NCHW'),\n",
+      "    dict(type='PackActionInputs')\n",
+      "]\n",
+      "test_pipeline = [\n",
+      "    dict(type='DecordInit', io_backend='disk'),\n",
+      "    dict(\n",
+      "        type='SampleFrames',\n",
+      "        clip_len=1,\n",
+      "        frame_interval=1,\n",
+      "        num_clips=25,\n",
+      "        test_mode=True),\n",
+      "    dict(type='DecordDecode'),\n",
+      "    dict(type='Resize', scale=(-1, 256)),\n",
+      "    dict(type='TenCrop', crop_size=224),\n",
+      "    dict(type='FormatShape', input_format='NCHW'),\n",
+      "    dict(type='PackActionInputs')\n",
+      "]\n",
+      "train_dataloader = dict(\n",
+      "    batch_size=2,\n",
+      "    num_workers=2,\n",
+      "    persistent_workers=True,\n",
+      "    sampler=dict(type='DefaultSampler', shuffle=True),\n",
+      "    dataset=dict(\n",
+      "        type='VideoDataset',\n",
+      "        ann_file='kinetics400_tiny/kinetics_tiny_train_video.txt',\n",
+      "        data_prefix=dict(video='kinetics400_tiny/train/'),\n",
+      "        pipeline=[\n",
+      "            dict(type='DecordInit', io_backend='disk'),\n",
+      "            dict(\n",
+      "                type='SampleFrames', clip_len=1, frame_interval=1,\n",
+      "                num_clips=3),\n",
+      "            dict(type='DecordDecode'),\n",
+      "            dict(type='Resize', scale=(-1, 256)),\n",
+      "            dict(\n",
+      "                type='MultiScaleCrop',\n",
+      "                input_size=224,\n",
+      "                scales=(1, 0.875, 0.75, 0.66),\n",
+      "                random_crop=False,\n",
+      "                max_wh_scale_gap=1),\n",
+      "            dict(type='Resize', scale=(224, 224), keep_ratio=False),\n",
+      "            dict(type='Flip', flip_ratio=0.5),\n",
+      "            dict(type='FormatShape', input_format='NCHW'),\n",
+      "            dict(type='PackActionInputs')\n",
+      "        ]))\n",
+      "val_dataloader = dict(\n",
+      "    batch_size=2,\n",
+      "    num_workers=2,\n",
+      "    persistent_workers=True,\n",
+      "    sampler=dict(type='DefaultSampler', shuffle=False),\n",
+      "    dataset=dict(\n",
+      "        type='VideoDataset',\n",
+      "        ann_file='kinetics400_tiny/kinetics_tiny_val_video.txt',\n",
+      "        data_prefix=dict(video='kinetics400_tiny/val/'),\n",
+      "        pipeline=[\n",
+      "            dict(type='DecordInit', io_backend='disk'),\n",
+      "            dict(\n",
+      "                type='SampleFrames',\n",
+      "                clip_len=1,\n",
+      "                frame_interval=1,\n",
+      "                num_clips=3,\n",
+      "                test_mode=True),\n",
+      "            dict(type='DecordDecode'),\n",
+      "            dict(type='Resize', scale=(-1, 256)),\n",
+      "            dict(type='CenterCrop', crop_size=224),\n",
+      "            dict(type='FormatShape', input_format='NCHW'),\n",
+      "            dict(type='PackActionInputs')\n",
+      "        ],\n",
+      "        test_mode=True))\n",
+      "test_dataloader = dict(\n",
+      "    batch_size=1,\n",
+      "    num_workers=2,\n",
+      "    persistent_workers=True,\n",
+      "    sampler=dict(type='DefaultSampler', shuffle=False),\n",
+      "    dataset=dict(\n",
+      "        type='VideoDataset',\n",
+      "        ann_file='kinetics400_tiny/kinetics_tiny_val_video.txt',\n",
+      "        data_prefix=dict(video='kinetics400_tiny/val/'),\n",
+      "        pipeline=[\n",
+      "            dict(type='DecordInit', io_backend='disk'),\n",
+      "            dict(\n",
+      "                type='SampleFrames',\n",
+      "                clip_len=1,\n",
+      "                frame_interval=1,\n",
+      "                num_clips=25,\n",
+      "                test_mode=True),\n",
+      "            dict(type='DecordDecode'),\n",
+      "            dict(type='Resize', scale=(-1, 256)),\n",
+      "            dict(type='TenCrop', crop_size=224),\n",
+      "            dict(type='FormatShape', input_format='NCHW'),\n",
+      "            dict(type='PackActionInputs')\n",
+      "        ],\n",
+      "        test_mode=True))\n",
+      "val_evaluator = dict(type='AccMetric')\n",
+      "test_evaluator = dict(type='AccMetric')\n",
+      "auto_scale_lr = dict(enable=False, base_batch_size=256)\n",
+      "work_dir = './tutorial_exps'\n",
+      "\n",
+      "05/15 03:33:35 - mmengine - INFO - Distributed training is not used, all SyncBatchNorm (SyncBN) layers in the model will be automatically reverted to BatchNormXd layers if they are used.\n",
+      "05/15 03:33:35 - mmengine - INFO - Hooks will be executed in the following order:\n",
+      "before_run:\n",
+      "(VERY_HIGH   ) RuntimeInfoHook                    \n",
+      "(BELOW_NORMAL) LoggerHook                         \n",
+      " -------------------- \n",
+      "before_train:\n",
+      "(VERY_HIGH   ) RuntimeInfoHook                    \n",
+      "(NORMAL      ) IterTimerHook                      \n",
+      "(VERY_LOW    ) CheckpointHook                     \n",
+      " -------------------- \n",
+      "before_train_epoch:\n",
+      "(VERY_HIGH   ) RuntimeInfoHook                    \n",
+      "(NORMAL      ) IterTimerHook                      \n",
+      "(NORMAL      ) DistSamplerSeedHook                \n",
+      " -------------------- \n",
+      "before_train_iter:\n",
+      "(VERY_HIGH   ) RuntimeInfoHook                    \n",
+      "(NORMAL      ) IterTimerHook                      \n",
+      " -------------------- \n",
+      "after_train_iter:\n",
+      "(VERY_HIGH   ) RuntimeInfoHook                    \n",
+      "(NORMAL      ) IterTimerHook                      \n",
+      "(BELOW_NORMAL) LoggerHook                         \n",
+      "(LOW         ) ParamSchedulerHook                 \n",
+      "(VERY_LOW    ) CheckpointHook                     \n",
+      " -------------------- \n",
+      "after_train_epoch:\n",
+      "(NORMAL      ) IterTimerHook                      \n",
+      "(NORMAL      ) SyncBuffersHook                    \n",
+      "(LOW         ) ParamSchedulerHook                 \n",
+      "(VERY_LOW    ) CheckpointHook                     \n",
+      " -------------------- \n",
+      "before_val_epoch:\n",
+      "(NORMAL      ) IterTimerHook                      \n",
+      "(NORMAL      ) SyncBuffersHook                    \n",
+      " -------------------- \n",
+      "before_val_iter:\n",
+      "(NORMAL      ) IterTimerHook                      \n",
+      " -------------------- \n",
+      "after_val_iter:\n",
+      "(NORMAL      ) IterTimerHook                      \n",
+      "(BELOW_NORMAL) LoggerHook                         \n",
+      " -------------------- \n",
+      "after_val_epoch:\n",
+      "(VERY_HIGH   ) RuntimeInfoHook                    \n",
+      "(NORMAL      ) IterTimerHook                      \n",
+      "(BELOW_NORMAL) LoggerHook                         \n",
+      "(LOW         ) ParamSchedulerHook                 \n",
+      "(VERY_LOW    ) CheckpointHook                     \n",
+      " -------------------- \n",
+      "after_train:\n",
+      "(VERY_LOW    ) CheckpointHook                     \n",
+      " -------------------- \n",
+      "before_test_epoch:\n",
+      "(NORMAL      ) IterTimerHook                      \n",
+      " -------------------- \n",
+      "before_test_iter:\n",
+      "(NORMAL      ) IterTimerHook                      \n",
+      " -------------------- \n",
+      "after_test_iter:\n",
+      "(NORMAL      ) IterTimerHook                      \n",
+      "(BELOW_NORMAL) LoggerHook                         \n",
+      " -------------------- \n",
+      "after_test_epoch:\n",
+      "(VERY_HIGH   ) RuntimeInfoHook                    \n",
+      "(NORMAL      ) IterTimerHook                      \n",
+      "(BELOW_NORMAL) LoggerHook                         \n",
+      " -------------------- \n",
+      "after_run:\n",
+      "(BELOW_NORMAL) LoggerHook                         \n",
+      " -------------------- \n",
+      "Loads checkpoint by http backend from path: https://download.pytorch.org/models/resnet50-11ad3fa6.pth\n"
+     ]
     },
     {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "tES-qnZ3k38Z"
-      },
-      "source": [
-        "### Train a new recognizer\n",
-        "\n",
-        "Finally, lets initialize the dataset and recognizer, then train a new recognizer!"
-      ]
+     "output_type": "stream",
+     "name": "stderr",
+     "text": [
+      "Downloading: \"https://download.pytorch.org/models/resnet50-11ad3fa6.pth\" to /root/.cache/torch/hub/checkpoints/resnet50-11ad3fa6.pth\n"
+     ]
     },
     {
-      "cell_type": "code",
-      "execution_count": 15,
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "dDBWkdDRk6oz",
-        "outputId": "044b9e09-2038-41c9-d5a3-8a74ae11ade2"
-      },
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "05/15 03:33:34 - mmengine - INFO - \n",
-            "------------------------------------------------------------\n",
-            "System environment:\n",
-            "    sys.platform: linux\n",
-            "    Python: 3.10.11 (main, Apr  5 2023, 14:15:10) [GCC 9.4.0]\n",
-            "    CUDA available: True\n",
-            "    numpy_random_seed: 1853452922\n",
-            "    GPU 0: Tesla T4\n",
-            "    CUDA_HOME: /usr/local/cuda\n",
-            "    NVCC: Cuda compilation tools, release 11.8, V11.8.89\n",
-            "    GCC: x86_64-linux-gnu-gcc (Ubuntu 9.4.0-1ubuntu1~20.04.1) 9.4.0\n",
-            "    PyTorch: 2.0.0+cu118\n",
-            "    PyTorch compiling details: PyTorch built with:\n",
-            "  - GCC 9.3\n",
-            "  - C++ Version: 201703\n",
-            "  - Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications\n",
-            "  - Intel(R) MKL-DNN v2.7.3 (Git Hash 6dbeffbae1f23cbbeae17adb7b5b13f1f37c080e)\n",
-            "  - OpenMP 201511 (a.k.a. OpenMP 4.5)\n",
-            "  - LAPACK is enabled (usually provided by MKL)\n",
-            "  - NNPACK is enabled\n",
-            "  - CPU capability usage: AVX2\n",
-            "  - CUDA Runtime 11.8\n",
-            "  - NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_90,code=sm_90\n",
-            "  - CuDNN 8.7\n",
-            "  - Magma 2.6.1\n",
-            "  - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=11.8, CUDNN_VERSION=8.7.0, CXX_COMPILER=/opt/rh/devtoolset-9/root/usr/bin/c++, CXX_FLAGS= -D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wunused-local-typedefs -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_DISABLE_GPU_ASSERTS=ON, TORCH_VERSION=2.0.0, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=1, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, \n",
-            "\n",
-            "    TorchVision: 0.15.1+cu118\n",
-            "    OpenCV: 4.7.0\n",
-            "    MMEngine: 0.7.3\n",
-            "\n",
-            "Runtime environment:\n",
-            "    cudnn_benchmark: False\n",
-            "    mp_cfg: {'mp_start_method': 'fork', 'opencv_num_threads': 0}\n",
-            "    dist_cfg: {'backend': 'nccl'}\n",
-            "    seed: None\n",
-            "    Distributed launcher: none\n",
-            "    Distributed training: False\n",
-            "    GPU number: 1\n",
-            "------------------------------------------------------------\n",
-            "\n",
-            "05/15 03:33:34 - mmengine - INFO - Config:\n",
-            "model = dict(\n",
-            "    type='Recognizer2D',\n",
-            "    backbone=dict(\n",
-            "        type='ResNet',\n",
-            "        pretrained='https://download.pytorch.org/models/resnet50-11ad3fa6.pth',\n",
-            "        depth=50,\n",
-            "        norm_eval=False),\n",
-            "    cls_head=dict(\n",
-            "        type='TSNHead',\n",
-            "        num_classes=2,\n",
-            "        in_channels=2048,\n",
-            "        spatial_type='avg',\n",
-            "        consensus=dict(type='AvgConsensus', dim=1),\n",
-            "        dropout_ratio=0.4,\n",
-            "        init_std=0.01,\n",
-            "        average_clips='prob'),\n",
-            "    data_preprocessor=dict(\n",
-            "        type='ActionDataPreprocessor',\n",
-            "        mean=[123.675, 116.28, 103.53],\n",
-            "        std=[58.395, 57.12, 57.375],\n",
-            "        format_shape='NCHW'),\n",
-            "    train_cfg=None,\n",
-            "    test_cfg=None)\n",
-            "train_cfg = dict(\n",
-            "    type='EpochBasedTrainLoop', max_epochs=10, val_begin=1, val_interval=1)\n",
-            "val_cfg = dict(type='ValLoop')\n",
-            "test_cfg = dict(type='TestLoop')\n",
-            "param_scheduler = [\n",
-            "    dict(\n",
-            "        type='MultiStepLR',\n",
-            "        begin=0,\n",
-            "        end=100,\n",
-            "        by_epoch=True,\n",
-            "        milestones=[40, 80],\n",
-            "        gamma=0.1)\n",
-            "]\n",
-            "optim_wrapper = dict(\n",
-            "    optimizer=dict(\n",
-            "        type='SGD', lr=7.8125e-05, momentum=0.9, weight_decay=0.0001),\n",
-            "    clip_grad=dict(max_norm=40, norm_type=2))\n",
-            "default_scope = 'mmaction'\n",
-            "default_hooks = dict(\n",
-            "    runtime_info=dict(type='RuntimeInfoHook'),\n",
-            "    timer=dict(type='IterTimerHook'),\n",
-            "    logger=dict(type='LoggerHook', interval=20, ignore_last=False),\n",
-            "    param_scheduler=dict(type='ParamSchedulerHook'),\n",
-            "    checkpoint=dict(\n",
-            "        type='CheckpointHook', interval=3, save_best='auto', max_keep_ckpts=3),\n",
-            "    sampler_seed=dict(type='DistSamplerSeedHook'),\n",
-            "    sync_buffers=dict(type='SyncBuffersHook'))\n",
-            "env_cfg = dict(\n",
-            "    cudnn_benchmark=False,\n",
-            "    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),\n",
-            "    dist_cfg=dict(backend='nccl'))\n",
-            "log_processor = dict(type='LogProcessor', window_size=20, by_epoch=True)\n",
-            "vis_backends = [dict(type='LocalVisBackend')]\n",
-            "visualizer = dict(\n",
-            "    type='ActionVisualizer', vis_backends=[dict(type='LocalVisBackend')])\n",
-            "log_level = 'INFO'\n",
-            "load_from = './checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth'\n",
-            "resume = False\n",
-            "dataset_type = 'VideoDataset'\n",
-            "data_root = 'kinetics400_tiny/train/'\n",
-            "data_root_val = 'kinetics400_tiny/val/'\n",
-            "ann_file_train = 'kinetics400_tiny/kinetics_tiny_train_video.txt'\n",
-            "ann_file_val = 'kinetics400_tiny/kinetics_tiny_val_video.txt'\n",
-            "file_client_args = dict(io_backend='disk')\n",
-            "train_pipeline = [\n",
-            "    dict(type='DecordInit', io_backend='disk'),\n",
-            "    dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=3),\n",
-            "    dict(type='DecordDecode'),\n",
-            "    dict(type='Resize', scale=(-1, 256)),\n",
-            "    dict(\n",
-            "        type='MultiScaleCrop',\n",
-            "        input_size=224,\n",
-            "        scales=(1, 0.875, 0.75, 0.66),\n",
-            "        random_crop=False,\n",
-            "        max_wh_scale_gap=1),\n",
-            "    dict(type='Resize', scale=(224, 224), keep_ratio=False),\n",
-            "    dict(type='Flip', flip_ratio=0.5),\n",
-            "    dict(type='FormatShape', input_format='NCHW'),\n",
-            "    dict(type='PackActionInputs')\n",
-            "]\n",
-            "val_pipeline = [\n",
-            "    dict(type='DecordInit', io_backend='disk'),\n",
-            "    dict(\n",
-            "        type='SampleFrames',\n",
-            "        clip_len=1,\n",
-            "        frame_interval=1,\n",
-            "        num_clips=3,\n",
-            "        test_mode=True),\n",
-            "    dict(type='DecordDecode'),\n",
-            "    dict(type='Resize', scale=(-1, 256)),\n",
-            "    dict(type='CenterCrop', crop_size=224),\n",
-            "    dict(type='FormatShape', input_format='NCHW'),\n",
-            "    dict(type='PackActionInputs')\n",
-            "]\n",
-            "test_pipeline = [\n",
-            "    dict(type='DecordInit', io_backend='disk'),\n",
-            "    dict(\n",
-            "        type='SampleFrames',\n",
-            "        clip_len=1,\n",
-            "        frame_interval=1,\n",
-            "        num_clips=25,\n",
-            "        test_mode=True),\n",
-            "    dict(type='DecordDecode'),\n",
-            "    dict(type='Resize', scale=(-1, 256)),\n",
-            "    dict(type='TenCrop', crop_size=224),\n",
-            "    dict(type='FormatShape', input_format='NCHW'),\n",
-            "    dict(type='PackActionInputs')\n",
-            "]\n",
-            "train_dataloader = dict(\n",
-            "    batch_size=2,\n",
-            "    num_workers=2,\n",
-            "    persistent_workers=True,\n",
-            "    sampler=dict(type='DefaultSampler', shuffle=True),\n",
-            "    dataset=dict(\n",
-            "        type='VideoDataset',\n",
-            "        ann_file='kinetics400_tiny/kinetics_tiny_train_video.txt',\n",
-            "        data_prefix=dict(video='kinetics400_tiny/train/'),\n",
-            "        pipeline=[\n",
-            "            dict(type='DecordInit', io_backend='disk'),\n",
-            "            dict(\n",
-            "                type='SampleFrames', clip_len=1, frame_interval=1,\n",
-            "                num_clips=3),\n",
-            "            dict(type='DecordDecode'),\n",
-            "            dict(type='Resize', scale=(-1, 256)),\n",
-            "            dict(\n",
-            "                type='MultiScaleCrop',\n",
-            "                input_size=224,\n",
-            "                scales=(1, 0.875, 0.75, 0.66),\n",
-            "                random_crop=False,\n",
-            "                max_wh_scale_gap=1),\n",
-            "            dict(type='Resize', scale=(224, 224), keep_ratio=False),\n",
-            "            dict(type='Flip', flip_ratio=0.5),\n",
-            "            dict(type='FormatShape', input_format='NCHW'),\n",
-            "            dict(type='PackActionInputs')\n",
-            "        ]))\n",
-            "val_dataloader = dict(\n",
-            "    batch_size=2,\n",
-            "    num_workers=2,\n",
-            "    persistent_workers=True,\n",
-            "    sampler=dict(type='DefaultSampler', shuffle=False),\n",
-            "    dataset=dict(\n",
-            "        type='VideoDataset',\n",
-            "        ann_file='kinetics400_tiny/kinetics_tiny_val_video.txt',\n",
-            "        data_prefix=dict(video='kinetics400_tiny/val/'),\n",
-            "        pipeline=[\n",
-            "            dict(type='DecordInit', io_backend='disk'),\n",
-            "            dict(\n",
-            "                type='SampleFrames',\n",
-            "                clip_len=1,\n",
-            "                frame_interval=1,\n",
-            "                num_clips=3,\n",
-            "                test_mode=True),\n",
-            "            dict(type='DecordDecode'),\n",
-            "            dict(type='Resize', scale=(-1, 256)),\n",
-            "            dict(type='CenterCrop', crop_size=224),\n",
-            "            dict(type='FormatShape', input_format='NCHW'),\n",
-            "            dict(type='PackActionInputs')\n",
-            "        ],\n",
-            "        test_mode=True))\n",
-            "test_dataloader = dict(\n",
-            "    batch_size=1,\n",
-            "    num_workers=2,\n",
-            "    persistent_workers=True,\n",
-            "    sampler=dict(type='DefaultSampler', shuffle=False),\n",
-            "    dataset=dict(\n",
-            "        type='VideoDataset',\n",
-            "        ann_file='kinetics400_tiny/kinetics_tiny_val_video.txt',\n",
-            "        data_prefix=dict(video='kinetics400_tiny/val/'),\n",
-            "        pipeline=[\n",
-            "            dict(type='DecordInit', io_backend='disk'),\n",
-            "            dict(\n",
-            "                type='SampleFrames',\n",
-            "                clip_len=1,\n",
-            "                frame_interval=1,\n",
-            "                num_clips=25,\n",
-            "                test_mode=True),\n",
-            "            dict(type='DecordDecode'),\n",
-            "            dict(type='Resize', scale=(-1, 256)),\n",
-            "            dict(type='TenCrop', crop_size=224),\n",
-            "            dict(type='FormatShape', input_format='NCHW'),\n",
-            "            dict(type='PackActionInputs')\n",
-            "        ],\n",
-            "        test_mode=True))\n",
-            "val_evaluator = dict(type='AccMetric')\n",
-            "test_evaluator = dict(type='AccMetric')\n",
-            "auto_scale_lr = dict(enable=False, base_batch_size=256)\n",
-            "work_dir = './tutorial_exps'\n",
-            "\n",
-            "05/15 03:33:35 - mmengine - INFO - Distributed training is not used, all SyncBatchNorm (SyncBN) layers in the model will be automatically reverted to BatchNormXd layers if they are used.\n",
-            "05/15 03:33:35 - mmengine - INFO - Hooks will be executed in the following order:\n",
-            "before_run:\n",
-            "(VERY_HIGH   ) RuntimeInfoHook                    \n",
-            "(BELOW_NORMAL) LoggerHook                         \n",
-            " -------------------- \n",
-            "before_train:\n",
-            "(VERY_HIGH   ) RuntimeInfoHook                    \n",
-            "(NORMAL      ) IterTimerHook                      \n",
-            "(VERY_LOW    ) CheckpointHook                     \n",
-            " -------------------- \n",
-            "before_train_epoch:\n",
-            "(VERY_HIGH   ) RuntimeInfoHook                    \n",
-            "(NORMAL      ) IterTimerHook                      \n",
-            "(NORMAL      ) DistSamplerSeedHook                \n",
-            " -------------------- \n",
-            "before_train_iter:\n",
-            "(VERY_HIGH   ) RuntimeInfoHook                    \n",
-            "(NORMAL      ) IterTimerHook                      \n",
-            " -------------------- \n",
-            "after_train_iter:\n",
-            "(VERY_HIGH   ) RuntimeInfoHook                    \n",
-            "(NORMAL      ) IterTimerHook                      \n",
-            "(BELOW_NORMAL) LoggerHook                         \n",
-            "(LOW         ) ParamSchedulerHook                 \n",
-            "(VERY_LOW    ) CheckpointHook                     \n",
-            " -------------------- \n",
-            "after_train_epoch:\n",
-            "(NORMAL      ) IterTimerHook                      \n",
-            "(NORMAL      ) SyncBuffersHook                    \n",
-            "(LOW         ) ParamSchedulerHook                 \n",
-            "(VERY_LOW    ) CheckpointHook                     \n",
-            " -------------------- \n",
-            "before_val_epoch:\n",
-            "(NORMAL      ) IterTimerHook                      \n",
-            "(NORMAL      ) SyncBuffersHook                    \n",
-            " -------------------- \n",
-            "before_val_iter:\n",
-            "(NORMAL      ) IterTimerHook                      \n",
-            " -------------------- \n",
-            "after_val_iter:\n",
-            "(NORMAL      ) IterTimerHook                      \n",
-            "(BELOW_NORMAL) LoggerHook                         \n",
-            " -------------------- \n",
-            "after_val_epoch:\n",
-            "(VERY_HIGH   ) RuntimeInfoHook                    \n",
-            "(NORMAL      ) IterTimerHook                      \n",
-            "(BELOW_NORMAL) LoggerHook                         \n",
-            "(LOW         ) ParamSchedulerHook                 \n",
-            "(VERY_LOW    ) CheckpointHook                     \n",
-            " -------------------- \n",
-            "after_train:\n",
-            "(VERY_LOW    ) CheckpointHook                     \n",
-            " -------------------- \n",
-            "before_test_epoch:\n",
-            "(NORMAL      ) IterTimerHook                      \n",
-            " -------------------- \n",
-            "before_test_iter:\n",
-            "(NORMAL      ) IterTimerHook                      \n",
-            " -------------------- \n",
-            "after_test_iter:\n",
-            "(NORMAL      ) IterTimerHook                      \n",
-            "(BELOW_NORMAL) LoggerHook                         \n",
-            " -------------------- \n",
-            "after_test_epoch:\n",
-            "(VERY_HIGH   ) RuntimeInfoHook                    \n",
-            "(NORMAL      ) IterTimerHook                      \n",
-            "(BELOW_NORMAL) LoggerHook                         \n",
-            " -------------------- \n",
-            "after_run:\n",
-            "(BELOW_NORMAL) LoggerHook                         \n",
-            " -------------------- \n",
-            "Loads checkpoint by http backend from path: https://download.pytorch.org/models/resnet50-11ad3fa6.pth\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "Downloading: \"https://download.pytorch.org/models/resnet50-11ad3fa6.pth\" to /root/.cache/torch/hub/checkpoints/resnet50-11ad3fa6.pth\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "05/15 03:33:37 - mmengine - INFO - These parameters in pretrained checkpoint are not loaded: {'fc.weight', 'fc.bias'}\n",
-            "Loads checkpoint by local backend from path: ./checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth\n",
-            "The model and loaded state dict do not match exactly\n",
-            "\n",
-            "size mismatch for cls_head.fc_cls.weight: copying a param with shape torch.Size([400, 2048]) from checkpoint, the shape in current model is torch.Size([2, 2048]).\n",
-            "size mismatch for cls_head.fc_cls.bias: copying a param with shape torch.Size([400]) from checkpoint, the shape in current model is torch.Size([2]).\n",
-            "05/15 03:33:37 - mmengine - INFO - Load checkpoint from ./checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth\n",
-            "05/15 03:33:37 - mmengine - WARNING - \"FileClient\" will be deprecated in future. Please use io functions in https://mmengine.readthedocs.io/en/latest/api/fileio.html#file-io\n",
-            "05/15 03:33:37 - mmengine - INFO - Checkpoints will be saved to /content/mmaction2/tutorial_exps.\n",
-            "05/15 03:33:41 - mmengine - INFO - Exp name: tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb_20230515_033334\n",
-            "05/15 03:33:41 - mmengine - INFO - Epoch(train)  [1][15/15]  lr: 7.8125e-05  eta: 0:00:31  time: 0.2334  data_time: 0.0793  memory: 2917  grad_norm: 11.9900  loss: 0.6971  top1_acc: 1.0000  top5_acc: 1.0000  loss_cls: 0.6971\n",
-            "05/15 03:33:42 - mmengine - INFO - Epoch(val) [1][5/5]    acc/top1: 0.3000  acc/top5: 1.0000  acc/mean1: 0.3000  data_time: 0.1994  time: 0.2254\n",
-            "05/15 03:33:42 - mmengine - INFO - The best checkpoint with 0.3000 acc/top1 at 1 epoch is saved to best_acc_top1_epoch_1.pth.\n",
-            "05/15 03:33:46 - mmengine - INFO - Exp name: tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb_20230515_033334\n",
-            "05/15 03:33:46 - mmengine - INFO - Epoch(train)  [2][15/15]  lr: 7.8125e-05  eta: 0:00:29  time: 0.2373  data_time: 0.1369  memory: 961  grad_norm: 12.4935  loss: 0.7158  top1_acc: 0.5000  top5_acc: 1.0000  loss_cls: 0.7158\n",
-            "05/15 03:33:48 - mmengine - INFO - Epoch(val) [2][5/5]    acc/top1: 0.7000  acc/top5: 1.0000  acc/mean1: 0.7000  data_time: 0.2692  time: 0.3006\n",
-            "05/15 03:33:48 - mmengine - INFO - The previous best checkpoint /content/mmaction2/tutorial_exps/best_acc_top1_epoch_1.pth is removed\n",
-            "05/15 03:33:48 - mmengine - INFO - The best checkpoint with 0.7000 acc/top1 at 2 epoch is saved to best_acc_top1_epoch_2.pth.\n",
-            "05/15 03:33:51 - mmengine - INFO - Exp name: tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb_20230515_033334\n",
-            "05/15 03:33:51 - mmengine - INFO - Epoch(train)  [3][15/15]  lr: 7.8125e-05  eta: 0:00:24  time: 0.2112  data_time: 0.1163  memory: 961  grad_norm: 13.4063  loss: 0.7338  top1_acc: 0.0000  top5_acc: 1.0000  loss_cls: 0.7338\n",
-            "05/15 03:33:51 - mmengine - INFO - Saving checkpoint at 3 epochs\n",
-            "05/15 03:33:53 - mmengine - INFO - Epoch(val) [3][5/5]    acc/top1: 0.4000  acc/top5: 1.0000  acc/mean1: 0.4000  data_time: 0.1669  time: 0.1906\n",
-            "05/15 03:33:56 - mmengine - INFO - Exp name: tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb_20230515_033334\n",
-            "05/15 03:33:56 - mmengine - INFO - Epoch(train)  [4][15/15]  lr: 7.8125e-05  eta: 0:00:19  time: 0.1750  data_time: 0.0907  memory: 961  grad_norm: 12.4322  loss: 0.6894  top1_acc: 0.0000  top5_acc: 1.0000  loss_cls: 0.6894\n",
-            "05/15 03:33:57 - mmengine - INFO - Epoch(val) [4][5/5]    acc/top1: 0.7000  acc/top5: 1.0000  acc/mean1: 0.7000  data_time: 0.1791  time: 0.2030\n",
-            "05/15 03:34:00 - mmengine - INFO - Exp name: tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb_20230515_033334\n",
-            "05/15 03:34:00 - mmengine - INFO - Epoch(train)  [5][15/15]  lr: 7.8125e-05  eta: 0:00:16  time: 0.2016  data_time: 0.1155  memory: 961  grad_norm: 11.5982  loss: 0.6940  top1_acc: 0.0000  top5_acc: 1.0000  loss_cls: 0.6940\n",
-            "05/15 03:34:02 - mmengine - INFO - Epoch(val) [5][5/5]    acc/top1: 0.7000  acc/top5: 1.0000  acc/mean1: 0.7000  data_time: 0.3145  time: 0.3455\n",
-            "05/15 03:34:05 - mmengine - INFO - Exp name: tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb_20230515_033334\n",
-            "05/15 03:34:05 - mmengine - INFO - Epoch(train)  [6][15/15]  lr: 7.8125e-05  eta: 0:00:13  time: 0.2366  data_time: 0.1440  memory: 961  grad_norm: 12.0952  loss: 0.6667  top1_acc: 0.0000  top5_acc: 1.0000  loss_cls: 0.6667\n",
-            "05/15 03:34:05 - mmengine - INFO - Saving checkpoint at 6 epochs\n",
-            "05/15 03:34:08 - mmengine - INFO - Epoch(val) [6][5/5]    acc/top1: 0.6000  acc/top5: 1.0000  acc/mean1: 0.6000  data_time: 0.2172  time: 0.2403\n",
-            "05/15 03:34:10 - mmengine - INFO - Exp name: tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb_20230515_033334\n",
-            "05/15 03:34:10 - mmengine - INFO - Epoch(train)  [7][15/15]  lr: 7.8125e-05  eta: 0:00:09  time: 0.1784  data_time: 0.0942  memory: 961  grad_norm: 12.4209  loss: 0.6570  top1_acc: 1.0000  top5_acc: 1.0000  loss_cls: 0.6570\n",
-            "05/15 03:34:11 - mmengine - INFO - Epoch(val) [7][5/5]    acc/top1: 0.9000  acc/top5: 1.0000  acc/mean1: 0.9000  data_time: 0.1898  time: 0.2118\n",
-            "05/15 03:34:11 - mmengine - INFO - The previous best checkpoint /content/mmaction2/tutorial_exps/best_acc_top1_epoch_2.pth is removed\n",
-            "05/15 03:34:12 - mmengine - INFO - The best checkpoint with 0.9000 acc/top1 at 7 epoch is saved to best_acc_top1_epoch_7.pth.\n",
-            "05/15 03:34:15 - mmengine - INFO - Exp name: tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb_20230515_033334\n",
-            "05/15 03:34:15 - mmengine - INFO - Epoch(train)  [8][15/15]  lr: 7.8125e-05  eta: 0:00:06  time: 0.2073  data_time: 0.1220  memory: 961  grad_norm: 11.4271  loss: 0.6241  top1_acc: 1.0000  top5_acc: 1.0000  loss_cls: 0.6241\n",
-            "05/15 03:34:17 - mmengine - INFO - Epoch(val) [8][5/5]    acc/top1: 1.0000  acc/top5: 1.0000  acc/mean1: 1.0000  data_time: 0.3497  time: 0.3890\n",
-            "05/15 03:34:17 - mmengine - INFO - The previous best checkpoint /content/mmaction2/tutorial_exps/best_acc_top1_epoch_7.pth is removed\n",
-            "05/15 03:34:18 - mmengine - INFO - The best checkpoint with 1.0000 acc/top1 at 8 epoch is saved to best_acc_top1_epoch_8.pth.\n",
-            "05/15 03:34:21 - mmengine - INFO - Exp name: tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb_20230515_033334\n",
-            "05/15 03:34:21 - mmengine - INFO - Epoch(train)  [9][15/15]  lr: 7.8125e-05  eta: 0:00:03  time: 0.2309  data_time: 0.1390  memory: 961  grad_norm: 12.3066  loss: 0.6451  top1_acc: 0.5000  top5_acc: 1.0000  loss_cls: 0.6451\n",
-            "05/15 03:34:21 - mmengine - INFO - Saving checkpoint at 9 epochs\n",
-            "05/15 03:34:23 - mmengine - INFO - Epoch(val) [9][5/5]    acc/top1: 1.0000  acc/top5: 1.0000  acc/mean1: 1.0000  data_time: 0.2023  time: 0.2256\n",
-            "05/15 03:34:26 - mmengine - INFO - Exp name: tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb_20230515_033334\n",
-            "05/15 03:34:26 - mmengine - INFO - Epoch(train) [10][15/15]  lr: 7.8125e-05  eta: 0:00:00  time: 0.1733  data_time: 0.0951  memory: 961  grad_norm: 11.1461  loss: 0.5931  top1_acc: 1.0000  top5_acc: 1.0000  loss_cls: 0.5931\n",
-            "05/15 03:34:26 - mmengine - INFO - Saving checkpoint at 10 epochs\n",
-            "05/15 03:34:27 - mmengine - INFO - Epoch(val) [10][5/5]    acc/top1: 1.0000  acc/top5: 1.0000  acc/mean1: 1.0000  data_time: 0.1836  time: 0.2048\n"
-          ]
-        },
-        {
-          "output_type": "execute_result",
-          "data": {
-            "text/plain": [
-              "Recognizer2D(\n",
-              "  (data_preprocessor): ActionDataPreprocessor()\n",
-              "  (backbone): ResNet(\n",
-              "    (conv1): ConvModule(\n",
-              "      (conv): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)\n",
-              "      (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
-              "      (activate): ReLU(inplace=True)\n",
-              "    )\n",
-              "    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)\n",
-              "    (layer1): Sequential(\n",
-              "      (0): Bottleneck(\n",
-              "        (conv1): ConvModule(\n",
-              "          (conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
-              "          (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
-              "          (activate): ReLU(inplace=True)\n",
-              "        )\n",
-              "        (conv2): ConvModule(\n",
-              "          (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
-              "          (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
-              "          (activate): ReLU(inplace=True)\n",
-              "        )\n",
-              "        (conv3): ConvModule(\n",
-              "          (conv): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
-              "          (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
-              "        )\n",
-              "        (relu): ReLU(inplace=True)\n",
-              "        (downsample): ConvModule(\n",
-              "          (conv): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
-              "          (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
-              "        )\n",
-              "      )\n",
-              "      (1): Bottleneck(\n",
-              "        (conv1): ConvModule(\n",
-              "          (conv): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
-              "          (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
-              "          (activate): ReLU(inplace=True)\n",
-              "        )\n",
-              "        (conv2): ConvModule(\n",
-              "          (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
-              "          (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
-              "          (activate): ReLU(inplace=True)\n",
-              "        )\n",
-              "        (conv3): ConvModule(\n",
-              "          (conv): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
-              "          (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
-              "        )\n",
-              "        (relu): ReLU(inplace=True)\n",
-              "      )\n",
-              "      (2): Bottleneck(\n",
-              "        (conv1): ConvModule(\n",
-              "          (conv): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
-              "          (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
-              "          (activate): ReLU(inplace=True)\n",
-              "        )\n",
-              "        (conv2): ConvModule(\n",
-              "          (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
-              "          (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
-              "          (activate): ReLU(inplace=True)\n",
-              "        )\n",
-              "        (conv3): ConvModule(\n",
-              "          (conv): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
-              "          (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
-              "        )\n",
-              "        (relu): ReLU(inplace=True)\n",
-              "      )\n",
-              "    )\n",
-              "    (layer2): Sequential(\n",
-              "      (0): Bottleneck(\n",
-              "        (conv1): ConvModule(\n",
-              "          (conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
-              "          (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
-              "          (activate): ReLU(inplace=True)\n",
-              "        )\n",
-              "        (conv2): ConvModule(\n",
-              "          (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n",
-              "          (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
-              "          (activate): ReLU(inplace=True)\n",
-              "        )\n",
-              "        (conv3): ConvModule(\n",
-              "          (conv): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
-              "          (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
-              "        )\n",
-              "        (relu): ReLU(inplace=True)\n",
-              "        (downsample): ConvModule(\n",
-              "          (conv): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)\n",
-              "          (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
-              "        )\n",
-              "      )\n",
-              "      (1): Bottleneck(\n",
-              "        (conv1): ConvModule(\n",
-              "          (conv): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
-              "          (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
-              "          (activate): ReLU(inplace=True)\n",
-              "        )\n",
-              "        (conv2): ConvModule(\n",
-              "          (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
-              "          (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
-              "          (activate): ReLU(inplace=True)\n",
-              "        )\n",
-              "        (conv3): ConvModule(\n",
-              "          (conv): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
-              "          (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
-              "        )\n",
-              "        (relu): ReLU(inplace=True)\n",
-              "      )\n",
-              "      (2): Bottleneck(\n",
-              "        (conv1): ConvModule(\n",
-              "          (conv): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
-              "          (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
-              "          (activate): ReLU(inplace=True)\n",
-              "        )\n",
-              "        (conv2): ConvModule(\n",
-              "          (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
-              "          (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
-              "          (activate): ReLU(inplace=True)\n",
-              "        )\n",
-              "        (conv3): ConvModule(\n",
-              "          (conv): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
-              "          (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
-              "        )\n",
-              "        (relu): ReLU(inplace=True)\n",
-              "      )\n",
-              "      (3): Bottleneck(\n",
-              "        (conv1): ConvModule(\n",
-              "          (conv): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
-              "          (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
-              "          (activate): ReLU(inplace=True)\n",
-              "        )\n",
-              "        (conv2): ConvModule(\n",
-              "          (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
-              "          (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
-              "          (activate): ReLU(inplace=True)\n",
-              "        )\n",
-              "        (conv3): ConvModule(\n",
-              "          (conv): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
-              "          (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
-              "        )\n",
-              "        (relu): ReLU(inplace=True)\n",
-              "      )\n",
-              "    )\n",
-              "    (layer3): Sequential(\n",
-              "      (0): Bottleneck(\n",
-              "        (conv1): ConvModule(\n",
-              "          (conv): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
-              "          (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
-              "          (activate): ReLU(inplace=True)\n",
-              "        )\n",
-              "        (conv2): ConvModule(\n",
-              "          (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n",
-              "          (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
-              "          (activate): ReLU(inplace=True)\n",
-              "        )\n",
-              "        (conv3): ConvModule(\n",
-              "          (conv): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
-              "          (bn): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
-              "        )\n",
-              "        (relu): ReLU(inplace=True)\n",
-              "        (downsample): ConvModule(\n",
-              "          (conv): Conv2d(512, 1024, kernel_size=(1, 1), stride=(2, 2), bias=False)\n",
-              "          (bn): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
-              "        )\n",
-              "      )\n",
-              "      (1): Bottleneck(\n",
-              "        (conv1): ConvModule(\n",
-              "          (conv): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
-              "          (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
-              "          (activate): ReLU(inplace=True)\n",
-              "        )\n",
-              "        (conv2): ConvModule(\n",
-              "          (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
-              "          (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
-              "          (activate): ReLU(inplace=True)\n",
-              "        )\n",
-              "        (conv3): ConvModule(\n",
-              "          (conv): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
-              "          (bn): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
-              "        )\n",
-              "        (relu): ReLU(inplace=True)\n",
-              "      )\n",
-              "      (2): Bottleneck(\n",
-              "        (conv1): ConvModule(\n",
-              "          (conv): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
-              "          (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
-              "          (activate): ReLU(inplace=True)\n",
-              "        )\n",
-              "        (conv2): ConvModule(\n",
-              "          (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
-              "          (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
-              "          (activate): ReLU(inplace=True)\n",
-              "        )\n",
-              "        (conv3): ConvModule(\n",
-              "          (conv): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
-              "          (bn): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
-              "        )\n",
-              "        (relu): ReLU(inplace=True)\n",
-              "      )\n",
-              "      (3): Bottleneck(\n",
-              "        (conv1): ConvModule(\n",
-              "          (conv): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
-              "          (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
-              "          (activate): ReLU(inplace=True)\n",
-              "        )\n",
-              "        (conv2): ConvModule(\n",
-              "          (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
-              "          (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
-              "          (activate): ReLU(inplace=True)\n",
-              "        )\n",
-              "        (conv3): ConvModule(\n",
-              "          (conv): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
-              "          (bn): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
-              "        )\n",
-              "        (relu): ReLU(inplace=True)\n",
-              "      )\n",
-              "      (4): Bottleneck(\n",
-              "        (conv1): ConvModule(\n",
-              "          (conv): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
-              "          (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
-              "          (activate): ReLU(inplace=True)\n",
-              "        )\n",
-              "        (conv2): ConvModule(\n",
-              "          (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
-              "          (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
-              "          (activate): ReLU(inplace=True)\n",
-              "        )\n",
-              "        (conv3): ConvModule(\n",
-              "          (conv): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
-              "          (bn): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
-              "        )\n",
-              "        (relu): ReLU(inplace=True)\n",
-              "      )\n",
-              "      (5): Bottleneck(\n",
-              "        (conv1): ConvModule(\n",
-              "          (conv): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
-              "          (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
-              "          (activate): ReLU(inplace=True)\n",
-              "        )\n",
-              "        (conv2): ConvModule(\n",
-              "          (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
-              "          (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
-              "          (activate): ReLU(inplace=True)\n",
-              "        )\n",
-              "        (conv3): ConvModule(\n",
-              "          (conv): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
-              "          (bn): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
-              "        )\n",
-              "        (relu): ReLU(inplace=True)\n",
-              "      )\n",
-              "    )\n",
-              "    (layer4): Sequential(\n",
-              "      (0): Bottleneck(\n",
-              "        (conv1): ConvModule(\n",
-              "          (conv): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
-              "          (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
-              "          (activate): ReLU(inplace=True)\n",
-              "        )\n",
-              "        (conv2): ConvModule(\n",
-              "          (conv): Conv2d(512, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n",
-              "          (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
-              "          (activate): ReLU(inplace=True)\n",
-              "        )\n",
-              "        (conv3): ConvModule(\n",
-              "          (conv): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
-              "          (bn): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
-              "        )\n",
-              "        (relu): ReLU(inplace=True)\n",
-              "        (downsample): ConvModule(\n",
-              "          (conv): Conv2d(1024, 2048, kernel_size=(1, 1), stride=(2, 2), bias=False)\n",
-              "          (bn): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
-              "        )\n",
-              "      )\n",
-              "      (1): Bottleneck(\n",
-              "        (conv1): ConvModule(\n",
-              "          (conv): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
-              "          (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
-              "          (activate): ReLU(inplace=True)\n",
-              "        )\n",
-              "        (conv2): ConvModule(\n",
-              "          (conv): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
-              "          (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
-              "          (activate): ReLU(inplace=True)\n",
-              "        )\n",
-              "        (conv3): ConvModule(\n",
-              "          (conv): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
-              "          (bn): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
-              "        )\n",
-              "        (relu): ReLU(inplace=True)\n",
-              "      )\n",
-              "      (2): Bottleneck(\n",
-              "        (conv1): ConvModule(\n",
-              "          (conv): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
-              "          (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
-              "          (activate): ReLU(inplace=True)\n",
-              "        )\n",
-              "        (conv2): ConvModule(\n",
-              "          (conv): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
-              "          (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
-              "          (activate): ReLU(inplace=True)\n",
-              "        )\n",
-              "        (conv3): ConvModule(\n",
-              "          (conv): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
-              "          (bn): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
-              "        )\n",
-              "        (relu): ReLU(inplace=True)\n",
-              "      )\n",
-              "    )\n",
-              "  )\n",
-              "  (cls_head): TSNHead(\n",
-              "    (loss_cls): CrossEntropyLoss()\n",
-              "    (consensus): AvgConsensus()\n",
-              "    (avg_pool): AdaptiveAvgPool2d(output_size=(1, 1))\n",
-              "    (dropout): Dropout(p=0.4, inplace=False)\n",
-              "    (fc_cls): Linear(in_features=2048, out_features=2, bias=True)\n",
-              "  )\n",
-              ")"
-            ]
-          },
-          "metadata": {},
-          "execution_count": 15
-        }
-      ],
-      "source": [
-        "import os.path as osp\n",
-        "import mmengine\n",
-        "from mmengine.runner import Runner\n",
-        "\n",
-        "# Create work_dir\n",
-        "mmengine.mkdir_or_exist(osp.abspath(cfg.work_dir))\n",
-        "\n",
-        "# build the runner from config\n",
-        "runner = Runner.from_cfg(cfg)\n",
-        "\n",
-        "# start training\n",
-        "runner.train()"
-      ]
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "05/15 03:33:37 - mmengine - INFO - These parameters in pretrained checkpoint are not loaded: {'fc.weight', 'fc.bias'}\n",
+      "Loads checkpoint by local backend from path: ./checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth\n",
+      "The model and loaded state dict do not match exactly\n",
+      "\n",
+      "size mismatch for cls_head.fc_cls.weight: copying a param with shape torch.Size([400, 2048]) from checkpoint, the shape in current model is torch.Size([2, 2048]).\n",
+      "size mismatch for cls_head.fc_cls.bias: copying a param with shape torch.Size([400]) from checkpoint, the shape in current model is torch.Size([2]).\n",
+      "05/15 03:33:37 - mmengine - INFO - Load checkpoint from ./checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth\n",
+      "05/15 03:33:37 - mmengine - WARNING - \"FileClient\" will be deprecated in future. Please use io functions in https://mmengine.readthedocs.io/en/latest/api/fileio.html#file-io\n",
+      "05/15 03:33:37 - mmengine - INFO - Checkpoints will be saved to /content/mmaction2/tutorial_exps.\n",
+      "05/15 03:33:41 - mmengine - INFO - Exp name: tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb_20230515_033334\n",
+      "05/15 03:33:41 - mmengine - INFO - Epoch(train)  [1][15/15]  lr: 7.8125e-05  eta: 0:00:31  time: 0.2334  data_time: 0.0793  memory: 2917  grad_norm: 11.9900  loss: 0.6971  top1_acc: 1.0000  top5_acc: 1.0000  loss_cls: 0.6971\n",
+      "05/15 03:33:42 - mmengine - INFO - Epoch(val) [1][5/5]    acc/top1: 0.3000  acc/top5: 1.0000  acc/mean1: 0.3000  data_time: 0.1994  time: 0.2254\n",
+      "05/15 03:33:42 - mmengine - INFO - The best checkpoint with 0.3000 acc/top1 at 1 epoch is saved to best_acc_top1_epoch_1.pth.\n",
+      "05/15 03:33:46 - mmengine - INFO - Exp name: tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb_20230515_033334\n",
+      "05/15 03:33:46 - mmengine - INFO - Epoch(train)  [2][15/15]  lr: 7.8125e-05  eta: 0:00:29  time: 0.2373  data_time: 0.1369  memory: 961  grad_norm: 12.4935  loss: 0.7158  top1_acc: 0.5000  top5_acc: 1.0000  loss_cls: 0.7158\n",
+      "05/15 03:33:48 - mmengine - INFO - Epoch(val) [2][5/5]    acc/top1: 0.7000  acc/top5: 1.0000  acc/mean1: 0.7000  data_time: 0.2692  time: 0.3006\n",
+      "05/15 03:33:48 - mmengine - INFO - The previous best checkpoint /content/mmaction2/tutorial_exps/best_acc_top1_epoch_1.pth is removed\n",
+      "05/15 03:33:48 - mmengine - INFO - The best checkpoint with 0.7000 acc/top1 at 2 epoch is saved to best_acc_top1_epoch_2.pth.\n",
+      "05/15 03:33:51 - mmengine - INFO - Exp name: tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb_20230515_033334\n",
+      "05/15 03:33:51 - mmengine - INFO - Epoch(train)  [3][15/15]  lr: 7.8125e-05  eta: 0:00:24  time: 0.2112  data_time: 0.1163  memory: 961  grad_norm: 13.4063  loss: 0.7338  top1_acc: 0.0000  top5_acc: 1.0000  loss_cls: 0.7338\n",
+      "05/15 03:33:51 - mmengine - INFO - Saving checkpoint at 3 epochs\n",
+      "05/15 03:33:53 - mmengine - INFO - Epoch(val) [3][5/5]    acc/top1: 0.4000  acc/top5: 1.0000  acc/mean1: 0.4000  data_time: 0.1669  time: 0.1906\n",
+      "05/15 03:33:56 - mmengine - INFO - Exp name: tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb_20230515_033334\n",
+      "05/15 03:33:56 - mmengine - INFO - Epoch(train)  [4][15/15]  lr: 7.8125e-05  eta: 0:00:19  time: 0.1750  data_time: 0.0907  memory: 961  grad_norm: 12.4322  loss: 0.6894  top1_acc: 0.0000  top5_acc: 1.0000  loss_cls: 0.6894\n",
+      "05/15 03:33:57 - mmengine - INFO - Epoch(val) [4][5/5]    acc/top1: 0.7000  acc/top5: 1.0000  acc/mean1: 0.7000  data_time: 0.1791  time: 0.2030\n",
+      "05/15 03:34:00 - mmengine - INFO - Exp name: tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb_20230515_033334\n",
+      "05/15 03:34:00 - mmengine - INFO - Epoch(train)  [5][15/15]  lr: 7.8125e-05  eta: 0:00:16  time: 0.2016  data_time: 0.1155  memory: 961  grad_norm: 11.5982  loss: 0.6940  top1_acc: 0.0000  top5_acc: 1.0000  loss_cls: 0.6940\n",
+      "05/15 03:34:02 - mmengine - INFO - Epoch(val) [5][5/5]    acc/top1: 0.7000  acc/top5: 1.0000  acc/mean1: 0.7000  data_time: 0.3145  time: 0.3455\n",
+      "05/15 03:34:05 - mmengine - INFO - Exp name: tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb_20230515_033334\n",
+      "05/15 03:34:05 - mmengine - INFO - Epoch(train)  [6][15/15]  lr: 7.8125e-05  eta: 0:00:13  time: 0.2366  data_time: 0.1440  memory: 961  grad_norm: 12.0952  loss: 0.6667  top1_acc: 0.0000  top5_acc: 1.0000  loss_cls: 0.6667\n",
+      "05/15 03:34:05 - mmengine - INFO - Saving checkpoint at 6 epochs\n",
+      "05/15 03:34:08 - mmengine - INFO - Epoch(val) [6][5/5]    acc/top1: 0.6000  acc/top5: 1.0000  acc/mean1: 0.6000  data_time: 0.2172  time: 0.2403\n",
+      "05/15 03:34:10 - mmengine - INFO - Exp name: tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb_20230515_033334\n",
+      "05/15 03:34:10 - mmengine - INFO - Epoch(train)  [7][15/15]  lr: 7.8125e-05  eta: 0:00:09  time: 0.1784  data_time: 0.0942  memory: 961  grad_norm: 12.4209  loss: 0.6570  top1_acc: 1.0000  top5_acc: 1.0000  loss_cls: 0.6570\n",
+      "05/15 03:34:11 - mmengine - INFO - Epoch(val) [7][5/5]    acc/top1: 0.9000  acc/top5: 1.0000  acc/mean1: 0.9000  data_time: 0.1898  time: 0.2118\n",
+      "05/15 03:34:11 - mmengine - INFO - The previous best checkpoint /content/mmaction2/tutorial_exps/best_acc_top1_epoch_2.pth is removed\n",
+      "05/15 03:34:12 - mmengine - INFO - The best checkpoint with 0.9000 acc/top1 at 7 epoch is saved to best_acc_top1_epoch_7.pth.\n",
+      "05/15 03:34:15 - mmengine - INFO - Exp name: tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb_20230515_033334\n",
+      "05/15 03:34:15 - mmengine - INFO - Epoch(train)  [8][15/15]  lr: 7.8125e-05  eta: 0:00:06  time: 0.2073  data_time: 0.1220  memory: 961  grad_norm: 11.4271  loss: 0.6241  top1_acc: 1.0000  top5_acc: 1.0000  loss_cls: 0.6241\n",
+      "05/15 03:34:17 - mmengine - INFO - Epoch(val) [8][5/5]    acc/top1: 1.0000  acc/top5: 1.0000  acc/mean1: 1.0000  data_time: 0.3497  time: 0.3890\n",
+      "05/15 03:34:17 - mmengine - INFO - The previous best checkpoint /content/mmaction2/tutorial_exps/best_acc_top1_epoch_7.pth is removed\n",
+      "05/15 03:34:18 - mmengine - INFO - The best checkpoint with 1.0000 acc/top1 at 8 epoch is saved to best_acc_top1_epoch_8.pth.\n",
+      "05/15 03:34:21 - mmengine - INFO - Exp name: tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb_20230515_033334\n",
+      "05/15 03:34:21 - mmengine - INFO - Epoch(train)  [9][15/15]  lr: 7.8125e-05  eta: 0:00:03  time: 0.2309  data_time: 0.1390  memory: 961  grad_norm: 12.3066  loss: 0.6451  top1_acc: 0.5000  top5_acc: 1.0000  loss_cls: 0.6451\n",
+      "05/15 03:34:21 - mmengine - INFO - Saving checkpoint at 9 epochs\n",
+      "05/15 03:34:23 - mmengine - INFO - Epoch(val) [9][5/5]    acc/top1: 1.0000  acc/top5: 1.0000  acc/mean1: 1.0000  data_time: 0.2023  time: 0.2256\n",
+      "05/15 03:34:26 - mmengine - INFO - Exp name: tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb_20230515_033334\n",
+      "05/15 03:34:26 - mmengine - INFO - Epoch(train) [10][15/15]  lr: 7.8125e-05  eta: 0:00:00  time: 0.1733  data_time: 0.0951  memory: 961  grad_norm: 11.1461  loss: 0.5931  top1_acc: 1.0000  top5_acc: 1.0000  loss_cls: 0.5931\n",
+      "05/15 03:34:26 - mmengine - INFO - Saving checkpoint at 10 epochs\n",
+      "05/15 03:34:27 - mmengine - INFO - Epoch(val) [10][5/5]    acc/top1: 1.0000  acc/top5: 1.0000  acc/mean1: 1.0000  data_time: 0.1836  time: 0.2048\n"
+     ]
     },
     {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "zdSd7oTLlxIf"
-      },
-      "source": [
-        "### Understand the log\n",
-        "From the log, we can have a basic understanding the training process and know how well the recognizer is trained.\n",
-        "\n",
-        "Firstly, the ResNet-50 backbone pre-trained on ImageNet is loaded, this is a common practice since training from scratch is more cost. The log shows that all the weights of the ResNet-50 backbone are loaded except the `fc.bias` and `fc.weight`.\n",
-        "\n",
-        "Second, since the dataset we are using is small, we loaded a TSN model and finetune it for action recognition.\n",
-        "The original TSN is trained on original Kinetics-400 dataset which contains 400 classes but Kinetics-400 Tiny dataset only have 2 classes. Therefore, the last FC layer of the pre-trained TSN for classification has different weight shape and is not used.\n",
-        "\n",
-        "Third, after training, the recognizer is evaluated by the default evaluation. The results show that the recognizer achieves 100% top1 accuracy and 100% top5 accuracy on the val dataset,\n",
-        " \n",
-        "Not bad!"
+     "output_type": "execute_result",
+     "data": {
+      "text/plain": [
+       "Recognizer2D(\n",
+       "  (data_preprocessor): ActionDataPreprocessor()\n",
+       "  (backbone): ResNet(\n",
+       "    (conv1): ConvModule(\n",
+       "      (conv): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)\n",
+       "      (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+       "      (activate): ReLU(inplace=True)\n",
+       "    )\n",
+       "    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)\n",
+       "    (layer1): Sequential(\n",
+       "      (0): Bottleneck(\n",
+       "        (conv1): ConvModule(\n",
+       "          (conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+       "          (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+       "          (activate): ReLU(inplace=True)\n",
+       "        )\n",
+       "        (conv2): ConvModule(\n",
+       "          (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
+       "          (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+       "          (activate): ReLU(inplace=True)\n",
+       "        )\n",
+       "        (conv3): ConvModule(\n",
+       "          (conv): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+       "          (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+       "        )\n",
+       "        (relu): ReLU(inplace=True)\n",
+       "        (downsample): ConvModule(\n",
+       "          (conv): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+       "          (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+       "        )\n",
+       "      )\n",
+       "      (1): Bottleneck(\n",
+       "        (conv1): ConvModule(\n",
+       "          (conv): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+       "          (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+       "          (activate): ReLU(inplace=True)\n",
+       "        )\n",
+       "        (conv2): ConvModule(\n",
+       "          (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
+       "          (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+       "          (activate): ReLU(inplace=True)\n",
+       "        )\n",
+       "        (conv3): ConvModule(\n",
+       "          (conv): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+       "          (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+       "        )\n",
+       "        (relu): ReLU(inplace=True)\n",
+       "      )\n",
+       "      (2): Bottleneck(\n",
+       "        (conv1): ConvModule(\n",
+       "          (conv): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+       "          (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+       "          (activate): ReLU(inplace=True)\n",
+       "        )\n",
+       "        (conv2): ConvModule(\n",
+       "          (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
+       "          (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+       "          (activate): ReLU(inplace=True)\n",
+       "        )\n",
+       "        (conv3): ConvModule(\n",
+       "          (conv): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+       "          (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+       "        )\n",
+       "        (relu): ReLU(inplace=True)\n",
+       "      )\n",
+       "    )\n",
+       "    (layer2): Sequential(\n",
+       "      (0): Bottleneck(\n",
+       "        (conv1): ConvModule(\n",
+       "          (conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+       "          (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+       "          (activate): ReLU(inplace=True)\n",
+       "        )\n",
+       "        (conv2): ConvModule(\n",
+       "          (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n",
+       "          (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+       "          (activate): ReLU(inplace=True)\n",
+       "        )\n",
+       "        (conv3): ConvModule(\n",
+       "          (conv): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+       "          (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+       "        )\n",
+       "        (relu): ReLU(inplace=True)\n",
+       "        (downsample): ConvModule(\n",
+       "          (conv): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)\n",
+       "          (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+       "        )\n",
+       "      )\n",
+       "      (1): Bottleneck(\n",
+       "        (conv1): ConvModule(\n",
+       "          (conv): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+       "          (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+       "          (activate): ReLU(inplace=True)\n",
+       "        )\n",
+       "        (conv2): ConvModule(\n",
+       "          (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
+       "          (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+       "          (activate): ReLU(inplace=True)\n",
+       "        )\n",
+       "        (conv3): ConvModule(\n",
+       "          (conv): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+       "          (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+       "        )\n",
+       "        (relu): ReLU(inplace=True)\n",
+       "      )\n",
+       "      (2): Bottleneck(\n",
+       "        (conv1): ConvModule(\n",
+       "          (conv): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+       "          (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+       "          (activate): ReLU(inplace=True)\n",
+       "        )\n",
+       "        (conv2): ConvModule(\n",
+       "          (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
+       "          (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+       "          (activate): ReLU(inplace=True)\n",
+       "        )\n",
+       "        (conv3): ConvModule(\n",
+       "          (conv): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+       "          (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+       "        )\n",
+       "        (relu): ReLU(inplace=True)\n",
+       "      )\n",
+       "      (3): Bottleneck(\n",
+       "        (conv1): ConvModule(\n",
+       "          (conv): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+       "          (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+       "          (activate): ReLU(inplace=True)\n",
+       "        )\n",
+       "        (conv2): ConvModule(\n",
+       "          (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
+       "          (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+       "          (activate): ReLU(inplace=True)\n",
+       "        )\n",
+       "        (conv3): ConvModule(\n",
+       "          (conv): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+       "          (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+       "        )\n",
+       "        (relu): ReLU(inplace=True)\n",
+       "      )\n",
+       "    )\n",
+       "    (layer3): Sequential(\n",
+       "      (0): Bottleneck(\n",
+       "        (conv1): ConvModule(\n",
+       "          (conv): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+       "          (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+       "          (activate): ReLU(inplace=True)\n",
+       "        )\n",
+       "        (conv2): ConvModule(\n",
+       "          (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n",
+       "          (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+       "          (activate): ReLU(inplace=True)\n",
+       "        )\n",
+       "        (conv3): ConvModule(\n",
+       "          (conv): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+       "          (bn): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+       "        )\n",
+       "        (relu): ReLU(inplace=True)\n",
+       "        (downsample): ConvModule(\n",
+       "          (conv): Conv2d(512, 1024, kernel_size=(1, 1), stride=(2, 2), bias=False)\n",
+       "          (bn): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+       "        )\n",
+       "      )\n",
+       "      (1): Bottleneck(\n",
+       "        (conv1): ConvModule(\n",
+       "          (conv): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+       "          (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+       "          (activate): ReLU(inplace=True)\n",
+       "        )\n",
+       "        (conv2): ConvModule(\n",
+       "          (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
+       "          (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+       "          (activate): ReLU(inplace=True)\n",
+       "        )\n",
+       "        (conv3): ConvModule(\n",
+       "          (conv): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+       "          (bn): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+       "        )\n",
+       "        (relu): ReLU(inplace=True)\n",
+       "      )\n",
+       "      (2): Bottleneck(\n",
+       "        (conv1): ConvModule(\n",
+       "          (conv): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+       "          (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+       "          (activate): ReLU(inplace=True)\n",
+       "        )\n",
+       "        (conv2): ConvModule(\n",
+       "          (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
+       "          (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+       "          (activate): ReLU(inplace=True)\n",
+       "        )\n",
+       "        (conv3): ConvModule(\n",
+       "          (conv): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+       "          (bn): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+       "        )\n",
+       "        (relu): ReLU(inplace=True)\n",
+       "      )\n",
+       "      (3): Bottleneck(\n",
+       "        (conv1): ConvModule(\n",
+       "          (conv): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+       "          (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+       "          (activate): ReLU(inplace=True)\n",
+       "        )\n",
+       "        (conv2): ConvModule(\n",
+       "          (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
+       "          (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+       "          (activate): ReLU(inplace=True)\n",
+       "        )\n",
+       "        (conv3): ConvModule(\n",
+       "          (conv): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+       "          (bn): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+       "        )\n",
+       "        (relu): ReLU(inplace=True)\n",
+       "      )\n",
+       "      (4): Bottleneck(\n",
+       "        (conv1): ConvModule(\n",
+       "          (conv): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+       "          (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+       "          (activate): ReLU(inplace=True)\n",
+       "        )\n",
+       "        (conv2): ConvModule(\n",
+       "          (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
+       "          (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+       "          (activate): ReLU(inplace=True)\n",
+       "        )\n",
+       "        (conv3): ConvModule(\n",
+       "          (conv): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+       "          (bn): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+       "        )\n",
+       "        (relu): ReLU(inplace=True)\n",
+       "      )\n",
+       "      (5): Bottleneck(\n",
+       "        (conv1): ConvModule(\n",
+       "          (conv): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+       "          (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+       "          (activate): ReLU(inplace=True)\n",
+       "        )\n",
+       "        (conv2): ConvModule(\n",
+       "          (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
+       "          (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+       "          (activate): ReLU(inplace=True)\n",
+       "        )\n",
+       "        (conv3): ConvModule(\n",
+       "          (conv): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+       "          (bn): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+       "        )\n",
+       "        (relu): ReLU(inplace=True)\n",
+       "      )\n",
+       "    )\n",
+       "    (layer4): Sequential(\n",
+       "      (0): Bottleneck(\n",
+       "        (conv1): ConvModule(\n",
+       "          (conv): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+       "          (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+       "          (activate): ReLU(inplace=True)\n",
+       "        )\n",
+       "        (conv2): ConvModule(\n",
+       "          (conv): Conv2d(512, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n",
+       "          (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+       "          (activate): ReLU(inplace=True)\n",
+       "        )\n",
+       "        (conv3): ConvModule(\n",
+       "          (conv): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+       "          (bn): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+       "        )\n",
+       "        (relu): ReLU(inplace=True)\n",
+       "        (downsample): ConvModule(\n",
+       "          (conv): Conv2d(1024, 2048, kernel_size=(1, 1), stride=(2, 2), bias=False)\n",
+       "          (bn): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+       "        )\n",
+       "      )\n",
+       "      (1): Bottleneck(\n",
+       "        (conv1): ConvModule(\n",
+       "          (conv): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+       "          (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+       "          (activate): ReLU(inplace=True)\n",
+       "        )\n",
+       "        (conv2): ConvModule(\n",
+       "          (conv): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
+       "          (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+       "          (activate): ReLU(inplace=True)\n",
+       "        )\n",
+       "        (conv3): ConvModule(\n",
+       "          (conv): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+       "          (bn): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+       "        )\n",
+       "        (relu): ReLU(inplace=True)\n",
+       "      )\n",
+       "      (2): Bottleneck(\n",
+       "        (conv1): ConvModule(\n",
+       "          (conv): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+       "          (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+       "          (activate): ReLU(inplace=True)\n",
+       "        )\n",
+       "        (conv2): ConvModule(\n",
+       "          (conv): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
+       "          (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+       "          (activate): ReLU(inplace=True)\n",
+       "        )\n",
+       "        (conv3): ConvModule(\n",
+       "          (conv): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+       "          (bn): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+       "        )\n",
+       "        (relu): ReLU(inplace=True)\n",
+       "      )\n",
+       "    )\n",
+       "  )\n",
+       "  (cls_head): TSNHead(\n",
+       "    (loss_cls): CrossEntropyLoss()\n",
+       "    (consensus): AvgConsensus()\n",
+       "    (avg_pool): AdaptiveAvgPool2d(output_size=(1, 1))\n",
+       "    (dropout): Dropout(p=0.4, inplace=False)\n",
+       "    (fc_cls): Linear(in_features=2048, out_features=2, bias=True)\n",
+       "  )\n",
+       ")"
       ]
+     },
+     "metadata": {},
+     "execution_count": 15
+    }
+   ],
+   "source": [
+    "import os.path as osp\n",
+    "import mmengine\n",
+    "from mmengine.runner import Runner\n",
+    "\n",
+    "# Create work_dir\n",
+    "mmengine.mkdir_or_exist(osp.abspath(cfg.work_dir))\n",
+    "\n",
+    "# build the runner from config\n",
+    "runner = Runner.from_cfg(cfg)\n",
+    "\n",
+    "# start training\n",
+    "runner.train()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "zdSd7oTLlxIf"
+   },
+   "source": [
+    "### Understand the log\n",
+    "From the log, we can get a basic understanding of the training process and know how well the recognizer is trained.\n",
+    "\n",
+    "Firstly, the ResNet-50 backbone pre-trained on ImageNet is loaded; this is a common practice since training from scratch is more costly. The log shows that all the weights of the ResNet-50 backbone are loaded except the `fc.bias` and `fc.weight`.\n",
+    "\n",
+    "Second, since the dataset we are using is small, we loaded a TSN model and finetuned it for action recognition.\n",
+    "The original TSN is trained on the original Kinetics-400 dataset, which contains 400 classes, but the Kinetics-400 Tiny dataset only has 2 classes. Therefore, the last FC layer of the pre-trained TSN for classification has a different weight shape and is not used.\n",
+    "\n",
+    "Third, after training, the recognizer is evaluated by the default evaluation. The results show that the recognizer achieves 100% top1 accuracy and 100% top5 accuracy on the val dataset.\n",
+    " \n",
+    "Not bad!"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "ryVoSfZVmogw"
+   },
+   "source": [
+    "## Test the trained recognizer\n",
+    "\n",
+    "After finetuning the recognizer, let's check the prediction results!"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
     },
+    "id": "eyY3hCMwyTct",
+    "outputId": "34fbbdc5-b9fd-4fd2-8030-3ba56b10adbf"
+   },
+   "outputs": [
     {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "ryVoSfZVmogw"
-      },
-      "source": [
-        "## Test the trained recognizer\n",
-        "\n",
-        "After finetuning the recognizer, let's check the prediction results!"
-      ]
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "05/15 03:34:36 - mmengine - INFO - Epoch(test) [10/10]    acc/top1: 0.9000  acc/top5: 1.0000  acc/mean1: 0.9000  data_time: 0.0586  time: 0.7817\n"
+     ]
     },
     {
-      "cell_type": "code",
-      "execution_count": 16,
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "eyY3hCMwyTct",
-        "outputId": "34fbbdc5-b9fd-4fd2-8030-3ba56b10adbf"
-      },
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "05/15 03:34:36 - mmengine - INFO - Epoch(test) [10/10]    acc/top1: 0.9000  acc/top5: 1.0000  acc/mean1: 0.9000  data_time: 0.0586  time: 0.7817\n"
-          ]
-        },
-        {
-          "output_type": "execute_result",
-          "data": {
-            "text/plain": [
-              "{'acc/top1': 0.9, 'acc/top5': 1.0, 'acc/mean1': 0.9}"
-            ]
-          },
-          "metadata": {},
-          "execution_count": 16
-        }
-      ],
-      "source": [
-        "runner.test()"
+     "output_type": "execute_result",
+     "data": {
+      "text/plain": [
+       "{'acc/top1': 0.9, 'acc/top5': 1.0, 'acc/mean1': 0.9}"
       ]
+     },
+     "metadata": {},
+     "execution_count": 16
     }
-  ],
-  "metadata": {
-    "accelerator": "GPU",
-    "colab": {
-      "provenance": [],
-      "toc_visible": true
-    },
-    "kernelspec": {
-      "display_name": "mmact_dev",
-      "language": "python",
-      "name": "python3"
-    },
-    "language_info": {
-      "codemirror_mode": {
-        "name": "ipython",
-        "version": 3
-      },
-      "file_extension": ".py",
-      "mimetype": "text/x-python",
-      "name": "python",
-      "nbconvert_exporter": "python",
-      "pygments_lexer": "ipython3",
-      "version": "3.7.12"
-    },
-    "vscode": {
-      "interpreter": {
-        "hash": "189c342a4747645665e89db23000ac4d4edb7a87c4cd0b2f881610f468fb778d"
-      }
-    }
+   ],
+   "source": [
+    "runner.test()"
+   ]
+  }
+ ],
+ "metadata": {
+  "accelerator": "GPU",
+  "colab": {
+   "provenance": [],
+   "toc_visible": true
+  },
+  "kernelspec": {
+   "display_name": "mmact_dev",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.12"
   },
-  "nbformat": 4,
-  "nbformat_minor": 0
+  "vscode": {
+   "interpreter": {
+    "hash": "189c342a4747645665e89db23000ac4d4edb7a87c4cd0b2f881610f468fb778d"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
 }
diff --git a/demo/webcam_demo.py b/demo/webcam_demo.py
index cdd8585540..de87c8aa32 100644
--- a/demo/webcam_demo.py
+++ b/demo/webcam_demo.py
@@ -139,7 +139,7 @@ def inference():
         # Forward the model
         with torch.no_grad():
             result = model.test_step(cur_data)[0]
-        scores = result.pred_scores.item.tolist()
+        scores = result.pred_score.tolist()
         scores = np.array(scores)
         score_cache.append(scores)
         scores_sum += scores
diff --git a/docs/en/get_started/guide_to_framework.md b/docs/en/get_started/guide_to_framework.md
index c65d65331b..3dc1c2314b 100644
--- a/docs/en/get_started/guide_to_framework.md
+++ b/docs/en/get_started/guide_to_framework.md
@@ -179,7 +179,8 @@ class VideoPack(BaseTransform):
     def transform(self, results):
         packed_results = dict()
         inputs = to_tensor(results['imgs'])
-        data_sample = ActionDataSample().set_gt_labels(results['label'])
+        data_sample = ActionDataSample()
+        data_sample.set_gt_label(results['label'])
         metainfo = {k: results[k] for k in self.meta_keys if k in results}
         data_sample.set_metainfo(metainfo)
         packed_results['inputs'] = inputs
@@ -219,7 +220,7 @@ print('num_clips: ', data_sample.num_clips)
 print('clip_len: ', data_sample.clip_len)
 
 # Get label of the inputs
-print('label: ', data_sample.gt_labels.item)
+print('label: ', data_sample.gt_label)
 ```
 
 ```
@@ -321,7 +322,7 @@ print('num_clips: ', data_sample.num_clips)
 print('clip_len: ', data_sample.clip_len)
 
 # Get label of the inputs
-print('label: ', data_sample.gt_labels.item)
+print('label: ', data_sample.gt_label)
 
 from mmengine.runner import Runner
 
@@ -481,7 +482,7 @@ class ClsHeadZelda(BaseModule):
 
     def loss(self, feats, data_samples):
         cls_scores = self(feats)
-        labels = torch.stack([x.gt_labels.item for x in data_samples])
+        labels = torch.stack([x.gt_label for x in data_samples])
         labels = labels.squeeze()
 
         if labels.shape == torch.Size([]):
@@ -589,8 +590,8 @@ with torch.no_grad():
     data_batch_test = copy.deepcopy(batched_packed_results)
     data = model.data_preprocessor(data_batch_test, training=False)
     predictions = model(**data, mode='predict')
-print('Label of Sample[0]', predictions[0].gt_labels.item)
-print('Scores of Sample[0]', predictions[0].pred_scores.item)
+print('Label of Sample[0]', predictions[0].gt_label)
+print('Scores of Sample[0]', predictions[0].pred_score)
 ```
 
 ```shell
@@ -661,8 +662,8 @@ class AccuracyMetric(BaseMetric):
         data_samples = copy.deepcopy(data_samples)
         for data_sample in data_samples:
             result = dict()
-            scores = data_sample['pred_scores']['item'].cpu().numpy()
-            label = data_sample['gt_labels']['item'].item()
+            scores = data_sample['pred_score'].cpu().numpy()
+            label = data_sample['gt_label'].item()
             result['scores'] = scores
             result['label'] = label
             self.results.append(result)
diff --git a/docs/en/get_started/installation.md b/docs/en/get_started/installation.md
index 8cc64b7798..1685f97478 100644
--- a/docs/en/get_started/installation.md
+++ b/docs/en/get_started/installation.md
@@ -121,7 +121,7 @@ label_file = 'tools/data/kinetics/label_map_k400.txt'
 model = init_recognizer(config_file, checkpoint_file, device='cpu')  # or device='cuda:0'
 pred_result = inference_recognizer(model, video_file)
 
-pred_scores = pred_result.pred_scores.item.tolist()
+pred_scores = pred_result.pred_score.tolist()
 score_tuples = tuple(zip(range(len(pred_scores)), pred_scores))
 score_sorted = sorted(score_tuples, key=itemgetter(1), reverse=True)
 top5_label = score_sorted[:5]
diff --git a/docs/zh_cn/get_started/guide_to_framework.md b/docs/zh_cn/get_started/guide_to_framework.md
index b92c376b5d..0dc6462195 100644
--- a/docs/zh_cn/get_started/guide_to_framework.md
+++ b/docs/zh_cn/get_started/guide_to_framework.md
@@ -180,7 +180,7 @@ class VideoPack(BaseTransform):
     def transform(self, results):
         packed_results = dict()
         inputs = to_tensor(results['imgs'])
-        data_sample = ActionDataSample().set_gt_labels(results['label'])
+        data_sample = ActionDataSample().set_gt_label(results['label'])
         metainfo = {k: results[k] for k in self.meta_keys if k in results}
         data_sample.set_metainfo(metainfo)
         packed_results['inputs'] = inputs
@@ -220,7 +220,7 @@ print('num_clips: ', data_sample.num_clips)
 print('clip_len: ', data_sample.clip_len)
 
 # 获取输入的标签
-print('label: ', data_sample.gt_labels.item)
+print('label: ', data_sample.gt_label)
 ```
 
 ```
@@ -322,7 +322,7 @@ print('num_clips: ', data_sample.num_clips)
 print('clip_len: ', data_sample.clip_len)
 
 # 获取输入的标签
-print('label: ', data_sample.gt_labels.item)
+print('label: ', data_sample.gt_label)
 
 from mmengine.runner import Runner
 
@@ -482,7 +482,7 @@ class ClsHeadZelda(BaseModule):
 
     def loss(self, feats, data_samples):
         cls_scores = self(feats)
-        labels = torch.stack([x.gt_labels.item for x in data_samples])
+        labels = torch.stack([x.gt_label for x in data_samples])
         labels = labels.squeeze()
 
         if labels.shape == torch.Size([]):
@@ -590,8 +590,8 @@ with torch.no_grad():
     data_batch_test = copy.deepcopy(batched_packed_results)
     data = model.data_preprocessor(data_batch_test, training=False)
     predictions = model(**data, mode='predict')
-print('Label of Sample[0]', predictions[0].gt_labels.item)
-print('Scores of Sample[0]', predictions[0].pred_scores.item)
+print('Label of Sample[0]', predictions[0].gt_label)
+print('Scores of Sample[0]', predictions[0].pred_score)
 ```
 
 ```shell
@@ -662,8 +662,8 @@ class AccuracyMetric(BaseMetric):
         data_samples = copy.deepcopy(data_samples)
         for data_sample in data_samples:
             result = dict()
-            scores = data_sample['pred_scores']['item'].cpu().numpy()
-            label = data_sample['gt_labels']['item'].item()
+            scores = data_sample['pred_score'].cpu().numpy()
+            label = data_sample['gt_label'].item()
             result['scores'] = scores
             result['label'] = label
             self.results.append(result)
diff --git a/docs/zh_cn/get_started/installation.md b/docs/zh_cn/get_started/installation.md
index 0e144ce6eb..091a8a5e03 100644
--- a/docs/zh_cn/get_started/installation.md
+++ b/docs/zh_cn/get_started/installation.md
@@ -120,7 +120,7 @@ label_file = 'tools/data/kinetics/label_map_k400.txt'
 model = init_recognizer(config_file, checkpoint_file, device='cpu')  # or device='cuda:0'
 pred_result = inference_recognizer(model, video_file)
 
-pred_scores = pred_result.pred_scores.item.tolist()
+pred_scores = pred_result.pred_score.tolist()
 score_tuples = tuple(zip(range(len(pred_scores)), pred_scores))
 score_sorted = sorted(score_tuples, key=itemgetter(1), reverse=True)
 top5_label = score_sorted[:5]
diff --git a/mmaction/apis/inference.py b/mmaction/apis/inference.py
index 749395099e..4b2b4f8c4b 100644
--- a/mmaction/apis/inference.py
+++ b/mmaction/apis/inference.py
@@ -70,7 +70,7 @@ def inference_recognizer(model: nn.Module,
 
     Returns:
         :obj:`ActionDataSample`: The inference results. Specifically, the
-        predicted scores are saved at ``result.pred_scores.item``.
+        predicted scores are saved at ``result.pred_score``.
     """
 
     if test_pipeline is None:
@@ -131,7 +131,7 @@ def inference_skeleton(model: nn.Module,
 
     Returns:
         :obj:`ActionDataSample`: The inference results. Specifically, the
-        predicted scores are saved at ``result.pred_scores.item``.
+        predicted scores are saved at ``result.pred_score``.
     """
     if test_pipeline is None:
         cfg = model.cfg
diff --git a/mmaction/apis/inferencers/actionrecog_inferencer.py b/mmaction/apis/inferencers/actionrecog_inferencer.py
index f45f137b59..cc6e60b0da 100644
--- a/mmaction/apis/inferencers/actionrecog_inferencer.py
+++ b/mmaction/apis/inferencers/actionrecog_inferencer.py
@@ -356,6 +356,6 @@ def pred2dict(self, data_sample: ActionDataSample) -> Dict:
             dict: The output dictionary.
         """
         result = {}
-        result['pred_labels'] = data_sample.pred_labels.item.tolist()
-        result['pred_scores'] = data_sample.pred_scores.item.tolist()
+        result['pred_labels'] = data_sample.pred_label.tolist()
+        result['pred_scores'] = data_sample.pred_score.tolist()
         return result
diff --git a/mmaction/datasets/transforms/formatting.py b/mmaction/datasets/transforms/formatting.py
index 168509be30..fb67e10c0e 100644
--- a/mmaction/datasets/transforms/formatting.py
+++ b/mmaction/datasets/transforms/formatting.py
@@ -4,7 +4,7 @@
 import numpy as np
 import torch
 from mmcv.transforms import BaseTransform, to_tensor
-from mmengine.structures import InstanceData, LabelData
+from mmengine.structures import InstanceData
 
 from mmaction.registry import TRANSFORMS
 from mmaction.structures import ActionDataSample
@@ -12,20 +12,11 @@
 
 @TRANSFORMS.register_module()
 class PackActionInputs(BaseTransform):
-    """Pack the input data for the recognition.
-
-    PackActionInputs first packs one of 'imgs', 'keypoint' and 'audios' into
-    the `packed_results['inputs']`, which are the three basic input modalities
-    for the task of rgb-based, skeleton-based and audio-based action
-    recognition, as well as spatio-temporal action detection in the case
-    of 'img'. Next, it prepares a `data_sample` for the task of action
-    recognition (only a single label of `torch.LongTensor` format, which is
-    saved in the `data_sample.gt_labels.item`) or spatio-temporal action
-    detection respectively. Then, it saves the meta keys defined in
-    the `meta_keys` in `data_sample.metainfo`, and packs the `data_sample`
-    into the `packed_results['data_samples']`.
+    """Pack the inputs data.
 
     Args:
+        collect_keys (tuple[str], optional): The keys to be collected
+            to ``packed_results['inputs']``. Defaults to ``None``.
         meta_keys (Sequence[str]): The meta keys to saved in the
             `metainfo` of the `data_sample`.
             Defaults to ``('img_shape', 'img_key', 'video_id', 'timestamp')``.
@@ -95,9 +86,7 @@ def transform(self, results: Dict) -> Dict:
                     bboxes=to_tensor(results['proposals']))
 
         if 'label' in results:
-            label_data = LabelData()
-            label_data.item = to_tensor(results['label'])
-            data_sample.gt_labels = label_data
+            data_sample.set_gt_label(results['label'])
 
         img_meta = {k: results[k] for k in self.meta_keys if k in results}
         data_sample.set_metainfo(img_meta)
diff --git a/mmaction/evaluation/metrics/acc_metric.py b/mmaction/evaluation/metrics/acc_metric.py
index 9abc20fa6c..04985e5938 100644
--- a/mmaction/evaluation/metrics/acc_metric.py
+++ b/mmaction/evaluation/metrics/acc_metric.py
@@ -75,17 +75,23 @@ def process(self, data_batch: Sequence[Tuple[Any, Dict]],
         data_samples = copy.deepcopy(data_samples)
         for data_sample in data_samples:
             result = dict()
-            pred = data_sample['pred_scores']
-            label = data_sample['gt_labels']
-            for item_name, score in pred.items():
-                pred[item_name] = score.cpu().numpy()
+            pred = data_sample['pred_score']
+            label = data_sample['gt_label']
+
+            # Ad-hoc for RGBPoseConv3D
+            if isinstance(pred, dict):
+                for item_name, score in pred.items():
+                    pred[item_name] = score.cpu().numpy()
+            else:
+                pred = pred.cpu().numpy()
+
             result['pred'] = pred
-            if label['item'].size(0) == 1:
+            if label.size(0) == 1:
                 # single-label
-                result['label'] = label['item'].item()
+                result['label'] = label.item()
             else:
                 # multi-label
-                result['label'] = label['item'].cpu().numpy()
+                result['label'] = label.cpu().numpy()
             self.results.append(result)
 
     def compute_metrics(self, results: List) -> Dict:
@@ -100,39 +106,41 @@ def compute_metrics(self, results: List) -> Dict:
         """
         labels = [x['label'] for x in results]
 
-        if len(results[0]['pred']) == 1:
-            preds = [x['pred']['item'] for x in results]
-            return self.calculate(preds, labels)
-
         eval_results = dict()
-        for item_name in results[0]['pred'].keys():
-            preds = [x['pred'][item_name] for x in results]
-            eval_result = self.calculate(preds, labels)
-            eval_results.update(
-                {f'{item_name}_{k}': v
-                 for k, v in eval_result.items()})
-
         # Ad-hoc for RGBPoseConv3D
-        if len(results[0]['pred']) == 2 and \
-                'rgb' in results[0]['pred'] and \
-                'pose' in results[0]['pred']:
-
-            rgb = [x['pred']['rgb'] for x in results]
-            pose = [x['pred']['pose'] for x in results]
-
-            preds = {
-                '1:1': get_weighted_score([rgb, pose], [1, 1]),
-                '2:1': get_weighted_score([rgb, pose], [2, 1]),
-                '1:2': get_weighted_score([rgb, pose], [1, 2])
-            }
-            for k in preds:
-                eval_result = self.calculate(preds[k], labels)
-                eval_results.update({
-                    f'RGBPose_{k}_{key}': v
-                    for key, v in eval_result.items()
-                })
-
-        return eval_results
+        if isinstance(results[0]['pred'], dict):
+
+            for item_name in results[0]['pred'].keys():
+                preds = [x['pred'][item_name] for x in results]
+                eval_result = self.calculate(preds, labels)
+                eval_results.update(
+                    {f'{item_name}_{k}': v
+                     for k, v in eval_result.items()})
+
+            if len(results[0]['pred']) == 2 and \
+                    'rgb' in results[0]['pred'] and \
+                    'pose' in results[0]['pred']:
+
+                rgb = [x['pred']['rgb'] for x in results]
+                pose = [x['pred']['pose'] for x in results]
+
+                preds = {
+                    '1:1': get_weighted_score([rgb, pose], [1, 1]),
+                    '2:1': get_weighted_score([rgb, pose], [2, 1]),
+                    '1:2': get_weighted_score([rgb, pose], [1, 2])
+                }
+                for k in preds:
+                    eval_result = self.calculate(preds[k], labels)
+                    eval_results.update({
+                        f'RGBPose_{k}_{key}': v
+                        for key, v in eval_result.items()
+                    })
+            return eval_results
+
+        # Simple Acc Calculation
+        else:
+            preds = [x['pred'] for x in results]
+            return self.calculate(preds, labels)
 
     def calculate(self, preds: List[np.ndarray],
                   labels: List[Union[int, np.ndarray]]) -> Dict:
@@ -238,13 +246,13 @@ def __init__(self,
 
     def process(self, data_batch, data_samples: Sequence[dict]) -> None:
         for data_sample in data_samples:
-            pred_scores = data_sample.get('pred_scores')
-            gt_label = data_sample['gt_labels']['item']
+            pred_scores = data_sample.get('pred_score')
+            gt_label = data_sample['gt_label']
             if pred_scores is not None:
-                pred_label = pred_scores['item'].argmax(dim=0, keepdim=True)
-                self.num_classes = pred_scores['item'].size(0)
+                pred_label = pred_scores.argmax(dim=0, keepdim=True)
+                self.num_classes = pred_scores.size(0)
             else:
-                pred_label = data_sample['pred_labels']['item']
+                pred_label = data_sample['pred_label']
 
             self.results.append({
                 'pred_label': pred_label,
diff --git a/mmaction/models/data_preprocessors/data_preprocessor.py b/mmaction/models/data_preprocessors/data_preprocessor.py
index 891cb8f386..0376318ff7 100644
--- a/mmaction/models/data_preprocessors/data_preprocessor.py
+++ b/mmaction/models/data_preprocessors/data_preprocessor.py
@@ -84,7 +84,7 @@ def forward(self,
         data = self.cast_data(data)
         if isinstance(data, dict):
             return self.forward_onesample(data, training=training)
-        elif isinstance(data, tuple):
+        elif isinstance(data, (tuple, list)):
             outputs = []
             for data_sample in data:
                 output = self.forward_onesample(data_sample, training=training)
diff --git a/mmaction/models/heads/base.py b/mmaction/models/heads/base.py
index c39da5aa9a..8febe1df5b 100644
--- a/mmaction/models/heads/base.py
+++ b/mmaction/models/heads/base.py
@@ -6,7 +6,6 @@
 import torch.nn as nn
 import torch.nn.functional as F
 from mmengine.model import BaseModule
-from mmengine.structures import LabelData
 
 from mmaction.evaluation import top_k_accuracy
 from mmaction.registry import MODELS
@@ -112,7 +111,7 @@ def loss_by_feat(self, cls_scores: torch.Tensor,
         Returns:
             dict: A dictionary of loss components.
         """
-        labels = [x.gt_labels.item for x in data_samples]
+        labels = [x.gt_label for x in data_samples]
         labels = torch.stack(labels).to(cls_scores.device)
         labels = labels.squeeze()
 
@@ -175,7 +174,7 @@ def predict_by_feat(self, cls_scores: torch.Tensor,
                 (B*num_segs, num_classes)
             data_samples (list[:obj:`ActionDataSample`]): The
                 annotation data of every samples. It usually includes
-                information such as `gt_labels`.
+                information such as `gt_label`.
 
         Returns:
             List[:obj:`ActionDataSample`]: Recognition results wrapped
@@ -187,10 +186,8 @@ def predict_by_feat(self, cls_scores: torch.Tensor,
 
         for data_sample, score, pred_label in zip(data_samples, cls_scores,
                                                   pred_labels):
-            prediction = LabelData(item=score)
-            pred_label = LabelData(item=pred_label)
-            data_sample.pred_scores = prediction
-            data_sample.pred_labels = pred_label
+            data_sample.set_pred_score(score)
+            data_sample.set_pred_label(pred_label)
         return data_samples
 
     def average_clip(self,
diff --git a/mmaction/models/heads/omni_head.py b/mmaction/models/heads/omni_head.py
index f5084dde06..7a62cf56da 100644
--- a/mmaction/models/heads/omni_head.py
+++ b/mmaction/models/heads/omni_head.py
@@ -87,10 +87,7 @@ def loss_by_feat(self, cls_scores: Union[Tensor, Tuple[Tensor]],
         Returns:
             dict: A dictionary of loss components.
         """
-        if hasattr(data_samples[0], 'gt_labels'):
-            labels = [x.gt_labels.item for x in data_samples]
-        else:
-            labels = [x.gt_label.label for x in data_samples]
+        labels = [x.gt_label for x in data_samples]
         labels = torch.stack(labels).to(cls_scores.device)
         labels = labels.squeeze()
 
diff --git a/mmaction/models/heads/rgbpose_head.py b/mmaction/models/heads/rgbpose_head.py
index 69da4efed9..880e37f084 100644
--- a/mmaction/models/heads/rgbpose_head.py
+++ b/mmaction/models/heads/rgbpose_head.py
@@ -5,7 +5,6 @@
 import torch.nn as nn
 import torch.nn.functional as F
 from mmengine.model.weight_init import normal_init
-from mmengine.structures import LabelData
 
 from mmaction.evaluation import top_k_accuracy
 from mmaction.registry import MODELS
@@ -110,7 +109,7 @@ def loss_by_feat(self, cls_scores: Dict[str, torch.Tensor],
         Returns:
             dict: A dictionary of loss components.
         """
-        labels = torch.stack([x.gt_labels.item for x in data_samples])
+        labels = torch.stack([x.gt_label for x in data_samples])
         labels = labels.squeeze()
 
         if labels.shape == torch.Size([]):
@@ -192,34 +191,26 @@ def predict_by_feat(self, cls_scores: Dict[str, torch.Tensor],
                 classification scores,
             data_samples (list[:obj:`ActionDataSample`]): The
                 annotation data of every samples. It usually includes
-                information such as `gt_labels`.
+                information such as `gt_label`.
 
         Returns:
             list[:obj:`ActionDataSample`]: Recognition results wrapped
                 by :obj:`ActionDataSample`.
         """
-        pred_scores = [LabelData() for _ in range(len(data_samples))]
-        pred_labels = [LabelData() for _ in range(len(data_samples))]
+        pred_scores = [dict() for _ in range(len(data_samples))]
 
         for name in self.loss_components:
             cls_score = cls_scores[name]
-            cls_score, pred_label = \
-                self.predict_by_scores(cls_score, data_samples)
-            for pred_score, pred_label, score, label in zip(
-                    pred_scores, pred_labels, cls_score, pred_label):
-                pred_score.set_data({f'{name}': score})
-                pred_label.set_data({f'{name}': label})
-
-        for data_sample, pred_score, pred_label in zip(data_samples,
-                                                       pred_scores,
-                                                       pred_labels):
-            data_sample.pred_scores = pred_score
-            data_sample.pred_labels = pred_label
+            cls_score = self.predict_by_scores(cls_score, data_samples)
+            for pred_score, score in zip(pred_scores, cls_score):
+                pred_score[f'{name}'] = score
 
+        for data_sample, pred_score, in zip(data_samples, pred_scores):
+            data_sample.set_pred_score(pred_score)
         return data_samples
 
     def predict_by_scores(self, cls_scores: torch.Tensor,
-                          data_samples: SampleList) -> Tuple:
+                          data_samples: SampleList) -> torch.Tensor:
         """Transform a batch of output features extracted from the head into
         prediction results.
 
@@ -230,11 +221,9 @@ def predict_by_scores(self, cls_scores: torch.Tensor,
                 data of every samples.
 
         Returns:
-            tuple: A tuple of the averaged classification scores and
-                prediction labels.
+            torch.Tensor: The averaged classification scores.
         """
 
         num_segs = cls_scores.shape[0] // len(data_samples)
         cls_scores = self.average_clip(cls_scores, num_segs=num_segs)
-        pred_labels = cls_scores.argmax(dim=-1, keepdim=True).detach()
-        return cls_scores, pred_labels
+        return cls_scores
diff --git a/mmaction/models/necks/tpn.py b/mmaction/models/necks/tpn.py
index b3cdc92ff9..c04dde4123 100644
--- a/mmaction/models/necks/tpn.py
+++ b/mmaction/models/necks/tpn.py
@@ -254,7 +254,7 @@ def loss(self, x: torch.Tensor,
              data_samples: Optional[SampleList]) -> dict:
         """Calculate auxiliary loss."""
         x = self(x)
-        labels = [x.gt_labels.item for x in data_samples]
+        labels = [x.gt_label for x in data_samples]
         labels = torch.stack(labels).to(x.device)
         labels = labels.squeeze()
         if labels.shape == torch.Size([]):
diff --git a/mmaction/models/recognizers/base.py b/mmaction/models/recognizers/base.py
index 7ce2a51b1f..ced45380cf 100644
--- a/mmaction/models/recognizers/base.py
+++ b/mmaction/models/recognizers/base.py
@@ -162,7 +162,7 @@ def loss(self, inputs: torch.Tensor, data_samples: SampleList,
                 These should usually be mean centered and std scaled.
             data_samples (List[``ActionDataSample``]): The batch
                 data samples. It usually includes information such
-                as ``gt_labels``.
+                as ``gt_label``.
 
         Returns:
             dict: A dictionary of loss components.
@@ -187,7 +187,7 @@ def predict(self, inputs: torch.Tensor, data_samples: SampleList,
                 These should usually be mean centered and std scaled.
             data_samples (List[``ActionDataSample``]): The batch
                 data samples. It usually includes information such
-                as ``gt_labels``.
+                as ``gt_label``.
 
         Returns:
             List[``ActionDataSample``]: Return the recognition results.
diff --git a/mmaction/models/utils/blending_utils.py b/mmaction/models/utils/blending_utils.py
index 2d3732eeb1..855ca226b1 100644
--- a/mmaction/models/utils/blending_utils.py
+++ b/mmaction/models/utils/blending_utils.py
@@ -55,18 +55,18 @@ def __call__(self, imgs: torch.Tensor, batch_data_samples: SampleList,
                 shape of (B, N, C, H, W) or (B, N, C, T, H, W).
             batch_data_samples (List[:obj:`ActionDataSample`]): The batch
                 data samples. It usually includes information such
-                as `gt_labels`.
+                as `gt_label`.
 
         Returns:
             mixed_imgs (torch.Tensor): Blending images, float tensor with the
                 same shape of the input imgs.
             batch_data_samples (List[:obj:`ActionDataSample`]): The modified
-                batch data samples. ``gt_labels`` in each data sample are
+                batch data samples. ``gt_label`` in each data sample are
                 converted from a hard label to a blended soft label, float
                 tensor with the shape of (num_classes, ) and all elements are
                 in range [0, 1].
         """
-        label = [x.gt_labels.item for x in batch_data_samples]
+        label = [x.gt_label for x in batch_data_samples]
         # single-label classification
         if label[0].size(0) == 1:
             label = torch.tensor(label, dtype=torch.long).to(imgs.device)
@@ -79,7 +79,7 @@ def __call__(self, imgs: torch.Tensor, batch_data_samples: SampleList,
                                                    **kwargs)
 
         for label_item, sample in zip(mixed_label, batch_data_samples):
-            sample.gt_labels.item = label_item
+            sample.set_gt_label(label_item)
 
         return mixed_imgs, batch_data_samples
 
diff --git a/mmaction/structures/action_data_sample.py b/mmaction/structures/action_data_sample.py
index 6ea146cba2..79bec540a0 100644
--- a/mmaction/structures/action_data_sample.py
+++ b/mmaction/structures/action_data_sample.py
@@ -1,15 +1,16 @@
 # Copyright (c) OpenMMLab. All rights reserved.
-from numbers import Number
-from typing import Sequence, Union
+from typing import Dict, Sequence, Union
 
 import numpy as np
 import torch
-from mmengine.structures import BaseDataElement, InstanceData, LabelData
+from mmengine.structures import BaseDataElement, InstanceData
 from mmengine.utils import is_str
 
+LABEL_TYPE = Union[torch.Tensor, np.ndarray, Sequence, int]
+SCORE_TYPE = Union[torch.Tensor, np.ndarray, Sequence, Dict]
 
-def format_label(value: Union[torch.Tensor, np.ndarray, Sequence,
-                              int]) -> torch.Tensor:
+
+def format_label(value: LABEL_TYPE) -> torch.Tensor:
     """Convert various python types to label-format tensor.
 
     Supported types are: :class:`numpy.ndarray`, :class:`torch.Tensor`,
@@ -19,7 +20,7 @@ def format_label(value: Union[torch.Tensor, np.ndarray, Sequence,
         value (torch.Tensor | numpy.ndarray | Sequence | int): Label value.
 
     Returns:
-        :obj:`torch.Tensor`: The foramtted label tensor.
+        :obj:`torch.Tensor`: The formatted label tensor.
     """
 
     # Handle single number
@@ -34,119 +35,62 @@ def format_label(value: Union[torch.Tensor, np.ndarray, Sequence,
         value = torch.LongTensor([value])
     elif not isinstance(value, torch.Tensor):
         raise TypeError(f'Type {type(value)} is not an available label type.')
-    assert value.ndim == 1, \
-        f'The dims of value should be 1, but got {value.ndim}.'
 
     return value
 
 
-def format_score(value: Union[torch.Tensor, np.ndarray,
-                              Sequence]) -> torch.Tensor:
+def format_score(value: SCORE_TYPE) -> Union[torch.Tensor, Dict]:
     """Convert various python types to score-format tensor.
 
     Supported types are: :class:`numpy.ndarray`, :class:`torch.Tensor`,
     :class:`Sequence`.
 
     Args:
-        value (torch.Tensor | numpy.ndarray | Sequence): Score values.
+        value (torch.Tensor | numpy.ndarray | Sequence | dict):
+            Score values or dict of scores values.
 
     Returns:
-        :obj:`torch.Tensor`: The foramtted score tensor.
+        :obj:`torch.Tensor` | dict: The formatted scores.
     """
 
     if isinstance(value, np.ndarray):
         value = torch.from_numpy(value).float()
     elif isinstance(value, Sequence) and not is_str(value):
         value = torch.tensor(value).float()
+    elif isinstance(value, dict):
+        for k, v in value.items():
+            value[k] = format_score(v)
     elif not isinstance(value, torch.Tensor):
         raise TypeError(f'Type {type(value)} is not an available label type.')
-    assert value.ndim == 1, \
-        f'The dims of value should be 1, but got {value.ndim}.'
 
     return value
 
 
 class ActionDataSample(BaseDataElement):
 
-    def set_gt_labels(
-        self, value: Union[np.ndarray, torch.Tensor, Sequence[Number], Number]
-    ) -> 'ActionDataSample':
-        """Set label of ``gt_labels``."""
-        label_data = getattr(self, '_gt_label', LabelData())
-        label_data.item = format_label(value)
-        self.gt_labels = label_data
+    def set_gt_label(self, value: LABEL_TYPE) -> 'ActionDataSample':
+        """Set ``gt_label``."""
+        self.set_field(format_label(value), 'gt_label', dtype=torch.Tensor)
         return self
 
-    def set_pred_label(
-        self, value: Union[np.ndarray, torch.Tensor, Sequence[Number], Number]
-    ) -> 'ActionDataSample':
-        """Set label of ``pred_label``."""
-        label_data = getattr(self, '_pred_label', LabelData())
-        label_data.item = format_label(value)
-        self.pred_labels = label_data
+    def set_pred_label(self, value: LABEL_TYPE) -> 'ActionDataSample':
+        """Set ``pred_label``."""
+        self.set_field(format_label(value), 'pred_label', dtype=torch.Tensor)
         return self
 
-    def set_pred_score(self, value: torch.Tensor) -> 'ActionDataSample':
+    def set_pred_score(self, value: SCORE_TYPE) -> 'ActionDataSample':
         """Set score of ``pred_label``."""
-        label_data = getattr(self, '_pred_label', LabelData())
-        label_data.item = format_score(value)
+        score = format_score(value)
+        self.set_field(score, 'pred_score')
         if hasattr(self, 'num_classes'):
-            assert len(label_data.item) == self.num_classes, \
-                f'The length of score {len(label_data.item)} should be '\
+            assert len(score) == self.num_classes, \
+                f'The length of score {len(score)} should be '\
                 f'equal to the num_classes {self.num_classes}.'
         else:
             self.set_field(
-                name='num_classes',
-                value=len(label_data.item),
-                field_type='metainfo')
-        self.pred_scores = label_data
+                name='num_classes', value=len(score), field_type='metainfo')
         return self
 
-    @property
-    def gt_labels(self):
-        """Property of `gt_labels`"""
-        return self._gt_labels
-
-    @gt_labels.setter
-    def gt_labels(self, value):
-        """Setter of `gt_labels`"""
-        self.set_field(value, '_gt_labels', LabelData)
-
-    @gt_labels.deleter
-    def gt_labels(self):
-        """Deleter of `gt_labels`"""
-        del self._gt_labels
-
-    @property
-    def pred_scores(self):
-        """Property of `pred_scores`"""
-        return self._pred_scores
-
-    @pred_scores.setter
-    def pred_scores(self, value):
-        """Setter of `pred_scores`"""
-        self.set_field(value, '_pred_scores', LabelData)
-
-    @pred_scores.deleter
-    def pred_scores(self):
-        """Deleter of `pred_scores`"""
-        del self._pred_scores
-
-    @property
-    def pred_labels(self):
-        """Property of `pred_labels`"""
-        return self._pred_labels
-
-    @pred_labels.setter
-    def pred_labels(self, value):
-        """Setter of `pred_labels`"""
-        self.set_field(value, '_pred_labels', LabelData)
-
-    @pred_labels.deleter
-    def pred_labels(self):
-        """Deleter of `pred_labels`"""
-        del self._pred_labels
-
     @property
     def proposals(self):
         """Property of `proposals`"""
diff --git a/mmaction/utils/gradcam_utils.py b/mmaction/utils/gradcam_utils.py
index 23f124f554..3d1a7f8f47 100644
--- a/mmaction/utils/gradcam_utils.py
+++ b/mmaction/utils/gradcam_utils.py
@@ -94,11 +94,11 @@ def _calculate_localization_map(self,
         self.model.cls_head.average_clips = 'score'
         # model forward & backward
         results = self.model.test_step(data)
-        preds = [result.pred_scores.item for result in results]
+        preds = [result.pred_score for result in results]
         preds = torch.stack(preds)
 
         if use_labels:
-            labels = [result.gt_labels.item for result in results]
+            labels = [result.gt_label for result in results]
             labels = torch.stack(labels)
             score = torch.gather(preds, dim=1, index=labels)
         else:
diff --git a/mmaction/visualization/action_visualizer.py b/mmaction/visualization/action_visualizer.py
index 5924669c83..7a3bfab85e 100644
--- a/mmaction/visualization/action_visualizer.py
+++ b/mmaction/visualization/action_visualizer.py
@@ -63,7 +63,7 @@ class ActionVisualizer(Visualizer):
         >>> video = video.get_batch(range(32)).asnumpy()
         >>> # Example annotation
         >>> data_sample = ActionDataSample()
-        >>> data_sample.gt_labels = LabelData(item=torch.tensor([2]))
+        >>> data_sample.set_gt_label(2)
         >>> # Setup the visualizer
         >>> vis = ActionVisualizer(
         ...     save_dir="./outputs",
@@ -215,8 +215,8 @@ def add_datasample(self,
             self.set_image(frame)
 
             if draw_gt and 'gt_labels' in data_sample:
-                gt_labels = data_sample.gt_labels
-                idx = gt_labels.item.tolist()
+                gt_labels = data_sample.gt_label
+                idx = gt_labels.tolist()
                 class_labels = [''] * len(idx)
                 if classes is not None:
                     class_labels = [f' ({classes[i]})' for i in idx]
diff --git a/projects/actionclip/README.md b/projects/actionclip/README.md
index cfaf0e3f2b..df694fd538 100644
--- a/projects/actionclip/README.md
+++ b/projects/actionclip/README.md
@@ -140,7 +140,7 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
 model = init_recognizer(config=config, checkpoint=checkpoint_path, device=device)
 
 pred_result = inference_recognizer(model, 'test.mp4')
-probs = pred_result.pred_scores.item.cpu().numpy()
+probs = pred_result.pred_score.cpu().numpy()
 print("Label probs:", probs)  # [9.995e-01 5.364e-07 6.666e-04]
 ```
 
diff --git a/tests/apis/test_inference.py b/tests/apis/test_inference.py
index 1b004943f7..749c3af01b 100644
--- a/tests/apis/test_inference.py
+++ b/tests/apis/test_inference.py
@@ -66,7 +66,7 @@ def test_inference_recognizer(self, config, video_path, devices):
             result = inference_recognizer(model, video_path)
 
             self.assertIsInstance(result, ActionDataSample)
-            self.assertTrue(result.pred_scores.item.shape, (400, ))
+            self.assertTrue(result.pred_score.shape, (400, ))
 
     def test_detection_inference(self):
         from mmdet.apis import init_detector
diff --git a/tests/datasets/transforms/test_formating.py b/tests/datasets/transforms/test_formating.py
index e12a1a95d7..93e32249b5 100644
--- a/tests/datasets/transforms/test_formating.py
+++ b/tests/datasets/transforms/test_formating.py
@@ -34,7 +34,7 @@ def test_transform(self):
         self.assertIn('data_samples', results)
         self.assertIsInstance(results['inputs'], torch.Tensor)
         self.assertEqual(results['inputs'].shape, (2, 300, 17, 3))
-        self.assertEqual(results['data_samples'].gt_labels.item,
+        self.assertEqual(results['data_samples'].gt_label,
                          torch.LongTensor([1]))
 
         # heatmap_imgs input
@@ -45,7 +45,7 @@ def test_transform(self):
         self.assertIn('data_samples', results)
         self.assertIsInstance(results['inputs'], torch.Tensor)
         self.assertEqual(results['inputs'].shape, (2, 17, 56, 56))
-        self.assertEqual(results['data_samples'].gt_labels.item,
+        self.assertEqual(results['data_samples'].gt_label,
                          torch.LongTensor([1]))
 
         # audios input
@@ -82,7 +82,7 @@ def test_transform(self):
         self.assertIsInstance(results['inputs'], torch.Tensor)
         self.assertIsInstance(results['data_samples'], ActionDataSample)
         self.assertEqual(results['data_samples'].img_shape, (256, 256, 3))
-        self.assertEqual(results['data_samples'].gt_labels.item,
+        self.assertEqual(results['data_samples'].gt_label,
                          torch.LongTensor([1]))
 
         # Test grayscale image
diff --git a/tests/evaluation/metrics/test_acc_metric.py b/tests/evaluation/metrics/test_acc_metric.py
index aeb6fb2cb0..b0e966933e 100644
--- a/tests/evaluation/metrics/test_acc_metric.py
+++ b/tests/evaluation/metrics/test_acc_metric.py
@@ -26,8 +26,7 @@ def generate_data(num_classes=5, random_label=False, multi_label=False):
             label = torch.randint(num_classes, size=[1])
         else:
             label = torch.LongTensor([scores.argmax().item()])
-        data_sample = dict(
-            pred_scores=dict(item=scores), gt_labels=dict(item=label))
+        data_sample = dict(pred_score=scores, gt_label=label)
         data_samples.append(data_sample)
     return data_batch, data_samples
 
@@ -97,7 +96,7 @@ def test_evaluate(self):
         """Test using the metric in the same way as Evalutor."""
         pred = [
             ActionDataSample().set_pred_score(i).set_pred_label(
-                j).set_gt_labels(k).to_dict() for i, j, k in zip([
+                j).set_gt_label(k).to_dict() for i, j, k in zip([
                     torch.tensor([0.7, 0.0, 0.3]),
                     torch.tensor([0.5, 0.2, 0.3]),
                     torch.tensor([0.4, 0.5, 0.1]),
@@ -122,7 +121,7 @@ def test_evaluate(self):
 
         # Test with label
         for sample in pred:
-            del sample['pred_scores']
+            del sample['pred_score']
         metric = METRICS.build(dict(type='ConfusionMatrix'))
         metric.process(None, pred)
         with self.assertRaisesRegex(AssertionError,
diff --git a/tests/models/data_preprocessors/test_data_preprocessor.py b/tests/models/data_preprocessors/test_data_preprocessor.py
index 5fe3e8f663..9591305691 100644
--- a/tests/models/data_preprocessors/test_data_preprocessor.py
+++ b/tests/models/data_preprocessors/test_data_preprocessor.py
@@ -15,7 +15,7 @@ def generate_dummy_data(batch_size, input_shape):
         'inputs':
         [torch.randint(0, 255, input_shape) for _ in range(batch_size)],
         'data_samples':
-        [ActionDataSample().set_gt_labels(2) for _ in range(batch_size)]
+        [ActionDataSample().set_gt_label(2) for _ in range(batch_size)]
     }
     return data
 
@@ -53,8 +53,8 @@ def test_data_preprocessor():
         format_shape='NCTHW',
         blending=dict(type='MixupBlending', num_classes=5))
     data = psr(deepcopy(raw_data), training=True)
-    assert data['data_samples'][0].gt_labels.item.shape == (5, )
-    assert data['data_samples'][1].gt_labels.item.shape == (5, )
+    assert data['data_samples'][0].gt_label.shape == (5, )
+    assert data['data_samples'][1].gt_label.shape == (5, )
 
     raw_data = generate_dummy_data(2, (1, 3, 224, 224))
     psr = ActionDataPreprocessor(
diff --git a/tests/models/data_preprocessors/test_multimodal_data_preprocessor.py b/tests/models/data_preprocessors/test_multimodal_data_preprocessor.py
index 35483bd5d9..671d2c1c96 100644
--- a/tests/models/data_preprocessors/test_multimodal_data_preprocessor.py
+++ b/tests/models/data_preprocessors/test_multimodal_data_preprocessor.py
@@ -13,7 +13,7 @@
 def generate_dummy_data(batch_size, input_keys, input_shapes):
     data = dict()
     data['data_samples'] = [
-        ActionDataSample().set_gt_labels(2) for _ in range(batch_size)
+        ActionDataSample().set_gt_label(2) for _ in range(batch_size)
     ]
     data['inputs'] = dict()
     for key, shape in zip(input_keys, input_shapes):
diff --git a/tests/models/heads/test_feature_head.py b/tests/models/heads/test_feature_head.py
index 932ed87133..424016bc8d 100644
--- a/tests/models/heads/test_feature_head.py
+++ b/tests/models/heads/test_feature_head.py
@@ -27,7 +27,7 @@ def test_2d_recognizer(self):
         input_shape = [3, 3, 32, 32]
         data_batch = {
             'inputs': [torch.randint(0, 256, input_shape)],
-            'data_samples': [ActionDataSample().set_gt_labels(2)]
+            'data_samples': [ActionDataSample().set_gt_label(2)]
         }
         feat = recognizer.test_step(data_batch)
         assert isinstance(feat, torch.Tensor)
@@ -46,7 +46,7 @@ def test_3d_recognizer(self):
         input_shape = [1, 3, 4, 32, 32]
         data_batch = {
             'inputs': [torch.randint(0, 256, input_shape)],
-            'data_samples': [ActionDataSample().set_gt_labels(2)]
+            'data_samples': [ActionDataSample().set_gt_label(2)]
         }
         feat = recognizer.test_step(data_batch)
         assert isinstance(feat, torch.Tensor)
diff --git a/tests/models/heads/test_omni_head.py b/tests/models/heads/test_omni_head.py
index f9181893af..2724830353 100644
--- a/tests/models/heads/test_omni_head.py
+++ b/tests/models/heads/test_omni_head.py
@@ -31,9 +31,7 @@ def testOmniHead():
     video_feat = torch.randn(2, 400, 8, 8, 8)
     video_score = head(video_feat)
     assert video_score.shape == torch.Size([2, 200])
-    data_samples = [
-        obj('gt_label', obj('label', torch.tensor(1))) for _ in range(2)
-    ]
+    data_samples = [obj('gt_label', torch.tensor(1)) for _ in range(2)]
     losses = head.loss_by_feat(video_score, data_samples)
     assert 'loss_cls' in losses
 
@@ -41,6 +39,6 @@ def testOmniHead():
     head.eval()
     image_score = head(image_feat)
     assert image_score.shape == torch.Size([1, 100])
-    data_samples = [obj('gt_labels', obj('item', torch.tensor(1)))]
+    data_samples = [obj('gt_label', torch.tensor(1))]
     losses = head.loss_by_feat(image_score, data_samples)
     assert 'loss_cls' in losses
diff --git a/tests/models/necks/test_tpn.py b/tests/models/necks/test_tpn.py
index 1e9387aa39..08cc17dedc 100644
--- a/tests/models/necks/test_tpn.py
+++ b/tests/models/necks/test_tpn.py
@@ -3,7 +3,6 @@
 
 import pytest
 import torch
-from mmengine.structures import LabelData
 
 from mmaction.models import TPN
 from mmaction.structures import ActionDataSample
@@ -14,7 +13,7 @@ def get_label(label_):
     label = []
     for idx, one_label in enumerate(label_):
         data_sample = ActionDataSample()
-        data_sample.gt_labels = LabelData(item=label_[idx])
+        data_sample.set_gt_label(label_[idx])
         label.append(data_sample)
     return label
 
diff --git a/tests/models/recognizers/recognizer_omni.py b/tests/models/recognizers/recognizer_omni.py
index 23c58748de..e06cd5c03f 100644
--- a/tests/models/recognizers/recognizer_omni.py
+++ b/tests/models/recognizers/recognizer_omni.py
@@ -12,8 +12,7 @@
 def test_omni_resnet():
     register_all_modules()
     config = get_recognizer_cfg(
-        'omnisource/slowonly_r50_16xb16-8x8x1-256e_imagenet-kinetics400-rgb.py'
-    )
+        'omnisource/slowonly_r50_8xb16-8x8x1-256e_imagenet-kinetics400-rgb.py')
     recognizer = MODELS.build(config.model)
 
     # test train_step
@@ -24,8 +23,8 @@ def test_omni_resnet():
             torch.randint(0, 255, (1, 3, 8, 224, 224))
         ],
         'data_samples': [
-            ActionDataSample().set_gt_labels(2),
-            ActionDataSample().set_gt_labels(2)
+            ActionDataSample().set_gt_label(2),
+            ActionDataSample().set_gt_label(2)
         ]
     }
 
@@ -35,8 +34,8 @@ def test_omni_resnet():
             torch.randint(0, 255, (1, 3, 224, 224))
         ],
         'data_samples': [
-            ActionDataSample().set_gt_labels(2),
-            ActionDataSample().set_gt_labels(2)
+            ActionDataSample().set_gt_label(2),
+            ActionDataSample().set_gt_label(2)
         ]
     }
 
@@ -54,7 +53,7 @@ def test_omni_resnet():
     # test test_step
     with torch.no_grad():
         predictions = recognizer.test_step(video_sample)
-    score = predictions[0].pred_scores.item
-    assert len(predictions) == 1
+    score = predictions[0].pred_score
+    assert len(predictions) == 2
     assert torch.min(score) >= 0
     assert torch.max(score) <= 1
diff --git a/tests/models/recognizers/test_recognizer2d.py b/tests/models/recognizers/test_recognizer2d.py
index b40398755b..3a13b0ef37 100644
--- a/tests/models/recognizers/test_recognizer2d.py
+++ b/tests/models/recognizers/test_recognizer2d.py
@@ -21,7 +21,7 @@ def train_test_step(cfg, input_shape):
         'inputs':
         [torch.randint(0, 256, input_shape) for i in range(batch_size)],
         'data_samples':
-        [ActionDataSample().set_gt_labels(2) for i in range(batch_size)]
+        [ActionDataSample().set_gt_label(2) for i in range(batch_size)]
     }
 
     # test train_step
@@ -34,7 +34,7 @@ def train_test_step(cfg, input_shape):
     # test test_step
     with torch.no_grad():
         predictions = recognizer.test_step(data_batch)
-    score = predictions[0].pred_scores.item
+    score = predictions[0].pred_score
     assert len(predictions) == batch_size
     assert score.shape == torch.Size([num_classes])
     assert torch.min(score) >= 0
@@ -46,7 +46,7 @@ def train_test_step(cfg, input_shape):
     data_batch['inputs'] = [torch.randint(0, 256, input_shape)]
     with torch.no_grad():
         predictions = recognizer.test_step(data_batch)
-    score = predictions[0].pred_scores.item
+    score = predictions[0].pred_score
     assert len(predictions) == batch_size
     assert score.shape == torch.Size([num_classes])
 
diff --git a/tests/models/recognizers/test_recognizer3d.py b/tests/models/recognizers/test_recognizer3d.py
index 7d80de00fb..c9f73d1a10 100644
--- a/tests/models/recognizers/test_recognizer3d.py
+++ b/tests/models/recognizers/test_recognizer3d.py
@@ -14,7 +14,7 @@ def train_test_step(cfg, input_shape):
     num_classes = cfg.model.cls_head.num_classes
     data_batch = {
         'inputs': [torch.randint(0, 256, input_shape)],
-        'data_samples': [ActionDataSample().set_gt_labels(2)]
+        'data_samples': [ActionDataSample().set_gt_label(2)]
     }
 
     # test train_step
@@ -27,7 +27,7 @@ def train_test_step(cfg, input_shape):
     # test test_step
     with torch.no_grad():
         predictions = recognizer.test_step(data_batch)
-    score = predictions[0].pred_scores.item
+    score = predictions[0].pred_score
     assert len(predictions) == 1
     assert score.shape == torch.Size([num_classes])
     assert torch.min(score) >= 0
@@ -40,7 +40,7 @@ def train_test_step(cfg, input_shape):
     data_batch['inputs'] = [torch.randint(0, 256, input_shape)]
     with torch.no_grad():
         predictions = recognizer.test_step(data_batch)
-    score = predictions[0].pred_scores.item
+    score = predictions[0].pred_score
     assert len(predictions) == 1
     assert score.shape == torch.Size([num_views, num_classes])
 
diff --git a/tests/models/recognizers/test_recognizer_gcn.py b/tests/models/recognizers/test_recognizer_gcn.py
index 7ae1441a6b..723c77d595 100644
--- a/tests/models/recognizers/test_recognizer_gcn.py
+++ b/tests/models/recognizers/test_recognizer_gcn.py
@@ -14,7 +14,7 @@ def train_test_step(cfg, input_shape):
     num_classes = cfg.model.cls_head.num_classes
     data_batch = {
         'inputs': [torch.randn(input_shape)],
-        'data_samples': [ActionDataSample().set_gt_labels(2)]
+        'data_samples': [ActionDataSample().set_gt_label(2)]
     }
 
     # test train_step
@@ -27,7 +27,7 @@ def train_test_step(cfg, input_shape):
     # test test_step
     with torch.no_grad():
         predictions = recognizer.test_step(data_batch)
-    score = predictions[0].pred_scores.item
+    score = predictions[0].pred_score
     assert len(predictions) == 1
     assert score.shape == torch.Size([num_classes])
     assert torch.min(score) >= 0
@@ -40,7 +40,7 @@ def train_test_step(cfg, input_shape):
     data_batch['inputs'] = [torch.randn(input_shape)]
     with torch.no_grad():
         predictions = recognizer.test_step(data_batch)
-    score = predictions[0].pred_scores.item
+    score = predictions[0].pred_score
     assert len(predictions) == 1
     assert score.shape == torch.Size([num_clips, num_classes])
 
diff --git a/tests/models/utils/test_blending_utils.py b/tests/models/utils/test_blending_utils.py
index 993b331093..e2eba9de47 100644
--- a/tests/models/utils/test_blending_utils.py
+++ b/tests/models/utils/test_blending_utils.py
@@ -4,7 +4,6 @@
 import torch
 import torch.nn.functional as F
 from mmcv.transforms import to_tensor
-from mmengine.structures import LabelData
 
 from mmaction.models import CutmixBlending, MixupBlending, RandomBatchAugment
 from mmaction.structures import ActionDataSample
@@ -14,7 +13,7 @@ def get_label(label_):
     label = []
     for idx, one_label in enumerate(label_):
         data_sample = ActionDataSample()
-        data_sample.gt_labels = LabelData(item=label_[idx])
+        data_sample.set_gt_label(label_[idx])
         label.append(data_sample)
     return label
 
diff --git a/tests/models/utils/test_gradcam.py b/tests/models/utils/test_gradcam.py
index e9568531c5..3982907bcb 100644
--- a/tests/models/utils/test_gradcam.py
+++ b/tests/models/utils/test_gradcam.py
@@ -41,7 +41,7 @@ def _do_test_2D_models(recognizer,
                        device='cpu'):
     demo_data = {
         'inputs': [torch.randint(0, 256, input_shape[1:])],
-        'data_samples': [ActionDataSample().set_gt_labels(2)]
+        'data_samples': [ActionDataSample().set_gt_label(2)]
     }
 
     recognizer = recognizer.to(device)
@@ -67,7 +67,7 @@ def _do_test_3D_models(recognizer,
         input_shape, num_classes=num_classes, model_type='3D')
     demo_data = {
         'inputs': [torch.randint(0, 256, input_shape[1:])],
-        'data_samples': [ActionDataSample().set_gt_labels(2)]
+        'data_samples': [ActionDataSample().set_gt_label(2)]
     }
 
     gradcam = GradCAM(recognizer, target_layer_name)
diff --git a/tests/visualization/test_action_visualizer.py b/tests/visualization/test_action_visualizer.py
index c86b324af9..298b59a842 100644
--- a/tests/visualization/test_action_visualizer.py
+++ b/tests/visualization/test_action_visualizer.py
@@ -3,8 +3,6 @@
 
 import decord
 import pytest
-import torch
-from mmengine.structures import LabelData
 
 from mmaction.structures import ActionDataSample
 from mmaction.visualization import ActionVisualizer
@@ -16,7 +14,7 @@ def test_visualizer():
     video = video.get_batch(range(32)).asnumpy()
 
     data_sample = ActionDataSample()
-    data_sample.gt_labels = LabelData(item=torch.tensor([2]))
+    data_sample.set_gt_label(2)
 
     vis = ActionVisualizer()
     vis.add_datasample('demo', video)
diff --git a/tests/visualization/test_video_backend.py b/tests/visualization/test_video_backend.py
index c5153d812d..591646eb7a 100644
--- a/tests/visualization/test_video_backend.py
+++ b/tests/visualization/test_video_backend.py
@@ -8,8 +8,6 @@
 
 import decord
 import pytest
-import torch
-from mmengine.structures import LabelData
 
 from mmaction.structures import ActionDataSample
 from mmaction.utils import register_all_modules
@@ -24,7 +22,7 @@ def test_local_visbackend():
     video = video.get_batch(range(32)).asnumpy()
 
     data_sample = ActionDataSample()
-    data_sample.gt_labels = LabelData(item=torch.tensor([2]))
+    data_sample.set_gt_label(2)
     with TemporaryDirectory() as tmp_dir:
         vis = ActionVisualizer(
             save_dir=tmp_dir, vis_backends=[dict(type='LocalVisBackend')])
@@ -46,7 +44,7 @@ def test_tensorboard_visbackend():
     video = video.get_batch(range(32)).asnumpy()
 
     data_sample = ActionDataSample()
-    data_sample.gt_labels = LabelData(item=torch.tensor([2]))
+    data_sample.set_gt_label(2)
     with TemporaryDirectory() as tmp_dir:
         vis = ActionVisualizer(
             save_dir=tmp_dir,
@@ -63,29 +61,3 @@ def test_tensorboard_visbackend():
         # wait tensorboard store asynchronously
         time.sleep(1)
     return
-
-
-"""
-def test_wandb_visbackend():
-    video = decord.VideoReader('./demo/demo.mp4')
-    video = video.get_batch(range(32)).asnumpy()
-
-    data_sample = ActionDataSample()
-    data_sample.gt_labels = LabelData(item=torch.tensor([2]))
-
-    vis = ActionVisualizer(
-        save_dir='./outputs', vis_backends=[dict(type='WandbVisBackend')])
-    vis.add_datasample('demo', video, data_sample, step=1)
-
-    wandb_dir = 'outputs/vis_data/wandb/'
-    assert Path(wandb_dir).exists()
-
-    flag = False
-    for item in os.listdir(wandb_dir):
-        if item.startswith('run-') and os.path.isdir('%s/%s' %
-                                                     (wandb_dir, item)):
-            flag = True
-            break
-    assert flag, 'Cannot find wandb folder!'
-    return
-"""
diff --git a/tools/analysis_tools/report_accuracy.py b/tools/analysis_tools/report_accuracy.py
index c361f644de..d5c529dfe1 100644
--- a/tools/analysis_tools/report_accuracy.py
+++ b/tools/analysis_tools/report_accuracy.py
@@ -39,20 +39,13 @@ def main():
     data_sample_list = [load(f) for f in args.preds]
     score_list = []
     for data_samples in data_sample_list:
-        scores = [
-            sample['pred_scores']['item'].numpy() for sample in data_samples
-        ]
+        scores = [sample['pred_score'].numpy() for sample in data_samples]
         score_list.append(scores)
 
     if args.multi_label:
-        labels = [
-            sample['gt_labels']['item'] for sample in data_sample_list[0]
-        ]
+        labels = [sample['gt_label'] for sample in data_sample_list[0]]
     else:
-        labels = [
-            sample['gt_labels']['item'].item()
-            for sample in data_sample_list[0]
-        ]
+        labels = [sample['gt_label'].item() for sample in data_sample_list[0]]
 
     if args.apply_softmax:
 
diff --git a/tools/deployment/export_onnx_gcn.py b/tools/deployment/export_onnx_gcn.py
index a4fd237a59..b9cb8423a6 100644
--- a/tools/deployment/export_onnx_gcn.py
+++ b/tools/deployment/export_onnx_gcn.py
@@ -122,7 +122,7 @@ def main():
     base_output = base_model(
         input_tensor.unsqueeze(0), data_samples=[data_sample],
         mode='predict')[0]
-    base_output = base_output.pred_scores.item.detach().cpu().numpy()
+    base_output = base_output.pred_score.detach().cpu().numpy()
 
     model = GCNNet(base_model).to(args.device)
     model.eval()
diff --git a/tools/deployment/export_onnx_posec3d.py b/tools/deployment/export_onnx_posec3d.py
index 014096b48e..f8950dd8c8 100644
--- a/tools/deployment/export_onnx_posec3d.py
+++ b/tools/deployment/export_onnx_posec3d.py
@@ -118,7 +118,7 @@ def main():
     base_output = base_model(
         input_tensor.unsqueeze(0), data_samples=[data_sample],
         mode='predict')[0]
-    base_output = base_output.pred_scores.item.detach().cpu().numpy()
+    base_output = base_output.pred_score.detach().cpu().numpy()
 
     model = GCNNet(base_model).to(args.device)
     model.eval()