sonic-net · yutongzhang-microsoft · Sep 23, 2024 · Oct 25, 2024 · Oct 25, 2024 · Oct 25, 2024
diff --git a/.azure-pipelines/impacted_area_testing/calculate-instance-numbers.yml b/.azure-pipelines/impacted_area_testing/calculate-instance-numbers.yml
@@ -0,0 +1,50 @@
+parameters:
+  - name: TOPOLOGY  # key looked up in the TEST_SCRIPTS JSON; also passed as --topology
+    type: string
+    default: ""
+
+  - name: BUILD_BRANCH  # passed as --branch to calculate_instance_number.py
+    type: string
+    default: ""
+
+steps:
+- script: |
+    # Install the Azure CLI only when `az` is not already available on PATH.
+    if ! command -v az; then
+      echo "Azure CLI is not installed. Trying to install it..."
+      curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash
+    else
+      echo "Azure CLI is already installed"
+    fi
+
+    sudo apt-get update && sudo apt-get install -y jq  # jq parses the TEST_SCRIPTS JSON in the next step
+  displayName: "Install azure-cli"
+
+- script: |
+    set -x
+
+    # jq is already installed by the preceding "Install azure-cli" step of this template.
+
+    # TEST_SCRIPTS is a JSON object keyed by topology type. The key must be quoted:
+    # names such as "t0-2vlans" contain '-', which an unquoted `.t0-2vlans` filter
+    # would parse as a subtraction instead of a key lookup.
+    TEST_SCRIPTS=$(echo '$(TEST_SCRIPTS)' | jq -r -c '."${{ parameters.TOPOLOGY }}"')
+    # Flatten the JSON array of script paths into a comma-separated string.
+    SCRIPTS=$(echo "$TEST_SCRIPTS" | jq -r '. | join(",")')
+    echo -n "##vso[task.setvariable variable=SCRIPTS]$SCRIPTS"
+  displayName: "Get ${{ parameters.TOPOLOGY }} test scripts"
+
+- task: AzureCLI@2
+  displayName: "Calculate instance number"
+  inputs:
+    azureSubscription: "SONiC-Automation"
+    scriptType: 'bash'
+    scriptLocation: 'inlineScript'
+    inlineScript: |
+      set -x
+
+      pip install azure-kusto-data azure-identity
+
+      # Turn command tracing off while the token is handled so that `set -x`
+      # does not print the bearer token into the pipeline log.
+      set +x
+      accessToken=$(az account get-access-token --resource https://api.kusto.windows.net --query accessToken -o tsv)
+      # calculate_instance_number.py reads the token from the ACCESS_TOKEN environment variable.
+      export ACCESS_TOKEN=$accessToken
+      set -x
+
+      # Quote every argument so that an empty or space-containing value is still passed as one argument.
+      INSTANCE_NUMBER=$(python ./.azure-pipelines/impacted_area_testing/calculate_instance_number.py --scripts "$(SCRIPTS)" --topology "${{ parameters.TOPOLOGY }}" --branch "${{ parameters.BUILD_BRANCH }}")
+      echo "$INSTANCE_NUMBER"
+      echo -n "##vso[task.setvariable variable=INSTANCE_NUMBER]$INSTANCE_NUMBER"
diff --git a/.azure-pipelines/impacted_area_testing/calculate_instance_number.py b/.azure-pipelines/impacted_area_testing/calculate_instance_number.py
@@ -0,0 +1,83 @@
+import os
+import argparse
+import math
+from constant import PR_CHECKER_TOPOLOGY_NAME, MAX_INSTANCE_NUMBER
+from azure.kusto.data import KustoConnectionStringBuilder, KustoClient
+
+
+def parse_list_from_str(s):
+    """Split a comma-separated string into a list of trimmed, non-empty items.
+
+    Azure Pipelines cannot pass an empty parameter, so a single space ' ' is
+    used as a placeholder for "no value"; whitespace-only input is therefore
+    treated the same as empty input.
+
+    Returns None when the (stripped) input is empty or otherwise falsy,
+    else the list of non-empty, stripped items.
+    """
+    text = s.strip() if isinstance(s, str) else s
+    if not text:
+        return None
+
+    items = []
+    for piece in text.split(','):
+        piece = piece.strip()
+        if piece:
+            items.append(piece)
+    return items
+
+
+def main(scripts, topology, branch):
+    """Print the number of test instances needed to run the given scripts.
+
+    For every script, the historical runtime is looked up in Kusto from the
+    five most recent finished baseline PR test plans of the given topology
+    and branch. The per-script averages are summed, and the total is divided
+    into 90-minute slots (rounded up, capped at MAX_INSTANCE_NUMBER).
+
+    Args:
+        scripts: Comma-separated test script paths (may be empty or ' ').
+        topology: PR topology type; must be a key of PR_CHECKER_TOPOLOGY_NAME.
+        branch: Test branch used to filter the historical test plans.
+
+    Raises:
+        RuntimeError: If the Kusto cluster or access token is missing from
+            the environment.
+        KeyError: If `topology` is not a key of PR_CHECKER_TOPOLOGY_NAME.
+    """
+    ingest_cluster = os.getenv("TEST_REPORT_QUERY_KUSTO_CLUSTER_BACKUP")
+    access_token = os.getenv('ACCESS_TOKEN', None)
+
+    if not ingest_cluster or not access_token:
+        raise RuntimeError(
+            "Could not load Kusto Credentials from environment")
+
+    kcsb = KustoConnectionStringBuilder.with_aad_application_token_authentication(ingest_cluster,
+                                                                                  access_token)
+    client = KustoClient(kcsb)
+
+    # parse_list_from_str returns None for empty input; treat that as "no scripts"
+    # instead of failing with a TypeError in the loop below.
+    scripts = parse_list_from_str(scripts) or []
+
+    # Fallback (seconds) for a script without usable historical data.
+    default_running_time = 1800
+    # Must stay in sync with the `take 5` in the query below.
+    # NOTE(review): the sum is divided by 5 even if fewer than 5 plans exist - confirm this is acceptable.
+    recent_plan_count = 5
+
+    # As baseline test is the universal set of PR test,
+    # we get the historical running time of one script from the baseline test plans.
+    query_template = ("V2TestCases "
+                      "| join kind=inner"
+                      "(TestPlans "
+                      "| where TestPlanType == 'PR' and Result == 'FINISHED' and Topology == '{}' "
+                      "and TestBranch == '{}' and TestPlanName contains '{}' "
+                      "and TestPlanName contains '_BaselineTest_'"
+                      "| order by UploadTime desc | take 5) on TestPlanId "
+                      "| where FilePath == '{}' "
+                      "| summarize sum(Runtime)")
+
+    total_running_time = 0
+
+    for script in scripts:
+        query = query_template.format(PR_CHECKER_TOPOLOGY_NAME[topology][0], branch,
+                                      PR_CHECKER_TOPOLOGY_NAME[topology][1], script)
+        response = client.execute("SonicTestData", query)
+
+        average_running_time = default_running_time
+
+        for row in response.primary_results[0]:
+            # `summarize sum(Runtime)` yields a row even when nothing matched, with an
+            # empty (0 or null) sum. Only a positive sum is real history; otherwise
+            # keep the default so the script is not counted as taking zero time.
+            summed_runtime = row["sum_Runtime"]
+            if summed_runtime:
+                average_running_time = summed_runtime / recent_plan_count
+
+        total_running_time += average_running_time
+
+    # Total running time is calculated by seconds, divide by 60 to get minutes
+    # For one instance, we plan to assign 90 minutes to run test scripts
+    # Obtain the number of instances by rounding up the calculation.
+    # To prevent unexpected situations, we set the maximum number of instance
+    print(min(math.ceil(total_running_time / 60 / 90), MAX_INSTANCE_NUMBER))
+
+
+if __name__ == '__main__':
+    # Command-line entry point: all three options are plain strings defaulting to "".
+    parser = argparse.ArgumentParser()
+    for flag, description in (("--topology", "The topology of testplan"),
+                              ("--scripts", "Test scripts to be executed"),
+                              ("--branch", "Test branch")):
+        parser.add_argument(flag, help=description, type=str, default="")
+    cli_args = parser.parse_args()
+
+    main(cli_args.scripts, cli_args.topology, cli_args.branch)
diff --git a/.azure-pipelines/impacted_area_testing/constant.py b/.azure-pipelines/impacted_area_testing/constant.py
@@ -0,0 +1,27 @@
+# PR checkers that currently exist:
+# - dpu
+# - dualtor-t0
+# - multi-asic-t1-lag
+# - t0
+# - t0-2vlans
+# - t0-sonic
+# - t1-lag
+PR_TOPOLOGY_TYPE = ["t0", "t0-2vlans", "t0-sonic", "t1", "t1-multi-asic", "dpu", "dualtor"]
+
+EXCLUDE_TEST_SCRIPTS = [  # skipped by get_test_scripts.py when collecting scripts
+    "test_posttest.py",
+    "test_pretest.py"
+]
+
+# Maps each PR topology type to [topology name recorded in Kusto, substring of the PR test plan name].
+PR_CHECKER_TOPOLOGY_NAME = {
+    "t0": ["t0", "_kvmtest-t0_"],
+    "t0-2vlans": ["t0", "_kvmtest-t0-2vlans_"],
+    "t0-sonic": ["t0-64-32", "_kvmtest-t0-sonic_"],
+    "t1": ["t1-lag", "_kvmtest-t1-lag_"],
+    "t1-multi-asic": ["t1-8-lag", "_kvmtest-multi-asic-t1-lag_"],
+    "dpu": ["dpu", "_kvmtest-dpu_"],
+    "dualtor": ["dualtor", "_kvmtest-dualtor-t0_"]
+}
+
+MAX_INSTANCE_NUMBER = 25  # upper bound on the instance count printed by calculate_instance_number.py
diff --git a/.azure-pipelines/impacted_area_testing/get-impacted-area.yml b/.azure-pipelines/impacted_area_testing/get-impacted-area.yml
@@ -0,0 +1,55 @@
+steps:
+- script: |
+    set -x  # trace each command into the pipeline log
+
+    DIFF_FOLDERS=$(git diff HEAD^ HEAD --name-only | xargs -n1 dirname | sort -u | tr '\n' ' ')  # unique parent directories, space-separated
+    echo -n "##vso[task.setvariable variable=DIFF_FOLDERS]$DIFF_FOLDERS"  # publish as pipeline variable DIFF_FOLDERS
+  displayName: "Get diff folders"
+
+- script: |
+    set -x
+
+    pip install PyYAML
+    pip install natsort
+
+    # Refresh the package index before installing; a bare `apt-get upgrade` (no -y)
+    # would wait for confirmation and does nothing to make jq installable.
+    sudo apt-get update
+    sudo apt-get install -y jq
+
+    FINAL_FEATURES=""
+    echo "$(DIFF_FOLDERS)"
+    IFS=' ' read -ra FEATURES_LIST <<< "$(DIFF_FOLDERS)"
+    for FEATURE in "${FEATURES_LIST[@]}"
+    do
+      # If changes contains the common part in tests folder,the scope of PR testing is all test scripts.
+      if [[ "$FEATURE" == *tests/common* ]]; then
+        FINAL_FEATURES=""
+        break
+
+      # If changes only limited to specific feature, the scope of PR testing is impacted area.
+      # DIFF_FOLDERS holds directory names (e.g. "tests/bgp"), not file paths, so match on
+      # the "tests/" prefix. A file directly under tests/ yields "tests", which does not
+      # match and falls through to the "all test scripts" branch below.
+      elif [[ "$FEATURE" == tests/* ]]; then
+        # Keep only the top-level feature folder, so a change in a helper sub-folder
+        # (e.g. tests/bgp/templates) still selects the test scripts of that feature.
+        FEATURE="${FEATURE#tests/}"
+        FEATURE="${FEATURE%%/*}"
+        if [[ -z "$FINAL_FEATURES" ]]; then
+          FINAL_FEATURES="$FEATURE"
+        else
+          FINAL_FEATURES="$FINAL_FEATURES,$FEATURE"
+        fi
+
+      # If changes related to other folders except tests, we also consider them as common part.
+      # The scope of PR testing is all test scripts.
+      else
+        FINAL_FEATURES=""
+        break
+      fi
+    done
+
+    # An empty --features value makes get_test_scripts.py collect every script under tests.
+    TEST_SCRIPTS=$(python ./.azure-pipelines/impacted_area_testing/get_test_scripts.py --features "$FINAL_FEATURES" --location tests)
+
+    PR_CHECKERS=$(echo "${TEST_SCRIPTS}" | jq -c 'keys')
+
+    echo "##vso[task.setvariable variable=PR_CHECKERS;isOutput=true]$PR_CHECKERS"
+    echo "##vso[task.setvariable variable=TEST_SCRIPTS;isOutput=true]$TEST_SCRIPTS"
+  name: SetVariableTask
+  displayName: "Get impacted area"
diff --git a/.azure-pipelines/impacted_area_testing/get_test_scripts.py b/.azure-pipelines/impacted_area_testing/get_test_scripts.py
@@ -0,0 +1,105 @@
+#!/usr/bin/env python3
+
+"""
+    Scripts for getting test scripts in impacted area
+    Example:
+        python impacted_area_testing/get_test_scripts.py --features vrf,gnmi --location ../tests
+
+    It will get all test scripts in specific impacted area.
+"""
+import os
+import re
+import logging
+import json
+import argparse
+from natsort import natsorted
+from constant import PR_TOPOLOGY_TYPE, EXCLUDE_TEST_SCRIPTS
+
+
+def topo_name_to_type(topo_name):
+    """Map a topology name or topology mark to its topology type.
+
+    The type is the longest known type name that `topo_name` starts with.
+    Management-ToR style names ('mgmttor', 'm0', 'mc0', 'mx') are folded into
+    't0', and 'multidut-tgen' into 'tgen'. When no known type matches, a
+    warning is logged and `topo_name` is returned unchanged.
+    """
+    known_types = ('wan', 't0', 't1', 'ptf', 'fullmesh', 'dualtor', 't2', 'tgen', 'multidut-tgen', 'mgmttor',
+                   'm0', 'mc0', 'mx', 'dpu', 'any', 'snappi', 'util', 't0-2vlans', 't0-sonic', 't1-multi-asic')
+    # Regex alternation takes the first alternative that matches, so longer names must
+    # come before their own prefixes; otherwise 't0' would shadow 't0-2vlans'/'t0-sonic'
+    # and 't1' would shadow 't1-multi-asic'.
+    pattern = re.compile('^({})'.format('|'.join(sorted(known_types, key=len, reverse=True))))
+    match = pattern.match(topo_name)
+    if match is None:
+        logging.warning("Unsupported testbed type - {}".format(topo_name))
+        return topo_name
+
+    topo_type = match.group()
+    if topo_type in ['mgmttor', 'm0', 'mc0', 'mx']:
+        # certain testbed types are in 't0' category with different names.
+        topo_type = 't0'
+    if topo_type in ['multidut-tgen']:
+        topo_type = 'tgen'
+    return topo_type
+
+
+def collect_all_scripts(features, location):
+    '''
+    Collect all test scripts under the impacted area and distribute them
+    to the corresponding PR checkers.
+
+    features: comma-separated feature folders relative to `location`;
+              an empty string selects everything under `location`.
+    location: path of the `tests` folder.
+
+    Returns a dict mapping each PR topology type to the list of script paths
+    (relative to `location`) whose `pytest.mark.topology` mark selects it.
+    Topology types without any script are omitted.
+    '''
+    # Recursively find all files starting with "test_" and ending with ".py".
+    # A set is used because overlapping features (e.g. "bgp,bgp/sub") reach the same file twice.
+    found = set()
+    for feature in features.split(","):
+        feature_path = os.path.join(location, feature.strip())
+        for root, _dirs, names in os.walk(feature_path):
+            for name in names:
+                if name.startswith("test_") and name.endswith(".py"):
+                    found.add(os.path.join(root, name))
+    # Pre-sort so that paths with equal natural-sort keys still come out in a stable order.
+    files = natsorted(sorted(found))
+
+    # Matches the argument list of a `pytest.mark.topology(...)` call on a single line
+    pattern = re.compile(r"[^@]pytest\.mark\.topology\(([^\)]*)\)")
+
+    # Init the dict to record the mapping of topology type and test scripts
+    test_scripts_per_topology_type = {topology_type: [] for topology_type in PR_TOPOLOGY_TYPE}
+
+    # relpath copes with an empty `location` or one with a trailing slash,
+    # where slicing off len(location) + 1 characters would cut the wrong amount.
+    base = location or os.curdir
+
+    for f in files:
+        # Remove prefix from file name:
+        filename = os.path.relpath(f, base)
+        if filename in EXCLUDE_TEST_SCRIPTS:
+            continue
+
+        try:
+            # Read as UTF-8 explicitly so the result does not depend on the agent's locale.
+            with open(f, 'r', encoding='utf-8') as file:
+                for line in file:
+                    # Get topology type of script from mark `pytest.mark.topology`
+                    match = pattern.search(line)
+                    if not match:
+                        continue
+                    for topology in match.group(1).split(","):
+                        topology_mark = topology.strip().strip('"').strip('\'')
+                        if topology_mark == "any":
+                            # "any" selects every PR checker
+                            targets = list(test_scripts_per_topology_type)
+                        else:
+                            targets = [topo_name_to_type(topology_mark)]
+                        for key in targets:
+                            if key in test_scripts_per_topology_type \
+                                    and filename not in test_scripts_per_topology_type[key]:
+                                test_scripts_per_topology_type[key].append(filename)
+        except (OSError, UnicodeDecodeError) as e:
+            # Best effort: an unreadable script is reported and skipped.
+            logging.error('Failed to load file {}, error {}'.format(f, e))
+
+    # Drop topology types that ended up without any script
+    return {k: v for k, v in test_scripts_per_topology_type.items() if v}
+
+
+def main(features, location):
+    """Print, as JSON, the scripts collected for `features` under `location`, grouped by topology type."""
+    print(json.dumps(collect_all_scripts(features, location)))
+
+
+if __name__ == '__main__':
+    # Command-line entry point: both options are plain strings defaulting to "".
+    parser = argparse.ArgumentParser()
+    for flag, description in (("--features", "Impacted area"),
+                              ("--location", "The location of folder `tests`")):
+        parser.add_argument(flag, help=description, type=str, default="")
+    cli_args = parser.parse_args()
+
+    main(cli_args.features, cli_args.location)