DataDog · dd-mergequeue · Dec 3, 2024 · May 31, 2024 · Dec 2, 2024 · Dec 2, 2024
@@ -0,0 +1,23 @@
+---
+name: "Assign issue to a team"
+
+# Triggered for every issue that is opened or reopened.
+on:
+  issues:
+    types: [opened, reopened]
+
+jobs:
+  auto_assign_issue:
+    runs-on: ubuntu-latest
+    container: ghcr.io/datadog/agent-issue-auto-assign:latest
+    permissions:
+      issues: write
+    steps:
+    - name: Checkout repository
+      # No `ref` override: `github.head_ref` is only set for pull request
+      # events, so on `issues` events the default checkout is what we get.
+      uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b  # v4.1.4
+    - name: Assign issue
+      env:
+        GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        # The task reads os.environ['SLACK_API_TOKEN'] to post the Slack
+        # notification whenever it is not run with --dry-run.
+        # NOTE(review): the secret name is an assumption, point it at the
+        # repository's actual Slack bot token secret.
+        SLACK_API_TOKEN: ${{ secrets.SLACK_API_TOKEN }}
+        # Passed through the environment instead of being interpolated
+        # into the script text.
+        ISSUE_NUMBER: ${{ github.event.issue.number }}
+      # The task parameter is `issue_id`, which Invoke exposes as --issue-id.
+      run: |
+        inv -e issue.assign-owner --issue-id "$ISSUE_NUMBER"
@@ -35,6 +35,7 @@
     go_deps,
     installer,
     invoke_unit_tests,
+    issue,
     kmt,
     linter,
     modules,
@@ -180,6 +181,7 @@
 ns.add_collection(git)
 ns.add_collection(github_tasks, "github")
 ns.add_collection(gitlab_helpers, "gitlab")
+ns.add_collection(issue)
 ns.add_collection(package)
 ns.add_collection(pipeline)
 ns.add_collection(notes)

@@ -0,0 +1,37 @@
+import os
+
+from invoke import task
+
+from tasks.libs.ciproviders.github_api import GithubAPI
+from tasks.libs.issue.assign import assign_with_model, assign_with_rules
+from tasks.libs.issue.model.actions import generate_model
+from tasks.libs.pipeline.notifications import GITHUB_SLACK_MAP
+
+
+@task
+def assign_owner(_, issue_id, dry_run=False):
+    """
+    Pick an owning team for a DataDog/datadog-agent issue, label the issue and notify the team on Slack.
+
+    The model prediction is used unless its confidence is below 0.5, in which case
+    the rule-based assignment takes over. With dry_run set, the owner is only
+    printed and returned: no label is added and no Slack message is posted.
+    """
+    gh = GithubAPI('DataDog/datadog-agent')
+    issue = gh.repo.get_issue(int(issue_id))
+    owner, confidence = assign_with_model(issue)
+    assignment = "rules" if confidence < 0.5 else "model"
+    if assignment == "rules":
+        owner = assign_with_rules(issue, gh)
+    print(f"Issue assigned to team/{owner} with {assignment}")
+    if dry_run:
+        return owner
+
+    # Edit issue label
+    issue.add_to_labels(f"team/{owner}")
+
+    # Post message (slack_sdk is imported lazily, only when a message is really sent)
+    from slack_sdk import WebClient
+
+    slack = WebClient(os.environ['SLACK_API_TOKEN'])
+    # Teams without a dedicated channel in the map fall back to the default one
+    channel = GITHUB_SLACK_MAP.get(owner.lower(), '#agent-ask-anything')
+    message = (
+        f':githubstatus_partial_outage: *New Community Issue*\n{issue.title} <{issue.html_url}|{gh.repo.name}#{issue_id}>'
+        "\nThe assignation to your team was done automatically, using issue content and title. Please redirect if needed."
+    )
+    slack.chat_postMessage(channel=channel, text=message)
+    return owner
+
+
+@task
+def generate_the_model(_):
+    """
+    Invoke entry point that runs `generate_model` from `tasks.libs.issue.model.actions`.
+    """
+    generate_model()
@@ -11,11 +11,9 @@
 import tempfile
 import time
 import traceback
-from collections import Counter
 from contextlib import contextmanager
 from dataclasses import dataclass
 from functools import wraps
-from pathlib import Path
 from subprocess import CalledProcessError, check_output
 from types import SimpleNamespace
 
@@ -26,7 +24,6 @@
 from tasks.libs.common.color import Color, color_message
 from tasks.libs.common.constants import ALLOWED_REPO_ALL_BRANCHES, REPO_PATH
 from tasks.libs.common.git import get_commit_sha, get_default_branch
-from tasks.libs.owners.parsing import search_owners
 from tasks.libs.releasing.version import get_version
 from tasks.libs.types.arch import Arch
 
@@ -587,105 +584,6 @@ def parse_kernel_version(version: str) -> tuple[int, int, int, int]:
     return (int(match.group(1)), int(match.group(2)), int(match.group(4) or "0"), int(match.group(6) or "0"))
 
 
-def guess_from_labels(issue):
-    for label in issue.labels:
-        if label.name.startswith("team/") and "triage" not in label.name:
-            return label.name.split("/")[-1]
-    return 'triage'
-
-
-def guess_from_keywords(issue):
-    text = f"{issue.title} {issue.body}".casefold().split()
-    c = Counter(text)
-    for word in c.most_common():
-        team = simple_match(word[0])
-        if team:
-            return team
-        team = file_match(word[0])
-        if team:
-            return team
-    return "triage"
-
-
-def simple_match(word):
-    pattern_matching = {
-        "agent-apm": ['apm', 'java', 'dotnet', 'ruby', 'trace'],
-        "containers": [
-            'container',
-            'pod',
-            'kubernetes',
-            'orchestrator',
-            'docker',
-            'k8s',
-            'kube',
-            'cluster',
-            'kubelet',
-            'helm',
-        ],
-        "agent-metrics-logs": ['logs', 'metric', 'log-ag', 'statsd', 'tags', 'hostnam'],
-        "agent-delivery": ['omnibus', 'packaging', 'script'],
-        "remote-config": ['installer', 'oci'],
-        "agent-cspm": ['cspm'],
-        "ebpf-platform": ['ebpf', 'system-prob', 'sys-prob'],
-        "agent-security": ['security', 'vuln', 'security-agent'],
-        "agent-shared-components": ['fips', 'inventory', 'payload', 'jmx', 'intak', 'gohai'],
-        "fleet": ['fleet', 'fleet-automation'],
-        "opentelemetry": ['otel', 'opentelemetry'],
-        "windows-agent": ['windows', 'sys32', 'powershell'],
-        "networks": ['tcp', 'udp', 'socket', 'network'],
-        "serverless": ['serverless'],
-        "integrations": ['integration', 'python', 'checks'],
-    }
-    for team, words in pattern_matching.items():
-        if any(w in word for w in words):
-            return team
-    return None
-
-
-def file_match(word):
-    dd_folders = [
-        'chocolatey',
-        'cmd',
-        'comp',
-        'dev',
-        'devenv',
-        'docs',
-        'internal',
-        'omnibus',
-        'pkg',
-        'rtloader',
-        'tasks',
-        'test',
-        'tools',
-    ]
-    p = Path(word)
-    if len(p.parts) > 1 and p.suffix:
-        path_folder = next((f for f in dd_folders if f in p.parts), None)
-        if path_folder:
-            file = '/'.join(p.parts[p.parts.index(path_folder) :])
-            return (
-                search_owners(file, ".github/CODEOWNERS")[0].casefold().replace("@datadog/", "")
-            )  # only return the first owner
-    return None
-
-
-def team_to_label(team):
-    dico = {
-        'apm-core-reliability-and-performance': "agent-apm",
-        'universal-service-monitoring': "usm",
-        'software-integrity-and-trust': "agent-security",
-        'agent-all': "triage",
-        'telemetry-and-analytics': "agent-apm",
-        'fleet': "fleet-automation",
-        'debugger': "dynamic-intrumentation",
-        'container-integrations': "containers",
-        'agent-e2e-testing': "agent-e2e-test",
-        'agent-integrations': "integrations",
-        'asm-go': "agent-security",
-    }
-    return dico.get(team, team)
-
-
 @contextmanager
 def download_to_tempfile(url, checksum=None):
     """

@@ -0,0 +1,146 @@
+from collections import Counter
+from pathlib import Path
+
+from tasks.libs.ciproviders.github_api import get_github_teams
+from tasks.libs.issue.model.constants import BASE_MODEL, MODEL, TEAMS
+from tasks.libs.owners.parsing import most_frequent_agent_team, search_owners
+
+
+def assign_with_model(issue):
+    """
+    Predict the owning team of an issue with the locally stored sequence-classification model.
+
+    The casefolded issue title and body are tokenized (truncated/padded to 64 tokens)
+    and classified. Returns a `(team, confidence)` tuple: the entry of TEAMS with the
+    highest logit and the softmax probability of that class.
+    """
+    # Heavy dependencies are imported lazily so that loading the module stays cheap
+    import torch
+    from transformers import AutoModelForSequenceClassification, AutoTokenizer
+
+    classifier = AutoModelForSequenceClassification.from_pretrained(
+        f"{MODEL}", ignore_mismatched_sizes=True, local_files_only=True
+    )
+    classifier.eval()
+    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
+    text = f"{issue.title} {issue.body}".casefold()
+    encoded = tokenizer(text, padding='max_length', truncation=True, max_length=64, return_tensors='pt')
+    with torch.no_grad():
+        logits = classifier(**encoded).logits
+    proba = torch.softmax(logits, dim=1)
+    # Softmax is monotonic, so the class with the highest logit also has the highest probability
+    best_class = torch.argmax(logits).item()
+    return TEAMS[best_class], proba[0][best_class].item()
+
+
+def assign_with_rules(issue, gh):
+    """
+    Find the owner of an issue with a cascade of heuristics, stopping at the first one
+    that yields something other than 'triage': existing team labels, teams of the
+    assignees, teams of the commenters, then keywords of the title and body.
+
+    Only organization members are considered among assignees and commenters.
+    The result is converted to a label name with `team_to_label`.
+    """
+
+    def team_of(users):
+        # Most frequent agent team among the users that belong to the organization
+        members = [user for user in users if gh.is_organization_member(user)]
+        return most_frequent_agent_team(get_github_teams(members))
+
+    # Each heuristic is wrapped in a callable so that it only runs when the previous ones failed
+    heuristics = (
+        lambda: guess_from_labels(issue),
+        lambda: team_of(issue.assignees),
+        lambda: team_of(comment.user for comment in issue.get_comments()),
+        lambda: guess_from_keywords(issue),
+    )
+    owner = 'triage'
+    for heuristic in heuristics:
+        owner = heuristic()
+        if owner != 'triage':
+            break
+    return team_to_label(owner)
+
+
+def guess_from_labels(issue):
+    """
+    Return the team named by the first `team/...` label of the issue that does not
+    mention triage (the part after the last slash), or 'triage' when there is none.
+    """
+    team_names = (
+        label.name.split("/")[-1]
+        for label in issue.labels
+        if label.name.startswith("team/") and "triage" not in label.name
+    )
+    return next(team_names, 'triage')
+
+
+def guess_from_keywords(issue):
+    """
+    Guess the owning team from the words of the issue title and body.
+
+    Words are casefolded and visited from the most to the least frequent; the first
+    one matching a team keyword or a file path of the repository decides the team.
+    Returns "triage" when no word matches.
+    """
+    words = f"{issue.title} {issue.body}".casefold().split()
+    for word, _occurrences in Counter(words).most_common():
+        # The keyword match takes precedence over the file path match for a given word
+        team = simple_match(word) or file_match(word)
+        if team:
+            return team
+    return "triage"
+
+
+def simple_match(word):
+    """
+    Return the first team owning a keyword that is a substring of `word`, or None.
+
+    Teams are checked in declaration order, so a word containing keywords of several
+    teams goes to the one declared first.
+    """
+    keywords_by_team = {
+        "agent-apm": ['apm', 'java', 'dotnet', 'ruby', 'trace'],
+        "containers": [
+            'container', 'pod', 'kubernetes', 'orchestrator', 'docker',
+            'k8s', 'kube', 'cluster', 'kubelet', 'helm',
+        ],  # fmt: skip
+        "agent-metrics-logs": ['logs', 'metric', 'log-ag', 'statsd', 'tags', 'hostnam'],
+        "agent-build-and-releases": ['omnibus', 'packaging', 'script'],
+        "remote-config": ['installer', 'oci'],
+        "agent-cspm": ['cspm'],
+        "ebpf-platform": ['ebpf', 'system-prob', 'sys-prob'],
+        "agent-security": ['security', 'vuln', 'security-agent'],
+        "agent-shared-components": ['fips', 'inventory', 'payload', 'jmx', 'intak', 'gohai'],
+        "fleet": ['fleet', 'fleet-automation'],
+        "opentelemetry": ['otel', 'opentelemetry'],
+        "windows-agent": ['windows', 'sys32', 'powershell'],
+        "networks": ['tcp', 'udp', 'socket', 'network'],
+        "serverless": ['serverless'],
+        "integrations": ['integration', 'python', 'checks'],
+    }
+    matching_teams = (
+        team for team, keywords in keywords_by_team.items() if any(keyword in word for keyword in keywords)
+    )
+    return next(matching_teams, None)
+
+
+def file_match(word):
+    """
+    Treat `word` as a file path of the repository and return the team owning it.
+
+    The word is only considered when it has several path components and a file
+    suffix, and when one of its components is a known top-level folder. The path is
+    then cut so that it starts at that folder and looked up in `.github/CODEOWNERS`.
+
+    Returns the first owner, casefolded and without the "@datadog/" prefix, or None
+    when the word is not such a path or when the lookup returns no owner for it.
+    """
+    dd_folders = [
+        'chocolatey',
+        'cmd',
+        'comp',
+        'dev',
+        'devenv',
+        'docs',
+        'internal',
+        'omnibus',
+        'pkg',
+        'pkg-config',
+        'rtloader',
+        'tasks',
+        'test',
+        'tools',
+    ]
+    p = Path(word)
+    if len(p.parts) > 1 and p.suffix:
+        # The first folder of dd_folders (in list order) present in the path is the anchor
+        path_folder = next((f for f in dd_folders if f in p.parts), None)
+        if path_folder:
+            file = '/'.join(p.parts[p.parts.index(path_folder) :])
+            owners = search_owners(file, ".github/CODEOWNERS")
+            # A path without any owner is a non-match, not an error
+            if owners:
+                return owners[0].casefold().replace("@datadog/", "")  # only return the first owner
+    return None
+
+
+def team_to_label(team):
+    """
+    Translate a team name into the name used by its issue label.
+
+    Teams without a dedicated entry are returned unchanged.
+    """
+    label_by_team = {
+        'apm-core-reliability-and-performance': "agent-apm",
+        'universal-service-monitoring': "usm",
+        'software-integrity-and-trust': "agent-security",
+        'agent-all': "triage",
+        'telemetry-and-analytics': "agent-apm",
+        'fleet': "fleet-automation",
+        'debugger': "dynamic-intrumentation",
+        'container-integrations': "containers",
+        'agent-e2e-testing': "agent-e2e-test",
+        'agent-integrations': "integrations",
+        'asm-go': "agent-security",
+    }
+    try:
+        return label_by_team[team]
+    except KeyError:
+        return team