Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(issues): Enable a new issue workflow for automatic issue triage #26207

Merged
merged 8 commits into from
Dec 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions .github/workflows/assign_issue.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
---
# Workflow that routes newly opened (or reopened) issues to an owning team.
name: "Assign issue to a team"

on:
  issues:
    types:
      - opened
      - reopened

jobs:
  auto_assign_issue:
    runs-on: ubuntu-latest
    # Every step of the job runs inside this image; the credentials below are
    # the ones used to pull it from ghcr.io.
    container:
      image: ghcr.io/datadog/agent-issue-auto-assign:latest
      credentials:
        username: ${{ github.actor }}
        password: ${{ secrets.GITHUB_TOKEN }}
    permissions:
      # Presumably required to pull the container image above - confirm.
      packages: read
      # Needed to update the label on the issue.
      issues: write
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To update the label

steps:
  - name: Checkout repository
    uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4
  - name: Install dependencies
    # Dependencies are installed at runtime instead of being baked into the
    # container image, which would otherwise be huge
    # (see https://hub.docker.com/r/pytorch/pytorch/tags).
    run: |
      pip install --upgrade pip && pip install --no-compile --no-cache-dir torch transformers invoke codeowners slack-sdk PyGithub python-gitlab semver
  - name: Assign issue
    env:
      GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      # NOTE(review): when not in dry-run mode, issue.assign-owner also reads
      # SLACK_API_TOKEN from the environment to post its Slack message. It is
      # not exported here - confirm which secret should provide it.
    # The task parameter is named `issue_id`, so the generated long flag is
    # `--issue-id`; a bare `--issue` is not a flag of this task.
    run: |
      inv -e issue.assign-owner --issue-id ${{ github.event.issue.number }}
2 changes: 2 additions & 0 deletions tasks/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
go_deps,
installer,
invoke_unit_tests,
issue,
kmt,
linter,
modules,
Expand Down Expand Up @@ -180,6 +181,7 @@
ns.add_collection(git)
ns.add_collection(github_tasks, "github")
ns.add_collection(gitlab_helpers, "gitlab")
ns.add_collection(issue)
ns.add_collection(package)
ns.add_collection(pipeline)
ns.add_collection(notes)
Expand Down
37 changes: 37 additions & 0 deletions tasks/issue.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import os

from invoke import task

from tasks.libs.ciproviders.github_api import GithubAPI
from tasks.libs.issue.assign import assign_with_model, assign_with_rules
from tasks.libs.issue.model.actions import fetch_data_and_train_model
from tasks.libs.pipeline.notifications import GITHUB_SLACK_MAP


@task
def assign_owner(_, issue_id, dry_run=False):
    """
    Guess which team owns a GitHub issue, label the issue and notify the team on Slack.

    The model prediction is used when its confidence is at least 0.5;
    otherwise the rule-based heuristics decide.

    Args:
        issue_id: Number of the issue in DataDog/datadog-agent (converted with int()).
        dry_run: When true, only print the decision; the issue is not labelled
            and no Slack message is sent.

    Returns:
        The name of the selected team (without the "team/" prefix).

    Raises:
        KeyError: If SLACK_API_TOKEN is not set and dry_run is false.
    """
    gh = GithubAPI('DataDog/datadog-agent')
    issue = gh.repo.get_issue(int(issue_id))
    assignment = "model"
    owner, confidence = assign_with_model(issue)
    if confidence < 0.5:
        # The model is not confident enough: fall back to the rule-based guess.
        assignment = "rules"
        owner = assign_with_rules(issue, gh)
    print(f"Issue assigned to team/{owner} with {assignment}")
    if dry_run:
        return owner

    # Resolve everything the Slack notification needs *before* touching the
    # issue: a missing token or package must not leave the issue labelled
    # while the team is never told about it.
    from slack_sdk import WebClient

    client = WebClient(os.environ['SLACK_API_TOKEN'])
    # NOTE(review): assumes GITHUB_SLACK_MAP is keyed by the bare lower-case
    # team name; unknown teams go to the default channel - confirm.
    channel = GITHUB_SLACK_MAP.get(owner.lower(), '#agent-ask-anything')
    message = f':githubstatus_partial_outage: *New Community Issue*\n{issue.title} <{issue.html_url}|{gh.repo.name}#{issue_id}>'
    message += "\nThe assignation to your team was done automatically, using issue content and title. Please redirect if needed."

    # Edit issue label
    issue.add_to_labels(f"team/{owner}")
    # Post message
    client.chat_postMessage(channel=channel, text=message)
    return owner


@task
def generate_model(_):
    """
    Invoke entry point that delegates to fetch_data_and_train_model().
    """
    fetch_data_and_train_model()
102 changes: 0 additions & 102 deletions tasks/libs/common/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,9 @@
import tempfile
import time
import traceback
from collections import Counter
from contextlib import contextmanager
from dataclasses import dataclass
from functools import wraps
from pathlib import Path
from subprocess import CalledProcessError, check_output
from types import SimpleNamespace

Expand All @@ -26,7 +24,6 @@
from tasks.libs.common.color import Color, color_message
from tasks.libs.common.constants import ALLOWED_REPO_ALL_BRANCHES, REPO_PATH
from tasks.libs.common.git import get_commit_sha, get_default_branch
from tasks.libs.owners.parsing import search_owners
from tasks.libs.releasing.version import get_version
from tasks.libs.types.arch import Arch

Expand Down Expand Up @@ -587,105 +584,6 @@ def parse_kernel_version(version: str) -> tuple[int, int, int, int]:
return (int(match.group(1)), int(match.group(2)), int(match.group(4) or "0"), int(match.group(6) or "0"))


def guess_from_labels(issue):
    """
    Return the team named by the first usable "team/..." label of the issue.

    Labels whose name contains "triage" are ignored. The team is the text
    after the last "/" of the label name. Returns 'triage' when no label
    qualifies.
    """
    candidates = (
        name
        for name in (lbl.name for lbl in issue.labels)
        if name.startswith("team/") and "triage" not in name
    )
    first = next(candidates, None)
    if first is None:
        return 'triage'
    return first.rsplit("/", 1)[-1]


def guess_from_keywords(issue):
    """
    Guess the owning team from the words of the issue title and body.

    The text is casefolded and split on whitespace; words are then tried from
    most to least frequent. For each word the keyword table (simple_match) is
    consulted first, then the file-path lookup (file_match). Returns "triage"
    when no word matches.
    """
    frequencies = Counter(f"{issue.title} {issue.body}".casefold().split())
    for token, _occurrences in frequencies.most_common():
        team = simple_match(token) or file_match(token)
        if team:
            return team
    return "triage"


def simple_match(word):
    """
    Map a single word to a team using substring keywords.

    Teams are tried in the order listed below and the first team owning a
    keyword that appears anywhere inside `word` wins. Returns None when no
    keyword is contained in the word.
    """
    keywords_by_team = (
        ("agent-apm", ('apm', 'java', 'dotnet', 'ruby', 'trace')),
        (
            "containers",
            (
                'container',
                'pod',
                'kubernetes',
                'orchestrator',
                'docker',
                'k8s',
                'kube',
                'cluster',
                'kubelet',
                'helm',
            ),
        ),
        ("agent-metrics-logs", ('logs', 'metric', 'log-ag', 'statsd', 'tags', 'hostnam')),
        ("agent-delivery", ('omnibus', 'packaging', 'script')),
        ("remote-config", ('installer', 'oci')),
        ("agent-cspm", ('cspm',)),
        ("ebpf-platform", ('ebpf', 'system-prob', 'sys-prob')),
        ("agent-security", ('security', 'vuln', 'security-agent')),
        ("agent-shared-components", ('fips', 'inventory', 'payload', 'jmx', 'intak', 'gohai')),
        ("fleet", ('fleet', 'fleet-automation')),
        ("opentelemetry", ('otel', 'opentelemetry')),
        ("windows-agent", ('windows', 'sys32', 'powershell')),
        ("networks", ('tcp', 'udp', 'socket', 'network')),
        ("serverless", ('serverless',)),
        ("integrations", ('integration', 'python', 'checks')),
    )
    for team, fragments in keywords_by_team:
        for fragment in fragments:
            if fragment in word:
                return team
    return None


def file_match(word):
    """
    Map a word that looks like a repository file path to the team owning that file.

    A word qualifies when it has more than one path component, has a file
    suffix, and one of its components is a known top-level repository folder.
    The path is cut so that it starts at the *first* such component in path
    order (so "docs/dev/x.md" stays "docs/dev/x.md" instead of becoming
    "dev/x.md"), then looked up in .github/CODEOWNERS.

    Returns:
        The first owner, casefolded and without the "@datadog/" prefix, or
        None when the word is not a usable path or the lookup yields no owner.
    """
    dd_folders = {
        'chocolatey',
        'cmd',
        'comp',
        'dev',
        'devenv',
        'docs',
        'internal',
        'omnibus',
        'pkg',
        'rtloader',
        'tasks',
        'test',
        'tools',
    }
    p = Path(word)
    if len(p.parts) <= 1 or not p.suffix:
        return None
    # Anchor on the earliest known folder in the path, not on the folder that
    # happens to come first in the table above.
    start = next((i for i, part in enumerate(p.parts) if part in dd_folders), None)
    if start is None:
        return None
    file = '/'.join(p.parts[start:])
    owners = search_owners(file, ".github/CODEOWNERS")
    if not owners:
        # No owner reported for this path: let the caller try something else
        # rather than failing on an empty result.
        return None
    return owners[0].casefold().replace("@datadog/", "")  # only return the first owner


def team_to_label(team):
    """
    Translate a team name into the suffix used for its "team/<suffix>" label.

    Names without an entry in the table are returned unchanged.
    """
    label_by_team = {
        'apm-core-reliability-and-performance': "agent-apm",
        'universal-service-monitoring': "usm",
        'software-integrity-and-trust': "agent-security",
        'agent-all': "triage",
        'telemetry-and-analytics': "agent-apm",
        'fleet': "fleet-automation",
        # Was spelled "dynamic-intrumentation" (missing "s").
        # NOTE(review): confirm this matches the label that exists on the repository.
        'debugger': "dynamic-instrumentation",
        'container-integrations': "containers",
        'agent-e2e-testing': "agent-e2e-test",
        'agent-integrations': "integrations",
        'asm-go': "agent-security",
    }
    return label_by_team.get(team, team)


@contextmanager
def download_to_tempfile(url, checksum=None):
"""
Expand Down
146 changes: 146 additions & 0 deletions tasks/libs/issue/assign.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
from collections import Counter
from pathlib import Path

from tasks.libs.ciproviders.github_api import get_github_teams
from tasks.libs.issue.model.constants import BASE_MODEL, MODEL, TEAMS
from tasks.libs.owners.parsing import most_frequent_agent_team, search_owners


def assign_with_model(issue):
    """
    Predict the owning team of an issue with the sequence-classification model.

    The casefolded "<title> <body>" text is tokenized (padded/truncated to 64
    tokens) and classified by the model stored under MODEL, which is loaded
    from local files only.

    Returns:
        A (team, confidence) tuple: the entry of TEAMS at the most probable
        class index and the softmax probability of that same class.
    """
    # Imported lazily so that the heavy dependencies are only needed by this task.
    import torch
    from transformers import AutoModelForSequenceClassification, AutoTokenizer

    m = AutoModelForSequenceClassification.from_pretrained(
        f"{MODEL}", ignore_mismatched_sizes=True, local_files_only=True
    )
    m.eval()
    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
    inputs = tokenizer(
        f"{issue.title} {issue.body}".casefold(),
        padding='max_length',
        truncation=True,
        max_length=64,
        return_tensors='pt',
    )
    with torch.no_grad():
        outputs = m(**inputs)
    # A single text is classified, so only row 0 of the batch is meaningful.
    proba = torch.softmax(outputs.logits, dim=1)[0]
    predicted_class = torch.argmax(proba).item()
    confidence = proba[predicted_class].item()
    # Use the same index for the team and for its confidence so that the two
    # can never refer to different classes.
    return TEAMS[predicted_class], confidence


def assign_with_rules(issue, gh):
    """
    Guess the owning team of an issue with a chain of heuristics.

    Sources are tried in this order, moving on only while the current answer
    is 'triage': existing team labels, teams of the assignees that are
    organization members, teams of the commenters that are organization
    members, and finally keywords of the title/body. The result is converted
    with team_to_label() before being returned.
    """

    def from_assignees():
        members = [person for person in issue.assignees if gh.is_organization_member(person)]
        return most_frequent_agent_team(get_github_teams(members))

    def from_commenters():
        members = [
            comment.user for comment in issue.get_comments() if gh.is_organization_member(comment.user)
        ]
        return most_frequent_agent_team(get_github_teams(members))

    def from_keywords():
        return guess_from_keywords(issue)

    owner = guess_from_labels(issue)
    for fallback in (from_assignees, from_commenters, from_keywords):
        if owner != 'triage':
            break
        owner = fallback()
    return team_to_label(owner)


def guess_from_labels(issue):
    """
    Return the team named by the first usable "team/..." label of the issue.

    Labels whose name contains "triage" are ignored. The team is the text
    after the last "/" of the label name. Returns 'triage' when no label
    qualifies.
    """
    candidates = (
        name
        for name in (lbl.name for lbl in issue.labels)
        if name.startswith("team/") and "triage" not in name
    )
    first = next(candidates, None)
    if first is None:
        return 'triage'
    return first.rsplit("/", 1)[-1]


def guess_from_keywords(issue):
    """
    Guess the owning team from the words of the issue title and body.

    The text is casefolded and split on whitespace; words are then tried from
    most to least frequent. For each word the keyword table (simple_match) is
    consulted first, then the file-path lookup (file_match). Returns "triage"
    when no word matches.
    """
    frequencies = Counter(f"{issue.title} {issue.body}".casefold().split())
    for token, _occurrences in frequencies.most_common():
        team = simple_match(token) or file_match(token)
        if team:
            return team
    return "triage"


def simple_match(word):
    """
    Map a single word to a team using substring keywords.

    Teams are tried in the order listed below and the first team owning a
    keyword that appears anywhere inside `word` wins. Returns None when no
    keyword is contained in the word.
    """
    keywords_by_team = (
        ("agent-apm", ('apm', 'java', 'dotnet', 'ruby', 'trace')),
        (
            "containers",
            (
                'container',
                'pod',
                'kubernetes',
                'orchestrator',
                'docker',
                'k8s',
                'kube',
                'cluster',
                'kubelet',
                'helm',
            ),
        ),
        ("agent-metrics-logs", ('logs', 'metric', 'log-ag', 'statsd', 'tags', 'hostnam')),
        ("agent-build-and-releases", ('omnibus', 'packaging', 'script')),
        ("remote-config", ('installer', 'oci')),
        ("agent-cspm", ('cspm',)),
        ("ebpf-platform", ('ebpf', 'system-prob', 'sys-prob')),
        ("agent-security", ('security', 'vuln', 'security-agent')),
        ("agent-shared-components", ('fips', 'inventory', 'payload', 'jmx', 'intak', 'gohai')),
        ("fleet", ('fleet', 'fleet-automation')),
        ("opentelemetry", ('otel', 'opentelemetry')),
        ("windows-agent", ('windows', 'sys32', 'powershell')),
        ("networks", ('tcp', 'udp', 'socket', 'network')),
        ("serverless", ('serverless',)),
        ("integrations", ('integration', 'python', 'checks')),
    )
    for team, fragments in keywords_by_team:
        for fragment in fragments:
            if fragment in word:
                return team
    return None


def file_match(word):
    """
    Map a word that looks like a repository file path to the team owning that file.

    A word qualifies when it has more than one path component, has a file
    suffix, and one of its components is a known top-level repository folder.
    The path is cut so that it starts at the *first* such component in path
    order (so "docs/dev/x.md" stays "docs/dev/x.md" instead of becoming
    "dev/x.md"), then looked up in .github/CODEOWNERS.

    Returns:
        The first owner, casefolded and without the "@datadog/" prefix, or
        None when the word is not a usable path or the lookup yields no owner.
    """
    dd_folders = {
        'chocolatey',
        'cmd',
        'comp',
        'dev',
        'devenv',
        'docs',
        'internal',
        'omnibus',
        'pkg',
        'pkg-config',
        'rtloader',
        'tasks',
        'test',
        'tools',
    }
    p = Path(word)
    if len(p.parts) <= 1 or not p.suffix:
        return None
    # Anchor on the earliest known folder in the path, not on the folder that
    # happens to come first in the table above.
    start = next((i for i, part in enumerate(p.parts) if part in dd_folders), None)
    if start is None:
        return None
    file = '/'.join(p.parts[start:])
    owners = search_owners(file, ".github/CODEOWNERS")
    if not owners:
        # No owner reported for this path: let the caller try something else
        # rather than failing on an empty result.
        return None
    return owners[0].casefold().replace("@datadog/", "")  # only return the first owner


def team_to_label(team):
    """
    Translate a team name into the suffix used for its "team/<suffix>" label.

    Names without an entry in the table are returned unchanged.
    """
    label_by_team = {
        'apm-core-reliability-and-performance': "agent-apm",
        'universal-service-monitoring': "usm",
        'software-integrity-and-trust': "agent-security",
        'agent-all': "triage",
        'telemetry-and-analytics': "agent-apm",
        'fleet': "fleet-automation",
        # Was spelled "dynamic-intrumentation" (missing "s").
        # NOTE(review): confirm this matches the label that exists on the repository.
        'debugger': "dynamic-instrumentation",
        'container-integrations': "containers",
        'agent-e2e-testing': "agent-e2e-test",
        'agent-integrations': "integrations",
        'asm-go': "agent-security",
    }
    return label_by_team.get(team, team)
Loading
Loading