From aa1778132e04415dcbb0c48aeb93c2b1d82cd9fb Mon Sep 17 00:00:00 2001 From: Nicolas Schweitzer Date: Sat, 1 Jun 2024 01:03:38 +0200 Subject: [PATCH 1/8] commit the model usage, missing model load --- .github/workflows/assign_issue.yml | 34 +++++++ tasks/issue.py | 21 ++++ tasks/libs/common/utils.py | 2 - tasks/libs/issue/assign.py | 146 ++++++++++++++++++++++++++++ tasks/libs/issue/model/actions.py | 117 ++++++++++++++++++++++ tasks/libs/issue/model/constants.py | 42 ++++++++ 6 files changed, 360 insertions(+), 2 deletions(-) create mode 100644 .github/workflows/assign_issue.yml create mode 100644 tasks/issue.py create mode 100644 tasks/libs/issue/assign.py create mode 100644 tasks/libs/issue/model/actions.py create mode 100644 tasks/libs/issue/model/constants.py diff --git a/.github/workflows/assign_issue.yml b/.github/workflows/assign_issue.yml new file mode 100644 index 0000000000000..bd8f4a94458ad --- /dev/null +++ b/.github/workflows/assign_issue.yml @@ -0,0 +1,34 @@ +--- +name: "Assign issue to a team" + +on: + issues: + types: [opened, reopened] + +jobs: + auto_assign_issue: + runs-on: ubuntu-latest + permissions: + issues: write + steps: + - name: Checkout repository + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 + with: + ref: ${{ github.head_ref }} + - name: Install python + uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0 + with: + python-version: 3.11 + cache: "pip" + - name: Install Python dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt -r tasks/libs/requirements-github.txt + pip install pytorch==1.0.2 transformers==4.41.2 + - name: Assign issue + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + team=`inv -e issue.assign --issue ${{ github.event.issue.number }}` + echo "Issue ${{ github.event.issue.number }} assigned to $team" + gh issue edit "${{ github.event.issue.number }}" --add-label "team/$team" diff --git a/tasks/issue.py b/tasks/issue.py new file mode 100644 index 0000000000000..cd0d2d9005110 --- /dev/null +++ b/tasks/issue.py @@ -0,0 +1,21 @@ +from invoke import task + +from tasks.libs.ciproviders.github_api import GithubAPI +from tasks.libs.issue.assign import assign_with_model, assign_with_rules +from tasks.libs.issue.model.actions import generate_model + + +@task +def assign_owner(_, issue_id): + gh = GithubAPI('DataDog/datadog-agent') + issue = gh.repo.get_issue(int(issue_id)) + owner, confidence = assign_with_model(issue) + if confidence < 0.5: + owner = assign_with_rules(issue, gh) + print(owner) + return owner + + +@task +def generate_the_model(_): + generate_model() diff --git a/tasks/libs/common/utils.py b/tasks/libs/common/utils.py index 1bbd88910a8e5..a2643c3bce2a1 100644 --- a/tasks/libs/common/utils.py +++ b/tasks/libs/common/utils.py @@ -11,11 +11,9 @@ import tempfile import time import traceback -from collections import Counter from contextlib import contextmanager from dataclasses import dataclass from functools import wraps -from pathlib import Path from subprocess import CalledProcessError, check_output from types import SimpleNamespace diff --git a/tasks/libs/issue/assign.py b/tasks/libs/issue/assign.py new file mode 100644 index 0000000000000..7c196070cbc7f --- /dev/null +++ b/tasks/libs/issue/assign.py @@ -0,0 +1,146 @@ +from collections import Counter +from pathlib import Path + +from tasks.libs.ciproviders.github_api import get_github_teams +from tasks.libs.issue.model.constants import BASE_MODEL, MODEL, TEAMS +from 
tasks.libs.owners.parsing import most_frequent_agent_team, search_owners + + +def assign_with_model(issue): + import torch + from transformers import AutoModelForSequenceClassification, AutoTokenizer + + m = AutoModelForSequenceClassification.from_pretrained( + f"{MODEL}", ignore_mismatched_sizes=True, local_files_only=True + ) + m.eval() + tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL) + inputs = tokenizer( + f"{issue.title} {issue.body}".casefold(), + padding='max_length', + truncation=True, + max_length=64, + return_tensors='pt', + ) + with torch.no_grad(): + outputs = m(**inputs) + logits = outputs.logits + proba = torch.softmax(logits, dim=1) + predicted_class = torch.argmax(proba).item() + confidence = proba[0][predicted_class].item() + return TEAMS[torch.argmax(outputs.logits).item()], confidence + + +def assign_with_rules(issue, gh): + owner = guess_from_labels(issue) + if owner == 'triage': + users = [user for user in issue.assignees if gh.is_organization_member(user)] + teams = get_github_teams(users) + owner = most_frequent_agent_team(teams) + if owner == 'triage': + commenters = [c.user for c in issue.get_comments() if gh.is_organization_member(c.user)] + teams = get_github_teams(commenters) + owner = most_frequent_agent_team(teams) + if owner == 'triage': + owner = guess_from_keywords(issue) + return team_to_label(owner) + + +def guess_from_labels(issue): + for label in issue.labels: + if label.name.startswith("team/") and "triage" not in label.name: + return label.name.split("/")[-1] + return 'triage' + + +def guess_from_keywords(issue): + text = f"{issue.title} {issue.body}".casefold().split() + c = Counter(text) + for word in c.most_common(): + team = simple_match(word[0]) + if team: + return team + team = file_match(word[0]) + if team: + return team + return "triage" + + +def simple_match(word): + pattern_matching = { + "agent-apm": ['apm', 'java', 'dotnet', 'ruby', 'trace'], + "containers": [ + 'container', + 'pod', + 'kubernetes', + 'orchestrator', + 'docker', + 'k8s', + 'kube', + 'cluster', + 'kubelet', + 'helm', + ], + "agent-metrics-logs": ['logs', 'metric', 'log-ag', 'statsd', 'tags', 'hostnam'], + "agent-build-and-releases": ['omnibus', 'packaging', 'script'], + "remote-config": ['installer', 'oci'], + "agent-cspm": ['cspm'], + "ebpf-platform": ['ebpf', 'system-prob', 'sys-prob'], + "agent-security": ['security', 'vuln', 'security-agent'], + "agent-shared-components": ['fips', 'inventory', 'payload', 'jmx', 'intak', 'gohai'], + "fleet": ['fleet', 'fleet-automation'], + "opentelemetry": ['otel', 'opentelemetry'], + "windows-agent": ['windows', 'sys32', 'powershell'], + "networks": ['tcp', 'udp', 'socket', 'network'], + "serverless": ['serverless'], + "integrations": ['integration', 'python', 'checks'], + } + for team, words in pattern_matching.items(): + if any(w in word for w in words): + return team + return None + + +def file_match(word): + dd_folders = [ + 'chocolatey', + 'cmd', + 'comp', + 'dev', + 'devenv', + 'docs', + 'internal', + 'omnibus', + 'pkg', + 'pkg-config', + 'rtloader', + 'tasks', + 'test', + 'tools', + ] + p = Path(word) + if len(p.parts) > 1 and p.suffix: + path_folder = next((f for f in dd_folders if f in p.parts), None) + if path_folder: + file = '/'.join(p.parts[p.parts.index(path_folder) :]) + return ( + search_owners(file, ".github/CODEOWNERS")[0].casefold().replace("@datadog/", "") + ) # only return the first owner + return None + + +def team_to_label(team): + dico = { + 'apm-core-reliability-and-performance': "agent-apm", + 
'universal-service-monitoring': "usm", + 'software-integrity-and-trust': "agent-security", + 'agent-all': "triage", + 'telemetry-and-analytics': "agent-apm", + 'fleet': "fleet-automation", + 'debugger': "dynamic-intrumentation", + 'container-integrations': "containers", + 'agent-e2e-testing': "agent-e2e-test", + 'agent-integrations': "integrations", + 'asm-go': "agent-security", + } + return dico.get(team, team) diff --git a/tasks/libs/issue/model/actions.py b/tasks/libs/issue/model/actions.py new file mode 100644 index 0000000000000..265166e6b2055 --- /dev/null +++ b/tasks/libs/issue/model/actions.py @@ -0,0 +1,117 @@ +from time import sleep + +from tasks.libs.ciproviders.github_api import GithubAPI +from tasks.libs.issue.assign import assign_with_rules +from tasks.libs.issue.model.constants import BASE_MODEL, MODEL, TEAMS + + +def generate_model(): + gh = GithubAPI('DataDog/datadog-agent') + d = gh.repo + issues = [] + teams = [] + n = 0 + for i in d.get_issues(state='all'): + issues.append(f"{i.title} {i.body}".casefold()) + teams.append(assign_with_rules(i, gh)) + # Sleep to avoid hitting the rate limit + n += 1 + if n % 2000 == 0: + sleep(3600) + + train_the_model(teams, issues, "issue_auto_assign_model", 64, 5) + + +def train_the_model(teams, issues, batch_size, epochs): + import torch + from sklearn.model_selection import train_test_split + from torch.utils.data import DataLoader, Dataset + from transformers import AutoModelForSequenceClassification, AutoTokenizer + + class IssueDataset(Dataset): + def __init__(self, issues, labels, tokenizer, max_length=64): + self.issues = issues + self.labels = labels + self.tokenizer = tokenizer + self.max_length = max_length + + def __len__(self): + return len(self.issues) + + def __getitem__(self, idx): + issue = self.issues[idx] + label = self.labels[idx] + inputs = self.tokenizer( + issue, max_length=self.max_length, padding="max_length", truncation=True, return_tensors="pt" + ) + return { + "input_ids": inputs["input_ids"].flatten(), + "attention_mask": inputs["attention_mask"].flatten(), + "labels": torch.tensor(label, dtype=torch.long), + } + + # Split the dataset into training and validation sets + train_issues, val_issues, train_teams, val_teams = train_test_split(issues, teams, test_size=0.2, random_state=42) + + # Define hyperparameters + learning_rate = 2e-5 + + # Load pre-trained BERT model and tokenizer + tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL) + model = AutoModelForSequenceClassification.from_pretrained( + BASE_MODEL, num_labels=len(set(teams)), ignore_mismatched_sizes=True + ) + + # Prepare dataset and dataloaders + train_teams = [TEAMS.index(t) for t in train_teams] + val_teams = [TEAMS.index(t) for t in val_teams] + train_dataset = IssueDataset(train_issues, train_teams, tokenizer, max_length=batch_size) + val_dataset = IssueDataset(val_issues, val_teams, tokenizer, max_length=batch_size) + print(f"set sizes : {len(train_dataset)} {len(val_dataset)} {len(set(teams))}") + + print(f"train_dataset {train_dataset[0]}") + print(f"train_dataset {train_dataset[1]}") + train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True) + val_loader = DataLoader(val_dataset, batch_size=batch_size) + + # Define optimizer and loss function + optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate) + + print("Start training...") + # Fine-tune the model + for epoch in range(epochs): + print(f"Epoch {epoch+1}/{epochs}") + model.train() + train_loss = 0.0 + for batch in train_loader: + optimizer.zero_grad() 
+ input_ids, attention_mask, labels = batch['input_ids'], batch['attention_mask'], batch['labels'] + outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels) + loss = outputs.loss + train_loss += loss.item() + loss.backward() + optimizer.step() + train_loss /= len(train_loader) + + # Evaluate on validation set + model.eval() + val_loss = 0.0 + correct = 0 + total = 0 + print("validate") + with torch.no_grad(): + for batch in val_loader: + input_ids, attention_mask, labels = batch['input_ids'], batch['attention_mask'], batch['labels'] + outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels) + loss = outputs.loss + val_loss += loss.item() + _, predicted = torch.max(outputs.logits, 1) + total += labels.size(0) + correct += (predicted == labels).sum().item() + val_loss /= len(val_loader) + val_accuracy = correct / total + + print( + f"Epoch {epoch+1}/{epochs}: Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.4f}" + ) + model.save_pretrained(MODEL) diff --git a/tasks/libs/issue/model/constants.py b/tasks/libs/issue/model/constants.py new file mode 100644 index 0000000000000..67ec1d5535109 --- /dev/null +++ b/tasks/libs/issue/model/constants.py @@ -0,0 +1,42 @@ +MODEL = "issue_auto_assign_model" +BASE_MODEL = "distilbert-base-uncased-finetuned-sst-2-english" +TEAMS = ( + 'container-ecosystems', + 'windows-agent', + 'remote-config', + 'container-platform', + 'documentation', + 'agent-security', + 'container-app', + 'agent-all', + 'processes', + 'agent-platform', + 'agent-release-management', + 'networks', + 'ebpf-platform', + 'agent-apm', + 'single-machine-performance', + 'agent-e2e-testing', + 'agent-developer-tools', + 'triage', + 'windows-kernel-integrations', + 'container-integrations', + 'software-integrity-and-trust', + 'opentelemetry', + 'universal-service-monitoring', + 'agent-build-and-releases', + 'agent-shared-components', + 'agent-integrations', + 'agent-metrics-logs', + 'platform-integrations', + 'agent-ci-experience', + 'asm-go', + 'agent-cspm', + 'debugger', + 'database-monitoring', + 'network-device-monitoring', + 'serverless', + 'apm-onboarding', + 'fleet', + 'agent-processing-and-routing', +) From 352de06ec67354e8b9437b6550cb1eb9670ad409 Mon Sep 17 00:00:00 2001 From: Nicolas Schweitzer Date: Mon, 2 Dec 2024 14:47:15 +0100 Subject: [PATCH 2/8] Run the job on a container with the model and required dependencies --- .github/workflows/assign_issue.yml | 15 ++------------- tasks/__init__.py | 2 ++ tasks/issue.py | 20 ++++++++++++++++++-- tasks/libs/issue/model/constants.py | 2 +- 4 files changed, 23 insertions(+), 16 deletions(-) diff --git a/.github/workflows/assign_issue.yml b/.github/workflows/assign_issue.yml index bd8f4a94458ad..dc52cad820600 100644 --- a/.github/workflows/assign_issue.yml +++ b/.github/workflows/assign_issue.yml @@ -8,6 +8,7 @@ on: jobs: auto_assign_issue: runs-on: ubuntu-latest + container: ghcr.io/datadog/agent-issue-auto-assign:latest permissions: issues: write steps: @@ -15,20 +16,8 @@ jobs: uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 with: ref: ${{ github.head_ref }} - - name: Install python - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0 - with: - python-version: 3.11 - cache: "pip" - - name: Install Python dependencies - run: | - python -m pip install --upgrade pip - pip install -r requirements.txt -r tasks/libs/requirements-github.txt - pip install pytorch==1.0.2 transformers==4.41.2 - name: Assign issue 
env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | - team=`inv -e issue.assign --issue ${{ github.event.issue.number }}` - echo "Issue ${{ github.event.issue.number }} assigned to $team" - gh issue edit "${{ github.event.issue.number }}" --add-label "team/$team" + team=`inv -e issue.assign-owner --issue ${{ github.event.issue.number }}` diff --git a/tasks/__init__.py b/tasks/__init__.py index 93ce84f1c37b2..3ae9ae75f6749 100644 --- a/tasks/__init__.py +++ b/tasks/__init__.py @@ -35,6 +35,7 @@ go_deps, installer, invoke_unit_tests, + issue, kmt, linter, modules, @@ -180,6 +181,7 @@ ns.add_collection(git) ns.add_collection(github_tasks, "github") ns.add_collection(gitlab_helpers, "gitlab") +ns.add_collection(issue) ns.add_collection(package) ns.add_collection(pipeline) ns.add_collection(notes) diff --git a/tasks/issue.py b/tasks/issue.py index cd0d2d9005110..edbef166ca920 100644 --- a/tasks/issue.py +++ b/tasks/issue.py @@ -1,18 +1,34 @@ +import os + from invoke import task from tasks.libs.ciproviders.github_api import GithubAPI from tasks.libs.issue.assign import assign_with_model, assign_with_rules from tasks.libs.issue.model.actions import generate_model +from tasks.libs.pipeline.notifications import GITHUB_SLACK_MAP @task -def assign_owner(_, issue_id): +def assign_owner(_, issue_id, dry_run=False): gh = GithubAPI('DataDog/datadog-agent') issue = gh.repo.get_issue(int(issue_id)) + assignment = "model" owner, confidence = assign_with_model(issue) if confidence < 0.5: + assignment = "rules" owner = assign_with_rules(issue, gh) - print(owner) + print(f"Issue assigned to team/{owner} with {assignment}") + if not dry_run: + # Edit issue label + issue.add_to_labels(f"team/{owner}") + # Post message + from slack_sdk import WebClient + + client = WebClient(os.environ['SLACK_API_TOKEN']) + channel = GITHUB_SLACK_MAP.get(owner.lower(), '#agent') + message = f':githubstatus_partial_outage: *New Community Issue*\n{issue.title} <{issue.html_url}|{gh.repo.name}#{issue_id}>' + message += "\nThe assignation to your team was done automatically, using issue content and title. Please redirect if needed." 
+ client.chat_postMessage(channel=channel, text=message) return owner diff --git a/tasks/libs/issue/model/constants.py b/tasks/libs/issue/model/constants.py index 67ec1d5535109..986d201bcf138 100644 --- a/tasks/libs/issue/model/constants.py +++ b/tasks/libs/issue/model/constants.py @@ -1,4 +1,4 @@ -MODEL = "issue_auto_assign_model" +MODEL = "/issue_auto_assign_model" BASE_MODEL = "distilbert-base-uncased-finetuned-sst-2-english" TEAMS = ( 'container-ecosystems', From c9940f0fdaead1c7b9e407a9d041ccff2af71b1b Mon Sep 17 00:00:00 2001 From: Nicolas Schweitzer Date: Mon, 2 Dec 2024 15:35:00 +0100 Subject: [PATCH 3/8] Move all issue-related code to an issue lib --- tasks/libs/common/utils.py | 100 -------------------------------- tasks/owners.py | 24 +------- tasks/unit_tests/issue_tests.py | 36 ++++++++++++ tasks/unit_tests/utils_tests.py | 35 +---------- 4 files changed, 38 insertions(+), 157 deletions(-) create mode 100644 tasks/unit_tests/issue_tests.py diff --git a/tasks/libs/common/utils.py b/tasks/libs/common/utils.py index a2643c3bce2a1..70eb60f20a5f6 100644 --- a/tasks/libs/common/utils.py +++ b/tasks/libs/common/utils.py @@ -24,7 +24,6 @@ from tasks.libs.common.color import Color, color_message from tasks.libs.common.constants import ALLOWED_REPO_ALL_BRANCHES, REPO_PATH from tasks.libs.common.git import get_commit_sha, get_default_branch -from tasks.libs.owners.parsing import search_owners from tasks.libs.releasing.version import get_version from tasks.libs.types.arch import Arch @@ -585,105 +584,6 @@ def parse_kernel_version(version: str) -> tuple[int, int, int, int]: return (int(match.group(1)), int(match.group(2)), int(match.group(4) or "0"), int(match.group(6) or "0")) -def guess_from_labels(issue): - for label in issue.labels: - if label.name.startswith("team/") and "triage" not in label.name: - return label.name.split("/")[-1] - return 'triage' - - -def guess_from_keywords(issue): - text = f"{issue.title} {issue.body}".casefold().split() - c = Counter(text) - for word in c.most_common(): - team = simple_match(word[0]) - if team: - return team - team = file_match(word[0]) - if team: - return team - return "triage" - - -def simple_match(word): - pattern_matching = { - "agent-apm": ['apm', 'java', 'dotnet', 'ruby', 'trace'], - "containers": [ - 'container', - 'pod', - 'kubernetes', - 'orchestrator', - 'docker', - 'k8s', - 'kube', - 'cluster', - 'kubelet', - 'helm', - ], - "agent-metrics-logs": ['logs', 'metric', 'log-ag', 'statsd', 'tags', 'hostnam'], - "agent-delivery": ['omnibus', 'packaging', 'script'], - "remote-config": ['installer', 'oci'], - "agent-cspm": ['cspm'], - "ebpf-platform": ['ebpf', 'system-prob', 'sys-prob'], - "agent-security": ['security', 'vuln', 'security-agent'], - "agent-shared-components": ['fips', 'inventory', 'payload', 'jmx', 'intak', 'gohai'], - "fleet": ['fleet', 'fleet-automation'], - "opentelemetry": ['otel', 'opentelemetry'], - "windows-agent": ['windows', 'sys32', 'powershell'], - "networks": ['tcp', 'udp', 'socket', 'network'], - "serverless": ['serverless'], - "integrations": ['integration', 'python', 'checks'], - } - for team, words in pattern_matching.items(): - if any(w in word for w in words): - return team - return None - - -def file_match(word): - dd_folders = [ - 'chocolatey', - 'cmd', - 'comp', - 'dev', - 'devenv', - 'docs', - 'internal', - 'omnibus', - 'pkg', - 'rtloader', - 'tasks', - 'test', - 'tools', - ] - p = Path(word) - if len(p.parts) > 1 and p.suffix: - path_folder = next((f for f in dd_folders if f in p.parts), None) - if 
path_folder: - file = '/'.join(p.parts[p.parts.index(path_folder) :]) - return ( - search_owners(file, ".github/CODEOWNERS")[0].casefold().replace("@datadog/", "") - ) # only return the first owner - return None - - -def team_to_label(team): - dico = { - 'apm-core-reliability-and-performance': "agent-apm", - 'universal-service-monitoring': "usm", - 'software-integrity-and-trust': "agent-security", - 'agent-all': "triage", - 'telemetry-and-analytics': "agent-apm", - 'fleet': "fleet-automation", - 'debugger': "dynamic-intrumentation", - 'container-integrations': "containers", - 'agent-e2e-testing': "agent-e2e-test", - 'agent-integrations': "integrations", - 'asm-go': "agent-security", - } - return dico.get(team, team) - - @contextmanager def download_to_tempfile(url, checksum=None): """ diff --git a/tasks/owners.py b/tasks/owners.py index 11c5f2c27d7ba..b2ecceffd581a 100644 --- a/tasks/owners.py +++ b/tasks/owners.py @@ -2,9 +2,7 @@ from invoke import task -from tasks.libs.ciproviders.github_api import GithubAPI, get_github_teams -from tasks.libs.common.utils import guess_from_keywords, guess_from_labels, team_to_label -from tasks.libs.owners.parsing import most_frequent_agent_team, read_owners, search_owners +from tasks.libs.owners.parsing import read_owners, search_owners from tasks.libs.pipeline.notifications import GITHUB_SLACK_MAP @@ -18,26 +16,6 @@ def find_codeowners(_, path, owners_file=".github/CODEOWNERS"): print(", ".join(search_owners(path, owners_file))) -@task -def guess_responsible(_, issue_id): - gh = GithubAPI('DataDog/datadog-agent') - issue = gh.repo.get_issue(int(issue_id)) - owner = guess_from_labels(issue) - if owner == 'triage': - users = [user for user in issue.assignees if gh.is_organization_member(user)] - teams = get_github_teams(users) - owner = most_frequent_agent_team(teams) - if owner == 'triage': - commenters = [c.user for c in issue.get_comments() if gh.is_organization_member(c.user)] - teams = get_github_teams(commenters) - owner = most_frequent_agent_team(teams) - if owner == 'triage': - owner = guess_from_keywords(issue) - owner = team_to_label(owner) - print(owner) - return owner - - def make_partition(names: list[str], owners_file: str, get_channels: bool = False) -> dict[str, set[str]]: """ From a list of job / file names, will create a dictionary with the teams as keys and the names as values. 
diff --git a/tasks/unit_tests/issue_tests.py b/tasks/unit_tests/issue_tests.py new file mode 100644 index 0000000000000..e72676fc50e3f --- /dev/null +++ b/tasks/unit_tests/issue_tests.py @@ -0,0 +1,36 @@ +import unittest +from unittest.mock import MagicMock + +from tasks.libs.issue.assign import guess_from_keywords, guess_from_labels + + +# We must define this class as we cannot override the name attribute in MagicMock +class Label: + def __init__(self, name): + self.name = name + + +class TestGuessFromLabels(unittest.TestCase): + def test_with_team(self): + issue = MagicMock(labels=[Label(name="team/triage"), Label(name="team/core")]) + + self.assertEqual(guess_from_labels(issue), "core") + + def test_without_team(self): + issue = MagicMock(labels=[Label(name="team/triage"), Label(name="team:burton")]) + + self.assertEqual(guess_from_labels(issue), "triage") + + +class TestGuessFromKeywords(unittest.TestCase): + def test_from_simple_match(self): + issue = MagicMock(title="I have an issue", body="I can't get any logs from the agent.") + self.assertEqual(guess_from_keywords(issue), "agent-metrics-logs") + + def test_with_a_file(self): + issue = MagicMock(title="fix bug", body="It comes from the file pkg/agent/build.py") + self.assertEqual(guess_from_keywords(issue), "agent-shared-components") + + def test_no_match(self): + issue = MagicMock(title="fix bug", body="It comes from the file... hm I don't know.") + self.assertEqual(guess_from_keywords(issue), "triage") diff --git a/tasks/unit_tests/utils_tests.py b/tasks/unit_tests/utils_tests.py index 02aae11c5fc80..3d867d240b2b9 100644 --- a/tasks/unit_tests/utils_tests.py +++ b/tasks/unit_tests/utils_tests.py @@ -1,7 +1,6 @@ import unittest -from unittest.mock import MagicMock -from tasks.libs.common.utils import clean_nested_paths, guess_from_keywords, guess_from_labels +from tasks.libs.common.utils import clean_nested_paths class TestUtils(unittest.TestCase): @@ -33,35 +32,3 @@ def test_clean_nested_paths_2(self): ] expected_paths = ["."] self.assertEqual(clean_nested_paths(paths), expected_paths) - - -# We must define this class as we cannot override the name attribute in MagicMock -class Label: - def __init__(self, name): - self.name = name - - -class TestGuessFromLabels(unittest.TestCase): - def test_with_team(self): - issue = MagicMock(labels=[Label(name="team/triage"), Label(name="team/core")]) - - self.assertEqual(guess_from_labels(issue), "core") - - def test_without_team(self): - issue = MagicMock(labels=[Label(name="team/triage"), Label(name="team:burton")]) - - self.assertEqual(guess_from_labels(issue), "triage") - - -class TestGuessFromKeywords(unittest.TestCase): - def test_from_simple_match(self): - issue = MagicMock(title="I have an issue", body="I can't get any logs from the agent.") - self.assertEqual(guess_from_keywords(issue), "agent-metrics-logs") - - def test_with_a_file(self): - issue = MagicMock(title="fix bug", body="It comes from the file pkg/agent/build.py") - self.assertEqual(guess_from_keywords(issue), "agent-shared-components") - - def test_no_match(self): - issue = MagicMock(title="fix bug", body="It comes from the file... 
hm I don't know.") - self.assertEqual(guess_from_keywords(issue), "triage") From 81b70ae710af145359411519ad7c9adbbc786913 Mon Sep 17 00:00:00 2001 From: Nicolas Schweitzer Date: Mon, 2 Dec 2024 15:37:09 +0100 Subject: [PATCH 4/8] Default to a shared channel --- tasks/issue.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/issue.py b/tasks/issue.py index edbef166ca920..aea8810dae60c 100644 --- a/tasks/issue.py +++ b/tasks/issue.py @@ -25,7 +25,7 @@ def assign_owner(_, issue_id, dry_run=False): from slack_sdk import WebClient client = WebClient(os.environ['SLACK_API_TOKEN']) - channel = GITHUB_SLACK_MAP.get(owner.lower(), '#agent') + channel = GITHUB_SLACK_MAP.get(owner.lower(), '#agent-ask-anything') message = f':githubstatus_partial_outage: *New Community Issue*\n{issue.title} <{issue.html_url}|{gh.repo.name}#{issue_id}>' message += "\nThe assignation to your team was done automatically, using issue content and title. Please redirect if needed." client.chat_postMessage(channel=channel, text=message) From 4f12187af3809a9777056a814eb1369ddc87e943 Mon Sep 17 00:00:00 2001 From: Nicolas Schweitzer Date: Mon, 2 Dec 2024 16:28:26 +0100 Subject: [PATCH 5/8] codereview: Checkout main and beautify python code --- .github/workflows/assign_issue.yml | 2 -- tasks/issue.py | 6 +++--- tasks/libs/issue/model/actions.py | 12 +++++------- 3 files changed, 8 insertions(+), 12 deletions(-) diff --git a/.github/workflows/assign_issue.yml b/.github/workflows/assign_issue.yml index dc52cad820600..0a8f155d91d5f 100644 --- a/.github/workflows/assign_issue.yml +++ b/.github/workflows/assign_issue.yml @@ -14,8 +14,6 @@ jobs: steps: - name: Checkout repository uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 - with: - ref: ${{ github.head_ref }} - name: Assign issue env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/tasks/issue.py b/tasks/issue.py index aea8810dae60c..3d3f80161fdd4 100644 --- a/tasks/issue.py +++ b/tasks/issue.py @@ -4,7 +4,7 @@ from tasks.libs.ciproviders.github_api import GithubAPI from tasks.libs.issue.assign import assign_with_model, assign_with_rules -from tasks.libs.issue.model.actions import generate_model +from tasks.libs.issue.model.actions import fetch_data_and_train_model from tasks.libs.pipeline.notifications import GITHUB_SLACK_MAP @@ -33,5 +33,5 @@ def assign_owner(_, issue_id, dry_run=False): @task -def generate_the_model(_): - generate_model() +def generate_model(_): + fetch_data_and_train_model() diff --git a/tasks/libs/issue/model/actions.py b/tasks/libs/issue/model/actions.py index 265166e6b2055..5e35a7a08f452 100644 --- a/tasks/libs/issue/model/actions.py +++ b/tasks/libs/issue/model/actions.py @@ -5,18 +5,16 @@ from tasks.libs.issue.model.constants import BASE_MODEL, MODEL, TEAMS -def generate_model(): +def fetch_data_and_train_model(): gh = GithubAPI('DataDog/datadog-agent') d = gh.repo issues = [] teams = [] - n = 0 - for i in d.get_issues(state='all'): - issues.append(f"{i.title} {i.body}".casefold()) - teams.append(assign_with_rules(i, gh)) + for id, issue in enumerate(d.get_issues(state='all')): + issues.append(f"{issue.title} {issue.body}".casefold()) + teams.append(assign_with_rules(issue, gh)) # Sleep to avoid hitting the rate limit - n += 1 - if n % 2000 == 0: + if id % 2000 == 0: sleep(3600) train_the_model(teams, issues, "issue_auto_assign_model", 64, 5) From 7d4c9d0da9f94453b4f4c8f08cb92ad5e55bbf71 Mon Sep 17 00:00:00 2001 From: Nicolas Schweitzer Date: Tue, 3 Dec 2024 10:19:00 +0100 Subject: [PATCH 6/8] add 
permissions to login into ghcr.io --- .github/workflows/assign_issue.yml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/workflows/assign_issue.yml b/.github/workflows/assign_issue.yml index 0a8f155d91d5f..7f99066b73af2 100644 --- a/.github/workflows/assign_issue.yml +++ b/.github/workflows/assign_issue.yml @@ -8,8 +8,13 @@ on: jobs: auto_assign_issue: runs-on: ubuntu-latest - container: ghcr.io/datadog/agent-issue-auto-assign:latest + container: + image: ghcr.io/datadog/agent-issue-auto-assign:latest + credentials: + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} permissions: + packages: read issues: write steps: - name: Checkout repository From 336b31b84f54c9f3c18c4cf86e464ae589b56bdc Mon Sep 17 00:00:00 2001 From: Nicolas Schweitzer Date: Tue, 3 Dec 2024 16:10:24 +0100 Subject: [PATCH 7/8] with a small image, install dependencies --- .github/workflows/assign_issue.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/assign_issue.yml b/.github/workflows/assign_issue.yml index 7f99066b73af2..b8e11289336ac 100644 --- a/.github/workflows/assign_issue.yml +++ b/.github/workflows/assign_issue.yml @@ -19,6 +19,10 @@ jobs: steps: - name: Checkout repository uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 + - name: Install dependencies + # Dependencies are installed at runtime. Otherwise it would create a huge image see https://hub.docker.com/r/pytorch/pytorch/tags + run: | + pip install --upgrade pip && pip install --no-compile --no-cache-dir torch transformers invoke codeowners slack-sdk PyGithub python-gitlab semver - name: Assign issue env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} From 22e287536c2ebbfcd1a03ec4a7d57d929b5e7ab2 Mon Sep 17 00:00:00 2001 From: Nicolas Schweitzer Date: Tue, 3 Dec 2024 16:23:04 +0100 Subject: [PATCH 8/8] fix github token name --- .github/workflows/assign_issue.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/assign_issue.yml b/.github/workflows/assign_issue.yml index b8e11289336ac..be9db6209604b 100644 --- a/.github/workflows/assign_issue.yml +++ b/.github/workflows/assign_issue.yml @@ -2,8 +2,8 @@ name: "Assign issue to a team" on: - issues: - types: [opened, reopened] + issues: + types: [opened, reopened] jobs: auto_assign_issue: @@ -25,6 +25,6 @@ jobs: pip install --upgrade pip && pip install --no-compile --no-cache-dir torch transformers invoke codeowners slack-sdk PyGithub python-gitlab semver - name: Assign issue env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | - team=`inv -e issue.assign-owner --issue ${{ github.event.issue.number }}` + inv -e issue.assign-owner --issue ${{ github.event.issue.number }}
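
The series adds two invoke entry points: issue.assign-owner, which the workflow above runs for every opened or reopened issue, and issue.generate-model, which retrains the classifier from historical issues. A minimal local sketch of exercising them, assuming the runtime dependencies from the last patch are installed, a GitHub token is exported, and 12345 is a placeholder issue number; the --dry-run flag assumes invoke's usual underscore-to-dash mapping of the task's dry_run parameter:

    # Install the same runtime dependencies the workflow installs in its container step.
    pip install --no-compile --no-cache-dir torch transformers invoke codeowners slack-sdk PyGithub python-gitlab semver

    # Print the predicted owning team for an issue without adding the team/ label or posting to Slack.
    inv -e issue.assign-owner --issue 12345 --dry-run

    # Rebuild the classifier: walks the full issue history (sleeping periodically to respect the
    # GitHub rate limit), fine-tunes the base model, and saves it under the MODEL path.
    inv -e issue.generate-model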