From c7e6f59f732f7adeed63d6c0760ef8f2553c1b94 Mon Sep 17 00:00:00 2001
From: MohamedBayan <mohamadbayan2014noname@gmail.com>
Date: Tue, 26 Nov 2024 13:58:13 +0300
Subject: [PATCH] Adding wise-spam assets

---
 .../spam/Spam_GPT4_FewShot_Arabic.py          | 102 +++++++++++++++++
 .../spam/Spam_GPT4_FewShot_English.py         | 104 ++++++++++++++++++
 .../spam/Spam_GPT4_FewShot_Mixed.py           | 102 +++++++++++++++++
 .../spam/Spam_GPT4_ZeroShot_Arabic.py         |  68 ++++++++++++
 .../spam/Spam_GPT4_ZeroShot_English.py        |  68 ++++++++++++
 .../spam/Spam_GPT4_ZeroShot_Mixed.py          |  67 +++++++++++
 .../spam/Spam_JAIS13b_FewShot_Arabic.py       |  91 +++++++++++++++
 .../spam/Spam_JAIS13b_FewShot_English.py      |  88 +++++++++++++++
 .../spam/Spam_JAIS13b_FewShot_Mixed.py        |  63 +++++++++++
 .../spam/Spam_JAIS13b_ZeroShot_Arabic.py      |  61 ++++++++++
 .../spam/Spam_JAIS13b_ZeroShot_English.py     |  44 ++++++++
 .../spam/Spam_JAIS13b_ZeroShot_Mixed.py       |  49 +++++++++
 .../spam/Spam_Llama3-8b_FewShot_Arabic.py     |  95 ++++++++++++++++
 .../spam/Spam_Llama3-8b_FewShot_English.py    | 103 +++++++++++++++++
 .../spam/Spam_Llama3-8b_FewShot_Mixed.py      |  91 +++++++++++++++
 .../spam/Spam_Llama3-8b_ZeroShot_Arabic.py    |  75 +++++++++++++
 .../spam/Spam_Llama3-8b_ZeroShot_English.py   |  71 ++++++++++++
 .../spam/Spam_Llama3-8b_ZeroShot_Mixed.py     |  84 ++++++++++++++
 18 files changed, 1426 insertions(+)
 create mode 100755 assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_FewShot_Arabic.py
 create mode 100755 assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_FewShot_English.py
 create mode 100755 assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_FewShot_Mixed.py
 create mode 100755 assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_ZeroShot_Arabic.py
 create mode 100755 assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_ZeroShot_English.py
 create mode 100755 assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_ZeroShot_Mixed.py
 create mode 100755 assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_FewShot_Arabic.py
 create mode 100755 assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_FewShot_English.py
 create mode 100755 assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_FewShot_Mixed.py
 create mode 100755 assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_ZeroShot_Arabic.py
 create mode 100755 assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_ZeroShot_English.py
 create mode 100755 assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_ZeroShot_Mixed.py
 create mode 100755 assets/ar/factuality_disinformation_harmful_content/spam/Spam_Llama3-8b_FewShot_Arabic.py
 create mode 100755 assets/ar/factuality_disinformation_harmful_content/spam/Spam_Llama3-8b_FewShot_English.py
 create mode 100755 assets/ar/factuality_disinformation_harmful_content/spam/Spam_Llama3-8b_FewShot_Mixed.py
 create mode 100755 assets/ar/factuality_disinformation_harmful_content/spam/Spam_Llama3-8b_ZeroShot_Arabic.py
 create mode 100755 assets/ar/factuality_disinformation_harmful_content/spam/Spam_Llama3-8b_ZeroShot_English.py
 create mode 100755 assets/ar/factuality_disinformation_harmful_content/spam/Spam_Llama3-8b_ZeroShot_Mixed.py

diff --git a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_FewShot_Arabic.py
new file mode 100755
index 00000000..104d2225
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_FewShot_Arabic.py
@@ -0,0 +1,102 @@
+import random
+import re
+
+from llmebench.datasets import SpamDataset
+from llmebench.models import OpenAIModel
+from llmebench.tasks import SpamTask
+
+
+random.seed(1333)  # Seed Python's global RNG when this asset module is imported.
+
+
+def metadata():
+    """Return the authorship, model name and citation text for this asset."""
+    who = "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam"
+    where = "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)"
+    model = "GPT-4o-2024-05-22"
+    about = "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)."
+    return {"author": who, "affiliation": where, "model": model, "description": about}
+
+
+def config():
+    """Return the dataset, task, model and model arguments used by this asset."""
+    # The two class labels, listed once for use in model_args below.
+    labels = ["__label__ADS", "__label__NOTADS"]
+    return {
+        "dataset": SpamDataset,
+        "task": SpamTask,
+        "model": OpenAIModel,
+        "model_args": {"class_labels": labels, "max_tries": 3},
+    }
+
+
+def few_shot_prompt(input_sample, base_prompt, examples):
+    """Assemble the Arabic few-shot prompt text.
+
+    The instruction is followed by the labelled examples (numbered from 0)
+    and then the tweet to classify, whose label slot is left empty.
+
+    Args:
+        input_sample: tweet text to classify.
+        base_prompt: instruction text placed at the top.
+        examples: dicts with "input" and "label" keys.
+    """
+    pieces = [base_prompt, "\n", "اليك بعض الأمثلة:\n\n"]
+    for number, shot in enumerate(examples):
+        if shot["label"] == "__label__ADS":
+            shot_label = "إعلان"
+        else:
+            shot_label = "ليس إعلان"
+        pieces += ["مثال ", str(number), ":", "\n"]
+        pieces += ["التغريدة: ", shot["input"], "\n"]
+        pieces += ["التصنيف: ", shot_label, "\n\n"]
+
+    # The tweet under test goes last with an empty label slot.
+    pieces += ["التغريدة: ", input_sample, "\nالتصنيف: \n"]
+    return "".join(pieces)
+
+
+def prompt(input_sample, examples):
+    """Return the system and user chat messages for one tweet.
+
+    The user turn comes from few_shot_prompt (instruction, examples, tweet).
+    """
+    instruction = "هل تحتوي التغريدة التالية على محتوى سبام / غير مرغوب فيه / مزعج /إعلان أم لا؟ أجب بـ 'إعلان' أو 'ليس إعلان'، قدم التصنيف فقط بدون الحاجة إلى وصف أو تحليل.\n"
+    user_text = few_shot_prompt(input_sample, instruction, examples)
+    system_msg = {
+        "role": "system",
+        "content": "أنت خبير في تحليل و تصنيف التغريدات.",
+    }
+    user_msg = {"role": "user", "content": user_text}
+    return [system_msg, user_msg]
+
+
+def post_process(response):
+    """Map the model's free-text reply to a dataset label.
+
+    Args:
+        response: OpenAI-style chat completion dict; the reply text is read
+            from ``response["choices"][0]["message"]["content"]``.
+
+    Returns:
+        "__label__NOTADS", "__label__ADS", or None when the reply is a
+        refusal or contains none of the known markers.
+    """
+    out = response["choices"][0]["message"]["content"]
+    label = out.replace("التصنيف:", "").strip().lower()
+    label = label.replace("label:", "").strip().lower()
+
+    # The text is lower-cased above, so the refusal marker must be too;
+    # otherwise "i cannot ..." falls through and matches "no"/"not" below.
+    if "لا أستطيع" in label or "i cannot" in label:
+        return None
+
+    # Negative markers are tested first because negated answers such as
+    # "not spam" / "ليس إعلان" also contain the positive markers.
+    not_ads = ("ليست", "not", "no", "ليس", "notads")
+    ads = ("نعم", "إعلان", "spam", "مزعج", "اعلان", "مرغوب", "غير", "__ads")
+    if any(marker in label for marker in not_ads):
+        return "__label__NOTADS"
+    if any(marker in label for marker in ads):
+        return "__label__ADS"
+    return None
diff --git a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_FewShot_English.py
new file mode 100755
index 00000000..8ceae4eb
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_FewShot_English.py
@@ -0,0 +1,104 @@
+import random
+import re
+
+from llmebench.datasets import SpamDataset
+from llmebench.models import OpenAIModel
+from llmebench.tasks import SpamTask
+
+
+random.seed(1333)  # Seed Python's global RNG when this asset module is imported.
+
+
+def metadata():
+    """Return the authorship, model name and citation text for this asset."""
+    who = "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam"
+    where = "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)"
+    model = "GPT-4o-2024-05-22"
+    about = "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)."
+    return {"author": who, "affiliation": where, "model": model, "description": about}
+
+
+def config():
+    """Return the dataset, task, model and model arguments used by this asset."""
+    # The two class labels, listed once for use in model_args below.
+    labels = ["__label__ADS", "__label__NOTADS"]
+    return {
+        "dataset": SpamDataset,
+        "task": SpamTask,
+        "model": OpenAIModel,
+        "model_args": {"class_labels": labels, "max_tries": 3},
+    }
+
+
+def few_shot_prompt(input_sample, base_prompt, examples):
+    """Assemble the English few-shot prompt text.
+
+    The instruction is followed by the labelled examples (numbered from 0)
+    and then the tweet to classify, whose label slot is left empty.
+
+    Args:
+        input_sample: tweet text to classify.
+        base_prompt: instruction text placed at the top.
+        examples: dicts with "input" and "label" keys.
+    """
+    pieces = [base_prompt, "\n", "Here are some examples:\n\n"]
+    for number, shot in enumerate(examples):
+        if shot["label"] == "__label__ADS":
+            shot_label = "spam"
+        else:
+            shot_label = "not spam"
+        pieces += ["Example ", str(number), ":", "\n"]
+        pieces += ["tweet: ", shot["input"], "\nlabel: ", shot_label, "\n\n"]
+
+    # The tweet under test goes last with an empty label slot.
+    pieces += ["tweet: ", input_sample, "\nlabel: \n"]
+    return "".join(pieces)
+
+
+def prompt(input_sample, examples):
+    """Return the system and user chat messages for one tweet.
+
+    The user turn comes from few_shot_prompt (instruction, examples, tweet).
+    """
+    # Instruction wording is reproduced verbatim, including its spelling.
+    instruction = "".join(
+        [
+            "If the following tweet can be classified as spam or contains an advertisemnt, write 'spam' without explnanation, otherwise write 'not spam' without explanantion.\n\n",
+            "Provide only labels as a list of string.\n",
+        ]
+    )
+    user_text = few_shot_prompt(input_sample, instruction, examples)
+    system_msg = {"role": "system", "content": "You are an expert social media content analyst."}
+    user_msg = {"role": "user", "content": user_text}
+    return [system_msg, user_msg]
+
+
+def post_process(response):
+    """Map the model's free-text reply to a dataset label.
+
+    Args:
+        response: OpenAI-style chat completion dict; the reply text is read
+            from ``response["choices"][0]["message"]["content"]``.
+
+    Returns:
+        "__label__NOTADS", "__label__ADS", or None when the reply is a
+        refusal or contains none of the known markers.
+    """
+    out = response["choices"][0]["message"]["content"]
+    label = out.replace("التصنيف:", "").strip().lower()
+    label = label.replace("label:", "").strip().lower()
+
+    # The text is lower-cased above, so the refusal marker must be too;
+    # otherwise "i cannot ..." falls through and matches "no"/"not" below.
+    if "لا أستطيع" in label or "i cannot" in label:
+        return None
+
+    # Negative markers are tested first because negated answers such as
+    # "not spam" / "ليس إعلان" also contain the positive markers.
+    not_ads = ("ليست", "not", "no", "ليس", "notads")
+    ads = ("نعم", "إعلان", "spam", "مزعج", "اعلان", "مرغوب", "غير", "__ads")
+    if any(marker in label for marker in not_ads):
+        return "__label__NOTADS"
+    if any(marker in label for marker in ads):
+        return "__label__ADS"
+    return None
diff --git a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_FewShot_Mixed.py
new file mode 100755
index 00000000..ceb490e4
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_FewShot_Mixed.py
@@ -0,0 +1,102 @@
+import random
+import re
+
+from llmebench.datasets import SpamDataset
+from llmebench.models import OpenAIModel
+from llmebench.tasks import SpamTask
+
+
+random.seed(1333)  # Seed Python's global RNG when this asset module is imported.
+
+
+def metadata():
+    """Return the authorship, model name and citation text for this asset."""
+    who = "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam"
+    where = "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)"
+    model = "GPT-4o-2024-05-22"
+    about = "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)."
+    return {"author": who, "affiliation": where, "model": model, "description": about}
+
+
+def config():
+    """Return the dataset, task, model and model arguments used by this asset."""
+    # The two class labels, listed once for use in model_args below.
+    labels = ["__label__ADS", "__label__NOTADS"]
+    return {
+        "dataset": SpamDataset,
+        "task": SpamTask,
+        "model": OpenAIModel,
+        "model_args": {"class_labels": labels, "max_tries": 3},
+    }
+
+
+def few_shot_prompt(input_sample, base_prompt, examples):
+    """Assemble the mixed-language few-shot prompt text.
+
+    The Arabic instruction is followed by the examples (numbered from 0),
+    labelled in English, and then the tweet with an empty label slot.
+
+    Args:
+        input_sample: tweet text to classify.
+        base_prompt: instruction text placed at the top.
+        examples: dicts with "input" and "label" keys.
+    """
+    pieces = [base_prompt, "\n", "اليك بعض الأمثلة:\n\n"]
+    for number, shot in enumerate(examples):
+        if shot["label"] == "__label__ADS":
+            shot_label = "spam"
+        else:
+            shot_label = "not spam"
+        pieces += ["مثال ", str(number), ":", "\n"]
+        pieces += ["التغريدة: ", shot["input"], "\n"]
+        pieces += ["التصنيف: ", shot_label, "\n\n"]
+
+    # The tweet under test goes last with an empty label slot.
+    pieces += ["التغريدة: ", input_sample, "\nالتصنيف: \n"]
+    return "".join(pieces)
+
+
+def prompt(input_sample, examples):
+    """Return the system and user chat messages for one tweet.
+
+    The user turn comes from few_shot_prompt (instruction, examples, tweet).
+    """
+    instruction = "هل تحتوي التغريدة التالية على محتوى سبام / غير مرغوب فيه / مزعج /إعلان أم لا؟ أجب بـ 'spam' أو 'not spam'، قدم التصنيف فقط بدون الحاجة إلى وصف أو تحليل.\n"
+    user_text = few_shot_prompt(input_sample, instruction, examples)
+    system_msg = {
+        "role": "system",
+        "content": "أنت خبير في تحليل و تصنيف التغريدات.",
+    }
+    user_msg = {"role": "user", "content": user_text}
+    return [system_msg, user_msg]
+
+
+def post_process(response):
+    """Map the model's free-text reply to a dataset label.
+
+    Args:
+        response: OpenAI-style chat completion dict; the reply text is read
+            from ``response["choices"][0]["message"]["content"]``.
+
+    Returns:
+        "__label__NOTADS", "__label__ADS", or None when the reply is a
+        refusal or contains none of the known markers.
+    """
+    out = response["choices"][0]["message"]["content"]
+    label = out.replace("التصنيف:", "").strip().lower()
+    label = label.replace("label:", "").strip().lower()
+
+    # The text is lower-cased above, so the refusal marker must be too;
+    # otherwise "i cannot ..." falls through and matches "no"/"not" below.
+    if "لا أستطيع" in label or "i cannot" in label:
+        return None
+
+    # Negative markers are tested first because negated answers such as
+    # "not spam" / "ليس إعلان" also contain the positive markers.
+    not_ads = ("ليست", "not", "no", "ليس", "notads")
+    ads = ("نعم", "إعلان", "spam", "مزعج", "اعلان", "مرغوب", "غير", "__ads")
+    if any(marker in label for marker in not_ads):
+        return "__label__NOTADS"
+    if any(marker in label for marker in ads):
+        return "__label__ADS"
+    return None
diff --git a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_ZeroShot_Arabic.py
new file mode 100755
index 00000000..a94dfe41
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_ZeroShot_Arabic.py
@@ -0,0 +1,68 @@
+from llmebench.datasets import SpamDataset
+from llmebench.models import OpenAIModel
+from llmebench.tasks import SpamTask
+
+
+def metadata():
+    """Return the authorship, model name and citation text for this asset."""
+    who = "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam"
+    where = "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)"
+    model = "GPT-4o-2024-05-22"
+    about = "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)."
+    return {"author": who, "affiliation": where, "model": model, "description": about}
+
+
+def config():
+    """Return the dataset, task, model and model arguments used by this asset."""
+    # The two class labels, listed once for use in model_args below.
+    labels = ["__label__ADS", "__label__NOTADS"]
+    return {
+        "dataset": SpamDataset,
+        "task": SpamTask,
+        "model": OpenAIModel,
+        "model_args": {"class_labels": labels, "max_tries": 3},
+    }
+
+
+def prompt(input_sample):
+    """Return the system and user chat messages for one tweet (zero-shot)."""
+    instruction = "هل تحتوي التغريدة التالية على محتوى سبام / غير مرغوب فيه / مزعج /إعلان أم لا؟ أجب بـ 'إعلان' أو 'ليس إعلان'، قدم التصنيف فقط بدون الحاجة إلى وصف أو تحليل.\n"
+    # Instruction, blank line, the tweet, blank line, then an empty label slot.
+    user_text = "".join([instruction, "\n", "التغريدة: ", input_sample, "\n\nالتصنيف: "])
+    system_msg = {
+        "role": "system",
+        "content": "أنت خبير في تحليل و تصنيف التغريدات.",
+    }
+    user_msg = {"role": "user", "content": user_text}
+    return [system_msg, user_msg]
+
+
+def post_process(response):
+    """Map the model's free-text reply to a dataset label.
+
+    Args:
+        response: OpenAI-style chat completion dict; the reply text is read
+            from ``response["choices"][0]["message"]["content"]``.
+
+    Returns:
+        "__label__NOTADS", "__label__ADS", or None when the reply is a
+        refusal or contains none of the known markers.
+    """
+    out = response["choices"][0]["message"]["content"]
+    label = out.replace("التصنيف:", "").strip().lower()
+    label = label.replace("label:", "").strip().lower()
+
+    # The text is lower-cased above, so the refusal marker must be too;
+    # otherwise "i cannot ..." falls through and matches "no"/"not" below.
+    if "لا أستطيع" in label or "i cannot" in label:
+        return None
+
+    # Negative markers are tested first because negated answers such as
+    # "not spam" / "ليس إعلان" also contain the positive markers.
+    not_ads = ("ليست", "not", "no", "ليس", "notads")
+    ads = ("نعم", "إعلان", "spam", "مزعج", "اعلان", "مرغوب", "غير", "__ads")
+    if any(marker in label for marker in not_ads):
+        return "__label__NOTADS"
+    if any(marker in label for marker in ads):
+        return "__label__ADS"
+    return None
diff --git a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_ZeroShot_English.py
new file mode 100755
index 00000000..e7ee1e4d
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_ZeroShot_English.py
@@ -0,0 +1,68 @@
+from llmebench.datasets import SpamDataset
+from llmebench.models import OpenAIModel
+from llmebench.tasks import SpamTask
+
+
+def metadata():
+    """Return the authorship, model name and citation text for this asset."""
+    who = "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam"
+    where = "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)"
+    model = "GPT-4o-2024-05-22"
+    about = "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)."
+    return {"author": who, "affiliation": where, "model": model, "description": about}
+
+
+def config():
+    """Return the dataset, task, model and model arguments used by this asset."""
+    # The two class labels, listed once for use in model_args below.
+    labels = ["__label__ADS", "__label__NOTADS"]
+    return {
+        "dataset": SpamDataset,
+        "task": SpamTask,
+        "model": OpenAIModel,
+        "model_args": {"class_labels": labels, "max_tries": 3},
+    }
+
+
+def prompt(input_sample):
+    """Return the system and user chat messages for one tweet (zero-shot)."""
+    instruction = "If the following tweet can be classified as spam or contains an advertisemnt, write '__label__ADS' without explnanation, otherwise write '__label__NOTADS' without explanantion.\n\n"
+    # The label slot after the tweet is left empty for the model to fill in.
+    user_text = instruction + "tweet: {}\nlabel: ".format(input_sample)
+    system_msg = {
+        "role": "system",
+        "content": "You are an AI assistant that helps people find information.",
+    }
+    user_msg = {"role": "user", "content": user_text}
+    return [system_msg, user_msg]
+
+
+def post_process(response):
+    """Map the model's free-text reply to a dataset label.
+
+    Args:
+        response: OpenAI-style chat completion dict; the reply text is read
+            from ``response["choices"][0]["message"]["content"]``.
+
+    Returns:
+        "__label__NOTADS", "__label__ADS", or None when the reply is a
+        refusal or contains none of the known markers.
+    """
+    out = response["choices"][0]["message"]["content"]
+    label = out.replace("التصنيف:", "").strip().lower()
+    label = label.replace("label:", "").strip().lower()
+
+    # The text is lower-cased above, so the refusal marker must be too;
+    # otherwise "i cannot ..." falls through and matches "no"/"not" below.
+    if "لا أستطيع" in label or "i cannot" in label:
+        return None
+
+    # Negative markers are tested first because negated answers such as
+    # "__label__notads" also contain the positive "__ads"-style markers.
+    not_ads = ("ليست", "not", "no", "ليس", "notads")
+    ads = ("نعم", "إعلان", "spam", "مزعج", "اعلان", "مرغوب", "غير", "__ads")
+    if any(marker in label for marker in not_ads):
+        return "__label__NOTADS"
+    if any(marker in label for marker in ads):
+        return "__label__ADS"
+    return None
diff --git a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_ZeroShot_Mixed.py
new file mode 100755
index 00000000..e5e5f6a4
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_GPT4_ZeroShot_Mixed.py
@@ -0,0 +1,67 @@
+from llmebench.datasets import SpamDataset
+from llmebench.models import OpenAIModel
+from llmebench.tasks import SpamTask
+
+
+def metadata():
+    """Return the authorship, model name and citation text for this asset."""
+    who = "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam"
+    where = "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)"
+    model = "GPT-4o-2024-05-22"
+    about = "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)."
+    return {"author": who, "affiliation": where, "model": model, "description": about}
+
+
+def config():
+    """Return the dataset, task, model and model arguments used by this asset."""
+    # The two class labels, listed once for use in model_args below.
+    labels = ["__label__ADS", "__label__NOTADS"]
+    return {
+        "dataset": SpamDataset,
+        "task": SpamTask,
+        "model": OpenAIModel,
+        "model_args": {"class_labels": labels, "max_tries": 3},
+    }
+
+
+def prompt(input_sample):
+    """Return the system and user chat messages for one tweet (zero-shot)."""
+    instruction = "هل تحتوي التغريدة التالية على محتوى سبام / غير مرغوب فيه / مزعج /إعلان أم لا؟ أجب بـ 'spam' أو 'not spam'، قدم التصنيف فقط بدون الحاجة إلى وصف أو تحليل.\n"
+    user_text = "".join([instruction, "\n", "التغريدة: ", input_sample, "\n\nالتصنيف: "])
+    system_msg = {
+        "role": "system",
+        "content": "أنت خبير في تحليل و تصنيف التغريدات.",
+    }
+    user_msg = {"role": "user", "content": user_text}
+    return [system_msg, user_msg]
+
+
+def post_process(response):
+    """Map the model's free-text reply to a dataset label.
+
+    Args:
+        response: OpenAI-style chat completion dict; the reply text is read
+            from ``response["choices"][0]["message"]["content"]``.
+
+    Returns:
+        "__label__NOTADS", "__label__ADS", or None when the reply is a
+        refusal or contains none of the known markers.
+    """
+    out = response["choices"][0]["message"]["content"]
+    label = out.replace("التصنيف:", "").strip().lower()
+    label = label.replace("label:", "").strip().lower()
+
+    # The text is lower-cased above, so the refusal marker must be too;
+    # otherwise "i cannot ..." falls through and matches "no"/"not" below.
+    if "لا أستطيع" in label or "i cannot" in label:
+        return None
+
+    # Negative markers are tested first because negated answers such as
+    # "not spam" / "ليس إعلان" also contain the positive markers.
+    not_ads = ("ليست", "not", "no", "ليس", "notads")
+    ads = ("نعم", "إعلان", "spam", "مزعج", "اعلان", "مرغوب", "غير", "__ads")
+    if any(marker in label for marker in not_ads):
+        return "__label__NOTADS"
+    if any(marker in label for marker in ads):
+        return "__label__ADS"
+    return None
diff --git a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_FewShot_Arabic.py
new file mode 100755
index 00000000..31bb2575
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_FewShot_Arabic.py
@@ -0,0 +1,91 @@
+from llmebench.datasets import SpamDataset
+from llmebench.models import FastChatModel
+from llmebench.tasks import SpamTask
+
+
+def metadata():
+    """Return the authorship, model name and citation text for this asset."""
+    who = "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam"
+    where = "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)"
+    model = "jais-13b-chat"
+    about = "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)."
+    return {"author": who, "affiliation": where, "model": model, "description": about}
+
+
+def config():
+    """Return the dataset, task, model and model arguments used by this asset."""
+    # The two class labels, listed once for use in model_args below.
+    labels = ["__label__ADS", "__label__NOTADS"]
+    return {
+        "dataset": SpamDataset,
+        "task": SpamTask,
+        "model": FastChatModel,
+        "model_args": {"class_labels": labels, "max_tries": 3},
+    }
+
+
+def few_shot_prompt(input_sample, base_prompt, examples):
+    """Build the few-shot prompt: instruction, numbered examples, then query."""
+    header = base_prompt + "\n\n" + "إليك بعض الأمثلة:\n\n"
+    shots = []
+    for position, shot in enumerate(examples, start=1):
+        # Example answers use the yes/no words: "نعم" for ADS, "لا" otherwise.
+        answer = "لا" if shot["label"] != "__label__ADS" else "نعم"
+        shots.append(
+            f"مثال {position}:\n"
+            f"التغريدة: '{shot['input']}'\n"
+            f"التصنيف: {answer}\n\n"
+        )
+    # The tweet under test goes last with its label slot left empty.
+    query = f"التغريدة: '{input_sample}'\nالتصنيف: \n"
+    return header + "".join(shots) + query
+
+
+def prompt(input_sample, examples):
+    """Return a single user chat message holding the few-shot prompt.
+
+    The text comes from few_shot_prompt (instruction, examples, tweet).
+    """
+    instruction = "هل تحتوي التغريدة التالية على محتوى سبام / غير مرغوب فيه / مزعج /إعلان أم لا؟ أجب بـ نعم أو لا بدون الحاجة إلى وصف أو تحليل.\n"
+    user_text = few_shot_prompt(input_sample, instruction, examples)
+    return [{"role": "user", "content": user_text}]
+
+
+def post_process(response):
+    """Map the model's free-text Arabic reply to a dataset label.
+
+    Args:
+        response: chat completion dict; the reply text is read from
+            ``response["choices"][0]["message"]["content"]``.
+
+    Returns:
+        "__label__ADS", "__label__NOTADS", or None when the reply says it
+        cannot decide or contains none of the known markers.
+    """
+    import re  # local import: this asset has no module-level "import re"
+
+    label = response["choices"][0]["message"]["content"]
+    label = label.replace("التصنيف:", "").strip().lower()
+    if "لا يمكن تحديد" in label:
+        return None
+    # Full-sentence replies that state the tweet is an ad / unwanted content.
+    ads_phrases = (
+        "هي محتوى غير مرغوب فيه",
+        "التغريدة الأخيرة تحتوي على محتوى غير مرغوب فيه",
+        "والرابعة تحتويان على محتوى",
+        "هذه التغريدة تصنف ضمن الرسائل غير المرغوب فيها",
+        "هي على الأرجح إعلان",
+        "هي إعلان",
+        "قدمتها تحتوي على محتوى غير مرغوب",
+    )
+    if any(phrase in label for phrase in ads_phrases):
+        return "__label__ADS"
+
+    # "لا" must be a whole word: as a bare substring it also occurs inside
+    # "إعلان", which made replies containing that word count as NOTADS.
+    said_no = re.search(r"(?<!\w)لا(?!\w)", label) is not None
+    if said_no or any(m in label for m in ("ليست", "not", "ليس", "no")):
+        return "__label__NOTADS"
+    if any(m in label for m in ("نعم", "إعلان", "spam", "مزعج", "yes", "مرغوب", "غير")):
+        return "__label__ADS"
+    return None
diff --git a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_FewShot_English.py
new file mode 100755
index 00000000..ee2d02f8
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_FewShot_English.py
@@ -0,0 +1,88 @@
+from llmebench.datasets import SpamDataset
+from llmebench.models import FastChatModel
+from llmebench.tasks import SpamTask
+
+
+def metadata():
+    """Return the authorship, model name and citation text for this asset."""
+    who = "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam"
+    where = "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)"
+    model = "jais-13b-chat"
+    about = "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)."
+    return {"author": who, "affiliation": where, "model": model, "description": about}
+
+
+def config():
+    """Return the dataset, task, model and model arguments used by this asset."""
+    # The two class labels, listed once for use in model_args below.
+    labels = ["__label__ADS", "__label__NOTADS"]
+    return {
+        "dataset": SpamDataset,
+        "task": SpamTask,
+        "model": FastChatModel,
+        "model_args": {"class_labels": labels, "max_tries": 3},
+    }
+
+
+def few_shot_prompt(input_sample, base_prompt, examples):
+    """Assemble the English few-shot prompt text.
+
+    The instruction is followed by the examples (numbered from 0), each shown
+    with its raw dataset label, and then the tweet to classify with an empty
+    label slot.
+
+    Args:
+        input_sample: tweet text to classify.
+        base_prompt: instruction text placed at the top.
+        examples: dicts with "input" and "label" keys.
+    """
+    pieces = [base_prompt, "\n\n", "Here are some examples:\n\n"]
+    for number, shot in enumerate(examples):
+        # Labels are inserted as-is, without mapping them to other words.
+        pieces += ["Example ", str(number), ":", "\n"]
+        pieces += ["tweet: ", shot["input"], "\nlabel: ", shot["label"], "\n\n"]
+
+    # The tweet under test goes last with an empty label slot.
+    pieces += ["tweet: ", input_sample, "\nlabel: \n"]
+    return "".join(pieces)
+
+
+def prompt(input_sample, examples):
+    """Return a single user chat message holding the few-shot prompt.
+
+    The text comes from few_shot_prompt (instruction, examples, tweet).
+    """
+    instruction = "If the following tweet can be classified as spam or contains an advertisemnt, write '__label__ADS' without explnanation, otherwise write '__label__NOTADS' without explanantion."
+    user_text = few_shot_prompt(input_sample, instruction, examples)
+    return [{"role": "user", "content": user_text}]
+
+
+def post_process(response):
+    """Map the model's free-text reply to a dataset label.
+
+    Only the text before the first "." is inspected (when a "." occurs after
+    the first character).
+
+    Args:
+        response: chat completion dict; the reply text is read from
+            ``response["choices"][0]["message"]["content"]``.
+
+    Returns:
+        "__label__NOTADS", "__label__ADS", or None when the reply contains
+        none of the known markers.
+    """
+    import re  # local import: this asset has no module-level "import re"
+    out = response["choices"][0]["message"]["content"]
+    first_dot = out.find(".")
+    if first_dot > 0:
+        out = out[:first_dot]
+    label = out.replace("label:", "").strip().lower()
+    # "لا" must be a whole word: as a bare substring it also occurs inside
+    # "إعلان", which made replies containing that word count as NOTADS.
+    said_no = re.search(r"(?<!\w)لا(?!\w)", label) is not None
+    if said_no or any(m in label for m in ("ليست", "not", "ليس", "no", "notads")):
+        return "__label__NOTADS"
+    positive = ("نعم", "إعلان", "spam", "مزعج", "yes", "مرغوب", "غير", "ads")
+    if any(m in label for m in positive):
+        return "__label__ADS"
+    return None
diff --git a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_FewShot_Mixed.py
new file mode 100755
index 00000000..4adcb451
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_FewShot_Mixed.py
@@ -0,0 +1,63 @@
+from llmebench.datasets import SpamDataset
+from llmebench.models import FastChatModel
+from llmebench.tasks import SpamTask
+
+
+def metadata():
+    """Return the authorship, model name and citation text for this asset."""
+    who = "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam"
+    where = "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)"
+    model = "jais-13b-chat"
+    about = "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)."
+    return {"author": who, "affiliation": where, "model": model, "description": about}
+
+
+def config():
+    """Return the dataset, task, model and model arguments used by this asset."""
+    # The two class labels, listed once for use in model_args below.
+    labels = ["__label__ADS", "__label__NOTADS"]
+    return {
+        "dataset": SpamDataset,
+        "task": SpamTask,
+        "model": FastChatModel,
+        "model_args": {"class_labels": labels, "max_tries": 3},
+    }
+
+
+def few_shot_prompt(input_sample, base_prompt, examples):
+    """Build the few-shot prompt: instruction, numbered examples, then query."""
+    header = base_prompt + "\n\n" + "إليك بعض الأمثلة:\n\n"
+    shots = []
+    for position, shot in enumerate(examples, start=1):
+        # Anything that is not the ADS label is shown as the NOTADS label.
+        if shot["label"] == "__label__ADS":
+            tag = "__label__ADS"
+        else:
+            tag = "__label__NOTADS"
+        shots.append(
+            f"مثال {position}:\n"
+            f"الجملة: '{shot['input']}'\n"
+            f"التصنيف: {tag}\n\n"
+        )
+    query = f"الجملة: '{input_sample}'\nالتصنيف: \n"
+    return header + "".join(shots) + query
+
+
+def prompt(input_sample, examples):
+    """Return a single user chat message holding the few-shot prompt.
+
+    The text comes from few_shot_prompt (instruction, examples, sentence).
+    """
+    instruction = "صنف الجملة التالية كـ '__label__ADS' أو '__label__NOTADS'، أعد التسمية فقط بدون الحاجة إلى وصف أو تحليل.\n"
+    user_text = few_shot_prompt(input_sample, instruction, examples)
+    return [{"role": "user", "content": user_text}]
+
+
+def post_process(response):
+    """Return "__label__NOTADS" for negated replies, otherwise "__label__ADS"."""
+    label = response["choices"][0]["message"]["content"].lower()
+    label = label.replace("label:", "").strip()
+    # "__label__notads" contains "not"; anything un-negated counts as an ad.
+    if "ليس" in label or "ليست" in label or "not" in label:
+        return "__label__NOTADS"
+    return "__label__ADS"
diff --git a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_ZeroShot_Arabic.py
new file mode 100755
index 00000000..8b914ca1
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_ZeroShot_Arabic.py
@@ -0,0 +1,61 @@
+from llmebench.datasets import SpamDataset
+from llmebench.models import FastChatModel
+from llmebench.tasks import SpamTask
+
+
+def metadata():
+    """Return authorship, affiliation, model name and paper links for this asset."""
+    who = "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam"
+    org = "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)"
+    model = "jais-13b-chat"
+    about = "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)."
+    return {"author": who, "affiliation": org, "model": model, "description": about}
+
+
+def config():
+    """Return the run configuration: SpamDataset / SpamTask on FastChatModel."""
+    # Forwarded under "model_args": the two class labels and max_tries.
+    model_args = {"class_labels": ["__label__ADS", "__label__NOTADS"], "max_tries": 3}
+    return {
+        "dataset": SpamDataset,
+        "task": SpamTask,
+        "model": FastChatModel,
+        "model_args": model_args,
+    }
+
+
+def prompt(input_sample):
+    """Build the single user message asking for a yes/no spam verdict."""
+    # Arabic instruction: does the tweet contain spam / unwanted / annoying /
+    # ad content? Answer yes or no without description or analysis.
+    # NOTE(review): the cue follows the tweet with no separator - confirm.
+    base_prompt = "هل تحتوي التغريدة التالية على محتوى سبام / غير مرغوب فيه / مزعج /إعلان أم لا؟ أجب بـ نعم أو لا بدون الحاجة إلى وصف أو تحليل.\n"
+    content = base_prompt + "التغريدة: " + input_sample + "التصنيف: "
+    return [{"role": "user", "content": content}]
+
+
+def post_process(response):
+    """Map the model's yes/no answer to '__label__NOTADS' / '__label__ADS'.
+
+    Negative cues are checked first, then positive ones, so a reply that
+    contains both kinds is reported as NOTADS. Returns None when neither
+    kind of cue is found.
+    """
+    label = response["choices"][0]["message"]["content"]
+    label = label.replace("التصنيف:", "").strip().lower()
+
+    # "لا" and "no" count only as whole words; as substrings they also fire
+    # inside "إعلان" (ad), which made that positive cue unreachable.
+    words = {w.strip(".,!?:;'\"()[]«»،؛؟") for w in label.split()}
+    negative = ("ليست", "not", "ليس")
+    if words & {"لا", "no"} or any(cue in label for cue in negative):
+        return "__label__NOTADS"
+
+    # Positive cues are only consulted once no negative cue matched.
+    positive = ("نعم", "إعلان", "spam", "مزعج", "yes", "مرغوب", "غير")
+    if any(cue in label for cue in positive):
+        return "__label__ADS"
+
+    # No recognisable cue in the reply.
+    return None
diff --git a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_ZeroShot_English.py
new file mode 100755
index 00000000..4f9b4b65
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_ZeroShot_English.py
@@ -0,0 +1,44 @@
+from llmebench.datasets import SpamDataset
+from llmebench.models import FastChatModel
+from llmebench.tasks import SpamTask
+
+
+def metadata():
+    """Return authorship, affiliation, model name and paper links for this asset."""
+    who = "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam"
+    org = "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)"
+    model = "jais-13b-chat"
+    about = "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)."
+    return {"author": who, "affiliation": org, "model": model, "description": about}
+
+
+def config():
+    """Return the run configuration: SpamDataset / SpamTask on FastChatModel."""
+    # Forwarded under "model_args": the two class labels and max_tries.
+    model_args = {"class_labels": ["__label__ADS", "__label__NOTADS"], "max_tries": 3}
+    return {
+        "dataset": SpamDataset,
+        "task": SpamTask,
+        "model": FastChatModel,
+        "model_args": model_args,
+    }
+
+
+def prompt(input_sample):
+    """Build the single user message for zero-shot English classification.
+
+    The instruction is an f-string so the tweet is interpolated exactly once;
+    as a plain literal the text "{input_sample}" was sent to the model verbatim.
+    """
+    content = f"If the following sentence can be classified as spam or contains an advertisemnt, write '__label__ADS' without explnanation, otherwise write '__label__NOTADS' without explanantion.\n tweet: {input_sample}\nlabel: \n"
+    return [{"role": "user", "content": content}]
+
+
+def post_process(response):
+    """Map the chat completion text to '__label__NOTADS' / '__label__ADS'."""
+    label = response["choices"][0]["message"]["content"].lower()
+    label = label.replace("label:", "").strip()
+    # The text is lower-cased above, so the English cue must be "not": an
+    # upper-case "NOT" can never match and would turn every reply into ADS.
+    negated = any(cue in label for cue in ("ليس", "ليست", "not"))
+    return "__label__NOTADS" if negated else "__label__ADS"
diff --git a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_ZeroShot_Mixed.py
new file mode 100755
index 00000000..596e8515
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_JAIS13b_ZeroShot_Mixed.py
@@ -0,0 +1,49 @@
+from llmebench.datasets import SpamDataset
+from llmebench.models import FastChatModel
+from llmebench.tasks import SpamTask
+
+
+def metadata():
+    """Return authorship, affiliation, model name and paper links for this asset."""
+    who = "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam"
+    org = "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)"
+    model = "jais-13b-chat"
+    about = "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)."
+    return {"author": who, "affiliation": org, "model": model, "description": about}
+
+
+def config():
+    """Return the run configuration: SpamDataset / SpamTask on FastChatModel."""
+    # Forwarded under "model_args": the two class labels and max_tries.
+    model_args = {"class_labels": ["__label__ADS", "__label__NOTADS"], "max_tries": 3}
+    return {
+        "dataset": SpamDataset,
+        "task": SpamTask,
+        "model": FastChatModel,
+        "model_args": model_args,
+    }
+
+
+def prompt(input_sample):
+    """Build the single user message asking for one of the two labels."""
+    # Arabic instruction: label the sentence as '__label__ADS' or
+    # '__label__NOTADS' and return only the label, with no analysis.
+    # NOTE(review): the cue follows the tweet with no separator - confirm.
+    base_prompt = "صنف الجملة التالية كـ '__label__ADS' أو '__label__NOTADS'، أعد التسمية فقط بدون الحاجة إلى وصف أو تحليل.\n"
+    content = base_prompt + "التغريدة: " + input_sample + "التصنيف: "
+    return [{"role": "user", "content": content}]
+
+
+def post_process(response):
+    """Turn the chat completion text into a spam label.
+
+    Returns None for an Arabic "I cannot" refusal, '__label__NOTADS' when a
+    negation cue is present and '__label__ADS' for everything else.
+    """
+    text = response["choices"][0]["message"]["content"].lower()
+    text = text.replace("label:", "").strip().lower()
+    print("label", text)
+    if "لا يمكنني" in text:
+        return None
+    negated = any(cue in text for cue in ("ليس", "ليست", "not"))
+    return "__label__NOTADS" if negated else "__label__ADS"
diff --git a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_Llama3-8b_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_Llama3-8b_FewShot_Arabic.py
new file mode 100755
index 00000000..4f099eff
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_Llama3-8b_FewShot_Arabic.py
@@ -0,0 +1,95 @@
+from llmebench.datasets import SpamDataset
+from llmebench.models import AzureModel
+from llmebench.tasks import SpamTask
+
+
+def metadata():
+    """Return authorship, affiliation, model name and paper links for this asset."""
+    who = "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam"
+    org = "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)"
+    model = "Llama-3.1-8B-Instruct"
+    about = "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)."
+    return {"author": who, "affiliation": org, "model": model, "description": about}
+
+
+def config():
+    """Return the run configuration: SpamDataset / SpamTask on AzureModel."""
+    # Forwarded under "model_args": the two class labels and max_tries.
+    model_args = {"class_labels": ["__label__ADS", "__label__NOTADS"], "max_tries": 100}
+    return {
+        "dataset": SpamDataset,
+        "task": SpamTask,
+        "model": AzureModel,
+        "model_args": model_args,
+    }
+
+
+def few_shot_prompt(input_sample, base_prompt, examples):
+    """Assemble the few-shot prompt text.
+
+    Emits base_prompt, an Arabic "here are some examples" header, one numbered
+    block per example (tweet plus Arabic label) and finally the target tweet
+    with an empty classification slot for the model to complete.
+    """
+    parts = [base_prompt + "\n\n", "إليك بعض الأمثلة:\n\n"]
+    for number, shot in enumerate(examples, start=1):
+        # "__label__ADS" is shown as "إعلان" (ad), anything else as "ليس إعلان".
+        shown = "إعلان" if shot["label"] == "__label__ADS" else "ليس إعلان"
+        shot_text = f"التغريدة: '{shot['input']}'\n"
+        parts.append(f"مثال {number}:\n{shot_text}التصنيف: {shown}\n\n")
+    parts.append(f"التغريدة: '{input_sample}'\nالتصنيف: \n")
+    return "".join(parts)
+
+
+def prompt(input_sample, examples):
+    """Wrap the few-shot prompt for input_sample in a single user message."""
+    # Arabic instruction: does the tweet contain advertising? Answer 'إعلان'
+    # if it does, 'ليس إعلان' if it does not; give the label only.
+    instruction_lines = [
+        "هل تحتوي التغريدة التالية على محتوى إعلان؟\n",
+        "حدد التصنيف المناسب بناءً على المحتوى:\n",
+        " - إذا كانت التغريدة تحتوي على إعلان، أجب بـ 'إعلان'.\n",
+        " - إذا كانت التغريدة لا تحتوي على إعلان، أجب بـ 'ليس إعلان'.\n",
+        "قدم التصنيف فقط.\n",
+    ]
+    base_prompt = "".join(instruction_lines)
+    content = few_shot_prompt(input_sample, base_prompt, examples)
+    return [{"role": "user", "content": content}]
+
+
+def post_process(response):
+    """Map the raw model output to '__label__ADS' / '__label__NOTADS'.
+
+    Returns None when the response has no "output" field, when the model
+    refuses to answer, or when no known cue is found. Negative cues are
+    checked before positive ones.
+    """
+    if "output" not in response:
+        print("Error or missing output in response:", response)
+        return None
+
+    label = response["output"].strip()
+    label = label.replace("<s>", "").replace("</s>", "").lower()
+    label = label.replace("التصنيف:", "").strip()
+    label = label.replace("label:", "").strip()
+
+    # The text is lower-cased above, so the English refusal cue has to be
+    # lower-case as well; "I cannot" can never match, which let refusals
+    # fall through to NOTADS via the "not" inside "cannot".
+    if "لا أستطيع" in label or "i cannot" in label:
+        return None
+
+    # "no" counts only as a whole word; as a substring it also fires inside
+    # unrelated words such as "promotional".
+    words = {w.strip(".,!?:;'\"()[]«»،؛؟") for w in label.split()}
+    negative = ("ليست", "not", "غير", "ليس")
+    if "no" in words or any(cue in label for cue in negative):
+        return "__label__NOTADS"
+
+    # "غير" is not repeated here: the negative check above already takes it.
+    positive = ("نعم", "إعلان", "spam", "مزعج", "اعلان", "مرغوب")
+    if any(cue in label for cue in positive):
+        return "__label__ADS"
+
+    # No recognisable cue in the output.
+    return None
diff --git a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_Llama3-8b_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_Llama3-8b_FewShot_English.py
new file mode 100755
index 00000000..9c8a27b4
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_Llama3-8b_FewShot_English.py
@@ -0,0 +1,103 @@
+import random
+
+from llmebench.datasets import SpamDataset
+from llmebench.models import AzureModel
+from llmebench.tasks import SpamTask
+
+
+def metadata():
+    """Return authorship, affiliation, model name and paper links for this asset."""
+    who = "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam"
+    org = "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)"
+    model = "Llama-3.1-8B-Instruct"
+    about = "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)."
+    return {"author": who, "affiliation": org, "model": model, "description": about}
+
+
+def config():
+    """Return the run configuration: SpamDataset / SpamTask on AzureModel."""
+    # Forwarded under "model_args": the two class labels and max_tries.
+    model_args = {"class_labels": ["__label__ADS", "__label__NOTADS"], "max_tries": 100}
+    return {
+        "dataset": SpamDataset,
+        "task": SpamTask,
+        "model": AzureModel,
+        "model_args": model_args,
+    }
+
+
+def few_shot_prompt(input_sample, base_prompt, examples):
+    """Assemble the few-shot prompt text.
+
+    Emits base_prompt, a "Here are some examples" header, one numbered block
+    per example (tweet plus quoted 'spam' / 'not spam' label) and finally the
+    target tweet with an empty label slot for the model to complete.
+    """
+    parts = [base_prompt + "\n\n", "Here are some examples:\n\n"]
+
+    # Each example is appended exactly once. Adding the running prompt to
+    # itself here would re-insert everything built so far on every iteration
+    # and double the prompt size per example.
+    # Examples are numbered from 1.
+    for number, example in enumerate(examples, start=1):
+        label = "'spam'" if example["label"] == "__label__ADS" else "'not spam'"
+        parts.append(
+            f"Example {number}:\ntweet: {example['input']}\nlabel: {label}\n\n"
+        )
+
+    # Append the tweet we want the model to predict for but leave the label blank
+    parts.append("tweet: " + input_sample + "\nlabel: \n")
+
+    return "".join(parts)
+
+
+def prompt(input_sample, examples):
+    """Wrap the few-shot prompt for input_sample in a single user message."""
+    # The instruction is assembled from three sentences: the task, what
+    # counts as spam, and the required answer format.
+    sentences = [
+        "Classify the following tweet as either 'spam' or 'not spam'. ",
+        "Consider it 'spam' if it contains advertising or promotional content. ",
+        "Respond with 'spam' or 'not spam' only, and do not provide any explanation.",
+    ]
+    base_prompt = "".join(sentences)
+    content = few_shot_prompt(input_sample, base_prompt, examples)
+    return [{"role": "user", "content": content}]
+
+
+def post_process(response):
+    """Map the raw model output to '__label__ADS' / '__label__NOTADS'.
+
+    Returns None when the response has no "output" field, when the model
+    refuses to answer, or when no known cue is found. Negative cues are
+    checked before positive ones.
+    """
+    if "output" not in response:
+        print("Error or missing output in response:", response)
+        return None
+
+    label = response["output"].strip()
+    label = label.replace("<s>", "").replace("</s>", "").lower()
+    label = label.replace("التصنيف:", "").strip()
+    label = label.replace("label:", "").strip()
+
+    # The text is lower-cased above, so the English refusal cue has to be
+    # lower-case as well; "I cannot" can never match, which let refusals
+    # fall through to NOTADS via the "not" inside "cannot".
+    if "لا أستطيع" in label or "i cannot" in label:
+        return None
+
+    # "no" counts only as a whole word; as a substring it also fires inside
+    # unrelated words such as "promotional".
+    words = {w.strip(".,!?:;'\"()[]«»،؛؟") for w in label.split()}
+    negative = ("notads", "ليست", "not", "ليس")
+    if "no" in words or any(cue in label for cue in negative):
+        return "__label__NOTADS"
+
+    # Positive cues are only consulted once no negative cue matched.
+    positive = ("نعم", "إعلان", "spam", "مزعج", "yes", "مرغوب", "غير")
+    if any(cue in label for cue in positive):
+        return "__label__ADS"
+
+    # No recognisable cue in the output.
+    return None
diff --git a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_Llama3-8b_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_Llama3-8b_FewShot_Mixed.py
new file mode 100755
index 00000000..4df15579
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_Llama3-8b_FewShot_Mixed.py
@@ -0,0 +1,91 @@
+import random
+
+from llmebench.datasets import SpamDataset
+from llmebench.models import AzureModel
+from llmebench.tasks import SpamTask
+
+
+def metadata():
+    """Return authorship, affiliation, model name and paper links for this asset."""
+    who = "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam"
+    org = "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)"
+    model = "Llama-3.1-8B-Instruct"
+    about = "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)."
+    return {"author": who, "affiliation": org, "model": model, "description": about}
+
+
+def config():
+    """Return the run configuration: SpamDataset / SpamTask on AzureModel."""
+    # Forwarded under "model_args": the two class labels and max_tries.
+    model_args = {"class_labels": ["__label__ADS", "__label__NOTADS"], "max_tries": 100}
+    return {
+        "dataset": SpamDataset,
+        "task": SpamTask,
+        "model": AzureModel,
+        "model_args": model_args,
+    }
+
+
+def few_shot_prompt(input_sample, base_prompt, examples):
+    """Assemble the few-shot prompt text.
+
+    Emits base_prompt, an Arabic "here are some examples" header, one numbered
+    block per example (tweet plus English label) and finally the target tweet
+    with an empty classification slot for the model to complete.
+    """
+    parts = [base_prompt + "\n\n", "إليك بعض الأمثلة:\n\n"]
+    for number, shot in enumerate(examples, start=1):
+        # "__label__ADS" is shown as "spam", any other gold label as "not spam".
+        shown = "spam" if shot["label"] == "__label__ADS" else "not spam"
+        shot_text = f"التغريدة: '{shot['input']}'\n"
+        parts.append(f"مثال {number}:\n{shot_text}التصنيف: {shown}\n\n")
+    parts.append(f"التغريدة: '{input_sample}'\nالتصنيف: \n")
+    return "".join(parts)
+
+
+def prompt(input_sample, examples):
+    """Wrap the few-shot prompt for input_sample in a single user message."""
+    # Arabic instruction: does the tweet contain spam / unwanted / annoying /
+    # ad content? Answer 'spam' or 'not spam' only, with no analysis.
+    base_prompt = "هل تحتوي التغريدة التالية على محتوى سبام / غير مرغوب فيه / مزعج /إعلان أم لا؟ أجب بـ 'spam' أو 'not spam'، قدم التصنيف فقط بدون الحاجة إلى وصف أو تحليل.\n"
+    content = few_shot_prompt(input_sample, base_prompt, examples)
+    user_turn = {"role": "user", "content": content}
+    return [user_turn]
+
+
+def post_process(response):
+    """Map the raw model output to '__label__ADS' / '__label__NOTADS'.
+
+    Returns None when the response has no "output" field, when the model
+    refuses to answer, or when no known cue is found. Negative cues are
+    checked before positive ones.
+    """
+    if "output" not in response:
+        print("Error or missing output in response:", response)
+        return None
+
+    label = response["output"].strip()
+    label = label.replace("<s>", "").replace("</s>", "").lower()
+    label = label.replace("التصنيف:", "").strip()
+    label = label.replace("label:", "").strip()
+
+    # The text is lower-cased above, so the English refusal cue has to be
+    # lower-case as well; "I cannot" can never match, which let refusals
+    # fall through to NOTADS via the "not" inside "cannot".
+    if "لا أستطيع" in label or "i cannot" in label:
+        return None
+
+    # "لا" and "no" count only as whole words; as substrings they also fire
+    # inside "إعلان" (ad), which made that positive cue unreachable.
+    words = {w.strip(".,!?:;'\"()[]«»،؛؟") for w in label.split()}
+    negative = ("ليست", "not", "ليس")
+    if words & {"لا", "no"} or any(cue in label for cue in negative):
+        return "__label__NOTADS"
+
+    # Positive cues are only consulted once no negative cue matched.
+    positive = ("نعم", "إعلان", "spam", "مزعج", "yes", "مرغوب", "غير")
+    if any(cue in label for cue in positive):
+        return "__label__ADS"
+
+    # No recognisable cue in the output.
+    return None
diff --git a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_Llama3-8b_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_Llama3-8b_ZeroShot_Arabic.py
new file mode 100755
index 00000000..68edc5eb
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_Llama3-8b_ZeroShot_Arabic.py
@@ -0,0 +1,75 @@
+import random
+
+from llmebench.datasets import SpamDataset
+from llmebench.models import AzureModel
+from llmebench.tasks import SpamTask
+
+
+def metadata():
+    """Return authorship, affiliation, model name and paper links for this asset."""
+    who = "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam"
+    org = "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)"
+    model = "Llama-3.1-8B-Instruct"
+    about = "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)."
+    return {"author": who, "affiliation": org, "model": model, "description": about}
+
+
+def config():
+    """Return the run configuration: SpamDataset / SpamTask on AzureModel."""
+    # Forwarded under "model_args": the two class labels and max_tries.
+    model_args = {"class_labels": ["__label__ADS", "__label__NOTADS"], "max_tries": 100}
+    return {
+        "dataset": SpamDataset,
+        "task": SpamTask,
+        "model": AzureModel,
+        "model_args": model_args,
+    }
+
+
+def prompt(input_sample):
+    """Build the single user message asking for a yes/no spam verdict."""
+    # Arabic instruction: does the tweet contain spam / unwanted / annoying /
+    # ad content? Answer yes or no without description or analysis.
+    # NOTE(review): the cue follows the tweet with no separator - confirm.
+    base_prompt = "هل تحتوي التغريدة التالية على محتوى سبام / غير مرغوب فيه / مزعج /إعلان أم لا؟ أجب بـ نعم أو لا بدون الحاجة إلى وصف أو تحليل.\n"
+    content = base_prompt + "التغريدة: " + input_sample + "التصنيف: "
+    return [{"role": "user", "content": content}]
+
+
+def post_process(response):
+    """Map the raw model output to '__label__ADS' / '__label__NOTADS'.
+
+    Returns None when the response has no "output" field, when the model
+    refuses to answer, or when no known cue is found. Negative cues are
+    checked before positive ones, so a reply containing both kinds is
+    reported as NOTADS.
+    """
+    if "output" not in response:
+        print("Error or missing output in response:", response)
+        return None
+
+    label = response["output"].strip()
+    label = label.replace("<s>", "").replace("</s>", "").lower()
+    label = label.replace("التصنيف:", "").strip()
+    label = label.replace("label:", "").strip()
+
+    # The text is lower-cased above, so the English refusal cue has to be
+    # lower-case as well; "I cannot" can never match, which let refusals
+    # fall through to NOTADS via the "not" inside "cannot".
+    if "لا أستطيع" in label or "i cannot" in label:
+        return None
+
+    # "لا" and "no" count only as whole words; as substrings they also fire
+    # inside "إعلان" (ad), which made that positive cue unreachable.
+    words = {w.strip(".,!?:;'\"()[]«»،؛؟") for w in label.split()}
+    negative = ("ليست", "not", "ليس")
+    if words & {"لا", "no"} or any(cue in label for cue in negative):
+        return "__label__NOTADS"
+
+    # Positive cues are only consulted once no negative cue matched.
+    positive = ("نعم", "إعلان", "spam", "مزعج", "yes", "مرغوب", "غير")
+    if any(cue in label for cue in positive):
+        return "__label__ADS"
+
+    # No recognisable cue in the output.
+    return None
diff --git a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_Llama3-8b_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_Llama3-8b_ZeroShot_English.py
new file mode 100755
index 00000000..df0f6787
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_Llama3-8b_ZeroShot_English.py
@@ -0,0 +1,71 @@
+from llmebench.datasets import SpamDataset
+from llmebench.models import AzureModel
+from llmebench.tasks import SpamTask
+
+
+def metadata():
+    """Return authorship, affiliation, model name and paper links for this asset."""
+    who = "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam"
+    org = "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)"
+    model = "Llama-3.1-8B-Instruct"
+    about = "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)."
+    return {"author": who, "affiliation": org, "model": model, "description": about}
+
+
+def config():
+    """Return the run configuration: SpamDataset / SpamTask on AzureModel."""
+    # Forwarded under "model_args": the two class labels and max_tries.
+    model_args = {"class_labels": ["__label__ADS", "__label__NOTADS"], "max_tries": 100}
+    return {
+        "dataset": SpamDataset,
+        "task": SpamTask,
+        "model": AzureModel,
+        "model_args": model_args,
+    }
+
+
+def prompt(input_sample):
+    """Build the single user message for zero-shot English classification."""
+    # The model is asked to answer with '__label__ADS' or '__label__NOTADS'.
+    instruction = "If the following sentence can be classified as spam or contains an advertisemnt, write '__label__ADS' without explnanation, otherwise write '__label__NOTADS' without explanantion."
+    content = f"{instruction}\n tweet: {input_sample}\nlabel: \n"
+    user_turn = {"role": "user", "content": content}
+    return [user_turn]
+
+
+def post_process(response):
+    """Map the raw model output to '__label__ADS' / '__label__NOTADS'.
+
+    Returns None when the response has no "output" field, when the model
+    refuses to answer, or when no known cue is found. Negative cues are
+    checked before positive ones.
+    """
+    if "output" not in response:
+        print("Error or missing output in response:", response)
+        return None
+
+    label = response["output"].strip()
+    label = label.replace("<s>", "").replace("</s>", "").lower()
+    label = label.replace("التصنيف:", "").strip()
+    label = label.replace("label:", "").strip()
+
+    # The text is lower-cased above, so the English refusal cue has to be
+    # lower-case as well; "I cannot" can never match, which let refusals
+    # fall through to NOTADS via the "not" inside "cannot".
+    if "لا أستطيع" in label or "i cannot" in label:
+        return None
+
+    # "لا" and "no" count only as whole words; as substrings they also fire
+    # inside "إعلان" (ad), which made that positive cue unreachable.
+    words = {w.strip(".,!?:;'\"()[]«»،؛؟") for w in label.split()}
+    negative = ("ليست", "not", "ليس")
+    if words & {"لا", "no"} or any(cue in label for cue in negative):
+        return "__label__NOTADS"
+
+    # The prompt asks for '__label__ADS', so the label itself is a positive cue.
+    positive = ("label__ads", "نعم", "إعلان", "spam", "مزعج", "yes", "مرغوب", "غير")
+    if "ads" in words or any(cue in label for cue in positive):
+        return "__label__ADS"
+
+    # No recognisable cue in the output.
+    return None
diff --git a/assets/ar/factuality_disinformation_harmful_content/spam/Spam_Llama3-8b_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_Llama3-8b_ZeroShot_Mixed.py
new file mode 100755
index 00000000..c682245f
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/spam/Spam_Llama3-8b_ZeroShot_Mixed.py
@@ -0,0 +1,84 @@
+import random
+
+from llmebench.datasets import SpamDataset
+from llmebench.models import AzureModel
+from llmebench.tasks import SpamTask
+
+
+def metadata():
+    """Return authorship, affiliation, model name and paper links for this asset."""
+    who = "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam"
+    org = "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)"
+    model = "Llama-3.1-8B-Instruct"
+    about = "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)."
+    return {"author": who, "affiliation": org, "model": model, "description": about}
+
+
+def config():
+    """Return the run configuration: SpamDataset / SpamTask on AzureModel."""
+    # Forwarded under "model_args": the two class labels and max_tries.
+    model_args = {"class_labels": ["__label__ADS", "__label__NOTADS"], "max_tries": 100}
+    return {
+        "dataset": SpamDataset,
+        "task": SpamTask,
+        "model": AzureModel,
+        "model_args": model_args,
+    }
+
+
+def prompt(input_sample):
+    """Build the single user message asking for a 'yes' / 'no' spam verdict."""
+    # Arabic instruction: does the tweet contain spam / unwanted / annoying /
+    # ad content? Answer 'yes' if so, 'no' otherwise; give the label only.
+    instruction_lines = [
+        "هل تحتوي التغريدة التالية على محتوى سبام / غير مرغوب فيه / مزعج / إعلان؟\n",
+        "أجب بـ 'yes' إذا كانت تحتوي على محتوى من هذا النوع، أو أجب بـ 'no' إذا لم تكن تحتوي على ذلك.\n",
+        "يرجى تقديم التصنيف فقط بدون الحاجة إلى وصف أو تحليل.\n\n",
+    ]
+    base_prompt = "".join(instruction_lines)
+    content = base_prompt + "التغريدة: " + input_sample + "\nالتصنيف: "
+    return [{"role": "user", "content": content}]
+
+
+def post_process(response):
+    """Map the raw model output to '__label__ADS' / '__label__NOTADS'.
+
+    Returns None when the response has no "output" field, when the model
+    gives a non-answer (refusal, "no information", echo of the options), or
+    when no known cue is found. Negative cues are checked before positive ones.
+    """
+    if "output" not in response:
+        print("Error or missing output in response:", response)
+        return None
+
+    label = response["output"].strip()
+    label = label.replace("<s>", "").replace("</s>", "").lower()
+    label = label.replace("التصنيف:", "").strip()
+    label = label.replace("label:", "").strip()
+
+    # Refusals, "no information" replies and echoes of the label options carry
+    # no verdict. The text is lower-cased above, so the English refusal cue
+    # must be lower-case too ("I cannot" can never match).
+    non_answers = (
+        "لا يمكنني",
+        "لا توجد معلومات",
+        "لا أستطيع",
+        "i cannot",
+        "'ads' أو 'label__NOTADS'".lower(),
+    )
+    if any(cue in label for cue in non_answers):
+        return None
+
+    # "لا" and "no" count only as whole words; as substrings they also fire
+    # inside "إعلان" (ad), which made that positive cue unreachable.
+    words = {w.strip(".,!?:;'\"()[]«»،؛؟") for w in label.split()}
+    negative = ("ليست", "not", "ليس")
+    if words & {"لا", "no"} or any(cue in label for cue in negative):
+        return "__label__NOTADS"
+
+    positive = ("نعم", "إعلان", "spam", "مزعج", "yes", "مرغوب", "غير")
+    if any(cue in label for cue in positive):
+        return "__label__ADS"
+
+    # No recognisable cue in the output.
+    return None