From 2f10ba21f1425246761e69397e6d28962f5c5cdf Mon Sep 17 00:00:00 2001 From: MohamedBayan <118048819+MohamedBayan@users.noreply.github.com> Date: Mon, 20 Jan 2025 16:51:23 +0300 Subject: [PATCH] Wise adult content detection (#383) --- .../Adult_GPT4_FewShot_Arabic.py | 94 ++++++++++++++++++ .../Adult_GPT4_FewShot_English.py | 91 +++++++++++++++++ .../Adult_GPT4_FewShot_Mixed.py | 95 ++++++++++++++++++ .../Adult_GPT4_ZeroShot_Arabic.py | 68 +++++++++++++ .../Adult_GPT4_ZeroShot_English.py | 68 +++++++++++++ .../Adult_GPT4_ZeroShot_Mixed.py | 70 +++++++++++++ .../Adult_JAIS13b_FewShot_Arabic.py | 98 +++++++++++++++++++ .../Adult_JAIS13b_FewShot_English.py | 90 +++++++++++++++++ .../Adult_JAIS13b_FewShot_Mixed.py | 92 +++++++++++++++++ .../Adult_JAIS13b_ZeroShot_Arabic.py | 85 ++++++++++++++++ .../Adult_JAIS13b_ZeroShot_English.py | 85 ++++++++++++++++ .../Adult_JAIS13b_ZeroShot_Mixed.py | 85 ++++++++++++++++ .../Adult_Llama3-8b_FewShot_Arabic.py | 78 +++++++++++++++ .../Adult_Llama3-8b_FewShot_English.py | 90 +++++++++++++++++ .../Adult_Llama3-8b_FewShot_Mixed.py | 70 +++++++++++++ .../Adult_Llama3-8b_ZeroShot_Arabic.py | 65 ++++++++++++ .../Adult_Llama3-8b_ZeroShot_English.py | 66 +++++++++++++ .../Adult_Llama3-8b_ZeroShot_Mixed.py | 61 ++++++++++++ 18 files changed, 1451 insertions(+) create mode 100755 assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_FewShot_Arabic.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_FewShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_FewShot_Mixed.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_ZeroShot_Arabic.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_ZeroShot_English.py create mode 100755 
assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_ZeroShot_Mixed.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_FewShot_Arabic.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_FewShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_FewShot_Mixed.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_ZeroShot_Arabic.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_ZeroShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_ZeroShot_Mixed.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_Llama3-8b_FewShot_Arabic.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_Llama3-8b_FewShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_Llama3-8b_FewShot_Mixed.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_Llama3-8b_ZeroShot_Arabic.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_Llama3-8b_ZeroShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_Llama3-8b_ZeroShot_Mixed.py diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_FewShot_Arabic.py new file mode 100755 index 00000000..98ef6638 --- /dev/null +++ 
b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_FewShot_Arabic.py @@ -0,0 +1,94 @@ +from llmebench.datasets import AdultDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import AdultTask + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", + } + + +def config(): + return { + "dataset": AdultDataset, + "task": AdultTask, + "model": OpenAIModel, + "model_args": { + "class_labels": ["ADULT", "NOT_ADULT"], + "max_tries": 30, + }, + "general_args": { + "fewshot": { + "deduplicate": True, + }, + }, + } + + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + "\n\n" + out_prompt = out_prompt + "اليك بعض الامثلة:\n\n" + for index, example in enumerate(examples): + label = "للبالغين" if example["label"] == "ADULT" else "ليست للبالغين" + + out_prompt = ( + out_prompt + + "مثال " + + str(index) + + ":" + + "\n" + + "التغريدة: " + + example["input"] + + "\nالتصنيف: " + + label + + "\n\n" + ) + + # Append the sentence we want the model to predict for but leave the Label blank + out_prompt = out_prompt + "التغريدة: " + input_sample + "\nالتصنيف: \n" + + return out_prompt + + +def prompt(input_sample, examples): + base_prompt = "هل تحتوي التغريدة التالية على لغة للبالغين؟ اجب 'للبالغين' أو 'ليست للبالغين' بدون أي تفسير."
+ return [ + { + "role": "system", + "content": "أنت محلل خبير، يمكنك تحديد وتصنيف المحتوى الخاص بالبالغين في التغريدة.", + }, + { + "role": "user", + "content": few_shot_prompt(input_sample, base_prompt, examples), + }, + ] + + +def post_process(response): + if not response: + return None + + label = response["choices"][0]["message"]["content"] + label = label.replace("التصنيف:", "").strip() + label = label.replace("label:", "").strip() + label = label.lower() + + if ( + "ليس" in label + or "ليست" in label + or "not" in label + or "no" in label + or "غير" in label + or "لا" in label + or "not_adult" in label + or "not adult" in label + ): + return "NOT_ADULT" + elif "للبالغين" in label or "نعم" in label or "adult" in label: + return "ADULT" + else: + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_FewShot_English.py new file mode 100755 index 00000000..38d00b5c --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_FewShot_English.py @@ -0,0 +1,91 @@ +from llmebench.datasets import AdultDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import AdultTask + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", + } + + +def config(): + return { + "dataset": AdultDataset, + "task": AdultTask, + "model": OpenAIModel, + "model_args": { 
+ "class_labels": ["ADULT", "NOT_ADULT"], + "max_tries": 30, + }, + "general_args": { + "fewshot": { + "deduplicate": True, + }, + }, + } + + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + "\n\n" + out_prompt = out_prompt + "Here are some examples:\n\n" + for index, example in enumerate(examples): + out_prompt = ( + out_prompt + + "Example " + + str(index) + + ":" + + "\n" + + "tweet: " + + example["input"] + + "\nlabel: " + + example["label"] + + "\n\n" + ) + + # Append the sentence we want the model to predict for but leave the Label blank + out_prompt = out_prompt + "tweet: " + input_sample + "\nlabel: \n" + + return out_prompt + + +def prompt(input_sample, examples): + base_prompt = f'Does the following tweet contain adult content? label it as "ADULT" if it does, or "NOT_ADULT" otherwise. Return label only.' + return [ + { + "role": "system", + "content": "You are an expert annotator, you can identify and label adult content within a tweet.", + }, + { + "role": "user", + "content": few_shot_prompt(input_sample, base_prompt, examples), + }, + ] + + +def post_process(response): + label = response["choices"][0]["message"]["content"] + label = label.replace("التصنيف:", "").strip() + label = label.replace("label:", "").strip() + + label = label.replace("", "").replace("", "") + label = label.lower() + + if ( + "ليس" in label + or "ليست" in label + or "not" in label + or "no" in label + or "غير" in label + or "لا" in label + or "not_adult" in label + or "not adult" in label + ): + return "NOT_ADULT" + elif "للبالغين" in label or "نعم" in label or "adult" in label: + return "ADULT" + else: + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_FewShot_Mixed.py new file mode 100755 index 00000000..cf464ddd --- /dev/null +++ 
b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_FewShot_Mixed.py @@ -0,0 +1,95 @@ +from llmebench.datasets import AdultDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import AdultTask + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", + } + + +def config(): + return { + "dataset": AdultDataset, + "task": AdultTask, + "model": OpenAIModel, + "model_args": { + "class_labels": ["ADULT", "NOT_ADULT"], + "max_tries": 30, + }, + "general_args": { + "fewshot": { + "deduplicate": True, + }, + }, + } + + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + "\n\n" + out_prompt = out_prompt + "اليك بعض الامثلة:\n\n" + for index, example in enumerate(examples): + out_prompt = ( + out_prompt + + "مثال " + + str(index) + + ":" + + "\n" + + "التغريدة: " + + example["input"] + + "\nالتصنيف: " + + example["label"] + + "\n\n" + ) + + # Append the sentence we want the model to predict for but leave the Label blank + out_prompt = out_prompt + "التغريدة: " + input_sample + "\nالتصنيف: \n" + + return out_prompt + + +def prompt(input_sample, examples): + base_prompt = f'بالنظر إلى التغريدة التالية، صنفها على أنها "ADULT" أو "NOT_ADULT" بناء على محتوى التغريدة' + return [ + { + "role": "system", + "content": "أنت محلل خبير، يمكنك تحديد وتصنيف المحتوى الخاص بالبالغين في التغريدة.", + }, + { + "role": "user", + "content": few_shot_prompt(input_sample, base_prompt, examples), + }, + ] + + +def
post_process(response): + if not response: + return None + + label = response["choices"][0]["message"]["content"] + + label = label.replace("التصنيف:", "").strip() + label = label.replace("label:", "").strip() + + label = label.replace("", "").replace("", "") + label = label.lower() + + if ( + "ليس" in label + or "ليست" in label + or "not" in label + or "no" in label + or "غير" in label + or "لا" in label + or "not_adult" in label + or "not adult" in label + ): + return "NOT_ADULT" + elif "للبالغين" in label or "نعم" in label or "adult" in label: + return "ADULT" + else: + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_ZeroShot_Arabic.py new file mode 100755 index 00000000..50446e5c --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_ZeroShot_Arabic.py @@ -0,0 +1,68 @@ +from llmebench.datasets import AdultDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import AdultTask + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", + } + + +def config(): + return { + "dataset": AdultDataset, + "task": AdultTask, + "model": OpenAIModel, + "model_args": { + "class_labels": ["ADULT", "NOT_ADULT"], + "max_tries": 3, + }, + } + + +def prompt(input_sample): + prompt_string = ( + "هل تحتوي التغريدة التالية على لغة للبالغين؟ اجب 'للبالغين' 
أو 'ليست للبالغين' بدون أي تفسير.\n\n" + + "التغريدة: " + + input_sample + + "\nالتصنيف: " + ) + return [ + { + "role": "system", + "content": "أنت محلل خبير، يمكنك تحديد وتصنيف المحتوى الخاص بالبالغين في التغريدة.", + }, + { + "role": "user", + "content": prompt_string, + }, + ] + + +def post_process(response): + label = response["choices"][0]["message"]["content"] + label = label.replace("التصنيف:", "").strip() + label = label.replace("label:", "").strip() + + label = label.replace("", "").replace("", "") + label = label.lower() + + if ( + "ليس" in label + or "ليست" in label + or "not" in label + or "no" in label + or "غير" in label + or "لا" in label + or "not_adult" in label + or "not adult" in label + ): + return "NOT_ADULT" + elif "للبالغين" in label or "نعم" in label or "adult" in label: + return "ADULT" + else: + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_ZeroShot_English.py new file mode 100755 index 00000000..89899524 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_ZeroShot_English.py @@ -0,0 +1,68 @@ +from llmebench.datasets import AdultDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import AdultTask + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", + } + + +def config(): + return { + "dataset": 
AdultDataset, + "task": AdultTask, + "model": OpenAIModel, + "model_args": { + "class_labels": ["ADULT", "NOT_ADULT"], + "max_tries": 3, + }, + } + + +def prompt(input_sample): + prompt_string = ( + # f'Classify whether the following Arabic sentence is appropriate for children. If not appropriate, just write "ADULT" without explaination, and if appropriate, just write "NOT_ADULT" without explaination \n {input_sample}' + f'Does the following tweet contain adult content? label it as "ADULT" if it does, or "NOT_ADULT" otherwise. Return label only.\n\n' + f"tweet: {input_sample}\n" + f"label: \n" + ) + return [ + { + "role": "system", + "content": "You are an expert annotator, you can identify and label adult content within a tweet.", + }, + { + "role": "user", + "content": prompt_string, + }, + ] + + +def post_process(response): + label = response["choices"][0]["message"]["content"] + label = label.replace("التصنيف:", "").strip() + label = label.replace("label:", "").strip() + + label = label.replace("", "").replace("", "") + label = label.lower() + + if ( + "ليس" in label + or "ليست" in label + or "not" in label + or "no" in label + or "غير" in label + or "لا" in label + or "not_adult" in label + or "not adult" in label + ): + return "NOT_ADULT" + elif "للبالغين" in label or "نعم" in label or "adult" in label: + return "ADULT" + else: + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_ZeroShot_Mixed.py new file mode 100755 index 00000000..80b05f79 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_ZeroShot_Mixed.py @@ -0,0 +1,70 @@ +from llmebench.datasets import AdultDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import AdultTask + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat 
Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", + } + + +def config(): + return { + "dataset": AdultDataset, + "task": AdultTask, + "model": OpenAIModel, + "model_args": { + "class_labels": ["ADULT", "NOT_ADULT"], + "max_tries": 3, + }, + } + + +def prompt(input_sample): + prompt_string = ( + f'هل تحتوي التغريدة التالية على لغة للبالغين؟ صنفها كـ "ADULT" أو "NOT_ADULT" بناءً على محتوى التغريدة.\n\n' + f"التغريدة: {input_sample}\n" + f"التصنيف: \n" + ) + return [ + { + "role": "system", + "content": "أنت خبير في تحليل وسائل التواصل، يمكنك تحديد وتصنيف المحتوى الخاص بالبالغين في التغريدة.", + }, + { + "role": "user", + "content": prompt_string, + }, + ] + + +def post_process(response): + out = response["choices"][0]["message"]["content"].replace("label: ", "") + j = out.find(".") + if j > 0: + out = out[0:j] + label = out.replace("التصنيف:", "").strip() + label = label.replace("label:", "").strip() + + label = label.replace("", "").replace("", "") + label = label.lower() + + if ( + "ليس" in label + or "ليست" in label + or "not" in label + or "no" in label + or "غير" in label + or "لا" in label + or "not_adult" in label + or "not adult" in label + ): + return "NOT_ADULT" + elif "للبالغين" in label or "نعم" in label or "adult" in label: + return "ADULT" + else: + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_FewShot_Arabic.py new file mode 100755 index 00000000..0542da23 --- 
/dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_FewShot_Arabic.py @@ -0,0 +1,98 @@ +from llmebench.datasets import AdultDataset +from llmebench.models import FastChatModel +from llmebench.tasks import AdultTask + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "jais-13b-chat", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", + } + + +def config(): + return { + "dataset": AdultDataset, + "task": AdultTask, + "model": FastChatModel, + "model_args": { + "class_labels": ["للبالغين", "ليس للبالغين"], + "max_tries": 3, + }, + } + + +def few_shot_prompt(input_sample, examples): + base_prompt = ( + "صنف التغريدة العربية التالية على أنها تحتوي على لغة للبالغين (اللغة المستخدمة في إعلانات البالغين والإعلانات الإباحية) أو لا تحتوي على لغة للبالغين بدون توضيح. 
" + "في حالة لغة البالغين، اكتب فقط 'للبالغين' بدون تفسير، وفي حالة عدم احتوائها على لغة البالغين، اكتب فقط 'ليست للبالغين' بدون تفسير:\n\n" + ) + for index, example in enumerate(examples): + label = "للبالغين" if example["label"] == "ADULT" else "ليست للبالغين" + base_prompt += ( + f"مثال {index + 1}:\n" + f"التغريدة: '{example['input']}'\n" + f"التصنيف: {label}\n\n" + ) + base_prompt += ( + f"صنف الآن التغريدة الجديدة:\nالتغريدة: '{input_sample}'\n" f"التصنيف: " + ) + return base_prompt + + +def prompt(input_sample, examples): + return [ + { + "role": "user", + "content": few_shot_prompt(input_sample, examples), + } + ] + + +def post_process(response): + label = response["choices"][0]["message"]["content"] + + label = label.replace("التصنيف:", "").strip() + label = label.replace("label:", "").strip() + + label = label.replace("", "").replace("", "") + label = label.lower() + + if ( + "cannot classify" in label + or "cannot complete" in label + or "لا يمكنني" in label + or "cannot fulfill" in label + ): + return None + elif ( + "غير مناسب للبالغين" in label + or "غير مناسبة للبالغين" in label + or "مناسب للجمهور العام" in label + ): + return "NOT_ADULT" + elif "غير مناسب للنشر العام" in label: + return "ADULT" + elif "not_adult" in label or "not adult" in label: + return "NOT_ADULT" + elif ( + "التغريدة مناسبة للبالغين" in label + or "المحتوى مناسب للبالغين" in label + or "للبالغين" in label + or "نعم" in label + or "adult" in label + ): + return "ADULT" + elif ( + "ليس" in label + or "ليست" in label + or "not" in label + or "no" == label + or "غير" in label + or "لا" in label + ): + return "NOT_ADULT" + else: + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_FewShot_English.py new file mode 100755 index 00000000..a3fb0afd --- /dev/null +++ 
b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_FewShot_English.py @@ -0,0 +1,90 @@ +from llmebench.datasets import AdultDataset +from llmebench.models import FastChatModel +from llmebench.tasks import AdultTask + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "jais-13b-chat", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", + } + + +def config(): + return { + "dataset": AdultDataset, + "task": AdultTask, + "model": FastChatModel, + "model_args": { + "class_labels": ["ADULT", "NOT_ADULT"], + "max_tries": 3, + }, + } + + +def few_shot_prompt(input_sample, examples): + base_prompt = "Given the following tweet, label it as 'ADULT' or 'NOT_ADULT' based on the content of the tweet. 
Provide only label.\n\n" + for index, example in enumerate(examples): + label = "adult" if example["label"] == "ADULT" else "not adult" + base_prompt += ( + f"Example {index + 1}:\n" + f"Tweet: '{example['input']}'\n" + f"Label: {label}\n\n" + ) + base_prompt += f"Now classify the new tweet:\nTweet: '{input_sample}'\n" f"Label: " + return base_prompt + + +def prompt(input_sample, examples): + return [ + { + "role": "user", + "content": few_shot_prompt(input_sample, examples), + } + ] + + +def post_process(response): + label = response["choices"][0]["message"]["content"] + label = label.replace("التصنيف:", "").strip() + label = label.replace("label:", "").strip() + label = label.lower() + + if ( + "cannot classify" in label + or "cannot complete" in label + or "لا يمكنني" in label + or "cannot fulfill" in label + ): + return None + elif ( + "غير مناسب للبالغين" in label + or "غير مناسبة للبالغين" in label + or "مناسب للجمهور العام" in label + ): + return "NOT_ADULT" + elif "غير مناسب للنشر العام" in label: + return "ADULT" + elif "not_adult" in label or "not adult" in label: + return "NOT_ADULT" + elif ( + "التغريدة مناسبة للبالغين" in label + or "المحتوى مناسب للبالغين" in label + or "للبالغين" in label + or "نعم" in label + or "adult" in label + ): + return "ADULT" + elif ( + "ليس" in label + or "ليست" in label + or "not" in label + or "no" == label + or "غير" in label + or "لا" in label + ): + return "NOT_ADULT" + else: + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_FewShot_Mixed.py new file mode 100755 index 00000000..06b4a500 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_FewShot_Mixed.py @@ -0,0 +1,92 @@ +from llmebench.datasets import AdultDataset +from llmebench.models import FastChatModel +from llmebench.tasks import 
AdultTask + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "jais-13b-chat", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", + } + + +def config(): + return { + "dataset": AdultDataset, + "task": AdultTask, + "model": FastChatModel, + "model_args": { + "class_labels": ["ADULT", "NOT_ADULT"], + "max_tries": 3, + }, + } + + +def few_shot_prompt(input_sample, examples): + base_prompt = "هل تحتوي التغريدة التالية على لغة للبالغين؟ اجب 'adult' أو 'not adult' بدون أي تفسير.\n\n" + for index, example in enumerate(examples): + label = "adult" if example["label"] == "ADULT" else "not adult" + base_prompt += ( + f"مثال {index + 1}:\n" + f"التغريدة: '{example['input']}'\n" + f"التصنيف: {label}\n\n" + ) + base_prompt += f"صنف الآن التغريدة الجديدة:\nجملة: '{input_sample}'\n" f"التصنيف: " + return base_prompt + + +def prompt(input_sample, examples): + return [ + { + "role": "user", + "content": few_shot_prompt(input_sample, examples), + } + ] + + +def post_process(response): + label = response["choices"][0]["message"]["content"] + label = label.replace("التصنيف:", "").strip() + label = label.replace("label:", "").strip() + + label = label.replace("", "").replace("", "") + label = label.lower() + + if ( + "cannot classify" in label + or "cannot complete" in label + or "لا يمكنني" in label + or "cannot fulfill" in label + ): + return None + elif ( + "غير مناسب للبالغين" in label + or "غير مناسبة للبالغين" in label + or "مناسب للجمهور العام" in label + ): + return "NOT_ADULT" + elif "غير مناسب للنشر العام" in label: + return "ADULT" + elif 
"not_adult" in label or "not adult" in label: + return "NOT_ADULT" + elif ( + "التغريدة مناسبة للبالغين" in label + or "المحتوى مناسب للبالغين" in label + or "للبالغين" in label + or "نعم" in label + or "adult" in label + ): + return "ADULT" + elif ( + "ليس" in label + or "ليست" in label + or "not" in label + or "no" == label + or "غير" in label + or "لا" in label + ): + return "NOT_ADULT" + else: + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_ZeroShot_Arabic.py new file mode 100755 index 00000000..20db77d1 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_ZeroShot_Arabic.py @@ -0,0 +1,85 @@ +from llmebench.datasets import AdultDataset +from llmebench.models import FastChatModel +from llmebench.tasks import AdultTask + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "jais-13b-chat", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", + } + + +def config(): + return { + "dataset": AdultDataset, + "task": AdultTask, + "model": FastChatModel, + "model_args": { + "class_labels": ["ADULT", "NOT_ADULT"], + "max_tries": 3, + }, + } + + +def prompt(input_sample): + return [ + { + "role": "user", + "content": ( + "هل تحتوي التغريدة التالية على لغة للبالغين؟ اجب 'للبالغين' أو 'ليست للبالغين' بدون أي تفسير.\n\n" + + "التغريدة: " + + input_sample + + "\nالتصنيف: " + ), + } + ] + + +def post_process(response): + 
label = response["choices"][0]["message"]["content"] + + label = label.replace("التصنيف:", "").strip() + label = label.replace("label:", "").strip() + + label = label.replace("", "").replace("", "") + label = label.lower() + + if ( + "cannot classify" in label + or "cannot complete" in label + or "لا يمكنني" in label + or "cannot fulfill" in label + ): + return None + elif ( + "غير مناسب للبالغين" in label + or "غير مناسبة للبالغين" in label + or "مناسب للجمهور العام" in label + ): + return "NOT_ADULT" + elif "غير مناسب للنشر العام" in label: + return "ADULT" + elif "not_adult" in label or "not adult" in label: + return "NOT_ADULT" + elif ( + "التغريدة مناسبة للبالغين" in label + or "المحتوى مناسب للبالغين" in label + or "للبالغين" in label + or "نعم" in label + or "adult" in label + ): + return "ADULT" + elif ( + "ليس" in label + or "ليست" in label + or "not" in label + or "no" == label + or "غير" in label + or "لا" in label + ): + return "NOT_ADULT" + else: + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_ZeroShot_English.py new file mode 100755 index 00000000..01b11bed --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_ZeroShot_English.py @@ -0,0 +1,85 @@ +from llmebench.datasets import AdultDataset +from llmebench.models import FastChatModel +from llmebench.tasks import AdultTask + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "jais-13b-chat", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at 
def post_process(response):
    """Map the chat-completion reply to a task label.

    Args:
        response: FastChat/OpenAI-style response dict; the reply text lives
            at response["choices"][0]["message"]["content"].

    Returns:
        "ADULT", "NOT_ADULT", or None when the model refused to answer or
        the reply matches no known pattern.
    """
    label = response["choices"][0]["message"]["content"]

    # Lowercase first so the English "Label:" prefix is stripped regardless
    # of the model's capitalization; Arabic text is unaffected by lower().
    label = label.lower()
    label = label.replace("التصنيف:", "").replace("label:", "").strip()

    # Explicit refusals are unparseable.
    if (
        "cannot classify" in label
        or "cannot complete" in label
        or "cannot fulfill" in label
        or "لا يمكنني" in label
    ):
        return None

    # Specific phrasings first: they contain substrings that the generic
    # keyword checks below would misread.
    if (
        "غير مناسب للبالغين" in label
        or "غير مناسبة للبالغين" in label
        or "مناسب للجمهور العام" in label
    ):
        return "NOT_ADULT"
    if "غير مناسب للنشر العام" in label:
        return "ADULT"
    # Negated forms must be tested before the bare keywords they contain:
    # "not adult" contains "adult", and "ليست للبالغين" contains "للبالغين"
    # (the latter was previously checked too late and misread as ADULT).
    if "not_adult" in label or "not adult" in label:
        return "NOT_ADULT"
    if "ليست للبالغين" in label or "ليس للبالغين" in label:
        return "NOT_ADULT"
    if (
        "التغريدة مناسبة للبالغين" in label
        or "المحتوى مناسب للبالغين" in label
        or "للبالغين" in label
        or "نعم" in label
        or "adult" in label
    ):
        return "ADULT"
    if (
        "ليس" in label
        or "ليست" in label
        or "not" in label
        or label == "no"
        or "غير" in label
        or "لا" in label
    ):
        return "NOT_ADULT"
    return None
def post_process(response):
    """Map the chat-completion reply to a task label.

    Args:
        response: FastChat/OpenAI-style response dict; the reply text lives
            at response["choices"][0]["message"]["content"].

    Returns:
        "ADULT", "NOT_ADULT", or None when the model refused to answer or
        the reply matches no known pattern.
    """
    label = response["choices"][0]["message"]["content"]

    # Lowercase first so the English "Label:" prefix is stripped regardless
    # of the model's capitalization; Arabic text is unaffected by lower().
    label = label.lower()
    label = label.replace("التصنيف:", "").replace("label:", "").strip()

    # Explicit refusals are unparseable.
    if (
        "cannot classify" in label
        or "cannot complete" in label
        or "cannot fulfill" in label
        or "لا يمكنني" in label
    ):
        return None

    # Specific phrasings first: they contain substrings that the generic
    # keyword checks below would misread.
    if (
        "غير مناسب للبالغين" in label
        or "غير مناسبة للبالغين" in label
        or "مناسب للجمهور العام" in label
    ):
        return "NOT_ADULT"
    if "غير مناسب للنشر العام" in label:
        return "ADULT"
    # Negated forms must be tested before the bare keywords they contain:
    # "not adult" contains "adult", and "ليست للبالغين" contains "للبالغين"
    # (the latter was previously checked too late and misread as ADULT).
    if "not_adult" in label or "not adult" in label:
        return "NOT_ADULT"
    if "ليست للبالغين" in label or "ليس للبالغين" in label:
        return "NOT_ADULT"
    if (
        "التغريدة مناسبة للبالغين" in label
        or "المحتوى مناسب للبالغين" in label
        or "للبالغين" in label
        or "نعم" in label
        or "adult" in label
    ):
        return "ADULT"
    if (
        "ليس" in label
        or "ليست" in label
        or "not" in label
        or label == "no"
        or "غير" in label
        or "لا" in label
    ):
        return "NOT_ADULT"
    return None
" + "في حالة لغة البالغين، اكتب فقط 'للبالغين' بدون تفسير، وفي حالة عدم احتوائها على لغة البالغين، اكتب فقط 'ليست للبالغين' بدون تفسير:\n\n" + ) + for index, example in enumerate(examples): + label = "للبالغين" if example["label"] == "ADULT" else "ليست للبالغين" + base_prompt += ( + f"مثال {index + 1}:\n" + f"التغريدة: '{example['input']}'\n" + f"التصنيف: {label}\n\n" + ) + base_prompt += ( + f"صنف الآن التغريدة الجديدة:\nالتغريدة: '{input_sample}'\n" f"التصنيف: " + ) + return base_prompt + + +def prompt(input_sample, examples): + return [ + { + "role": "user", + "content": few_shot_prompt(input_sample, examples), + } + ] + + +def post_process(response): + # if not response or 'error' in response or 'output' not in response: + # print("Error or missing output in response:", response) + # return "NOT_ADULT" # Safely default to NOT_ADULT when unsure + + label = response["output"].strip().lower() + label = label.replace("التصنيف:", "").strip() + label = label.replace("label:", "").strip() + + label = label.replace("", "").replace("", "") + label = label.lower() + + if ( + "ليس" in label + or "ليست" in label + or "not" in label + or "no" in label + or "غير" in label + or "لا" in label + ): + return "NOT_ADULT" + elif "للبالغين" in label or "نعم" in label or "adult" in label or "بالغين" in label: + return "ADULT" + else: + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_Llama3-8b_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_Llama3-8b_FewShot_English.py new file mode 100755 index 00000000..89e945de --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_Llama3-8b_FewShot_English.py @@ -0,0 +1,90 @@ +from llmebench.datasets import AdultDataset +from llmebench.models import AzureModel +from llmebench.tasks import AdultTask + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat 
def few_shot_prompt(input_sample, base_prompt, examples):
    """Assemble a few-shot prompt: instruction, numbered examples, then the
    query tweet with its label left blank for the model to fill in."""
    parts = [base_prompt, "\n\n", "Here are some examples:\n\n"]
    for idx, example in enumerate(examples):
        parts.append(
            f"Example {idx}:\ntweet: {example['input']}\nlabel: {example['label']}\n\n"
        )
    # The unlabeled query the model must classify.
    parts.append(f"tweet: {input_sample}\nlabel: \n")
    return "".join(parts)
def post_process(response):
    """Map the raw Azure model output to a task label.

    Args:
        response: AzureModel response dict; the reply text is response["output"].

    Returns:
        "ADULT", "NOT_ADULT", or None when the response errored or the reply
        matches no known pattern.
    """
    # Malformed / errored responses are unparseable. Returning None (instead
    # of silently defaulting to "NOT_ADULT") keeps failures visible and this
    # asset consistent with its sibling assets.
    if not response or "error" in response or "output" not in response:
        print("Error or missing output in response:", response)
        return None

    label = response["output"].strip().lower()
    # Strip prompt-echo prefixes the model sometimes repeats.
    label = label.replace("التصنيف:", "").replace("label:", "").strip()

    # Negations are checked first: "not adult" contains "adult" and
    # "ليست للبالغين" contains "للبالغين".
    if (
        "ليس" in label
        or "ليست" in label
        or "not" in label
        or "no" in label
        or "غير" in label
        or "لا" in label
    ):
        return "NOT_ADULT"
    if "للبالغين" in label or "نعم" in label or "adult" in label or "بالغين" in label:
        return "ADULT"
    return None
def few_shot_prompt(input_sample, examples):
    """Build the mixed-language few-shot prompt: Arabic instruction, numbered
    labeled examples, then the new sentence awaiting classification."""
    instruction = "هل تحتوي التغريدة التالية على لغة للبالغين؟ اجب 'adult' أو 'not adult' بدون أي تفسير.\n\n"
    pieces = [instruction]
    for idx, example in enumerate(examples):
        # Dataset labels are ADULT/NOT_ADULT; the prompt uses English words.
        tag = "adult" if example["label"] == "ADULT" else "not adult"
        pieces.append(f"مثال {idx + 1}:\nجملة: '{example['input']}'\nالتصنيف: {tag}\n\n")
    pieces.append(f"صنف الآن الجملة الجديدة:\nجملة: '{input_sample}'\nالتصنيف: ")
    return "".join(pieces)
"affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "Llama-3.1-8B-Instruct", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", + } + + +def config(): + return { + "dataset": AdultDataset, + "task": AdultTask, + "model": AzureModel, + "model_args": { + "class_labels": ["ADULT", "NOT_ADULT"], + "max_tries": 3, + }, + } + + +def prompt(input_sample): + return [ + { + "role": "user", + "content": ( + "هل تحتوي التغريدة التالية على لغة للبالغين؟ اجب 'للبالغين' أو 'ليست للبالغين' بدون أي تفسير.\n\n" + + "تغريدة: " + + input_sample + + "\nالتصنيف: " + ), + } + ] + + +def post_process(response): + # if not response or 'error' in response or 'output' not in response: + # print("Error or missing output in response:", response) + # return "No Response" + + label = response["output"].strip().lower() + label = label.replace("التصنيف:", "").strip() + label = label.replace("label:", "").strip() + + label = label.replace("", "").replace("", "") + label = label.lower() + + if ( + "ليس" in label + or "ليست" in label + or "not" in label + or "no" in label + or "غير" in label + or "لا" in label + ): + return "NOT_ADULT" + elif "للبالغين" in label or "نعم" in label or "adult" in label or "بالغين" in label: + return "ADULT" + else: + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_Llama3-8b_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_Llama3-8b_ZeroShot_English.py new file mode 100755 index 00000000..846965f5 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_Llama3-8b_ZeroShot_English.py @@ -0,0 +1,66 @@ +from llmebench.datasets import 
def metadata():
    """Asset metadata consumed by the LLMeBench registry."""
    description = (
        "For a comprehensive analysis and results, refer to our peer-reviewed "
        "publication available at "
        "[Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore "
        "the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)."
    )
    return {
        "author": (
            "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, "
            "Boushra Bendou, Maram Hasanain, and Firoj Alam"
        ),
        "affiliation": (
            "Arabic Language Technologies, Qatar Computing Research Institute "
            "(QCRI), Hamad Bin Khalifa University (HBKU)"
        ),
        "model": "Llama-3.1-8B-Instruct",
        "description": description,
    }
" + "In case of adult language, just write 'adult' without explanation, and in case of not adult language, just write 'not adult' without explanation:\n\n" + + "Sentence: " + + input_sample + + "\nLabel: " + ), + } + ] + + +def post_process(response): + # if not response or 'error' in response or 'output' not in response: + # print("Error or missing output in response:", response) + # return "NOT_ADULT" # Safely default to NOT_ADULT when unsure + + label = response["output"].strip().lower() + label = label.replace("التصنيف:", "").strip() + label = label.replace("label:", "").strip() + + label = label.replace("", "").replace("", "") + label = label.lower() + + if ( + "ليس" in label + or "ليست" in label + or "not" in label + or "no" in label + or "غير" in label + or "لا" in label + ): + return "NOT_ADULT" + elif "للبالغين" in label or "نعم" in label or "adult" in label or "بالغين" in label: + return "ADULT" + else: + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_Llama3-8b_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_Llama3-8b_ZeroShot_Mixed.py new file mode 100755 index 00000000..e540e19a --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_Llama3-8b_ZeroShot_Mixed.py @@ -0,0 +1,61 @@ +from llmebench.datasets import AdultDataset +from llmebench.models import AzureModel +from llmebench.tasks import AdultTask + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "Llama-3.1-8B-Instruct", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the 
def post_process(response):
    """Map the raw Azure model output to a task label.

    Args:
        response: AzureModel response dict; the reply text is response["output"].

    Returns:
        "ADULT", "NOT_ADULT", or None when the reply matches no known pattern.
    """
    label = response["output"].strip().lower()
    # Strip prompt-echo prefixes the model sometimes repeats.
    label = label.replace("التصنيف:", "").replace("label:", "").strip()

    # Negations are checked first: "not adult" contains "adult" and
    # "ليست للبالغين" contains "للبالغين".
    if (
        "ليس" in label
        or "ليست" in label
        or "not" in label
        or "no" in label
        or "غير" in label
        or "لا" in label
    ):
        return "NOT_ADULT"
    if "للبالغين" in label or "نعم" in label or "adult" in label or "بالغين" in label:
        return "ADULT"
    return None