From 8ba37eb6ce1d4d19abd2d0081a60046a86646d8b Mon Sep 17 00:00:00 2001
From: MohamedBayan <mohamadbayan2014noname@gmail.com>
Date: Mon, 20 Jan 2025 15:51:39 +0300
Subject: [PATCH 1/3] Add propaganda assets

---
 .../WANLP22T3_GPT4_FewShot_Arabic.py          | 152 ++++++++
 .../WANLP22T3_GPT4_FewShot_English.py         | 200 +++++++++++
 .../WANLP22T3_GPT4_FewShot_Mixed.py           | 143 ++++++++
 .../WANLP22T3_GPT4_ZeroShot_Arabic.py         | 116 +++++++
 .../WANLP22T3_GPT4_ZeroShot_English.py        | 175 ++++++++++
 .../WANLP22T3_GPT4_ZeroShot_Mixed.py          | 116 +++++++
 .../WANLP22T3_JAIS13b_FewShot_Arabic.py       | 143 ++++++++
 .../WANLP22T3_JAIS13b_FewShot_English.py      | 183 ++++++++++
 .../WANLP22T3_JAIS13b_FewShot_Mixed.py        | 116 +++++++
 .../WANLP22T3_JAIS13b_ZeroShot_Arabic.py      | 140 ++++++++
 .../WANLP22T3_JAIS13b_ZeroShot_English.py     | 164 +++++++++
 .../WANLP22T3_JAIS13b_ZeroShot_Mixed.py       | 136 ++++++++
 .../WANLP22T3_Llama3-8b_FewShot_Arabic.py     | 327 ++++++++++++++++++
 .../WANLP22T3_Llama3-8b_FewShot_English.py    | 142 ++++++++
 .../WANLP22T3_Llama3-8b_FewShot_Mixed.py      | 120 +++++++
 .../WANLP22T3_Llama3-8b_ZeroShot_Arabic.py    | 114 ++++++
 .../WANLP22T3_Llama3-8b_ZeroShot_English.py   | 188 ++++++++++
 .../WANLP22T3_Llama3-8b_ZeroShot_Mixed.py     | 113 ++++++
 18 files changed, 2788 insertions(+)
 create mode 100755 assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_FewShot_Arabic.py
 create mode 100755 assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_FewShot_English.py
 create mode 100755 assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_FewShot_Mixed.py
 create mode 100755 assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_ZeroShot_Arabic.py
 create mode 100755 assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_ZeroShot_English.py
 create mode 100755 assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_ZeroShot_Mixed.py
 create mode 100755 assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_FewShot_Arabic.py
 create mode 100755 assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_FewShot_English.py
 create mode 100755 assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_FewShot_Mixed.py
 create mode 100755 assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_ZeroShot_Arabic.py
 create mode 100755 assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_ZeroShot_English.py
 create mode 100755 assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_ZeroShot_Mixed.py
 create mode 100755 assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_FewShot_Arabic.py
 create mode 100755 assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_FewShot_English.py
 create mode 100755 assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_FewShot_Mixed.py
 create mode 100755 assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_ZeroShot_Arabic.py
 create mode 100755 assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_ZeroShot_English.py
 create mode 100755 assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_ZeroShot_Mixed.py

diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_FewShot_Arabic.py
new file mode 100755
index 00000000..8640392e
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_FewShot_Arabic.py
@@ -0,0 +1,152 @@
+import ast
+import random
+import re
+
+from llmebench.datasets import WANLP22T3PropagandaDataset
+from llmebench.models import OpenAIModel
+from llmebench.tasks import MultilabelPropagandaTask
+
+
+random.seed(1333)
+
+
+def metadata():  # Provenance of this asset: authors, affiliation, model name, reference.
+    return dict(
+        author="Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam",
+        affiliation="Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)",
+        model="GPT-4o-2024-05-22",
+        description="For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).",
+    )
+
+
+def config():  # Benchmark wiring: dataset, task and model classes plus their arguments.
+    return dict(
+        dataset=WANLP22T3PropagandaDataset,
+        dataset_args=dict(techniques_path="classes.txt"),
+        task=MultilabelPropagandaTask,
+        model=OpenAIModel,
+        model_args=dict(
+            class_labels=[
+                "no technique",
+                "Smears",
+                "Exaggeration/Minimisation",
+                "Loaded Language",
+                "Appeal to fear/prejudice",
+                "Name calling/Labeling",
+                "Slogans",
+                "Repetition",
+                "Doubt",
+                "Obfuscation, Intentional vagueness, Confusion",
+                "Flag-waving",
+                "Glittering generalities (Virtue)",
+                "Misrepresentation of Someone's Position (Straw Man)",
+                "Presenting Irrelevant Data (Red Herring)",
+                "Appeal to authority",
+                "Whataboutism",
+                "Black-and-white Fallacy/Dictatorship",
+                "Thought-terminating cliché",
+                "Causal Oversimplification",
+            ],
+            max_tries=30,
+        ),
+    )
+
+
+def translate_labels(label):
+    """Return the Arabic name of an English technique label; unknown labels pass through unchanged."""
+    return {
+        "no technique": "بدون تقنية",
+        "Smears": "تشويه",
+        "Exaggeration/Minimisation": "مبالغة/تقليل",
+        "Loaded Language": "لغة محملة بالمشاعر",
+        "Appeal to fear/prejudice": "الاحتكام إلى الخوف/التحيز",
+        "Name calling/Labeling": "التسمية/الملصقات",
+        "Slogans": "الشعارات",
+        "Repetition": "التكرار",
+        "Doubt": "الشك",
+        "Obfuscation, Intentional vagueness, Confusion": "التعمية/الغموض المتعمد/الارتباك",
+        "Flag-waving": "التلويح بالعلم",
+        "Glittering generalities (Virtue)": "التعميمات البراقة (الفضيلة)",
+        "Misrepresentation of Someone's Position (Straw Man)": "تحريف موقف شخص (مغالطة رجل القش)",
+        "Presenting Irrelevant Data (Red Herring)": "عرض بيانات غير ذات صلة (السمكة الحمراء)",
+        "Appeal to authority": "الاحتكام إلى السلطة",
+        "Whataboutism": "ماذا عن",
+        "Black-and-white Fallacy/Dictatorship": "مغالطة الأبيض والأسود/الديكتاتورية",
+        "Thought-terminating cliché": "الكليشيه القاطع للفكر",
+        "Causal Oversimplification": "التبسيط السببي",
+    }.get(label, label)
+
+
+def few_shot_prompt(input_sample, base_prompt, examples):
+    """Build the Arabic few-shot prompt: instructions, numbered labelled examples, then the target tweet."""
+    parts = [base_prompt, "\nاليك بعض الأمثلة:\n\n"]
+    for shot_no, shot in enumerate(examples):
+        techniques = ", ".join(f"'{translate_labels(name)}'" for name in shot["label"])
+        parts.append(f"مثال {shot_no}:\nالتغريدة: {shot['input']}\n")
+        parts.append(f"التصنيف: {techniques}\n\n")
+    parts.append(f"التغريدة: {input_sample}\nالتصنيف: \n")
+    return "".join(parts)
+
+
+def prompt(input_sample, examples):
+    """Return the system and user chat messages for few-shot labelling of one tweet (Arabic prompt)."""
+    instructions = """
+        "من القائمة، اختر التقنية الدعائية الأنسب للتغريدة: "بدون تقنية"، "تشويه"، "مبالغة/تقليل"، "لغة محملة بالمشاعر"، "الاحتكام إلى الخوف/التحيز"، "التسمية/الملصقات"، "الشعارات"، "التكرار"، "الشك"،
+        "التعمية/الغموض المتعمد/الارتباك"، "التلويح بالعلم"، "التعميمات البراقة (الفضيلة)"، "تحريف موقف شخص (مغالطة رجل القش)"، "عرض بيانات غير ذات صلة (السمكة الحمراء)"، "الاحتكام إلى السلطة"، 
+        "ماذا عن"، "مغالطة الأبيض والأسود/الديكتاتورية"، "الكليشيه القاطع للفكر"، أو "التبسيط السببي"."
+        """
+
+    system_message = {
+        "role": "system",
+        "content": "أنت خبير في تحليل محتوى وسائل التواصل الاجتماعي.",
+    }
+    user_message = {
+        "role": "user",
+        "content": few_shot_prompt(input_sample, instructions, examples),
+    }
+    return [system_message, user_message]
+
+
+def post_process(response):
+    """Convert the raw chat completion into a list of English technique labels.
+
+    An empty list means the reply reported no technique. Matches follow the
+    mapping's fixed order (not set order), so repeated runs give identical output.
+    """
+    label = response["choices"][0]["message"]["content"].strip().lower()
+    no_technique_markers = (
+        "لا يوجد في النص",
+        "doesn't",
+        "does not",
+        "لا يحتوي",
+        "لا يستخدم",
+    )
+    if label == "'no technique'" or any(m in label for m in no_technique_markers):
+        return []
+    label_mapping = {
+        "بدون تقنية": "no technique",
+        "تشويه": "Smears",
+        "مبالغة/تقليل": "Exaggeration/Minimisation",
+        "لغة محملة بالمشاعر": "Loaded Language",
+        "الاحتكام إلى الخوف/التحيز": "Appeal to fear/prejudice",
+        "التسمية/الملصقات": "Name calling/Labeling",
+        "الشعارات": "Slogans",
+        "التكرار": "Repetition",
+        "الشك": "Doubt",
+        "التعمية/الغموض المتعمد/الارتباك": "Obfuscation, Intentional vagueness, Confusion",
+        "التلويح بالعلم": "Flag-waving",
+        "التعميمات البراقة (الفضيلة)": "Glittering generalities (Virtue)",
+        "تحريف موقف شخص (مغالطة رجل القش)": "Misrepresentation of Someone's Position (Straw Man)",
+        "عرض بيانات غير ذات صلة (السمكة الحمراء)": "Presenting Irrelevant Data (Red Herring)",
+        "الاحتكام إلى السلطة": "Appeal to authority",
+        "ماذا عن": "Whataboutism",
+        "مغالطة الأبيض والأسود/الديكتاتورية": "Black-and-white Fallacy/Dictatorship",
+        "الكليشيه القاطع للفكر": "Thought-terminating cliché",
+        "التبسيط السببي": "Causal Oversimplification",
+    }
+    return [
+        english_label
+        for arabic_label, english_label in label_mapping.items()
+        if english_label != "no technique"
+        and (arabic_label in label or english_label.lower() in label)
+    ]
diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_FewShot_English.py
new file mode 100755
index 00000000..447e18c1
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_FewShot_English.py
@@ -0,0 +1,200 @@
+import random
+import re
+
+from llmebench.datasets import WANLP22T3PropagandaDataset
+from llmebench.models import OpenAIModel
+from llmebench.tasks import MultilabelPropagandaTask
+
+
+random.seed(1333)
+
+
+def metadata():  # Provenance of this asset: authors, affiliation, model name, reference.
+    return dict(
+        author="Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam",
+        affiliation="Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)",
+        model="GPT-4o-2024-05-22",
+        description="For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).",
+    )
+
+
+def config():  # Benchmark wiring: dataset, task and model classes plus their arguments.
+    return dict(
+        dataset=WANLP22T3PropagandaDataset,
+        dataset_args=dict(techniques_path="classes.txt"),
+        task=MultilabelPropagandaTask,
+        model=OpenAIModel,
+        model_args=dict(
+            class_labels=[
+                "no technique",
+                "Smears",
+                "Exaggeration/Minimisation",
+                "Loaded Language",
+                "Appeal to fear/prejudice",
+                "Name calling/Labeling",
+                "Slogans",
+                "Repetition",
+                "Doubt",
+                "Obfuscation, Intentional vagueness, Confusion",
+                "Flag-waving",
+                "Glittering generalities (Virtue)",
+                "Misrepresentation of Someone's Position (Straw Man)",
+                "Presenting Irrelevant Data (Red Herring)",
+                "Appeal to authority",
+                "Whataboutism",
+                "Black-and-white Fallacy/Dictatorship",
+                "Thought-terminating cliché",
+                "Causal Oversimplification",
+            ],
+            max_tries=30,
+        ),
+    )
+
+
+def few_shot_prompt(input_sample, base_prompt, examples):
+    """Assemble the English few-shot prompt.
+
+    Args:
+        input_sample: Tweet text the model must label.
+        base_prompt: Task instructions placed before the examples.
+        examples: Labelled shots, each with an "input" text and a "label" iterable.
+
+    Returns:
+        The instructions, the numbered examples, and the target tweet with an
+        empty label slot, concatenated into one string.
+    """
+    pieces = [base_prompt, "\n", "Here are some examples:\n\n"]
+
+    for shot_no, shot in enumerate(examples):
+        # Every label is quoted and followed by ", " (including the last one).
+        quoted = "".join(["'" + name + "', " for name in shot["label"]])
+        pieces.append("Example " + str(shot_no) + ":\n")
+        pieces.append("tweet: " + shot["input"])
+        pieces.append("\nlabel: " + quoted + "\n\n")
+
+    # The query tweet comes last; its label is left blank for the model to fill.
+    pieces.append("tweet: " + input_sample + "\nlabel: \n")
+
+    return "".join(pieces)
+
+
+def prompt(input_sample, examples):
+    """Return the system and user chat messages for few-shot propaganda-technique labelling."""
+    base_prompt = (
+        'Label this "tweet" based on the following propaganda techniques:\n\n'
+        "'no technique' , 'Smears' , 'Exaggeration/Minimisation' , 'Loaded Language' , 'Appeal to fear/prejudice' , 'Name calling/Labeling' , 'Slogans' , 'Repetition' , 'Doubt' , 'Obfuscation, Intentional vagueness, Confusion' , 'Flag-waving' , 'Glittering generalities (Virtue)' , 'Misrepresentation of Someone's Position (Straw Man)' , 'Presenting Irrelevant Data (Red Herring)' , 'Appeal to authority' , 'Whataboutism' , 'Black-and-white Fallacy/Dictatorship' , 'Thought-terminating cliché' , 'Causal Oversimplification'\n"
+        "Provide only labels as a list of strings.\n"  # on its own line, not glued to the last label
+    )
+    return [
+        {
+            "role": "system",
+            "content": "You are an expert social media content analyst.",
+        },
+        {
+            "role": "user",
+            "content": few_shot_prompt(input_sample, base_prompt, examples),
+        },
+    ]
+
+
+def fix_label(pred_label):
+    """Normalise a raw model reply into canonical technique names.
+
+    The reply is split on "', '". List brackets, quotes and commas left on the
+    first and last items are stripped, so single-word labels in a reply such
+    as "['smears', 'doubt']" are recognised as well.
+
+    Args:
+        pred_label: Reply text from the model (post_process passes it lower-cased).
+
+    Returns:
+        A list of technique names. "no technique" entries are removed when the
+        reply contained more than one item.
+    """
+    if "used in this text" in pred_label:
+        return ["no technique"]
+
+    labels_fixed = []
+    for label in pred_label.replace('"', "'").split("', '"):
+        label = label.replace(".", "").strip(" \t\r\n[]',")
+        label = re.sub("-", " ", label)
+        label = label.strip().lower()
+
+        # Handle case of single word labels like "Smears" so we just capitalize it
+        label_fixed = label.capitalize()
+
+        if "slogan" in label:
+            label_fixed = "Slogans"
+        if "loaded" in label:
+            label_fixed = "Loaded Language"
+        if "prejudice" in label or "fear" in label or "mongering" in label:
+            label_fixed = "Appeal to fear/prejudice"
+        if "terminating" in label or "thought" in label:
+            label_fixed = "Thought-terminating cliché"
+        if "calling" in label or label == "name c":
+            label_fixed = "Name calling/Labeling"
+        if "minimisation" in label or label == "exaggeration minim":
+            label_fixed = "Exaggeration/Minimisation"
+        if "glittering" in label:
+            label_fixed = "Glittering generalities (Virtue)"
+        if "flag" in label:
+            label_fixed = "Flag-waving"
+        if "obfuscation" in label:
+            label_fixed = "Obfuscation, Intentional vagueness, Confusion"
+        if "oversimplification" in label or "causal" in label:
+            label_fixed = "Causal Oversimplification"
+        if "authority" in label:
+            label_fixed = "Appeal to authority"
+        if "dictatorship" in label or "black" in label or "white" in label:
+            label_fixed = "Black-and-white Fallacy/Dictatorship"
+        if "herring" in label or "irrelevant" in label:
+            label_fixed = "Presenting Irrelevant Data (Red Herring)"
+        if "straw" in label or "misrepresentation" in label:
+            label_fixed = "Misrepresentation of Someone's Position (Straw Man)"
+        if "whataboutism" in label:
+            label_fixed = "Whataboutism"
+
+        # Checked last on purpose: these replies override any match above.
+        if (
+            "no propaganda" in label
+            or "technique" in label
+            or label == ""
+            or label == "no"
+            or label == "appeal to history"
+            or label == "appeal to emotion"
+            or label == "appeal to"
+            or label == "appeal"
+            or label == "appeal to author"
+            or label == "emotional appeal"
+            or "no techn" in label
+            or "hashtag" in label
+            or "theory" in label
+            or "specific mention" in label
+            or "religious" in label
+            or "gratitude" in label
+        ):
+            label_fixed = "no technique"
+
+        labels_fixed.append(label_fixed)
+
+    # Remove no technique label when we have other techniques for the same text
+    if len(labels_fixed) > 1:
+        return [fixed for fixed in labels_fixed if "no technique" not in fixed]
+
+    return labels_fixed
+
+
+def post_process(response):
+    """Turn the chat completion into technique labels; replies that deny any technique give []."""
+    reply = response["choices"][0]["message"]["content"].lower()
+    denial_markers = (
+        "لا يوجد في النص",
+        "doesn't",
+        "does not",
+        "لا يحتوي",
+        "لا يستخدم",
+    )
+    is_denial = reply == "'no technique'" or any(m in reply for m in denial_markers)
+    if is_denial:
+        return []
+    return fix_label(reply)
diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_FewShot_Mixed.py
new file mode 100755
index 00000000..c2a4f1ba
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_FewShot_Mixed.py
@@ -0,0 +1,143 @@
+import random
+import re
+
+from llmebench.datasets import WANLP22T3PropagandaDataset
+from llmebench.models import OpenAIModel
+from llmebench.tasks import MultilabelPropagandaTask
+
+
+random.seed(1333)
+
+
+def metadata():  # Provenance of this asset: authors, affiliation, model name, reference.
+    return dict(
+        author="Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam",
+        affiliation="Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)",
+        model="GPT-4o-2024-05-22",
+        description="For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).",
+    )
+
+
+def config():  # Benchmark wiring: dataset, task and model classes plus their arguments.
+    return dict(
+        dataset=WANLP22T3PropagandaDataset,
+        dataset_args=dict(techniques_path="classes.txt"),
+        task=MultilabelPropagandaTask,
+        model=OpenAIModel,
+        model_args=dict(
+            class_labels=[
+                "no technique",
+                "Smears",
+                "Exaggeration/Minimisation",
+                "Loaded Language",
+                "Appeal to fear/prejudice",
+                "Name calling/Labeling",
+                "Slogans",
+                "Repetition",
+                "Doubt",
+                "Obfuscation, Intentional vagueness, Confusion",
+                "Flag-waving",
+                "Glittering generalities (Virtue)",
+                "Misrepresentation of Someone's Position (Straw Man)",
+                "Presenting Irrelevant Data (Red Herring)",
+                "Appeal to authority",
+                "Whataboutism",
+                "Black-and-white Fallacy/Dictatorship",
+                "Thought-terminating cliché",
+                "Causal Oversimplification",
+            ],
+            max_tries=30,
+        ),
+    )
+
+
+def few_shot_prompt(input_sample, base_prompt, examples):
+    """Assemble the few-shot prompt with Arabic scaffolding and English labels.
+
+    Args:
+        input_sample: Tweet text the model must label.
+        base_prompt: Task instructions placed before the examples.
+        examples: Labelled shots, each with an "input" text and a "label" iterable.
+
+    Returns:
+        The instructions, the numbered examples, and the target tweet with an
+        empty label slot, concatenated into one string.
+    """
+    pieces = [base_prompt, "\n", "اليك بعض الأمثلة:\n\n"]
+
+    for index, example in enumerate(examples):
+        # Every label is quoted and followed by ", " (including the last one).
+        tech_str = "".join(["'" + t + "', " for t in example["label"]])
+        pieces.append("مثال " + str(index) + ":\n")
+        pieces.append("التغريدة: " + example["input"] + "\n")
+        pieces.append("التصنيف: " + tech_str + "\n\n")
+
+    # Append the tweet to classify and leave its label blank. The separator is
+    # a real "\n"; a bare backslash here would be emitted literally.
+    pieces.append("التغريدة: " + input_sample + "\nالتصنيف: \n")
+
+    return "".join(pieces)
+
+
+def prompt(input_sample, examples):
+    """Return the system and user chat messages for few-shot labelling (Arabic instructions, English labels)."""
+    base_prompt = (
+        'قم بتصنيف هذه "التغريدة" بناءً على تقنيات الدعاية التالية:\n\n'
+        "'no technique' , 'Smears' , 'Exaggeration/Minimisation' , 'Loaded Language' , 'Appeal to fear/prejudice' , 'Name calling/Labeling' , 'Slogans' , 'Repetition' , 'Doubt' , 'Obfuscation, Intentional vagueness, Confusion' , 'Flag-waving' , 'Glittering generalities (Virtue)' , 'Misrepresentation of Someone's Position (Straw Man)' , 'Presenting Irrelevant Data (Red Herring)' , 'Appeal to authority' , 'Whataboutism' , 'Black-and-white Fallacy/Dictatorship' , 'Thought-terminating cliché' , 'Causal Oversimplification'\n"
+        "قدم التصنيفات فقط كقائمة من النصوص.\n"  # on its own line, not glued to the last label
+    )
+    return [
+        {
+            "role": "system",
+            "content": "أنت خبير في تحليل محتوى وسائل التواصل الاجتماعي.",
+        },
+        {
+            "role": "user",
+            "content": few_shot_prompt(input_sample, base_prompt, examples),
+        },
+    ]
+
+
+def post_process(response):
+    """Convert the raw chat completion into a list of English technique labels.
+
+    An empty list means the reply reported no technique. Matches follow the
+    mapping's fixed order (not set order), so repeated runs give identical output.
+    """
+    label = response["choices"][0]["message"]["content"].strip().lower()
+    no_technique_markers = (
+        "لا يوجد في النص",
+        "doesn't",
+        "does not",
+        "لا يحتوي",
+        "لا يستخدم",
+    )
+    if label == "'no technique'" or any(m in label for m in no_technique_markers):
+        return []
+    label_mapping = {
+        "بدون تقنية": "no technique",
+        "تشويه": "Smears",
+        "مبالغة/تقليل": "Exaggeration/Minimisation",
+        "لغة محملة بالمشاعر": "Loaded Language",
+        "الاحتكام إلى الخوف/التحيز": "Appeal to fear/prejudice",
+        "التسمية/الملصقات": "Name calling/Labeling",
+        "الشعارات": "Slogans",
+        "التكرار": "Repetition",
+        "الشك": "Doubt",
+        "التعمية/الغموض المتعمد/الارتباك": "Obfuscation, Intentional vagueness, Confusion",
+        "التلويح بالعلم": "Flag-waving",
+        "التعميمات البراقة (الفضيلة)": "Glittering generalities (Virtue)",
+        "تحريف موقف شخص (مغالطة رجل القش)": "Misrepresentation of Someone's Position (Straw Man)",
+        "عرض بيانات غير ذات صلة (السمكة الحمراء)": "Presenting Irrelevant Data (Red Herring)",
+        "الاحتكام إلى السلطة": "Appeal to authority",
+        "ماذا عن": "Whataboutism",
+        "مغالطة الأبيض والأسود/الديكتاتورية": "Black-and-white Fallacy/Dictatorship",
+        "الكليشيه القاطع للفكر": "Thought-terminating cliché",
+        "التبسيط السببي": "Causal Oversimplification",
+    }
+    return [
+        english_label
+        for arabic_label, english_label in label_mapping.items()
+        if english_label != "no technique"
+        and (arabic_label in label or english_label.lower() in label)
+    ]
diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_ZeroShot_Arabic.py
new file mode 100755
index 00000000..da7aa2d9
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_ZeroShot_Arabic.py
@@ -0,0 +1,116 @@
+import random
+import re
+
+from llmebench.datasets import WANLP22T3PropagandaDataset
+from llmebench.models import OpenAIModel
+from llmebench.tasks import MultilabelPropagandaTask
+
+
+random.seed(1333)
+
+
+def metadata():  # Provenance of this asset: authors, affiliation, model name, reference.
+    return dict(
+        author="Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam",
+        affiliation="Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)",
+        model="GPT-4o-2024-05-22",
+        description="For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).",
+    )
+
+
+def config():  # Benchmark wiring: dataset, task and model classes plus their arguments.
+    return dict(
+        dataset=WANLP22T3PropagandaDataset,
+        dataset_args=dict(techniques_path="classes.txt"),
+        task=MultilabelPropagandaTask,
+        model=OpenAIModel,
+        model_args=dict(
+            class_labels=[
+                "no technique",
+                "Smears",
+                "Exaggeration/Minimisation",
+                "Loaded Language",
+                "Appeal to fear/prejudice",
+                "Name calling/Labeling",
+                "Slogans",
+                "Repetition",
+                "Doubt",
+                "Obfuscation, Intentional vagueness, Confusion",
+                "Flag-waving",
+                "Glittering generalities (Virtue)",
+                "Misrepresentation of Someone's Position (Straw Man)",
+                "Presenting Irrelevant Data (Red Herring)",
+                "Appeal to authority",
+                "Whataboutism",
+                "Black-and-white Fallacy/Dictatorship",
+                "Thought-terminating cliché",
+                "Causal Oversimplification",
+            ],
+            max_tries=30,
+        ),
+    )
+
+
+def prompt(input_sample):
+    """Return the system and user chat messages for zero-shot labelling of one tweet (Arabic prompt)."""
+    base_prompt = """
+        "من القائمة، اختر التقنية الدعائية الأنسب للتغريدة: "بدون تقنية"، "تشويه"، "مبالغة/تقليل"، "لغة محملة بالمشاعر"، "الاحتكام إلى الخوف/التحيز"، "التسمية/الملصقات"، "الشعارات"، "التكرار"، "الشك"،
+        "التعمية/الغموض المتعمد/الارتباك"، "التلويح بالعلم"، "التعميمات البراقة (الفضيلة)"، "تحريف موقف شخص (مغالطة رجل القش)"، "عرض بيانات غير ذات صلة (السمكة الحمراء)"، "الاحتكام إلى السلطة"، 
+        "ماذا عن"، "مغالطة الأبيض والأسود/الديكتاتورية"، "الكليشيه القاطع للفكر"، أو "التبسيط السببي"."
+        """
+    # A newline after the tweet keeps the label header from running into the tweet text.
+    prompt_string = base_prompt + "التغريدة: \n\n" + input_sample + "\nالتصنيف: \n"
+    return [
+        {
+            "role": "system",
+            "content": "أنت خبير في تحليل محتوى وسائل التواصل الاجتماعي.",
+        },
+        {
+            "role": "user",
+            "content": prompt_string,
+        },
+    ]
+
+
+def post_process(response):
+    """Convert the raw chat completion into a list of English technique labels.
+    An empty list means the reply reported no technique. Matches follow the
+    mapping's fixed order (not set order), so repeated runs give identical output.
+    """
+    label = response["choices"][0]["message"]["content"].strip().lower()
+    no_technique_markers = (
+        "لا يوجد في النص",
+        "doesn't",
+        "does not",
+        "لا يحتوي",
+        "لا يستخدم",
+    )
+    if label == "'no technique'" or any(m in label for m in no_technique_markers):
+        return []
+    label_mapping = {
+        "بدون تقنية": "no technique",
+        "تشويه": "Smears",
+        "مبالغة/تقليل": "Exaggeration/Minimisation",
+        "لغة محملة بالمشاعر": "Loaded Language",
+        "الاحتكام إلى الخوف/التحيز": "Appeal to fear/prejudice",
+        "التسمية/الملصقات": "Name calling/Labeling",
+        "الشعارات": "Slogans",
+        "التكرار": "Repetition",
+        "الشك": "Doubt",
+        "التعمية/الغموض المتعمد/الارتباك": "Obfuscation, Intentional vagueness, Confusion",
+        "التلويح بالعلم": "Flag-waving",
+        "التعميمات البراقة (الفضيلة)": "Glittering generalities (Virtue)",
+        "تحريف موقف شخص (مغالطة رجل القش)": "Misrepresentation of Someone's Position (Straw Man)",
+        "عرض بيانات غير ذات صلة (السمكة الحمراء)": "Presenting Irrelevant Data (Red Herring)",
+        "الاحتكام إلى السلطة": "Appeal to authority",
+        "ماذا عن": "Whataboutism",
+        "مغالطة الأبيض والأسود/الديكتاتورية": "Black-and-white Fallacy/Dictatorship",
+        "الكليشيه القاطع للفكر": "Thought-terminating cliché",
+        "التبسيط السببي": "Causal Oversimplification",
+    }
+    return [
+        english_label
+        for arabic_label, english_label in label_mapping.items()
+        if english_label != "no technique"
+        and (arabic_label in label or english_label.lower() in label)
+    ]
diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_ZeroShot_English.py
new file mode 100755
index 00000000..c02e72b0
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_ZeroShot_English.py
@@ -0,0 +1,175 @@
+import random
+import re
+
+from llmebench.datasets import WANLP22T3PropagandaDataset
+from llmebench.models import OpenAIModel
+from llmebench.tasks import MultilabelPropagandaTask
+
+
+random.seed(1333)
+
+
+def metadata():
+    """Return the static provenance record: authors, affiliation, model and paper links."""
+    author = "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam"
+    affiliation = "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)"
+    description = "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)."
+    model = "GPT-4o-2024-05-22"
+    return {"author": author, "affiliation": affiliation, "model": model, "description": description}
+
+
+def config():
+    """Return the benchmark wiring: dataset, task, model class and model arguments."""
+    techniques = [
+        "no technique",
+        "Smears",
+        "Exaggeration/Minimisation",
+        "Loaded Language",
+        "Appeal to fear/prejudice",
+        "Name calling/Labeling",
+        "Slogans",
+        "Repetition",
+        "Doubt",
+        "Obfuscation, Intentional vagueness, Confusion",
+        "Flag-waving",
+        "Glittering generalities (Virtue)",
+        "Misrepresentation of Someone's Position (Straw Man)",
+        "Presenting Irrelevant Data (Red Herring)",
+        "Appeal to authority",
+        "Whataboutism",
+        "Black-and-white Fallacy/Dictatorship",
+        "Thought-terminating cliché",
+        "Causal Oversimplification",
+    ]
+    model_args = {"class_labels": techniques, "max_tries": 30}
+    return {
+        "dataset": WANLP22T3PropagandaDataset,
+        "dataset_args": {"techniques_path": "classes.txt"},
+        "task": MultilabelPropagandaTask,
+        "model": OpenAIModel,
+        "model_args": model_args,
+    }
+
+
+def prompt(input_sample):
+    """Build the zero-shot chat messages (system turn + user turn) for one tweet."""
+    # NOTE(review): the label list runs straight into "Provide only labels ..." with
+    # no separator; the text is kept byte-identical so the evaluated prompt is unchanged.
+    pieces = [
+        'Label this "tweet" based on the following propaganda techniques:\n\n',
+        "'no technique' , 'Smears' , 'Exaggeration/Minimisation' , 'Loaded Language' , 'Appeal to fear/prejudice' , 'Name calling/Labeling' , 'Slogans' , 'Repetition' , 'Doubt' , 'Obfuscation, Intentional vagueness, Confusion' , 'Flag-waving' , 'Glittering generalities (Virtue)' , 'Misrepresentation of Someone's Position (Straw Man)' , 'Presenting Irrelevant Data (Red Herring)' , 'Appeal to authority' , 'Whataboutism' , 'Black-and-white Fallacy/Dictatorship' , 'Thought-terminating cliché' , 'Causal Oversimplification'",
+        "Provide only labels as a list of strings.\n",
+        f"tweet: {input_sample}\n\n",
+        "label: \n",
+    ]
+    system_turn = {
+        "role": "system",
+        "content": "You are an expert social media content analyst.",
+    }
+    user_turn = {"role": "user", "content": "".join(pieces)}
+
+    # The system turn sets the persona; the user turn carries the task and the tweet.
+    return [system_turn, user_turn]
+
+
+def fix_label(pred_label):
+    """Normalise a raw model answer into a list of canonical technique labels.
+
+    The answer is split on newlines and each line is mapped on its own: keyword
+    rules are tried in a fixed order and the last rule that matches wins, so a line
+    yields exactly one label. A line matching no rule is passed through capitalised.
+    When the answer has more than one line, "no technique" entries are removed.
+
+    Args:
+        pred_label: Raw answer text from the model.
+
+    Returns:
+        A list of label strings.
+    """
+    if "used in this text" in pred_label:
+        return ["no technique"]
+
+    # (canonical label, substrings that select it, whole-line spellings that select it)
+    rules = (
+        ("Slogans", ("slogan",), ()),
+        ("Loaded Language", ("loaded",), ()),
+        ("Appeal to fear/prejudice", ("prejudice", "fear", "mongering"), ()),
+        ("Thought-terminating cliché", ("terminating", "thought"), ()),
+        ("Name calling/Labeling", ("calling",), ("name c",)),
+        ("Exaggeration/Minimisation", ("minimisation",), ("exaggeration minim",)),
+        ("Glittering generalities (Virtue)", ("glittering",), ()),
+        ("Flag-waving", ("flag",), ()),
+        ("Obfuscation, Intentional vagueness, Confusion", ("obfuscation",), ()),
+        ("Causal Oversimplification", ("oversimplification", "causal"), ()),
+        ("Appeal to authority", ("authority",), ()),
+        ("Black-and-white Fallacy/Dictatorship", ("dictatorship", "black", "white"), ()),
+        ("Presenting Irrelevant Data (Red Herring)", ("herring", "irrelevant"), ()),
+        (
+            "Misrepresentation of Someone's Position (Straw Man)",
+            ("straw", "misrepresentation"),
+            (),
+        ),
+        ("Whataboutism", ("whataboutism",), ()),
+    )
+
+    # Lines treated as "no technique": either one of these substrings occurs ...
+    none_substrings = (
+        "no propaganda",
+        "technique",
+        "no techn",
+        "hashtag",
+        "theory",
+        "specific mention",
+        "religious",
+        "gratitude",
+    )
+    # ... or the whole line equals one of these.
+    none_exact = (
+        "",
+        "no",
+        "appeal to history",
+        "appeal to emotion",
+        "appeal to",
+        "appeal",
+        "appeal to author",
+        "emotional appeal",
+    )
+
+    labels_fixed = []
+    for raw_line in pred_label.replace('"', "'").split("\n"):
+        line = raw_line.replace(".", "").strip().replace("-", " ").strip().lower()
+
+        # Handle case of single word labels like "Smears" so we just capitalize it
+        resolved = line.capitalize()
+        for canonical, substrings, exact in rules:
+            # No break: a later matching rule overrides an earlier one.
+            if line in exact or any(s in line for s in substrings):
+                resolved = canonical
+
+        # The "no technique" check runs last and overrides any rule match.
+        if line in none_exact or any(s in line for s in none_substrings):
+            resolved = "no technique"
+
+        labels_fixed.append(resolved)
+
+    # Remove no technique label when we have other techniques for the same text
+    if len(labels_fixed) > 1:
+        return [fixed for fixed in labels_fixed if "no technique" not in fixed]
+
+    return labels_fixed
+
+
+def post_process(response):
+    """Turn the raw chat response into a list of technique labels."""
+    answer = response["choices"][0]["message"]["content"].lower()
+    # Phrases (Arabic and English) that short-circuit to an empty label list.
+    no_technique_markers = (
+        "لا يوجد في النص",
+        "doesn't",
+        "does not",
+        "لا يحتوي",
+        "لا يستخدم",
+    )
+    if answer == "'no technique'" or any(m in answer for m in no_technique_markers):
+        return []
+    return fix_label(answer)
diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_ZeroShot_Mixed.py
new file mode 100755
index 00000000..c647b461
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_ZeroShot_Mixed.py
@@ -0,0 +1,116 @@
+import random
+import re
+
+from llmebench.datasets import WANLP22T3PropagandaDataset
+from llmebench.models import OpenAIModel
+from llmebench.tasks import MultilabelPropagandaTask
+
+
+random.seed(1333)
+
+
+def metadata():
+    """Return the static provenance record: authors, affiliation, model and paper links."""
+    author = "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam"
+    affiliation = "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)"
+    description = "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)."
+    model = "GPT-4o-2024-05-22"
+    return {"author": author, "affiliation": affiliation, "model": model, "description": description}
+
+
+def config():
+    """Return the benchmark wiring: dataset, task, model class and model arguments."""
+    techniques = [
+        "no technique",
+        "Smears",
+        "Exaggeration/Minimisation",
+        "Loaded Language",
+        "Appeal to fear/prejudice",
+        "Name calling/Labeling",
+        "Slogans",
+        "Repetition",
+        "Doubt",
+        "Obfuscation, Intentional vagueness, Confusion",
+        "Flag-waving",
+        "Glittering generalities (Virtue)",
+        "Misrepresentation of Someone's Position (Straw Man)",
+        "Presenting Irrelevant Data (Red Herring)",
+        "Appeal to authority",
+        "Whataboutism",
+        "Black-and-white Fallacy/Dictatorship",
+        "Thought-terminating cliché",
+        "Causal Oversimplification",
+    ]
+    model_args = {"class_labels": techniques, "max_tries": 30}
+    return {
+        "dataset": WANLP22T3PropagandaDataset,
+        "dataset_args": {"techniques_path": "classes.txt"},
+        "task": MultilabelPropagandaTask,
+        "model": OpenAIModel,
+        "model_args": model_args,
+    }
+
+
+def prompt(input_sample):
+    """Build the zero-shot chat messages (Arabic instruction, English label names)."""
+    # NOTE(review): the label list runs straight into the following Arabic sentence with
+    # no separator; the text is kept byte-identical so the evaluated prompt is unchanged.
+    pieces = [
+        'قم بتصنيف هذه "التغريدة" بناءً على تقنيات الدعاية التالية:\n\n',
+        "'no technique' , 'Smears' , 'Exaggeration/Minimisation' , 'Loaded Language' , 'Appeal to fear/prejudice' , 'Name calling/Labeling' , 'Slogans' , 'Repetition' , 'Doubt' , 'Obfuscation, Intentional vagueness, Confusion' , 'Flag-waving' , 'Glittering generalities (Virtue)' , 'Misrepresentation of Someone's Position (Straw Man)' , 'Presenting Irrelevant Data (Red Herring)' , 'Appeal to authority' , 'Whataboutism' , 'Black-and-white Fallacy/Dictatorship' , 'Thought-terminating cliché' , 'Causal Oversimplification'",
+        "قدم التصنيفات فقط كقائمة من النصوص.\n",
+        f"التغريدة: {input_sample}\n\n",
+        "التصنيف: \n",
+    ]
+    system_turn = {
+        "role": "system",
+        "content": "أنت خبير في تحليل محتوى وسائل التواصل الاجتماعي.",
+    }
+    user_turn = {"role": "user", "content": "".join(pieces)}
+
+    # The system turn sets the persona; the user turn carries the task and the tweet.
+    return [system_turn, user_turn]
+
+
+def post_process(response):
+    """Map the model's answer to canonical technique labels, in ``label_mapping`` order."""
+    label = response["choices"][0]["message"]["content"].strip().lower()
+    no_technique_markers = (
+        "لا يوجد في النص",
+        "doesn't",
+        "does not",
+        "لا يحتوي",
+        "لا يستخدم",
+    )
+    if label == "'no technique'" or any(m in label for m in no_technique_markers):
+        return []
+    label_mapping = {
+        "بدون تقنية": "no technique",
+        "تشويه": "Smears",
+        "مبالغة/تقليل": "Exaggeration/Minimisation",
+        "لغة محملة بالمشاعر": "Loaded Language",
+        "الاحتكام إلى الخوف/التحيز": "Appeal to fear/prejudice",
+        "التسمية/الملصقات": "Name calling/Labeling",
+        "الشعارات": "Slogans",
+        "التكرار": "Repetition",
+        "الشك": "Doubt",
+        "التعمية/الغموض المتعمد/الارتباك": "Obfuscation, Intentional vagueness, Confusion",
+        "التلويح بالعلم": "Flag-waving",
+        "التعميمات البراقة (الفضيلة)": "Glittering generalities (Virtue)",
+        "تحريف موقف شخص (مغالطة رجل القش)": "Misrepresentation of Someone's Position (Straw Man)",
+        "عرض بيانات غير ذات صلة (السمكة الحمراء)": "Presenting Irrelevant Data (Red Herring)",
+        "الاحتكام إلى السلطة": "Appeal to authority",
+        "ماذا عن": "Whataboutism",
+        "مغالطة الأبيض والأسود/الديكتاتورية": "Black-and-white Fallacy/Dictatorship",
+        "الكليشيه القاطع للفكر": "Thought-terminating cliché",
+        "التبسيط السببي": "Causal Oversimplification",
+    }
+
+    # Dict values are unique, so a single ordered pass yields each label once; going
+    # through a set would only scramble the order between runs. "no technique" is dropped.
+    return [
+        english_label
+        for arabic_label, english_label in label_mapping.items()
+        if english_label != "no technique"
+        and (arabic_label in label or english_label.lower() in label)
+    ]
diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_FewShot_Arabic.py
new file mode 100755
index 00000000..f2de7611
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_FewShot_Arabic.py
@@ -0,0 +1,143 @@
+import random
+import re
+
+from llmebench.datasets import WANLP22T3PropagandaDataset
+from llmebench.models import FastChatModel
+from llmebench.tasks import MultilabelPropagandaTask
+
+random.seed(1333)
+
+
+def metadata():
+    """Return the static provenance record: authors, affiliation, model and paper links."""
+    author = "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam"
+    affiliation = "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)"
+    description = "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)."
+    model = "jais-13b-chat"
+    return {"author": author, "affiliation": affiliation, "model": model, "description": description}
+
+
+def config():
+    """Return the benchmark wiring: dataset, task, model class and model arguments."""
+    techniques = [
+        "no technique",
+        "Smears",
+        "Exaggeration/Minimisation",
+        "Loaded Language",
+        "Appeal to fear/prejudice",
+        "Name calling/Labeling",
+        "Slogans",
+        "Repetition",
+        "Doubt",
+        "Obfuscation, Intentional vagueness, Confusion",
+        "Flag-waving",
+        "Glittering generalities (Virtue)",
+        "Misrepresentation of Someone's Position (Straw Man)",
+        "Presenting Irrelevant Data (Red Herring)",
+        "Appeal to authority",
+        "Whataboutism",
+        "Black-and-white Fallacy/Dictatorship",
+        "Thought-terminating cliché",
+        "Causal Oversimplification",
+    ]
+    model_args = {"class_labels": techniques, "max_tries": 3}
+    return {
+        "dataset": WANLP22T3PropagandaDataset,
+        "dataset_args": {"techniques_path": "classes.txt"},
+        "task": MultilabelPropagandaTask,
+        "model": FastChatModel,
+        "model_args": model_args,
+    }
+
+
+def few_shot_prompt(input_sample, examples):
+    """Build the Arabic few-shot prompt: instruction, labelled examples, target tweet."""
+    instruction = """
+    "من القائمة، اختر التقنية الدعائية الأنسب للتغريدة: "بدون تقنية"، "تشويه"، "مبالغة/تقليل"، "لغة محملة"، "النداء إلى الخوف/التحيز"، "التسمية/الملصقات"، "الشعارات"، "التكرار"، "الشك"،
+    "التعمية/الغموض المتعمد/الارتباك"، "التلويح بالعلم"، "التعميمات البراقة (الفضيلة)"، "تحريف موقف شخص (رجل القش)"، "عرض بيانات غير ذات صلة (السمكة الحمراء)"، "النداء إلى السلطة"، 
+    "ماذا عن"، "مغالطة الأبيض والأسود/الديكتاتورية"، "الكليشيه القاطع للفكر"، أو "التبسيط السببي"."
+    """
+    label_mapping = {
+        "no technique": "بدون تقنية",
+        "Smears": "تشويه",
+        "Exaggeration/Minimisation": "مبالغة/تقليل",
+        "Loaded Language": "لغة محملة",
+        "Appeal to fear/prejudice": "النداء إلى الخوف/التحيز",
+        "Name calling/Labeling": "التسمية/الملصقات",
+        "Slogans": "الشعارات",
+        "Repetition": "التكرار",
+        "Doubt": "الشك",
+        "Obfuscation, Intentional vagueness, Confusion": "التعمية/الغموض المتعمد/الارتباك",
+        "Obfuscation Intentional vagueness Confusion": "التعمية/الغموض المتعمد/الارتباك",
+        "Flag-waving": "التلويح بالعلم",
+        "Glittering generalities (Virtue)": "التعميمات البراقة (الفضيلة)",
+        "Misrepresentation of Someone's Position (Straw Man)": "تحريف موقف شخص (رجل القش)",
+        "Presenting Irrelevant Data (Red Herring)": "عرض بيانات غير ذات صلة (السمكة الحمراء)",
+        "Appeal to authority": "النداء إلى السلطة",
+        "Whataboutism": "ماذا عن",
+        "Black-and-white Fallacy/Dictatorship": "مغالطة الأبيض والأسود/الديكتاتورية",
+        "Thought-terminating cliché": "الكليشيه القاطع للفكر",
+        "Causal Oversimplification": "التبسيط السببي",
+    }
+
+    few_shot_text = instruction + "\n\nإليك بعض الأمثلة:\n\n"
+    for index, example in enumerate(examples):
+        # Each example has to be written into the prompt, otherwise no shots are shown.
+        # Labels are rendered in Arabic to match the instruction; a label missing from
+        # the mapping is kept verbatim rather than dropped.
+        labels = ", ".join(label_mapping.get(l, l) for l in example["label"])
+        few_shot_text += (
+            f"مثال {index + 1}:\n"
+            f"التغريدة: '{example['input']}'\n"
+            f"التصنيف: {labels}\n\n"
+        )
+
+    few_shot_text += f"الآن، قم بتقييم التغريدة الجديدة التالية:\nالتغريدة: '{input_sample}'\nالتصنيف: "
+    return few_shot_text
+
+
+def prompt(input_sample, examples):
+    return [dict(role="user", content=few_shot_prompt(input_sample, examples))]
+
+
+def post_process(response):
+    """Map the model's answer to canonical (English) technique labels.
+
+    A label is reported when its Arabic name or its lower-cased English name occurs
+    in the response text. Each label appears at most once, in ``label_mapping`` order.
+    """
+    label = response["choices"][0]["message"]["content"].lower()
+    label = re.sub(r"<[^>]+>", "", label)  # Remove any HTML-like tags
+
+    label_mapping = {
+        "بدون تقنية": "no technique",
+        "تشويه": "Smears",
+        "مبالغة/تقليل": "Exaggeration/Minimisation",
+        "لغة محملة": "Loaded Language",
+        "النداء إلى الخوف/التحيز": "Appeal to fear/prejudice",
+        "التسمية/الملصقات": "Name calling/Labeling",
+        "الشعارات": "Slogans",
+        "التكرار": "Repetition",
+        "الشك": "Doubt",
+        "التعمية/الغموض المتعمد/الارتباك": "Obfuscation, Intentional vagueness, Confusion",
+        "التلويح بالعلم": "Flag-waving",
+        "التعميمات البراقة (الفضيلة)": "Glittering generalities (Virtue)",
+        "تحريف موقف شخص (رجل القش)": "Misrepresentation of Someone's Position (Straw Man)",
+        "عرض بيانات غير ذات صلة (السمكة الحمراء)": "Presenting Irrelevant Data (Red Herring)",
+        "النداء إلى السلطة": "Appeal to authority",
+        "ماذا عن": "Whataboutism",
+        "مغالطة الأبيض والأسود/الديكتاتورية": "Black-and-white Fallacy/Dictatorship",
+        "الكليشيه القاطع للفكر": "Thought-terminating cliché",
+        "التبسيط السببي": "Causal Oversimplification",
+    }
+
+    # Dict values are unique, so a single ordered pass yields each label once;
+    # going through a set would only scramble the order between runs.
+    detected_labels = [
+        english_label
+        for arabic_label, english_label in label_mapping.items()
+        if arabic_label in label or english_label.lower() in label
+    ]
+
+    # Unlike the zero-shot variants, "no technique" is kept in the result here.
+    return detected_labels
diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_FewShot_English.py
new file mode 100755
index 00000000..86e8afe6
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_FewShot_English.py
@@ -0,0 +1,183 @@
+import random
+import re
+
+from llmebench.datasets import WANLP22T3PropagandaDataset
+from llmebench.models import FastChatModel
+from llmebench.tasks import MultilabelPropagandaTask
+
+
+random.seed(1333)
+
+
+def metadata():
+    """Return the static provenance record: authors, affiliation, model and paper links."""
+    author = "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam"
+    affiliation = "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)"
+    description = "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)."
+    model = "jais-13b-chat"
+    return {"author": author, "affiliation": affiliation, "model": model, "description": description}
+
+
+def config():
+    """Return the benchmark wiring: dataset, task, model class and model arguments."""
+    techniques = [
+        "no technique",
+        "Smears",
+        "Exaggeration/Minimisation",
+        "Loaded Language",
+        "Appeal to fear/prejudice",
+        "Name calling/Labeling",
+        "Slogans",
+        "Repetition",
+        "Doubt",
+        "Obfuscation, Intentional vagueness, Confusion",
+        "Flag-waving",
+        "Glittering generalities (Virtue)",
+        "Misrepresentation of Someone's Position (Straw Man)",
+        "Presenting Irrelevant Data (Red Herring)",
+        "Appeal to authority",
+        "Whataboutism",
+        "Black-and-white Fallacy/Dictatorship",
+        "Thought-terminating cliché",
+        "Causal Oversimplification",
+    ]
+    model_args = {"class_labels": techniques, "max_tries": 3}
+    return {
+        "dataset": WANLP22T3PropagandaDataset,
+        "dataset_args": {"techniques_path": "classes.txt"},
+        "task": MultilabelPropagandaTask,
+        "model": FastChatModel,
+        "model_args": model_args,
+    }
+
+
+def few_shot_prompt(input_sample, base_prompt, examples):
+    """Assemble the few-shot prompt: instruction, labelled examples, then the target tweet.
+
+    Args:
+        input_sample: Tweet text to be classified.
+        base_prompt: Instruction text placed at the top of the prompt.
+        examples: Iterable of dicts with "input" (tweet text) and "label"
+            (iterable of technique names).
+
+    Returns:
+        The full prompt string, ending with an empty "Label:" slot.
+    """
+    sections = [base_prompt, "\n\n", "Here are some examples:\n\n"]
+    # Examples are numbered from 0, as produced by enumerate().
+    for number, shot in enumerate(examples):
+        # Each technique is single-quoted and the list is comma separated.
+        quoted = ", ".join("'" + name + "'" for name in shot["label"])
+        sections.append(
+            "Example " + str(number) + ":\nTweet: " + shot["input"]
+            + "\nLabel: " + quoted + "\n\n"
+        )
+
+    # Append the sentence we want the model to predict for but leave the Label blank
+    sections.append("Tweet: " + input_sample + "\nLabel: \n")
+    return "".join(sections)
+
+
+def prompt(input_sample, examples):
+    """Build the single-turn few-shot chat request for one tweet."""
+    # NOTE(review): the instruction spells the obfuscation class with slashes, while
+    # config() lists it with commas; left unchanged to keep the prompt as-is.
+    instruction = """
+Review the following tweets and analyze the propaganda techniques used. Choose only the applicable categories from: "no technique," "Smears,"
+ "Exaggeration/Minimisation," "Loaded Language," "Appeal to fear/prejudice," "Name calling/Labeling," "Slogans," "Repetition," "Doubt,"
+ "Obfuscation/Intentional vagueness/Confusion," "Flag-waving," "Glittering generalities (Virtue),"
+ "Misrepresentation of Someone's Position (Straw Man)," "Presenting Irrelevant Data (Red Herring)," "Appeal to authority,"
+"Whataboutism," "Black-and-white Fallacy/Dictatorship," "Thought-terminating cliché," or "Causal Oversimplification."
+    """
+
+    content = few_shot_prompt(input_sample, instruction.strip(), examples)
+    user_turn = {"role": "user", "content": content}
+
+    return [user_turn]
+
+
+def post_process(response):
+    """Extract the propaganda techniques named in the model's answer.
+
+    The answer is lower-cased and stripped of ``<s>``/``</s>`` markers, then scanned
+    for the English (lower-case) and Arabic cue strings of each technique.
+
+    Args:
+        response: Chat-completion style dict; the answer is read from
+            ``response["choices"][0]["message"]["content"]``.
+
+    Returns:
+        The canonical English label of every technique with at least one cue in
+        the answer, each at most once, in the order of the cue table.
+        An empty list when no cue matches.
+    """
+    label = response["choices"][0]["message"]["content"].lower()
+    label = label.replace("<s>", "").replace("</s>", "")
+
+    # The text was lower-cased above, so English cues have to be lower-case as well;
+    # a capitalised cue can never match. Every cue is tested with ``in`` below: a
+    # bare string inside an ``or`` chain is always truthy and would match any answer.
+    cues = {
+        "no technique": ("no technique",),
+        "Smears": ("smears",),
+        "Exaggeration/Minimisation": (
+            "exaggeration/minimisation",
+            "مبالغة",
+        ),
+        "Loaded Language": ("loaded language",),
+        # "fear" also covers the full label "appeal to fear/prejudice".
+        "Appeal to fear/prejudice": (
+            "fear",
+            "الخوف",
+        ),
+        # "name" also covers the full label "name calling/labeling".
+        "Name calling/Labeling": (
+            "name",
+            "التسمية",
+        ),
+        "Slogans": ("slogans",),
+        "Repetition": ("repetition",),
+        "Doubt": ("doubt",),
+        # Any one of the three parts of this label is enough.
+        "Obfuscation, Intentional vagueness, Confusion": (
+            "obfuscation",
+            "intentional vagueness",
+            "confusion",
+        ),
+        # "flag" also covers the full label "flag-waving".
+        "Flag-waving": ("flag",),
+        "Glittering generalities (Virtue)": (
+            "glittering",
+            "الفضيلة",
+        ),
+        "Misrepresentation of Someone's Position (Straw Man)": (
+            "misrepresentation of someone's position (straw man)",
+            "تحريف موقف شخص",
+        ),
+        "Presenting Irrelevant Data (Red Herring)": (
+            "presenting irrelevant data (red herring)",
+            "عرض بيانات غير ذات صلة",
+        ),
+        "Appeal to authority": ("appeal to authority",),
+        "Whataboutism": ("whataboutism",),
+        "Black-and-white Fallacy/Dictatorship": (
+            "black-and-white fallacy/dictatorship",
+            "الديكتاتورية",
+        ),
+        "Thought-terminating cliché": (
+            "thought-terminating cliché",
+            "الكليشيه",
+        ),
+        "Causal Oversimplification": (
+            "causal oversimplification",
+            "التبسيط",
+        ),
+    }
+
+    # Dicts keep insertion order, so the result follows the table order.
+    detected_labels = []
+    for english_label, needles in cues.items():
+        if any(needle in label for needle in needles):
+            detected_labels.append(english_label)
+
+    return detected_labels
diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_FewShot_Mixed.py
new file mode 100755
index 00000000..72776442
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_FewShot_Mixed.py
@@ -0,0 +1,116 @@
+import random
+import re
+
+from llmebench.datasets import WANLP22T3PropagandaDataset
+from llmebench.models import FastChatModel
+from llmebench.tasks import MultilabelPropagandaTask
+
+random.seed(1333)
+
+
+def metadata():
+    """Return the static provenance record: authors, affiliation, model and paper links."""
+    author = "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam"
+    affiliation = "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)"
+    description = "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)."
+    model = "jais-13b-chat"
+    return {"author": author, "affiliation": affiliation, "model": model, "description": description}
+
+
+def config():
+    """Return the benchmark wiring: dataset, task, model class and model arguments."""
+    techniques = [
+        "no technique",
+        "Smears",
+        "Exaggeration/Minimisation",
+        "Loaded Language",
+        "Appeal to fear/prejudice",
+        "Name calling/Labeling",
+        "Slogans",
+        "Repetition",
+        "Doubt",
+        "Obfuscation, Intentional vagueness, Confusion",
+        "Flag-waving",
+        "Glittering generalities (Virtue)",
+        "Misrepresentation of Someone's Position (Straw Man)",
+        "Presenting Irrelevant Data (Red Herring)",
+        "Appeal to authority",
+        "Whataboutism",
+        "Black-and-white Fallacy/Dictatorship",
+        "Thought-terminating cliché",
+        "Causal Oversimplification",
+    ]
+    model_args = {"class_labels": techniques, "max_tries": 3}
+    return {
+        "dataset": WANLP22T3PropagandaDataset,
+        "dataset_args": {"techniques_path": "classes.txt"},
+        "task": MultilabelPropagandaTask,
+        "model": FastChatModel,
+        "model_args": model_args,
+    }
+
+
+def few_shot_prompt(input_sample, examples):
+    """Build the mixed-language few-shot prompt (Arabic instruction, English labels)."""
+    instruction = """
+    "من القائمة، اختر التقنية الدعائية الأنسب للتغريدة: "no technique"، "Smears"، "Exaggeration/Minimisation"، "Loaded Language"، "Appeal to fear/prejudice"، "Name calling/Labeling"، "Slogans"، "Repetition"، "Doubt"،
+    "Obfuscation, Intentional vagueness, Confusion"، "Flag-waving"، "Glittering generalities (Virtue)"، "Misrepresentation of Someone's Position (Straw Man)"، "Presenting Irrelevant Data (Red Herring)"، "Appeal to authority"، 
+    "Whataboutism"، "Black-and-white Fallacy/Dictatorship"، "Thought-terminating cliché"، أو "Causal Oversimplification".
+    """
+    parts = [instruction, "\n\nإليك بعض الأمثلة:\n\n"]
+    # One numbered section per example: its tweet, then its comma-joined labels.
+    for position, shot in enumerate(examples, start=1):
+        joined = ", ".join(shot["label"])
+        parts.append(
+            f"مثال {position}:\nالتغريدة: '{shot['input']}'\nالتصنيف: {joined}\n\n"
+        )
+
+    # Finally the tweet to classify, with the label slot left open.
+    parts.append(f"الآن، قم بتقييم التغريدة الجديدة التالية:\nالتغريدة: '{input_sample}'\nالتصنيف: ")
+    return "".join(parts)
+
+
+def prompt(input_sample, examples):
+    return [dict(role="user", content=few_shot_prompt(input_sample, examples))]
+
+
+def post_process(response):
+    """Map the model's answer to canonical (English) technique labels.
+
+    A label is reported when its Arabic name or its lower-cased English name occurs
+    in the response text. Each label appears at most once, in ``label_mapping`` order.
+    """
+    label = response["choices"][0]["message"]["content"].lower()
+    label = re.sub(r"<[^>]+>", "", label)  # Remove any HTML-like tags
+
+    label_mapping = {
+        "بدون تقنية": "no technique",
+        "تشويه": "Smears",
+        "مبالغة/تقليل": "Exaggeration/Minimisation",
+        "لغة محملة": "Loaded Language",
+        "النداء إلى الخوف/التحيز": "Appeal to fear/prejudice",
+        "التسمية/الملصقات": "Name calling/Labeling",
+        "الشعارات": "Slogans",
+        "التكرار": "Repetition",
+        "الشك": "Doubt",
+        "التعمية/الغموض المتعمد/الارتباك": "Obfuscation, Intentional vagueness, Confusion",
+        "التلويح بالعلم": "Flag-waving",
+        "التعميمات البراقة (الفضيلة)": "Glittering generalities (Virtue)",
+        "تحريف موقف شخص (رجل القش)": "Misrepresentation of Someone's Position (Straw Man)",
+        "عرض بيانات غير ذات صلة (السمكة الحمراء)": "Presenting Irrelevant Data (Red Herring)",
+        "النداء إلى السلطة": "Appeal to authority",
+        "ماذا عن": "Whataboutism",
+        "مغالطة الأبيض والأسود/الديكتاتورية": "Black-and-white Fallacy/Dictatorship",
+        "الكليشيه القاطع للفكر": "Thought-terminating cliché",
+        "التبسيط السببي": "Causal Oversimplification",
+    }
+
+    # Dict values are unique, so a single ordered pass yields each label once;
+    # going through a set would only scramble the order between runs.
+    detected_labels = [
+        english_label
+        for arabic_label, english_label in label_mapping.items()
+        if arabic_label in label or english_label.lower() in label
+    ]
+    return detected_labels
diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_ZeroShot_Arabic.py
new file mode 100755
index 00000000..a304edaa
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_ZeroShot_Arabic.py
@@ -0,0 +1,140 @@
+import random
+import re
+
+from llmebench.datasets import WANLP22T3PropagandaDataset
+from llmebench.models import FastChatModel
+from llmebench.tasks import MultilabelPropagandaTask
+
+
+random.seed(1333)  # Seeds Python's global RNG at import time; `random` is not used elsewhere in this asset.
+
+
+def metadata():  # Asset card: authors, affiliation, evaluated model name, and pointers to the paper.
+    return {
+        "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam",
+        "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)",
+        "model": "jais-13b-chat",
+        "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).",
+    }
+
+
+def config():  # Declares the dataset, task and model class for this asset, plus the model's arguments.
+    return {
+        "dataset": WANLP22T3PropagandaDataset,
+        "dataset_args": {"techniques_path": "classes.txt"},
+        "task": MultilabelPropagandaTask,
+        "model": FastChatModel,
+        "model_args": {
+            "class_labels": [  # 19 propaganda-technique label names (English)
+                "no technique",
+                "Smears",
+                "Exaggeration/Minimisation",
+                "Loaded Language",
+                "Appeal to fear/prejudice",
+                "Name calling/Labeling",
+                "Slogans",
+                "Repetition",
+                "Doubt",
+                "Obfuscation, Intentional vagueness, Confusion",
+                "Flag-waving",
+                "Glittering generalities (Virtue)",
+                "Misrepresentation of Someone's Position (Straw Man)",
+                "Presenting Irrelevant Data (Red Herring)",
+                "Appeal to authority",
+                "Whataboutism",
+                "Black-and-white Fallacy/Dictatorship",
+                "Thought-terminating cliché",
+                "Causal Oversimplification",
+            ],
+            "max_tries": 3,  # NOTE(review): presumably the model wrapper's retry limit — confirm
+        },
+    }
+
+
+def prompt(input_sample):
+    """Build the single-turn Arabic zero-shot prompt for one tweet.
+
+    Returns a one-element list with a "user" message made of the instruction
+    (including the Arabic label list), the tweet, and a trailing label cue.
+    """
+    instruction = """
+    "من القائمة، اختر التقنية الدعائية التي تلتقط جوهر التغريدة بشكل أفضل، "بدون تقنية"، "تشويه"، "مبالغة/تقليل"، "لغة محملة"، "النداء إلى الخوف/التحيز"، "التسمية/الملصقات"، "الشعارات"، "التكرار"، "الشك"،
+ "التعمية/الغموض المتعمد/الارتباك"، "التلويح بالعلم"، "التعميمات البراقة (الفضيلة)"، "تحريف موقف شخص (رجل القش)"، "عرض بيانات غير ذات صلة (السمكة الحمراء)"، "النداء إلى السلطة"، 
+"ماذا عن"، "مغالطة الأبيض والأسود/الديكتاتورية"، "الكليشيه القاطع للفكر"، أو "التبسيط السببي"."
+
+    """
+    # The newline keeps the tweet text from running into the label cue.
+    content = f" \n{instruction}\n" + "التغريدة: " + input_sample + "\nالتصنيف: "
+    return [{"role": "user", "content": content}]
+
+
+def post_process(response):
+    """Translate the model's Arabic answer into the task's English label names.
+
+    The lower-cased reply is scanned for Arabic fragments; a technique is
+    reported when any of its fragments occurs anywhere in the text.
+
+    Args:
+        response: mapping whose reply text is read from
+            ``response["choices"][0]["message"]["content"]``.
+
+    Returns:
+        Unique English technique names in arbitrary order; empty when no
+        fragment matches.
+    """
+    label = response["choices"][0]["message"]["content"].lower()
+    label_mapping = {
+        "بدون تقنية": "no technique",
+        "تشويه": "Smears",
+        "مبالغة/تقليل": "Exaggeration/Minimisation",
+        "لغة محملة": "Loaded Language",
+        "النداء إلى الخوف/التحيز": "Appeal to fear/prejudice",
+        "التسمية/الملصقات": "Name calling/Labeling",
+        "الشعارات": "Slogans",
+        "التكرار": "Repetition",
+        "الشك": "Doubt",
+        "التعمية/الغموض المتعمد/الارتباك": "Obfuscation, Intentional vagueness, Confusion",
+        "التلويح بالعلم": "Flag-waving",
+        "التعميمات البراقة (الفضيلة)": "Glittering generalities (Virtue)",
+        "تحريف موقف شخص (رجل القش)": "Misrepresentation of Someone's Position (Straw Man)",
+        "عرض بيانات غير ذات صلة (السمكة الحمراء)": "Presenting Irrelevant Data (Red Herring)",
+        "النداء إلى السلطة": "Appeal to authority",
+        "ماذا عن": "Whataboutism",
+        "مغالطة الأبيض والأسود/الديكتاتورية": "Black-and-white Fallacy/Dictatorship",
+        "الكليشيه القاطع للفكر": "Thought-terminating cliché",
+        "التبسيط السببي": "Causal Oversimplification",
+    }
+    # Arabic label -> fragments that each count as a mention of that label.
+    triggers = {
+        "بدون تقنية": ("بدون تقنية",),
+        "تشويه": ("تشويه",),
+        "مبالغة/تقليل": ("تقليل", "مبالغة"),
+        "لغة محملة": ("لغة محملة",),
+        "النداء إلى الخوف/التحيز": ("التحيز", "الخوف"),
+        "التسمية/الملصقات": ("الملصقات", "التسمية"),
+        "الشعارات": ("الشعارات",),
+        "التكرار": ("التكرار",),
+        "الشك": ("الشك",),
+        "التعمية/الغموض المتعمد/الارتباك": ("الارتباك", "الغموض المتعمد", "التعمية"),
+        "التلويح بالعلم": ("التلويح بالعلم",),
+        "التعميمات البراقة (الفضيلة)": ("التعميمات البراقة", "الفضيلة"),
+        "تحريف موقف شخص (رجل القش)": ("رجل القش", "تحريف موقف شخص"),
+        "عرض بيانات غير ذات صلة (السمكة الحمراء)": (
+            "السمكة الحمراء",
+            "عرض بيانات غير ذات صلة",
+        ),
+        "النداء إلى السلطة": ("النداء إلى السلطة",),
+        "ماذا عن": ("ماذا عن",),
+        "مغالطة الأبيض والأسود/الديكتاتورية": ("الأبيض والأسود", "الديكتاتورية"),
+        "الكليشيه القاطع للفكر": ("القاطع للفكر", "الكليشيه"),
+        "التبسيط السببي": ("السببي", "التبسيط"),
+    }
+    print("label: ", label)
+
+    detected_labels = [
+        label_mapping[arabic_label]
+        for arabic_label, fragments in triggers.items()
+        if any(fragment in label for fragment in fragments)
+    ]
+    # set() drops repeats; the order of the returned list is therefore arbitrary.
+    return list(set(detected_labels))
diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_ZeroShot_English.py
new file mode 100755
index 00000000..a56c1dea
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_ZeroShot_English.py
@@ -0,0 +1,164 @@
+import re
+
+from llmebench.datasets import WANLP22T3PropagandaDataset
+from llmebench.models import FastChatModel
+from llmebench.tasks import MultilabelPropagandaTask
+
+
+def metadata():  # Asset card: authors, affiliation, evaluated model name, and pointers to the paper.
+    return {
+        "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam",
+        "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)",
+        "model": "jais-13b-chat",
+        "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).",
+    }
+
+
+def config():  # Declares the dataset, task and model class for this asset, plus the model's arguments.
+    return {
+        "dataset": WANLP22T3PropagandaDataset,
+        "dataset_args": {"techniques_path": "classes.txt"},
+        "task": MultilabelPropagandaTask,
+        "model": FastChatModel,
+        "model_args": {
+            "class_labels": [  # 19 propaganda-technique label names (English)
+                "no technique",
+                "Smears",
+                "Exaggeration/Minimisation",
+                "Loaded Language",
+                "Appeal to fear/prejudice",
+                "Name calling/Labeling",
+                "Slogans",
+                "Repetition",
+                "Doubt",
+                "Obfuscation, Intentional vagueness, Confusion",
+                "Flag-waving",
+                "Glittering generalities (Virtue)",
+                "Misrepresentation of Someone's Position (Straw Man)",
+                "Presenting Irrelevant Data (Red Herring)",
+                "Appeal to authority",
+                "Whataboutism",
+                "Black-and-white Fallacy/Dictatorship",
+                "Thought-terminating cliché",
+                "Causal Oversimplification",
+            ],
+            "max_tries": 30,  # NOTE(review): 30 here vs 3 in the sibling WANLP22T3 assets — confirm intentional
+        },
+    }
+
+
+def prompt(input_sample):
+    """Build the English zero-shot prompt for one tweet.
+
+    The message lists the 19 candidate labels, shows the expected
+    ``'Label': 'yes'/'no'`` answer format, and ends with the tweet and an
+    empty ``label:`` cue. Returns a one-element list with a "user" message.
+    """
+    message = (
+        "Review the following tweets and analyze the propaganda techniques used. Choose only the applicable categories from:\n\n"
+        "'no technique' , 'Smears' , 'Exaggeration/Minimisation' , 'Loaded Language' , 'Appeal to fear/prejudice' , 'Name calling/Labeling' , 'Slogans' , 'Repetition' , 'Doubt' , 'Obfuscation, Intentional vagueness, Confusion' , 'Flag-waving' , 'Glittering generalities (Virtue)' , 'Misrepresentation of Someone's Position (Straw Man)' , 'Presenting Irrelevant Data (Red Herring)' , 'Appeal to authority' , 'Whataboutism' , 'Black-and-white Fallacy/Dictatorship' , 'Thought-terminating cliché' , 'Causal Oversimplification'"
+        "\nAnswer (only yes/no) in the following format: \n"
+        "'Doubt': 'yes', "
+        "'Smears': 'no', \n\n"
+        f"tweet: {input_sample}\n\n"
+        "label: \n"
+    )
+    return [{"role": "user", "content": message}]
+
+
+def fix_label(pred_label):
+    """Normalise a raw ``'technique': 'yes'/'no'`` reply into class labels.
+
+    ``pred_label`` is the model reply (the caller lower-cases it). Entries
+    answered "no" are skipped; the rest are mapped to class names by keyword.
+    Returns a non-empty list without duplicates, ``["no technique"]`` if none.
+    """
+    if "used in this text" in pred_label:
+        return ["no technique"]
+
+    labels_fixed = []
+    parts = pred_label.replace('"', "'").split("', '")
+    pred_labels = []
+    for part in parts:
+        splits = part.replace(",", "").split(":")
+        if len(splits) <= 1 or "no" not in splits[1]:
+            pred_labels.append(splits[0].replace("'", ""))
+
+    if len(pred_labels) == 0:
+        return ["no technique"]
+
+    for label in pred_labels:
+        label = label.replace(".", "").strip()
+        label = re.sub("-", " ", label)
+        label = label.strip().lower()
+
+        # Handle case of single word labels like "Smears" so we just capitalize it
+        label_fixed = label.capitalize()
+
+        if "slogan" in label:
+            label_fixed = "Slogans"
+        if "loaded" in label:
+            label_fixed = "Loaded Language"
+        if "prejudice" in label or "fear" in label or "mongering" in label:
+            label_fixed = "Appeal to fear/prejudice"
+        if "terminating" in label or "thought" in label:
+            label_fixed = "Thought-terminating cliché"
+        if "calling" in label or label == "name c":
+            label_fixed = "Name calling/Labeling"
+        if "minimisation" in label or label == "exaggeration minim":
+            label_fixed = "Exaggeration/Minimisation"
+        if "glittering" in label:
+            label_fixed = "Glittering generalities (Virtue)"
+        if "flag" in label:
+            label_fixed = "Flag-waving"
+        if "obfuscation" in label:
+            label_fixed = "Obfuscation, Intentional vagueness, Confusion"
+        if "oversimplification" in label or "causal" in label:
+            label_fixed = "Causal Oversimplification"
+        if "authority" in label:
+            label_fixed = "Appeal to authority"
+        if "dictatorship" in label or "black" in label or "white" in label:
+            label_fixed = "Black-and-white Fallacy/Dictatorship"
+        if "herring" in label or "irrelevant" in label:
+            label_fixed = "Presenting Irrelevant Data (Red Herring)"
+        if "straw" in label or "misrepresentation" in label:
+            label_fixed = "Misrepresentation of Someone's Position (Straw Man)"
+        if "whataboutism" in label:
+            label_fixed = "Whataboutism"
+
+        if (
+            "no propaganda" in label
+            or "technique" in label
+            or label == ""
+            or label == "no"
+            or label == "appeal to history"
+            or label == "appeal to emotion"
+            or label == "appeal to"
+            or label == "appeal"
+            or label == "appeal to author"
+            or label == "emotional appeal"
+            or "no techn" in label
+            or "hashtag" in label
+            or "theory" in label
+            or "specific mention" in label
+            or "religious" in label
+            or "gratitude" in label
+        ):
+            label_fixed = "no technique"
+
+        labels_fixed.append(label_fixed)
+
+    # Drop "no technique" when real techniques were also predicted and remove
+    # duplicates (first-seen order). Never return []: previously that happened
+    # when several entries had all collapsed to "no technique".
+    unique = list(dict.fromkeys(labels_fixed))
+    techniques = [flabel for flabel in unique if flabel != "no technique"]
+    return techniques or ["no technique"]
+
+
+def post_process(response):
+    """Lower-case the model reply and normalise it with ``fix_label``."""
+    content = response["choices"][0]["message"]["content"]
+    pred_label = fix_label(content.lower())
+    print(pred_label)
+    return pred_label
diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_ZeroShot_Mixed.py
new file mode 100755
index 00000000..8ecbc6c2
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_ZeroShot_Mixed.py
@@ -0,0 +1,136 @@
+import random
+import re
+
+from llmebench.datasets import WANLP22T3PropagandaDataset
+from llmebench.models import FastChatModel
+from llmebench.tasks import MultilabelPropagandaTask
+
+
+random.seed(1333)  # Seeds Python's global RNG at import time; `random` is not used elsewhere in this asset.
+
+
+def metadata():  # Asset card: authors, affiliation, evaluated model name, and pointers to the paper.
+    return {
+        "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam",
+        "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)",
+        "model": "jais-13b-chat",
+        "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).",
+    }
+
+
+def config():  # Declares the dataset, task and model class for this asset, plus the model's arguments.
+    return {
+        "dataset": WANLP22T3PropagandaDataset,
+        "dataset_args": {"techniques_path": "classes.txt"},
+        "task": MultilabelPropagandaTask,
+        "model": FastChatModel,
+        "model_args": {
+            "class_labels": [  # 19 propaganda-technique label names (English)
+                "no technique",
+                "Smears",
+                "Exaggeration/Minimisation",
+                "Loaded Language",
+                "Appeal to fear/prejudice",
+                "Name calling/Labeling",
+                "Slogans",
+                "Repetition",
+                "Doubt",
+                "Obfuscation, Intentional vagueness, Confusion",
+                "Flag-waving",
+                "Glittering generalities (Virtue)",
+                "Misrepresentation of Someone's Position (Straw Man)",
+                "Presenting Irrelevant Data (Red Herring)",
+                "Appeal to authority",
+                "Whataboutism",
+                "Black-and-white Fallacy/Dictatorship",
+                "Thought-terminating cliché",
+                "Causal Oversimplification",
+            ],
+            "max_tries": 3,  # NOTE(review): presumably the model wrapper's retry limit — confirm
+        },
+    }
+
+
+def prompt(input_sample):
+    """Build the single-turn mixed-language zero-shot prompt for one tweet.
+
+    Returns a one-element list with a "user" message: an Arabic instruction
+    listing the English label names, the tweet, and a trailing label cue.
+    """
+    instruction = """
+    "من القائمة، اختر التقنية الدعائية الأنسب للتغريدة: "no technique"، "Smears"، "Exaggeration/Minimisation"، "Loaded Language"، "Appeal to fear/prejudice"، "Name calling/Labeling"، "Slogans"، "Repetition"، "Doubt"،
+    "Obfuscation, Intentional vagueness, Confusion"، "Flag-waving"، "Glittering generalities (Virtue)"، "Misrepresentation of Someone's Position (Straw Man)"، "Presenting Irrelevant Data (Red Herring)"، "Appeal to authority"، 
+    "Whataboutism"، "Black-and-white Fallacy/Dictatorship"، "Thought-terminating cliché"، أو "Causal Oversimplification".
+    """
+    # The newline keeps the tweet text from running into the label cue.
+    content = f" \n{instruction}\n" + "التغريدة: " + input_sample + "\nالتصنيف: "
+    return [{"role": "user", "content": content}]
+
+
+def post_process(response):
+    """Map the model's reply (English or Arabic label names) to task labels.
+
+    The mixed prompt lists the techniques in English, so the English class
+    names are matched as well as the Arabic fragments.
+
+    Args:
+        response: mapping whose reply text is read from
+            ``response["choices"][0]["message"]["content"]``.
+
+    Returns:
+        English technique names found, each once, in ``label_mapping`` order.
+    """
+    label = response["choices"][0]["message"]["content"].lower()
+    label_mapping = {
+        "بدون تقنية": "no technique",
+        "تشويه": "Smears",
+        "مبالغة/تقليل": "Exaggeration/Minimisation",
+        "لغة محملة": "Loaded Language",
+        "النداء إلى الخوف/التحيز": "Appeal to fear/prejudice",
+        "التسمية/الملصقات": "Name calling/Labeling",
+        "الشعارات": "Slogans",
+        "التكرار": "Repetition",
+        "الشك": "Doubt",
+        "التعمية/الغموض المتعمد/الارتباك": "Obfuscation, Intentional vagueness, Confusion",
+        "التلويح بالعلم": "Flag-waving",
+        "التعميمات البراقة (الفضيلة)": "Glittering generalities (Virtue)",
+        "تحريف موقف شخص (رجل القش)": "Misrepresentation of Someone's Position (Straw Man)",
+        "عرض بيانات غير ذات صلة (السمكة الحمراء)": "Presenting Irrelevant Data (Red Herring)",
+        "النداء إلى السلطة": "Appeal to authority",
+        "ماذا عن": "Whataboutism",
+        "مغالطة الأبيض والأسود/الديكتاتورية": "Black-and-white Fallacy/Dictatorship",
+        "الكليشيه القاطع للفكر": "Thought-terminating cliché",
+        "التبسيط السببي": "Causal Oversimplification",
+    }
+    # Arabic label -> fragments that each count as a mention of that label.
+    triggers = {
+        "بدون تقنية": ("بدون تقنية",),
+        "تشويه": ("تشويه",),
+        "مبالغة/تقليل": ("تقليل", "مبالغة"),
+        "لغة محملة": ("لغة محملة",),
+        "النداء إلى الخوف/التحيز": ("التحيز", "الخوف"),
+        "التسمية/الملصقات": ("الملصقات", "التسمية"),
+        "الشعارات": ("الشعارات",),
+        "التكرار": ("التكرار",),
+        "الشك": ("الشك",),
+        "التعمية/الغموض المتعمد/الارتباك": ("الارتباك", "الغموض المتعمد", "التعمية"),
+        "التلويح بالعلم": ("التلويح بالعلم",),
+        "التعميمات البراقة (الفضيلة)": ("التعميمات البراقة", "الفضيلة"),
+        "تحريف موقف شخص (رجل القش)": ("رجل القش", "تحريف موقف شخص"),
+        "عرض بيانات غير ذات صلة (السمكة الحمراء)": (
+            "السمكة الحمراء",
+            "عرض بيانات غير ذات صلة",
+        ),
+        "النداء إلى السلطة": ("النداء إلى السلطة",),
+        "ماذا عن": ("ماذا عن",),
+        "مغالطة الأبيض والأسود/الديكتاتورية": ("الأبيض والأسود", "الديكتاتورية"),
+        "الكليشيه القاطع للفكر": ("القاطع للفكر", "الكليشيه"),
+        "التبسيط السببي": ("السببي", "التبسيط"),
+    }
+    print("label: ", label)
+    return [
+        english_label
+        for arabic_label, english_label in label_mapping.items()
+        if english_label.lower() in label
+        or any(fragment in label for fragment in triggers[arabic_label])
+    ]
diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_FewShot_Arabic.py
new file mode 100755
index 00000000..0cef8a61
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_FewShot_Arabic.py
@@ -0,0 +1,327 @@
+# import random
+# import re
+
+# from llmebench.datasets import WANLP22T3PropagandaDataset
+# from llmebench.models import AzureModel
+# from llmebench.tasks import MultilabelPropagandaTask
+
+# random.seed(1333)
+
+# def metadata():
+#     return {
+#         "author": "Arabic Language Technologies, QCRI, HBKU",
+#         "model": "LLama 3 8b",
+#         "description": "Deployed on Azure.",
+#         "scores": {"Micro-F1": "0.52864"}, # 1 - 0.3631 , 3- 0.48027 , 5- 0.52864
+#     }
+
+# def config():
+#     return {
+#         "dataset": WANLP22T3PropagandaDataset,
+#         "dataset_args": {"techniques_path": "classes.txt"},
+#         "task": MultilabelPropagandaTask,
+#         "model": AzureModel,
+#         "model_args": {
+#             "class_labels": [
+#                 "no technique",
+#                 "Smears",
+#                 "Exaggeration/Minimisation",
+#                 "Loaded Language",
+#                 "Appeal to fear/prejudice",
+#                 "Name calling/Labeling",
+#                 "Slogans",
+#                 "Repetition",
+#                 "Doubt",
+#                 "Obfuscation, Intentional vagueness, Confusion",
+#                 "Flag-waving",
+#                 "Glittering generalities (Virtue)",
+#                 "Misrepresentation of Someone's Position (Straw Man)",
+#                 "Presenting Irrelevant Data (Red Herring)",
+#                 "Appeal to authority",
+#                 "Whataboutism",
+#                 "Black-and-white Fallacy/Dictatorship",
+#                 "Thought-terminating cliché",
+#                 "Causal Oversimplification",
+#             ],
+#             "max_tries": 3,
+#         },
+#     }
+
+# def few_shot_prompt(input_sample, examples):
+#     instruction = """
+#         "من القائمة، اختر التقنية الدعائية الأنسب للتغريدة: "بدون تقنية"، "تشويه"، "مبالغة/تقليل"، "لغة محملة"، "النداء إلى الخوف/التحيز"، "التسمية/الملصقات"، "الشعارات"، "التكرار"، "الشك"،
+#         "التعمية/الغموض المتعمد/الارتباك"، "التلويح بالعلم"، "التعميمات البراقة (الفضيلة)"، "تحريف موقف شخص (رجل القش)"، "عرض بيانات غير ذات صلة (السمكة الحمراء)"، "النداء إلى السلطة"،
+#         "ماذا عن"، "مغالطة الأبيض والأسود/الديكتاتورية"، "الكليشيه القاطع للفكر"، أو "التبسيط السببي"."
+#         """
+#     label_mapping = {
+#         "no technique": "بدون تقنية",
+#         "Smears": "تشويه",
+#         "Exaggeration/Minimisation": "مبالغة/تقليل",
+#         "Loaded Language": "لغة محملة",
+#         "Appeal to fear/prejudice": "النداء إلى الخوف/التحيز",
+#         "Name calling/Labeling": "التسمية/الملصقات",
+#         "Slogans": "الشعارات",
+#         "Repetition": "التكرار",
+#         "Doubt": "الشك",
+#         "Obfuscation Intentional vagueness Confusion": "التعمية/الغموض المتعمد/الارتباك",
+#         "Flag-waving": "التلويح بالعلم",
+#         "Glittering generalities (Virtue)": "التعميمات البراقة (الفضيلة)",
+#         "Misrepresentation of Someone's Position (Straw Man)": "تحريف موقف شخص (رجل القش)",
+#         "Presenting Irrelevant Data (Red Herring)": "عرض بيانات غير ذات صلة (السمكة الحمراء)",
+#         "Appeal to authority": "النداء إلى السلطة",
+#         "Whataboutism": "ماذا عن",
+#         "Black-and-white Fallacy/Dictatorship": "مغالطة الأبيض والأسود/الديكتاتورية",
+#         "Thought-terminating cliché": "الكليشيه القاطع للفكر",
+#         "Causal Oversimplification": "التبسيط السببي"
+#     }
+
+#     few_shot_text = instruction + "\n\nإليك بعض الأمثلة:\n\n"
+#     for index, example in enumerate(examples):
+#         labels_list = [label_mapping.get(label,"") for label in example["label"]]
+#         labels = ", ".join(labels_list)
+#         few_shot_text += (
+#             f"مثال {index + 1}:\n"
+#             f"التغريدة: '{example['input']}'\n"
+#             f"التصنيف: {labels}\n\n"
+#         )
+
+#     few_shot_text += f"الآن، قم بتقييم التغريدة الجديدة التالية:\nالتغريدة: '{input_sample}'\nالتصنيف: "
+#     return few_shot_text
+
+# def few_shot_prompt(input_sample, base_prompt, examples):
+#     label_mapping = {
+#         "no technique": "بدون تقنية",
+#         "Smears": "تشويه",
+#         "Exaggeration/Minimisation": "مبالغة/تقليل",
+#         "Loaded Language": "لغة محملة",
+#         "Appeal to fear/prejudice": "النداء إلى الخوف/التحيز",
+#         "Name calling/Labeling": "التسمية/الملصقات",
+#         "Slogans": "الشعارات",
+#         "Repetition": "التكرار",
+#         "Doubt": "الشك",
+#         "Obfuscation Intentional vagueness Confusion": "التعمية/الغموض المتعمد/الارتباك",
+#         "Flag-waving": "التلويح بالعلم",
+#         "Glittering generalities (Virtue)": "التعميمات البراقة (الفضيلة)",
+#         "Misrepresentation of Someone's Position (Straw Man)": "تحريف موقف شخص (رجل القش)",
+#         "Presenting Irrelevant Data (Red Herring)": "عرض بيانات غير ذات صلة (السمكة الحمراء)",
+#         "Appeal to authority": "النداء إلى السلطة",
+#         "Whataboutism": "ماذا عن",
+#         "Black-and-white Fallacy/Dictatorship": "مغالطة الأبيض والأسود/الديكتاتورية",
+#         "Thought-terminating cliché": "الكليشيه القاطع للفكر",
+#         "Causal Oversimplification": "التبسيط السببي"
+#     }
+
+#     out_prompt = base_prompt + "\n"
+#     out_prompt = out_prompt + "اليك بعض الأمثلة:\n\n"
+#     for index, example in enumerate(examples):
+#         tech_str = ""
+#         for t in example["label"]:
+#             tech_str += "'" + label_mapping[t] + "', "
+
+#         out_prompt = (
+#             out_prompt
+#             + "مثال "
+#             + str(index)
+#             + ":"
+#             + "\n"
+#             + "التغريدة: "
+#             + example["input"]
+#             + "\التصنيف: "
+#             + tech_str
+#             + "\n\n"
+#         )
+
+#     # Append the sentence we want the model to predict for but leave the Label blank
+#     out_prompt = out_prompt + "التغريدة: " + input_sample + "\التصنيف: \n"
+
+#     return out_prompt
+
+# def prompt(input_sample, examples):
+#     return [
+#         {
+#             "role": "user",
+#             "content": few_shot_prompt(input_sample, examples)
+#         }
+#     ]
+
+# def post_process(response):
+#     if not response or 'error' in response or 'output' not in response:
+#         print("Error or missing output in response:", response)
+#         return None
+
+#     label = response["output"].strip().lower()
+#     label = re.sub(r'<[^>]+>', '', label)  # Remove any HTML-like tags
+#     label = label.lower()
+
+#     label_mapping = {
+#         "بدون تقنية": "no technique",
+#         "تشويه": "Smears",
+#         "مبالغة/تقليل": "Exaggeration/Minimisation",
+#         "لغة محملة": "Loaded Language",
+#         "النداء إلى الخوف/التحيز": "Appeal to fear/prejudice",
+#         "التسمية/الملصقات": "Name calling/Labeling",
+#         "الشعارات": "Slogans",
+#         "التكرار": "Repetition",
+#         "الشك": "Doubt",
+#         "التعمية/الغموض المتعمد/الارتباك": "Obfuscation, Intentional vagueness, Confusion",
+#         "التلويح بالعلم": "Flag-waving",
+#         "التعميمات البراقة (الفضيلة)": "Glittering generalities (Virtue)",
+#         "تحريف موقف شخص (رجل القش)": "Misrepresentation of Someone's Position (Straw Man)",
+#         "عرض بيانات غير ذات صلة (السمكة الحمراء)": "Presenting Irrelevant Data (Red Herring)",
+#         "النداء إلى السلطة": "Appeal to authority",
+#         "ماذا عن": "Whataboutism",
+#         "مغالطة الأبيض والأسود/الديكتاتورية": "Black-and-white Fallacy/Dictatorship",
+#         "الكليشيه القاطع للفكر": "Thought-terminating cliché",
+#         "التبسيط السببي": "Causal Oversimplification"
+#     }
+
+#     detected_labels = []
+#     for arabic_label, english_label in label_mapping.items():
+#         if arabic_label in label:
+#             detected_labels.append(english_label)
+#         elif english_label.lower() in label:
+#             detected_labels.append(english_label)
+
+#     print("Detected labels:", detected_labels)
+
+#     # this is for duplicates values
+#     detected_labels = list(set(detected_labels))
+
+#     return detected_labels
+import random
+import re
+
+from llmebench.datasets import WANLP22T3PropagandaDataset
+from llmebench.models import AzureModel
+from llmebench.tasks import MultilabelPropagandaTask
+
+random.seed(1333)  # Seeds Python's global RNG at import time; `random` is not used elsewhere in this asset.
+
+
+def metadata():  # Asset card: authors, affiliation, evaluated model name, and pointers to the paper.
+    return {
+        "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam",
+        "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)",
+        "model": "Llama-3.1-8B-Instruct",  # NOTE(review): the asset file name says "Llama3-8b" — confirm which model was evaluated
+        "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).",
+    }
+
+
+def config():  # Declares the dataset, task and model class for this asset, plus the model's arguments.
+    return {
+        "dataset": WANLP22T3PropagandaDataset,
+        "dataset_args": {"techniques_path": "classes.txt"},
+        "task": MultilabelPropagandaTask,
+        "model": AzureModel,
+        "model_args": {
+            "class_labels": [  # 19 propaganda-technique label names (English)
+                "no technique",
+                "Smears",
+                "Exaggeration/Minimisation",
+                "Loaded Language",
+                "Appeal to fear/prejudice",
+                "Name calling/Labeling",
+                "Slogans",
+                "Repetition",
+                "Doubt",
+                "Obfuscation, Intentional vagueness, Confusion",
+                "Flag-waving",
+                "Glittering generalities (Virtue)",
+                "Misrepresentation of Someone's Position (Straw Man)",
+                "Presenting Irrelevant Data (Red Herring)",
+                "Appeal to authority",
+                "Whataboutism",
+                "Black-and-white Fallacy/Dictatorship",
+                "Thought-terminating cliché",
+                "Causal Oversimplification",
+            ],
+            "max_tries": 3,  # NOTE(review): presumably the model wrapper's retry limit — confirm
+        },
+    }
+
+
+def translate_labels(label):
+    """Return the Arabic name of an English technique label; unknown labels pass through."""
+    return {
+        "no technique": "بدون تقنية",
+        "Smears": "تشويه",
+        "Exaggeration/Minimisation": "مبالغة/تقليل",
+        "Loaded Language": "لغة محملة",
+        "Appeal to fear/prejudice": "النداء إلى الخوف/التحيز",
+        "Name calling/Labeling": "التسمية/الملصقات",
+        "Slogans": "الشعارات",
+        "Repetition": "التكرار",
+        "Doubt": "الشك",
+        "Obfuscation, Intentional vagueness, Confusion": "التعمية/الغموض المتعمد/الارتباك",
+        "Flag-waving": "التلويح بالعلم",
+        "Glittering generalities (Virtue)": "التعميمات البراقة (الفضيلة)",
+        "Misrepresentation of Someone's Position (Straw Man)": "تحريف موقف شخص (رجل القش)",
+        "Presenting Irrelevant Data (Red Herring)": "عرض بيانات غير ذات صلة (السمكة الحمراء)",
+        "Appeal to authority": "النداء إلى السلطة",
+        "Whataboutism": "ماذا عن",
+        "Black-and-white Fallacy/Dictatorship": "مغالطة الأبيض والأسود/الديكتاتورية",
+        "Thought-terminating cliché": "الكليشيه القاطع للفكر",
+        "Causal Oversimplification": "التبسيط السببي",
+    }.get(label, label)
+
+
+def few_shot_prompt(input_sample, base_prompt, examples):
+    """Extend ``base_prompt`` with labelled examples and the target tweet."""
+    parts = [base_prompt + "\nاليك بعض الأمثلة:\n\n"]
+    for number, shot in enumerate(examples):
+        names = ", ".join(f"'{translate_labels(name)}'" for name in shot["label"])
+        parts.append(
+            f"مثال {number}:\nالتغريدة: {shot['input']}\nالتصنيف: {names}\n\n"
+        )
+    return "".join(parts) + f"التغريدة: {input_sample}\nالتصنيف: \n"
+
+
+def prompt(input_sample, examples):
+    """Wrap the Arabic few-shot prompt in a single "user" chat message.
+
+    The instruction below is passed to ``few_shot_prompt`` as the base text.
+    """
+    base_prompt = """
+        "من القائمة، اختر التقنية الدعائية الأنسب للتغريدة: "بدون تقنية"، "تشويه"، "مبالغة/تقليل"، "لغة محملة"، "النداء إلى الخوف/التحيز"، "التسمية/الملصقات"، "الشعارات"، "التكرار"، "الشك"،
+        "التعمية/الغموض المتعمد/الارتباك"، "التلويح بالعلم"، "التعميمات البراقة (الفضيلة)"، "تحريف موقف شخص (رجل القش)"، "عرض بيانات غير ذات صلة (السمكة الحمراء)"، "النداء إلى السلطة"، 
+        "ماذا عن"، "مغالطة الأبيض والأسود/الديكتاتورية"، "الكليشيه القاطع للفكر"، أو "التبسيط السببي"."
+        """
+    content = few_shot_prompt(input_sample, base_prompt, examples)
+    return [{"role": "user", "content": content}]
+
+
+def post_process(response):
+    """Return the unique English labels named (in Arabic or English) in the reply, or None on error."""
+    if not response or "error" in response or "output" not in response:
+        print("Error or missing output in response:", response)
+        return None
+
+    text = re.sub(r"<[^>]+>", "", response["output"].strip().lower())
+    arabic_to_english = {
+        "بدون تقنية": "no technique",
+        "تشويه": "Smears",
+        "مبالغة/تقليل": "Exaggeration/Minimisation",
+        "لغة محملة": "Loaded Language",
+        "النداء إلى الخوف/التحيز": "Appeal to fear/prejudice",
+        "التسمية/الملصقات": "Name calling/Labeling",
+        "الشعارات": "Slogans",
+        "التكرار": "Repetition",
+        "الشك": "Doubt",
+        "التعمية/الغموض المتعمد/الارتباك": "Obfuscation, Intentional vagueness, Confusion",
+        "التلويح بالعلم": "Flag-waving",
+        "التعميمات البراقة (الفضيلة)": "Glittering generalities (Virtue)",
+        "تحريف موقف شخص (رجل القش)": "Misrepresentation of Someone's Position (Straw Man)",
+        "عرض بيانات غير ذات صلة (السمكة الحمراء)": "Presenting Irrelevant Data (Red Herring)",
+        "النداء إلى السلطة": "Appeal to authority",
+        "ماذا عن": "Whataboutism",
+        "مغالطة الأبيض والأسود/الديكتاتورية": "Black-and-white Fallacy/Dictatorship",
+        "الكليشيه القاطع للفكر": "Thought-terminating cliché",
+        "التبسيط السببي": "Causal Oversimplification",
+    }
+    found = []
+    for arabic_name, english_name in arabic_to_english.items():
+        if arabic_name in text or english_name.lower() in text:
+            found.append(english_name)
+    unique = list(set(found))
+    print("Detected labels:", unique)
+    return unique
diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_FewShot_English.py
new file mode 100755
index 00000000..b0bbef4c
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_FewShot_English.py
@@ -0,0 +1,142 @@
+import random
+import re
+
+from llmebench.datasets import WANLP22T3PropagandaDataset
+from llmebench.models import AzureModel
+from llmebench.tasks import MultilabelPropagandaTask
+
+
+random.seed(1333)
+
+
+def metadata():
+    return {
+        "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam",
+        "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)",
+        "model": "Llama-3.1-8B-Instruct",
+        "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).",
+    }
+
+
+def config():
+    return {
+        "dataset": WANLP22T3PropagandaDataset,
+        "dataset_args": {"techniques_path": "classes.txt"},
+        "task": MultilabelPropagandaTask,
+        "model": AzureModel,
+        "model_args": {
+            "class_labels": [
+                "no technique",
+                "Smears",
+                "Exaggeration/Minimisation",
+                "Loaded Language",
+                "Appeal to fear/prejudice",
+                "Name calling/Labeling",
+                "Slogans",
+                "Repetition",
+                "Doubt",
+                "Obfuscation, Intentional vagueness, Confusion",
+                "Flag-waving",
+                "Glittering generalities (Virtue)",
+                "Misrepresentation of Someone's Position (Straw Man)",
+                "Presenting Irrelevant Data (Red Herring)",
+                "Appeal to authority",
+                "Whataboutism",
+                "Black-and-white Fallacy/Dictatorship",
+                "Thought-terminating cliché",
+                "Causal Oversimplification",
+            ],
+            "max_tries": 3,
+        },
+    }
+
+
+def few_shot_prompt(input_sample, base_prompt, examples):
+    out_prompt = base_prompt + "\n\n"
+    out_prompt = out_prompt + "Here are some examples:\n\n"
+    for index, example in enumerate(examples):
+        tech_str = ""
+        for t in example["label"]:
+            tech_str += "'" + t + "', "
+
+        out_prompt = (
+            out_prompt
+            + "Example "
+            + str(index)
+            + ":"
+            + "\n"
+            + "Tweet: "
+            + example["input"]
+            + "\nLabel: "
+            + tech_str[:-2]  # Remove the trailing comma and space
+            + "\n\n"
+        )
+
+    # Append the sentence we want the model to predict for but leave the Label blank
+    out_prompt = out_prompt + "Tweet: " + input_sample + "\nLabel: \n"
+
+    return out_prompt
+
+
+def prompt(input_sample, examples):
+    instruction = """
+Review the following tweets and analyze the propaganda techniques used. Choose only the applicable categories from: "no technique," "Smears,"
+ "Exaggeration/Minimisation," "Loaded Language," "Appeal to fear/prejudice," "Name calling/Labeling," "Slogans," "Repetition," "Doubt,"
+ "Obfuscation/Intentional vagueness/Confusion," "Flag-waving," "Glittering generalities (Virtue),"
+ "Misrepresentation of Someone's Position (Straw Man)," "Presenting Irrelevant Data (Red Herring)," "Appeal to authority,"
+"Whataboutism," "Black-and-white Fallacy/Dictatorship," "Thought-terminating cliché," or "Causal Oversimplification."
+    """
+    base_prompt = instruction.strip()
+
+    return [
+        {
+            "role": "user",
+            "content": (few_shot_prompt(input_sample, base_prompt, examples)),
+        }
+    ]
+
+
+def post_process(response):
+    if not response or "error" in response or "output" not in response:
+        print("Error or missing output in response:", response)
+        return "No respose"  # Safely default to NOT_ADULT when unsure
+
+    label = response["output"].strip().lower()
+    label = label.replace("<s>", "").replace("</s>", "")
+    label = label.lower()
+
+    label_mapping = {
+        "بدون تقنية": "no technique",
+        "تشويه": "Smears",
+        "مبالغة/تقليل": "Exaggeration/Minimisation",
+        "لغة محملة": "Loaded Language",
+        "النداء إلى الخوف/التحيز": "Appeal to fear/prejudice",
+        "التسمية/الملصقات": "Name calling/Labeling",
+        "الشعارات": "Slogans",
+        "التكرار": "Repetition",
+        "الشك": "Doubt",
+        "التعمية/الغموض المتعمد/الارتباك": "Obfuscation, Intentional vagueness, Confusion",
+        "التلويح بالعلم": "Flag-waving",
+        "التعميمات البراقة (الفضيلة)": "Glittering generalities (Virtue)",
+        "تحريف موقف شخص (رجل القش)": "Misrepresentation of Someone's Position (Straw Man)",
+        "عرض بيانات غير ذات صلة (السمكة الحمراء)": "Presenting Irrelevant Data (Red Herring)",
+        "النداء إلى السلطة": "Appeal to authority",
+        "ماذا عن": "Whataboutism",
+        "مغالطة الأبيض والأسود/الديكتاتورية": "Black-and-white Fallacy/Dictatorship",
+        "الكليشيه القاطع للفكر": "Thought-terminating cliché",
+        "التبسيط السببي": "Causal Oversimplification",
+    }
+
+    detected_labels = []
+    for arabic_label, english_label in label_mapping.items():
+        if arabic_label in label:
+            detected_labels.append(english_label)
+        elif english_label.lower() in label:
+            detected_labels.append(english_label)
+
+    print("Detected labels:", detected_labels)
+
+    # Remove duplicates
+    detected_labels = list(set(detected_labels))
+
+    return detected_labels
diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_FewShot_Mixed.py
new file mode 100755
index 00000000..3b75978a
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_FewShot_Mixed.py
@@ -0,0 +1,120 @@
+import random
+import re
+
+from llmebench.datasets import WANLP22T3PropagandaDataset
+from llmebench.models import AzureModel
+from llmebench.tasks import MultilabelPropagandaTask
+
+random.seed(1333)
+
+
+def metadata():
+    return {
+        "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam",
+        "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)",
+        "model": "Llama-3.1-8B-Instruct",
+        "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).",
+    }
+
+
+def config():
+    return {
+        "dataset": WANLP22T3PropagandaDataset,
+        "dataset_args": {"techniques_path": "classes.txt"},
+        "task": MultilabelPropagandaTask,
+        "model": AzureModel,
+        "model_args": {
+            "class_labels": [
+                "no technique",
+                "Smears",
+                "Exaggeration/Minimisation",
+                "Loaded Language",
+                "Appeal to fear/prejudice",
+                "Name calling/Labeling",
+                "Slogans",
+                "Repetition",
+                "Doubt",
+                "Obfuscation, Intentional vagueness, Confusion",
+                "Flag-waving",
+                "Glittering generalities (Virtue)",
+                "Misrepresentation of Someone's Position (Straw Man)",
+                "Presenting Irrelevant Data (Red Herring)",
+                "Appeal to authority",
+                "Whataboutism",
+                "Black-and-white Fallacy/Dictatorship",
+                "Thought-terminating cliché",
+                "Causal Oversimplification",
+            ],
+            "max_tries": 3,
+        },
+    }
+
+
+def few_shot_prompt(input_sample, examples):
+    instruction = """
+    "من القائمة، اختر التقنية الدعائية الأنسب للتغريدة: "no technique"، "Smears"، "Exaggeration/Minimisation"، "Loaded Language"، "Appeal to fear/prejudice"، "Name calling/Labeling"، "Slogans"، "Repetition"، "Doubt"،
+    "Obfuscation, Intentional vagueness, Confusion"، "Flag-waving"، "Glittering generalities (Virtue)"، "Misrepresentation of Someone's Position (Straw Man)"، "Presenting Irrelevant Data (Red Herring)"، "Appeal to authority"، 
+    "Whataboutism"، "Black-and-white Fallacy/Dictatorship"، "Thought-terminating cliché"، أو "Causal Oversimplification".
+    """
+
+    few_shot_text = instruction + "\n\nإليك بعض الأمثلة:\n\n"
+    for index, example in enumerate(examples):
+        labels = ", ".join(example["label"])
+        few_shot_text += (
+            f"مثال {index + 1}:\n"
+            f"التغريدة: '{example['input']}'\n"
+            f"التصنيف: {labels}\n\n"
+        )
+
+    few_shot_text += f"الآن، قم بتقييم التغريدة الجديدة التالية:\nالتغريدة: '{input_sample}'\nالتصنيف: "
+    return few_shot_text
+
+
+def prompt(input_sample, examples):
+    return [{"role": "user", "content": few_shot_prompt(input_sample, examples)}]
+
+
+def post_process(response):
+    if not response or "error" in response or "output" not in response:
+        print("Error or missing output in response:", response)
+        return "No respose"  # Safely default to NOT_ADULT when unsure
+
+    label = response["output"].strip().lower()
+    label = label.replace("<s>", "").replace("</s>", "")
+    label = label.lower()
+
+    label_mapping = {
+        "بدون تقنية": "no technique",
+        "تشويه": "Smears",
+        "مبالغة/تقليل": "Exaggeration/Minimisation",
+        "لغة محملة": "Loaded Language",
+        "النداء إلى الخوف/التحيز": "Appeal to fear/prejudice",
+        "التسمية/الملصقات": "Name calling/Labeling",
+        "الشعارات": "Slogans",
+        "التكرار": "Repetition",
+        "الشك": "Doubt",
+        "التعمية/الغموض المتعمد/الارتباك": "Obfuscation, Intentional vagueness, Confusion",
+        "التلويح بالعلم": "Flag-waving",
+        "التعميمات البراقة (الفضيلة)": "Glittering generalities (Virtue)",
+        "تحريف موقف شخص (رجل القش)": "Misrepresentation of Someone's Position (Straw Man)",
+        "عرض بيانات غير ذات صلة (السمكة الحمراء)": "Presenting Irrelevant Data (Red Herring)",
+        "النداء إلى السلطة": "Appeal to authority",
+        "ماذا عن": "Whataboutism",
+        "مغالطة الأبيض والأسود/الديكتاتورية": "Black-and-white Fallacy/Dictatorship",
+        "الكليشيه القاطع للفكر": "Thought-terminating cliché",
+        "التبسيط السببي": "Causal Oversimplification",
+    }
+
+    detected_labels = []
+    for arabic_label, english_label in label_mapping.items():
+        if arabic_label in label:
+            detected_labels.append(english_label)
+        elif english_label.lower() in label:
+            detected_labels.append(english_label)
+
+    print("Detected labels:", detected_labels)
+
+    # Remove duplicates
+    detected_labels = list(set(detected_labels))
+
+    return detected_labels
diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_ZeroShot_Arabic.py
new file mode 100755
index 00000000..a42d224f
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_ZeroShot_Arabic.py
@@ -0,0 +1,114 @@
+import random
+import re
+
+from llmebench.datasets import WANLP22T3PropagandaDataset
+from llmebench.models import AzureModel
+from llmebench.tasks import MultilabelPropagandaTask
+
+
+random.seed(1333)
+
+
+def metadata():
+    return {
+        "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam",
+        "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)",
+        "model": "Llama-3.1-8B-Instruct",
+        "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).",
+    }
+
+
+def config():
+    return {
+        "dataset": WANLP22T3PropagandaDataset,
+        "dataset_args": {"techniques_path": "classes.txt"},
+        "task": MultilabelPropagandaTask,
+        "model": AzureModel,
+        "model_args": {
+            "class_labels": [
+                "no technique",
+                "Smears",
+                "Exaggeration/Minimisation",
+                "Loaded Language",
+                "Appeal to fear/prejudice",
+                "Name calling/Labeling",
+                "Slogans",
+                "Repetition",
+                "Doubt",
+                "Obfuscation, Intentional vagueness, Confusion",
+                "Flag-waving",
+                "Glittering generalities (Virtue)",
+                "Misrepresentation of Someone's Position (Straw Man)",
+                "Presenting Irrelevant Data (Red Herring)",
+                "Appeal to authority",
+                "Whataboutism",
+                "Black-and-white Fallacy/Dictatorship",
+                "Thought-terminating cliché",
+                "Causal Oversimplification",
+            ],
+            "max_tries": 3,
+        },
+    }
+
+
+def prompt(input_sample):
+    instruction = """
+    "من القائمة، اختر التقنية الدعائية التي تلتقط جوهر التغريدة بشكل أفضل، "بدون تقنية"، "تشويه"، "مبالغة/تقليل"، "لغة محملة"، "النداء إلى الخوف/التحيز"، "التسمية/الملصقات"، "الشعارات"، "التكرار"، "الشك"،
+ "التعمية/الغموض المتعمد/الارتباك"، "التلويح بالعلم"، "التعميمات البراقة (الفضيلة)"، "تحريف موقف شخص (رجل القش)"، "عرض بيانات غير ذات صلة (السمكة الحمراء)"، "النداء إلى السلطة"، 
+"ماذا عن"، "مغالطة الأبيض والأسود/الديكتاتورية"، "الكليشيه القاطع للفكر"، أو "التبسيط السببي"."
+
+    """
+    return [
+        {
+            "role": "user",
+            "content": (
+                f" \n{instruction}\n" + "التغريدة: " + input_sample + "التصنيف: "
+            ),
+        }
+    ]
+
+
+def post_process(response):
+    if not response or "error" in response or "output" not in response:
+        print("Error or missing output in response:", response)
+        return "No respose"  # Safely default to NOT_ADULT when unsure
+
+    label = response["output"].strip().lower()
+    label = label.replace("<s>", "").replace("</s>", "")
+    label = label.lower()
+
+    label_mapping = {
+        "بدون تقنية": "no technique",
+        "تشويه": "Smears",
+        "مبالغة/تقليل": "Exaggeration/Minimisation",
+        "لغة محملة": "Loaded Language",
+        "النداء إلى الخوف/التحيز": "Appeal to fear/prejudice",
+        "التسمية/الملصقات": "Name calling/Labeling",
+        "الشعارات": "Slogans",
+        "التكرار": "Repetition",
+        "الشك": "Doubt",
+        "التعمية/الغموض المتعمد/الارتباك": "Obfuscation, Intentional vagueness, Confusion",
+        "التلويح بالعلم": "Flag-waving",
+        "التعميمات البراقة (الفضيلة)": "Glittering generalities (Virtue)",
+        "تحريف موقف شخص (رجل القش)": "Misrepresentation of Someone's Position (Straw Man)",
+        "عرض بيانات غير ذات صلة (السمكة الحمراء)": "Presenting Irrelevant Data (Red Herring)",
+        "النداء إلى السلطة": "Appeal to authority",
+        "ماذا عن": "Whataboutism",
+        "مغالطة الأبيض والأسود/الديكتاتورية": "Black-and-white Fallacy/Dictatorship",
+        "الكليشيه القاطع للفكر": "Thought-terminating cliché",
+        "التبسيط السببي": "Causal Oversimplification",
+    }
+
+    detected_labels = []
+    for arabic_label, english_label in label_mapping.items():
+        if arabic_label in label:
+            detected_labels.append(english_label)
+        elif english_label.lower() in label:
+            detected_labels.append(english_label)
+
+    print("Detected labels:", detected_labels)
+
+    # Remove duplicates
+    detected_labels = list(set(detected_labels))
+
+    return detected_labels
diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_ZeroShot_English.py
new file mode 100755
index 00000000..3c87ced3
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_ZeroShot_English.py
@@ -0,0 +1,188 @@
+import random
+import re
+
+from llmebench.datasets import WANLP22T3PropagandaDataset
+from llmebench.models import AzureModel
+from llmebench.tasks import MultilabelPropagandaTask
+
+
+random.seed(1333)
+
+
+def metadata():
+    return {
+        "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam",
+        "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)",
+        "model": "Llama-3.1-8B-Instruct",
+        "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).",
+    }
+
+
+def config():
+    return {
+        "dataset": WANLP22T3PropagandaDataset,
+        "dataset_args": {"techniques_path": "classes.txt"},
+        "task": MultilabelPropagandaTask,
+        "model": AzureModel,
+        "model_args": {
+            "class_labels": [
+                "no technique",
+                "Smears",
+                "Exaggeration/Minimisation",
+                "Loaded Language",
+                "Appeal to fear/prejudice",
+                "Name calling/Labeling",
+                "Slogans",
+                "Repetition",
+                "Doubt",
+                "Obfuscation, Intentional vagueness, Confusion",
+                "Flag-waving",
+                "Glittering generalities (Virtue)",
+                "Misrepresentation of Someone's Position (Straw Man)",
+                "Presenting Irrelevant Data (Red Herring)",
+                "Appeal to authority",
+                "Whataboutism",
+                "Black-and-white Fallacy/Dictatorship",
+                "Thought-terminating cliché",
+                "Causal Oversimplification",
+            ],
+            "max_tries": 3,
+        },
+    }
+
+
+def prompt(input_sample):
+    instruction = """
+Review the following tweets and analyze the propaganda techniques used. Choose only the applicable categories from: "no technique," "Smears,"
+ "Exaggeration/Minimisation," "Loaded Language," "Appeal to fear/prejudice," "Name calling/Labeling," "Slogans," "Repetition," "Doubt,"
+ "Obfuscation/Intentional vagueness/Confusion," "Flag-waving," "Glittering generalities (Virtue),"
+ "Misrepresentation of Someone's Position (Straw Man)," "Presenting Irrelevant Data (Red Herring)," "Appeal to authority,"
+"Whataboutism," "Black-and-white Fallacy/Dictatorship," "Thought-terminating cliché," or "Causal Oversimplification."
+    """
+    return [
+        {
+            "role": "user",
+            "content": (f"{instruction}\n" + "Tweet: " + input_sample + "\nLabel: "),
+        }
+    ]
+
+
+# def post_process(response):
+#     if not response or 'error' in response or 'output' not in response:
+#         print("Error or missing output in response:", response)
+#         return "No respose"  # Safely default to NOT_ADULT when unsure
+
+#     label = response["output"].strip().lower()
+#     label = label.replace("<s>", "").replace("</s>", "")
+#     label = label.lower()
+#     label_mapping = {
+#         "بدون تقنية": "no technique",
+#         "تشويه": "Smears",
+#         "مبالغة/تقليل": "Exaggeration/Minimisation",
+#         "لغة محملة": "Loaded Language",
+#         "النداء إلى الخوف/التحيز": "Appeal to fear/prejudice",
+#         "التسمية/الملصقات": "Name calling/Labeling",
+#         "الشعارات": "Slogans",
+#         "التكرار": "Repetition",
+#         "الشك": "Doubt",
+#         "التعمية/الغموض المتعمد/الارتباك": "Obfuscation, Intentional vagueness, Confusion",
+#         "التلويح بالعلم": "Flag-waving",
+#         "التعميمات البراقة (الفضيلة)": "Glittering generalities (Virtue)",
+#         "تحريف موقف شخص (رجل القش)": "Misrepresentation of Someone's Position (Straw Man)",
+#         "عرض بيانات غير ذات صلة (السمكة الحمراء)": "Presenting Irrelevant Data (Red Herring)",
+#         "النداء إلى السلطة": "Appeal to authority",
+#         "ماذا عن": "Whataboutism",
+#         "مغالطة الأبيض والأسود/الديكتاتورية": "Black-and-white Fallacy/Dictatorship",
+#         "الكليشيه القاطع للفكر": "Thought-terminating cliché",
+#         "التبسيط السببي": "Causal Oversimplification"
+#     }
+#     print("label: ", label)
+#     detected_labels = []
+
+#     if "no technique" in label:
+#         detected_labels.append(label_mapping["بدون تقنية"])
+#     if "Smears" in label:
+#         detected_labels.append(label_mapping["تشويه"])
+#     if "Exaggeration/Minimisation" in label or "مبالغة" in label:
+#         detected_labels.append(label_mapping["مبالغة/تقليل"])
+#     if "Loaded Language" in label:
+#         detected_labels.append(label_mapping["لغة محملة"])
+#     if "Appeal to fear/prejudice" in label or "الخوف" in label or "fear" in label:
+#         detected_labels.append(label_mapping["النداء إلى الخوف/التحيز"])
+#     if "Name calling/Labeling" in label or "التسمية" or "name" in label:
+#         detected_labels.append(label_mapping["التسمية/الملصقات"])
+#     if "Slogans" in label:
+#         detected_labels.append(label_mapping["الشعارات"])
+#     if "Repetition" in label:
+#         detected_labels.append(label_mapping["التكرار"])
+#     if "Doubt" in label:
+#         detected_labels.append(label_mapping["الشك"])
+#     if "Obfuscation, Intentional vagueness, Confusion" in label or "Obfuscation" in label or "Intentional vagueness" in label or "Confusion" in label:
+#         detected_labels.append(label_mapping["التعمية/الغموض المتعمد/الارتباك"])
+#     if "Flag-waving" in label or "flag":
+#         detected_labels.append(label_mapping["التلويح بالعلم"])
+#     if "Glittering generalities (Virtue)" in label or "الفضيلة" in label or "Glittering":
+#         detected_labels.append(label_mapping["التعميمات البراقة (الفضيلة)"])
+#     if "Misrepresentation of Someone's Position (Straw Man)" in label or "تحريف موقف شخص" in label:
+#         detected_labels.append(label_mapping["تحريف موقف شخص (رجل القش)"])
+#     if "Presenting Irrelevant Data (Red Herring)" in label or "عرض بيانات غير ذات صلة" in label:
+#         detected_labels.append(label_mapping["عرض بيانات غير ذات صلة (السمكة الحمراء)"])
+#     if "Appeal to authority" in label:
+#         detected_labels.append(label_mapping["النداء إلى السلطة"])
+#     if "Whataboutism" in label:
+#         detected_labels.append(label_mapping["ماذا عن"])
+#     if "Black-and-white Fallacy/Dictatorship" in label or "الديكتاتورية" in label:
+#         detected_labels.append(label_mapping["مغالطة الأبيض والأسود/الديكتاتورية"])
+#     if "Thought-terminating cliché" in label or "الكليشيه" in label:
+#         detected_labels.append(label_mapping["الكليشيه القاطع للفكر"])
+#     if "Causal Oversimplification" in label or "التبسيط" in label:
+#         detected_labels.append(label_mapping["التبسيط السببي"])
+
+
+#     return detected_labels
+
+
+def post_process(response):
+    if not response or "error" in response or "output" not in response:
+        print("Error or missing output in response:", response)
+        return "No respose"  # Safely default to NOT_ADULT when unsure
+
+    label = response["output"].strip().lower()
+    label = label.replace("<s>", "").replace("</s>", "")
+    label = label.lower()
+
+    label_mapping = {
+        "بدون تقنية": "no technique",
+        "تشويه": "Smears",
+        "مبالغة/تقليل": "Exaggeration/Minimisation",
+        "لغة محملة": "Loaded Language",
+        "النداء إلى الخوف/التحيز": "Appeal to fear/prejudice",
+        "التسمية/الملصقات": "Name calling/Labeling",
+        "الشعارات": "Slogans",
+        "التكرار": "Repetition",
+        "الشك": "Doubt",
+        "التعمية/الغموض المتعمد/الارتباك": "Obfuscation, Intentional vagueness, Confusion",
+        "التلويح بالعلم": "Flag-waving",
+        "التعميمات البراقة (الفضيلة)": "Glittering generalities (Virtue)",
+        "تحريف موقف شخص (رجل القش)": "Misrepresentation of Someone's Position (Straw Man)",
+        "عرض بيانات غير ذات صلة (السمكة الحمراء)": "Presenting Irrelevant Data (Red Herring)",
+        "النداء إلى السلطة": "Appeal to authority",
+        "ماذا عن": "Whataboutism",
+        "مغالطة الأبيض والأسود/الديكتاتورية": "Black-and-white Fallacy/Dictatorship",
+        "الكليشيه القاطع للفكر": "Thought-terminating cliché",
+        "التبسيط السببي": "Causal Oversimplification",
+    }
+
+    detected_labels = []
+    for arabic_label, english_label in label_mapping.items():
+        if arabic_label in label:
+            detected_labels.append(english_label)
+        elif english_label.lower() in label:
+            detected_labels.append(english_label)
+
+    print("Detected labels:", detected_labels)
+
+    # Remove duplicates
+    detected_labels = list(set(detected_labels))
+
+    return detected_labels
diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_ZeroShot_Mixed.py
new file mode 100755
index 00000000..cdec1b31
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_ZeroShot_Mixed.py
@@ -0,0 +1,113 @@
+import random
+import re
+
+from llmebench.datasets import WANLP22T3PropagandaDataset
+from llmebench.models import AzureModel
+from llmebench.tasks import MultilabelPropagandaTask
+
+
+random.seed(1333)
+
+
+def metadata():
+    return {
+        "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam",
+        "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)",
+        "model": "Llama-3.1-8B-Instruct",
+        "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).",
+    }
+
+
+def config():
+    return {
+        "dataset": WANLP22T3PropagandaDataset,
+        "dataset_args": {"techniques_path": "classes.txt"},
+        "task": MultilabelPropagandaTask,
+        "model": AzureModel,
+        "model_args": {
+            "class_labels": [
+                "no technique",
+                "Smears",
+                "Exaggeration/Minimisation",
+                "Loaded Language",
+                "Appeal to fear/prejudice",
+                "Name calling/Labeling",
+                "Slogans",
+                "Repetition",
+                "Doubt",
+                "Obfuscation, Intentional vagueness, Confusion",
+                "Flag-waving",
+                "Glittering generalities (Virtue)",
+                "Misrepresentation of Someone's Position (Straw Man)",
+                "Presenting Irrelevant Data (Red Herring)",
+                "Appeal to authority",
+                "Whataboutism",
+                "Black-and-white Fallacy/Dictatorship",
+                "Thought-terminating cliché",
+                "Causal Oversimplification",
+            ],
+            "max_tries": 3,
+        },
+    }
+
+
+def prompt(input_sample):
+    instruction = """
+    "من القائمة، اختر التقنية الدعائية الأنسب للتغريدة: "no technique"، "Smears"، "Exaggeration/Minimisation"، "Loaded Language"، "Appeal to fear/prejudice"، "Name calling/Labeling"، "Slogans"، "Repetition"، "Doubt"،
+    "Obfuscation, Intentional vagueness, Confusion"، "Flag-waving"، "Glittering generalities (Virtue)"، "Misrepresentation of Someone's Position (Straw Man)"، "Presenting Irrelevant Data (Red Herring)"، "Appeal to authority"، 
+    "Whataboutism"، "Black-and-white Fallacy/Dictatorship"، "Thought-terminating cliché"، أو "Causal Oversimplification".
+    """
+    return [
+        {
+            "role": "user",
+            "content": (
+                f" \n{instruction}\n" + "التغريدة: " + input_sample + "التصنيف: "
+            ),
+        }
+    ]
+
+
+def post_process(response):
+    if not response or "error" in response or "output" not in response:
+        print("Error or missing output in response:", response)
+        return "No respose"  # Safely default to NOT_ADULT when unsure
+
+    label = response["output"].strip().lower()
+    label = label.replace("<s>", "").replace("</s>", "")
+    label = label.lower()
+
+    label_mapping = {
+        "بدون تقنية": "no technique",
+        "تشويه": "Smears",
+        "مبالغة/تقليل": "Exaggeration/Minimisation",
+        "لغة محملة": "Loaded Language",
+        "النداء إلى الخوف/التحيز": "Appeal to fear/prejudice",
+        "التسمية/الملصقات": "Name calling/Labeling",
+        "الشعارات": "Slogans",
+        "التكرار": "Repetition",
+        "الشك": "Doubt",
+        "التعمية/الغموض المتعمد/الارتباك": "Obfuscation, Intentional vagueness, Confusion",
+        "التلويح بالعلم": "Flag-waving",
+        "التعميمات البراقة (الفضيلة)": "Glittering generalities (Virtue)",
+        "تحريف موقف شخص (رجل القش)": "Misrepresentation of Someone's Position (Straw Man)",
+        "عرض بيانات غير ذات صلة (السمكة الحمراء)": "Presenting Irrelevant Data (Red Herring)",
+        "النداء إلى السلطة": "Appeal to authority",
+        "ماذا عن": "Whataboutism",
+        "مغالطة الأبيض والأسود/الديكتاتورية": "Black-and-white Fallacy/Dictatorship",
+        "الكليشيه القاطع للفكر": "Thought-terminating cliché",
+        "التبسيط السببي": "Causal Oversimplification",
+    }
+
+    detected_labels = []
+    for arabic_label, english_label in label_mapping.items():
+        if arabic_label in label:
+            detected_labels.append(english_label)
+        elif english_label.lower() in label:
+            detected_labels.append(english_label)
+
+    print("Detected labels:", detected_labels)
+
+    # Remove duplicates
+    detected_labels = list(set(detected_labels))
+
+    return detected_labels

From 65134a87270b7986b410ef989d160054edcabfb5 Mon Sep 17 00:00:00 2001
From: MohamedBayan <mohamadbayan2014noname@gmail.com>
Date: Mon, 20 Jan 2025 15:57:48 +0300
Subject: [PATCH 2/3] Fix errors

---
 .../WANLP22T3_GPT4_FewShot_Arabic.py          | 152 --------
 .../WANLP22T3_GPT4_FewShot_English.py         | 200 -----------
 .../WANLP22T3_GPT4_FewShot_Mixed.py           | 143 --------
 .../WANLP22T3_GPT4_ZeroShot_Arabic.py         | 116 -------
 .../WANLP22T3_GPT4_ZeroShot_English.py        | 175 ----------
 .../WANLP22T3_GPT4_ZeroShot_Mixed.py          | 116 -------
 .../WANLP22T3_JAIS13b_FewShot_Arabic.py       | 143 --------
 .../WANLP22T3_JAIS13b_FewShot_English.py      | 183 ----------
 .../WANLP22T3_JAIS13b_FewShot_Mixed.py        | 116 -------
 .../WANLP22T3_JAIS13b_ZeroShot_Arabic.py      | 140 --------
 .../WANLP22T3_JAIS13b_ZeroShot_English.py     | 164 ---------
 .../WANLP22T3_JAIS13b_ZeroShot_Mixed.py       | 136 --------
 .../WANLP22T3_Llama3-8b_FewShot_Arabic.py     | 327 ------------------
 .../WANLP22T3_Llama3-8b_FewShot_English.py    | 142 --------
 .../WANLP22T3_Llama3-8b_FewShot_Mixed.py      | 120 -------
 .../WANLP22T3_Llama3-8b_ZeroShot_Arabic.py    | 114 ------
 .../WANLP22T3_Llama3-8b_ZeroShot_English.py   | 188 ----------
 .../WANLP22T3_Llama3-8b_ZeroShot_Mixed.py     | 113 ------
 18 files changed, 2788 deletions(-)
 delete mode 100755 assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_FewShot_Arabic.py
 delete mode 100755 assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_FewShot_English.py
 delete mode 100755 assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_FewShot_Mixed.py
 delete mode 100755 assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_ZeroShot_Arabic.py
 delete mode 100755 assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_ZeroShot_English.py
 delete mode 100755 assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_ZeroShot_Mixed.py
 delete mode 100755 assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_FewShot_Arabic.py
 delete mode 100755 assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_FewShot_English.py
 delete mode 100755 assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_FewShot_Mixed.py
 delete mode 100755 assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_ZeroShot_Arabic.py
 delete mode 100755 assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_ZeroShot_English.py
 delete mode 100755 assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_ZeroShot_Mixed.py
 delete mode 100755 assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_FewShot_Arabic.py
 delete mode 100755 assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_FewShot_English.py
 delete mode 100755 assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_FewShot_Mixed.py
 delete mode 100755 assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_ZeroShot_Arabic.py
 delete mode 100755 assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_ZeroShot_English.py
 delete mode 100755 assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_ZeroShot_Mixed.py

diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_FewShot_Arabic.py
deleted file mode 100755
index 8640392e..00000000
--- a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_FewShot_Arabic.py
+++ /dev/null
@@ -1,152 +0,0 @@
-import ast
-import random
-import re
-
-from llmebench.datasets import WANLP22T3PropagandaDataset
-from llmebench.models import OpenAIModel
-from llmebench.tasks import MultilabelPropagandaTask
-
-
-random.seed(1333)
-
-
-def metadata():
-    return {
-        "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam",
-        "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)",
-        "model": "GPT-4o-2024-05-22",
-        "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).",
-    }
-
-
-def config():
-    return {
-        "dataset": WANLP22T3PropagandaDataset,
-        "dataset_args": {"techniques_path": "classes.txt"},
-        "task": MultilabelPropagandaTask,
-        "model": OpenAIModel,
-        "model_args": {
-            "class_labels": [
-                "no technique",
-                "Smears",
-                "Exaggeration/Minimisation",
-                "Loaded Language",
-                "Appeal to fear/prejudice",
-                "Name calling/Labeling",
-                "Slogans",
-                "Repetition",
-                "Doubt",
-                "Obfuscation, Intentional vagueness, Confusion",
-                "Flag-waving",
-                "Glittering generalities (Virtue)",
-                "Misrepresentation of Someone's Position (Straw Man)",
-                "Presenting Irrelevant Data (Red Herring)",
-                "Appeal to authority",
-                "Whataboutism",
-                "Black-and-white Fallacy/Dictatorship",
-                "Thought-terminating cliché",
-                "Causal Oversimplification",
-            ],
-            "max_tries": 30,
-        },
-    }
-
-
-def translate_labels(label):
-    label_mapping = {
-        "no technique": "بدون تقنية",
-        "Smears": "تشويه",
-        "Exaggeration/Minimisation": "مبالغة/تقليل",
-        "Loaded Language": "لغة محملة بالمشاعر",
-        "Appeal to fear/prejudice": "الاحتكام إلى الخوف/التحيز",
-        "Name calling/Labeling": "التسمية/الملصقات",
-        "Slogans": "الشعارات",
-        "Repetition": "التكرار",
-        "Doubt": "الشك",
-        "Obfuscation, Intentional vagueness, Confusion": "التعمية/الغموض المتعمد/الارتباك",
-        "Flag-waving": "التلويح بالعلم",
-        "Glittering generalities (Virtue)": "التعميمات البراقة (الفضيلة)",
-        "Misrepresentation of Someone's Position (Straw Man)": "تحريف موقف شخص (مغالطة رجل القش)",
-        "Presenting Irrelevant Data (Red Herring)": "عرض بيانات غير ذات صلة (السمكة الحمراء)",
-        "Appeal to authority": "الاحتكام إلى السلطة",
-        "Whataboutism": "ماذا عن",
-        "Black-and-white Fallacy/Dictatorship": "مغالطة الأبيض والأسود/الديكتاتورية",
-        "Thought-terminating cliché": "الكليشيه القاطع للفكر",
-        "Causal Oversimplification": "التبسيط السببي",
-    }
-    return label_mapping.get(label, label)
-
-
-def few_shot_prompt(input_sample, base_prompt, examples):
-    out_prompt = base_prompt + "\nاليك بعض الأمثلة:\n\n"
-    for index, example in enumerate(examples):
-        tech_str = ", ".join([f"'{translate_labels(t)}'" for t in example["label"]])
-        out_prompt += (
-            f"مثال {index}:\nالتغريدة: {example['input']}\nالتصنيف: {tech_str}\n\n"
-        )
-    out_prompt += f"التغريدة: {input_sample}\nالتصنيف: \n"
-    return out_prompt
-
-
-def prompt(input_sample, examples):
-    base_prompt = """
-        "من القائمة، اختر التقنية الدعائية الأنسب للتغريدة: "بدون تقنية"، "تشويه"، "مبالغة/تقليل"، "لغة محملة بالمشاعر"، "الاحتكام إلى الخوف/التحيز"، "التسمية/الملصقات"، "الشعارات"، "التكرار"، "الشك"،
-        "التعمية/الغموض المتعمد/الارتباك"، "التلويح بالعلم"، "التعميمات البراقة (الفضيلة)"، "تحريف موقف شخص (مغالطة رجل القش)"، "عرض بيانات غير ذات صلة (السمكة الحمراء)"، "الاحتكام إلى السلطة"، 
-        "ماذا عن"، "مغالطة الأبيض والأسود/الديكتاتورية"، "الكليشيه القاطع للفكر"، أو "التبسيط السببي"."
-        """
-
-    return [
-        {
-            "role": "system",
-            "content": "أنت خبير في تحليل محتوى وسائل التواصل الاجتماعي.",
-        },
-        {
-            "role": "user",
-            "content": few_shot_prompt(input_sample, base_prompt, examples),
-        },
-    ]
-
-
-def post_process(response):
-    label = response["choices"][0]["message"]["content"]  # .lower()
-    label = label.strip().lower()
-    if (
-        "لا يوجد في النص" in label
-        or label == "'no technique'"
-        or "doesn't" in label
-        or "does not" in label
-        or "لا يحتوي" in label
-        or "لا يستخدم" in label
-    ):
-        return []
-    label_mapping = {
-        "بدون تقنية": "no technique",
-        "تشويه": "Smears",
-        "مبالغة/تقليل": "Exaggeration/Minimisation",
-        "لغة محملة بالمشاعر": "Loaded Language",
-        "الاحتكام إلى الخوف/التحيز": "Appeal to fear/prejudice",
-        "التسمية/الملصقات": "Name calling/Labeling",
-        "الشعارات": "Slogans",
-        "التكرار": "Repetition",
-        "الشك": "Doubt",
-        "التعمية/الغموض المتعمد/الارتباك": "Obfuscation, Intentional vagueness, Confusion",
-        "التلويح بالعلم": "Flag-waving",
-        "التعميمات البراقة (الفضيلة)": "Glittering generalities (Virtue)",
-        "تحريف موقف شخص (مغالطة رجل القش)": "Misrepresentation of Someone's Position (Straw Man)",
-        "عرض بيانات غير ذات صلة (السمكة الحمراء)": "Presenting Irrelevant Data (Red Herring)",
-        "الاحتكام إلى السلطة": "Appeal to authority",
-        "ماذا عن": "Whataboutism",
-        "مغالطة الأبيض والأسود/الديكتاتورية": "Black-and-white Fallacy/Dictatorship",
-        "الكليشيه القاطع للفكر": "Thought-terminating cliché",
-        "التبسيط السببي": "Causal Oversimplification",
-    }
-
-    detected_labels = [
-        english_label
-        for arabic_label, english_label in label_mapping.items()
-        if arabic_label in label or english_label.lower() in label
-    ]
-
-    final_labels = [l for l in detected_labels if "no technique" not in l]
-
-    return list(set(final_labels))
diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_FewShot_English.py
deleted file mode 100755
index 447e18c1..00000000
--- a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_FewShot_English.py
+++ /dev/null
@@ -1,200 +0,0 @@
-import random
-import re
-
-from llmebench.datasets import WANLP22T3PropagandaDataset
-from llmebench.models import OpenAIModel
-from llmebench.tasks import MultilabelPropagandaTask
-
-
-random.seed(1333)
-
-
-def metadata():
-    return {
-        "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam",
-        "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)",
-        "model": "GPT-4o-2024-05-22",
-        "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).",
-    }
-
-
-def config():
-    return {
-        "dataset": WANLP22T3PropagandaDataset,
-        "dataset_args": {"techniques_path": "classes.txt"},
-        "task": MultilabelPropagandaTask,
-        "model": OpenAIModel,
-        "model_args": {
-            "class_labels": [
-                "no technique",
-                "Smears",
-                "Exaggeration/Minimisation",
-                "Loaded Language",
-                "Appeal to fear/prejudice",
-                "Name calling/Labeling",
-                "Slogans",
-                "Repetition",
-                "Doubt",
-                "Obfuscation, Intentional vagueness, Confusion",
-                "Flag-waving",
-                "Glittering generalities (Virtue)",
-                "Misrepresentation of Someone's Position (Straw Man)",
-                "Presenting Irrelevant Data (Red Herring)",
-                "Appeal to authority",
-                "Whataboutism",
-                "Black-and-white Fallacy/Dictatorship",
-                "Thought-terminating cliché",
-                "Causal Oversimplification",
-            ],
-            "max_tries": 30,
-        },
-    }
-
-
-def few_shot_prompt(input_sample, base_prompt, examples):
-    out_prompt = base_prompt + "\n"
-    out_prompt = out_prompt + "Here are some examples:\n\n"
-    for index, example in enumerate(examples):
-        tech_str = ""
-        for t in example["label"]:
-            tech_str += "'" + t + "', "
-
-        out_prompt = (
-            out_prompt
-            + "Example "
-            + str(index)
-            + ":"
-            + "\n"
-            + "tweet: "
-            + example["input"]
-            + "\nlabel: "
-            + tech_str
-            + "\n\n"
-        )
-
-    # Append the sentence we want the model to predict for but leave the Label blank
-    out_prompt = out_prompt + "tweet: " + input_sample + "\nlabel: \n"
-
-    return out_prompt
-
-
-def prompt(input_sample, examples):
-    base_prompt = (
-        f'Label this "tweet" based on the following propaganda techniques:\n\n'
-        f"'no technique' , 'Smears' , 'Exaggeration/Minimisation' , 'Loaded Language' , 'Appeal to fear/prejudice' , 'Name calling/Labeling' , 'Slogans' , 'Repetition' , 'Doubt' , 'Obfuscation, Intentional vagueness, Confusion' , 'Flag-waving' , 'Glittering generalities (Virtue)' , 'Misrepresentation of Someone's Position (Straw Man)' , 'Presenting Irrelevant Data (Red Herring)' , 'Appeal to authority' , 'Whataboutism' , 'Black-and-white Fallacy/Dictatorship' , 'Thought-terminating cliché' , 'Causal Oversimplification'"
-        f"Provide only labels as a list of strings.\n"
-    )
-
-    return [
-        {
-            "role": "system",
-            "content": "You are an expert social media content analyst.",
-        },
-        {
-            "role": "user",
-            "content": few_shot_prompt(input_sample, base_prompt, examples),
-        },
-    ]
-
-
-def fix_label(pred_label):
-    if "used in this text" in pred_label:
-        return ["no technique"]
-
-    labels_fixed = []
-    pred_label = pred_label.replace('"', "'").split("', '")
-    pred_labels = []
-
-    for l in pred_label:
-        pred_labels.append(l)
-
-    if len(pred_labels) == 0:
-        return ["no technique"]
-
-    for label in pred_labels:
-        label = label.replace(".", "").strip()
-        label = re.sub("-", " ", label)
-        label = label.strip().lower()
-
-        # Handle case of single word labels like "Smears" so we just capitalize it
-        label_fixed = label.capitalize()
-
-        # print(label)
-        if "slogan" in label:
-            label_fixed = "Slogans"
-        if "loaded" in label:
-            label_fixed = "Loaded Language"
-        if "prejudice" in label or "fear" in label or "mongering" in label:
-            label_fixed = "Appeal to fear/prejudice"
-        if "terminating" in label or "thought" in label:
-            label_fixed = "Thought-terminating cliché"
-        if "calling" in label or label == "name c":
-            label_fixed = "Name calling/Labeling"
-        if "minimisation" in label or label == "exaggeration minim":
-            label_fixed = "Exaggeration/Minimisation"
-        if "glittering" in label:
-            label_fixed = "Glittering generalities (Virtue)"
-        if "flag" in label:
-            label_fixed = "Flag-waving"
-        if "obfuscation" in label:
-            label_fixed = "Obfuscation, Intentional vagueness, Confusion"
-        if "oversimplification" in label or "causal" in label:
-            label_fixed = "Causal Oversimplification"
-        if "authority" in label:
-            label_fixed = "Appeal to authority"
-        if "dictatorship" in label or "black" in label or "white" in label:
-            label_fixed = "Black-and-white Fallacy/Dictatorship"
-        if "herring" in label or "irrelevant" in label:
-            label_fixed = "Presenting Irrelevant Data (Red Herring)"
-        if "straw" in label or "misrepresentation" in label:
-            label_fixed = "Misrepresentation of Someone's Position (Straw Man)"
-        if "whataboutism" in label:
-            label_fixed = "Whataboutism"
-
-        if (
-            "no propaganda" in label
-            or "technique" in label
-            or label == ""
-            or label == "no"
-            or label == "appeal to history"
-            or label == "appeal to emotion"
-            or label == "appeal to"
-            or label == "appeal"
-            or label == "appeal to author"
-            or label == "emotional appeal"
-            or "no techn" in label
-            or "hashtag" in label
-            or "theory" in label
-            or "specific mention" in label
-            or "religious" in label
-            or "gratitude" in label
-        ):
-            label_fixed = "no technique"
-
-        labels_fixed.append(label_fixed)
-
-    out_put_labels = []
-    # Remove no technique label when we have other techniques for the same text
-    if len(labels_fixed) > 1:
-        for flabel in labels_fixed:
-            if "no technique" not in flabel:
-                out_put_labels.append(flabel)
-        return out_put_labels
-
-    return labels_fixed
-
-
-def post_process(response):
-    label = response["choices"][0]["message"]["content"].lower()
-    if (
-        "لا يوجد في النص" in label
-        or label == "'no technique'"
-        or "doesn't" in label
-        or "does not" in label
-        or "لا يحتوي" in label
-        or "لا يستخدم" in label
-    ):
-        return []
-    pred_label = fix_label(label)
-
-    return pred_label
diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_FewShot_Mixed.py
deleted file mode 100755
index c2a4f1ba..00000000
--- a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_FewShot_Mixed.py
+++ /dev/null
@@ -1,143 +0,0 @@
-import random
-import re
-
-from llmebench.datasets import WANLP22T3PropagandaDataset
-from llmebench.models import OpenAIModel
-from llmebench.tasks import MultilabelPropagandaTask
-
-
-random.seed(1333)
-
-
-def metadata():
-    return {
-        "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam",
-        "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)",
-        "model": "GPT-4o-2024-05-22",
-        "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).",
-    }
-
-
-def config():
-    return {
-        "dataset": WANLP22T3PropagandaDataset,
-        "dataset_args": {"techniques_path": "classes.txt"},
-        "task": MultilabelPropagandaTask,
-        "model": OpenAIModel,
-        "model_args": {
-            "class_labels": [
-                "no technique",
-                "Smears",
-                "Exaggeration/Minimisation",
-                "Loaded Language",
-                "Appeal to fear/prejudice",
-                "Name calling/Labeling",
-                "Slogans",
-                "Repetition",
-                "Doubt",
-                "Obfuscation, Intentional vagueness, Confusion",
-                "Flag-waving",
-                "Glittering generalities (Virtue)",
-                "Misrepresentation of Someone's Position (Straw Man)",
-                "Presenting Irrelevant Data (Red Herring)",
-                "Appeal to authority",
-                "Whataboutism",
-                "Black-and-white Fallacy/Dictatorship",
-                "Thought-terminating cliché",
-                "Causal Oversimplification",
-            ],
-            "max_tries": 30,
-        },
-    }
-
-
-def few_shot_prompt(input_sample, base_prompt, examples):
-    out_prompt = base_prompt + "\n"
-    out_prompt = out_prompt + "اليك بعض الأمثلة:\n\n"
-    for index, example in enumerate(examples):
-        tech_str = ""
-        for t in example["label"]:
-            tech_str += "'" + t + "', "
-
-        out_prompt = (
-            out_prompt
-            + "مثال "
-            + str(index)
-            + ":"
-            + "\n"
-            + "التغريدة: "
-            + example["input"]
-            + "\n"
-            + "التصنيف: "
-            + tech_str
-            + "\n\n"
-        )
-
-    # Append the sentence we want the model to predict for but leave the Label blank
-    out_prompt = out_prompt + "التغريدة: " + input_sample + "\التصنيف: \n"
-
-    return out_prompt
-
-
-def prompt(input_sample, examples):
-    base_prompt = (
-        f'قم بتصنيف هذه "التغريدة" بناءً على تقنيات الدعاية التالية:\n\n'
-        f"'no technique' , 'Smears' , 'Exaggeration/Minimisation' , 'Loaded Language' , 'Appeal to fear/prejudice' , 'Name calling/Labeling' , 'Slogans' , 'Repetition' , 'Doubt' , 'Obfuscation, Intentional vagueness, Confusion' , 'Flag-waving' , 'Glittering generalities (Virtue)' , 'Misrepresentation of Someone's Position (Straw Man)' , 'Presenting Irrelevant Data (Red Herring)' , 'Appeal to authority' , 'Whataboutism' , 'Black-and-white Fallacy/Dictatorship' , 'Thought-terminating cliché' , 'Causal Oversimplification'"
-        f"قدم التصنيفات فقط كقائمة من النصوص.\n"
-    )
-
-    return [
-        {
-            "role": "system",
-            "content": "أنت خبير في تحليل محتوى وسائل التواصل الاجتماعي.",
-        },
-        {
-            "role": "user",
-            "content": few_shot_prompt(input_sample, base_prompt, examples),
-        },
-    ]
-
-
-def post_process(response):
-    label = response["choices"][0]["message"]["content"]  # .lower()
-    label = label.strip().lower()
-    if (
-        "لا يوجد في النص" in label
-        or label == "'no technique'"
-        or "doesn't" in label
-        or "does not" in label
-        or "لا يحتوي" in label
-        or "لا يستخدم" in label
-    ):
-        return []
-
-    label_mapping = {
-        "بدون تقنية": "no technique",
-        "تشويه": "Smears",
-        "مبالغة/تقليل": "Exaggeration/Minimisation",
-        "لغة محملة بالمشاعر": "Loaded Language",
-        "الاحتكام إلى الخوف/التحيز": "Appeal to fear/prejudice",
-        "التسمية/الملصقات": "Name calling/Labeling",
-        "الشعارات": "Slogans",
-        "التكرار": "Repetition",
-        "الشك": "Doubt",
-        "التعمية/الغموض المتعمد/الارتباك": "Obfuscation, Intentional vagueness, Confusion",
-        "التلويح بالعلم": "Flag-waving",
-        "التعميمات البراقة (الفضيلة)": "Glittering generalities (Virtue)",
-        "تحريف موقف شخص (مغالطة رجل القش)": "Misrepresentation of Someone's Position (Straw Man)",
-        "عرض بيانات غير ذات صلة (السمكة الحمراء)": "Presenting Irrelevant Data (Red Herring)",
-        "الاحتكام إلى السلطة": "Appeal to authority",
-        "ماذا عن": "Whataboutism",
-        "مغالطة الأبيض والأسود/الديكتاتورية": "Black-and-white Fallacy/Dictatorship",
-        "الكليشيه القاطع للفكر": "Thought-terminating cliché",
-        "التبسيط السببي": "Causal Oversimplification",
-    }
-
-    detected_labels = [
-        english_label
-        for arabic_label, english_label in label_mapping.items()
-        if arabic_label in label or english_label.lower() in label
-    ]
-    final_labels = [l for l in detected_labels if "no technique" not in l]
-
-    return list(set(final_labels))
diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_ZeroShot_Arabic.py
deleted file mode 100755
index da7aa2d9..00000000
--- a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_ZeroShot_Arabic.py
+++ /dev/null
@@ -1,116 +0,0 @@
-import random
-import re
-
-from llmebench.datasets import WANLP22T3PropagandaDataset
-from llmebench.models import OpenAIModel
-from llmebench.tasks import MultilabelPropagandaTask
-
-
-random.seed(1333)
-
-
-def metadata():
-    return {
-        "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam",
-        "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)",
-        "model": "GPT-4o-2024-05-22",
-        "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).",
-    }
-
-
-def config():
-    return {
-        "dataset": WANLP22T3PropagandaDataset,
-        "dataset_args": {"techniques_path": "classes.txt"},
-        "task": MultilabelPropagandaTask,
-        "model": OpenAIModel,
-        "model_args": {
-            "class_labels": [
-                "no technique",
-                "Smears",
-                "Exaggeration/Minimisation",
-                "Loaded Language",
-                "Appeal to fear/prejudice",
-                "Name calling/Labeling",
-                "Slogans",
-                "Repetition",
-                "Doubt",
-                "Obfuscation, Intentional vagueness, Confusion",
-                "Flag-waving",
-                "Glittering generalities (Virtue)",
-                "Misrepresentation of Someone's Position (Straw Man)",
-                "Presenting Irrelevant Data (Red Herring)",
-                "Appeal to authority",
-                "Whataboutism",
-                "Black-and-white Fallacy/Dictatorship",
-                "Thought-terminating cliché",
-                "Causal Oversimplification",
-            ],
-            "max_tries": 30,
-        },
-    }
-
-
-def prompt(input_sample):
-    base_prompt = """
-        "من القائمة، اختر التقنية الدعائية الأنسب للتغريدة: "بدون تقنية"، "تشويه"، "مبالغة/تقليل"، "لغة محملة بالمشاعر"، "الاحتكام إلى الخوف/التحيز"، "التسمية/الملصقات"، "الشعارات"، "التكرار"، "الشك"،
-        "التعمية/الغموض المتعمد/الارتباك"، "التلويح بالعلم"، "التعميمات البراقة (الفضيلة)"، "تحريف موقف شخص (مغالطة رجل القش)"، "عرض بيانات غير ذات صلة (السمكة الحمراء)"، "الاحتكام إلى السلطة"، 
-        "ماذا عن"، "مغالطة الأبيض والأسود/الديكتاتورية"، "الكليشيه القاطع للفكر"، أو "التبسيط السببي"."
-        """
-
-    prompt_string = base_prompt + "التغريدة: \n\n" + input_sample + "التصنيف: \n"
-
-    return [
-        {
-            "role": "system",
-            "content": ("أنت خبير في تحليل محتوى وسائل التواصل الاجتماعي."),
-        },
-        {
-            "role": "user",
-            "content": prompt_string,
-        },
-    ]
-
-
-def post_process(response):
-    label = response["choices"][0]["message"]["content"].lower()
-    label = label.strip().lower()
-    if (
-        "لا يوجد في النص" in label
-        or label == "'no technique'"
-        or "doesn't" in label
-        or "does not" in label
-        or "لا يحتوي" in label
-        or "لا يستخدم" in label
-    ):
-        return []
-    label_mapping = {
-        "بدون تقنية": "no technique",
-        "تشويه": "Smears",
-        "مبالغة/تقليل": "Exaggeration/Minimisation",
-        "لغة محملة بالمشاعر": "Loaded Language",
-        "الاحتكام إلى الخوف/التحيز": "Appeal to fear/prejudice",
-        "التسمية/الملصقات": "Name calling/Labeling",
-        "الشعارات": "Slogans",
-        "التكرار": "Repetition",
-        "الشك": "Doubt",
-        "التعمية/الغموض المتعمد/الارتباك": "Obfuscation, Intentional vagueness, Confusion",
-        "التلويح بالعلم": "Flag-waving",
-        "التعميمات البراقة (الفضيلة)": "Glittering generalities (Virtue)",
-        "تحريف موقف شخص (مغالطة رجل القش)": "Misrepresentation of Someone's Position (Straw Man)",
-        "عرض بيانات غير ذات صلة (السمكة الحمراء)": "Presenting Irrelevant Data (Red Herring)",
-        "الاحتكام إلى السلطة": "Appeal to authority",
-        "ماذا عن": "Whataboutism",
-        "مغالطة الأبيض والأسود/الديكتاتورية": "Black-and-white Fallacy/Dictatorship",
-        "الكليشيه القاطع للفكر": "Thought-terminating cliché",
-        "التبسيط السببي": "Causal Oversimplification",
-    }
-
-    detected_labels = [
-        english_label
-        for arabic_label, english_label in label_mapping.items()
-        if arabic_label in label or english_label.lower() in label
-    ]
-    final_labels = [l for l in detected_labels if "no technique" not in l]
-
-    return list(set(final_labels))
diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_ZeroShot_English.py
deleted file mode 100755
index c02e72b0..00000000
--- a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_ZeroShot_English.py
+++ /dev/null
@@ -1,175 +0,0 @@
-import random
-import re
-
-from llmebench.datasets import WANLP22T3PropagandaDataset
-from llmebench.models import OpenAIModel
-from llmebench.tasks import MultilabelPropagandaTask
-
-
-random.seed(1333)
-
-
-def metadata():
-    return {
-        "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam",
-        "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)",
-        "model": "GPT-4o-2024-05-22",
-        "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).",
-    }
-
-
-def config():
-    return {
-        "dataset": WANLP22T3PropagandaDataset,
-        "dataset_args": {"techniques_path": "classes.txt"},
-        "task": MultilabelPropagandaTask,
-        "model": OpenAIModel,
-        "model_args": {
-            "class_labels": [
-                "no technique",
-                "Smears",
-                "Exaggeration/Minimisation",
-                "Loaded Language",
-                "Appeal to fear/prejudice",
-                "Name calling/Labeling",
-                "Slogans",
-                "Repetition",
-                "Doubt",
-                "Obfuscation, Intentional vagueness, Confusion",
-                "Flag-waving",
-                "Glittering generalities (Virtue)",
-                "Misrepresentation of Someone's Position (Straw Man)",
-                "Presenting Irrelevant Data (Red Herring)",
-                "Appeal to authority",
-                "Whataboutism",
-                "Black-and-white Fallacy/Dictatorship",
-                "Thought-terminating cliché",
-                "Causal Oversimplification",
-            ],
-            "max_tries": 30,
-        },
-    }
-
-
-def prompt(input_sample):
-    prompt_string = (
-        f'Label this "tweet" based on the following propaganda techniques:\n\n'
-        f"'no technique' , 'Smears' , 'Exaggeration/Minimisation' , 'Loaded Language' , 'Appeal to fear/prejudice' , 'Name calling/Labeling' , 'Slogans' , 'Repetition' , 'Doubt' , 'Obfuscation, Intentional vagueness, Confusion' , 'Flag-waving' , 'Glittering generalities (Virtue)' , 'Misrepresentation of Someone's Position (Straw Man)' , 'Presenting Irrelevant Data (Red Herring)' , 'Appeal to authority' , 'Whataboutism' , 'Black-and-white Fallacy/Dictatorship' , 'Thought-terminating cliché' , 'Causal Oversimplification'"
-        f"Provide only labels as a list of strings.\n"
-        f"tweet: {input_sample}\n\n"
-        f"label: \n"
-    )
-
-    return [
-        {
-            "role": "system",
-            "content": ("You are an expert social media content analyst."),
-        },
-        {
-            "role": "user",
-            "content": prompt_string,
-        },
-    ]
-
-
-def fix_label(pred_label):
-    if "used in this text" in pred_label:
-        return ["no technique"]
-
-    labels_fixed = []
-    pred_label = pred_label.replace('"', "'").split("\n")
-    pred_labels = []
-
-    for l in pred_label:
-        pred_labels.append(l)
-
-    if len(pred_labels) == 0:
-        return ["no technique"]
-
-    for label in pred_labels:
-        label = label.replace(".", "").strip()
-        label = re.sub("-", " ", label)
-        label = label.strip().lower()
-
-        # Handle case of single word labels like "Smears" so we just capitalize it
-        label_fixed = label.capitalize()
-
-        # print(label)
-        if "slogan" in label:
-            label_fixed = "Slogans"
-        if "loaded" in label:
-            label_fixed = "Loaded Language"
-        if "prejudice" in label or "fear" in label or "mongering" in label:
-            label_fixed = "Appeal to fear/prejudice"
-        if "terminating" in label or "thought" in label:
-            label_fixed = "Thought-terminating cliché"
-        if "calling" in label or label == "name c":
-            label_fixed = "Name calling/Labeling"
-        if "minimisation" in label or label == "exaggeration minim":
-            label_fixed = "Exaggeration/Minimisation"
-        if "glittering" in label:
-            label_fixed = "Glittering generalities (Virtue)"
-        if "flag" in label:
-            label_fixed = "Flag-waving"
-        if "obfuscation" in label:
-            label_fixed = "Obfuscation, Intentional vagueness, Confusion"
-        if "oversimplification" in label or "causal" in label:
-            label_fixed = "Causal Oversimplification"
-        if "authority" in label:
-            label_fixed = "Appeal to authority"
-        if "dictatorship" in label or "black" in label or "white" in label:
-            label_fixed = "Black-and-white Fallacy/Dictatorship"
-        if "herring" in label or "irrelevant" in label:
-            label_fixed = "Presenting Irrelevant Data (Red Herring)"
-        if "straw" in label or "misrepresentation" in label:
-            label_fixed = "Misrepresentation of Someone's Position (Straw Man)"
-        if "whataboutism" in label:
-            label_fixed = "Whataboutism"
-
-        if (
-            "no propaganda" in label
-            or "technique" in label
-            or label == ""
-            or label == "no"
-            or label == "appeal to history"
-            or label == "appeal to emotion"
-            or label == "appeal to"
-            or label == "appeal"
-            or label == "appeal to author"
-            or label == "emotional appeal"
-            or "no techn" in label
-            or "hashtag" in label
-            or "theory" in label
-            or "specific mention" in label
-            or "religious" in label
-            or "gratitude" in label
-        ):
-            label_fixed = "no technique"
-
-        labels_fixed.append(label_fixed)
-
-    out_put_labels = []
-    # Remove no technique label when we have other techniques for the same text
-    if len(labels_fixed) > 1:
-        for flabel in labels_fixed:
-            if "no technique" not in flabel:
-                out_put_labels.append(flabel)
-        return out_put_labels
-
-    return labels_fixed
-
-
-def post_process(response):
-    label = response["choices"][0]["message"]["content"].lower()
-    if (
-        "لا يوجد في النص" in label
-        or label == "'no technique'"
-        or "doesn't" in label
-        or "does not" in label
-        or "لا يحتوي" in label
-        or "لا يستخدم" in label
-    ):
-        return []
-    pred_label = fix_label(label)
-
-    return pred_label
diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_ZeroShot_Mixed.py
deleted file mode 100755
index c647b461..00000000
--- a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_GPT4_ZeroShot_Mixed.py
+++ /dev/null
@@ -1,116 +0,0 @@
-import random
-import re
-
-from llmebench.datasets import WANLP22T3PropagandaDataset
-from llmebench.models import OpenAIModel
-from llmebench.tasks import MultilabelPropagandaTask
-
-
-random.seed(1333)
-
-
-def metadata():
-    return {
-        "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam",
-        "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)",
-        "model": "GPT-4o-2024-05-22",
-        "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).",
-    }
-
-
-def config():
-    return {
-        "dataset": WANLP22T3PropagandaDataset,
-        "dataset_args": {"techniques_path": "classes.txt"},
-        "task": MultilabelPropagandaTask,
-        "model": OpenAIModel,
-        "model_args": {
-            "class_labels": [
-                "no technique",
-                "Smears",
-                "Exaggeration/Minimisation",
-                "Loaded Language",
-                "Appeal to fear/prejudice",
-                "Name calling/Labeling",
-                "Slogans",
-                "Repetition",
-                "Doubt",
-                "Obfuscation, Intentional vagueness, Confusion",
-                "Flag-waving",
-                "Glittering generalities (Virtue)",
-                "Misrepresentation of Someone's Position (Straw Man)",
-                "Presenting Irrelevant Data (Red Herring)",
-                "Appeal to authority",
-                "Whataboutism",
-                "Black-and-white Fallacy/Dictatorship",
-                "Thought-terminating cliché",
-                "Causal Oversimplification",
-            ],
-            "max_tries": 30,
-        },
-    }
-
-
-def prompt(input_sample):
-    base_prompt = (
-        f'قم بتصنيف هذه "التغريدة" بناءً على تقنيات الدعاية التالية:\n\n'
-        f"'no technique' , 'Smears' , 'Exaggeration/Minimisation' , 'Loaded Language' , 'Appeal to fear/prejudice' , 'Name calling/Labeling' , 'Slogans' , 'Repetition' , 'Doubt' , 'Obfuscation, Intentional vagueness, Confusion' , 'Flag-waving' , 'Glittering generalities (Virtue)' , 'Misrepresentation of Someone's Position (Straw Man)' , 'Presenting Irrelevant Data (Red Herring)' , 'Appeal to authority' , 'Whataboutism' , 'Black-and-white Fallacy/Dictatorship' , 'Thought-terminating cliché' , 'Causal Oversimplification'"
-        f"قدم التصنيفات فقط كقائمة من النصوص.\n"
-    )
-
-    prompt_string = base_prompt + f"التغريدة: {input_sample}\n\n" f"التصنيف: \n"
-
-    return [
-        {
-            "role": "system",
-            "content": ("أنت خبير في تحليل محتوى وسائل التواصل الاجتماعي."),
-        },
-        {
-            "role": "user",
-            "content": prompt_string,
-        },
-    ]
-
-
-def post_process(response):
-    label = response["choices"][0]["message"]["content"].lower()
-    label = label.strip().lower()
-    if (
-        "لا يوجد في النص" in label
-        or label == "'no technique'"
-        or "doesn't" in label
-        or "does not" in label
-        or "لا يحتوي" in label
-        or "لا يستخدم" in label
-    ):
-        return []
-    label_mapping = {
-        "بدون تقنية": "no technique",
-        "تشويه": "Smears",
-        "مبالغة/تقليل": "Exaggeration/Minimisation",
-        "لغة محملة بالمشاعر": "Loaded Language",
-        "الاحتكام إلى الخوف/التحيز": "Appeal to fear/prejudice",
-        "التسمية/الملصقات": "Name calling/Labeling",
-        "الشعارات": "Slogans",
-        "التكرار": "Repetition",
-        "الشك": "Doubt",
-        "التعمية/الغموض المتعمد/الارتباك": "Obfuscation, Intentional vagueness, Confusion",
-        "التلويح بالعلم": "Flag-waving",
-        "التعميمات البراقة (الفضيلة)": "Glittering generalities (Virtue)",
-        "تحريف موقف شخص (مغالطة رجل القش)": "Misrepresentation of Someone's Position (Straw Man)",
-        "عرض بيانات غير ذات صلة (السمكة الحمراء)": "Presenting Irrelevant Data (Red Herring)",
-        "الاحتكام إلى السلطة": "Appeal to authority",
-        "ماذا عن": "Whataboutism",
-        "مغالطة الأبيض والأسود/الديكتاتورية": "Black-and-white Fallacy/Dictatorship",
-        "الكليشيه القاطع للفكر": "Thought-terminating cliché",
-        "التبسيط السببي": "Causal Oversimplification",
-    }
-
-    detected_labels = [
-        english_label
-        for arabic_label, english_label in label_mapping.items()
-        if arabic_label in label or english_label.lower() in label
-    ]
-    final_labels = [l for l in detected_labels if "no technique" not in l]
-
-    return list(set(final_labels))
diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_FewShot_Arabic.py
deleted file mode 100755
index f2de7611..00000000
--- a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_FewShot_Arabic.py
+++ /dev/null
@@ -1,143 +0,0 @@
-import random
-import re
-
-from llmebench.datasets import WANLP22T3PropagandaDataset
-from llmebench.models import FastChatModel
-from llmebench.tasks import MultilabelPropagandaTask
-
-random.seed(1333)
-
-
-def metadata():
-    return {
-        "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam",
-        "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)",
-        "model": "jais-13b-chat",
-        "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).",
-    }
-
-
-def config():
-    return {
-        "dataset": WANLP22T3PropagandaDataset,
-        "dataset_args": {"techniques_path": "classes.txt"},
-        "task": MultilabelPropagandaTask,
-        "model": FastChatModel,
-        "model_args": {
-            "class_labels": [
-                "no technique",
-                "Smears",
-                "Exaggeration/Minimisation",
-                "Loaded Language",
-                "Appeal to fear/prejudice",
-                "Name calling/Labeling",
-                "Slogans",
-                "Repetition",
-                "Doubt",
-                "Obfuscation, Intentional vagueness, Confusion",
-                "Flag-waving",
-                "Glittering generalities (Virtue)",
-                "Misrepresentation of Someone's Position (Straw Man)",
-                "Presenting Irrelevant Data (Red Herring)",
-                "Appeal to authority",
-                "Whataboutism",
-                "Black-and-white Fallacy/Dictatorship",
-                "Thought-terminating cliché",
-                "Causal Oversimplification",
-            ],
-            "max_tries": 3,
-        },
-    }
-
-
-def few_shot_prompt(input_sample, examples):
-    instruction = """
-    "من القائمة، اختر التقنية الدعائية الأنسب للتغريدة: "بدون تقنية"، "تشويه"، "مبالغة/تقليل"، "لغة محملة"، "النداء إلى الخوف/التحيز"، "التسمية/الملصقات"، "الشعارات"، "التكرار"، "الشك"،
-    "التعمية/الغموض المتعمد/الارتباك"، "التلويح بالعلم"، "التعميمات البراقة (الفضيلة)"، "تحريف موقف شخص (رجل القش)"، "عرض بيانات غير ذات صلة (السمكة الحمراء)"، "النداء إلى السلطة"، 
-    "ماذا عن"، "مغالطة الأبيض والأسود/الديكتاتورية"، "الكليشيه القاطع للفكر"، أو "التبسيط السببي"."
-    """
-    label_mapping = {
-        "no technique": "بدون تقنية",
-        "Smears": "تشويه",
-        "Exaggeration/Minimisation": "مبالغة/تقليل",
-        "Loaded Language": "لغة محملة",
-        "Appeal to fear/prejudice": "النداء إلى الخوف/التحيز",
-        "Name calling/Labeling": "التسمية/الملصقات",
-        "Slogans": "الشعارات",
-        "Repetition": "التكرار",
-        "Doubt": "الشك",
-        "Obfuscation Intentional vagueness Confusion": "التعمية/الغموض المتعمد/الارتباك",
-        "Flag-waving": "التلويح بالعلم",
-        "Glittering generalities (Virtue)": "التعميمات البراقة (الفضيلة)",
-        "Misrepresentation of Someone's Position (Straw Man)": "تحريف موقف شخص (رجل القش)",
-        "Presenting Irrelevant Data (Red Herring)": "عرض بيانات غير ذات صلة (السمكة الحمراء)",
-        "Appeal to authority": "النداء إلى السلطة",
-        "Whataboutism": "ماذا عن",
-        "Black-and-white Fallacy/Dictatorship": "مغالطة الأبيض والأسود/الديكتاتورية",
-        "Thought-terminating cliché": "الكليشيه القاطع للفكر",
-        "Causal Oversimplification": "التبسيط السببي",
-    }
-
-    few_shot_text = instruction + "\n\nإليك بعض الأمثلة:\n\n"
-    for index, example in enumerate(examples):
-        print(f"Processing example {index + 1}")
-        print(f"Example label: {example['label']}")
-
-        try:
-            labels = ", ".join(
-                label_mapping[l] for l in example["label"] if example["label"]
-            )
-            print("Labels in few_shot:", labels)
-        except KeyError as e:
-            print(f"KeyError: {e} in example {index + 1}")
-            labels = "Unknown Label"
-
-    few_shot_text += f"الآن، قم بتقييم التغريدة الجديدة التالية:\nالتغريدة: '{input_sample}'\nالتصنيف: "
-    return few_shot_text
-
-
-def prompt(input_sample, examples):
-    return [{"role": "user", "content": few_shot_prompt(input_sample, examples)}]
-
-
-def post_process(response):
-    label = response["choices"][0]["message"]["content"].lower()
-    label = label.lower()
-    label = re.sub(r"<[^>]+>", "", label)  # Remove any HTML-like tags
-    label = label.lower()
-
-    label_mapping = {
-        "بدون تقنية": "no technique",
-        "تشويه": "Smears",
-        "مبالغة/تقليل": "Exaggeration/Minimisation",
-        "لغة محملة": "Loaded Language",
-        "النداء إلى الخوف/التحيز": "Appeal to fear/prejudice",
-        "التسمية/الملصقات": "Name calling/Labeling",
-        "الشعارات": "Slogans",
-        "التكرار": "Repetition",
-        "الشك": "Doubt",
-        "التعمية/الغموض المتعمد/الارتباك": "Obfuscation, Intentional vagueness, Confusion",
-        "التلويح بالعلم": "Flag-waving",
-        "التعميمات البراقة (الفضيلة)": "Glittering generalities (Virtue)",
-        "تحريف موقف شخص (رجل القش)": "Misrepresentation of Someone's Position (Straw Man)",
-        "عرض بيانات غير ذات صلة (السمكة الحمراء)": "Presenting Irrelevant Data (Red Herring)",
-        "النداء إلى السلطة": "Appeal to authority",
-        "ماذا عن": "Whataboutism",
-        "مغالطة الأبيض والأسود/الديكتاتورية": "Black-and-white Fallacy/Dictatorship",
-        "الكليشيه القاطع للفكر": "Thought-terminating cliché",
-        "التبسيط السببي": "Causal Oversimplification",
-    }
-
-    detected_labels = []
-    for arabic_label, english_label in label_mapping.items():
-        if arabic_label in label:
-            detected_labels.append(english_label)
-        elif english_label.lower() in label:
-            detected_labels.append(english_label)
-
-    print("Detected labels:", detected_labels)
-
-    # this is for duplicates values
-    detected_labels = list(set(detected_labels))
-
-    return detected_labels
diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_FewShot_English.py
deleted file mode 100755
index 86e8afe6..00000000
--- a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_FewShot_English.py
+++ /dev/null
@@ -1,183 +0,0 @@
-import random
-import re
-
-from llmebench.datasets import WANLP22T3PropagandaDataset
-from llmebench.models import FastChatModel
-from llmebench.tasks import MultilabelPropagandaTask
-
-
-random.seed(1333)
-
-
-def metadata():
-    return {
-        "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam",
-        "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)",
-        "model": "jais-13b-chat",
-        "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).",
-    }
-
-
-def config():
-    return {
-        "dataset": WANLP22T3PropagandaDataset,
-        "dataset_args": {"techniques_path": "classes.txt"},
-        "task": MultilabelPropagandaTask,
-        "model": FastChatModel,
-        "model_args": {
-            "class_labels": [
-                "no technique",
-                "Smears",
-                "Exaggeration/Minimisation",
-                "Loaded Language",
-                "Appeal to fear/prejudice",
-                "Name calling/Labeling",
-                "Slogans",
-                "Repetition",
-                "Doubt",
-                "Obfuscation, Intentional vagueness, Confusion",
-                "Flag-waving",
-                "Glittering generalities (Virtue)",
-                "Misrepresentation of Someone's Position (Straw Man)",
-                "Presenting Irrelevant Data (Red Herring)",
-                "Appeal to authority",
-                "Whataboutism",
-                "Black-and-white Fallacy/Dictatorship",
-                "Thought-terminating cliché",
-                "Causal Oversimplification",
-            ],
-            "max_tries": 3,
-        },
-    }
-
-
-def few_shot_prompt(input_sample, base_prompt, examples):
-    out_prompt = base_prompt + "\n\n"
-    out_prompt = out_prompt + "Here are some examples:\n\n"
-    for index, example in enumerate(examples):
-        tech_str = ""
-        for t in example["label"]:
-            tech_str += "'" + t + "', "
-
-        out_prompt = (
-            out_prompt
-            + "Example "
-            + str(index)
-            + ":"
-            + "\n"
-            + "Tweet: "
-            + example["input"]
-            + "\nLabel: "
-            + tech_str[:-2]  # Remove the trailing comma and space
-            + "\n\n"
-        )
-
-    # Append the sentence we want the model to predict for but leave the Label blank
-    out_prompt = out_prompt + "Tweet: " + input_sample + "\nLabel: \n"
-
-    return out_prompt
-
-
-def prompt(input_sample, examples):
-    instruction = """
-Review the following tweets and analyze the propaganda techniques used. Choose only the applicable categories from: "no technique," "Smears,"
- "Exaggeration/Minimisation," "Loaded Language," "Appeal to fear/prejudice," "Name calling/Labeling," "Slogans," "Repetition," "Doubt,"
- "Obfuscation/Intentional vagueness/Confusion," "Flag-waving," "Glittering generalities (Virtue),"
- "Misrepresentation of Someone's Position (Straw Man)," "Presenting Irrelevant Data (Red Herring)," "Appeal to authority,"
-"Whataboutism," "Black-and-white Fallacy/Dictatorship," "Thought-terminating cliché," or "Causal Oversimplification."
-    """
-    base_prompt = instruction.strip()
-
-    return [
-        {
-            "role": "user",
-            "content": (few_shot_prompt(input_sample, base_prompt, examples)),
-        }
-    ]
-
-
-def post_process(response):
-
-    label = response["choices"][0]["message"]["content"].lower()
-
-    label = label.replace("<s>", "").replace("</s>", "")
-
-    label_mapping = {
-        "بدون تقنية": "no technique",
-        "تشويه": "Smears",
-        "مبالغة/تقليل": "Exaggeration/Minimisation",
-        "لغة محملة": "Loaded Language",
-        "النداء إلى الخوف/التحيز": "Appeal to fear/prejudice",
-        "التسمية/الملصقات": "Name calling/Labeling",
-        "الشعارات": "Slogans",
-        "التكرار": "Repetition",
-        "الشك": "Doubt",
-        "التعمية/الغموض المتعمد/الارتباك": "Obfuscation, Intentional vagueness, Confusion",
-        "التلويح بالعلم": "Flag-waving",
-        "التعميمات البراقة (الفضيلة)": "Glittering generalities (Virtue)",
-        "تحريف موقف شخص (رجل القش)": "Misrepresentation of Someone's Position (Straw Man)",
-        "عرض بيانات غير ذات صلة (السمكة الحمراء)": "Presenting Irrelevant Data (Red Herring)",
-        "النداء إلى السلطة": "Appeal to authority",
-        "ماذا عن": "Whataboutism",
-        "مغالطة الأبيض والأسود/الديكتاتورية": "Black-and-white Fallacy/Dictatorship",
-        "الكليشيه القاطع للفكر": "Thought-terminating cliché",
-        "التبسيط السببي": "Causal Oversimplification",
-    }
-    print("label: ", label)
-    detected_labels = []
-
-    if "no technique" in label:
-        detected_labels.append(label_mapping["بدون تقنية"])
-    if "Smears" in label:
-        detected_labels.append(label_mapping["تشويه"])
-    if "Exaggeration/Minimisation" in label or "مبالغة" in label:
-        detected_labels.append(label_mapping["مبالغة/تقليل"])
-    if "Loaded Language" in label:
-        detected_labels.append(label_mapping["لغة محملة"])
-    if "Appeal to fear/prejudice" in label or "الخوف" in label or "fear" in label:
-        detected_labels.append(label_mapping["النداء إلى الخوف/التحيز"])
-    if "Name calling/Labeling" in label or "التسمية" or "name" in label:
-        detected_labels.append(label_mapping["التسمية/الملصقات"])
-    if "Slogans" in label:
-        detected_labels.append(label_mapping["الشعارات"])
-    if "Repetition" in label:
-        detected_labels.append(label_mapping["التكرار"])
-    if "Doubt" in label:
-        detected_labels.append(label_mapping["الشك"])
-    if (
-        "Obfuscation, Intentional vagueness, Confusion" in label
-        or "Obfuscation" in label
-        or "Intentional vagueness" in label
-        or "Confusion" in label
-    ):
-        detected_labels.append(label_mapping["التعمية/الغموض المتعمد/الارتباك"])
-    if "Flag-waving" in label or "flag":
-        detected_labels.append(label_mapping["التلويح بالعلم"])
-    if (
-        "Glittering generalities (Virtue)" in label
-        or "الفضيلة" in label
-        or "Glittering"
-    ):
-        detected_labels.append(label_mapping["التعميمات البراقة (الفضيلة)"])
-    if (
-        "Misrepresentation of Someone's Position (Straw Man)" in label
-        or "تحريف موقف شخص" in label
-    ):
-        detected_labels.append(label_mapping["تحريف موقف شخص (رجل القش)"])
-    if (
-        "Presenting Irrelevant Data (Red Herring)" in label
-        or "عرض بيانات غير ذات صلة" in label
-    ):
-        detected_labels.append(label_mapping["عرض بيانات غير ذات صلة (السمكة الحمراء)"])
-    if "Appeal to authority" in label:
-        detected_labels.append(label_mapping["النداء إلى السلطة"])
-    if "Whataboutism" in label:
-        detected_labels.append(label_mapping["ماذا عن"])
-    if "Black-and-white Fallacy/Dictatorship" in label or "الديكتاتورية" in label:
-        detected_labels.append(label_mapping["مغالطة الأبيض والأسود/الديكتاتورية"])
-    if "Thought-terminating cliché" in label or "الكليشيه" in label:
-        detected_labels.append(label_mapping["الكليشيه القاطع للفكر"])
-    if "Causal Oversimplification" in label or "التبسيط" in label:
-        detected_labels.append(label_mapping["التبسيط السببي"])
-
-    return detected_labels
diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_FewShot_Mixed.py
deleted file mode 100755
index 72776442..00000000
--- a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_FewShot_Mixed.py
+++ /dev/null
@@ -1,116 +0,0 @@
-import random
-import re
-
-from llmebench.datasets import WANLP22T3PropagandaDataset
-from llmebench.models import FastChatModel
-from llmebench.tasks import MultilabelPropagandaTask
-
-random.seed(1333)
-
-
-def metadata():
-    return {
-        "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam",
-        "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)",
-        "model": "jais-13b-chat",
-        "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).",
-    }
-
-
-def config():
-    return {
-        "dataset": WANLP22T3PropagandaDataset,
-        "dataset_args": {"techniques_path": "classes.txt"},
-        "task": MultilabelPropagandaTask,
-        "model": FastChatModel,
-        "model_args": {
-            "class_labels": [
-                "no technique",
-                "Smears",
-                "Exaggeration/Minimisation",
-                "Loaded Language",
-                "Appeal to fear/prejudice",
-                "Name calling/Labeling",
-                "Slogans",
-                "Repetition",
-                "Doubt",
-                "Obfuscation, Intentional vagueness, Confusion",
-                "Flag-waving",
-                "Glittering generalities (Virtue)",
-                "Misrepresentation of Someone's Position (Straw Man)",
-                "Presenting Irrelevant Data (Red Herring)",
-                "Appeal to authority",
-                "Whataboutism",
-                "Black-and-white Fallacy/Dictatorship",
-                "Thought-terminating cliché",
-                "Causal Oversimplification",
-            ],
-            "max_tries": 3,
-        },
-    }
-
-
-def few_shot_prompt(input_sample, examples):
-    instruction = """
-    "من القائمة، اختر التقنية الدعائية الأنسب للتغريدة: "no technique"، "Smears"، "Exaggeration/Minimisation"، "Loaded Language"، "Appeal to fear/prejudice"، "Name calling/Labeling"، "Slogans"، "Repetition"، "Doubt"،
-    "Obfuscation, Intentional vagueness, Confusion"، "Flag-waving"، "Glittering generalities (Virtue)"، "Misrepresentation of Someone's Position (Straw Man)"، "Presenting Irrelevant Data (Red Herring)"، "Appeal to authority"، 
-    "Whataboutism"، "Black-and-white Fallacy/Dictatorship"، "Thought-terminating cliché"، أو "Causal Oversimplification".
-    """
-
-    few_shot_text = instruction + "\n\nإليك بعض الأمثلة:\n\n"
-    for index, example in enumerate(examples):
-        labels = ", ".join(example["label"])
-        few_shot_text += (
-            f"مثال {index + 1}:\n"
-            f"التغريدة: '{example['input']}'\n"
-            f"التصنيف: {labels}\n\n"
-        )
-
-    few_shot_text += f"الآن، قم بتقييم التغريدة الجديدة التالية:\nالتغريدة: '{input_sample}'\nالتصنيف: "
-    return few_shot_text
-
-
-def prompt(input_sample, examples):
-    return [{"role": "user", "content": few_shot_prompt(input_sample, examples)}]
-
-
-def post_process(response):
-    label = response["choices"][0]["message"]["content"].lower()
-    label = re.sub(r"<[^>]+>", "", label)  # Remove any HTML-like tags
-    label = label.lower()
-
-    label_mapping = {
-        "بدون تقنية": "no technique",
-        "تشويه": "Smears",
-        "مبالغة/تقليل": "Exaggeration/Minimisation",
-        "لغة محملة": "Loaded Language",
-        "النداء إلى الخوف/التحيز": "Appeal to fear/prejudice",
-        "التسمية/الملصقات": "Name calling/Labeling",
-        "الشعارات": "Slogans",
-        "التكرار": "Repetition",
-        "الشك": "Doubt",
-        "التعمية/الغموض المتعمد/الارتباك": "Obfuscation, Intentional vagueness, Confusion",
-        "التلويح بالعلم": "Flag-waving",
-        "التعميمات البراقة (الفضيلة)": "Glittering generalities (Virtue)",
-        "تحريف موقف شخص (رجل القش)": "Misrepresentation of Someone's Position (Straw Man)",
-        "عرض بيانات غير ذات صلة (السمكة الحمراء)": "Presenting Irrelevant Data (Red Herring)",
-        "النداء إلى السلطة": "Appeal to authority",
-        "ماذا عن": "Whataboutism",
-        "مغالطة الأبيض والأسود/الديكتاتورية": "Black-and-white Fallacy/Dictatorship",
-        "الكليشيه القاطع للفكر": "Thought-terminating cliché",
-        "التبسيط السببي": "Causal Oversimplification",
-    }
-
-    detected_labels = []
-    for arabic_label, english_label in label_mapping.items():
-        if arabic_label in label:
-            detected_labels.append(english_label)
-        elif english_label.lower() in label:
-            detected_labels.append(english_label)
-
-    print("Detected labels:", detected_labels)
-
-    # Remove duplicates
-    detected_labels = list(set(detected_labels))
-
-    return detected_labels
diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_ZeroShot_Arabic.py
deleted file mode 100755
index a304edaa..00000000
--- a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_ZeroShot_Arabic.py
+++ /dev/null
@@ -1,140 +0,0 @@
-import random
-import re
-
-from llmebench.datasets import WANLP22T3PropagandaDataset
-from llmebench.models import FastChatModel
-from llmebench.tasks import MultilabelPropagandaTask
-
-
-random.seed(1333)
-
-
-def metadata():
-    return {
-        "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam",
-        "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)",
-        "model": "jais-13b-chat",
-        "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).",
-    }
-
-
-def config():
-    return {
-        "dataset": WANLP22T3PropagandaDataset,
-        "dataset_args": {"techniques_path": "classes.txt"},
-        "task": MultilabelPropagandaTask,
-        "model": FastChatModel,
-        "model_args": {
-            "class_labels": [
-                "no technique",
-                "Smears",
-                "Exaggeration/Minimisation",
-                "Loaded Language",
-                "Appeal to fear/prejudice",
-                "Name calling/Labeling",
-                "Slogans",
-                "Repetition",
-                "Doubt",
-                "Obfuscation, Intentional vagueness, Confusion",
-                "Flag-waving",
-                "Glittering generalities (Virtue)",
-                "Misrepresentation of Someone's Position (Straw Man)",
-                "Presenting Irrelevant Data (Red Herring)",
-                "Appeal to authority",
-                "Whataboutism",
-                "Black-and-white Fallacy/Dictatorship",
-                "Thought-terminating cliché",
-                "Causal Oversimplification",
-            ],
-            "max_tries": 3,
-        },
-    }
-
-
-def prompt(input_sample):
-    instruction = """
-    "من القائمة، اختر التقنية الدعائية التي تلتقط جوهر التغريدة بشكل أفضل، "بدون تقنية"، "تشويه"، "مبالغة/تقليل"، "لغة محملة"، "النداء إلى الخوف/التحيز"، "التسمية/الملصقات"، "الشعارات"، "التكرار"، "الشك"،
- "التعمية/الغموض المتعمد/الارتباك"، "التلويح بالعلم"، "التعميمات البراقة (الفضيلة)"، "تحريف موقف شخص (رجل القش)"، "عرض بيانات غير ذات صلة (السمكة الحمراء)"، "النداء إلى السلطة"، 
-"ماذا عن"، "مغالطة الأبيض والأسود/الديكتاتورية"، "الكليشيه القاطع للفكر"، أو "التبسيط السببي"."
-
-    """
-    return [
-        {
-            "role": "user",
-            "content": (
-                f" \n{instruction}\n" + "التغريدة: " + input_sample + "التصنيف: "
-            ),
-        }
-    ]
-
-
-def post_process(response):
-    label = response["choices"][0]["message"]["content"].lower()
-    label = label.lower()
-    label_mapping = {
-        "بدون تقنية": "no technique",
-        "تشويه": "Smears",
-        "مبالغة/تقليل": "Exaggeration/Minimisation",
-        "لغة محملة": "Loaded Language",
-        "النداء إلى الخوف/التحيز": "Appeal to fear/prejudice",
-        "التسمية/الملصقات": "Name calling/Labeling",
-        "الشعارات": "Slogans",
-        "التكرار": "Repetition",
-        "الشك": "Doubt",
-        "التعمية/الغموض المتعمد/الارتباك": "Obfuscation, Intentional vagueness, Confusion",
-        "التلويح بالعلم": "Flag-waving",
-        "التعميمات البراقة (الفضيلة)": "Glittering generalities (Virtue)",
-        "تحريف موقف شخص (رجل القش)": "Misrepresentation of Someone's Position (Straw Man)",
-        "عرض بيانات غير ذات صلة (السمكة الحمراء)": "Presenting Irrelevant Data (Red Herring)",
-        "النداء إلى السلطة": "Appeal to authority",
-        "ماذا عن": "Whataboutism",
-        "مغالطة الأبيض والأسود/الديكتاتورية": "Black-and-white Fallacy/Dictatorship",
-        "الكليشيه القاطع للفكر": "Thought-terminating cliché",
-        "التبسيط السببي": "Causal Oversimplification",
-    }
-    print("label: ", label)
-    detected_labels = []
-
-    if "بدون تقنية" in label:
-        detected_labels.append(label_mapping["بدون تقنية"])
-    if "تشويه" in label:
-        detected_labels.append(label_mapping["تشويه"])
-    if "تقليل" in label or "مبالغة" in label:
-        detected_labels.append(label_mapping["مبالغة/تقليل"])
-    if "لغة محملة" in label:
-        detected_labels.append(label_mapping["لغة محملة"])
-    if "التحيز" in label or "الخوف" in label:
-        detected_labels.append(label_mapping["النداء إلى الخوف/التحيز"])
-    if "الملصقات" in label or "التسمية" in label:
-        detected_labels.append(label_mapping["التسمية/الملصقات"])
-    if "الشعارات" in label:
-        detected_labels.append(label_mapping["الشعارات"])
-    if "التكرار" in label:
-        detected_labels.append(label_mapping["التكرار"])
-    if "الشك" in label:
-        detected_labels.append(label_mapping["الشك"])
-    if "الارتباك" in label or "الغموض المتعمد" in label or "التعمية" in label:
-        detected_labels.append(label_mapping["التعمية/الغموض المتعمد/الارتباك"])
-    if "التلويح بالعلم" in label:
-        detected_labels.append(label_mapping["التلويح بالعلم"])
-    if "التعميمات البراقة" in label or "الفضيلة" in label:
-        detected_labels.append(label_mapping["التعميمات البراقة (الفضيلة)"])
-    if "رجل القش" in label or "تحريف موقف شخص" in label:
-        detected_labels.append(label_mapping["تحريف موقف شخص (رجل القش)"])
-    if "السمكة الحمراء" in label or "عرض بيانات غير ذات صلة" in label:
-        detected_labels.append(label_mapping["عرض بيانات غير ذات صلة (السمكة الحمراء)"])
-    if "النداء إلى السلطة" in label:
-        detected_labels.append(label_mapping["النداء إلى السلطة"])
-    if "ماذا عن" in label:
-        detected_labels.append(label_mapping["ماذا عن"])
-    if "الأبيض والأسود" in label or "الديكتاتورية" in label:
-        detected_labels.append(label_mapping["مغالطة الأبيض والأسود/الديكتاتورية"])
-    if "القاطع للفكر" in label or "الكليشيه" in label:
-        detected_labels.append(label_mapping["الكليشيه القاطع للفكر"])
-    if "السببي" in label or "التبسيط" in label:
-        detected_labels.append(label_mapping["التبسيط السببي"])
-
-    # this is for duplicates values
-    detected_labels = list(set(detected_labels))
-
-    return detected_labels
diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_ZeroShot_English.py
deleted file mode 100755
index a56c1dea..00000000
--- a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_ZeroShot_English.py
+++ /dev/null
@@ -1,164 +0,0 @@
-import re
-
-from llmebench.datasets import WANLP22T3PropagandaDataset
-from llmebench.models import FastChatModel
-from llmebench.tasks import MultilabelPropagandaTask
-
-
-def metadata():
-    return {
-        "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam",
-        "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)",
-        "model": "jais-13b-chat",
-        "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).",
-    }
-
-
-def config():
-    return {
-        "dataset": WANLP22T3PropagandaDataset,
-        "dataset_args": {"techniques_path": "classes.txt"},
-        "task": MultilabelPropagandaTask,
-        "model": FastChatModel,
-        "model_args": {
-            "class_labels": [
-                "no technique",
-                "Smears",
-                "Exaggeration/Minimisation",
-                "Loaded Language",
-                "Appeal to fear/prejudice",
-                "Name calling/Labeling",
-                "Slogans",
-                "Repetition",
-                "Doubt",
-                "Obfuscation, Intentional vagueness, Confusion",
-                "Flag-waving",
-                "Glittering generalities (Virtue)",
-                "Misrepresentation of Someone's Position (Straw Man)",
-                "Presenting Irrelevant Data (Red Herring)",
-                "Appeal to authority",
-                "Whataboutism",
-                "Black-and-white Fallacy/Dictatorship",
-                "Thought-terminating cliché",
-                "Causal Oversimplification",
-            ],
-            "max_tries": 30,
-        },
-    }
-
-
-def prompt(input_sample):
-    base_prompt = (
-        f"Review the following tweets and analyze the propaganda techniques used. Choose only the applicable categories from:\n\n"
-        f"'no technique' , 'Smears' , 'Exaggeration/Minimisation' , 'Loaded Language' , 'Appeal to fear/prejudice' , 'Name calling/Labeling' , 'Slogans' , 'Repetition' , 'Doubt' , 'Obfuscation, Intentional vagueness, Confusion' , 'Flag-waving' , 'Glittering generalities (Virtue)' , 'Misrepresentation of Someone's Position (Straw Man)' , 'Presenting Irrelevant Data (Red Herring)' , 'Appeal to authority' , 'Whataboutism' , 'Black-and-white Fallacy/Dictatorship' , 'Thought-terminating cliché' , 'Causal Oversimplification'"
-        f"\nAnswer (only yes/no) in the following format: \n"
-        f"'Doubt': 'yes', "
-        f"'Smears': 'no', \n\n"
-        f"tweet: {input_sample}\n\n"
-        f"label: \n"
-    )
-
-    return [
-        {
-            "role": "user",
-            "content": base_prompt,
-        },
-    ]
-
-
-def fix_label(pred_label):
-    if "used in this text" in pred_label:
-        return ["no technique"]
-
-    labels_fixed = []
-    pred_label = pred_label.replace('"', "'").split("', '")
-    pred_labels = []
-
-    for l in pred_label:
-        splits = l.replace(",", "").split(":")
-        if len(splits) > 1 and "no" in splits[1]:
-            continue
-        pred_labels.append(splits[0].replace("'", ""))
-
-    if len(pred_labels) == 0:
-        return ["no technique"]
-
-    for label in pred_labels:
-        label = label.replace(".", "").strip()
-        label = re.sub("-", " ", label)
-        label = label.strip().lower()
-
-        # Handle case of single word labels like "Smears" so we just capitalize it
-        label_fixed = label.capitalize()
-
-        # print(label)
-        if "slogan" in label:
-            label_fixed = "Slogans"
-        if "loaded" in label:
-            label_fixed = "Loaded Language"
-        if "prejudice" in label or "fear" in label or "mongering" in label:
-            label_fixed = "Appeal to fear/prejudice"
-        if "terminating" in label or "thought" in label:
-            label_fixed = "Thought-terminating cliché"
-        if "calling" in label or label == "name c":
-            label_fixed = "Name calling/Labeling"
-        if "minimisation" in label or label == "exaggeration minim":
-            label_fixed = "Exaggeration/Minimisation"
-        if "glittering" in label:
-            label_fixed = "Glittering generalities (Virtue)"
-        if "flag" in label:
-            label_fixed = "Flag-waving"
-        if "obfuscation" in label:
-            label_fixed = "Obfuscation, Intentional vagueness, Confusion"
-        if "oversimplification" in label or "causal" in label:
-            label_fixed = "Causal Oversimplification"
-        if "authority" in label:
-            label_fixed = "Appeal to authority"
-        if "dictatorship" in label or "black" in label or "white" in label:
-            label_fixed = "Black-and-white Fallacy/Dictatorship"
-        if "herring" in label or "irrelevant" in label:
-            label_fixed = "Presenting Irrelevant Data (Red Herring)"
-        if "straw" in label or "misrepresentation" in label:
-            label_fixed = "Misrepresentation of Someone's Position (Straw Man)"
-        if "whataboutism" in label:
-            label_fixed = "Whataboutism"
-
-        if (
-            "no propaganda" in label
-            or "technique" in label
-            or label == ""
-            or label == "no"
-            or label == "appeal to history"
-            or label == "appeal to emotion"
-            or label == "appeal to"
-            or label == "appeal"
-            or label == "appeal to author"
-            or label == "emotional appeal"
-            or "no techn" in label
-            or "hashtag" in label
-            or "theory" in label
-            or "specific mention" in label
-            or "religious" in label
-            or "gratitude" in label
-        ):
-            label_fixed = "no technique"
-
-        labels_fixed.append(label_fixed)
-
-    out_put_labels = []
-    # Remove no technique label when we have other techniques for the same text
-    if len(labels_fixed) > 1:
-        for flabel in labels_fixed:
-            if flabel != "no technique":
-                out_put_labels.append(flabel)
-        return out_put_labels
-
-    return labels_fixed
-
-
-def post_process(response):
-    label = response["choices"][0]["message"]["content"].lower()
-    pred_label = fix_label(label)
-    print(pred_label)
-
-    return pred_label
diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_ZeroShot_Mixed.py
deleted file mode 100755
index 8ecbc6c2..00000000
--- a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_JAIS13b_ZeroShot_Mixed.py
+++ /dev/null
@@ -1,136 +0,0 @@
-import random
-import re
-
-from llmebench.datasets import WANLP22T3PropagandaDataset
-from llmebench.models import FastChatModel
-from llmebench.tasks import MultilabelPropagandaTask
-
-
-random.seed(1333)
-
-
-def metadata():
-    return {
-        "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam",
-        "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)",
-        "model": "jais-13b-chat",
-        "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).",
-    }
-
-
-def config():
-    return {
-        "dataset": WANLP22T3PropagandaDataset,
-        "dataset_args": {"techniques_path": "classes.txt"},
-        "task": MultilabelPropagandaTask,
-        "model": FastChatModel,
-        "model_args": {
-            "class_labels": [
-                "no technique",
-                "Smears",
-                "Exaggeration/Minimisation",
-                "Loaded Language",
-                "Appeal to fear/prejudice",
-                "Name calling/Labeling",
-                "Slogans",
-                "Repetition",
-                "Doubt",
-                "Obfuscation, Intentional vagueness, Confusion",
-                "Flag-waving",
-                "Glittering generalities (Virtue)",
-                "Misrepresentation of Someone's Position (Straw Man)",
-                "Presenting Irrelevant Data (Red Herring)",
-                "Appeal to authority",
-                "Whataboutism",
-                "Black-and-white Fallacy/Dictatorship",
-                "Thought-terminating cliché",
-                "Causal Oversimplification",
-            ],
-            "max_tries": 3,
-        },
-    }
-
-
-def prompt(input_sample):
-    instruction = """
-    "من القائمة، اختر التقنية الدعائية الأنسب للتغريدة: "no technique"، "Smears"، "Exaggeration/Minimisation"، "Loaded Language"، "Appeal to fear/prejudice"، "Name calling/Labeling"، "Slogans"، "Repetition"، "Doubt"،
-    "Obfuscation, Intentional vagueness, Confusion"، "Flag-waving"، "Glittering generalities (Virtue)"، "Misrepresentation of Someone's Position (Straw Man)"، "Presenting Irrelevant Data (Red Herring)"، "Appeal to authority"، 
-    "Whataboutism"، "Black-and-white Fallacy/Dictatorship"، "Thought-terminating cliché"، أو "Causal Oversimplification".
-    """
-    return [
-        {
-            "role": "user",
-            "content": (
-                f" \n{instruction}\n" + "التغريدة: " + input_sample + "التصنيف: "
-            ),
-        }
-    ]
-
-
-def post_process(response):
-    label = response["choices"][0]["message"]["content"].lower()
-    label = label.lower()
-    label_mapping = {
-        "بدون تقنية": "no technique",
-        "تشويه": "Smears",
-        "مبالغة/تقليل": "Exaggeration/Minimisation",
-        "لغة محملة": "Loaded Language",
-        "النداء إلى الخوف/التحيز": "Appeal to fear/prejudice",
-        "التسمية/الملصقات": "Name calling/Labeling",
-        "الشعارات": "Slogans",
-        "التكرار": "Repetition",
-        "الشك": "Doubt",
-        "التعمية/الغموض المتعمد/الارتباك": "Obfuscation, Intentional vagueness, Confusion",
-        "التلويح بالعلم": "Flag-waving",
-        "التعميمات البراقة (الفضيلة)": "Glittering generalities (Virtue)",
-        "تحريف موقف شخص (رجل القش)": "Misrepresentation of Someone's Position (Straw Man)",
-        "عرض بيانات غير ذات صلة (السمكة الحمراء)": "Presenting Irrelevant Data (Red Herring)",
-        "النداء إلى السلطة": "Appeal to authority",
-        "ماذا عن": "Whataboutism",
-        "مغالطة الأبيض والأسود/الديكتاتورية": "Black-and-white Fallacy/Dictatorship",
-        "الكليشيه القاطع للفكر": "Thought-terminating cliché",
-        "التبسيط السببي": "Causal Oversimplification",
-    }
-    print("label: ", label)
-    detected_labels = []
-
-    if "بدون تقنية" in label:
-        detected_labels.append(label_mapping["بدون تقنية"])
-    if "تشويه" in label:
-        detected_labels.append(label_mapping["تشويه"])
-    if "تقليل" in label or "مبالغة" in label:
-        detected_labels.append(label_mapping["مبالغة/تقليل"])
-    if "لغة محملة" in label:
-        detected_labels.append(label_mapping["لغة محملة"])
-    if "التحيز" in label or "الخوف" in label:
-        detected_labels.append(label_mapping["النداء إلى الخوف/التحيز"])
-    if "الملصقات" in label or "التسمية" in label:
-        detected_labels.append(label_mapping["التسمية/الملصقات"])
-    if "الشعارات" in label:
-        detected_labels.append(label_mapping["الشعارات"])
-    if "التكرار" in label:
-        detected_labels.append(label_mapping["التكرار"])
-    if "الشك" in label:
-        detected_labels.append(label_mapping["الشك"])
-    if "الارتباك" in label or "الغموض المتعمد" in label or "التعمية" in label:
-        detected_labels.append(label_mapping["التعمية/الغموض المتعمد/الارتباك"])
-    if "التلويح بالعلم" in label:
-        detected_labels.append(label_mapping["التلويح بالعلم"])
-    if "التعميمات البراقة" in label or "الفضيلة" in label:
-        detected_labels.append(label_mapping["التعميمات البراقة (الفضيلة)"])
-    if "رجل القش" in label or "تحريف موقف شخص" in label:
-        detected_labels.append(label_mapping["تحريف موقف شخص (رجل القش)"])
-    if "السمكة الحمراء" in label or "عرض بيانات غير ذات صلة" in label:
-        detected_labels.append(label_mapping["عرض بيانات غير ذات صلة (السمكة الحمراء)"])
-    if "النداء إلى السلطة" in label:
-        detected_labels.append(label_mapping["النداء إلى السلطة"])
-    if "ماذا عن" in label:
-        detected_labels.append(label_mapping["ماذا عن"])
-    if "الأبيض والأسود" in label or "الديكتاتورية" in label:
-        detected_labels.append(label_mapping["مغالطة الأبيض والأسود/الديكتاتورية"])
-    if "القاطع للفكر" in label or "الكليشيه" in label:
-        detected_labels.append(label_mapping["الكليشيه القاطع للفكر"])
-    if "السببي" in label or "التبسيط" in label:
-        detected_labels.append(label_mapping["التبسيط السببي"])
-
-    return detected_labels
diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_FewShot_Arabic.py
deleted file mode 100755
index 0cef8a61..00000000
--- a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_FewShot_Arabic.py
+++ /dev/null
@@ -1,327 +0,0 @@
-# import random
-# import re
-
-# from llmebench.datasets import WANLP22T3PropagandaDataset
-# from llmebench.models import AzureModel
-# from llmebench.tasks import MultilabelPropagandaTask
-
-# random.seed(1333)
-
-# def metadata():
-#     return {
-#         "author": "Arabic Language Technologies, QCRI, HBKU",
-#         "model": "LLama 3 8b",
-#         "description": "Deployed on Azure.",
-#         "scores": {"Micro-F1": "0.52864"}, # 1 - 0.3631 , 3- 0.48027 , 5- 0.52864
-#     }
-
-# def config():
-#     return {
-#         "dataset": WANLP22T3PropagandaDataset,
-#         "dataset_args": {"techniques_path": "classes.txt"},
-#         "task": MultilabelPropagandaTask,
-#         "model": AzureModel,
-#         "model_args": {
-#             "class_labels": [
-#                 "no technique",
-#                 "Smears",
-#                 "Exaggeration/Minimisation",
-#                 "Loaded Language",
-#                 "Appeal to fear/prejudice",
-#                 "Name calling/Labeling",
-#                 "Slogans",
-#                 "Repetition",
-#                 "Doubt",
-#                 "Obfuscation, Intentional vagueness, Confusion",
-#                 "Flag-waving",
-#                 "Glittering generalities (Virtue)",
-#                 "Misrepresentation of Someone's Position (Straw Man)",
-#                 "Presenting Irrelevant Data (Red Herring)",
-#                 "Appeal to authority",
-#                 "Whataboutism",
-#                 "Black-and-white Fallacy/Dictatorship",
-#                 "Thought-terminating cliché",
-#                 "Causal Oversimplification",
-#             ],
-#             "max_tries": 3,
-#         },
-#     }
-
-# def few_shot_prompt(input_sample, examples):
-#     instruction = """
-#         "من القائمة، اختر التقنية الدعائية الأنسب للتغريدة: "بدون تقنية"، "تشويه"، "مبالغة/تقليل"، "لغة محملة"، "النداء إلى الخوف/التحيز"، "التسمية/الملصقات"، "الشعارات"، "التكرار"، "الشك"،
-#         "التعمية/الغموض المتعمد/الارتباك"، "التلويح بالعلم"، "التعميمات البراقة (الفضيلة)"، "تحريف موقف شخص (رجل القش)"، "عرض بيانات غير ذات صلة (السمكة الحمراء)"، "النداء إلى السلطة"،
-#         "ماذا عن"، "مغالطة الأبيض والأسود/الديكتاتورية"، "الكليشيه القاطع للفكر"، أو "التبسيط السببي"."
-#         """
-#     label_mapping = {
-#         "no technique": "بدون تقنية",
-#         "Smears": "تشويه",
-#         "Exaggeration/Minimisation": "مبالغة/تقليل",
-#         "Loaded Language": "لغة محملة",
-#         "Appeal to fear/prejudice": "النداء إلى الخوف/التحيز",
-#         "Name calling/Labeling": "التسمية/الملصقات",
-#         "Slogans": "الشعارات",
-#         "Repetition": "التكرار",
-#         "Doubt": "الشك",
-#         "Obfuscation Intentional vagueness Confusion": "التعمية/الغموض المتعمد/الارتباك",
-#         "Flag-waving": "التلويح بالعلم",
-#         "Glittering generalities (Virtue)": "التعميمات البراقة (الفضيلة)",
-#         "Misrepresentation of Someone's Position (Straw Man)": "تحريف موقف شخص (رجل القش)",
-#         "Presenting Irrelevant Data (Red Herring)": "عرض بيانات غير ذات صلة (السمكة الحمراء)",
-#         "Appeal to authority": "النداء إلى السلطة",
-#         "Whataboutism": "ماذا عن",
-#         "Black-and-white Fallacy/Dictatorship": "مغالطة الأبيض والأسود/الديكتاتورية",
-#         "Thought-terminating cliché": "الكليشيه القاطع للفكر",
-#         "Causal Oversimplification": "التبسيط السببي"
-#     }
-
-#     few_shot_text = instruction + "\n\nإليك بعض الأمثلة:\n\n"
-#     for index, example in enumerate(examples):
-#         labels_list = [label_mapping.get(label,"") for label in example["label"]]
-#         labels = ", ".join(labels_list)
-#         few_shot_text += (
-#             f"مثال {index + 1}:\n"
-#             f"التغريدة: '{example['input']}'\n"
-#             f"التصنيف: {labels}\n\n"
-#         )
-
-#     few_shot_text += f"الآن، قم بتقييم التغريدة الجديدة التالية:\nالتغريدة: '{input_sample}'\nالتصنيف: "
-#     return few_shot_text
-
-# def few_shot_prompt(input_sample, base_prompt, examples):
-#     label_mapping = {
-#         "no technique": "بدون تقنية",
-#         "Smears": "تشويه",
-#         "Exaggeration/Minimisation": "مبالغة/تقليل",
-#         "Loaded Language": "لغة محملة",
-#         "Appeal to fear/prejudice": "النداء إلى الخوف/التحيز",
-#         "Name calling/Labeling": "التسمية/الملصقات",
-#         "Slogans": "الشعارات",
-#         "Repetition": "التكرار",
-#         "Doubt": "الشك",
-#         "Obfuscation Intentional vagueness Confusion": "التعمية/الغموض المتعمد/الارتباك",
-#         "Flag-waving": "التلويح بالعلم",
-#         "Glittering generalities (Virtue)": "التعميمات البراقة (الفضيلة)",
-#         "Misrepresentation of Someone's Position (Straw Man)": "تحريف موقف شخص (رجل القش)",
-#         "Presenting Irrelevant Data (Red Herring)": "عرض بيانات غير ذات صلة (السمكة الحمراء)",
-#         "Appeal to authority": "النداء إلى السلطة",
-#         "Whataboutism": "ماذا عن",
-#         "Black-and-white Fallacy/Dictatorship": "مغالطة الأبيض والأسود/الديكتاتورية",
-#         "Thought-terminating cliché": "الكليشيه القاطع للفكر",
-#         "Causal Oversimplification": "التبسيط السببي"
-#     }
-
-#     out_prompt = base_prompt + "\n"
-#     out_prompt = out_prompt + "اليك بعض الأمثلة:\n\n"
-#     for index, example in enumerate(examples):
-#         tech_str = ""
-#         for t in example["label"]:
-#             tech_str += "'" + label_mapping[t] + "', "
-
-#         out_prompt = (
-#             out_prompt
-#             + "مثال "
-#             + str(index)
-#             + ":"
-#             + "\n"
-#             + "التغريدة: "
-#             + example["input"]
-#             + "\التصنيف: "
-#             + tech_str
-#             + "\n\n"
-#         )
-
-#     # Append the sentence we want the model to predict for but leave the Label blank
-#     out_prompt = out_prompt + "التغريدة: " + input_sample + "\التصنيف: \n"
-
-#     return out_prompt
-
-# def prompt(input_sample, examples):
-#     return [
-#         {
-#             "role": "user",
-#             "content": few_shot_prompt(input_sample, examples)
-#         }
-#     ]
-
-# def post_process(response):
-#     if not response or 'error' in response or 'output' not in response:
-#         print("Error or missing output in response:", response)
-#         return None
-
-#     label = response["output"].strip().lower()
-#     label = re.sub(r'<[^>]+>', '', label)  # Remove any HTML-like tags
-#     label = label.lower()
-
-#     label_mapping = {
-#         "بدون تقنية": "no technique",
-#         "تشويه": "Smears",
-#         "مبالغة/تقليل": "Exaggeration/Minimisation",
-#         "لغة محملة": "Loaded Language",
-#         "النداء إلى الخوف/التحيز": "Appeal to fear/prejudice",
-#         "التسمية/الملصقات": "Name calling/Labeling",
-#         "الشعارات": "Slogans",
-#         "التكرار": "Repetition",
-#         "الشك": "Doubt",
-#         "التعمية/الغموض المتعمد/الارتباك": "Obfuscation, Intentional vagueness, Confusion",
-#         "التلويح بالعلم": "Flag-waving",
-#         "التعميمات البراقة (الفضيلة)": "Glittering generalities (Virtue)",
-#         "تحريف موقف شخص (رجل القش)": "Misrepresentation of Someone's Position (Straw Man)",
-#         "عرض بيانات غير ذات صلة (السمكة الحمراء)": "Presenting Irrelevant Data (Red Herring)",
-#         "النداء إلى السلطة": "Appeal to authority",
-#         "ماذا عن": "Whataboutism",
-#         "مغالطة الأبيض والأسود/الديكتاتورية": "Black-and-white Fallacy/Dictatorship",
-#         "الكليشيه القاطع للفكر": "Thought-terminating cliché",
-#         "التبسيط السببي": "Causal Oversimplification"
-#     }
-
-#     detected_labels = []
-#     for arabic_label, english_label in label_mapping.items():
-#         if arabic_label in label:
-#             detected_labels.append(english_label)
-#         elif english_label.lower() in label:
-#             detected_labels.append(english_label)
-
-#     print("Detected labels:", detected_labels)
-
-#     # this is for duplicates values
-#     detected_labels = list(set(detected_labels))
-
-#     return detected_labels
-import random
-import re
-
-from llmebench.datasets import WANLP22T3PropagandaDataset
-from llmebench.models import AzureModel
-from llmebench.tasks import MultilabelPropagandaTask
-
-random.seed(1333)
-
-
-def metadata():
-    return {
-        "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam",
-        "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)",
-        "model": "Llama-3.1-8B-Instruct",
-        "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).",
-    }
-
-
-def config():
-    return {
-        "dataset": WANLP22T3PropagandaDataset,
-        "dataset_args": {"techniques_path": "classes.txt"},
-        "task": MultilabelPropagandaTask,
-        "model": AzureModel,
-        "model_args": {
-            "class_labels": [
-                "no technique",
-                "Smears",
-                "Exaggeration/Minimisation",
-                "Loaded Language",
-                "Appeal to fear/prejudice",
-                "Name calling/Labeling",
-                "Slogans",
-                "Repetition",
-                "Doubt",
-                "Obfuscation, Intentional vagueness, Confusion",
-                "Flag-waving",
-                "Glittering generalities (Virtue)",
-                "Misrepresentation of Someone's Position (Straw Man)",
-                "Presenting Irrelevant Data (Red Herring)",
-                "Appeal to authority",
-                "Whataboutism",
-                "Black-and-white Fallacy/Dictatorship",
-                "Thought-terminating cliché",
-                "Causal Oversimplification",
-            ],
-            "max_tries": 3,
-        },
-    }
-
-
-def translate_labels(label):
-    label_mapping = {
-        "no technique": "بدون تقنية",
-        "Smears": "تشويه",
-        "Exaggeration/Minimisation": "مبالغة/تقليل",
-        "Loaded Language": "لغة محملة",
-        "Appeal to fear/prejudice": "النداء إلى الخوف/التحيز",
-        "Name calling/Labeling": "التسمية/الملصقات",
-        "Slogans": "الشعارات",
-        "Repetition": "التكرار",
-        "Doubt": "الشك",
-        "Obfuscation, Intentional vagueness, Confusion": "التعمية/الغموض المتعمد/الارتباك",
-        "Flag-waving": "التلويح بالعلم",
-        "Glittering generalities (Virtue)": "التعميمات البراقة (الفضيلة)",
-        "Misrepresentation of Someone's Position (Straw Man)": "تحريف موقف شخص (رجل القش)",
-        "Presenting Irrelevant Data (Red Herring)": "عرض بيانات غير ذات صلة (السمكة الحمراء)",
-        "Appeal to authority": "النداء إلى السلطة",
-        "Whataboutism": "ماذا عن",
-        "Black-and-white Fallacy/Dictatorship": "مغالطة الأبيض والأسود/الديكتاتورية",
-        "Thought-terminating cliché": "الكليشيه القاطع للفكر",
-        "Causal Oversimplification": "التبسيط السببي",
-    }
-    return label_mapping.get(label, label)
-
-
-def few_shot_prompt(input_sample, base_prompt, examples):
-    out_prompt = base_prompt + "\nاليك بعض الأمثلة:\n\n"
-    for index, example in enumerate(examples):
-        tech_str = ", ".join([f"'{translate_labels(t)}'" for t in example["label"]])
-        out_prompt += (
-            f"مثال {index}:\nالتغريدة: {example['input']}\nالتصنيف: {tech_str}\n\n"
-        )
-    out_prompt += f"التغريدة: {input_sample}\nالتصنيف: \n"
-    return out_prompt
-
-
-def prompt(input_sample, examples):
-    base_prompt = """
-        "من القائمة، اختر التقنية الدعائية الأنسب للتغريدة: "بدون تقنية"، "تشويه"، "مبالغة/تقليل"، "لغة محملة"، "النداء إلى الخوف/التحيز"، "التسمية/الملصقات"، "الشعارات"، "التكرار"، "الشك"،
-        "التعمية/الغموض المتعمد/الارتباك"، "التلويح بالعلم"، "التعميمات البراقة (الفضيلة)"، "تحريف موقف شخص (رجل القش)"، "عرض بيانات غير ذات صلة (السمكة الحمراء)"، "النداء إلى السلطة"، 
-        "ماذا عن"، "مغالطة الأبيض والأسود/الديكتاتورية"، "الكليشيه القاطع للفكر"، أو "التبسيط السببي"."
-        """
-    return [
-        {
-            "role": "user",
-            "content": few_shot_prompt(input_sample, base_prompt, examples),
-        }
-    ]
-
-
-def post_process(response):
-    if not response or "error" in response or "output" not in response:
-        print("Error or missing output in response:", response)
-        return None
-
-    label = re.sub(r"<[^>]+>", "", response["output"].strip().lower())
-    label_mapping = {
-        "بدون تقنية": "no technique",
-        "تشويه": "Smears",
-        "مبالغة/تقليل": "Exaggeration/Minimisation",
-        "لغة محملة": "Loaded Language",
-        "النداء إلى الخوف/التحيز": "Appeal to fear/prejudice",
-        "التسمية/الملصقات": "Name calling/Labeling",
-        "الشعارات": "Slogans",
-        "التكرار": "Repetition",
-        "الشك": "Doubt",
-        "التعمية/الغموض المتعمد/الارتباك": "Obfuscation, Intentional vagueness, Confusion",
-        "التلويح بالعلم": "Flag-waving",
-        "التعميمات البراقة (الفضيلة)": "Glittering generalities (Virtue)",
-        "تحريف موقف شخص (رجل القش)": "Misrepresentation of Someone's Position (Straw Man)",
-        "عرض بيانات غير ذات صلة (السمكة الحمراء)": "Presenting Irrelevant Data (Red Herring)",
-        "النداء إلى السلطة": "Appeal to authority",
-        "ماذا عن": "Whataboutism",
-        "مغالطة الأبيض والأسود/الديكتاتورية": "Black-and-white Fallacy/Dictatorship",
-        "الكليشيه القاطع للفكر": "Thought-terminating cliché",
-        "التبسيط السببي": "Causal Oversimplification",
-    }
-
-    detected_labels = [
-        english_label
-        for arabic_label, english_label in label_mapping.items()
-        if arabic_label in label or english_label.lower() in label
-    ]
-    print("Detected labels:", list(set(detected_labels)))
-    return list(set(detected_labels))
diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_FewShot_English.py
deleted file mode 100755
index b0bbef4c..00000000
--- a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_FewShot_English.py
+++ /dev/null
@@ -1,142 +0,0 @@
-import random
-import re
-
-from llmebench.datasets import WANLP22T3PropagandaDataset
-from llmebench.models import AzureModel
-from llmebench.tasks import MultilabelPropagandaTask
-
-
-random.seed(1333)
-
-
-def metadata():
-    return {
-        "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam",
-        "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)",
-        "model": "Llama-3.1-8B-Instruct",
-        "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).",
-    }
-
-
-def config():
-    return {
-        "dataset": WANLP22T3PropagandaDataset,
-        "dataset_args": {"techniques_path": "classes.txt"},
-        "task": MultilabelPropagandaTask,
-        "model": AzureModel,
-        "model_args": {
-            "class_labels": [
-                "no technique",
-                "Smears",
-                "Exaggeration/Minimisation",
-                "Loaded Language",
-                "Appeal to fear/prejudice",
-                "Name calling/Labeling",
-                "Slogans",
-                "Repetition",
-                "Doubt",
-                "Obfuscation, Intentional vagueness, Confusion",
-                "Flag-waving",
-                "Glittering generalities (Virtue)",
-                "Misrepresentation of Someone's Position (Straw Man)",
-                "Presenting Irrelevant Data (Red Herring)",
-                "Appeal to authority",
-                "Whataboutism",
-                "Black-and-white Fallacy/Dictatorship",
-                "Thought-terminating cliché",
-                "Causal Oversimplification",
-            ],
-            "max_tries": 3,
-        },
-    }
-
-
-def few_shot_prompt(input_sample, base_prompt, examples):
-    out_prompt = base_prompt + "\n\n"
-    out_prompt = out_prompt + "Here are some examples:\n\n"
-    for index, example in enumerate(examples):
-        tech_str = ""
-        for t in example["label"]:
-            tech_str += "'" + t + "', "
-
-        out_prompt = (
-            out_prompt
-            + "Example "
-            + str(index)
-            + ":"
-            + "\n"
-            + "Tweet: "
-            + example["input"]
-            + "\nLabel: "
-            + tech_str[:-2]  # Remove the trailing comma and space
-            + "\n\n"
-        )
-
-    # Append the sentence we want the model to predict for but leave the Label blank
-    out_prompt = out_prompt + "Tweet: " + input_sample + "\nLabel: \n"
-
-    return out_prompt
-
-
-def prompt(input_sample, examples):
-    instruction = """
-Review the following tweets and analyze the propaganda techniques used. Choose only the applicable categories from: "no technique," "Smears,"
- "Exaggeration/Minimisation," "Loaded Language," "Appeal to fear/prejudice," "Name calling/Labeling," "Slogans," "Repetition," "Doubt,"
- "Obfuscation/Intentional vagueness/Confusion," "Flag-waving," "Glittering generalities (Virtue),"
- "Misrepresentation of Someone's Position (Straw Man)," "Presenting Irrelevant Data (Red Herring)," "Appeal to authority,"
-"Whataboutism," "Black-and-white Fallacy/Dictatorship," "Thought-terminating cliché," or "Causal Oversimplification."
-    """
-    base_prompt = instruction.strip()
-
-    return [
-        {
-            "role": "user",
-            "content": (few_shot_prompt(input_sample, base_prompt, examples)),
-        }
-    ]
-
-
-def post_process(response):
-    if not response or "error" in response or "output" not in response:
-        print("Error or missing output in response:", response)
-        return "No respose"  # Safely default to NOT_ADULT when unsure
-
-    label = response["output"].strip().lower()
-    label = label.replace("<s>", "").replace("</s>", "")
-    label = label.lower()
-
-    label_mapping = {
-        "بدون تقنية": "no technique",
-        "تشويه": "Smears",
-        "مبالغة/تقليل": "Exaggeration/Minimisation",
-        "لغة محملة": "Loaded Language",
-        "النداء إلى الخوف/التحيز": "Appeal to fear/prejudice",
-        "التسمية/الملصقات": "Name calling/Labeling",
-        "الشعارات": "Slogans",
-        "التكرار": "Repetition",
-        "الشك": "Doubt",
-        "التعمية/الغموض المتعمد/الارتباك": "Obfuscation, Intentional vagueness, Confusion",
-        "التلويح بالعلم": "Flag-waving",
-        "التعميمات البراقة (الفضيلة)": "Glittering generalities (Virtue)",
-        "تحريف موقف شخص (رجل القش)": "Misrepresentation of Someone's Position (Straw Man)",
-        "عرض بيانات غير ذات صلة (السمكة الحمراء)": "Presenting Irrelevant Data (Red Herring)",
-        "النداء إلى السلطة": "Appeal to authority",
-        "ماذا عن": "Whataboutism",
-        "مغالطة الأبيض والأسود/الديكتاتورية": "Black-and-white Fallacy/Dictatorship",
-        "الكليشيه القاطع للفكر": "Thought-terminating cliché",
-        "التبسيط السببي": "Causal Oversimplification",
-    }
-
-    detected_labels = []
-    for arabic_label, english_label in label_mapping.items():
-        if arabic_label in label:
-            detected_labels.append(english_label)
-        elif english_label.lower() in label:
-            detected_labels.append(english_label)
-
-    print("Detected labels:", detected_labels)
-
-    # Remove duplicates
-    detected_labels = list(set(detected_labels))
-
-    return detected_labels
diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_FewShot_Mixed.py
deleted file mode 100755
index 3b75978a..00000000
--- a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_FewShot_Mixed.py
+++ /dev/null
@@ -1,120 +0,0 @@
-import random
-import re
-
-from llmebench.datasets import WANLP22T3PropagandaDataset
-from llmebench.models import AzureModel
-from llmebench.tasks import MultilabelPropagandaTask
-
-random.seed(1333)
-
-
-def metadata():
-    return {
-        "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam",
-        "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)",
-        "model": "Llama-3.1-8B-Instruct",
-        "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).",
-    }
-
-
-def config():
-    return {
-        "dataset": WANLP22T3PropagandaDataset,
-        "dataset_args": {"techniques_path": "classes.txt"},
-        "task": MultilabelPropagandaTask,
-        "model": AzureModel,
-        "model_args": {
-            "class_labels": [
-                "no technique",
-                "Smears",
-                "Exaggeration/Minimisation",
-                "Loaded Language",
-                "Appeal to fear/prejudice",
-                "Name calling/Labeling",
-                "Slogans",
-                "Repetition",
-                "Doubt",
-                "Obfuscation, Intentional vagueness, Confusion",
-                "Flag-waving",
-                "Glittering generalities (Virtue)",
-                "Misrepresentation of Someone's Position (Straw Man)",
-                "Presenting Irrelevant Data (Red Herring)",
-                "Appeal to authority",
-                "Whataboutism",
-                "Black-and-white Fallacy/Dictatorship",
-                "Thought-terminating cliché",
-                "Causal Oversimplification",
-            ],
-            "max_tries": 3,
-        },
-    }
-
-
-def few_shot_prompt(input_sample, examples):
-    instruction = """
-    "من القائمة، اختر التقنية الدعائية الأنسب للتغريدة: "no technique"، "Smears"، "Exaggeration/Minimisation"، "Loaded Language"، "Appeal to fear/prejudice"، "Name calling/Labeling"، "Slogans"، "Repetition"، "Doubt"،
-    "Obfuscation, Intentional vagueness, Confusion"، "Flag-waving"، "Glittering generalities (Virtue)"، "Misrepresentation of Someone's Position (Straw Man)"، "Presenting Irrelevant Data (Red Herring)"، "Appeal to authority"، 
-    "Whataboutism"، "Black-and-white Fallacy/Dictatorship"، "Thought-terminating cliché"، أو "Causal Oversimplification".
-    """
-
-    few_shot_text = instruction + "\n\nإليك بعض الأمثلة:\n\n"
-    for index, example in enumerate(examples):
-        labels = ", ".join(example["label"])
-        few_shot_text += (
-            f"مثال {index + 1}:\n"
-            f"التغريدة: '{example['input']}'\n"
-            f"التصنيف: {labels}\n\n"
-        )
-
-    few_shot_text += f"الآن، قم بتقييم التغريدة الجديدة التالية:\nالتغريدة: '{input_sample}'\nالتصنيف: "
-    return few_shot_text
-
-
-def prompt(input_sample, examples):
-    return [{"role": "user", "content": few_shot_prompt(input_sample, examples)}]
-
-
-def post_process(response):
-    if not response or "error" in response or "output" not in response:
-        print("Error or missing output in response:", response)
-        return "No respose"  # Safely default to NOT_ADULT when unsure
-
-    label = response["output"].strip().lower()
-    label = label.replace("<s>", "").replace("</s>", "")
-    label = label.lower()
-
-    label_mapping = {
-        "بدون تقنية": "no technique",
-        "تشويه": "Smears",
-        "مبالغة/تقليل": "Exaggeration/Minimisation",
-        "لغة محملة": "Loaded Language",
-        "النداء إلى الخوف/التحيز": "Appeal to fear/prejudice",
-        "التسمية/الملصقات": "Name calling/Labeling",
-        "الشعارات": "Slogans",
-        "التكرار": "Repetition",
-        "الشك": "Doubt",
-        "التعمية/الغموض المتعمد/الارتباك": "Obfuscation, Intentional vagueness, Confusion",
-        "التلويح بالعلم": "Flag-waving",
-        "التعميمات البراقة (الفضيلة)": "Glittering generalities (Virtue)",
-        "تحريف موقف شخص (رجل القش)": "Misrepresentation of Someone's Position (Straw Man)",
-        "عرض بيانات غير ذات صلة (السمكة الحمراء)": "Presenting Irrelevant Data (Red Herring)",
-        "النداء إلى السلطة": "Appeal to authority",
-        "ماذا عن": "Whataboutism",
-        "مغالطة الأبيض والأسود/الديكتاتورية": "Black-and-white Fallacy/Dictatorship",
-        "الكليشيه القاطع للفكر": "Thought-terminating cliché",
-        "التبسيط السببي": "Causal Oversimplification",
-    }
-
-    detected_labels = []
-    for arabic_label, english_label in label_mapping.items():
-        if arabic_label in label:
-            detected_labels.append(english_label)
-        elif english_label.lower() in label:
-            detected_labels.append(english_label)
-
-    print("Detected labels:", detected_labels)
-
-    # Remove duplicates
-    detected_labels = list(set(detected_labels))
-
-    return detected_labels
diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_ZeroShot_Arabic.py
deleted file mode 100755
index a42d224f..00000000
--- a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_ZeroShot_Arabic.py
+++ /dev/null
@@ -1,114 +0,0 @@
-import random
-import re
-
-from llmebench.datasets import WANLP22T3PropagandaDataset
-from llmebench.models import AzureModel
-from llmebench.tasks import MultilabelPropagandaTask
-
-
-random.seed(1333)
-
-
-def metadata():
-    return {
-        "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam",
-        "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)",
-        "model": "Llama-3.1-8B-Instruct",
-        "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).",
-    }
-
-
-def config():
-    return {
-        "dataset": WANLP22T3PropagandaDataset,
-        "dataset_args": {"techniques_path": "classes.txt"},
-        "task": MultilabelPropagandaTask,
-        "model": AzureModel,
-        "model_args": {
-            "class_labels": [
-                "no technique",
-                "Smears",
-                "Exaggeration/Minimisation",
-                "Loaded Language",
-                "Appeal to fear/prejudice",
-                "Name calling/Labeling",
-                "Slogans",
-                "Repetition",
-                "Doubt",
-                "Obfuscation, Intentional vagueness, Confusion",
-                "Flag-waving",
-                "Glittering generalities (Virtue)",
-                "Misrepresentation of Someone's Position (Straw Man)",
-                "Presenting Irrelevant Data (Red Herring)",
-                "Appeal to authority",
-                "Whataboutism",
-                "Black-and-white Fallacy/Dictatorship",
-                "Thought-terminating cliché",
-                "Causal Oversimplification",
-            ],
-            "max_tries": 3,
-        },
-    }
-
-
-def prompt(input_sample):
-    instruction = """
-    "من القائمة، اختر التقنية الدعائية التي تلتقط جوهر التغريدة بشكل أفضل، "بدون تقنية"، "تشويه"، "مبالغة/تقليل"، "لغة محملة"، "النداء إلى الخوف/التحيز"، "التسمية/الملصقات"، "الشعارات"، "التكرار"، "الشك"،
- "التعمية/الغموض المتعمد/الارتباك"، "التلويح بالعلم"، "التعميمات البراقة (الفضيلة)"، "تحريف موقف شخص (رجل القش)"، "عرض بيانات غير ذات صلة (السمكة الحمراء)"، "النداء إلى السلطة"، 
-"ماذا عن"، "مغالطة الأبيض والأسود/الديكتاتورية"، "الكليشيه القاطع للفكر"، أو "التبسيط السببي"."
-
-    """
-    return [
-        {
-            "role": "user",
-            "content": (
-                f" \n{instruction}\n" + "التغريدة: " + input_sample + "التصنيف: "
-            ),
-        }
-    ]
-
-
-def post_process(response):
-    if not response or "error" in response or "output" not in response:
-        print("Error or missing output in response:", response)
-        return "No respose"  # Safely default to NOT_ADULT when unsure
-
-    label = response["output"].strip().lower()
-    label = label.replace("<s>", "").replace("</s>", "")
-    label = label.lower()
-
-    label_mapping = {
-        "بدون تقنية": "no technique",
-        "تشويه": "Smears",
-        "مبالغة/تقليل": "Exaggeration/Minimisation",
-        "لغة محملة": "Loaded Language",
-        "النداء إلى الخوف/التحيز": "Appeal to fear/prejudice",
-        "التسمية/الملصقات": "Name calling/Labeling",
-        "الشعارات": "Slogans",
-        "التكرار": "Repetition",
-        "الشك": "Doubt",
-        "التعمية/الغموض المتعمد/الارتباك": "Obfuscation, Intentional vagueness, Confusion",
-        "التلويح بالعلم": "Flag-waving",
-        "التعميمات البراقة (الفضيلة)": "Glittering generalities (Virtue)",
-        "تحريف موقف شخص (رجل القش)": "Misrepresentation of Someone's Position (Straw Man)",
-        "عرض بيانات غير ذات صلة (السمكة الحمراء)": "Presenting Irrelevant Data (Red Herring)",
-        "النداء إلى السلطة": "Appeal to authority",
-        "ماذا عن": "Whataboutism",
-        "مغالطة الأبيض والأسود/الديكتاتورية": "Black-and-white Fallacy/Dictatorship",
-        "الكليشيه القاطع للفكر": "Thought-terminating cliché",
-        "التبسيط السببي": "Causal Oversimplification",
-    }
-
-    detected_labels = []
-    for arabic_label, english_label in label_mapping.items():
-        if arabic_label in label:
-            detected_labels.append(english_label)
-        elif english_label.lower() in label:
-            detected_labels.append(english_label)
-
-    print("Detected labels:", detected_labels)
-
-    # Remove duplicates
-    detected_labels = list(set(detected_labels))
-
-    return detected_labels
diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_ZeroShot_English.py
deleted file mode 100755
index 3c87ced3..00000000
--- a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_ZeroShot_English.py
+++ /dev/null
@@ -1,188 +0,0 @@
-import random
-import re
-
-from llmebench.datasets import WANLP22T3PropagandaDataset
-from llmebench.models import AzureModel
-from llmebench.tasks import MultilabelPropagandaTask
-
-
-random.seed(1333)
-
-
-def metadata():
-    return {
-        "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam",
-        "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)",
-        "model": "Llama-3.1-8B-Instruct",
-        "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).",
-    }
-
-
-def config():
-    return {
-        "dataset": WANLP22T3PropagandaDataset,
-        "dataset_args": {"techniques_path": "classes.txt"},
-        "task": MultilabelPropagandaTask,
-        "model": AzureModel,
-        "model_args": {
-            "class_labels": [
-                "no technique",
-                "Smears",
-                "Exaggeration/Minimisation",
-                "Loaded Language",
-                "Appeal to fear/prejudice",
-                "Name calling/Labeling",
-                "Slogans",
-                "Repetition",
-                "Doubt",
-                "Obfuscation, Intentional vagueness, Confusion",
-                "Flag-waving",
-                "Glittering generalities (Virtue)",
-                "Misrepresentation of Someone's Position (Straw Man)",
-                "Presenting Irrelevant Data (Red Herring)",
-                "Appeal to authority",
-                "Whataboutism",
-                "Black-and-white Fallacy/Dictatorship",
-                "Thought-terminating cliché",
-                "Causal Oversimplification",
-            ],
-            "max_tries": 3,
-        },
-    }
-
-
-def prompt(input_sample):
-    instruction = """
-Review the following tweets and analyze the propaganda techniques used. Choose only the applicable categories from: "no technique," "Smears,"
- "Exaggeration/Minimisation," "Loaded Language," "Appeal to fear/prejudice," "Name calling/Labeling," "Slogans," "Repetition," "Doubt,"
- "Obfuscation/Intentional vagueness/Confusion," "Flag-waving," "Glittering generalities (Virtue),"
- "Misrepresentation of Someone's Position (Straw Man)," "Presenting Irrelevant Data (Red Herring)," "Appeal to authority,"
-"Whataboutism," "Black-and-white Fallacy/Dictatorship," "Thought-terminating cliché," or "Causal Oversimplification."
-    """
-    return [
-        {
-            "role": "user",
-            "content": (f"{instruction}\n" + "Tweet: " + input_sample + "\nLabel: "),
-        }
-    ]
-
-
-# def post_process(response):
-#     if not response or 'error' in response or 'output' not in response:
-#         print("Error or missing output in response:", response)
-#         return "No respose"  # Safely default to NOT_ADULT when unsure
-
-#     label = response["output"].strip().lower()
-#     label = label.replace("<s>", "").replace("</s>", "")
-#     label = label.lower()
-#     label_mapping = {
-#         "بدون تقنية": "no technique",
-#         "تشويه": "Smears",
-#         "مبالغة/تقليل": "Exaggeration/Minimisation",
-#         "لغة محملة": "Loaded Language",
-#         "النداء إلى الخوف/التحيز": "Appeal to fear/prejudice",
-#         "التسمية/الملصقات": "Name calling/Labeling",
-#         "الشعارات": "Slogans",
-#         "التكرار": "Repetition",
-#         "الشك": "Doubt",
-#         "التعمية/الغموض المتعمد/الارتباك": "Obfuscation, Intentional vagueness, Confusion",
-#         "التلويح بالعلم": "Flag-waving",
-#         "التعميمات البراقة (الفضيلة)": "Glittering generalities (Virtue)",
-#         "تحريف موقف شخص (رجل القش)": "Misrepresentation of Someone's Position (Straw Man)",
-#         "عرض بيانات غير ذات صلة (السمكة الحمراء)": "Presenting Irrelevant Data (Red Herring)",
-#         "النداء إلى السلطة": "Appeal to authority",
-#         "ماذا عن": "Whataboutism",
-#         "مغالطة الأبيض والأسود/الديكتاتورية": "Black-and-white Fallacy/Dictatorship",
-#         "الكليشيه القاطع للفكر": "Thought-terminating cliché",
-#         "التبسيط السببي": "Causal Oversimplification"
-#     }
-#     print("label: ", label)
-#     detected_labels = []
-
-#     if "no technique" in label:
-#         detected_labels.append(label_mapping["بدون تقنية"])
-#     if "Smears" in label:
-#         detected_labels.append(label_mapping["تشويه"])
-#     if "Exaggeration/Minimisation" in label or "مبالغة" in label:
-#         detected_labels.append(label_mapping["مبالغة/تقليل"])
-#     if "Loaded Language" in label:
-#         detected_labels.append(label_mapping["لغة محملة"])
-#     if "Appeal to fear/prejudice" in label or "الخوف" in label or "fear" in label:
-#         detected_labels.append(label_mapping["النداء إلى الخوف/التحيز"])
-#     if "Name calling/Labeling" in label or "التسمية" or "name" in label:
-#         detected_labels.append(label_mapping["التسمية/الملصقات"])
-#     if "Slogans" in label:
-#         detected_labels.append(label_mapping["الشعارات"])
-#     if "Repetition" in label:
-#         detected_labels.append(label_mapping["التكرار"])
-#     if "Doubt" in label:
-#         detected_labels.append(label_mapping["الشك"])
-#     if "Obfuscation, Intentional vagueness, Confusion" in label or "Obfuscation" in label or "Intentional vagueness" in label or "Confusion" in label:
-#         detected_labels.append(label_mapping["التعمية/الغموض المتعمد/الارتباك"])
-#     if "Flag-waving" in label or "flag":
-#         detected_labels.append(label_mapping["التلويح بالعلم"])
-#     if "Glittering generalities (Virtue)" in label or "الفضيلة" in label or "Glittering":
-#         detected_labels.append(label_mapping["التعميمات البراقة (الفضيلة)"])
-#     if "Misrepresentation of Someone's Position (Straw Man)" in label or "تحريف موقف شخص" in label:
-#         detected_labels.append(label_mapping["تحريف موقف شخص (رجل القش)"])
-#     if "Presenting Irrelevant Data (Red Herring)" in label or "عرض بيانات غير ذات صلة" in label:
-#         detected_labels.append(label_mapping["عرض بيانات غير ذات صلة (السمكة الحمراء)"])
-#     if "Appeal to authority" in label:
-#         detected_labels.append(label_mapping["النداء إلى السلطة"])
-#     if "Whataboutism" in label:
-#         detected_labels.append(label_mapping["ماذا عن"])
-#     if "Black-and-white Fallacy/Dictatorship" in label or "الديكتاتورية" in label:
-#         detected_labels.append(label_mapping["مغالطة الأبيض والأسود/الديكتاتورية"])
-#     if "Thought-terminating cliché" in label or "الكليشيه" in label:
-#         detected_labels.append(label_mapping["الكليشيه القاطع للفكر"])
-#     if "Causal Oversimplification" in label or "التبسيط" in label:
-#         detected_labels.append(label_mapping["التبسيط السببي"])
-
-
-#     return detected_labels
-
-
-def post_process(response):
-    if not response or "error" in response or "output" not in response:
-        print("Error or missing output in response:", response)
-        return "No respose"  # Safely default to NOT_ADULT when unsure
-
-    label = response["output"].strip().lower()
-    label = label.replace("<s>", "").replace("</s>", "")
-    label = label.lower()
-
-    label_mapping = {
-        "بدون تقنية": "no technique",
-        "تشويه": "Smears",
-        "مبالغة/تقليل": "Exaggeration/Minimisation",
-        "لغة محملة": "Loaded Language",
-        "النداء إلى الخوف/التحيز": "Appeal to fear/prejudice",
-        "التسمية/الملصقات": "Name calling/Labeling",
-        "الشعارات": "Slogans",
-        "التكرار": "Repetition",
-        "الشك": "Doubt",
-        "التعمية/الغموض المتعمد/الارتباك": "Obfuscation, Intentional vagueness, Confusion",
-        "التلويح بالعلم": "Flag-waving",
-        "التعميمات البراقة (الفضيلة)": "Glittering generalities (Virtue)",
-        "تحريف موقف شخص (رجل القش)": "Misrepresentation of Someone's Position (Straw Man)",
-        "عرض بيانات غير ذات صلة (السمكة الحمراء)": "Presenting Irrelevant Data (Red Herring)",
-        "النداء إلى السلطة": "Appeal to authority",
-        "ماذا عن": "Whataboutism",
-        "مغالطة الأبيض والأسود/الديكتاتورية": "Black-and-white Fallacy/Dictatorship",
-        "الكليشيه القاطع للفكر": "Thought-terminating cliché",
-        "التبسيط السببي": "Causal Oversimplification",
-    }
-
-    detected_labels = []
-    for arabic_label, english_label in label_mapping.items():
-        if arabic_label in label:
-            detected_labels.append(english_label)
-        elif english_label.lower() in label:
-            detected_labels.append(english_label)
-
-    print("Detected labels:", detected_labels)
-
-    # Remove duplicates
-    detected_labels = list(set(detected_labels))
-
-    return detected_labels
diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_ZeroShot_Mixed.py
deleted file mode 100755
index cdec1b31..00000000
--- a/assets/ar/factuality_disinformation_harmful_content/propaganda/WANLP22T3_Llama3-8b_ZeroShot_Mixed.py
+++ /dev/null
@@ -1,113 +0,0 @@
-import random
-import re
-
-from llmebench.datasets import WANLP22T3PropagandaDataset
-from llmebench.models import AzureModel
-from llmebench.tasks import MultilabelPropagandaTask
-
-
-random.seed(1333)
-
-
-def metadata():
-    return {
-        "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam",
-        "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)",
-        "model": "Llama-3.1-8B-Instruct",
-        "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).",
-    }
-
-
-def config():
-    return {
-        "dataset": WANLP22T3PropagandaDataset,
-        "dataset_args": {"techniques_path": "classes.txt"},
-        "task": MultilabelPropagandaTask,
-        "model": AzureModel,
-        "model_args": {
-            "class_labels": [
-                "no technique",
-                "Smears",
-                "Exaggeration/Minimisation",
-                "Loaded Language",
-                "Appeal to fear/prejudice",
-                "Name calling/Labeling",
-                "Slogans",
-                "Repetition",
-                "Doubt",
-                "Obfuscation, Intentional vagueness, Confusion",
-                "Flag-waving",
-                "Glittering generalities (Virtue)",
-                "Misrepresentation of Someone's Position (Straw Man)",
-                "Presenting Irrelevant Data (Red Herring)",
-                "Appeal to authority",
-                "Whataboutism",
-                "Black-and-white Fallacy/Dictatorship",
-                "Thought-terminating cliché",
-                "Causal Oversimplification",
-            ],
-            "max_tries": 3,
-        },
-    }
-
-
-def prompt(input_sample):
-    instruction = """
-    "من القائمة، اختر التقنية الدعائية الأنسب للتغريدة: "no technique"، "Smears"، "Exaggeration/Minimisation"، "Loaded Language"، "Appeal to fear/prejudice"، "Name calling/Labeling"، "Slogans"، "Repetition"، "Doubt"،
-    "Obfuscation, Intentional vagueness, Confusion"، "Flag-waving"، "Glittering generalities (Virtue)"، "Misrepresentation of Someone's Position (Straw Man)"، "Presenting Irrelevant Data (Red Herring)"، "Appeal to authority"، 
-    "Whataboutism"، "Black-and-white Fallacy/Dictatorship"، "Thought-terminating cliché"، أو "Causal Oversimplification".
-    """
-    return [
-        {
-            "role": "user",
-            "content": (
-                f" \n{instruction}\n" + "التغريدة: " + input_sample + "التصنيف: "
-            ),
-        }
-    ]
-
-
-def post_process(response):
-    if not response or "error" in response or "output" not in response:
-        print("Error or missing output in response:", response)
-        return "No respose"  # Safely default to NOT_ADULT when unsure
-
-    label = response["output"].strip().lower()
-    label = label.replace("<s>", "").replace("</s>", "")
-    label = label.lower()
-
-    label_mapping = {
-        "بدون تقنية": "no technique",
-        "تشويه": "Smears",
-        "مبالغة/تقليل": "Exaggeration/Minimisation",
-        "لغة محملة": "Loaded Language",
-        "النداء إلى الخوف/التحيز": "Appeal to fear/prejudice",
-        "التسمية/الملصقات": "Name calling/Labeling",
-        "الشعارات": "Slogans",
-        "التكرار": "Repetition",
-        "الشك": "Doubt",
-        "التعمية/الغموض المتعمد/الارتباك": "Obfuscation, Intentional vagueness, Confusion",
-        "التلويح بالعلم": "Flag-waving",
-        "التعميمات البراقة (الفضيلة)": "Glittering generalities (Virtue)",
-        "تحريف موقف شخص (رجل القش)": "Misrepresentation of Someone's Position (Straw Man)",
-        "عرض بيانات غير ذات صلة (السمكة الحمراء)": "Presenting Irrelevant Data (Red Herring)",
-        "النداء إلى السلطة": "Appeal to authority",
-        "ماذا عن": "Whataboutism",
-        "مغالطة الأبيض والأسود/الديكتاتورية": "Black-and-white Fallacy/Dictatorship",
-        "الكليشيه القاطع للفكر": "Thought-terminating cliché",
-        "التبسيط السببي": "Causal Oversimplification",
-    }
-
-    detected_labels = []
-    for arabic_label, english_label in label_mapping.items():
-        if arabic_label in label:
-            detected_labels.append(english_label)
-        elif english_label.lower() in label:
-            detected_labels.append(english_label)
-
-    print("Detected labels:", detected_labels)
-
-    # Remove duplicates
-    detected_labels = list(set(detected_labels))
-
-    return detected_labels

From 32d1f548aed33c50736b9359269851c2e0d2a683 Mon Sep 17 00:00:00 2001
From: MohamedBayan <mohamadbayan2014noname@gmail.com>
Date: Mon, 20 Jan 2025 16:15:58 +0300
Subject: [PATCH 3/3] Add wise-harmfulness_detection assets

---
 .../CT22Harmful_GPT4_FewShot_Arabic.py        | 85 ++++++++++++++++++
 .../CT22Harmful_GPT4_FewShot_English.py       | 84 +++++++++++++++++
 .../CT22Harmful_GPT4_FewShot_Mixed.py         | 85 ++++++++++++++++++
 .../CT22Harmful_GPT4_ZeroShot_Arabic.py       | 71 +++++++++++++++
 .../CT22Harmful_GPT4_ZeroShot_English.py      | 71 +++++++++++++++
 .../CT22Harmful_GPT4_ZeroShot_Mixed.py        | 72 +++++++++++++++
 .../CT22Harmful_JAIS13b_FewShot_Arabic.py     | 67 ++++++++++++++
 .../CT22Harmful_JAIS13b_FewShot_English.py    | 75 ++++++++++++++++
 .../CT22Harmful_JAIS13b_FewShot_Mixed.py      | 89 +++++++++++++++++++
 .../CT22Harmful_JAIS13b_ZeroShot_Arabic.py    | 58 ++++++++++++
 .../CT22Harmful_JAIS13b_ZeroShot_English.py   | 59 ++++++++++++
 .../CT22Harmful_JAIS13b_ZeroShot_Mixed.py     | 59 ++++++++++++
 .../CT22Harmful_Llama3-8b_FewShot_Arabic.py   | 66 ++++++++++++++
 .../CT22Harmful_Llama3-8b_FewShot_English.py  | 71 +++++++++++++++
 .../CT22Harmful_Llama3-8b_FewShot_Mixed.py    | 66 ++++++++++++++
 .../CT22Harmful_Llama3-8b_ZeroShot_Arabic.py  | 57 ++++++++++++
 .../CT22Harmful_Llama3-8b_ZeroShot_English.py | 57 ++++++++++++
 .../CT22Harmful_Llama3-8b_ZeroShot_Mixed.py   | 55 ++++++++++++
 18 files changed, 1247 insertions(+)
 create mode 100755 assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_FewShot_Arabic.py
 create mode 100755 assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_FewShot_English.py
 create mode 100755 assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_FewShot_Mixed.py
 create mode 100755 assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_ZeroShot_Arabic.py
 create mode 100755 assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_ZeroShot_English.py
 create mode 100755 assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_ZeroShot_Mixed.py
 create mode 100755 assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_FewShot_Arabic.py
 create mode 100755 assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_FewShot_English.py
 create mode 100755 assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_FewShot_Mixed.py
 create mode 100755 assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_ZeroShot_Arabic.py
 create mode 100755 assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_ZeroShot_English.py
 create mode 100755 assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_ZeroShot_Mixed.py
 create mode 100755 assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Llama3-8b_FewShot_Arabic.py
 create mode 100755 assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Llama3-8b_FewShot_English.py
 create mode 100755 assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Llama3-8b_FewShot_Mixed.py
 create mode 100755 assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Llama3-8b_ZeroShot_Arabic.py
 create mode 100755 assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Llama3-8b_ZeroShot_English.py
 create mode 100755 assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Llama3-8b_ZeroShot_Mixed.py

diff --git a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_FewShot_Arabic.py
new file mode 100755
index 00000000..187661c9
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_FewShot_Arabic.py
@@ -0,0 +1,85 @@
+from llmebench.datasets import CT22HarmfulDataset
+from llmebench.models import OpenAIModel
+from llmebench.tasks import HarmfulDetectionTask
+
+
+def metadata():  # Returns descriptive information about this asset: authors, affiliation, model name and paper links.
+    return {
+        "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam",
+        "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)",
+        "model": "GPT-4o-2024-05-22",  # NOTE(review): confirm this snapshot date against the model that was actually deployed
+        "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).",
+    }
+
+
+def config():  # Returns the dataset, task and model classes for this run together with their arguments.
+    return {
+        "dataset": CT22HarmfulDataset,
+        "task": HarmfulDetectionTask,
+        "model": OpenAIModel,
+        "model_args": {
+            "max_tries": 3,  # presumably the retry budget for model calls; verify against the model class
+        },
+        "general_args": {"test_split": "ar", "fewshot": {"train_split": "ar"}},  # test split and few-shot example split are both "ar"
+    }
+
+
+def prompt(input_sample, examples):  # Builds the two-message chat (system + user) for one tweet and its few-shot examples.
+    base_prompt = "هل قد تتسبب هذه التغريدة بأي ضرر أو أذى لشخص أو مؤسسة؟ إن كانت كذلك، أجب بـ 'ضارة'. أجب بـ 'غير ضارة' إن لم تكن كذلك. قدم فقط التصنيف.\n"
+    prompt = few_shot_prompt(input_sample, base_prompt, examples)  # instruction above asks for 'ضارة' (harmful) or 'غير ضارة' (not harmful), label only; this local shadows the function name
+
+    return [
+        {
+            "role": "system",
+            "content": "أنت خبير في تحليل وتصنيف التغريدات.",  # "You are an expert in analyzing and classifying tweets."
+        },
+        {
+            "role": "user",
+            "content": prompt,  # instruction + labelled examples + the tweet to classify
+        },
+    ]
+
+
+def few_shot_prompt(input_sample, base_prompt, examples):  # Joins the instruction, the labelled examples and the unlabelled target tweet into one string.
+    out_prompt = base_prompt + "\n"
+    for example in examples:
+        # Labels are spelled out in words because ChatGPT was found to get confused by 0 and 1 in the prompt
+        label = "غير ضارة" if example["label"] == "0" else "ضارة"  # "0" -> not harmful; any other value -> harmful
+        out_prompt = (
+            out_prompt + "التغريدة: " + example["input"] + "التصنيف: " + label + "\n\n"
+        )
+
+    # Append the tweet we want the model to classify, leaving its label blank
+    out_prompt = out_prompt + "التغريدة: " + input_sample + "التصنيف: \n"
+
+    # NOTE(review): in both concatenations the tweet text is followed directly by the
+    # "التصنيف: " marker with no newline or space in between; confirm this layout is intended.
+
+    return out_prompt
+
+
+def post_process(response):  # Maps the model's free-text reply to "0" (not harmful), "1" (harmful) or None.
+    label = response["choices"][0]["message"]["content"]  # text of the first choice in the response
+    label_fixed = None  # returned as-is when none of the keywords below match
+    label = label.replace("label:", "")  # runs before lower(), so only the exact lowercase prefix is removed
+    label = label.replace("التصنيف:", "")
+    label = label.strip().lower()
+
+    if (  # negative cues are tested first: 'غير ضارة' / 'not harmful' also contain the positive keywords
+        "غير ضارة" in label
+        or "ليست ضارة" in label
+        or "غير ضاره" in label
+        or "غير" in label  # already covers the two 'غير ...' phrases above
+        or "not" in label  # already covers 'not harmful' and 'not_harmful' below
+        or "ليست" in label  # already covers 'ليست ضارة' above
+        or "لا" in label  # NOTE(review): very short substring, may match inside unrelated words; confirm acceptable
+        or "not harmful" in label
+        or label.startswith("no")
+        or "safe" in label
+        or "not_harmful" in label
+    ):
+        label_fixed = "0"
+    elif "ضارة" in label or "harmful" in label or "نعم" in label or "yes" in label:  # reached only when no negative cue matched
+        label_fixed = "1"
+
+    return label_fixed
diff --git a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_FewShot_English.py
new file mode 100755
index 00000000..4915b237
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_FewShot_English.py
@@ -0,0 +1,84 @@
+from llmebench.datasets import CT22HarmfulDataset
+from llmebench.models import OpenAIModel
+from llmebench.tasks import HarmfulDetectionTask
+
+
+def metadata():
+    """Return the author, affiliation, model and description of this asset."""
+    info = {"author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam"}
+    info["affiliation"] = "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)"
+    info["model"] = "GPT-4o-2024-05-22"
+    info["description"] = "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)."
+    return info
+
+
+def config():
+    """Return the dataset, task, model and run arguments for this asset."""
+    settings = {"dataset": CT22HarmfulDataset, "task": HarmfulDetectionTask}
+    settings["model"] = OpenAIModel
+    settings["model_args"] = {"max_tries": 3}
+    settings["general_args"] = {
+        "test_split": "ar",
+        "fewshot": {"train_split": "ar"},
+    }
+    return settings
+
+
+def prompt(input_sample, examples):
+    """Return the chat messages (system + user) for one few-shot query."""
+    instruction = "Can this tweet cause harm to any person, organization or entity? Answer only by 'harmful' or 'not harmful'. Provide only label.\n"
+    system_message = {
+        "role": "system",
+        "content": "You are an expert in tweet analysis and classification.",
+    }
+    # Instruction, labelled examples and the query tweet form the user turn.
+    user_message = {
+        "role": "user",
+        "content": few_shot_prompt(input_sample, instruction, examples),
+    }
+
+    return [system_message, user_message]
+
+
+def few_shot_prompt(input_sample, base_prompt, examples):
+    """Build the few-shot prompt text.
+
+    The instruction is followed by one "tweet:/label:" pair per example and
+    finally by the tweet to classify with its label left empty.
+    """
+    pieces = [base_prompt + "\n"]
+    for shot in examples:
+        # Found chatgpt confused when using 0 and 1 in the prompt
+        tag = "not harmful" if shot["label"] != "1" else "harmful"
+        pieces.append("tweet: " + shot["input"] + "\nlabel: " + tag + "\n\n")
+
+    # The query tweet goes last with its label left blank for the model.
+    pieces.append("tweet: " + input_sample + "\nlabel: \n")
+
+    return "".join(pieces)
+
+
+def post_process(response):
+    """Map the raw model reply to "0" (not harmful), "1" (harmful) or None."""
+    text = response["choices"][0]["message"]["content"]
+    for marker in ("label:", "التصنيف:"):
+        text = text.replace(marker, "")
+    text = text.strip().lower()
+    # Negative cues go first: "harmful" is itself a substring of "not harmful".
+    negative_cues = (
+        "غير ضارة",
+        "ليست ضارة",
+        "غير ضاره",
+        "غير",
+        "not",
+        "ليست",
+        "لا",
+        "not harmful",
+        "safe",
+        "not_harmful",
+    )
+    if text.startswith("no") or any(cue in text for cue in negative_cues):
+        return "0"
+    if any(cue in text for cue in ("ضارة", "harmful", "نعم", "yes")):
+        return "1"
+    return None
diff --git a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_FewShot_Mixed.py
new file mode 100755
index 00000000..ffe298b4
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_FewShot_Mixed.py
@@ -0,0 +1,85 @@
+from llmebench.datasets import CT22HarmfulDataset
+from llmebench.models import OpenAIModel
+from llmebench.tasks import HarmfulDetectionTask
+
+
+def metadata():
+    """Return the author, affiliation, model and description of this asset."""
+    info = {"author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam"}
+    info["affiliation"] = "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)"
+    info["model"] = "GPT-4o-2024-05-22"
+    info["description"] = "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)."
+    return info
+
+
+def config():
+    """Return the dataset, task, model and run arguments for this asset."""
+    settings = {"dataset": CT22HarmfulDataset, "task": HarmfulDetectionTask}
+    settings["model"] = OpenAIModel
+    settings["model_args"] = {"max_tries": 3}
+    settings["general_args"] = {
+        "test_split": "ar",
+        "fewshot": {"train_split": "ar"},
+    }
+    return settings
+
+
+def prompt(input_sample, examples):
+    """Return the chat messages (system + user) for one few-shot query."""
+    instruction = "هل قد تتسبب هذه التغريدة بأي ضرر أو أذى لشخص أو مؤسسة؟ إن كانت كذلك، أجب بـ 'harmful'. أجب بـ 'not harmful' إن لم تكن كذلك. قدم فقط التصنيف.\n"
+    system_message = {
+        "role": "system",
+        "content": "أنت خبير في تحليل وتصنيف التغريدات.",
+    }
+    # Instruction, labelled examples and the query tweet form the user turn.
+    user_message = {
+        "role": "user",
+        "content": few_shot_prompt(input_sample, instruction, examples),
+    }
+
+    return [system_message, user_message]
+
+
+def few_shot_prompt(input_sample, base_prompt, examples):
+    """Build the mixed-language few-shot prompt text.
+
+    Each example is rendered as a tweet line followed by its label line, and
+    the sample to classify is appended last with an empty label.
+    """
+    out_prompt = base_prompt + "\n"
+    for example in examples:
+        # Found chatgpt confused when using 0 and 1 in the prompt
+        label = "not harmful" if example["label"] == "0" else "harmful"
+        # Newline keeps the label marker from being glued onto the tweet text.
+        out_prompt += "التغريدة: " + example["input"] + "\nالتصنيف: " + label + "\n\n"
+
+    # Append the sentence we want the model to predict for but leave the Label blank
+    out_prompt += "التغريدة: " + input_sample + "\nالتصنيف: \n"
+    return out_prompt
+
+
+def post_process(response):
+    """Map the raw model reply to "0" (not harmful), "1" (harmful) or None."""
+    text = response["choices"][0]["message"]["content"]
+    for marker in ("label:", "التصنيف:"):
+        text = text.replace(marker, "")
+    text = text.strip().lower()
+
+    # Negative cues go first: "harmful" is itself a substring of "not harmful".
+    negative_cues = (
+        "غير ضارة",
+        "ليست ضارة",
+        "غير ضاره",
+        "غير",
+        "not",
+        "ليست",
+        "لا",
+        "not harmful",
+        "safe",
+        "not_harmful",
+    )
+    if text.startswith("no") or any(cue in text for cue in negative_cues):
+        return "0"
+    if any(cue in text for cue in ("ضارة", "harmful", "نعم", "yes")):
+        return "1"
+    return None
diff --git a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_ZeroShot_Arabic.py
new file mode 100755
index 00000000..0f5e05b0
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_ZeroShot_Arabic.py
@@ -0,0 +1,71 @@
+import re
+
+from llmebench.datasets import CT22HarmfulDataset
+from llmebench.models import OpenAIModel
+from llmebench.tasks import HarmfulDetectionTask
+
+
+def metadata():
+    """Return the author, affiliation, model and description of this asset."""
+    info = {"author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam"}
+    info["affiliation"] = "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)"
+    info["model"] = "GPT-4o-2024-05-22"
+    info["description"] = "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)."
+    return info
+
+
+def config():
+    """Return the dataset, task, model and run arguments for this asset."""
+    settings = {"dataset": CT22HarmfulDataset, "task": HarmfulDetectionTask}
+    settings["model"] = OpenAIModel
+    settings["model_args"] = {
+        "class_labels": ["0", "1"],
+        "max_tries": 30,
+    }
+    # Zero-shot asset: only a test split is configured.
+    settings["general_args"] = {"test_split": "ar"}
+    return settings
+
+
+def prompt(input_sample):
+    """Return the chat messages for one zero-shot query.
+
+    The system message describes the assistant's role; the user message
+    carries the instruction followed by the tweet and an empty label slot.
+    """
+    instruction = "هل قد تتسبب هذه التغريدة بأي ضرر أو أذى لشخص أو مؤسسة؟ إن كانت كذلك، أجب بـ 'ضارة'. أجب بـ 'غير ضارة' إن لم تكن كذلك. قدم فقط التصنيف.\n\n"
+    # The label slot stays empty for the model to fill in.
+    query = "التغريدة: {}\n".format(input_sample) + "التصنيف: \n"
+    system_message = {
+        "role": "system",
+        "content": "أنت خبير في تحليل وتصنيف التغريدات.",
+    }
+    user_message = {"role": "user", "content": instruction + query}
+
+    return [system_message, user_message]
+
+
+def post_process(response):
+    """Map the raw model reply to "0" (not harmful), "1" (harmful) or None."""
+    text = response["choices"][0]["message"]["content"]
+    for marker in ("label:", "التصنيف:"):
+        text = text.replace(marker, "")
+    text = text.strip().lower()
+    # Negative cues go first: "ضارة" is itself a substring of "غير ضارة".
+    negative_cues = (
+        "غير ضارة",
+        "ليست ضارة",
+        "غير ضاره",
+        "غير",
+        "not",
+        "ليست",
+        "لا",
+        "not harmful",
+        "safe",
+        "not_harmful",
+    )
+    if text.startswith("no") or any(cue in text for cue in negative_cues):
+        return "0"
+    if any(cue in text for cue in ("ضارة", "harmful", "نعم", "yes")):
+        return "1"
+    return None
diff --git a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_ZeroShot_English.py
new file mode 100755
index 00000000..cac11ed7
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_ZeroShot_English.py
@@ -0,0 +1,71 @@
+import re
+
+from llmebench.datasets import CT22HarmfulDataset
+from llmebench.models import OpenAIModel
+from llmebench.tasks import HarmfulDetectionTask
+
+
+def metadata():
+    """Return the author, affiliation, model and description of this asset."""
+    info = {"author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam"}
+    info["affiliation"] = "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)"
+    info["model"] = "GPT-4o-2024-05-22"
+    info["description"] = "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)."
+    return info
+
+
+def config():
+    """Return the dataset, task, model and run arguments for this asset."""
+    settings = {"dataset": CT22HarmfulDataset, "task": HarmfulDetectionTask}
+    settings["model"] = OpenAIModel
+    settings["model_args"] = {
+        "class_labels": ["0", "1"],
+        "max_tries": 30,
+    }
+    # Zero-shot asset: only a test split is configured.
+    settings["general_args"] = {"test_split": "ar"}
+    return settings
+
+
+def prompt(input_sample):
+    """Return the chat messages for one zero-shot query.
+
+    The system message describes the assistant's role; the user message
+    carries the instruction followed by the tweet and an empty label slot.
+    """
+    instruction = "Can this tweet cause harm to any person, organization or entity? Answer only by 'harmful' or 'not harmful'. Provide only label.\n\n"
+    # The label slot stays empty for the model to fill in.
+    query = "tweet: {}\n".format(input_sample) + "label: \n"
+    system_message = {
+        "role": "system",
+        "content": "You are an expert in tweet analysis and classification.",
+    }
+    user_message = {"role": "user", "content": instruction + query}
+
+    return [system_message, user_message]
+
+
+def post_process(response):
+    """Map the raw model reply to "0" (not harmful), "1" (harmful) or None."""
+    text = response["choices"][0]["message"]["content"]
+    for marker in ("label:", "التصنيف:"):
+        text = text.replace(marker, "")
+    text = text.strip().lower()
+    # Negative cues go first: "harmful" is itself a substring of "not harmful".
+    negative_cues = (
+        "غير ضارة",
+        "ليست ضارة",
+        "غير ضاره",
+        "غير",
+        "not",
+        "ليست",
+        "لا",
+        "not harmful",
+        "safe",
+        "not_harmful",
+    )
+    if text.startswith("no") or any(cue in text for cue in negative_cues):
+        return "0"
+    if any(cue in text for cue in ("ضارة", "harmful", "نعم", "yes")):
+        return "1"
+    return None
diff --git a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_ZeroShot_Mixed.py
new file mode 100755
index 00000000..13196db3
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_GPT4_ZeroShot_Mixed.py
@@ -0,0 +1,72 @@
+import re
+
+from llmebench.datasets import CT22HarmfulDataset
+from llmebench.models import OpenAIModel
+from llmebench.tasks import HarmfulDetectionTask
+
+
+def metadata():
+    """Return the author, affiliation, model and description of this asset."""
+    info = {"author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam"}
+    info["affiliation"] = "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)"
+    info["model"] = "GPT-4o-2024-05-22"
+    info["description"] = "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)."
+    return info
+
+
+def config():
+    """Return the dataset, task, model and run arguments for this asset."""
+    settings = {"dataset": CT22HarmfulDataset, "task": HarmfulDetectionTask}
+    settings["model"] = OpenAIModel
+    settings["model_args"] = {
+        "class_labels": ["0", "1"],
+        "max_tries": 30,
+    }
+    # Zero-shot asset: only a test split is configured.
+    settings["general_args"] = {"test_split": "ar"}
+    return settings
+
+
+def prompt(input_sample):
+    """Return the chat messages for one zero-shot query.
+
+    The system message describes the assistant's role; the user message
+    carries the instruction followed by the tweet and an empty label slot.
+    """
+    instruction = "هل قد تتسبب هذه التغريدة بأي ضرر أو أذى لشخص أو مؤسسة؟ إن كانت كذلك، أجب بـ 'harmful'. أجب بـ 'not harmful' إن لم تكن كذلك. قدم فقط التصنيف.\n\n"
+    # The label slot stays empty for the model to fill in.
+    query = "التغريدة: {}\n".format(input_sample) + "التصنيف: \n"
+    system_message = {
+        "role": "system",
+        "content": "أنت خبير في تحليل وتصنيف التغريدات.",
+    }
+    user_message = {"role": "user", "content": instruction + query}
+
+    return [system_message, user_message]
+
+
+def post_process(response):
+    """Map the raw model reply to "0" (not harmful), "1" (harmful) or None."""
+    text = response["choices"][0]["message"]["content"]
+    for marker in ("label:", "التصنيف:"):
+        text = text.replace(marker, "")
+    text = text.strip().lower()
+
+    # Negative cues go first: "harmful" is itself a substring of "not harmful".
+    negative_cues = (
+        "غير ضارة",
+        "ليست ضارة",
+        "غير ضاره",
+        "غير",
+        "not",
+        "ليست",
+        "لا",
+        "not harmful",
+        "safe",
+        "not_harmful",
+    )
+    if text.startswith("no") or any(cue in text for cue in negative_cues):
+        return "0"
+    if any(cue in text for cue in ("ضارة", "harmful", "نعم", "yes")):
+        return "1"
+    return None
diff --git a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_FewShot_Arabic.py
new file mode 100755
index 00000000..9e9be164
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_FewShot_Arabic.py
@@ -0,0 +1,67 @@
+import random
+
+from llmebench.datasets import CT22HarmfulDataset
+from llmebench.models import FastChatModel
+from llmebench.tasks import HarmfulDetectionTask
+
+
+def metadata():
+    """Return the author, affiliation, model and description of this asset."""
+    info = {"author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam"}
+    info["affiliation"] = "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)"
+    info["model"] = "jais-13b-chat"
+    info["description"] = "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)."
+    return info
+
+
+def config():
+    """Return the dataset, task, model and run arguments for this asset."""
+    settings = {"dataset": CT22HarmfulDataset, "task": HarmfulDetectionTask}
+    settings["model"] = FastChatModel
+    settings["model_args"] = {"class_labels": ["0", "1"], "max_tries": 3}
+    # Test on the "ar" split; the few-shot config points at the "ar" train split.
+    settings["general_args"] = {
+        "test_split": "ar",
+        "fewshot": {"train_split": "ar"},
+    }
+    return settings
+
+
+def few_shot_prompt(input_sample, base_prompt, examples):
+    """Build the few-shot prompt: instruction, labelled examples, then the query."""
+    pieces = [base_prompt + "\n"]
+    for shot in examples:
+        # Dataset labels "0"/"1" are shown to the model as Arabic words.
+        tag = "ضارة" if shot["label"] != "0" else "غير ضارة"
+        pieces.append("التغريدة: " + shot["input"] + "\nالتصنيف: " + tag + "\n\n")
+
+    # The query tweet goes last with its label left blank for the model.
+    pieces.append("التغريدة: " + input_sample + "\nالتصنيف: \n")
+    return "".join(pieces)
+
+
+def prompt(input_sample, examples):
+    """Return the single user message carrying the few-shot prompt."""
+    instruction = "صنف التغريدة التالية على أنها 'ضارة' أو 'غير ضارة'. قدم التصنيف فقط."
+    user_message = {
+        "role": "user",
+        "content": few_shot_prompt(input_sample, instruction, examples),
+    }
+    return [user_message]
+
+
+def post_process(response):
+    """Map the raw model reply to "0" (not harmful), "1" (harmful) or None."""
+    reply = response["choices"][0]["message"]["content"].lower()
+    print("label: ", reply)
+    # Plain substring tests: "ai" also matches inside longer words.
+    if any(marker in reply for marker in ("لا يمكنني", "ai")):
+        return None
+    not_harmful_markers = (
+        "غير ضارة",
+        "لا يحتوي على محتوى ضار",
+        "غير",
+        "ليست",
+    )
+    # Any reply without a "not harmful" marker counts as harmful.
+    return "0" if any(m in reply for m in not_harmful_markers) else "1"
diff --git a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_FewShot_English.py
new file mode 100755
index 00000000..c26b458a
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_FewShot_English.py
@@ -0,0 +1,75 @@
+import random
+
+from llmebench.datasets import CT22HarmfulDataset
+from llmebench.models import FastChatModel
+from llmebench.tasks import HarmfulDetectionTask
+
+
+def metadata():
+    """Return the author, affiliation, model and description of this asset."""
+    info = {"author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam"}
+    info["affiliation"] = "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)"
+    info["model"] = "jais-13b-chat"
+    info["description"] = "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)."
+    return info
+
+
+def config():
+    """Return the dataset, task, model and run arguments for this asset."""
+    settings = {"dataset": CT22HarmfulDataset, "task": HarmfulDetectionTask}
+    settings["model"] = FastChatModel
+    settings["model_args"] = {"class_labels": ["0", "1"], "max_tries": 3}
+    # Test on the "ar" split; the few-shot config points at the "ar" train split.
+    settings["general_args"] = {
+        "test_split": "ar",
+        "fewshot": {"train_split": "ar"},
+    }
+    return settings
+
+
+def few_shot_prompt(input_sample, base_prompt, examples):
+    """Build the few-shot prompt text.
+
+    The instruction is followed by one "tweet:/label:" pair per example and
+    finally by the tweet to classify with its label left empty.
+    """
+    pieces = [base_prompt + "\n"]
+    for shot in examples:
+        # Found chatgpt confused when using 0 and 1 in the prompt
+        tag = "harmful" if shot["label"] != "0" else "not_harmful"
+        pieces.append("tweet: " + shot["input"] + "\nlabel: " + tag + "\n\n")
+
+    # The query tweet goes last with its label left blank for the model.
+    pieces.append("tweet: " + input_sample + "\nlabel: \n")
+
+    return "".join(pieces)
+
+
+def prompt(input_sample, examples):
+    """Return the single user message carrying the few-shot prompt."""
+    instruction = "Classify the following tweet as 'harmful' or 'not_harmful'. Provide only label."
+    user_message = {
+        "role": "user",
+        "content": few_shot_prompt(input_sample, instruction, examples),
+    }
+    return [user_message]
+
+
+def post_process(response):
+    """Map the raw model reply to "0" (not harmful), "1" (harmful) or None."""
+    reply = response["choices"][0]["message"]["content"].lower()
+    print("label: ", reply)
+    # An explicit "harmful" verdict is honoured before any other marker.
+    if any(marker in reply for marker in ("label: harmful", "is harmful")):
+        return "1"
+    if any(marker in reply for marker in ("لا يمكنني", "ai", "لا يمكن")):
+        return None
+    not_harmful_markers = (
+        "غير ضارة",
+        "لا يحتوي على محتوى ضار",
+        "غير",
+        "ليست",
+        "not",
+    )
+    # Any reply without a "not harmful" marker counts as harmful.
+    return "0" if any(m in reply for m in not_harmful_markers) else "1"
diff --git a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_FewShot_Mixed.py
new file mode 100755
index 00000000..9fe4d0e3
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_FewShot_Mixed.py
@@ -0,0 +1,89 @@
+import random
+
+from llmebench.datasets import CT22HarmfulDataset
+from llmebench.models import FastChatModel
+from llmebench.tasks import HarmfulDetectionTask
+
+
+def metadata():
+    """Return the author, affiliation, model and description of this asset."""
+    info = {"author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam"}
+    info["affiliation"] = "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)"
+    info["model"] = "jais-13b-chat"
+    info["description"] = "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)."
+    return info
+
+
+def config():
+    """Return the dataset, task, model and run arguments for this asset."""
+    settings = {"dataset": CT22HarmfulDataset, "task": HarmfulDetectionTask}
+    settings["model"] = FastChatModel
+    settings["model_args"] = {"class_labels": ["0", "1"], "max_tries": 3}
+    # Test on the "ar" split; the few-shot config points at the "ar" train split.
+    settings["general_args"] = {
+        "test_split": "ar",
+        "fewshot": {"train_split": "ar"},
+    }
+    return settings
+
+
+def few_shot_prompt(input_sample, base_prompt, examples):
+    """Build the few-shot prompt: instruction, labelled examples, then the query."""
+    pieces = [base_prompt + "\n"]
+    for shot in examples:
+        # Labels stay English here; only the field markers are Arabic.
+        tag = "harmful" if shot["label"] != "0" else "not_harmful"
+        pieces.append("التغريدة: " + shot["input"] + "\nالتصنيف: " + tag + "\n\n")
+
+    # The query tweet goes last with its label left blank for the model.
+    pieces.append("التغريدة: " + input_sample + "\nالتصنيف: \n")
+    return "".join(pieces)
+
+
+def prompt(input_sample, examples):
+    """Return the single user message carrying the few-shot prompt."""
+    instruction = (
+        "صنف التغريدة التالية على أنها 'harmful' أو 'not_harmful'. قدم التصنيف فقط.\n\n"
+    )
+    user_message = {
+        "role": "user",
+        "content": few_shot_prompt(input_sample, instruction, examples),
+    }
+    return [user_message]
+
+
+def post_process(response):
+    """Map the raw model reply to "0" (not harmful), "1" (harmful) or None."""
+    label = response["choices"][0]["message"]["content"].lower()
+    print("label: ", label)
+    # Refusals and requests for clarification carry no label.
+    if (
+        "لا يمكنني" in label
+        or "ai" in label
+        or "لا يمكن" in label
+        or "ما هي المعايير" in label
+    ):
+        return None
+    if "التغريدة الأخيرة هي 'harmful'" in label:
+        return "1"
+    if "التغريدة الثالثة مصنفة على أنها 'غير مؤذية'" in label:
+        return "0"
+    # "غير آمن" is tested before the generic "غير" cue, which would map it to "0".
+    if "التغريدة الثالثة مصنفة على أنها 'مؤذية'" in label or "غير آمن" in label:
+        return "1"
+    if "label: harmful" in label or "is harmful" in label:
+        return "1"
+
+    # "لأنها لا تحتوي على محتوى ضار" ("because it contains no harmful
+    # content") is a not-harmful verdict, so it is grouped with the "0" cues.
+    if (
+        "غير ضارة" in label
+        or "لا يحتوي على محتوى ضار" in label
+        or "لأنها لا تحتوي على محتوى ضار" in label
+        or "غير" in label
+        or "ليس" in label
+        or "no" in label
+    ):
+        return "0"
+
+    return "1"
diff --git a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_ZeroShot_Arabic.py
new file mode 100755
index 00000000..42592efa
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_ZeroShot_Arabic.py
@@ -0,0 +1,58 @@
+import random
+
+from llmebench.datasets import CT22HarmfulDataset
+from llmebench.models import FastChatModel
+from llmebench.tasks import HarmfulDetectionTask
+
+
+def metadata():
+    """Return the author, affiliation, model and description of this asset."""
+    info = {"author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam"}
+    info["affiliation"] = "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)"
+    info["model"] = "jais-13b-chat"
+    info["description"] = "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)."
+    return info
+
+
+def config():
+    """Return the dataset, task, model and run arguments for this asset."""
+    settings = {"dataset": CT22HarmfulDataset, "task": HarmfulDetectionTask}
+    settings["model"] = FastChatModel
+    settings["model_args"] = {
+        "class_labels": ["0", "1"],
+        "max_tries": 30,
+    }
+    # Zero-shot asset: only a test split is configured.
+    settings["general_args"] = {"test_split": "ar"}
+    return settings
+
+
+def prompt(input_sample):
+    """Return the single user message for one zero-shot query.
+
+    The message holds the instruction, the tweet and an empty label line.
+    """
+    instruction = "صنف التغريدة التالية على أنها 'ضارة' أو 'غير ضارة'. قدم التصنيف فقط.\n\n"
+    return [
+        {
+            "role": "user",
+            "content": instruction + "تغريدة: " + input_sample + "\nالتصنيف: ",
+        }
+    ]
+
+
+def post_process(response):
+    """Map the raw model reply to "0" (not harmful), "1" (harmful) or None."""
+    reply = response["choices"][0]["message"]["content"].lower()
+    print("label: ", reply)
+    # Plain substring tests: "ai" also matches inside longer words.
+    if any(marker in reply for marker in ("لا يمكنني", "ai")):
+        return None
+    not_harmful_markers = (
+        "غير ضارة",
+        "لا يحتوي على محتوى ضار",
+        "غير",
+        "ليست",
+    )
+    # Any reply without a "not harmful" marker counts as harmful.
+    return "0" if any(m in reply for m in not_harmful_markers) else "1"
diff --git a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_ZeroShot_English.py
new file mode 100755
index 00000000..273545b3
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_ZeroShot_English.py
@@ -0,0 +1,59 @@
+from llmebench.datasets import CT22HarmfulDataset
+from llmebench.models import FastChatModel
+from llmebench.tasks import HarmfulDetectionTask
+
+
+def metadata():
+    """Return the author, affiliation, model and description of this asset."""
+    info = {"author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam"}
+    info["affiliation"] = "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)"
+    info["model"] = "jais-13b-chat"
+    info["description"] = "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)."
+    return info
+
+
+def config():
+    """Return the dataset, task, model and run arguments for this asset."""
+    settings = {"dataset": CT22HarmfulDataset, "task": HarmfulDetectionTask}
+    settings["model"] = FastChatModel
+    settings["model_args"] = {
+        "class_labels": ["0", "1"],
+        "max_tries": 30,
+    }
+    # Zero-shot asset: only a test split is configured.
+    settings["general_args"] = {"test_split": "ar"}
+    return settings
+
+
+def prompt(input_sample):
+    """Return the single user message for one zero-shot query.
+
+    The message holds the instruction, the tweet and an empty label line.
+    """
+    instruction = "Classify the following tweet as 'harmful' or 'not_harmful'. Provide only label.\n\n"
+    query = "tweet: {}\n".format(input_sample) + "label: \n"
+    user_message = {
+        "role": "user",
+        "content": instruction + query,
+    }
+    return [user_message]
+
+
+def post_process(response):
+    """Map the raw model reply to "0" (not harmful), "1" (harmful) or None."""
+    reply = response["choices"][0]["message"]["content"].lower()
+    print("label: ", reply)
+    # An explicit "harmful" verdict is honoured before any other marker.
+    if any(marker in reply for marker in ("label: harmful", "is harmful")):
+        return "1"
+    if any(marker in reply for marker in ("لا يمكنني", "ai", "لا يمكن")):
+        return None
+    not_harmful_markers = (
+        "غير ضارة",
+        "لا يحتوي على محتوى ضار",
+        "غير",
+        "ليست",
+        "not",
+    )
+    # Any reply without a "not harmful" marker counts as harmful.
+    return "0" if any(m in reply for m in not_harmful_markers) else "1"
diff --git a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_ZeroShot_Mixed.py
new file mode 100755
index 00000000..8fd926a5
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_JAIS13b_ZeroShot_Mixed.py
@@ -0,0 +1,59 @@
+import random
+
+from llmebench.datasets import CT22HarmfulDataset
+from llmebench.models import FastChatModel
+from llmebench.tasks import HarmfulDetectionTask
+
+
+def metadata():
+    """Return the author, affiliation, model and description of this asset."""
+    info = {"author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam"}
+    info["affiliation"] = "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)"
+    info["model"] = "jais-13b-chat"
+    info["description"] = "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)."
+    return info
+
+
+def config():
+    """Return the dataset, task, model and run arguments for this asset."""
+    settings = {"dataset": CT22HarmfulDataset, "task": HarmfulDetectionTask}
+    settings["model"] = FastChatModel
+    settings["model_args"] = {
+        "class_labels": ["0", "1"],
+        "max_tries": 30,
+    }
+    # Zero-shot asset: only a test split is configured.
+    settings["general_args"] = {"test_split": "ar"}
+    return settings
+
+
+def prompt(input_sample):
+    """Return the single user message for one zero-shot query.
+
+    The message holds the instruction, the tweet and an empty label line.
+    """
+    instruction = "صنف التغريدة التالية على أنها 'harmful' أو 'not_harmful'. قدم التصنيف فقط.\n\n"
+    return [
+        {
+            "role": "user",
+            "content": instruction + "تغريدة: " + input_sample + "\nالتصنيف: ",
+        }
+    ]
+
+
+def post_process(response):
+    """Map the raw model reply to "0" (not harmful), "1" (harmful) or None.
+
+    Matching is by substring on the lower-cased reply; None means that no
+    known cue was found.
+    """
+    reply = response["choices"][0]["message"]["content"].lower()
+
+    # Negations are checked first so "not harmful" is not read as harmful.
+    if any(cue in reply for cue in ("غير", "ليس", "not")):
+        return "0"
+    harmful_cues = ("ضار", "مس", "مؤ", "harm", "مض")
+    if any(cue in reply for cue in harmful_cues):
+        return "1"
+
+    return None
diff --git a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Llama3-8b_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Llama3-8b_FewShot_Arabic.py
new file mode 100755
index 00000000..26d6281c
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Llama3-8b_FewShot_Arabic.py
@@ -0,0 +1,66 @@
+import random
+
+from llmebench.datasets import CT22HarmfulDataset
+from llmebench.models import AzureModel
+from llmebench.tasks import HarmfulDetectionTask
+
+
+def metadata():  # Authorship, affiliation, model name and paper links for this asset.
+    return {
+        "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam",
+        "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)",
+        "model": "Llama-3.1-8B-Instruct",
+        "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).",
+    }
+
+
+def config():  # Asset setup: dataset, task and model classes plus their arguments.
+    return {
+        "dataset": CT22HarmfulDataset,
+        "task": HarmfulDetectionTask,
+        "model": AzureModel,
+        "model_args": {
+            "class_labels": ["0", "1"],
+            "max_tries": 3,
+        },
+        "general_args": {"test_split": "ar", "fewshot": {"train_split": "ar"}},
+    }
+
+
+def few_shot_prompt(input_sample, base_prompt, examples):
+    """Return base_prompt, one labelled block per example, then the open query."""
+    parts = [base_prompt + "\n"]
+    for shot in examples:
+        # Gold label "0" is shown as "غير ضارة"; any other value as "ضارة".
+        shown = "غير ضارة" if shot["label"] == "0" else "ضارة"
+        parts.append("التغريدة: " + shot["input"] + "\nالتصنيف: " + shown + "\n\n")
+
+    # The final block carries the target tweet with its label left empty.
+    parts.append("التغريدة: " + input_sample + "\nالتصنيف: \n")
+    return "".join(parts)
+
+
+def prompt(input_sample, examples):
+    """Wrap the Arabic few-shot prompt in a single user chat message."""
+    # Instruction text; the two label names it offers are 'ضارة' and 'غير ضارة'.
+    instruction = "صنف التغريدة التالية على أنها 'ضارة' أو 'غير ضارة'. قدم التصنيف فقط."
+    user_text = few_shot_prompt(input_sample, instruction, examples)
+    message = {"role": "user", "content": user_text}
+
+    return [message]
+
+
+def post_process(response):
+    """Map the raw model reply to "0" (not harmful), "1" (harmful) or None."""
+    if "output" not in response:
+        # No text came back; without this guard `label` would be unbound.
+        return None
+    label = response["output"].strip().lower()
+    print("label: ", label)
+
+    # Check the negated form first: "غير ضارة" also contains "ضارة".
+    if "غير ضارة" in label or "لا" in label:
+        return "0"
+    if "ضارة" in label or "نعم" in label:
+        return "1"
+    return None
diff --git a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Llama3-8b_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Llama3-8b_FewShot_English.py
new file mode 100755
index 00000000..2495bdab
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Llama3-8b_FewShot_English.py
@@ -0,0 +1,71 @@
+import random
+
+from llmebench.datasets import CT22HarmfulDataset
+from llmebench.models import AzureModel
+from llmebench.tasks import HarmfulDetectionTask
+
+
+def metadata():  # Authorship, affiliation, model name and paper links for this asset.
+    return {
+        "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam",
+        "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)",
+        "model": "Llama-3.1-8B-Instruct",
+        "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).",
+    }
+
+
+def config():  # Asset setup: dataset, task and model classes plus their arguments.
+    return {
+        "dataset": CT22HarmfulDataset,
+        "task": HarmfulDetectionTask,
+        "model": AzureModel,
+        "model_args": {
+            "class_labels": ["0", "1"],
+            "max_tries": 3,
+        },
+        "general_args": {"test_split": "ar", "fewshot": {"train_split": "ar"}},
+    }
+
+
+def few_shot_prompt(input_sample, base_prompt, examples):
+    """Assemble the English few-shot prompt.
+
+    Layout: base_prompt, then a "tweet:/label:" block for every example, then
+    the target tweet with an empty label.
+    """
+    blocks = [base_prompt + "\n"]
+    for shot in examples:
+        # Show word labels in place of the dataset's "0"/"1" codes.
+        word = "not_harmful" if shot["label"] == "0" else "harmful"
+        blocks.append("tweet: " + shot["input"] + "\nlabel: " + word + "\n\n")
+
+    # Target tweet goes last, label left blank.
+    blocks.append("tweet: " + input_sample + "\nlabel: \n")
+
+    return "".join(blocks)
+
+
+def prompt(input_sample, examples):
+    """Wrap the English few-shot prompt in a single user chat message."""
+    # Instruction text placed ahead of the worked examples.
+    instruction = "Classify the following tweet as 'harmful' or 'not_harmful'. Provide only label."
+    user_text = few_shot_prompt(input_sample, instruction, examples)
+    message = {"role": "user", "content": user_text}
+
+    return [message]
+
+
+def post_process(response):
+    """Map the raw model reply to "0" (not_harmful), "1" (harmful) or None."""
+    if "output" not in response:
+        # No text came back; without this guard `label` would be unbound.
+        return None
+    label = response["output"].strip().lower()
+    print("label: ", label)
+
+    # "not_harmful" may appear anywhere; "harmful" must be the whole reply.
+    if "not_harmful" in label:
+        return "0"
+    if label == "harmful":
+        return "1"
+    return None
diff --git a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Llama3-8b_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Llama3-8b_FewShot_Mixed.py
new file mode 100755
index 00000000..cb1587b5
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Llama3-8b_FewShot_Mixed.py
@@ -0,0 +1,66 @@
+import random
+
+from llmebench.datasets import CT22HarmfulDataset
+from llmebench.models import AzureModel
+from llmebench.tasks import HarmfulDetectionTask
+
+
+def metadata():  # Authorship, affiliation, model name and paper links for this asset.
+    return {
+        "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam",
+        "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)",
+        "model": "Llama-3.1-8B-Instruct",
+        "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).",
+    }
+
+
+def config():  # Asset setup: dataset, task and model classes plus their arguments.
+    return {
+        "dataset": CT22HarmfulDataset,
+        "task": HarmfulDetectionTask,
+        "model": AzureModel,
+        "model_args": {
+            "class_labels": ["0", "1"],
+            "max_tries": 3,
+        },
+        "general_args": {"test_split": "ar", "fewshot": {"train_split": "ar"}},
+    }
+
+
+def few_shot_prompt(input_sample, base_prompt, examples):
+    """Return base_prompt, one labelled block per example, then the open query."""
+    parts = [base_prompt + "\n"]
+    for shot in examples:
+        # Labels stay in English: "0" -> "not_harmful", anything else -> "harmful".
+        shown = "not_harmful" if shot["label"] == "0" else "harmful"
+        parts.append("التغريدة: " + shot["input"] + "\nالتصنيف: " + shown + "\n\n")
+
+    # The final block carries the target tweet with its label left empty.
+    parts.append("التغريدة: " + input_sample + "\nالتصنيف: \n")
+    return "".join(parts)
+
+
+def prompt(input_sample, examples):
+    """Wrap the mixed-language few-shot prompt in a single user chat message."""
+    # Arabic instruction that names the English labels 'harmful'/'not_harmful'.
+    instruction = (
+        "صنف التغريدة التالية على أنها 'harmful' أو 'not_harmful'. قدم التصنيف فقط.\n\n"
+    )
+    user_text = few_shot_prompt(input_sample, instruction, examples)
+    message = {"role": "user", "content": user_text}
+
+    return [message]
+
+
+def post_process(response):
+    """Map the raw model reply to "0", "1" or None (missing reply or refusal)."""
+    if "output" not in response:
+        # No text came back; without this guard `label` would be unbound.
+        return None
+    label = response["output"].strip().lower()
+    print("label: ", label)
+    if "لا أستطيع تقديم هذا النوع من الإجابات." in label:
+        return None
+    if "غير" in label or "ليس" in label or "not" in label:
+        return "0"
+    return "1"  # every other reply is counted as harmful
diff --git a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Llama3-8b_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Llama3-8b_ZeroShot_Arabic.py
new file mode 100755
index 00000000..90d4fd20
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Llama3-8b_ZeroShot_Arabic.py
@@ -0,0 +1,57 @@
+import random
+
+from llmebench.datasets import CT22HarmfulDataset
+from llmebench.models import AzureModel
+from llmebench.tasks import HarmfulDetectionTask
+
+
+def metadata():  # Authorship, affiliation, model name and paper links for this asset.
+    return {
+        "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam",
+        "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)",
+        "model": "Llama-3.1-8B-Instruct",
+        "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).",
+    }
+
+
+def config():  # Asset setup: dataset, task and model classes plus their arguments.
+    return {
+        "dataset": CT22HarmfulDataset,
+        "task": HarmfulDetectionTask,
+        "model": AzureModel,
+        "model_args": {
+            "class_labels": ["0", "1"],
+            "max_tries": 30,
+        },
+        "general_args": {"test_split": "ar"},
+    }
+
+
+def prompt(input_sample):
+    """Build the one-turn Arabic zero-shot chat prompt for one tweet."""
+    text = (
+        "صنف التغريدة التالية على أنها 'ضارة' أو 'غير ضارة'. قدم التصنيف فقط.\n\n"
+        + "تغريدة: "
+        + input_sample
+        # A real newline here; "\ا" is not a valid escape and emits a backslash.
+        + "\nالتصنيف: "
+    )
+    message = {"role": "user", "content": text}
+
+    return [message]
+
+
+def post_process(response):
+    """Map the raw model reply to "0" (not harmful), "1" (harmful) or None."""
+    if "output" not in response:
+        # No text came back; without this guard `label` would be unbound.
+        return None
+    label = response["output"].strip().lower()
+    print("label: ", label)
+
+    # Check the negated form first: "غير ضارة" also contains "ضارة".
+    if "غير ضارة" in label or "لا" in label:
+        return "0"
+    if "ضارة" in label or "نعم" in label:
+        return "1"
+    return None
diff --git a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Llama3-8b_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Llama3-8b_ZeroShot_English.py
new file mode 100755
index 00000000..e2d8107d
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Llama3-8b_ZeroShot_English.py
@@ -0,0 +1,57 @@
+import random
+
+from llmebench.datasets import CT22HarmfulDataset
+from llmebench.models import AzureModel
+from llmebench.tasks import HarmfulDetectionTask
+
+
+def metadata():  # Authorship, affiliation, model name and paper links for this asset.
+    return {
+        "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam",
+        "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)",
+        "model": "Llama-3.1-8B-Instruct",
+        "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).",
+    }
+
+
+def config():  # Asset setup: dataset, task and model classes plus their arguments.
+    return {
+        "dataset": CT22HarmfulDataset,
+        "task": HarmfulDetectionTask,
+        "model": AzureModel,
+        "model_args": {
+            "class_labels": ["0", "1"],
+            "max_tries": 30,
+        },
+        "general_args": {"test_split": "ar"},
+    }
+
+
+def prompt(input_sample):
+    """Build the one-turn English zero-shot chat prompt for one tweet."""
+    text = (
+        "Classify the following tweet as 'harmful' or 'not_harmful'. Provide only label.\n\n"
+        + "tweet: "
+        + input_sample
+        # The prompt ends on an open "label:" cue.
+        + "\nlabel: "
+    )
+    message = {"role": "user", "content": text}
+
+    return [message]
+
+
+def post_process(response):
+    """Map the raw model reply to "0" (not_harmful), "1" (harmful) or None."""
+    if "output" not in response:
+        # No text came back; without this guard `label` would be unbound.
+        return None
+    label = response["output"].strip().lower()
+    print("label: ", label)
+
+    # "not_harmful" may appear anywhere; "harmful" must be the whole reply.
+    if "not_harmful" in label:
+        return "0"
+    if label == "harmful":
+        return "1"
+    return None
diff --git a/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Llama3-8b_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Llama3-8b_ZeroShot_Mixed.py
new file mode 100755
index 00000000..130505ee
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/harmfulness_detection/CT22Harmful_Llama3-8b_ZeroShot_Mixed.py
@@ -0,0 +1,55 @@
+import random
+
+from llmebench.datasets import CT22HarmfulDataset
+from llmebench.models import AzureModel
+from llmebench.tasks import HarmfulDetectionTask
+
+
+def metadata():  # Authorship, affiliation, model name and paper links for this asset.
+    return {
+        "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam",
+        "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)",
+        "model": "Llama-3.1-8B-Instruct",
+        "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).",
+    }
+
+
+def config():  # Asset setup: dataset, task and model classes plus their arguments.
+    return {
+        "dataset": CT22HarmfulDataset,
+        "task": HarmfulDetectionTask,
+        "model": AzureModel,
+        "model_args": {
+            "class_labels": ["0", "1"],
+            "max_tries": 30,
+        },
+        "general_args": {"test_split": "ar"},
+    }
+
+
+def prompt(input_sample):
+    """Build the one-turn chat prompt (Arabic instruction, English label names)."""
+    text = (
+        "صنف التغريدة التالية على أنها 'harmful' أو 'not_harmful'. قدم التصنيف فقط.\n\n"
+        + "تغريدة: "
+        + input_sample
+        # The newline keeps the answer cue off the end of the tweet text.
+        + "\nالتصنيف: "
+    )
+    message = {"role": "user", "content": text}
+
+    return [message]
+
+
+def post_process(response):
+    """Map the raw model reply to "0", "1" or None (missing reply or refusal)."""
+    if "output" not in response:
+        # No text came back; without this guard `label` would be unbound.
+        return None
+    label = response["output"].strip().lower()
+    print("label: ", label)
+    if "لا أستطيع تقديم هذا النوع من الإجابات." in label:
+        return None
+    if "غير" in label or "ليس" in label or "not" in label:
+        return "0"
+    return "1"  # every other reply is counted as harmful