From 98f082a2e4cb851841dd041031788be8972f5a6d Mon Sep 17 00:00:00 2001
From: MohamedBayan <118048819+MohamedBayan@users.noreply.github.com>
Date: Mon, 20 Jan 2025 16:34:02 +0300
Subject: [PATCH] Wise claim detection (#380)

* Add propaganda assets

* Fix errors

* Add wise-claim_detection assets
---
 .../CT22Claim_GPT4_FewShot_Arabic.py          | 88 ++++++++++++++++++
 .../CT22Claim_GPT4_FewShot_English.py         | 84 +++++++++++++++++
 .../CT22Claim_GPT4_FewShot_Mixed.py           | 89 ++++++++++++++++++
 .../CT22Claim_GPT4_ZeroShot_Arabic.py         | 71 +++++++++++++++
 .../CT22Claim_GPT4_ZeroShot_English.py        | 69 ++++++++++++++
 .../CT22Claim_GPT4_ZeroShot_Mixed.py          | 72 +++++++++++++++
 .../CT22Claim_JAIS13b_FewShot_Arabic.py       | 73 +++++++++++++++
 .../CT22Claim_JAIS13b_FewShot_English.py      | 85 ++++++++++++++++++
 .../CT22Claim_JAIS13b_FewShot_Mixed.py        | 90 +++++++++++++++++++
 .../CT22Claim_JAIS13b_ZeroShot_Arabic.py      | 61 +++++++++++++
 .../CT22Claim_JAIS13b_ZeroShot_English.py     | 61 +++++++++++++
 .../CT22Claim_JAIS13b_ZeroShot_Mixed.py       | 78 ++++++++++++++++
 .../CT22Claim_Llama3-8b_FewShot_Arabic.py     | 80 +++++++++++++++++
 .../CT22Claim_Llama3-8b_FewShot_English.py    | 73 +++++++++++++++
 .../CT22Claim_Llama3-8b_FewShot_Mixed.py      | 78 ++++++++++++++++
 .../CT22Claim_Llama3-8b_ZeroShot_Arabic.py    | 75 ++++++++++++++++
 .../CT22Claim_Llama3-8b_ZeroShot_English.py   | 75 ++++++++++++++++
 .../CT22Claim_Llama3-8b_ZeroShot_Mixed.py     | 75 ++++++++++++++++
 18 files changed, 1377 insertions(+)
 create mode 100755 assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_FewShot_Arabic.py
 create mode 100755 assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_FewShot_English.py
 create mode 100755 assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_FewShot_Mixed.py
 create mode 100755 assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_ZeroShot_Arabic.py
 create mode 100755 assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_ZeroShot_English.py
 create mode 100755 assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_ZeroShot_Mixed.py
 create mode 100755 assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_FewShot_Arabic.py
 create mode 100755 assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_FewShot_English.py
 create mode 100755 assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_FewShot_Mixed.py
 create mode 100755 assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_ZeroShot_Arabic.py
 create mode 100755 assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_ZeroShot_English.py
 create mode 100755 assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_ZeroShot_Mixed.py
 create mode 100755 assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_FewShot_Arabic.py
 create mode 100755 assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_FewShot_English.py
 create mode 100755 assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_FewShot_Mixed.py
 create mode 100755 assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_ZeroShot_Arabic.py
 create mode 100755 assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_ZeroShot_English.py
 create mode 100755 assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_ZeroShot_Mixed.py

diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_FewShot_Arabic.py
new file mode 100755
index 00000000..ad2ec15a
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_FewShot_Arabic.py
@@ -0,0 +1,88 @@
+from llmebench.datasets import CT22ClaimDataset
+from llmebench.models import OpenAIModel
+from llmebench.tasks import ClaimDetectionTask
+
+
+def metadata():
+    """Return the asset card: authors, affiliation, evaluated model and paper links."""
+    authors = "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam"
+    lab = "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)"
+    paper = "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)."
+    # Keys are emitted in the order author, affiliation, model, description.
+    return {"author": authors, "affiliation": lab, "model": "GPT-4o-2024-05-22", "description": paper}
+
+
+def config():
+    """Return the run configuration: dataset, task and model classes plus their argument dicts."""
+    general_args = {"test_split": "ar", "fewshot": {"train_split": "ar"}}
+    return {
+        "dataset": CT22ClaimDataset,
+        "task": ClaimDetectionTask,
+        "model": OpenAIModel,
+        "model_args": {"max_tries": 3},
+        "general_args": general_args,
+    }
+
+
+def prompt(input_sample, examples):
+    """Build the chat messages (system + user) for the Arabic few-shot prompt.
+
+    Args:
+        input_sample: Tweet text to classify.
+        examples: Few-shot examples, each with "input" and "label" keys.
+
+    Returns:
+        A two-element list of {"role", "content"} message dicts.
+    """
+    instruction = "هل تحتوي هذه التغريدة على ادعاء؟ أجب فقط بـ 'نعم' أو 'لا'. قدم التصنيف فقط.\n"
+    user_text = few_shot_prompt(input_sample, instruction, examples)
+    system_msg = {"role": "system", "content": "أنت خبير في تحليل و تصنيف التغريدات."}
+    user_msg = {"role": "user", "content": user_text}
+    # System message first, then the user turn carrying instruction + examples.
+    return [system_msg, user_msg]
+
+
+def few_shot_prompt(input_sample, base_prompt, examples):
+    """Assemble the few-shot user prompt: instruction, labelled examples, then the query.
+
+    Each tweet and its label are placed on separate lines; previously the newline
+    was missing, which glued "التصنيف:" onto the end of the tweet text.
+
+    Returns the prompt string, ending with an empty "التصنيف:" slot for the query.
+    """
+    parts = [base_prompt + "\n"]
+    for example in examples:
+        # Word labels are used because the model got confused by 0 and 1 in the prompt.
+        label = "لا" if example["label"] == "0" else "نعم"
+        parts.append("التغريدة: " + example["input"] + "\nالتصنيف: " + label + "\n\n")
+    # The final entry is the tweet to classify, with its label left blank.
+    parts.append("التغريدة: " + input_sample + "\nالتصنيف: \n")
+    return "".join(parts)
+
+
+def post_process(response):
+    """Map the model's free-text reply to "0" (no claim), "1" (claim) or None.
+
+    Args:
+        response: Chat-completion style dict; the reply text is read from
+            response["choices"][0]["message"]["content"].
+
+    Returns:
+        "0" if any negative marker occurs, otherwise "1" if any positive
+        marker occurs, otherwise None.
+    """
+    text = response["choices"][0]["message"]["content"]
+    text = text.replace(".", "").strip().lower()
+
+    # Negative markers are tested first, so a reply containing both kinds maps to "0".
+    negative_markers = (
+        "لا", "لا تحتوي", "ليست", "not",
+        "label: 0", "label: no", "not contain", "doesn't contain",
+    )
+    positive_markers = ("نعم", "تحتوي", "yes", "contains", "label: 1")
+
+    if any(marker in text for marker in negative_markers):
+        return "0"
+    if any(marker in text for marker in positive_markers):
+        return "1"
+    return None
diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_FewShot_English.py
new file mode 100755
index 00000000..18374d44
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_FewShot_English.py
@@ -0,0 +1,84 @@
+from llmebench.datasets import CT22ClaimDataset
+from llmebench.models import OpenAIModel
+from llmebench.tasks import ClaimDetectionTask
+
+
+def metadata():
+    """Return the asset card: authors, affiliation, evaluated model and paper links."""
+    authors = "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam"
+    lab = "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)"
+    paper = "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)."
+    # Keys are emitted in the order author, affiliation, model, description.
+    return {"author": authors, "affiliation": lab, "model": "GPT-4o-2024-05-22", "description": paper}
+
+
+def config():
+    """Return the run configuration: dataset, task and model classes plus their argument dicts."""
+    general_args = {"test_split": "ar", "fewshot": {"train_split": "ar"}}
+    return {
+        "dataset": CT22ClaimDataset,
+        "task": ClaimDetectionTask,
+        "model": OpenAIModel,
+        "model_args": {"max_tries": 3},
+        "general_args": general_args,
+    }
+
+
+def prompt(input_sample, examples):
+    """Build the chat messages (system + user) for the English few-shot prompt.
+
+    Args:
+        input_sample: Tweet text to classify.
+        examples: Few-shot examples, each with "input" and "label" keys.
+
+    Returns:
+        A two-element list of {"role", "content"} message dicts.
+    """
+    instruction = "Does the following tweet contain a factual claim? If it does, return 'yes', if it does not, return 'no'. Provide only label.\n"
+    user_text = few_shot_prompt(input_sample, instruction, examples)
+    system_msg = {"role": "system", "content": "You are expert in text analysis and classification."}
+    return [system_msg, {"role": "user", "content": user_text}]
+
+
+def few_shot_prompt(input_sample, base_prompt, examples):
+    """Assemble the few-shot user prompt: instruction, labelled examples, then the query.
+
+    Args:
+        input_sample: Tweet text whose label slot is left empty.
+        base_prompt: Instruction text placed at the top.
+        examples: Iterable of dicts with "input" and "label" ("0" maps to "no", anything else to "yes").
+
+    Returns:
+        The complete prompt string.
+    """
+    header = base_prompt + "\n"
+    # Word labels are used because the model got confused by 0 and 1 in the prompt.
+    shots = ["tweet: " + ex["input"] + "\nlabel: " + ("no" if ex["label"] == "0" else "yes") + "\n\n" for ex in examples]
+    query = "tweet: " + input_sample + "\nlabel: \n"
+    return header + "".join(shots) + query
+
+
+def post_process(response):
+    """Map the model's reply to "1" (claim), "0" (no claim) or None.
+
+    Args:
+        response: Chat-completion style dict; the reply text is read from
+            response["choices"][0]["message"]["content"].
+
+    Returns:
+        "0" when a negative pattern matches (even if a positive one also does),
+        "1" when only a positive pattern matches, otherwise None.
+    """
+    text = response["choices"][0]["message"]["content"].replace(".", "").strip().lower()
+
+    negative_phrases = (
+        "label: 0",
+        "label: no",
+        "not contain a factual claim",
+        "doesn't contain a factual claim",
+    )
+    positive_phrases = ("yes", "contains a factual claim", "label: 1")
+    # A bare "no" must be the entire reply; the other negatives may appear anywhere.
+    if text == "no" or any(phrase in text for phrase in negative_phrases):
+        return "0"
+    return "1" if any(phrase in text for phrase in positive_phrases) else None
diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_FewShot_Mixed.py
new file mode 100755
index 00000000..71f84ca0
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_FewShot_Mixed.py
@@ -0,0 +1,89 @@
+from llmebench.datasets import CT22ClaimDataset
+from llmebench.models import OpenAIModel
+from llmebench.tasks import ClaimDetectionTask
+
+
+def metadata():
+    """Return the asset card: authors, affiliation, evaluated model and paper links."""
+    authors = "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam"
+    lab = "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)"
+    paper = "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)."
+    # Keys are emitted in the order author, affiliation, model, description.
+    return {"author": authors, "affiliation": lab, "model": "GPT-4o-2024-05-22", "description": paper}
+
+
+def config():
+    """Return the run configuration: dataset, task and model classes plus their argument dicts."""
+    general_args = {"test_split": "ar", "fewshot": {"train_split": "ar"}}
+    return {
+        "dataset": CT22ClaimDataset,
+        "task": ClaimDetectionTask,
+        "model": OpenAIModel,
+        "model_args": {"max_tries": 3},
+        "general_args": general_args,
+    }
+
+
+def prompt(input_sample, examples):
+    """Build the chat messages (system + user) for the mixed-language few-shot prompt.
+
+    Args:
+        input_sample: Tweet text to classify.
+        examples: Few-shot examples, each with "input" and "label" keys.
+
+    Returns:
+        A two-element list of {"role", "content"} message dicts.
+    """
+    instruction = "هل تحتوي هذه التغريدة على ادعاء؟ أجب فقط بـ 'yes' أو 'no'. قدم التصنيف فقط.\n"
+    user_text = few_shot_prompt(input_sample, instruction, examples)
+    system_msg = {"role": "system", "content": "أنت خبير في تحليل و تصنيف التغريدات."}
+    user_msg = {"role": "user", "content": user_text}
+    # System message first, then the user turn carrying instruction + examples.
+    return [system_msg, user_msg]
+
+
+def few_shot_prompt(input_sample, base_prompt, examples):
+    """Assemble the few-shot user prompt: instruction, labelled examples, then the query.
+
+    The label line now starts with a real newline. The source previously had a
+    backslash directly before "التصنيف" (an invalid escape sequence), which put a
+    literal backslash into the prompt instead of a line break.
+
+    Returns the prompt string, ending with an empty "التصنيف:" slot for the query.
+    """
+    parts = [base_prompt + "\n"]
+    for example in examples:
+        # Word labels are used because the model got confused by 0 and 1 in the prompt.
+        label = "no" if example["label"] == "0" else "yes"
+        parts.append("التغريدة: " + example["input"] + "\nالتصنيف: " + label + "\n\n")
+    parts.append("التغريدة: " + input_sample + "\nالتصنيف: \n")
+    return "".join(parts)
+
+
+def post_process(response):
+    """Map the model's free-text reply to "0" (no claim), "1" (claim) or None.
+
+    Args:
+        response: Chat-completion style dict; the reply text is read from
+            response["choices"][0]["message"]["content"].
+
+    Returns:
+        "0" if any negative marker occurs, otherwise "1" if any positive
+        marker occurs, otherwise None.
+    """
+    text = response["choices"][0]["message"]["content"]
+    text = text.replace(".", "").strip().lower()
+
+    # Negative markers are tested first, so a reply containing both kinds maps to "0".
+    negative_markers = (
+        "لا", "لا تحتوي", "ليست",
+        "not", "no",
+        "label: 0", "label: no", "not contain", "doesn't contain",
+    )
+    positive_markers = ("نعم", "تحتوي", "yes", "contains", "label: 1")
+
+    if any(marker in text for marker in negative_markers):
+        return "0"
+    if any(marker in text for marker in positive_markers):
+        return "1"
+    return None
diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_ZeroShot_Arabic.py
new file mode 100755
index 00000000..b4281930
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_ZeroShot_Arabic.py
@@ -0,0 +1,71 @@
+from llmebench.datasets import CT22ClaimDataset
+from llmebench.models import OpenAIModel
+from llmebench.tasks import ClaimDetectionTask
+
+
+def metadata():
+    """Return the asset card: authors, affiliation, evaluated model and paper links."""
+    authors = "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam"
+    lab = "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)"
+    paper = "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)."
+    # Keys are emitted in the order author, affiliation, model, description.
+    return {"author": authors, "affiliation": lab, "model": "GPT-4o-2024-05-22", "description": paper}
+
+
+def config():
+    """Return the run configuration: dataset, task and model classes plus their argument dicts."""
+    # model_args is built separately; its keys stay in the order class_labels, max_tries.
+    model_args = {"class_labels": ["0", "1"], "max_tries": 30}
+    return {
+        "dataset": CT22ClaimDataset,
+        "task": ClaimDetectionTask,
+        "model": OpenAIModel,
+        "model_args": model_args,
+        "general_args": {"test_split": "ar"},
+    }
+
+
+def prompt(input_sample):
+    """Build the chat messages (system + user) for the Arabic zero-shot prompt.
+
+    Args:
+        input_sample: Tweet text to classify.
+
+    Returns:
+        A two-element list of {"role", "content"} message dicts.
+    """
+    instruction = "هل تحتوي هذه التغريدة على ادعاء؟ أجب فقط بـ 'نعم' أو 'لا'. قدم التصنيف فقط.\n"
+    # Instruction, then the tweet, then an empty label slot for the model to fill.
+    user_text = f"{instruction}التغريدة: {input_sample}\nالتصنيف: \n"
+    return [
+        {"role": "system", "content": "أنت خبير في تحليل و تصنيف التغريدات."},
+        {"role": "user", "content": user_text},
+    ]
+
+
+def post_process(response):
+    """Map the model's free-text reply to "0" (no claim), "1" (claim) or None.
+
+    Args:
+        response: Chat-completion style dict; the reply text is read from
+            response["choices"][0]["message"]["content"].
+
+    Returns:
+        "0" if any negative marker occurs, otherwise "1" if any positive
+        marker occurs, otherwise None.
+    """
+    text = response["choices"][0]["message"]["content"]
+    text = text.replace(".", "").strip().lower()
+
+    # Negative markers are tested first, so a reply containing both kinds maps to "0".
+    negative_markers = (
+        "لا", "لا تحتوي", "ليست", "not",
+        "label: 0", "label: no", "not contain", "doesn't contain",
+    )
+    positive_markers = ("نعم", "تحتوي", "yes", "contains", "label: 1")
+
+    if any(marker in text for marker in negative_markers):
+        return "0"
+    if any(marker in text for marker in positive_markers):
+        return "1"
+    return None
diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_ZeroShot_English.py
new file mode 100755
index 00000000..2bdb67cf
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_ZeroShot_English.py
@@ -0,0 +1,69 @@
+from llmebench.datasets import CT22ClaimDataset
+from llmebench.models import OpenAIModel
+from llmebench.tasks import ClaimDetectionTask
+
+
+def metadata():
+    """Return the asset card: authors, affiliation, evaluated model and paper links."""
+    authors = "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam"
+    lab = "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)"
+    paper = "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)."
+    # Keys are emitted in the order author, affiliation, model, description.
+    return {"author": authors, "affiliation": lab, "model": "GPT-4o-2024-05-22", "description": paper}
+
+
+def config():
+    """Return the run configuration: dataset, task and model classes plus their argument dicts."""
+    # model_args is built separately; its keys stay in the order class_labels, max_tries.
+    model_args = {"class_labels": ["0", "1"], "max_tries": 30}
+    return {
+        "dataset": CT22ClaimDataset,
+        "task": ClaimDetectionTask,
+        "model": OpenAIModel,
+        "model_args": model_args,
+        "general_args": {"test_split": "ar"},
+    }
+
+
+def prompt(input_sample):
+    """Build the chat messages (system + user) for the English zero-shot prompt.
+
+    Args:
+        input_sample: Tweet text to classify.
+
+    Returns:
+        A two-element list of {"role", "content"} message dicts.
+    """
+    instruction = "Does the following tweet contain a factual claim? If it does, return 'yes', if it does not, return 'no'. Provide only label.\n\n"
+    # Instruction, then the tweet, then an empty label slot for the model to fill.
+    user_text = f"{instruction}tweet: {input_sample}\nlabel: \n"
+    return [
+        {"role": "system", "content": "You are expert in text analysis and classification."},
+        {"role": "user", "content": user_text},
+    ]
+
+
+def post_process(response):
+    """Map the model's reply to "1" (claim), "0" (no claim) or None.
+
+    Args:
+        response: Chat-completion style dict; the reply text is read from
+            response["choices"][0]["message"]["content"].
+
+    Returns:
+        "0" when a negative pattern matches (even if a positive one also does),
+        "1" when only a positive pattern matches, otherwise None.
+    """
+    text = response["choices"][0]["message"]["content"].replace(".", "").strip().lower()
+
+    negative_phrases = (
+        "label: 0",
+        "label: no",
+        "not contain",
+        "doesn't contain",
+    )
+    positive_phrases = ("yes", "contains a factual claim", "label: 1")
+    # A bare "no" must be the entire reply; the other negatives may appear anywhere.
+    if text == "no" or any(phrase in text for phrase in negative_phrases):
+        return "0"
+    return "1" if any(phrase in text for phrase in positive_phrases) else None
diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_ZeroShot_Mixed.py
new file mode 100755
index 00000000..07e8c1db
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_ZeroShot_Mixed.py
@@ -0,0 +1,72 @@
+from llmebench.datasets import CT22ClaimDataset
+from llmebench.models import OpenAIModel
+from llmebench.tasks import ClaimDetectionTask
+
+
+def metadata():
+    """Return the asset card: authors, affiliation, evaluated model and paper links."""
+    authors = "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam"
+    lab = "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)"
+    paper = "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)."
+    # Keys are emitted in the order author, affiliation, model, description.
+    return {"author": authors, "affiliation": lab, "model": "GPT-4o-2024-05-22", "description": paper}
+
+
+def config():
+    """Return the run configuration: dataset, task and model classes plus their argument dicts."""
+    # model_args is built separately; its keys stay in the order class_labels, max_tries.
+    model_args = {"class_labels": ["0", "1"], "max_tries": 30}
+    return {
+        "dataset": CT22ClaimDataset,
+        "task": ClaimDetectionTask,
+        "model": OpenAIModel,
+        "model_args": model_args,
+        "general_args": {"test_split": "ar"},
+    }
+
+
+def prompt(input_sample):
+    """Build the chat messages (system + user) for the mixed-language zero-shot prompt.
+
+    Args:
+        input_sample: Tweet text to classify.
+
+    Returns:
+        A two-element list of {"role", "content"} message dicts.
+    """
+    instruction = "هل تحتوي هذه التغريدة على ادعاء؟ أجب فقط بـ 'yes' أو 'no'. قدم التصنيف فقط.\n\n"
+    # Instruction, then the tweet, then an empty label slot for the model to fill.
+    user_text = f"{instruction}التغريدة: {input_sample}\nالتصنيف: \n"
+    return [
+        {"role": "system", "content": "أنت خبير في تحليل و تصنيف التغريدات."},
+        {"role": "user", "content": user_text},
+    ]
+
+
+def post_process(response):
+    """Map the model's free-text reply to "0" (no claim), "1" (claim) or None.
+
+    Args:
+        response: Chat-completion style dict; the reply text is read from
+            response["choices"][0]["message"]["content"].
+
+    Returns:
+        "0" if any negative marker occurs, otherwise "1" if any positive
+        marker occurs, otherwise None.
+    """
+    text = response["choices"][0]["message"]["content"]
+    text = text.replace(".", "").strip().lower()
+
+    # Negative markers are tested first, so a reply containing both kinds maps to "0".
+    negative_markers = (
+        "لا", "لا تحتوي", "ليست",
+        "not", "no",
+        "label: 0", "label: no", "not contain", "doesn't contain",
+    )
+    positive_markers = ("نعم", "تحتوي", "yes", "contains", "label: 1")
+
+    if any(marker in text for marker in negative_markers):
+        return "0"
+    if any(marker in text for marker in positive_markers):
+        return "1"
+    return None
diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_FewShot_Arabic.py
new file mode 100755
index 00000000..891d8e61
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_FewShot_Arabic.py
@@ -0,0 +1,73 @@
+from llmebench.datasets import CT22ClaimDataset
+from llmebench.models import FastChatModel
+from llmebench.tasks import ClaimDetectionTask
+
+
+def metadata():
+    """Return the asset card: authors, affiliation, evaluated model and paper links."""
+    authors = "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam"
+    lab = "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)"
+    paper = "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)."
+    # Keys are emitted in the order author, affiliation, model, description.
+    return {"author": authors, "affiliation": lab, "model": "jais-13b-chat", "description": paper}
+
+
+def config():
+    """Return the run configuration: dataset, task and model classes plus their argument dicts."""
+    general_args = {"test_split": "ar", "fewshot": {"train_split": "ar"}}
+    return {
+        "dataset": CT22ClaimDataset,
+        "task": ClaimDetectionTask,
+        "model": FastChatModel,
+        "model_args": {"max_tries": 3},
+        "general_args": general_args,
+    }
+
+
+def prompt(input_sample, few_shot_examples):
+    """Build the single-turn few-shot prompt asking for an Arabic 'نعم'/'لا' answer.
+
+    Example answers are rendered as 'نعم'/'لا' so that they agree with the
+    instruction; they were previously shown as 'yes'/'no'.
+
+    Args:
+        input_sample: Tweet text to classify.
+        few_shot_examples: Iterable of dicts with "input" and "label"; label "1"
+            is rendered as 'نعم', anything else as 'لا'.
+
+    Returns:
+        A one-element list holding the user message dict.
+    """
+    instruction = "هل تحتوي هذه التغريدة على ادعاء يمكن التحقق منه؟ أجب فقط بـ 'نعم' أو 'لا'. قدم فقط الإجابة.\n\n"
+    # Every example contributes a tweet line, an answer line and a blank separator.
+    shots = "".join(
+        "التغريدة: " + example["input"] + "\n"
+        + "الإجابة: " + ("نعم" if example["label"] == "1" else "لا") + "\n\n"
+        for example in few_shot_examples
+    )
+    query = "التغريدة: " + input_sample + "\n" + "الإجابة: "
+    return [
+        {"role": "user", "content": instruction + shots + query},
+    ]
+
+
+def post_process(response):
+    """Map the model's reply to "1" (claim), "0" (no claim) or None.
+
+    An echoed "الإجابة:" or "التصنيف:" prefix is removed first. Both Arabic
+    ('نعم'/'لا') and English ('yes'/'no') answers are accepted; refusals starting
+    with "لا يمكنني" and replies with no recognised answer yield None.
+    """
+    label = response["choices"][0]["message"]["content"]
+    # Drop an echoed answer/label prefix before matching.
+    for prefix in ("الإجابة:", "التصنيف:"):
+        label = label.replace(prefix, "")
+    label = label.strip().lower()
+
+    if "لا يمكنني" in label:
+        return None
+    if "نعم" in label or "yes" in label:
+        return "1"
+    if "لا" in label or "no" in label:
+        return "0"
+    return None
diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_FewShot_English.py
new file mode 100755
index 00000000..ea8e1c53
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_FewShot_English.py
@@ -0,0 +1,85 @@
+from llmebench.datasets import CT22ClaimDataset
+from llmebench.models import FastChatModel
+from llmebench.tasks import ClaimDetectionTask
+
+
+def metadata():
+    """Return the asset card: authors, affiliation, evaluated model and paper links."""
+    authors = "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam"
+    lab = "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)"
+    paper = "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)."
+    # Keys are emitted in the order author, affiliation, model, description.
+    return {"author": authors, "affiliation": lab, "model": "jais-13b-chat", "description": paper}
+
+
+def config():
+    """Return the run configuration: dataset, task and model classes plus their argument dicts."""
+    general_args = {"test_split": "ar", "fewshot": {"train_split": "ar"}}
+    return {
+        "dataset": CT22ClaimDataset,
+        "task": ClaimDetectionTask,
+        "model": FastChatModel,
+        "model_args": {"max_tries": 3},
+        "general_args": general_args,
+    }
+
+
+def prompt(input_sample, examples=None):
+    """Return a one-message chat prompt; few-shot when examples are given, zero-shot otherwise."""
+    instruction = "Does this sentence contain a factual claim? Answer with 'yes' or 'no' only. Provide only the label.\n"
+    if not examples:
+        # No (or empty) examples: the instruction is followed directly by the query sentence.
+        return [{"role": "user", "content": instruction + f"Sentence: {input_sample}\nLabel: "}]
+    content = few_shot_prompt(input_sample, instruction, examples)
+    return [{"role": "user", "content": content}]
+
+
+def few_shot_prompt(input_sample, base_prompt, examples):
+    """Join the instruction, the labelled example sentences and the unlabelled query into one prompt."""
+    pieces = [base_prompt + "\n"]
+    for shot in examples:
+        answer = "yes" if shot["label"] != "0" else "no"
+        pieces.append("Sentence: " + shot["input"] + "\nLabel: " + answer + "\n\n")
+    pieces.append("Sentence: " + input_sample + "\nLabel: ")
+    return "".join(pieces)
+
+
+def post_process(response):
+    """Map the model's reply to "1" (claim), "0" (no claim) or None.
+
+    Decision order:
+      1. refusal/apology phrases -> None
+      2. "هذه التغريدة تحتوي" -> "1"
+      3. a negation marker ("not", "لا", ...) -> "0"
+      4. "yes" -> "1"
+      5. "no" -> "0"
+      6. anything else -> None
+
+    The previous version returned "1" for every reply lacking "not"/"لا", so the
+    requested answer "no" was scored as a claim; its yes/no handling sat after an
+    unconditional return and never ran. It also compared "I cannot" against
+    lower-cased text, so that refusal could never match.
+
+    Args:
+        response: Chat-completion style dict; the reply text is read from
+            response["choices"][0]["message"]["content"].
+    """
+    label = response["choices"][0]["message"]["content"]
+    # Lower-case before stripping so "Label:" and "label:" are both removed.
+    label = label.lower().replace("label:", "").strip()
+
+    refusal_markers = ("لا يمكنني", "i cannot", "sorry", "هذه المحادثة غير مناسبة")
+    negation_markers = ("not a factual claim", "لا يوجد", "not", "لا")
+
+    if any(marker in label for marker in refusal_markers):
+        return None
+    if "هذه التغريدة تحتوي" in label:
+        return "1"
+    if any(marker in label for marker in negation_markers):
+        return "0"
+    if "yes" in label:
+        return "1"
+    if "no" in label:
+        return "0"
+    # Neither a recognised answer nor a refusal: leave the sample unlabelled.
+    return None
diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_FewShot_Mixed.py
new file mode 100755
index 00000000..6687d61a
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_FewShot_Mixed.py
@@ -0,0 +1,90 @@
+from llmebench.datasets import CT22ClaimDataset
+from llmebench.models import FastChatModel
+from llmebench.tasks import ClaimDetectionTask
+
+
+def metadata():
+    """Return the asset card: authors, affiliation, evaluated model and paper links."""
+    authors = "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam"
+    lab = "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)"
+    paper = "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)."
+    # Keys are emitted in the order author, affiliation, model, description.
+    return {"author": authors, "affiliation": lab, "model": "jais-13b-chat", "description": paper}
+
+
+def config():
+    """Return the run configuration: dataset, task and model classes plus their argument dicts."""
+    general_args = {"test_split": "ar", "fewshot": {"train_split": "ar"}}
+    return {
+        "dataset": CT22ClaimDataset,
+        "task": ClaimDetectionTask,
+        "model": FastChatModel,
+        "model_args": {"max_tries": 3},
+        "general_args": general_args,
+    }
+
+
+def prompt(input_sample, few_shot_examples):
+    """Build the single-turn few-shot prompt (Arabic instruction, English yes/no labels).
+
+    Args:
+        input_sample: Tweet text to classify.
+        few_shot_examples: Iterable of dicts with "input" and "label"; label "1"
+            is rendered as "yes", anything else as "no".
+
+    Returns:
+        A one-element list holding the user message dict.
+    """
+    instruction = "هل تحتوي هذه التغريدة على ادعاء يمكن التحقق منه؟ أجب فقط بـ 'yes' أو 'no'. قدم فقط الإجابة.\n\n"
+    # Every example contributes a tweet line, an answer line and a blank separator.
+    shots = "".join(
+        "التغريدة: " + example["input"] + "\n"
+        + "الإجابة: " + ("yes" if example["label"] == "1" else "no") + "\n\n"
+        for example in few_shot_examples
+    )
+    query = "التغريدة: " + input_sample + "\n" + "الإجابة: "
+    return [
+        {
+            "role": "user",
+            "content": instruction + shots + query,
+        },
+    ]
+
+
+def post_process(response):
+    """Map the model's reply to "1" (claim), "0" (no claim) or None.
+
+    Decision order:
+      1. "هذه التغريدة تحتوي" -> "1"
+      2. refusal/apology phrases -> None
+      3. a negation marker ("not", "لا", ...) -> "0"
+      4. "نعم" or "yes" -> "1"
+      5. "no" -> "0"
+      6. anything else -> None
+
+    Changes from the previous version: "i cannot" is matched in lower case (the
+    text is lower-cased, so "I cannot" never matched), and unparseable or
+    malformed responses yield None instead of "" or "0".
+
+    Args:
+        response: Chat-completion style dict; the reply text is read from
+            response["choices"][0]["message"]["content"].
+    """
+    try:
+        label = response["choices"][0]["message"]["content"]
+        label = label.replace("الإجابة:", "").strip().lower()
+    except (KeyError, IndexError, TypeError, AttributeError) as e:
+        print(f"Error in post-processing: {str(e)}")
+        return None
+
+    if "هذه التغريدة تحتوي" in label:
+        return "1"
+    if any(m in label for m in ("لا يمكنني", "i cannot", "sorry", "هذه المحادثة غير مناسبة")):
+        return None
+    if any(m in label for m in ("not a factual claim", "لا يوجد", "not", "لا")):
+        return "0"
+    if "نعم" in label or "yes" in label:
+        return "1"
+    if "no" in label:
+        return "0"
+    return None
diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_ZeroShot_Arabic.py
new file mode 100755
index 00000000..77ebde59
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_ZeroShot_Arabic.py
@@ -0,0 +1,61 @@
+from llmebench.datasets import CT22ClaimDataset
+from llmebench.models import FastChatModel
+from llmebench.tasks import ClaimDetectionTask
+
+
+def metadata():
+    """Describe this asset: authors, affiliation, model name and paper links."""
+    authors = "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam"
+    org = "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)"
+    model = "jais-13b-chat"
+    desc = "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)."
+    return {"author": authors, "affiliation": org, "model": model, "description": desc}
+
+
+def config():
+    """Return the benchmark wiring: dataset, task, model and their arguments."""
+    model_args = {"class_labels": ["0", "1"], "max_tries": 30}
+    general_args = {"test_split": "ar"}
+    return {
+        "dataset": CT22ClaimDataset,
+        "task": ClaimDetectionTask,
+        "model": FastChatModel,
+        "model_args": model_args,
+        "general_args": general_args,
+    }
+
+
+def prompt(input_sample):
+    """Build the one-message chat prompt asking for an Arabic yes/no verdict."""
+    question = "هل تحتوي هذه التغريدة على ادعاء يمكن التحقق منه؟ أجب فقط بـ 'نعم' أو 'لا'."
+
+    # Instruction, blank line, the tweet, then an open classification slot.
+    lines = [
+        question,
+        "",
+        f"التغريدة: {input_sample}",
+        "التصنيف: \n",
+    ]
+    return [{"role": "user", "content": "\n".join(lines)}]
+
+
+def post_process(response):
+    """Turn the model's Arabic answer into "1", "0" or None.
+
+    None is returned when the model refuses or when neither answer word
+    appears in the reply.
+    """
+    label = response["choices"][0]["message"]["content"]
+
+    # Arabic has no letter case, so this removes every "التصنيف:" prefix and
+    # no further splitting on that prefix is needed.
+    label = label.replace("التصنيف:", "").strip()
+    label = label.lower()
+    # The refusal phrase itself starts with the "no" word; test it first.
+    if "لا يمكنني" in label:
+        return None
+    if "نعم" in label:
+        return "1"
+    if "لا" in label:
+        return "0"
+    return None
diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_ZeroShot_English.py
new file mode 100755
index 00000000..b5e5a7b9
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_ZeroShot_English.py
@@ -0,0 +1,61 @@
+from llmebench.datasets import CT22ClaimDataset
+from llmebench.models import FastChatModel
+from llmebench.tasks import ClaimDetectionTask
+
+
+def metadata():
+    """Describe this asset: authors, affiliation, model name and paper links."""
+    authors = "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam"
+    org = "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)"
+    model = "jais-13b-chat"
+    desc = "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)."
+    return {"author": authors, "affiliation": org, "model": model, "description": desc}
+
+
+def config():
+    """Return the benchmark wiring: dataset, task, model and their arguments."""
+    model_args = {"class_labels": ["0", "1"], "max_tries": 30}
+    general_args = {"test_split": "ar"}
+    return {
+        "dataset": CT22ClaimDataset,
+        "task": ClaimDetectionTask,
+        "model": FastChatModel,
+        "model_args": model_args,
+        "general_args": general_args,
+    }
+
+
+def prompt(input_sample):
+    """Build the one-message chat prompt asking for an English yes/no verdict."""
+    question = "Does this sentence contain a factual claim? Please answer with 'yes' or 'no' only"
+
+    # Instruction, blank line, the tweet, then an open "label:" slot.
+    lines = [
+        question,
+        "",
+        f"tweet: {input_sample}",
+        "label: \n",
+    ]
+    return [{"role": "user", "content": "\n".join(lines)}]
+
+
+def post_process(response):
+    """Turn the model's answer into "1" (claim), "0" (no claim) or None.
+
+    None is returned when no yes/no marker is found; previously that case
+    raised UnboundLocalError.
+    """
+    label = response["choices"][0]["message"]["content"]
+    label = label.replace("label:", "").strip().lower()
+    # A capitalised "Label:" survives the replace above and is lower-cased
+    # afterwards, so keep only the text that follows it.
+    if "label: " in label:
+        label = label.split("label: ")[1].strip()
+
+    # Positive markers win: "no" also occurs inside ordinary words ("know").
+    positive = ("yes", "نعم", "the sentence contains a factual claim")
+    if any(marker in label for marker in positive):
+        return "1"
+    if "no" in label or "لا" in label:
+        return "0"
+    return None
diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_ZeroShot_Mixed.py
new file mode 100755
index 00000000..6dec7c55
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_ZeroShot_Mixed.py
@@ -0,0 +1,78 @@
+from llmebench.datasets import CT22ClaimDataset
+from llmebench.models import FastChatModel
+from llmebench.tasks import ClaimDetectionTask
+
+
+def metadata():
+    """Describe this asset: authors, affiliation, model name and paper links."""
+    authors = "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam"
+    org = "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)"
+    model = "jais-13b-chat"
+    desc = "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)."
+    return {"author": authors, "affiliation": org, "model": model, "description": desc}
+
+
+def config():
+    """Return the benchmark wiring: dataset, task, model and their arguments."""
+    model_args = {"class_labels": ["0", "1"], "max_tries": 30}
+    general_args = {"test_split": "ar"}
+    return {
+        "dataset": CT22ClaimDataset,
+        "task": ClaimDetectionTask,
+        "model": FastChatModel,
+        "model_args": model_args,
+        "general_args": general_args,
+    }
+
+
+def prompt(input_sample):
+    """Build the one-message chat prompt: Arabic question, 'yes'/'no' answer."""
+    question = "هل تحتوي هذه التغريدة على ادعاء يمكن التحقق منه؟ أجب فقط بـ 'yes' أو 'no'. قدم فقط الإجابة."
+
+    # Instruction, blank line, the tweet, then an open label slot.
+    lines = [
+        question,
+        "",
+        f"تغريدة: {input_sample}",
+        "التسمية: \n",
+    ]
+    return [{"role": "user", "content": "\n".join(lines)}]
+
+
+def post_process(response):
+    """Map the raw chat completion to a claim label.
+
+    Returns "1" (claim), "0" (no claim), "" when no known answer word is
+    found, or None when the model refused to answer. Any error while reading
+    the response is printed and scored as "0".
+    """
+    try:
+        label = response["choices"][0]["message"]["content"]
+        label = label.replace("الإجابة:", "").strip()
+        label = label.lower()
+        if "هذه التغريدة تحتوي" in label:
+            return "1"
+
+        # `label` is lower-cased above, so every marker must be lower-case too:
+        # the former "I cannot" could never match, and such refusals fell
+        # through to the "not" test below (via "cannot") and became "0".
+        refusals = ("لا يمكنني", "i cannot", "sorry", "هذه المحادثة غير مناسبة")
+        if any(marker in label for marker in refusals):
+            return None
+        # "not" and "لا" also cover the longer negative phrases as substrings.
+        if "not" in label or "لا" in label:
+            return "0"
+
+        if "نعم" in label or "yes" in label:
+            pred_label = "1"
+        elif "no" in label:
+            pred_label = "0"
+        else:
+            pred_label = ""
+
+        print(f"Predicted Label: {pred_label}")
+
+        return pred_label
+    except Exception as e:
+        print(f"Error in post-processing: {str(e)}")
+        return "0"
diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_FewShot_Arabic.py
new file mode 100755
index 00000000..6e8fac6f
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_FewShot_Arabic.py
@@ -0,0 +1,80 @@
+from llmebench.datasets import CT22ClaimDataset
+from llmebench.models import AzureModel
+from llmebench.tasks import ClaimDetectionTask
+
+
+def metadata():
+    """Describe this asset: authors, affiliation, model name and paper links."""
+    authors = "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam"
+    org = "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)"
+    model = "Llama-3.1-8B-Instruct"
+    desc = "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)."
+    return {"author": authors, "affiliation": org, "model": model, "description": desc}
+
+
+def config():
+    """Return the benchmark wiring: dataset, task, model and their arguments."""
+    general_args = {"test_split": "ar", "fewshot": {"train_split": "ar"}}
+    return {
+        "dataset": CT22ClaimDataset,
+        "task": ClaimDetectionTask,
+        "model": AzureModel,
+        "model_args": {"max_tries": 30},
+        "general_args": general_args,
+    }
+
+
+def prompt(input_sample, few_shot_examples):
+    """Build a one-message few-shot prompt for Arabic claim detection.
+
+    Every few-shot example is rendered as a sentence followed by its answer
+    word; the sentence to classify comes last with the answer left open.
+    """
+    instruction = "هل تحتوي هذه الجملة على ادعاء يمكن التحقق منه؟ أجب فقط بـ 'نعم' أو 'لا'. قدم فقط الإجابة."
+
+    pieces = [instruction, "\n\n"]
+    # One sentence/answer pair per example, each followed by a blank line.
+    for example in few_shot_examples:
+        answer = "نعم" if example["label"] == "1" else "لا"
+        pieces.extend(
+            ["الجملة: ", example["input"], "\n", "الإجابة: ", answer, "\n\n"]
+        )
+
+    # The query sentence, with the answer slot left empty for the model.
+    pieces.extend(["الجملة: ", input_sample, "\n", "الإجابة: "])
+
+    return [
+        {
+            "role": "user",
+            "content": "".join(pieces),
+        }
+    ]
+
+
+import random
+
+
+def post_process(response):
+    """Map the model output to "1", "0" or ""; a refusal gets a random label."""
+    try:
+        label = ""
+        if "output" in response:
+            label = response["output"].strip().lower()
+
+        print(f"Extracted Label: {label}")
+        # `label` is lower-cased, so the English refusal marker must be too.
+        if "لا أستطيع" in label or "i cannot" in label:
+            return random.choice(["0", "1"])
+
+        if "نعم" in label:
+            pred_label = "1"
+        elif "لا" in label:
+            pred_label = "0"
+        else:
+            pred_label = ""
+
+        print(f"Predicted Label: {pred_label}")
+        return pred_label
+    except Exception as e:
+        print(f"Error in post-processing: {str(e)}")
+        return "0"
diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_FewShot_English.py
new file mode 100755
index 00000000..12e02776
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_FewShot_English.py
@@ -0,0 +1,73 @@
+from llmebench.datasets import CT22ClaimDataset
+from llmebench.models import AzureModel
+from llmebench.tasks import ClaimDetectionTask
+
+
+def metadata():
+    """Describe this asset: authors, affiliation, model name and paper links."""
+    authors = "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam"
+    org = "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)"
+    model = "Llama-3.1-8B-Instruct"
+    desc = "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)."
+    return {"author": authors, "affiliation": org, "model": model, "description": desc}
+
+
+def config():
+    """Return the benchmark wiring: dataset, task, model and their arguments."""
+    general_args = {"test_split": "ar", "fewshot": {"train_split": "ar"}}
+    return {
+        "dataset": CT22ClaimDataset,
+        "task": ClaimDetectionTask,
+        "model": AzureModel,
+        "model_args": {"max_tries": 30},
+        "general_args": general_args,
+    }
+
+
+def prompt(input_sample, examples=None):
+    """Build the user message: few-shot when examples are given, else zero-shot."""
+    instruction = "Does this sentence contain a factual claim? Answer with 'yes' or 'no' only. Provide only the label.\n"
+    if not examples:
+        content = instruction + f"Sentence: {input_sample}\nLabel: "
+    else:
+        content = few_shot_prompt(input_sample, instruction, examples)
+    return [{"role": "user", "content": content}]
+
+
+def few_shot_prompt(input_sample, base_prompt, examples):
+    """Append each labelled example, then the open query, to *base_prompt*."""
+    chunks = [base_prompt, "\n"]
+    for shot in examples:
+        verdict = "yes" if shot["label"] != "0" else "no"
+        chunks += ["Sentence: ", shot["input"], "\nLabel: ", verdict, "\n\n"]
+    chunks += ["Sentence: ", input_sample, "\nLabel: "]
+    return "".join(chunks)
+
+
+import random
+
+
+def post_process(response):
+    """Map the model output to "1" when it contains "yes", otherwise "0".
+
+    A refusal gets a random label; any error is printed and scored as "0".
+    """
+    try:
+        label = ""
+        if "output" in response:
+            label = response["output"].strip().lower()
+
+        print(f"Extracted Label: {label}")
+        # `label` is lower-cased, so the English refusal marker must be too.
+        if "لا أستطيع" in label or "i cannot" in label:
+            return random.choice(["0", "1"])
+
+        # Anything that is not an explicit "yes" is scored as "0".
+        pred_label = "1" if "yes" in label else "0"
+
+        print(f"Predicted Label: {pred_label}")
+        return pred_label
+    except Exception as e:
+        print(f"Error in post-processing: {str(e)}")
+        # Same fallback as an unmatched answer, instead of "No Response ".
+        return "0"
diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_FewShot_Mixed.py
new file mode 100755
index 00000000..0aae6e0f
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_FewShot_Mixed.py
@@ -0,0 +1,78 @@
+import random
+
+from llmebench.datasets import CT22ClaimDataset
+from llmebench.models import AzureModel
+from llmebench.tasks import ClaimDetectionTask
+
+
+def metadata():
+    """Describe this asset: authors, affiliation, model name and paper links."""
+    authors = "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam"
+    org = "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)"
+    model = "Llama-3.1-8B-Instruct"
+    desc = "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)."
+    return {"author": authors, "affiliation": org, "model": model, "description": desc}
+
+
+def config():
+    """Return the benchmark wiring: dataset, task, model and their arguments."""
+    general_args = {"test_split": "ar", "fewshot": {"train_split": "ar"}}
+    return {
+        "dataset": CT22ClaimDataset,
+        "task": ClaimDetectionTask,
+        "model": AzureModel,
+        "model_args": {"max_tries": 30},
+        "general_args": general_args,
+    }
+
+
+def prompt(input_sample, few_shot_examples):
+    """Build a one-message few-shot prompt: Arabic question, 'yes'/'no' answers.
+
+    Every few-shot example is rendered as a tweet followed by its answer
+    word; the tweet to classify comes last with the answer left open.
+    """
+    instruction = "هل تحتوي هذه التغريدة على ادعاء يمكن التحقق منه؟ أجب فقط بـ 'yes' أو 'no'. قدم فقط الإجابة."
+
+    pieces = [instruction, "\n\n"]
+    # One tweet/answer pair per example, each followed by a blank line.
+    for example in few_shot_examples:
+        answer = "yes" if example["label"] == "1" else "no"
+        pieces.extend(
+            ["التغريدة: ", example["input"], "\n", "الإجابة: ", answer, "\n\n"]
+        )
+
+    # The query tweet, with the answer slot left empty for the model.
+    pieces.extend(["التغريدة: ", input_sample, "\n", "الإجابة: "])
+
+    return [
+        {
+            "role": "user",
+            "content": "".join(pieces),
+        }
+    ]
+
+
+def post_process(response):
+    """Map the model output to "1", "0" or ""; a refusal gets a random label."""
+    try:
+        label = ""
+        if "output" in response:
+            label = response["output"].strip().lower()
+
+        print(f"Extracted Label: {label}")
+        # `label` is lower-cased, so the English refusal marker must be too.
+        if "لا أستطيع" in label or "i cannot" in label:
+            return random.choice(["0", "1"])
+        if "نعم" in label or "yes" in label:
+            pred_label = "1"
+        elif "لا" in label or "no" in label:
+            pred_label = "0"
+        else:
+            pred_label = ""
+
+        print(f"Predicted Label: {pred_label}")
+        return pred_label
+    except Exception as e:
+        print(f"Error in post-processing: {str(e)}")
+        return "0"
diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_ZeroShot_Arabic.py
new file mode 100755
index 00000000..6b5013f6
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_ZeroShot_Arabic.py
@@ -0,0 +1,75 @@
+from llmebench.datasets import CT22ClaimDataset
+from llmebench.models import AzureModel
+from llmebench.tasks import ClaimDetectionTask
+
+
+def metadata():
+    """Describe this asset: authors, affiliation, model name and paper links."""
+    authors = "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam"
+    org = "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)"
+    model = "Llama-3.1-8B-Instruct"
+    desc = "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)."
+    return {"author": authors, "affiliation": org, "model": model, "description": desc}
+
+
+def config():
+    """Return the benchmark wiring: dataset, task, model and their arguments."""
+    wiring = dict(dataset=CT22ClaimDataset, task=ClaimDetectionTask)
+    wiring["model"] = AzureModel
+    wiring["model_args"] = {"max_tries": 30}
+    wiring["general_args"] = {"test_split": "ar"}
+
+    return wiring
+
+
+def prompt(input_sample):
+    """Build the one-message chat prompt asking for an Arabic yes/no verdict."""
+    question = "هل تحتوي هذه التغريدة على ادعاء يمكن التحقق منه؟ أجب فقط بـ 'نعم' أو 'لا'."
+    # Instruction, blank line, the tweet, then an open classification slot.
+    lines = [
+        question,
+        "",
+        f"التغريدة: {input_sample}",
+        "التصنيف: ",
+    ]
+    return [{"role": "user", "content": "\n".join(lines)}]
+
+
+import random
+
+
+def post_process(response):
+    """Map the model output to a claim label.
+
+    Returns "1" when a positive marker is found and "0" otherwise, including
+    when an error occurs while reading the response. A refusal gets a random
+    label.
+    """
+    try:
+        label = ""
+
+        # Assumes the model output sits under "output"; else label stays empty.
+        if "output" in response:
+            label = response["output"].strip().lower()
+
+        # Debug print to check the extracted label
+        print(f"Extracted Label: {label}")
+
+        # `label` is lower-cased, so the English refusal marker must be too.
+        if "لا أستطيع" in label or "i cannot" in label:
+            return random.choice(["0", "1"])
+
+        # Determining the prediction label based on the response content
+        if "نعم" in label or "contains a factual claim" in label or "label: 1" in label:
+            pred_label = "1"
+        else:
+            # Explicit negatives and unrecognised answers are both scored as
+            # "0" (most conservative approach).
+            pred_label = "0"
+
+        return pred_label
+    except Exception as e:
+        print(f"Error in post-processing: {str(e)}")
+        # Return the default negative label in case of error, as for
+        # unrecognised answers above (previously an empty string).
+        return "0"
diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_ZeroShot_English.py
new file mode 100755
index 00000000..f88e2944
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_ZeroShot_English.py
@@ -0,0 +1,75 @@
+from llmebench.datasets import CT22ClaimDataset
+from llmebench.models import AzureModel
+from llmebench.tasks import ClaimDetectionTask
+
+
+def metadata():
+    """Describe this asset: authors, affiliation, model name and paper links."""
+    authors = "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam"
+    org = "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)"
+    model = "Llama-3.1-8B-Instruct"
+    desc = "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)."
+    return {"author": authors, "affiliation": org, "model": model, "description": desc}
+
+
+def config():
+    """Return the benchmark wiring: dataset, task, model and their arguments."""
+    wiring = dict(dataset=CT22ClaimDataset, task=ClaimDetectionTask)
+    wiring["model"] = AzureModel
+    wiring["model_args"] = {"max_tries": 30}
+    wiring["general_args"] = {"test_split": "ar"}
+
+    return wiring
+
+
+def prompt(input_sample):
+    """Build the one-message chat prompt asking for an English yes/no verdict."""
+    question = "Does this sentence contain a factual claim? Please answer with 'yes' or 'no' only."
+    # Instruction, blank line, the sentence, then an open "Label:" slot.
+    lines = [
+        question,
+        "",
+        f"Sentence: {input_sample}",
+        "Label: ",
+    ]
+    return [{"role": "user", "content": "\n".join(lines)}]
+
+
+import random
+
+
+def post_process(response):
+    """Map the model output to a claim label.
+
+    Returns "1" when a positive marker is found and "0" otherwise, including
+    when an error occurs while reading the response. A refusal gets a random
+    label.
+    """
+    try:
+        label = ""
+        # Assumes the model output sits under "output"; else label stays empty.
+        if "output" in response:
+            label = response["output"].strip().lower()
+
+        # Debug print to check the extracted label
+        print(f"Extracted Label: {label}")
+
+        # `label` is lower-cased, so the English refusal marker must be too.
+        if "لا أستطيع" in label or "i cannot" in label:
+            return random.choice(["0", "1"])
+
+        # Determining the prediction label based on the response content
+        if "yes" in label or "contains a factual claim" in label or "label: 1" in label:
+            pred_label = "1"
+        else:
+            # Explicit negatives and unrecognised answers are both scored as
+            # "0" (most conservative approach).
+            pred_label = "0"
+
+        # Debug print to check the final predicted label
+        print(f"Predicted Label: {pred_label}")
+        return pred_label
+    except Exception as e:
+        print(f"Error in post-processing: {str(e)}")
+        # Return a default negative label in case of error to prevent unknown targets
+        return "0"
diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_ZeroShot_Mixed.py
new file mode 100755
index 00000000..5da6f98e
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_ZeroShot_Mixed.py
@@ -0,0 +1,75 @@
+from llmebench.datasets import CT22ClaimDataset
+from llmebench.models import AzureModel
+from llmebench.tasks import ClaimDetectionTask
+
+
+def metadata():
+    """Describe this asset: authors, affiliation, model name and paper links."""
+    authors = "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam"
+    org = "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)"
+    model = "Llama-3.1-8B-Instruct"
+    desc = "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054)."
+    return {"author": authors, "affiliation": org, "model": model, "description": desc}
+
+
+def config():
+    """Return the benchmark wiring: dataset, task, model and their arguments."""
+    wiring = dict(dataset=CT22ClaimDataset, task=ClaimDetectionTask)
+    wiring["model"] = AzureModel
+    wiring["model_args"] = {"max_tries": 30}
+    wiring["general_args"] = {"test_split": "ar"}
+
+    return wiring
+
+
+def prompt(input_sample):
+    """Build the one-message chat prompt: Arabic question, 'yes'/'no' answer."""
+    question = "هل تحتوي هذه التغريدة على ادعاء يمكن التحقق منه؟ أجب فقط بـ 'yes' أو 'no'."
+    # Instruction, blank line, the tweet, then an open classification slot.
+    lines = [
+        question,
+        "",
+        f"التغريدة: {input_sample}",
+        "التصنيف: ",
+    ]
+    return [{"role": "user", "content": "\n".join(lines)}]
+
+
+import random
+
+
+def post_process(response):
+    """Map the model output to a claim label.
+
+    Returns "1" when a positive marker is found and "0" otherwise, including
+    when an error occurs while reading the response. A refusal gets a random
+    label.
+    """
+    try:
+        label = ""
+
+        # Assumes the model output sits under "output"; else label stays empty.
+        if "output" in response:
+            label = response["output"].strip().lower()
+
+        # Debug print to check the extracted label
+        print(f"Extracted Label: {label}")
+
+        # `label` is lower-cased, so the English refusal marker must be too.
+        if "لا أستطيع" in label or "i cannot" in label:
+            return random.choice(["0", "1"])
+
+        # Determining the prediction label based on the response content
+        if "yes" in label or "contains a factual claim" in label or "label: 1" in label:
+            pred_label = "1"
+        else:
+            # Explicit negatives and unrecognised answers are both scored as
+            # "0" (most conservative approach).
+            pred_label = "0"
+
+        return pred_label
+    except Exception as e:
+        print(f"Error in post-processing: {str(e)}")
+        # Return the default negative label in case of error, as for
+        # unrecognised answers above (previously an empty string).
+        return "0"