From 98f082a2e4cb851841dd041031788be8972f5a6d Mon Sep 17 00:00:00 2001 From: MohamedBayan <118048819+MohamedBayan@users.noreply.github.com> Date: Mon, 20 Jan 2025 16:34:02 +0300 Subject: [PATCH] Wise claim detection (#380) * Add propaganda assets * Fix errors * Add wise-claim_detection assets --- .../CT22Claim_GPT4_FewShot_Arabic.py | 88 ++++++++++++++++++ .../CT22Claim_GPT4_FewShot_English.py | 84 +++++++++++++++++ .../CT22Claim_GPT4_FewShot_Mixed.py | 89 ++++++++++++++++++ .../CT22Claim_GPT4_ZeroShot_Arabic.py | 71 +++++++++++++++ .../CT22Claim_GPT4_ZeroShot_English.py | 69 ++++++++++++++ .../CT22Claim_GPT4_ZeroShot_Mixed.py | 72 +++++++++++++++ .../CT22Claim_JAIS13b_FewShot_Arabic.py | 73 +++++++++++++++ .../CT22Claim_JAIS13b_FewShot_English.py | 85 ++++++++++++++++++ .../CT22Claim_JAIS13b_FewShot_Mixed.py | 90 +++++++++++++++++++ .../CT22Claim_JAIS13b_ZeroShot_Arabic.py | 61 +++++++++++++ .../CT22Claim_JAIS13b_ZeroShot_English.py | 61 +++++++++++++ .../CT22Claim_JAIS13b_ZeroShot_Mixed.py | 78 ++++++++++++++++ .../CT22Claim_Llama3-8b_FewShot_Arabic.py | 80 +++++++++++++++++ .../CT22Claim_Llama3-8b_FewShot_English.py | 73 +++++++++++++++ .../CT22Claim_Llama3-8b_FewShot_Mixed.py | 78 ++++++++++++++++ .../CT22Claim_Llama3-8b_ZeroShot_Arabic.py | 75 ++++++++++++++++ .../CT22Claim_Llama3-8b_ZeroShot_English.py | 75 ++++++++++++++++ .../CT22Claim_Llama3-8b_ZeroShot_Mixed.py | 75 ++++++++++++++++ 18 files changed, 1377 insertions(+) create mode 100755 assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_FewShot_Arabic.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_FewShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_FewShot_Mixed.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_ZeroShot_Arabic.py create mode 100755 
assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_ZeroShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_ZeroShot_Mixed.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_FewShot_Arabic.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_FewShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_FewShot_Mixed.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_ZeroShot_Arabic.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_ZeroShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_ZeroShot_Mixed.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_FewShot_Arabic.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_FewShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_FewShot_Mixed.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_ZeroShot_Arabic.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_ZeroShot_English.py create mode 100755 assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_Llama3-8b_ZeroShot_Mixed.py diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_FewShot_Arabic.py new file mode 100755 index 00000000..ad2ec15a --- /dev/null +++ 
b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_FewShot_Arabic.py @@ -0,0 +1,88 @@ +from llmebench.datasets import CT22ClaimDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import ClaimDetectionTask + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", + } + + +def config(): + return { + "dataset": CT22ClaimDataset, + "task": ClaimDetectionTask, + "model": OpenAIModel, + "model_args": { + "max_tries": 3, + }, + "general_args": {"test_split": "ar", "fewshot": {"train_split": "ar"}}, + } + + +def prompt(input_sample, examples): + base_prompt = ( + "هل تحتوي هذه التغريدة على ادعاء؟ أجب فقط بـ 'نعم' أو 'لا'. 
قدم التصنيف فقط.\n" + ) + prompt = few_shot_prompt(input_sample, base_prompt, examples) + + return [ + { + "role": "system", + "content": "أنت خبير في تحليل و تصنيف التغريدات.", + }, + { + "role": "user", + "content": prompt, + }, + ] + + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + "\n" + for example in examples: + # Found chatgpt confused when using 0 and 1 in the prompt + label = "لا" if example["label"] == "0" else "نعم" + out_prompt = ( + out_prompt + "التغريدة: " + example["input"] + "التصنيف: " + label + "\n\n" + ) + + # Append the sentence we want the model to predict for but leave the Label blank + out_prompt = out_prompt + "التغريدة: " + input_sample + "التصنيف: \n" + + # print("=========== FS Prompt =============\n") + # print(out_prompt) + + return out_prompt + + +def post_process(response): + input_label = response["choices"][0]["message"]["content"] + input_label = input_label.replace(".", "").strip().lower() + + if ( + "لا" in input_label + or "لا تحتوي" in input_label + or "ليست" in input_label + or "not" in input_label + or "label: 0" in input_label + or "label: no" in input_label + or "not contain" in input_label + or "doesn't contain" in input_label + ): + return "0" + + elif ( + "نعم" in input_label + or "تحتوي" in input_label + or "yes" in input_label + or "contains" in input_label + or "label: 1" in input_label + ): + return "1" + else: + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_FewShot_English.py new file mode 100755 index 00000000..18374d44 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_FewShot_English.py @@ -0,0 +1,84 @@ +from llmebench.datasets import CT22ClaimDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import ClaimDetectionTask + + +def 
metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", + } + + +def config(): + return { + "dataset": CT22ClaimDataset, + "task": ClaimDetectionTask, + "model": OpenAIModel, + "model_args": { + "max_tries": 3, + }, + "general_args": {"test_split": "ar", "fewshot": {"train_split": "ar"}}, + } + + +def prompt(input_sample, examples): + base_prompt = "Does the following tweet contain a factual claim? If it does, return 'yes', if it does not, return 'no'. Provide only label.\n" + prompt = few_shot_prompt(input_sample, base_prompt, examples) + + return [ + { + "role": "system", + "content": "You are expert in text analysis and classification.", + }, + { + "role": "user", + "content": prompt, + }, + ] + + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + "\n" + for example in examples: + # Found chatgpt confused when using 0 and 1 in the prompt + label = "no" if example["label"] == "0" else "yes" + out_prompt = ( + out_prompt + "tweet: " + example["input"] + "\nlabel: " + label + "\n\n" + ) + + # Append the tweet we want the model to predict for but leave the label blank + out_prompt = out_prompt + "tweet: " + input_sample + "\nlabel: \n" + + # print("=========== FS Prompt =============\n") + # print(out_prompt) + + return out_prompt + + +def post_process(response): + input_label = response["choices"][0]["message"]["content"] + input_label = input_label.replace(".", "").strip().lower() + pred_label = "" + + if ( + "yes" in input_label + or 
"contains a factual claim" in input_label + or "label: 1" in input_label + ): + pred_label = "1" + if ( + input_label == "no" + or "label: 0" in input_label + or "label: no" in input_label + or "not contain a factual claim" in input_label + or "doesn't contain a factual claim" in input_label + ): + pred_label = "0" + + if pred_label == "": + pred_label = None + + return pred_label diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_FewShot_Mixed.py new file mode 100755 index 00000000..71f84ca0 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_FewShot_Mixed.py @@ -0,0 +1,89 @@ +from llmebench.datasets import CT22ClaimDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import ClaimDetectionTask + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", + } + + +def config(): + return { + "dataset": CT22ClaimDataset, + "task": ClaimDetectionTask, + "model": OpenAIModel, + "model_args": { + "max_tries": 3, + }, + "general_args": {"test_split": "ar", "fewshot": {"train_split": "ar"}}, + } + + +def prompt(input_sample, examples): + base_prompt = ( + "هل تحتوي هذه التغريدة على ادعاء؟ أجب فقط بـ 'yes' أو 'no'. 
قدم التصنيف فقط.\n" + ) + prompt = few_shot_prompt(input_sample, base_prompt, examples) + + return [ + { + "role": "system", + "content": "أنت خبير في تحليل و تصنيف التغريدات.", + }, + { + "role": "user", + "content": prompt, + }, + ] + + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + "\n" + for example in examples: + # Found chatgpt confused when using 0 and 1 in the prompt + label = "no" if example["label"] == "0" else "yes" + out_prompt = ( + out_prompt + "التغريدة: " + example["input"] + "\التصنيف: " + label + "\n\n" + ) + + # Append the sentence we want the model to predict for but leave the Label blank + out_prompt = out_prompt + "التغريدة: " + input_sample + "\التصنيف: \n" + + # print("=========== FS Prompt =============\n") + # print(out_prompt) + + return out_prompt + + +def post_process(response): + input_label = response["choices"][0]["message"]["content"] + input_label = input_label.replace(".", "").strip().lower() + + if ( + "لا" in input_label + or "لا تحتوي" in input_label + or "ليست" in input_label + or "not" in input_label + or "no" in input_label + or "label: 0" in input_label + or "label: no" in input_label + or "not contain" in input_label + or "doesn't contain" in input_label + ): + return "0" + + elif ( + "نعم" in input_label + or "تحتوي" in input_label + or "yes" in input_label + or "contains" in input_label + or "label: 1" in input_label + ): + return "1" + else: + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_ZeroShot_Arabic.py new file mode 100755 index 00000000..b4281930 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_ZeroShot_Arabic.py @@ -0,0 +1,71 @@ +from llmebench.datasets import CT22ClaimDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import 
ClaimDetectionTask + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", + } + + +def config(): + return { + "dataset": CT22ClaimDataset, + "task": ClaimDetectionTask, + "model": OpenAIModel, + "model_args": { + "class_labels": ["0", "1"], + "max_tries": 30, + }, + "general_args": {"test_split": "ar"}, + } + + +def prompt(input_sample): + prompt_string = ( + f"هل تحتوي هذه التغريدة على ادعاء؟ أجب فقط بـ 'نعم' أو 'لا'. قدم التصنيف فقط.\n" + f"التغريدة: {input_sample}\n" + f"التصنيف: \n" + ) + return [ + { + "role": "system", + "content": "أنت خبير في تحليل و تصنيف التغريدات.", + }, + { + "role": "user", + "content": prompt_string, + }, + ] + + +def post_process(response): + input_label = response["choices"][0]["message"]["content"] + input_label = input_label.replace(".", "").strip().lower() + + if ( + "لا" in input_label + or "لا تحتوي" in input_label + or "ليست" in input_label + or "not" in input_label + or "label: 0" in input_label + or "label: no" in input_label + or "not contain" in input_label + or "doesn't contain" in input_label + ): + return "0" + + elif ( + "نعم" in input_label + or "تحتوي" in input_label + or "yes" in input_label + or "contains" in input_label + or "label: 1" in input_label + ): + return "1" + else: + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_ZeroShot_English.py new file 
mode 100755 index 00000000..2bdb67cf --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_ZeroShot_English.py @@ -0,0 +1,69 @@ +from llmebench.datasets import CT22ClaimDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import ClaimDetectionTask + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", + } + + +def config(): + return { + "dataset": CT22ClaimDataset, + "task": ClaimDetectionTask, + "model": OpenAIModel, + "model_args": { + "class_labels": ["0", "1"], + "max_tries": 30, + }, + "general_args": {"test_split": "ar"}, + } + + +def prompt(input_sample): + prompt_string = ( + f"Does the following tweet contain a factual claim? If it does, return 'yes', if it does not, return 'no'. 
Provide only label.\n\n" + f"tweet: {input_sample}\n" + f"label: \n" + ) + return [ + { + "role": "system", + "content": "You are expert in text analysis and classification.", + }, + { + "role": "user", + "content": prompt_string, + }, + ] + + +def post_process(response): + input_label = response["choices"][0]["message"]["content"] + input_label = input_label.replace(".", "").strip().lower() + pred_label = "" + + if ( + "yes" in input_label + or "contains a factual claim" in input_label + or "label: 1" in input_label + ): + pred_label = "1" + if ( + input_label == "no" + or "label: 0" in input_label + or "label: no" in input_label + or "not contain" in input_label + or "doesn't contain" in input_label + ): + pred_label = "0" + + if pred_label == "": + pred_label = None + + return pred_label diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_ZeroShot_Mixed.py new file mode 100755 index 00000000..07e8c1db --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_GPT4_ZeroShot_Mixed.py @@ -0,0 +1,72 @@ +from llmebench.datasets import CT22ClaimDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import ClaimDetectionTask + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "GPT-4o-2024-05-22", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", + } + + +def config(): + return { + "dataset": CT22ClaimDataset, + "task": ClaimDetectionTask, + 
"model": OpenAIModel, + "model_args": { + "class_labels": ["0", "1"], + "max_tries": 30, + }, + "general_args": {"test_split": "ar"}, + } + + +def prompt(input_sample): + prompt_string = ( + f"هل تحتوي هذه التغريدة على ادعاء؟ أجب فقط بـ 'yes' أو 'no'. قدم التصنيف فقط.\n\n" + f"التغريدة: {input_sample}\n" + f"التصنيف: \n" + ) + return [ + { + "role": "system", + "content": "أنت خبير في تحليل و تصنيف التغريدات.", + }, + { + "role": "user", + "content": prompt_string, + }, + ] + + +def post_process(response): + input_label = response["choices"][0]["message"]["content"] + input_label = input_label.replace(".", "").strip().lower() + + if ( + "لا" in input_label + or "لا تحتوي" in input_label + or "ليست" in input_label + or "not" in input_label + or "no" in input_label + or "label: 0" in input_label + or "label: no" in input_label + or "not contain" in input_label + or "doesn't contain" in input_label + ): + return "0" + + elif ( + "نعم" in input_label + or "تحتوي" in input_label + or "yes" in input_label + or "contains" in input_label + or "label: 1" in input_label + ): + return "1" + else: + return None diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_FewShot_Arabic.py new file mode 100755 index 00000000..891d8e61 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_FewShot_Arabic.py @@ -0,0 +1,73 @@ +from llmebench.datasets import CT22ClaimDataset +from llmebench.models import FastChatModel +from llmebench.tasks import ClaimDetectionTask + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "jais-13b-chat", + "description": "For a 
comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", + } + + +def config(): + return { + "dataset": CT22ClaimDataset, + "task": ClaimDetectionTask, + "model": FastChatModel, + "model_args": { + "max_tries": 3, + }, + "general_args": {"test_split": "ar", "fewshot": {"train_split": "ar"}}, + } + + +def prompt(input_sample, few_shot_examples): + few_shot_text = "" + for example in few_shot_examples: + few_shot_text += ( + "التغريدة: " + + example["input"] + + "\n" + + "الإجابة: " + + ("yes" if example["label"] == "1" else "no") + + "\n\n" + ) + + return [ + { + "role": "user", + "content": ( + "هل تحتوي هذه التغريدة على ادعاء يمكن التحقق منه؟ أجب فقط بـ 'نعم' أو 'لا'. قدم فقط الإجابة.\n\n" + + few_shot_text + + "التغريدة: " + + input_sample + + "\n" + + "الإجابة: " + ), + } + ] + + +def post_process(response): + label = response["choices"][0]["message"]["content"] + + label = label.replace("التصنيف:", "").strip() + label = label.lower() + + if "لا يمكنني" in label: + return None + if "التصنيف: " in label: + arr = label.split("التصنيف: ") + label = arr[1].strip() + + if "نعم" in label: + label_fixed = "1" + elif "لا" in label: + label_fixed = "0" + else: + label_fixed = None + + return label_fixed diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_FewShot_English.py new file mode 100755 index 00000000..ea8e1c53 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_FewShot_English.py @@ -0,0 +1,85 @@ +from llmebench.datasets import CT22ClaimDataset +from llmebench.models import FastChatModel +from llmebench.tasks import ClaimDetectionTask + + +def metadata(): + return { + "author": "Mohamed Bayan 
Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "jais-13b-chat", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", + } + + +def config(): + return { + "dataset": CT22ClaimDataset, + "task": ClaimDetectionTask, + "model": FastChatModel, + "model_args": { + "max_tries": 3, + }, + "general_args": {"test_split": "ar", "fewshot": {"train_split": "ar"}}, + } + + +def prompt(input_sample, examples=None): + base_prompt = "Does this sentence contain a factual claim? Answer with 'yes' or 'no' only. Provide only the label.\n" + if examples: + user_message_content = few_shot_prompt(input_sample, base_prompt, examples) + else: + user_message_content = base_prompt + f"Sentence: {input_sample}\nLabel: " + + return [{"role": "user", "content": user_message_content}] + + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + "\n" + for example in examples: + label = "no" if example["label"] == "0" else "yes" + out_prompt += "Sentence: " + example["input"] + "\nLabel: " + label + "\n\n" + out_prompt += "Sentence: " + input_sample + "\nLabel: " + + return out_prompt + + +def post_process(response): + + label = response["choices"][0]["message"]["content"] + + label = label.replace("label:", "").strip() + label = label.lower() + + if ( + "لا يمكنني" in label + or "I cannot" in label + or "sorry" in label + or "هذه المحادثة غير مناسبة" in label + ): + return None + if "هذه التغريدة تحتوي" in label: + return "1" + + if ( + "not a factual claim" in label + or "لا يوجد" in label + or "not" in label + or "لا" in label + ): + return "0" + return "1" + + if "label: " in label: + arr = 
label.split("label: ") + label = arr[1].strip() + if "yes" in label: + pred_label = "1" + elif "no" in label: + pred_label = "0" + else: + pred_label = "0" + + print(f"Predicted Label: {pred_label}") + + return pred_label diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_FewShot_Mixed.py new file mode 100755 index 00000000..6687d61a --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_FewShot_Mixed.py @@ -0,0 +1,90 @@ +from llmebench.datasets import CT22ClaimDataset +from llmebench.models import FastChatModel +from llmebench.tasks import ClaimDetectionTask + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "jais-13b-chat", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", + } + + +def config(): + return { + "dataset": CT22ClaimDataset, + "task": ClaimDetectionTask, + "model": FastChatModel, + "model_args": { + "max_tries": 3, + }, + "general_args": {"test_split": "ar", "fewshot": {"train_split": "ar"}}, + } + + +def prompt(input_sample, few_shot_examples): + few_shot_text = "" + for example in few_shot_examples: + few_shot_text += ( + "التغريدة: " + + example["input"] + + "\n" + + "الإجابة: " + + ("yes" if example["label"] == "1" else "no") + + "\n\n" + ) + + return [ + { + "role": "user", + "content": ( + "هل تحتوي هذه التغريدة على ادعاء يمكن التحقق منه؟ أجب فقط بـ 'yes' أو 'no'. 
قدم فقط الإجابة.\n\n" + + few_shot_text + + "التغريدة: " + + input_sample + + "\n" + + "الإجابة: " + ), + } + ] + + +def post_process(response): + try: + label = response["choices"][0]["message"]["content"] + + label = label.replace("الإجابة:", "").strip() + label = label.lower() + if "هذه التغريدة تحتوي" in label: + return "1" + + if ( + "لا يمكنني" in label + or "I cannot" in label + or "sorry" in label + or "هذه المحادثة غير مناسبة" in label + ): + return None + if ( + "not a factual claim" in label + or "لا يوجد" in label + or "not" in label + or "لا" in label + ): + return "0" + + if "نعم" in label or "yes" in label: + pred_label = "1" + elif "لا" in label or "no" in label: + pred_label = "0" + else: + pred_label = "" + + print(f"Predicted Label: {pred_label}") + + return pred_label + except Exception as e: + print(f"Error in post-processing: {str(e)}") + return "0" diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_ZeroShot_Arabic.py new file mode 100755 index 00000000..77ebde59 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_ZeroShot_Arabic.py @@ -0,0 +1,61 @@ +from llmebench.datasets import CT22ClaimDataset +from llmebench.models import FastChatModel +from llmebench.tasks import ClaimDetectionTask + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "jais-13b-chat", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on [arXiv](https://arxiv.org/abs/2409.07054).", + } + 
+ +def config(): + return { + "dataset": CT22ClaimDataset, + "task": ClaimDetectionTask, + "model": FastChatModel, + "model_args": { + "class_labels": ["0", "1"], + "max_tries": 30, + }, + "general_args": {"test_split": "ar"}, + } + + +def prompt(input_sample): + base_prompt = ( + f"هل تحتوي هذه التغريدة على ادعاء يمكن التحقق منه؟ أجب فقط بـ 'نعم' أو 'لا'.\n\n" + f"التغريدة: {input_sample}\n" + f"التصنيف: \n" + ) + return [ + { + "role": "user", + "content": base_prompt, + }, + ] + + +def post_process(response): + label = response["choices"][0]["message"]["content"] + + label = label.replace("التصنيف:", "").strip() + label = label.lower() + + if "لا يمكنني" in label: + return None + if "التصنيف: " in label: + arr = label.split("التصنيف: ") + label = arr[1].strip() + + if "نعم" in label: + label_fixed = "1" + elif "لا" in label: + label_fixed = "0" + else: + label_fixed = None + + return label_fixed diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_ZeroShot_English.py new file mode 100755 index 00000000..b5e5a7b9 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_ZeroShot_English.py @@ -0,0 +1,61 @@ +from llmebench.datasets import CT22ClaimDataset +from llmebench.models import FastChatModel +from llmebench.tasks import ClaimDetectionTask + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "jais-13b-chat", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at [Springer](https://doi.org/10.1007/978-981-96-0576-7_30) or explore the preprint version on 
[arXiv](https://arxiv.org/abs/2409.07054).", + } + + +def config(): + return { + "dataset": CT22ClaimDataset, + "task": ClaimDetectionTask, + "model": FastChatModel, + "model_args": { + "class_labels": ["0", "1"], + "max_tries": 30, + }, + "general_args": {"test_split": "ar"}, + } + + +def prompt(input_sample): + base_prompt = ( + f"Does this sentence contain a factual claim? Please answer with 'yes' or 'no' only\n\n" + f"tweet: {input_sample}\n" + f"label: \n" + ) + return [ + { + "role": "user", + "content": base_prompt, + }, + ] + + +def post_process(response): + label = response["choices"][0]["message"]["content"] + + label = label.replace("label:", "").strip() + label = label.lower() + + if "label: " in label: + arr = label.split("label: ") + label = arr[1].strip() + + if ( + "yes" in label + or "نعم" in label + or "the sentence contains a factual claim" in label + ): + label_fixed = "1" + if "no" in label or "لا" in label: + label_fixed = "0" + + return label_fixed diff --git a/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_ZeroShot_Mixed.py new file mode 100755 index 00000000..6dec7c55 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/claim_detection/CT22Claim_JAIS13b_ZeroShot_Mixed.py @@ -0,0 +1,78 @@ +from llmebench.datasets import CT22ClaimDataset +from llmebench.models import FastChatModel +from llmebench.tasks import ClaimDetectionTask + + +def metadata(): + return { + "author": "Mohamed Bayan Kmainasi, Rakif Khan, Ali Ezzat Shahroor, Boushra Bendou, Maram Hasanain, and Firoj Alam", + "affiliation": "Arabic Language Technologies, Qatar Computing Research Institute (QCRI), Hamad Bin Khalifa University (HBKU)", + "model": "jais-13b-chat", + "description": "For a comprehensive analysis and results, refer to our peer-reviewed publication available at 
def post_process(response):
    """Map a chat-completion response to a claim-detection label.

    Args:
        response: Mapping whose ``["choices"][0]["message"]["content"]``
            entry holds the model's textual answer.

    Returns:
        ``"1"`` for an affirmative answer, ``"0"`` for a negative one,
        ``None`` when the model refuses to answer, ``""`` when no cue is
        found, and ``"0"`` when the response cannot be read at all.
    """
    try:
        answer = response["choices"][0]["message"]["content"]
        answer = answer.replace("الإجابة:", "").strip().lower()

        if "هذه التغريدة تحتوي" in answer:
            return "1"

        # The text is lowercased above, so the English refusal cue has to be
        # lowercase as well; "I cannot" could never match and refusals were
        # scored as "0" through the "not" check below.
        refused = (
            "لا يمكنني" in answer
            or "i cannot" in answer
            or "sorry" in answer
            or "هذه المحادثة غير مناسبة" in answer
        )
        if refused:
            return None

        # "not" also covers "not a factual claim" and "لا" also covers
        # "لا يوجد"; negatives are tested before the affirmative cues, as in
        # the original ordering.
        if "not" in answer or "لا" in answer:
            return "0"

        if "نعم" in answer or "yes" in answer:
            pred_label = "1"
        elif "no" in answer:
            pred_label = "0"
        else:
            pred_label = ""

        print(f"Predicted Label: {pred_label}")

        return pred_label
    except Exception as e:
        # Best effort: a malformed response is scored as the negative class.
        print(f"Error in post-processing: {str(e)}")
        return "0"
def config():
    """Return the benchmark configuration for this asset.

    The mapping names the dataset, task and model classes to use, the
    model arguments, and the test / few-shot train splits (both ``"ar"``).
    """
    model_args = {"max_tries": 30}
    general_args = {"test_split": "ar", "fewshot": {"train_split": "ar"}}

    settings = {}
    settings["dataset"] = CT22ClaimDataset
    settings["task"] = ClaimDetectionTask
    settings["model"] = AzureModel
    settings["model_args"] = model_args
    settings["general_args"] = general_args
    return settings
def post_process(response):
    """Map a model response to a claim-detection label.

    Args:
        response: Mapping that may carry the model's answer under the
            ``"output"`` key.

    Returns:
        ``"1"`` when the answer contains "نعم" (yes), ``"0"`` when it
        contains "لا" (no), a random choice of ``"0"``/``"1"`` when the
        model refuses to answer, ``""`` when no cue is found, and ``"0"``
        when the response cannot be read at all.
    """
    try:
        label = ""

        if "output" in response:
            label = response["output"].strip().lower()

        print(f"Extracted Label: {label}")

        # The text is lowercased above, so the English refusal cue must be
        # lowercase too; the previous "I cannot" could never match.
        if "لا أستطيع" in label or "i cannot" in label:
            return random.choice(["0", "1"])

        if "نعم" in label:
            pred_label = "1"
        elif "لا" in label:
            pred_label = "0"
        else:
            pred_label = ""

        print(f"Predicted Label: {pred_label}")

        return pred_label
    except Exception as e:
        # Best effort: a malformed response is scored as the negative class.
        print(f"Error in post-processing: {str(e)}")
        return "0"
def post_process(response):
    """Map a model response to a claim-detection label.

    Args:
        response: Mapping that may carry the model's answer under the
            ``"output"`` key.

    Returns:
        ``"1"`` when the answer contains "yes", a random choice of
        ``"0"``/``"1"`` when the model refuses to answer, and ``"0"`` in
        every other case, including a response that cannot be read.
    """
    try:
        label = ""

        if "output" in response:
            label = response["output"].strip().lower()

        print(f"Extracted Label: {label}")

        # The text is lowercased above, so the English refusal cue must be
        # lowercase too. With "I cannot" the check never fired and refusals
        # were scored "0" because "cannot" contains "no".
        if "لا أستطيع" in label or "i cannot" in label:
            return random.choice(["0", "1"])

        # Anything that is not an explicit "yes" (an explicit "no" or an
        # unrecognised answer) is scored as the negative class.
        pred_label = "1" if "yes" in label else "0"

        print(f"Predicted Label: {pred_label}")

        return pred_label
    except Exception as e:
        print(f"Error in post-processing: {str(e)}")
        # Fall back to a real class label, as the sibling few-shot assets
        # do, rather than the out-of-vocabulary string "No Response ".
        return "0"
def config():
    """Return the benchmark configuration for this asset.

    The mapping names the dataset, task and model classes to use, the
    model arguments, and the test / few-shot train splits (both ``"ar"``).
    """
    fewshot_args = {"train_split": "ar"}
    general_args = {"test_split": "ar", "fewshot": fewshot_args}

    return dict(
        dataset=CT22ClaimDataset,
        task=ClaimDetectionTask,
        model=AzureModel,
        model_args={"max_tries": 30},
        general_args=general_args,
    )
def post_process(response):
    """Map a model response to a claim-detection label.

    Args:
        response: Mapping that may carry the model's answer under the
            ``"output"`` key.

    Returns:
        ``"1"`` when the answer contains "نعم" or "yes", ``"0"`` when it
        contains "لا" or "no", a random choice of ``"0"``/``"1"`` when the
        model refuses to answer, ``""`` when no cue is found, and ``"0"``
        when the response cannot be read at all.
    """
    try:
        label = ""

        if "output" in response:
            label = response["output"].strip().lower()

        print(f"Extracted Label: {label}")

        # The text is lowercased above, so the English refusal cue must be
        # lowercase too. With "I cannot" the check never fired and refusals
        # were scored "0" because "cannot" contains "no".
        if "لا أستطيع" in label or "i cannot" in label:
            return random.choice(["0", "1"])

        if "نعم" in label or "yes" in label:
            pred_label = "1"
        elif "لا" in label or "no" in label:
            pred_label = "0"
        else:
            pred_label = ""

        print(f"Predicted Label: {pred_label}")

        return pred_label
    except Exception as e:
        # Best effort: a malformed response is scored as the negative class.
        print(f"Error in post-processing: {str(e)}")
        return "0"
def post_process(response):
    """Map a model response to a claim-detection label.

    Args:
        response: Mapping that may carry the model's answer under the
            ``"output"`` key.

    Returns:
        ``"1"`` when the answer contains an affirmative cue ("نعم",
        "contains a factual claim" or "label: 1"), a random choice of
        ``"0"``/``"1"`` when the model refuses to answer, and ``"0"`` in
        every other case, including a response that cannot be read.
    """
    try:
        label = ""

        if "output" in response:
            label = response["output"].strip().lower()

        print(f"Extracted Label: {label}")

        # The text is lowercased above, so the English refusal cue must be
        # lowercase too; the previous "I cannot" could never match.
        if "لا أستطيع" in label or "i cannot" in label:
            return random.choice(["0", "1"])

        affirmative = (
            "نعم" in label
            or "contains a factual claim" in label
            or "label: 1" in label
        )
        # Explicit negatives ("لا", "label: 0", ...) and unrecognised answers
        # both map to the negative class, so no separate negative test is
        # needed.
        return "1" if affirmative else "0"
    except Exception as e:
        print(f"Error in post-processing: {str(e)}")
        # Default to the negative label so no unknown target reaches the
        # evaluation (the previous code returned "" here).
        return "0"
def post_process(response):
    """Map a model response to a claim-detection label.

    Args:
        response: Mapping that may carry the model's answer under the
            ``"output"`` key.

    Returns:
        ``"1"`` when the answer contains an affirmative cue ("yes",
        "contains a factual claim" or "label: 1"), a random choice of
        ``"0"``/``"1"`` when the model refuses to answer, and ``"0"`` in
        every other case, including a response that cannot be read.
    """
    try:
        label = ""

        if "output" in response:
            label = response["output"].strip().lower()

        print(f"Extracted Label: {label}")

        # The text is lowercased above, so the English refusal cue must be
        # lowercase too. With "I cannot" the check never fired and refusals
        # were scored "0" because "cannot" contains "no".
        if "لا أستطيع" in label or "i cannot" in label:
            return random.choice(["0", "1"])

        affirmative = (
            "yes" in label
            or "contains a factual claim" in label
            or "label: 1" in label
        )
        # Explicit negatives ("no", "label: 0", ...) and unrecognised answers
        # both map to the negative class, so no separate negative test is
        # needed.
        pred_label = "1" if affirmative else "0"

        print(f"Predicted Label: {pred_label}")

        return pred_label
    except Exception as e:
        print(f"Error in post-processing: {str(e)}")
        # Default to the negative label so no unknown target reaches the
        # evaluation.
        return "0"
def post_process(response):
    """Map a model response to a claim-detection label.

    Args:
        response: Mapping that may carry the model's answer under the
            ``"output"`` key.

    Returns:
        ``"1"`` when the answer contains an affirmative cue ("yes", "نعم",
        "contains a factual claim" or "label: 1"), a random choice of
        ``"0"``/``"1"`` when the model refuses to answer, and ``"0"`` in
        every other case, including a response that cannot be read.
    """
    try:
        label = ""

        if "output" in response:
            label = response["output"].strip().lower()

        print(f"Extracted Label: {label}")

        # The text is lowercased above, so the English refusal cue must be
        # lowercase too. With "I cannot" the check never fired and refusals
        # were scored "0" because "cannot" contains "no".
        if "لا أستطيع" in label or "i cannot" in label:
            return random.choice(["0", "1"])

        # The prompt is written in Arabic, so the model may answer "نعم"
        # even though it was asked for 'yes'/'no'; accept it, as the other
        # mixed-language assets do.
        affirmative = (
            "yes" in label
            or "نعم" in label
            or "contains a factual claim" in label
            or "label: 1" in label
        )
        # Explicit negatives and unrecognised answers both map to the
        # negative class, so no separate negative test is needed.
        return "1" if affirmative else "0"
    except Exception as e:
        print(f"Error in post-processing: {str(e)}")
        # Default to the negative label so no unknown target reaches the
        # evaluation (the previous code returned "" here).
        return "0"