forked from knmnyn/hugo-blox
-
Notifications
You must be signed in to change notification settings - Fork 26
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'main' of https://github.com/WING-NUS/wing-website
- Loading branch information
Showing
38 changed files
with
925 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
@inproceedings{aksu-etal-2023-prompter,
  abstract  = {A challenge in the Dialogue State Tracking (DST) field is adapting models to new domains without using any supervised data --- zero-shot domain adaptation. Parameter-Efficient Transfer Learning (PETL) has the potential to address this problem due to its robustness. However, it has yet to be applied to the zero-shot scenarios, as it is not clear how to apply it unsupervisedly. Our method, Prompter, uses descriptions of target domain slots to generate dynamic prefixes that are concatenated to the key and values at each layer's self-attention mechanism. This allows for the use of prefix-tuning in zero-shot. Prompter outperforms previous methods on both the MultiWOZ and SGD benchmarks. In generating prefixes, our analyses find that Prompter not only utilizes the semantics of slot descriptions but also how often the slots appear together in conversation. Moreover, Prompter's gains are due to its improved ability to distinguish ``none''-valued dialogue slots, compared against baselines.},
  address   = {Toronto, Canada},
  author    = {Aksu, Ibrahim Taha and
               Kan, Min-Yen and
               Chen, Nancy},
  booktitle = {Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  doi       = {10.18653/v1/2023.acl-long.252},
  editor    = {Rogers, Anna and
               Boyd-Graber, Jordan and
               Okazaki, Naoaki},
  month     = jul,
  pages     = {4588--4603},
  publisher = {Association for Computational Linguistics},
  title     = {{Prompter}: Zero-shot Adaptive Prefixes for Dialogue State Tracking Domain Adaptation},
  url       = {https://aclanthology.org/2023.acl-long.252},
  year      = {2023}
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
--- | ||
title: 'Prompter: Zero-shot Adaptive Prefixes for Dialogue State Tracking Domain Adaptation' | ||
authors: | ||
- Ibrahim Taha Aksu | ||
- Min-Yen Kan | ||
- Nancy Chen | ||
date: '2023-07-01' | ||
publishDate: '2024-07-06T02:22:24.632344Z' | ||
publication_types: | ||
- paper-conference | ||
publication: '*Proceedings of the 61st Annual Meeting of the Association for Computational | ||
Linguistics (Volume 1: Long Papers)*' | ||
doi: 10.18653/v1/2023.acl-long.252 | ||
abstract: A challenge in the Dialogue State Tracking (DST) field is adapting models | ||
to new domains without using any supervised data --- zero-shot domain adaptation. | ||
Parameter-Efficient Transfer Learning (PETL) has the potential to address this problem | ||
due to its robustness. However, it has yet to be applied to the zero-shot scenarios, | ||
as it is not clear how to apply it unsupervisedly. Our method, Prompter, uses descriptions | ||
of target domain slots to generate dynamic prefixes that are concatenated to the | ||
key and values at each layer's self-attention mechanism. This allows for the use | ||
of prefix-tuning in zero-shot. Prompter outperforms previous methods on both the | ||
MultiWOZ and SGD benchmarks. In generating prefixes, our analyses find that Prompter | ||
not only utilizes the semantics of slot descriptions but also how often the slots | ||
appear together in conversation. Moreover, Prompter's gains are due to its improved | ||
ability to distinguish "none"-valued dialogue slots, compared against baselines. | ||
links: | ||
- name: URL | ||
url: https://aclanthology.org/2023.acl-long.252 | ||
--- |
18 changes: 18 additions & 0 deletions
18
content/publication/benotti-etal-2023-understanding/cite.bib
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
@inproceedings{benotti-etal-2023-understanding,
  abstract  = {With NLP research now quickly being transferred into real-world applications, it is important to be aware of and think through the consequences of our scientific investigation. Such ethical considerations are important in both authoring and reviewing. This tutorial will equip participants with basic guidelines for thinking deeply about ethical issues and review common considerations that recur in NLP research. The methodology is interactive and participatory, including case studies and working in groups. Importantly, the participants will be co-building the tutorial outcomes and will be working to create further tutorial materials to share as public outcomes.},
  address   = {Dubrovnik, Croatia},
  author    = {Benotti, Luciana and
               Fort, Karën and
               Kan, Min-Yen and
               Tsvetkov, Yulia},
  booktitle = {Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics: Tutorial Abstracts},
  doi       = {10.18653/v1/2023.eacl-tutorials.4},
  editor    = {Zanzotto, Fabio Massimo and
               Pradhan, Sameer},
  month     = may,
  pages     = {19--24},
  publisher = {Association for Computational Linguistics},
  title     = {Understanding Ethics in {NLP} Authoring and Reviewing},
  url       = {https://aclanthology.org/2023.eacl-tutorials.4},
  year      = {2023}
}
26 changes: 26 additions & 0 deletions
26
content/publication/benotti-etal-2023-understanding/index.md
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
--- | ||
title: Understanding Ethics in NLP Authoring and Reviewing | ||
authors: | ||
- Luciana Benotti | ||
- Karën Fort | ||
- Min-Yen Kan | ||
- Yulia Tsvetkov | ||
date: '2023-05-01' | ||
publishDate: '2024-07-06T02:22:24.603387Z' | ||
publication_types: | ||
- paper-conference | ||
publication: '*Proceedings of the 17th Conference of the European Chapter of the Association | ||
for Computational Linguistics: Tutorial Abstracts*' | ||
doi: 10.18653/v1/2023.eacl-tutorials.4 | ||
abstract: With NLP research now quickly being transferred into real-world applications, | ||
it is important to be aware of and think through the consequences of our scientific | ||
investigation. Such ethical considerations are important in both authoring and reviewing. | ||
This tutorial will equip participants with basic guidelines for thinking deeply | ||
about ethical issues and review common considerations that recur in NLP research. | ||
The methodology is interactive and participatory, including case studies and working | ||
in groups. Importantly, the participants will be co-building the tutorial outcomes | ||
and will be working to create further tutorial materials to share as public outcomes. | ||
links: | ||
- name: URL | ||
url: https://aclanthology.org/2023.eacl-tutorials.4 | ||
--- |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
@inproceedings{chow-etal-2023-travlr,
  abstract  = {Numerous visio-linguistic (V+L) representation learning methods have been developed, yet existing datasets do not adequately evaluate the extent to which they represent visual and linguistic concepts in a unified space. We propose several novel evaluation settings for V+L models, including cross-modal transfer. Furthermore, existing V+L benchmarks often report global accuracy scores on the entire dataset, making it difficult to pinpoint the specific reasoning tasks that models fail and succeed at. We present TraVLR, a synthetic dataset comprising four V+L reasoning tasks. TraVLR's synthetic nature allows us to constrain its training and testing distributions along task-relevant dimensions, enabling the evaluation of out-of-distribution generalisation. Each example in TraVLR redundantly encodes the scene in two modalities, allowing either to be dropped or added during training or testing without losing relevant information. We compare the performance of four state-of-the-art V+L models, finding that while they perform well on test examples from the same modality, they all fail at cross-modal transfer and have limited success accommodating the addition or deletion of one modality. We release TraVLR as an open challenge for the research community.},
  address   = {Dubrovnik, Croatia},
  author    = {Chow, Keng Ji and
               Tan, Samson and
               Kan, Min-Yen},
  booktitle = {Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics},
  doi       = {10.18653/v1/2023.eacl-main.242},
  editor    = {Vlachos, Andreas and
               Augenstein, Isabelle},
  month     = may,
  pages     = {3322--3347},
  publisher = {Association for Computational Linguistics},
  title     = {{TraVLR}: Now You See It, Now You Don't! A Bimodal Dataset for Evaluating Visio-Linguistic Reasoning},
  url       = {https://aclanthology.org/2023.eacl-main.242},
  year      = {2023}
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
--- | ||
title: 'TraVLR: Now You See It, Now You Don''t! A Bimodal Dataset for Evaluating Visio-Linguistic | ||
Reasoning' | ||
authors: | ||
- Keng Ji Chow | ||
- Samson Tan | ||
- Min-Yen Kan | ||
date: '2023-05-01' | ||
publishDate: '2024-07-06T02:22:24.618335Z' | ||
publication_types: | ||
- paper-conference | ||
publication: '*Proceedings of the 17th Conference of the European Chapter of the Association | ||
for Computational Linguistics*' | ||
doi: 10.18653/v1/2023.eacl-main.242 | ||
abstract: Numerous visio-linguistic (V+L) representation learning methods have been | ||
developed, yet existing datasets do not adequately evaluate the extent to which | ||
they represent visual and linguistic concepts in a unified space. We propose several | ||
novel evaluation settings for V+L models, including cross-modal transfer. Furthermore, | ||
existing V+L benchmarks often report global accuracy scores on the entire dataset, | ||
making it difficult to pinpoint the specific reasoning tasks that models fail and | ||
succeed at. We present TraVLR, a synthetic dataset comprising four V+L reasoning | ||
tasks. TraVLR's synthetic nature allows us to constrain its training and testing | ||
distributions along task-relevant dimensions, enabling the evaluation of out-of-distribution | ||
generalisation. Each example in TraVLR redundantly encodes the scene in two modalities, | ||
allowing either to be dropped or added during training or testing without losing | ||
relevant information. We compare the performance of four state-of-the-art V+L models, | ||
finding that while they perform well on test examples from the same modality, they | ||
all fail at cross-modal transfer and have limited success accommodating the addition | ||
or deletion of one modality. We release TraVLR as an open challenge for the research | ||
community. | ||
links: | ||
- name: URL | ||
url: https://aclanthology.org/2023.eacl-main.242 | ||
--- |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
@inproceedings{diao-etal-2023-doolittle,
  abstract  = {Improving the quality of academic writing is a meaningful but challenging task. Conventional methods of language refinement focus on narrow, specific linguistic features within isolated sentences, such as grammatical errors and improper word use. We propose a more general task, Academic Writing Formalization (AWF), to improve the overall quality of formal academic writing at the paragraph level. We formulate this language refinement task as a formal text style transfer task which transfers informal-academic text to formal-academic and contribute a large-scale non-parallel dataset, Doolittle, for this purpose. Concurrently, we apply a method named metric-oriented reinforcement learning (MORL) to two large language models (LLM) where we incorporate different levels of automatic feedback into the training process. Our experiments reveal that existing text transfer models and grammatical error correction models address certain aspects of AWF but still have a significant performance gap compared to human performance. Meanwhile, language models fine-tuned with our MORL method exhibit considerably improved performance, rivaling the latest chatbot ChatGPT, but still have a non-negligible gap compared to the ground truth formal-academic texts in Doolittle.},
  address   = {Singapore},
  author    = {Diao, Shizhe and
               Lei, Yongyu and
               Pan, Liangming and
               Fang, Tianqing and
               Zhou, Wangchunshu and
               Keh, Sedrick and
               Kan, Min-Yen and
               Zhang, Tong},
  booktitle = {Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing},
  doi       = {10.18653/v1/2023.emnlp-main.809},
  editor    = {Bouamor, Houda and
               Pino, Juan and
               Bali, Kalika},
  month     = dec,
  pages     = {13093--13111},
  publisher = {Association for Computational Linguistics},
  title     = {{Doolittle}: Benchmarks and Corpora for Academic Writing Formalization},
  url       = {https://aclanthology.org/2023.emnlp-main.809},
  year      = {2023}
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
--- | ||
title: 'Doolittle: Benchmarks and Corpora for Academic Writing Formalization' | ||
authors: | ||
- Shizhe Diao | ||
- Yongyu Lei | ||
- Liangming Pan | ||
- Tianqing Fang | ||
- Wangchunshu Zhou | ||
- Sedrick Keh | ||
- Min-Yen Kan | ||
- Tong Zhang | ||
date: '2023-12-01' | ||
publishDate: '2024-07-06T02:22:24.582277Z' | ||
publication_types: | ||
- paper-conference | ||
publication: '*Proceedings of the 2023 Conference on Empirical Methods in Natural | ||
Language Processing*' | ||
doi: 10.18653/v1/2023.emnlp-main.809 | ||
abstract: Improving the quality of academic writing is a meaningful but challenging | ||
task. Conventional methods of language refinement focus on narrow, specific linguistic | ||
features within isolated sentences, such as grammatical errors and improper word | ||
use. We propose a more general task, Academic Writing Formalization (AWF), to improve | ||
the overall quality of formal academic writing at the paragraph level. We formulate | ||
this language refinement task as a formal text style transfer task which transfers | ||
informal-academic text to formal-academic and contribute a large-scale non-parallel | ||
dataset, Doolittle, for this purpose. Concurrently, we apply a method named metric-oriented | ||
reinforcement learning (MORL) to two large language models (LLM) where we incorporate | ||
different levels of automatic feedback into the training process. Our experiments | ||
reveal that existing text transfer models and grammatical error correction models | ||
address certain aspects of AWF but still have a significant performance gap compared | ||
to human performance. Meanwhile, language models fine-tuned with our MORL method | ||
exhibit considerably improved performance, rivaling the latest chatbot ChatGPT, | ||
but still have a non-negligible gap compared to the ground truth formal-academic | ||
texts in Doolittle. | ||
links: | ||
- name: URL | ||
url: https://aclanthology.org/2023.emnlp-main.809 | ||
--- |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
@inproceedings{ding-etal-2023-cocoscisum,
  abstract  = {We present a novel toolkit for controlled summarization of scientific documents, designed for the specific needs of the scientific community. Our system generates summaries based on user preferences, adjusting key attributes specifically of length and keyword inclusion. A distinguishing feature is its ability to manage multiple attributes concurrently, demonstrating Compositional Controllability for Scientific Summarization (CocoSciSum). Benchmarked against the strong Flan-T5 baseline, CocoSciSum exhibits superior performance on both the quality of summaries generated and the control over single and multiple attributes. Moreover, CocoSciSum is a user-centric toolkit, supporting user preferences expressed in natural language instructions, and accommodating diverse input document formats. CocoSciSum is available on GitHub (https://github.com/WING-NUS/SciAssist/tree/CocoSciSum) with an introduction video (https://youtu.be/YC1YDeEjAbQ).},
  address   = {Singapore},
  author    = {Ding, Yixi and
               Qin, Yanxia and
               Liu, Qian and
               Kan, Min-Yen},
  booktitle = {Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing: System Demonstrations},
  doi       = {10.18653/v1/2023.emnlp-demo.47},
  editor    = {Feng, Yansong and
               Lefever, Els},
  month     = dec,
  pages     = {518--526},
  publisher = {Association for Computational Linguistics},
  title     = {{CocoSciSum}: A Scientific Summarization Toolkit with Compositional Controllability},
  url       = {https://aclanthology.org/2023.emnlp-demo.47},
  year      = {2023}
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
--- | ||
title: 'CocoSciSum: A Scientific Summarization Toolkit with Compositional Controllability' | ||
authors: | ||
- Yixi Ding | ||
- Yanxia Qin | ||
- Qian Liu | ||
- Min-Yen Kan | ||
date: '2023-12-01' | ||
publishDate: '2024-07-06T02:22:24.596464Z' | ||
publication_types: | ||
- paper-conference | ||
publication: '*Proceedings of the 2023 Conference on Empirical Methods in Natural | ||
Language Processing: System Demonstrations*' | ||
doi: 10.18653/v1/2023.emnlp-demo.47 | ||
abstract: We present a novel toolkit for controlled summarization of scientific documents, | ||
designed for the specific needs of the scientific community. Our system generates | ||
summaries based on user preferences, adjusting key attributes specifically of length | ||
and keyword inclusion. A distinguishing feature is its ability to manage multiple | ||
attributes concurrently, demonstrating Compositional Controllability for Scientific | ||
Summarization (CocoSciSum). Benchmarked against the strong Flan-T5 baseline, CocoSciSum | ||
exhibits superior performance on both the quality of summaries generated and the | ||
control over single and multiple attributes. Moreover, CocoSciSum is a user-centric | ||
toolkit, supporting user preferences expressed in natural language instructions, | ||
and accommodating diverse input document formats. CocoSciSum is available on GitHub | ||
(https://github.com/WING-NUS/SciAssist/tree/CocoSciSum) with an introduction video | ||
(https://youtu.be/YC1YDeEjAbQ). | ||
links: | ||
- name: URL | ||
url: https://aclanthology.org/2023.emnlp-demo.47 | ||
--- |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
@inproceedings{li-etal-2023-coannotating,
  abstract  = {Annotated data plays a critical role in Natural Language Processing (NLP) in training models and evaluating their performance. Given recent developments in Large Language Models (LLMs), models such as ChatGPT demonstrate zero-shot capability on many text-annotation tasks, comparable with or even exceeding human annotators. Such LLMs can serve as alternatives for manual annotation, due to lower costs and higher scalability. However, limited work has leveraged LLMs as complementary annotators, nor explored how annotation work is best allocated among humans and LLMs to achieve both quality and cost objectives. We propose CoAnnotating, a novel paradigm for Human-LLM co-annotation of unstructured texts at scale. Under this framework, we utilize uncertainty to estimate LLMs' annotation capability. Our empirical study shows CoAnnotating to be an effective means to allocate work from results on different datasets, with up to 21\% performance improvement over random baseline. For code implementation, see https://github.com/SALT-NLP/CoAnnotating.},
  address   = {Singapore},
  author    = {Li, Minzhi and
               Shi, Taiwei and
               Ziems, Caleb and
               Kan, Min-Yen and
               Chen, Nancy and
               Liu, Zhengyuan and
               Yang, Diyi},
  booktitle = {Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing},
  doi       = {10.18653/v1/2023.emnlp-main.92},
  editor    = {Bouamor, Houda and
               Pino, Juan and
               Bali, Kalika},
  month     = dec,
  pages     = {1487--1505},
  publisher = {Association for Computational Linguistics},
  title     = {{CoAnnotating}: Uncertainty-Guided Work Allocation between Human and Large Language Models for Data Annotation},
  url       = {https://aclanthology.org/2023.emnlp-main.92},
  year      = {2023}
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
--- | ||
title: 'CoAnnotating: Uncertainty-Guided Work Allocation between Human and Large Language | ||
Models for Data Annotation' | ||
authors: | ||
- Minzhi Li | ||
- Taiwei Shi | ||
- Caleb Ziems | ||
- Min-Yen Kan | ||
- Nancy Chen | ||
- Zhengyuan Liu | ||
- Diyi Yang | ||
date: '2023-12-01' | ||
publishDate: '2024-07-06T02:22:24.561215Z' | ||
publication_types: | ||
- paper-conference | ||
publication: '*Proceedings of the 2023 Conference on Empirical Methods in Natural | ||
Language Processing*' | ||
doi: 10.18653/v1/2023.emnlp-main.92 | ||
abstract: Annotated data plays a critical role in Natural Language Processing (NLP) | ||
in training models and evaluating their performance. Given recent developments in | ||
Large Language Models (LLMs), models such as ChatGPT demonstrate zero-shot capability | ||
on many text-annotation tasks, comparable with or even exceeding human annotators. | ||
Such LLMs can serve as alternatives for manual annotation, due to lower costs and | ||
higher scalability. However, limited work has leveraged LLMs as complementary annotators, | ||
nor explored how annotation work is best allocated among humans and LLMs to achieve | ||
both quality and cost objectives. We propose CoAnnotating, a novel paradigm for | ||
Human-LLM co-annotation of unstructured texts at scale. Under this framework, we | ||
utilize uncertainty to estimate LLMs' annotation capability. Our empirical study | ||
shows CoAnnotating to be an effective means to allocate work from results on different | ||
datasets, with up to 21% performance improvement over random baseline. For code | ||
implementation, see https://github.com/SALT-NLP/CoAnnotating. | ||
links: | ||
- name: URL | ||
url: https://aclanthology.org/2023.emnlp-main.92 | ||
--- |
Oops, something went wrong.