Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
knmnyn committed Jul 6, 2024
2 parents e85220c + 7b2f6bd commit 6c10af8
Show file tree
Hide file tree
Showing 38 changed files with 925 additions and 0 deletions.
18 changes: 18 additions & 0 deletions content/publication/aksu-etal-2023-prompter/cite.bib
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
@inproceedings{aksu-etal-2023-prompter,
  abstract = {A challenge in the Dialogue State Tracking (DST) field is adapting models to new domains without using any supervised data --- zero-shot domain adaptation. Parameter-Efficient Transfer Learning (PETL) has the potential to address this problem due to its robustness. However, it has yet to be applied to the zero-shot scenarios, as it is not clear how to apply it unsupervisedly. Our method, Prompter, uses descriptions of target domain slots to generate dynamic prefixes that are concatenated to the key and values at each layer's self-attention mechanism. This allows for the use of prefix-tuning in zero-shot. Prompter outperforms previous methods on both the MultiWOZ and SGD benchmarks. In generating prefixes, our analyses find that Prompter not only utilizes the semantics of slot descriptions but also how often the slots appear together in conversation. Moreover, Prompter's gains are due to its improved ability to distinguish ``none''-valued dialogue slots, compared against baselines.},
  address   = {Toronto, Canada},
  author    = {Aksu, Ibrahim Taha and
               Kan, Min-Yen and
               Chen, Nancy},
  booktitle = {Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  doi       = {10.18653/v1/2023.acl-long.252},
  editor    = {Rogers, Anna and
               Boyd-Graber, Jordan and
               Okazaki, Naoaki},
  month     = jul,
  pages     = {4588--4603},
  publisher = {Association for Computational Linguistics},
  title     = {{Prompter}: Zero-shot Adaptive Prefixes for {Dialogue State Tracking} Domain Adaptation},
  url       = {https://aclanthology.org/2023.acl-long.252},
  year      = {2023}
}
29 changes: 29 additions & 0 deletions content/publication/aksu-etal-2023-prompter/index.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
---
title: 'Prompter: Zero-shot Adaptive Prefixes for Dialogue State Tracking Domain Adaptation'
authors:
- Ibrahim Taha Aksu
- Min-Yen Kan
- Nancy Chen
date: '2023-07-01'
publishDate: '2024-07-06T02:22:24.632344Z'
publication_types:
- paper-conference
publication: '*Proceedings of the 61st Annual Meeting of the Association for Computational
Linguistics (Volume 1: Long Papers)*'
doi: 10.18653/v1/2023.acl-long.252
abstract: A challenge in the Dialogue State Tracking (DST) field is adapting models
to new domains without using any supervised data --- zero-shot domain adaptation.
Parameter-Efficient Transfer Learning (PETL) has the potential to address this problem
due to its robustness. However, it has yet to be applied to the zero-shot scenarios,
as it is not clear how to apply it unsupervisedly. Our method, Prompter, uses descriptions
of target domain slots to generate dynamic prefixes that are concatenated to the
key and values at each layer's self-attention mechanism. This allows for the use
of prefix-tuning in zero-shot. Prompter outperforms previous methods on both the
MultiWOZ and SGD benchmarks. In generating prefixes, our analyses find that Prompter
not only utilizes the semantics of slot descriptions but also how often the slots
appear together in conversation. Moreover, Prompter's gains are due to its improved
ability to distinguish "none"-valued dialogue slots, compared against baselines.
links:
- name: URL
url: https://aclanthology.org/2023.acl-long.252
---
18 changes: 18 additions & 0 deletions content/publication/benotti-etal-2023-understanding/cite.bib
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
@inproceedings{benotti-etal-2023-understanding,
  abstract = {With NLP research now quickly being transferred into real-world applications, it is important to be aware of and think through the consequences of our scientific investigation. Such ethical considerations are important in both authoring and reviewing. This tutorial will equip participants with basic guidelines for thinking deeply about ethical issues and review common considerations that recur in NLP research. The methodology is interactive and participatory, including case studies and working in groups. Importantly, the participants will be co-building the tutorial outcomes and will be working to create further tutorial materials to share as public outcomes.},
  address   = {Dubrovnik, Croatia},
  author    = {Benotti, Luciana and
               Fort, Kar{\"e}n and
               Kan, Min-Yen and
               Tsvetkov, Yulia},
  booktitle = {Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics: Tutorial Abstracts},
  doi       = {10.18653/v1/2023.eacl-tutorials.4},
  editor    = {Zanzotto, Fabio Massimo and
               Pradhan, Sameer},
  month     = may,
  pages     = {19--24},
  publisher = {Association for Computational Linguistics},
  title     = {Understanding Ethics in {NLP} Authoring and Reviewing},
  url       = {https://aclanthology.org/2023.eacl-tutorials.4},
  year      = {2023}
}
26 changes: 26 additions & 0 deletions content/publication/benotti-etal-2023-understanding/index.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
---
title: Understanding Ethics in NLP Authoring and Reviewing
authors:
- Luciana Benotti
- Karën Fort
- Min-Yen Kan
- Yulia Tsvetkov
date: '2023-05-01'
publishDate: '2024-07-06T02:22:24.603387Z'
publication_types:
- paper-conference
publication: '*Proceedings of the 17th Conference of the European Chapter of the Association
for Computational Linguistics: Tutorial Abstracts*'
doi: 10.18653/v1/2023.eacl-tutorials.4
abstract: With NLP research now quickly being transferred into real-world applications,
it is important to be aware of and think through the consequences of our scientific
investigation. Such ethical considerations are important in both authoring and reviewing.
This tutorial will equip participants with basic guidelines for thinking deeply
about ethical issues and review common considerations that recur in NLP research.
The methodology is interactive and participatory, including case studies and working
in groups. Importantly, the participants will be co-building the tutorial outcomes
and will be working to create further tutorial materials to share as public outcomes.
links:
- name: URL
url: https://aclanthology.org/2023.eacl-tutorials.4
---
17 changes: 17 additions & 0 deletions content/publication/chow-etal-2023-travlr/cite.bib
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
@inproceedings{chow-etal-2023-travlr,
  abstract = {Numerous visio-linguistic (V+L) representation learning methods have been developed, yet existing datasets do not adequately evaluate the extent to which they represent visual and linguistic concepts in a unified space. We propose several novel evaluation settings for V+L models, including cross-modal transfer. Furthermore, existing V+L benchmarks often report global accuracy scores on the entire dataset, making it difficult to pinpoint the specific reasoning tasks that models fail and succeed at. We present TraVLR, a synthetic dataset comprising four V+L reasoning tasks. TraVLR's synthetic nature allows us to constrain its training and testing distributions along task-relevant dimensions, enabling the evaluation of out-of-distribution generalisation. Each example in TraVLR redundantly encodes the scene in two modalities, allowing either to be dropped or added during training or testing without losing relevant information. We compare the performance of four state-of-the-art V+L models, finding that while they perform well on test examples from the same modality, they all fail at cross-modal transfer and have limited success accommodating the addition or deletion of one modality. We release TraVLR as an open challenge for the research community.},
  address   = {Dubrovnik, Croatia},
  author    = {Chow, Keng Ji and
               Tan, Samson and
               Kan, Min-Yen},
  booktitle = {Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics},
  doi       = {10.18653/v1/2023.eacl-main.242},
  editor    = {Vlachos, Andreas and
               Augenstein, Isabelle},
  month     = may,
  pages     = {3322--3347},
  publisher = {Association for Computational Linguistics},
  title     = {{TraVLR}: Now You See It, Now You Don't! A Bimodal Dataset for Evaluating Visio-Linguistic Reasoning},
  url       = {https://aclanthology.org/2023.eacl-main.242},
  year      = {2023}
}
34 changes: 34 additions & 0 deletions content/publication/chow-etal-2023-travlr/index.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
---
title: 'TraVLR: Now You See It, Now You Don''t! A Bimodal Dataset for Evaluating Visio-Linguistic
Reasoning'
authors:
- Keng Ji Chow
- Samson Tan
- Min-Yen Kan
date: '2023-05-01'
publishDate: '2024-07-06T02:22:24.618335Z'
publication_types:
- paper-conference
publication: '*Proceedings of the 17th Conference of the European Chapter of the Association
for Computational Linguistics*'
doi: 10.18653/v1/2023.eacl-main.242
abstract: Numerous visio-linguistic (V+L) representation learning methods have been
developed, yet existing datasets do not adequately evaluate the extent to which
they represent visual and linguistic concepts in a unified space. We propose several
novel evaluation settings for V+L models, including cross-modal transfer. Furthermore,
existing V+L benchmarks often report global accuracy scores on the entire dataset,
making it difficult to pinpoint the specific reasoning tasks that models fail and
succeed at. We present TraVLR, a synthetic dataset comprising four V+L reasoning
tasks. TraVLR's synthetic nature allows us to constrain its training and testing
distributions along task-relevant dimensions, enabling the evaluation of out-of-distribution
generalisation. Each example in TraVLR redundantly encodes the scene in two modalities,
allowing either to be dropped or added during training or testing without losing
relevant information. We compare the performance of four state-of-the-art V+L models,
finding that while they perform well on test examples from the same modality, they
all fail at cross-modal transfer and have limited success accommodating the addition
or deletion of one modality. We release TraVLR as an open challenge for the research
community.
links:
- name: URL
url: https://aclanthology.org/2023.eacl-main.242
---
23 changes: 23 additions & 0 deletions content/publication/diao-etal-2023-doolittle/cite.bib
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
@inproceedings{diao-etal-2023-doolittle,
  abstract = {Improving the quality of academic writing is a meaningful but challenging task. Conventional methods of language refinement focus on narrow, specific linguistic features within isolated sentences, such as grammatical errors and improper word use. We propose a more general task, Academic Writing Formalization (AWF), to improve the overall quality of formal academic writing at the paragraph level. We formulate this language refinement task as a formal text style transfer task which transfers informal-academic text to formal-academic and contribute a large-scale non-parallel dataset, Doolittle, for this purpose. Concurrently, we apply a method named metric-oriented reinforcement learning (MORL) to two large language models (LLM) where we incorporate different levels of automatic feedback into the training process. Our experiments reveal that existing text transfer models and grammatical error correction models address certain aspects of AWF but still have a significant performance gap compared to human performance. Meanwhile, language models fine-tuned with our MORL method exhibit considerably improved performance, rivaling the latest chatbot ChatGPT, but still have a non-negligible gap compared to the ground truth formal-academic texts in Doolittle.},
  address   = {Singapore},
  author    = {Diao, Shizhe and
               Lei, Yongyu and
               Pan, Liangming and
               Fang, Tianqing and
               Zhou, Wangchunshu and
               Keh, Sedrick and
               Kan, Min-Yen and
               Zhang, Tong},
  booktitle = {Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing},
  doi       = {10.18653/v1/2023.emnlp-main.809},
  editor    = {Bouamor, Houda and
               Pino, Juan and
               Bali, Kalika},
  month     = dec,
  pages     = {13093--13111},
  publisher = {Association for Computational Linguistics},
  title     = {{Doolittle}: Benchmarks and Corpora for Academic Writing Formalization},
  url       = {https://aclanthology.org/2023.emnlp-main.809},
  year      = {2023}
}
38 changes: 38 additions & 0 deletions content/publication/diao-etal-2023-doolittle/index.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
---
title: 'Doolittle: Benchmarks and Corpora for Academic Writing Formalization'
authors:
- Shizhe Diao
- Yongyu Lei
- Liangming Pan
- Tianqing Fang
- Wangchunshu Zhou
- Sedrick Keh
- Min-Yen Kan
- Tong Zhang
date: '2023-12-01'
publishDate: '2024-07-06T02:22:24.582277Z'
publication_types:
- paper-conference
publication: '*Proceedings of the 2023 Conference on Empirical Methods in Natural
Language Processing*'
doi: 10.18653/v1/2023.emnlp-main.809
abstract: Improving the quality of academic writing is a meaningful but challenging
task. Conventional methods of language refinement focus on narrow, specific linguistic
features within isolated sentences, such as grammatical errors and improper word
use. We propose a more general task, Academic Writing Formalization (AWF), to improve
the overall quality of formal academic writing at the paragraph level. We formulate
this language refinement task as a formal text style transfer task which transfers
informal-academic text to formal-academic and contribute a large-scale non-parallel
dataset, Doolittle, for this purpose. Concurrently, we apply a method named metric-oriented
reinforcement learning (MORL) to two large language models (LLM) where we incorporate
different levels of automatic feedback into the training process. Our experiments
reveal that existing text transfer models and grammatical error correction models
address certain aspects of AWF but still have a significant performance gap compared
to human performance. Meanwhile, language models fine-tuned with our MORL method
exhibit considerably improved performance, rivaling the latest chatbot ChatGPT,
but still have a non-negligible gap compared to the ground truth formal-academic
texts in Doolittle.
links:
- name: URL
url: https://aclanthology.org/2023.emnlp-main.809
---
18 changes: 18 additions & 0 deletions content/publication/ding-etal-2023-cocoscisum/cite.bib
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
@inproceedings{ding-etal-2023-cocoscisum,
  abstract = {We present a novel toolkit for controlled summarization of scientific documents, designed for the specific needs of the scientific community. Our system generates summaries based on user preferences, adjusting key attributes specifically of length and keyword inclusion. A distinguishing feature is its ability to manage multiple attributes concurrently, demonstrating Compositional Controllability for Scientific Summarization (CocoSciSum). Benchmarked against the strong Flan-T5 baseline, CocoSciSum exhibits superior performance on both the quality of summaries generated and the control over single and multiple attributes. Moreover, CocoSciSum is a user-centric toolkit, supporting user preferences expressed in natural language instructions, and accommodating diverse input document formats. CocoSciSum is available on GitHub (https://github.com/WING-NUS/SciAssist/tree/CocoSciSum) with an introduction video (https://youtu.be/YC1YDeEjAbQ).},
  address   = {Singapore},
  author    = {Ding, Yixi and
               Qin, Yanxia and
               Liu, Qian and
               Kan, Min-Yen},
  booktitle = {Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing: System Demonstrations},
  doi       = {10.18653/v1/2023.emnlp-demo.47},
  editor    = {Feng, Yansong and
               Lefever, Els},
  month     = dec,
  pages     = {518--526},
  publisher = {Association for Computational Linguistics},
  title     = {{CocoSciSum}: A Scientific Summarization Toolkit with Compositional Controllability},
  url       = {https://aclanthology.org/2023.emnlp-demo.47},
  year      = {2023}
}
30 changes: 30 additions & 0 deletions content/publication/ding-etal-2023-cocoscisum/index.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
---
title: 'CocoSciSum: A Scientific Summarization Toolkit with Compositional Controllability'
authors:
- Yixi Ding
- Yanxia Qin
- Qian Liu
- Min-Yen Kan
date: '2023-12-01'
publishDate: '2024-07-06T02:22:24.596464Z'
publication_types:
- paper-conference
publication: '*Proceedings of the 2023 Conference on Empirical Methods in Natural
Language Processing: System Demonstrations*'
doi: 10.18653/v1/2023.emnlp-demo.47
abstract: We present a novel toolkit for controlled summarization of scientific documents,
designed for the specific needs of the scientific community. Our system generates
summaries based on user preferences, adjusting key attributes specifically of length
and keyword inclusion. A distinguishing feature is its ability to manage multiple
attributes concurrently, demonstrating Compositional Controllability for Scientific
Summarization (CocoSciSum). Benchmarked against the strong Flan-T5 baseline, CocoSciSum
exhibits superior performance on both the quality of summaries generated and the
control over single and multiple attributes. Moreover, CocoSciSum is a user-centric
toolkit, supporting user preferences expressed in natural language instructions,
and accommodating diverse input document formats. CocoSciSum is available on GitHub
(https://github.com/WING-NUS/SciAssist/tree/CocoSciSum) with an introduction video
(https://youtu.be/YC1YDeEjAbQ).
links:
- name: URL
url: https://aclanthology.org/2023.emnlp-demo.47
---
22 changes: 22 additions & 0 deletions content/publication/li-etal-2023-coannotating/cite.bib
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
@inproceedings{li-etal-2023-coannotating,
  abstract = {Annotated data plays a critical role in Natural Language Processing (NLP) in training models and evaluating their performance. Given recent developments in Large Language Models (LLMs), models such as ChatGPT demonstrate zero-shot capability on many text-annotation tasks, comparable with or even exceeding human annotators. Such LLMs can serve as alternatives for manual annotation, due to lower costs and higher scalability. However, limited work has leveraged LLMs as complementary annotators, nor explored how annotation work is best allocated among humans and LLMs to achieve both quality and cost objectives. We propose CoAnnotating, a novel paradigm for Human-LLM co-annotation of unstructured texts at scale. Under this framework, we utilize uncertainty to estimate LLMs' annotation capability. Our empirical study shows CoAnnotating to be an effective means to allocate work from results on different datasets, with up to 21\% performance improvement over random baseline. For code implementation, see https://github.com/SALT-NLP/CoAnnotating.},
  address   = {Singapore},
  author    = {Li, Minzhi and
               Shi, Taiwei and
               Ziems, Caleb and
               Kan, Min-Yen and
               Chen, Nancy and
               Liu, Zhengyuan and
               Yang, Diyi},
  booktitle = {Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing},
  doi       = {10.18653/v1/2023.emnlp-main.92},
  editor    = {Bouamor, Houda and
               Pino, Juan and
               Bali, Kalika},
  month     = dec,
  pages     = {1487--1505},
  publisher = {Association for Computational Linguistics},
  title     = {{CoAnnotating}: Uncertainty-Guided Work Allocation between Human and Large Language Models for Data Annotation},
  url       = {https://aclanthology.org/2023.emnlp-main.92},
  year      = {2023}
}
35 changes: 35 additions & 0 deletions content/publication/li-etal-2023-coannotating/index.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
---
title: 'CoAnnotating: Uncertainty-Guided Work Allocation between Human and Large Language
Models for Data Annotation'
authors:
- Minzhi Li
- Taiwei Shi
- Caleb Ziems
- Min-Yen Kan
- Nancy Chen
- Zhengyuan Liu
- Diyi Yang
date: '2023-12-01'
publishDate: '2024-07-06T02:22:24.561215Z'
publication_types:
- paper-conference
publication: '*Proceedings of the 2023 Conference on Empirical Methods in Natural
Language Processing*'
doi: 10.18653/v1/2023.emnlp-main.92
abstract: Annotated data plays a critical role in Natural Language Processing (NLP)
in training models and evaluating their performance. Given recent developments in
Large Language Models (LLMs), models such as ChatGPT demonstrate zero-shot capability
on many text-annotation tasks, comparable with or even exceeding human annotators.
Such LLMs can serve as alternatives for manual annotation, due to lower costs and
higher scalability. However, limited work has leveraged LLMs as complementary annotators,
nor explored how annotation work is best allocated among humans and LLMs to achieve
both quality and cost objectives. We propose CoAnnotating, a novel paradigm for
Human-LLM co-annotation of unstructured texts at scale. Under this framework, we
utilize uncertainty to estimate LLMs' annotation capability. Our empirical study
shows CoAnnotating to be an effective means to allocate work from results on different
datasets, with up to 21% performance improvement over random baseline. For code
implementation, see https://github.com/SALT-NLP/CoAnnotating.
links:
- name: URL
url: https://aclanthology.org/2023.emnlp-main.92
---
Loading

0 comments on commit 6c10af8

Please sign in to comment.