Skip to content

Commit

Permalink
Merge pull request #32 from WING-NUS/hugoblox-import-publications
Browse files Browse the repository at this point in the history
Hugo Blox Builder - Import latest publications
  • Loading branch information
knmnyn authored Oct 19, 2024
2 parents 4aa3f56 + c3182e0 commit dee588b
Show file tree
Hide file tree
Showing 18 changed files with 364 additions and 0 deletions.
16 changes: 16 additions & 0 deletions content/publication/10-1007-978-3-540-89533-6-33/cite.bib
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
@inproceedings{10.1007/978-3-540-89533-6_33,
  abstract = {We present ForeCiteNote, an application that organizes personal digital collections of research articles. It is architected as a single HTML page with embedded Javascript that runs within a web browser. On top of standard annotation and tagging functionality, it also supports both online and offline usage patterns, including local storage of the paper collection.},
  address = {Berlin, Heidelberg},
  author = {Dang, Dinh-Trung and Tan, Yee Fan and Kan, Min-Yen},
  booktitle = {Proceedings of the 11th International Conference on Asian Digital Libraries: Universal and Ubiquitous Access to Information},
  doi = {10.1007/978-3-540-89533-6_33},
  isbn = {9783540895329},
  location = {Bali, Indonesia},
  numpages = {4},
  pages = {313--316},
  publisher = {Springer-Verlag},
  series = {ICADL '08},
  title = {Towards a Webpage-Based Bibliographic Manager},
  url = {https://doi.org/10.1007/978-3-540-89533-6_33},
  year = {2008}
}
22 changes: 22 additions & 0 deletions content/publication/10-1007-978-3-540-89533-6-33/index.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
---
title: Towards a Webpage-Based Bibliographic Manager
authors:
  - Dinh-Trung Dang
  - Yee Fan Tan
  - Min-Yen Kan
# cite.bib records only year = 2008; the -01-01 part is presumably an importer default, not the real day.
date: '2008-01-01'
publishDate: '2024-10-19T16:53:49.051109Z'
publication_types:
  - paper-conference
publication: '*Proceedings of the 11th International Conference on Asian Digital Libraries:
  Universal and Ubiquitous Access to Information*'
doi: 10.1007/978-3-540-89533-6_33
abstract: We present ForeCiteNote, an application that organizes personal digital
  collections of research articles. It is architected as a single HTML page with embedded
  Javascript that runs within a web browser. On top of standard annotation and tagging
  functionality, it also supports both online and offline usage patterns, including
  local storage of the paper collection.
links:
  - name: URL
    url: https://doi.org/10.1007/978-3-540-89533-6_33
---
17 changes: 17 additions & 0 deletions content/publication/10-1007-s-00799-008-0042-0/cite.bib
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
@article{10.1007/s00799-008-0042-0,
  address = {Berlin, Heidelberg},
  author = {Kan, Min-Yen and Lee, Dongwon and Lim, Ee-Peng},
  doi = {10.1007/s00799-008-0042-0},
  issn = {1432-5012},
  issue_date = {November 2008},
  journal = {Int. J. Digit. Libr.},
  month = nov,
  number = {2},
  numpages = {2},
  pages = {81--82},
  publisher = {Springer-Verlag},
  title = {Scholarly digital libraries at scale: introduction to the special issue on very large digital libraries},
  url = {https://doi.org/10.1007/s00799-008-0042-0},
  volume = {9},
  year = {2008}
}
17 changes: 17 additions & 0 deletions content/publication/10-1007-s-00799-008-0042-0/index.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
---
title: 'Scholarly digital libraries at scale: introduction to the special issue on
  very large digital libraries'
authors:
  - Min-Yen Kan
  - Dongwon Lee
  - Ee-Peng Lim
# cite.bib records month = November, year = 2008; the day (-01) is presumably an importer default.
date: '2008-11-01'
publishDate: '2024-10-19T16:53:49.057575Z'
publication_types:
  - article-journal
publication: '*Int. J. Digit. Libr.*'
doi: 10.1007/s00799-008-0042-0
links:
  - name: URL
    url: https://doi.org/10.1007/s00799-008-0042-0
---
15 changes: 15 additions & 0 deletions content/publication/10-1109-wi-iat-2010-14/cite.bib
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
@inproceedings{10.1109/WI-IAT.2010.14,
  abstract = {Web information is increasingly used as evidence in solving various problems, including record matching. However, acquiring web-based resources is slow and can incur other access costs. As such, solutions often acquire only a subset of the resources to achieve a balance between acquisition cost and benefit. Unfortunately, existing work has largely ignored the issue of which resources to acquire. They also fail to emphasize on the hierarchical nature of resource acquisitions, e.g., the search engine results for two queries must be obtained before their TF-IDF cosine similarity be computed. In this paper, we propose a framework for performing cost-sensitive acquisition of resources with hierarchical dependencies, and apply it to the web resource context. Our framework is versatile, and we show that a large variety of problems can be formulated using resource dependency graphs. We solve the resource acquisition problem by casting it as a combinatorial search problem. Finally, we demonstrate the effectiveness of our acquisition framework on record matching problems of different domains.},
  address = {USA},
  author = {Tan, Yee Fan and Kan, Min-Yen},
  booktitle = {Proceedings of the 2010 IEEE/WIC/ACM International Conference on Web Intelligence and Intelligent Agent Technology - Volume 01},
  doi = {10.1109/WI-IAT.2010.14},
  isbn = {9780769541914},
  numpages = {8},
  pages = {382--389},
  publisher = {IEEE Computer Society},
  series = {WI-IAT '10},
  title = {Hierarchical Cost-Sensitive Web Resource Acquisition for Record Matching},
  url = {https://doi.org/10.1109/WI-IAT.2010.14},
  year = {2010}
}
30 changes: 30 additions & 0 deletions content/publication/10-1109-wi-iat-2010-14/index.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
---
title: Hierarchical Cost-Sensitive Web Resource Acquisition for Record Matching
authors:
  - Yee Fan Tan
  - Min-Yen Kan
# cite.bib records only year = 2010; the -01-01 part is presumably an importer default, not the real day.
date: '2010-01-01'
publishDate: '2024-10-19T16:53:49.007582Z'
publication_types:
  - paper-conference
publication: '*Proceedings of the 2010 IEEE/WIC/ACM International Conference on Web
  Intelligence and Intelligent Agent Technology - Volume 01*'
doi: 10.1109/WI-IAT.2010.14
abstract: Web information is increasingly used as evidence in solving various problems,
  including record matching. However, acquiring web-based resources is slow and can
  incur other access costs. As such, solutions often acquire only a subset of the
  resources to achieve a balance between acquisition cost and benefit. Unfortunately,
  existing work has largely ignored the issue of which resources to acquire. They
  also fail to emphasize on the hierarchical nature of resource acquisitions, e.g.,
  the search engine results for two queries must be obtained before their TF-IDF cosine
  similarity be computed. In this paper, we propose a framework for performing cost-sensitive
  acquisition of resources with hierarchical dependencies, and apply it to the web
  resource context. Our framework is versatile, and we show that a large variety of
  problems can be formulated using resource dependency graphs. We solve the resource
  acquisition problem by casting it as a combinatorial search problem. Finally, we
  demonstrate the effectiveness of our acquisition framework on record matching problems
  of different domains.
links:
  - name: URL
    url: https://doi.org/10.1109/WI-IAT.2010.14
---
17 changes: 17 additions & 0 deletions content/publication/10-1145-1378889-1378951/cite.bib
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
@inproceedings{10.1145/1378889.1378951,
  abstract = {We consider the task of automatic slide image retrieval, in which slide images are ranked for relevance against a textual query. Our implemented system, SLIDIR caters specifically for this task using features specifically designed for synthetic images embedded within slide presentation. We show promising results in both the ranking and binary relevance task and analyze the contribution of different features in the task performance.},
  address = {New York, NY, USA},
  author = {Liew, Guo Min and Kan, Min-Yen},
  booktitle = {Proceedings of the 8th ACM/IEEE-CS Joint Conference on Digital Libraries},
  doi = {10.1145/1378889.1378951},
  isbn = {9781595939982},
  keywords = {synthetic images, slidir, slides, presentations, image retrieval},
  location = {Pittsburgh, PA, USA},
  numpages = {4},
  pages = {359--362},
  publisher = {Association for Computing Machinery},
  series = {JCDL '08},
  title = {Slide image retrieval: a preliminary study},
  url = {https://doi.org/10.1145/1378889.1378951},
  year = {2008}
}
27 changes: 27 additions & 0 deletions content/publication/10-1145-1378889-1378951/index.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
---
title: 'Slide image retrieval: a preliminary study'
authors:
  - Guo Min Liew
  - Min-Yen Kan
# cite.bib records only year = 2008; the -01-01 part is presumably an importer default, not the real day.
date: '2008-01-01'
publishDate: '2024-10-19T16:53:49.063874Z'
publication_types:
  - paper-conference
publication: '*Proceedings of the 8th ACM/IEEE-CS Joint Conference on Digital Libraries*'
doi: 10.1145/1378889.1378951
abstract: We consider the task of automatic slide image retrieval, in which slide
  images are ranked for relevance against a textual query. Our implemented system,
  SLIDIR caters specifically for this task using features specifically designed for
  synthetic images embedded within slide presentation. We show promising results in
  both the ranking and binary relevance task and analyze the contribution of different
  features in the task performance.
tags:
  - synthetic images
  - slidir
  - slides
  - presentations
  - image retrieval
links:
  - name: URL
    url: https://doi.org/10.1145/1378889.1378951
---
17 changes: 17 additions & 0 deletions content/publication/10-1145-1816123-1816155/cite.bib
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
@inproceedings{10.1145/1816123.1816155,
  abstract = {We present a new algorithm to measure domain-specific readability. It iteratively computes the readability of domain-specific resources based on the difficulty of domain-specific concepts and vice versa, in a style reminiscent of other bipartite graph algorithms such as Hyperlink-Induced Topic Search (HITS) and the Stochastic Approach for Link-Structure Analysis (SALSA). While simple, our algorithm outperforms standard heuristic measures and remains competitive among supervised-learning approaches. Moreover, it is less domain-dependent and portable across domains as it does not rely on an annotated corpus or expensive expert knowledge that supervised or domain-specific methods require.},
  address = {New York, NY, USA},
  author = {Zhao, Jin and Kan, Min-Yen},
  booktitle = {Proceedings of the 10th Annual Joint Conference on Digital Libraries},
  doi = {10.1145/1816123.1816155},
  isbn = {9781450300858},
  keywords = {domain-specific information retrieval, graph-based algorithm, iterative computation, readability measure},
  location = {Gold Coast, Queensland, Australia},
  numpages = {10},
  pages = {205--214},
  publisher = {Association for Computing Machinery},
  series = {JCDL '10},
  title = {Domain-specific iterative readability computation},
  url = {https://doi.org/10.1145/1816123.1816155},
  year = {2010}
}
29 changes: 29 additions & 0 deletions content/publication/10-1145-1816123-1816155/index.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
---
title: Domain-specific iterative readability computation
authors:
  - Jin Zhao
  - Min-Yen Kan
# cite.bib records only year = 2010; the -01-01 part is presumably an importer default, not the real day.
date: '2010-01-01'
publishDate: '2024-10-19T16:53:49.031225Z'
publication_types:
  - paper-conference
publication: '*Proceedings of the 10th Annual Joint Conference on Digital Libraries*'
doi: 10.1145/1816123.1816155
abstract: We present a new algorithm to measure domain-specific readability. It iteratively
  computes the readability of domain-specific resources based on the difficulty of
  domain-specific concepts and vice versa, in a style reminiscent of other bipartite
  graph algorithms such as Hyperlink-Induced Topic Search (HITS) and the Stochastic
  Approach for Link-Structure Analysis (SALSA). While simple, our algorithm outperforms
  standard heuristic measures and remains competitive among supervised-learning approaches.
  Moreover, it is less domain-dependent and portable across domains as it does not
  rely on an annotated corpus or expensive expert knowledge that supervised or domain-specific
  methods require.
tags:
  - domain-specific information retrieval
  - graph-based algorithm
  - iterative computation
  - readability measure
links:
  - name: URL
    url: https://doi.org/10.1145/1816123.1816155
---
17 changes: 17 additions & 0 deletions content/publication/10-1145-1816123-1816193/cite.bib
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
@inproceedings{10.1145/1816123.1816193,
  abstract = {We present ForeCite (FC), a prototype reader-centric digital library that supports the scholar in using scholarly documents. FC integrates three user interfaces: a bibliometric component, a document reader and annotation system, and a bibliographic management application.},
  address = {New York, NY, USA},
  author = {Nguyen, Thuy Dung and Kan, Min-Yen and Dang, Dinh-Trung and Hänse, Markus and Hong, Ching Hoi Andy and Luong, Minh-Thang and Gozali, Jesse Prabawa and Sugiyama, Kazunari and Tan, Yee Fan},
  booktitle = {Proceedings of the 10th Annual Joint Conference on Digital Libraries},
  doi = {10.1145/1816123.1816193},
  isbn = {9781450300858},
  keywords = {ForeCite, argumentative zoning, document logical structure, scholarly digital library},
  location = {Gold Coast, Queensland, Australia},
  numpages = {2},
  pages = {387--388},
  publisher = {Association for Computing Machinery},
  series = {JCDL '10},
  title = {{ForeCite}: towards a reader-centric scholarly digital library},
  url = {https://doi.org/10.1145/1816123.1816193},
  year = {2010}
}
31 changes: 31 additions & 0 deletions content/publication/10-1145-1816123-1816193/index.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
---
title: 'ForeCite: towards a reader-centric scholarly digital library'
authors:
  - Thuy Dung Nguyen
  - Min-Yen Kan
  - Dinh-Trung Dang
  - Markus Hänse
  - Ching Hoi Andy Hong
  - Minh-Thang Luong
  - Jesse Prabawa Gozali
  - Kazunari Sugiyama
  - Yee Fan Tan
# cite.bib records only year = 2010; the -01-01 part is presumably an importer default, not the real day.
date: '2010-01-01'
publishDate: '2024-10-19T16:53:49.024377Z'
publication_types:
  - paper-conference
publication: '*Proceedings of the 10th Annual Joint Conference on Digital Libraries*'
doi: 10.1145/1816123.1816193
abstract: 'We present ForeCite (FC), a prototype reader-centric digital library that
  supports the scholar in using scholarly documents. FC integrates three user interfaces:
  a bibliometric component, a document reader and annotation system, and a bibliographic
  management application.'
tags:
  - ForeCite
  - argumentative zoning
  - document logical structure
  - scholarly digital library
links:
  - name: URL
    url: https://doi.org/10.1145/1816123.1816193
---
14 changes: 14 additions & 0 deletions content/publication/10-5555-1699750-1699762/cite.bib
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
@inproceedings{10.5555/1699750.1699762,
  abstract = {We present FireCite, a Mozilla Firefox browser extension that helps scholars assess and manage scholarly references on the web by automatically detecting and parsing such reference strings in real-time. FireCite has two main components: 1) a reference string recognizer that has a high recall of 96\%, and 2) a reference string parser that can process HTML web pages with an overall F1 of 878 and plaintext reference strings with an overall F1 of 97. In our preliminary evaluation, we presented our FireCite prototype to four academics in separate unstructured interviews. Their positive feedback gives evidence to the desirability of FireCite's citation management capabilities.},
  address = {USA},
  author = {Hong, Ching Hoi Andy and Gozali, Jesse Prabawa and Kan, Min-Yen},
  booktitle = {Proceedings of the 2009 Workshop on Text and Citation Analysis for Scholarly Digital Libraries},
  isbn = {9781932432589},
  location = {Suntec, Singapore},
  numpages = {9},
  pages = {71--79},
  publisher = {Association for Computational Linguistics},
  series = {NLPIR4DL '09},
  title = {{FireCite}: lightweight real-time reference string extraction from webpages},
  year = {2009}
}
22 changes: 22 additions & 0 deletions content/publication/10-5555-1699750-1699762/index.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
---
title: 'FireCite: lightweight real-time reference string extraction from webpages'
authors:
  - Ching Hoi Andy Hong
  - Jesse Prabawa Gozali
  - Min-Yen Kan
# cite.bib records only year = 2009; the -01-01 part is presumably an importer default, not the real day.
date: '2009-01-01'
publishDate: '2024-10-19T16:53:49.038008Z'
publication_types:
  - paper-conference
publication: '*Proceedings of the 2009 Workshop on Text and Citation Analysis for
  Scholarly Digital Libraries*'
# NOTE(review): "an overall F1 of 878" looks garbled (an F1 score cannot exceed 1, or 100%);
# verify both F1 figures against the published abstract before relying on them.
abstract: "We present FireCite, a Mozilla Firefox browser extension that helps scholars
  assess and manage scholarly references on the web by automatically detecting and
  parsing such reference strings in real-time. FireCite has two main components: 1)
  a reference string recognizer that has a high recall of 96%, and 2) a reference
  string parser that can process HTML web pages with an overall F1 of 878 and plaintext
  reference strings with an overall F1 of 97. In our preliminary evaluation, we presented
  our FireCite prototype to four academics in separate unstructured interviews. Their
  positive feedback gives evidence to the desirability of FireCite's citation management
  capabilities."
---
14 changes: 14 additions & 0 deletions content/publication/10-5555-1875689-1875725/cite.bib
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
@inproceedings{10.5555/1875689.1875725,
  abstract = {We investigate the automatic harvesting of research paper metadata from recent scholarly events. Our system, Kairos, combines a focused crawler and an information extraction engine, to convert a list of conference websites into a index filled with fields of metadata that correspond to individual papers. Using event date metadata extracted from the conference website, Kairos proactively harvests metadata about the individual papers soon after they are made public. We use a Maximum Entropy classifier to classify uniform resource locators (URLs) as scientific conference websites and use Conditional Random Fields (CRF) to extract individual paper metadata from such websites. Experiments show an acceptable measure of classification accuracy of over 95\% for each of the two components.},
  address = {Berlin, Heidelberg},
  author = {Hänse, Markus and Kan, Min-Yen and Karduck, Achim P.},
  booktitle = {Proceedings of the Role of Digital Libraries in a Time of Global Change, and 12th International Conference on Asia-Pacific Digital Libraries},
  isbn = {3642136532},
  location = {Gold Coast, Australia},
  numpages = {10},
  pages = {226--235},
  publisher = {Springer-Verlag},
  series = {ICADL'10},
  title = {Kairos: proactive harvesting of research paper metadata from scientific conference web sites},
  year = {2010}
}
24 changes: 24 additions & 0 deletions content/publication/10-5555-1875689-1875725/index.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
---
title: 'Kairos: proactive harvesting of research paper metadata from scientific conference
  web sites'
authors:
  - Markus Hänse
  - Min-Yen Kan
  - Achim P. Karduck
# cite.bib records only year = 2010; the -01-01 part is presumably an importer default, not the real day.
date: '2010-01-01'
publishDate: '2024-10-19T16:53:49.017661Z'
publication_types:
  - paper-conference
publication: '*Proceedings of the Role of Digital Libraries in a Time of Global Change,
  and 12th International Conference on Asia-Pacific Digital Libraries*'
abstract: We investigate the automatic harvesting of research paper metadata from
  recent scholarly events. Our system, Kairos, combines a focused crawler and an information
  extraction engine, to convert a list of conference websites into a index filled
  with fields of metadata that correspond to individual papers. Using event date metadata
  extracted from the conference website, Kairos proactively harvests metadata about
  the individual papers soon after they are made public. We use a Maximum Entropy
  classifier to classify uniform resource locators (URLs) as scientific conference
  websites and use Conditional Random Fields (CRF) to extract individual paper metadata
  from such websites. Experiments show an acceptable measure of classification accuracy
  of over 95% for each of the two components.
---
14 changes: 14 additions & 0 deletions content/publication/10-5555-2039901-2039911/cite.bib
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
@inproceedings{10.5555/2039901.2039911,
  abstract = {We present an automated approach to classify sentences of scholarly work with respect to their rhetorical function. While previous work that achieves this task of argumentative zoning requires richly annotated input, our approach is robust to noise and can process raw text. Even in cases where the input has noise (as it is obtained from optical character recognition or text extraction from PDF files), our robust classifier is largely accurate. We perform an in-depth study of our system both with clean and noisy inputs. We also give preliminary results from in situ acceptability testing when the classifier is embedded within a digital library reading environment.},
  address = {Berlin, Heidelberg},
  author = {Teufel, Simone and Kan, Min-Yen},
  booktitle = {Proceedings of the 2009 International Conference on Advanced Language Technologies for Digital Libraries},
  isbn = {9783642231599},
  location = {Viareggio, Italy},
  numpages = {17},
  pages = {154--170},
  publisher = {Springer-Verlag},
  series = {NLP4DL'09/AT4DL'09},
  title = {Robust argumentative zoning for sensemaking in scholarly documents},
  year = {2009}
}
Loading

0 comments on commit dee588b

Please sign in to comment.