Merge pull request #32 from WING-NUS/hugoblox-import-publications

Hugo Blox Builder - Import latest publications
WING-NUS · Oct 19, 2024 · dee588b · dee588b
2 parents 4aa3f56 + c3182e0
commit dee588b
Show file tree

Hide file tree

Showing 18 changed files with 364 additions and 0 deletions.
diff --git a/content/publication/10-1007-978-3-540-89533-6-33/cite.bib b/content/publication/10-1007-978-3-540-89533-6-33/cite.bib
@@ -0,0 +1,16 @@
+@inproceedings{10.1007/978-3-540-89533-6_33,
+ abstract = {We present ForeCiteNote, an application that organizes personal digital collections of research articles. It is architected as a single HTML page with embedded Javascript that runs within a web browser. On top of standard annotation and tagging functionality, it also supports both online and offline usage patterns, including local storage of the paper collection.},
+ address = {Berlin, Heidelberg},
+ author = {Dang, Dinh-Trung and Tan, Yee Fan and Kan, Min-Yen},
+ booktitle = {Proceedings of the 11th International Conference on Asian Digital Libraries: Universal and Ubiquitous Access to Information},
+ doi = {10.1007/978-3-540-89533-6_33},
+ isbn = {9783540895329},
+ location = {Bali, Indonesia},
+ numpages = {4},
+ pages = {313--316},
+ publisher = {Springer-Verlag},
+ series = {ICADL '08},
+ title = {Towards a Webpage-Based Bibliographic Manager},
+ url = {https://doi.org/10.1007/978-3-540-89533-6_33},
+ year = {2008}
+}
diff --git a/content/publication/10-1007-978-3-540-89533-6-33/index.md b/content/publication/10-1007-978-3-540-89533-6-33/index.md
@@ -0,0 +1,22 @@
+---
+title: Towards a Webpage-Based Bibliographic Manager
+authors:
+- Dinh-Trung Dang
+- Yee Fan Tan
+- Min-Yen Kan
+date: '2008-01-01'
+publishDate: '2024-10-19T16:53:49.051109Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the 11th International Conference on Asian Digital Libraries:
+  Universal and Ubiquitous Access to Information*'
+doi: 10.1007/978-3-540-89533-6_33
+abstract: We present ForeCiteNote, an application that organizes personal digital
+  collections of research articles. It is architected as a single HTML page with embedded
+  Javascript that runs within a web browser. On top of standard annotation and tagging
+  functionality, it also supports both online and offline usage patterns, including
+  local storage of the paper collection.
+links:
+- name: URL
+  url: https://doi.org/10.1007/978-3-540-89533-6_33
+---
diff --git a/content/publication/10-1007-s-00799-008-0042-0/cite.bib b/content/publication/10-1007-s-00799-008-0042-0/cite.bib
@@ -0,0 +1,17 @@
+@article{10.1007/s00799-008-0042-0,
+ address = {Berlin, Heidelberg},
+ author = {Kan, Min-Yen and Lee, Dongwon and Lim, Ee-Peng},
+ doi = {10.1007/s00799-008-0042-0},
+ issn = {1432-5012},
+ issue_date = {November 2008},
+ journal = {Int. J. Digit. Libr.},
+ month = nov,
+ number = {2},
+ numpages = {2},
+ pages = {81--82},
+ publisher = {Springer-Verlag},
+ title = {Scholarly digital libraries at scale: introduction to the special issue on very large digital libraries},
+ url = {https://doi.org/10.1007/s00799-008-0042-0},
+ volume = {9},
+ year = {2008}
+}
diff --git a/content/publication/10-1007-s-00799-008-0042-0/index.md b/content/publication/10-1007-s-00799-008-0042-0/index.md
@@ -0,0 +1,17 @@
+---
+title: 'Scholarly digital libraries at scale: introduction to the special issue on
+  very large digital libraries'
+authors:
+- Min-Yen Kan
+- Dongwon Lee
+- Ee-Peng Lim
+date: '2008-11-01'
+publishDate: '2024-10-19T16:53:49.057575Z'
+publication_types:
+- article-journal
+publication: '*Int. J. Digit. Libr.*'
+doi: 10.1007/s00799-008-0042-0
+links:
+- name: URL
+  url: https://doi.org/10.1007/s00799-008-0042-0
+---
diff --git a/content/publication/10-1109-wi-iat-2010-14/cite.bib b/content/publication/10-1109-wi-iat-2010-14/cite.bib
@@ -0,0 +1,15 @@
+@inproceedings{10.1109/WI-IAT.2010.14,
+ abstract = {Web information is increasingly used as evidence in solving various problems, including record matching. However, acquiring web-based resources is slow and can incur other access costs. As such, solutions often acquire only a subset of the resources to achieve a balance between acquisition cost and benefit. Unfortunately, existing work has largely ignored the issue of which resources to acquire. They also fail to emphasize on the hierarchical nature of resource acquisitions, e.g., the search engine results for two queries must be obtained before their TF-IDF cosine similarity be computed. In this paper, we propose a framework for performing cost-sensitive acquisition of resources with hierarchical dependencies, and apply it to the web resource context. Our framework is versatile, and we show that a large variety of problems can be formulated using resource dependency graphs. We solve the resource acquisition problem by casting it as a combinatorial search problem. Finally, we demonstrate the effectiveness of our acquisition framework on record matching problems of different domains.},
+ address = {USA},
+ author = {Tan, Yee Fan and Kan, Min-Yen},
+ booktitle = {Proceedings of the 2010 IEEE/WIC/ACM International Conference on Web Intelligence and Intelligent Agent Technology - Volume 01},
+ doi = {10.1109/WI-IAT.2010.14},
+ isbn = {9780769541914},
+ numpages = {8},
+ pages = {382--389},
+ publisher = {IEEE Computer Society},
+ series = {WI-IAT '10},
+ title = {Hierarchical Cost-Sensitive Web Resource Acquisition for Record Matching},
+ url = {https://doi.org/10.1109/WI-IAT.2010.14},
+ year = {2010}
+}
diff --git a/content/publication/10-1109-wi-iat-2010-14/index.md b/content/publication/10-1109-wi-iat-2010-14/index.md
@@ -0,0 +1,30 @@
+---
+title: Hierarchical Cost-Sensitive Web Resource Acquisition for Record Matching
+authors:
+- Yee Fan Tan
+- Min-Yen Kan
+date: '2010-01-01'
+publishDate: '2024-10-19T16:53:49.007582Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the 2010 IEEE/WIC/ACM International Conference on Web
+  Intelligence and Intelligent Agent Technology - Volume 01*'
+doi: 10.1109/WI-IAT.2010.14
+abstract: Web information is increasingly used as evidence in solving various problems,
+  including record matching. However, acquiring web-based resources is slow and can
+  incur other access costs. As such, solutions often acquire only a subset of the
+  resources to achieve a balance between acquisition cost and benefit. Unfortunately,
+  existing work has largely ignored the issue of which resources to acquire. They
+  also fail to emphasize on the hierarchical nature of resource acquisitions, e.g.,
+  the search engine results for two queries must be obtained before their TF-IDF cosine
+  similarity be computed. In this paper, we propose a framework for performing cost-sensitive
+  acquisition of resources with hierarchical dependencies, and apply it to the web
+  resource context. Our framework is versatile, and we show that a large variety of
+  problems can be formulated using resource dependency graphs. We solve the resource
+  acquisition problem by casting it as a combinatorial search problem. Finally, we
+  demonstrate the effectiveness of our acquisition framework on record matching problems
+  of different domains.
+links:
+- name: URL
+  url: https://doi.org/10.1109/WI-IAT.2010.14
+---
diff --git a/content/publication/10-1145-1378889-1378951/cite.bib b/content/publication/10-1145-1378889-1378951/cite.bib
@@ -0,0 +1,17 @@
+@inproceedings{10.1145/1378889.1378951,
+ abstract = {We consider the task of automatic slide image retrieval, in which slide images are ranked for relevance against a textual query. Our implemented system, SLIDIR caters specifically for this task using features specifically designed for synthetic images embedded within slide presentation. We show promising results in both the ranking and binary relevance task and analyze the contribution of different features in the task performance.},
+ address = {New York, NY, USA},
+ author = {Liew, Guo Min and Kan, Min-Yen},
+ booktitle = {Proceedings of the 8th ACM/IEEE-CS Joint Conference on Digital Libraries},
+ doi = {10.1145/1378889.1378951},
+ isbn = {9781595939982},
+ keywords = {synthetic images, slidir, slides, presentations, image retrieval},
+ location = {Pittsburgh, PA, USA},
+ numpages = {4},
+ pages = {359--362},
+ publisher = {Association for Computing Machinery},
+ series = {JCDL '08},
+ title = {Slide image retrieval: a preliminary study},
+ url = {https://doi.org/10.1145/1378889.1378951},
+ year = {2008}
+}
diff --git a/content/publication/10-1145-1378889-1378951/index.md b/content/publication/10-1145-1378889-1378951/index.md
@@ -0,0 +1,27 @@
+---
+title: 'Slide image retrieval: a preliminary study'
+authors:
+- Guo Min Liew
+- Min-Yen Kan
+date: '2008-01-01'
+publishDate: '2024-10-19T16:53:49.063874Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the 8th ACM/IEEE-CS Joint Conference on Digital Libraries*'
+doi: 10.1145/1378889.1378951
+abstract: We consider the task of automatic slide image retrieval, in which slide
+  images are ranked for relevance against a textual query. Our implemented system,
+  SLIDIR caters specifically for this task using features specifically designed for
+  synthetic images embedded within slide presentation. We show promising results in
+  both the ranking and binary relevance task and analyze the contribution of different
+  features in the task performance.
+tags:
+- synthetic images
+- slidir
+- slides
+- presentations
+- image retrieval
+links:
+- name: URL
+  url: https://doi.org/10.1145/1378889.1378951
+---
diff --git a/content/publication/10-1145-1816123-1816155/cite.bib b/content/publication/10-1145-1816123-1816155/cite.bib
@@ -0,0 +1,17 @@
+@inproceedings{10.1145/1816123.1816155,
+ abstract = {We present a new algorithm to measure domain-specific readability. It iteratively computes the readability of domain-specific resources based on the difficulty of domain-specific concepts and vice versa, in a style reminiscent of other bipartite graph algorithms such as Hyperlink-Induced Topic Search (HITS) and the Stochastic Approach for Link-Structure Analysis (SALSA). While simple, our algorithm outperforms standard heuristic measures and remains competitive among supervised-learning approaches. Moreover, it is less domain-dependent and portable across domains as it does not rely on an annotated corpus or expensive expert knowledge that supervised or domain-specific methods require.},
+ address = {New York, NY, USA},
+ author = {Zhao, Jin and Kan, Min-Yen},
+ booktitle = {Proceedings of the 10th Annual Joint Conference on Digital Libraries},
+ doi = {10.1145/1816123.1816155},
+ isbn = {9781450300858},
+ keywords = {domain-specific information retrieval, graph-based algorithm, iterative computation, readability measure},
+ location = {Gold Coast, Queensland, Australia},
+ numpages = {10},
+ pages = {205--214},
+ publisher = {Association for Computing Machinery},
+ series = {JCDL '10},
+ title = {Domain-specific iterative readability computation},
+ url = {https://doi.org/10.1145/1816123.1816155},
+ year = {2010}
+}
diff --git a/content/publication/10-1145-1816123-1816155/index.md b/content/publication/10-1145-1816123-1816155/index.md
@@ -0,0 +1,29 @@
+---
+title: Domain-specific iterative readability computation
+authors:
+- Jin Zhao
+- Min-Yen Kan
+date: '2010-01-01'
+publishDate: '2024-10-19T16:53:49.031225Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the 10th Annual Joint Conference on Digital Libraries*'
+doi: 10.1145/1816123.1816155
+abstract: We present a new algorithm to measure domain-specific readability. It iteratively
+  computes the readability of domain-specific resources based on the difficulty of
+  domain-specific concepts and vice versa, in a style reminiscent of other bipartite
+  graph algorithms such as Hyperlink-Induced Topic Search (HITS) and the Stochastic
+  Approach for Link-Structure Analysis (SALSA). While simple, our algorithm outperforms
+  standard heuristic measures and remains competitive among supervised-learning approaches.
+  Moreover, it is less domain-dependent and portable across domains as it does not
+  rely on an annotated corpus or expensive expert knowledge that supervised or domain-specific
+  methods require.
+tags:
+- domain-specific information retrieval
+- graph-based algorithm
+- iterative computation
+- readability measure
+links:
+- name: URL
+  url: https://doi.org/10.1145/1816123.1816155
+---
diff --git a/content/publication/10-1145-1816123-1816193/cite.bib b/content/publication/10-1145-1816123-1816193/cite.bib
@@ -0,0 +1,17 @@
+@inproceedings{10.1145/1816123.1816193,
+ abstract = {We present ForeCite (FC), a prototype reader-centric digital library that supports the scholar in using scholarly documents. FC integrates three user interfaces: a bibliometric component, a document reader and annotation system, and a bibliographic management application.},
+ address = {New York, NY, USA},
+ author = {Nguyen, Thuy Dung and Kan, Min-Yen and Dang, Dinh-Trung and H{\"a}nse, Markus and Hong, Ching Hoi Andy and Luong, Minh-Thang and Gozali, Jesse Prabawa and Sugiyama, Kazunari and Tan, Yee Fan},
+ booktitle = {Proceedings of the 10th Annual Joint Conference on Digital Libraries},
+ doi = {10.1145/1816123.1816193},
+ isbn = {9781450300858},
+ keywords = {ForeCite, argumentative zoning, document logical structure, scholarly digital library},
+ location = {Gold Coast, Queensland, Australia},
+ numpages = {2},
+ pages = {387--388},
+ publisher = {Association for Computing Machinery},
+ series = {JCDL '10},
+ title = {{ForeCite}: towards a reader-centric scholarly digital library},
+ url = {https://doi.org/10.1145/1816123.1816193},
+ year = {2010}
+}
diff --git a/content/publication/10-1145-1816123-1816193/index.md b/content/publication/10-1145-1816123-1816193/index.md
@@ -0,0 +1,31 @@
+---
+title: 'ForeCite: towards a reader-centric scholarly digital library'
+authors:
+- Thuy Dung Nguyen
+- Min-Yen Kan
+- Dinh-Trung Dang
+- Markus Hänse
+- Ching Hoi Andy Hong
+- Minh-Thang Luong
+- Jesse Prabawa Gozali
+- Kazunari Sugiyama
+- Yee Fan Tan
+date: '2010-01-01'
+publishDate: '2024-10-19T16:53:49.024377Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the 10th Annual Joint Conference on Digital Libraries*'
+doi: 10.1145/1816123.1816193
+abstract: 'We present ForeCite (FC), a prototype reader-centric digital library that
+  supports the scholar in using scholarly documents. FC integrates three user interfaces:
+  a bibliometric component, a document reader and annotation system, and a bibliographic
+  management application.'
+tags:
+- ForeCite
+- argumentative zoning
+- document logical structure
+- scholarly digital library
+links:
+- name: URL
+  url: https://doi.org/10.1145/1816123.1816193
+---
diff --git a/content/publication/10-5555-1699750-1699762/cite.bib b/content/publication/10-5555-1699750-1699762/cite.bib
@@ -0,0 +1,14 @@
+@inproceedings{10.5555/1699750.1699762,
+ abstract = {We present FireCite, a Mozilla Firefox browser extension that helps scholars assess and manage scholarly references on the web by automatically detecting and parsing such reference strings in real-time. FireCite has two main components: 1) a reference string recognizer that has a high recall of 96%, and 2) a reference string parser that can process HTML web pages with an overall F1 of .878 and plaintext reference strings with an overall F1 of .97. In our preliminary evaluation, we presented our FireCite prototype to four academics in separate unstructured interviews. Their positive feedback gives evidence to the desirability of FireCite's citation management capabilities.},
+ address = {USA},
+ author = {Hong, Ching Hoi Andy and Gozali, Jesse Prabawa and Kan, Min-Yen},
+ booktitle = {Proceedings of the 2009 Workshop on Text and Citation Analysis for Scholarly Digital Libraries},
+ isbn = {9781932432589},
+ location = {Suntec, Singapore},
+ numpages = {9},
+ pages = {71--79},
+ publisher = {Association for Computational Linguistics},
+ series = {NLPIR4DL '09},
+ title = {{FireCite}: lightweight real-time reference string extraction from webpages},
+ year = {2009}
+}
diff --git a/content/publication/10-5555-1699750-1699762/index.md b/content/publication/10-5555-1699750-1699762/index.md
@@ -0,0 +1,22 @@
+---
+title: 'FireCite: lightweight real-time reference string extraction from webpages'
+authors:
+- Ching Hoi Andy Hong
+- Jesse Prabawa Gozali
+- Min-Yen Kan
+date: '2009-01-01'
+publishDate: '2024-10-19T16:53:49.038008Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the 2009 Workshop on Text and Citation Analysis for
+  Scholarly Digital Libraries*'
+abstract: "We present FireCite, a Mozilla Firefox browser extension that helps scholars
+  assess and manage scholarly references on the web by automatically detecting and
+  parsing such reference strings in real-time. FireCite has two main components: 1)
+  a reference string recognizer that has a high recall of 96%, and 2) a reference
+  string parser that can process HTML web pages with an overall F1 of .878 and plaintext
+  reference strings with an overall F1 of .97. In our preliminary evaluation, we presented
+  our FireCite prototype to four academics in separate unstructured interviews. Their
+  positive feedback gives evidence to the desirability of FireCite's citation management
+  capabilities."
+---
diff --git a/content/publication/10-5555-1875689-1875725/cite.bib b/content/publication/10-5555-1875689-1875725/cite.bib
@@ -0,0 +1,14 @@
+@inproceedings{10.5555/1875689.1875725,
+ abstract = {We investigate the automatic harvesting of research paper metadata from recent scholarly events. Our system, Kairos, combines a focused crawler and an information extraction engine, to convert a list of conference websites into a index filled with fields of metadata that correspond to individual papers. Using event date metadata extracted from the conference website, Kairos proactively harvests metadata about the individual papers soon after they are made public. We use a Maximum Entropy classifier to classify uniform resource locators (URLs) as scientific conference websites and use Conditional Random Fields (CRF) to extract individual paper metadata from such websites. Experiments show an acceptable measure of classification accuracy of over 95% for each of the two components.},
+ address = {Berlin, Heidelberg},
+ author = {H{\"a}nse, Markus and Kan, Min-Yen and Karduck, Achim P.},
+ booktitle = {Proceedings of the Role of Digital Libraries in a Time of Global Change, and 12th International Conference on Asia-Pacific Digital Libraries},
+ isbn = {3642136532},
+ location = {Gold Coast, Australia},
+ numpages = {10},
+ pages = {226--235},
+ publisher = {Springer-Verlag},
+ series = {ICADL'10},
+ title = {Kairos: proactive harvesting of research paper metadata from scientific conference web sites},
+ year = {2010}
+}
diff --git a/content/publication/10-5555-1875689-1875725/index.md b/content/publication/10-5555-1875689-1875725/index.md
@@ -0,0 +1,24 @@
+---
+title: 'Kairos: proactive harvesting of research paper metadata from scientific conference
+  web sites'
+authors:
+- Markus Hänse
+- Min-Yen Kan
+- Achim P. Karduck
+date: '2010-01-01'
+publishDate: '2024-10-19T16:53:49.017661Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the Role of Digital Libraries in a Time of Global Change,
+  and 12th International Conference on Asia-Pacific Digital Libraries*'
+abstract: We investigate the automatic harvesting of research paper metadata from
+  recent scholarly events. Our system, Kairos, combines a focused crawler and an information
+  extraction engine, to convert a list of conference websites into a index filled
+  with fields of metadata that correspond to individual papers. Using event date metadata
+  extracted from the conference website, Kairos proactively harvests metadata about
+  the individual papers soon after they are made public. We use a Maximum Entropy
+  classifier to classify uniform resource locators (URLs) as scientific conference
+  websites and use Conditional Random Fields (CRF) to extract individual paper metadata
+  from such websites. Experiments show an acceptable measure of classification accuracy
+  of over 95% for each of the two components.
+---
diff --git a/content/publication/10-5555-2039901-2039911/cite.bib b/content/publication/10-5555-2039901-2039911/cite.bib
@@ -0,0 +1,14 @@
+@inproceedings{10.5555/2039901.2039911,
+ abstract = {We present an automated approach to classify sentences of scholarly work with respect to their rhetorical function. While previous work that achieves this task of argumentative zoning requires richly annotated input, our approach is robust to noise and can process raw text. Even in cases where the input has noise (as it is obtained from optical character recognition or text extraction from PDF files), our robust classifier is largely accurate. We perform an in-depth study of our system both with clean and noisy inputs. We also give preliminary results from in situ acceptability testing when the classifier is embedded within a digital library reading environment.},
+ address = {Berlin, Heidelberg},
+ author = {Teufel, Simone and Kan, Min-Yen},
+ booktitle = {Proceedings of the 2009 International Conference on Advanced Language Technologies for Digital Libraries},
+ isbn = {9783642231599},
+ location = {Viareggio, Italy},
+ numpages = {17},
+ pages = {154--170},
+ publisher = {Springer-Verlag},
+ series = {NLP4DL'09/AT4DL'09},
+ title = {Robust argumentative zoning for sensemaking in scholarly documents},
+ year = {2009}
+}