-
Notifications
You must be signed in to change notification settings - Fork 1
/
poster.tex
273 lines (221 loc) · 8.65 KB
/
poster.tex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
\documentclass[30pt,a1paper]{tikzposter}
\usepackage{natbib}
\usepackage{subcaption}
\usepackage{array}
\usepackage{pifont}
\usepackage{enumitem}
\usepackage{scalefnt} % http://stackoverflow.com/a/4076346
%\usepackage{titling}
\usepackage{authblk}
% Fixed-width table columns built on array's m{<width>} column (cell content
% vertically centred): L = ragged-right, C = centred, R = ragged-left.
% In each one, \newline is first aliased to the \\ that the alignment command
% set up, so it still breaks lines inside a cell; \arraybackslash then makes \\
% end the table row again; \hspace{0pt} gives the cell an empty first item so
% that the first word may be hyphenated.
% None of the tabulars visible in this file use these column types.
\newcolumntype{L}[1]{>{\raggedright\let\newline\\\arraybackslash\hspace{0pt}}m{#1}}
\newcolumntype{C}[1]{>{\centering\let\newline\\\arraybackslash\hspace{0pt}}m{#1}}
\newcolumntype{R}[1]{>{\raggedleft\let\newline\\\arraybackslash\hspace{0pt}}m{#1}}
% Math accents restyled to cover their whole argument:
% \vec{..} draws a full-width arrow, \bar{..} a full-width overline.
\renewcommand{\vec}[1]{\overrightarrow{#1}}
\renewcommand{\bar}[1]{\overline{#1}}
% etoolbox supplies \patchcmd, used on the next line.
\usepackage{etoolbox}
% Try to remove the "\section*{\refname}" heading from thebibliography so the
% reference list is printed without a title of its own. Both the success and
% the failure hook are empty, so nothing is reported if the text is not found.
% NOTE(review): natbib is loaded above and may have replaced thebibliography
% with a definition that does not contain this exact text -- confirm that the
% patch actually applies.
\patchcmd{\thebibliography}{\section*{\refname}}{}{}{}
% Palette, given as 8-bit RGB triples.
% darkBlue fills the title banner, camBlue feeds the colour style below and
% darkGreen colours \cmark; myOrange is not referenced in the visible source.
\definecolor{darkBlue}{RGB}{12,112,196}
\definecolor{camBlue}{RGB}{106,173,228}
\definecolor{darkGreen}{RGB}{22,142,16}
\definecolor{myOrange}{RGB}{227,140,17}
% Large coloured pifont dingbats: \cmark is \ding{51} in darkGreen,
% \xmark is \ding{55} in red.
\newcommand{\cmark}{%
  \textcolor{darkGreen}{\Large\ding{51}}%
}%
\newcommand{\xmark}{%
  \textcolor{red}{\Large\ding{55}}%
}%
% Custom tikzposter colour style, activated later with \usecolorstyle.
% First group: the three palette colours. Second group: the colour of every
% poster element, expressed in terms of that palette and the named colours.
\definecolorstyle{myColorStyle}{
  \definecolor{colorOne}{named}{camBlue}
  \definecolor{colorTwo}{named}{yellow}
  \definecolor{colorThree}{named}{purple}
}{
  % Page background and frame
  \colorlet{backgroundcolor}{white}
  \colorlet{framecolor}{black}
  % Poster title
  \colorlet{titlefgcolor}{black}
  \colorlet{titlebgcolor}{camBlue}
  % Blocks
  % NOTE(review): block title background and foreground are the same colour;
  % the title text is only legible if the active block style does not fill the
  % title box -- confirm against the rendered poster.
  \colorlet{blocktitlebgcolor}{colorThree}
  \colorlet{blocktitlefgcolor}{colorThree}
  \colorlet{blockbodybgcolor}{white}
  \colorlet{blockbodyfgcolor}{black}
  % Inner blocks
  \colorlet{innerblocktitlebgcolor}{camBlue}
  \colorlet{innerblocktitlefgcolor}{black}
  \colorlet{innerblockbodybgcolor}{colorThree!30!white}
  \colorlet{innerblockbodyfgcolor}{black}
  % Notes
  \colorlet{notefgcolor}{black}
  \colorlet{notebgcolor}{colorTwo!50!white}
  \colorlet{noteframecolor}{colorTwo}
}
% Poster title, authors and affiliations. \author[ ] / \affil[ ] (authblk) are
% given blank labels; the dagger / asterisk marks linking authors to
% institutions are typed by hand as math superscripts.
\title{Metadata Clustering for Open Access Policies}
\author[ ]{Antonin Delpeuch$^{\dagger *}$}
\author[ ]{Thomas Bourgeat$^{*}$}
\author[ ]{Robin Champenois$^{*}$}
\affil[ ]{{\Large $^\dagger$University of Cambridge \hspace{2.5cm} $^{*}$\'{E}cole normale sup\'{e}rieure}}
% NOTE(review): "[email protected]" below is literal text, not an address -- it
% looks like the real contact e-mail was stripped out; restore it before
% printing. The trailing \vspace{-2cm} is a hand-tuned layout offset.
\affil[ ]{{\Large http://dissem.in -- @dissemOA -- [email protected]}\vspace{-2cm}}
% Redirect \maketitle to the internal macro \AB@maketitle (the @ in its name
% is why \makeatletter is needed). Presumably this is the class's own
% \maketitle as saved by authblk, so that the optional argument passed to
% \maketitle in the document body is honoured -- TODO confirm.
\makeatletter
\def\maketitle{\AB@maketitle}
\makeatother
% Use the lmss family (Latin Modern Sans) as the default "roman" text font.
\renewcommand*\rmdefault{lmss}
% Title banner graphic: an overlay picture that paints a dark-blue band from
% the top-left corner of the page to the picture coordinate (60,-5) and puts
% three logo images on it. The \vspace / \hspace values around the picture are
% hand-tuned offsets.
\titlegraphic{
\vspace{-3cm}
\hspace{-5cm}
{ \centering
% overlay: the picture takes up no space in the layout;
% remember picture: required to refer to the "current page" node.
\begin{tikzpicture}[overlay,remember picture]
% Band behind the title, filled and outlined in darkBlue.
\draw[fill=darkBlue,darkBlue] (current page.north west) rectangle (60,-5);
% Logos, shifted 10cm to the right as a group.
\begin{scope}[xshift=10cm]
\node at (-15,-1) (a) {\includegraphics[scale=0.4]{figures/reversed-cam.eps}};
\node at (-2.9,-0.3) {\includegraphics[scale=0.4]{figures/ENS.eps}};
\node at (3,-0.9) {\includegraphics[scale=1.9]{figures/ens-text.eps}};
\end{scope}
\end{tikzpicture}}
\vspace{5cm}}
% Layout theme first; the custom colour style and the title style are selected
% after it.
\usetheme{Simple}
\usecolorstyle{myColorStyle}
\usetitlestyle{Filled}
% Theme names noted by the author (Simple is the one in use above):
% Simple
% Autumn
\begin{document}
% Set the title height to 3cm and typeset the title with no vertical gap
% between it and the blocks below.
\setlength{\titleheight}{3cm}
\maketitle[titletoblockverticalspace=0cm]
\begin{columns}
\column{0.33}
\block{Fostering open access}{
The goal of the Open Access movement is to make the entire research literature
freely available online.
Many universities have set up \textbf{open access policies} to ensure that their
researchers upload their papers to repositories.
Yet, bibliographic metadata is scattered in many different places,
under diverse formats, making these policies hard to implement.
We provide a \textbf{clear view on researchers' production} by harvesting
metadata and clustering it with a novel machine learning algorithm, improving
over~\cite{weiler2011authormagic}.
\vspace{-2cm}
}
\block{Data sources}{
We retrieve metadata from four types of sources:
\begin{itemize}[leftmargin=0pt]
\item \textbf{Publishers' bibliographic data}, through
the CrossRef.org API;
\item Metadata from \textbf{preprint search engines}~\cite{knoth2011core,lossau2006bielefeld};
\item Our custom \textbf{OAI-PMH harvester} to ensure we have the latest
records from some strategic repositories;
\item Machine-readable \textbf{publishers' policies}, via SHERPA/RoMEO.
\end{itemize}
\vspace{1cm}
\hspace{1cm}
\includegraphics{figures/logos.eps}
\vspace{-2cm}
}
\block{Merging records}{
Records referring to the same papers are merged by computing
a \textbf{robust fingerprint} based on the title and the author names.
\vspace{1cm}
% Illustration for record merging: the same paper shown as two differently
% formatted records (title casing, author order and name abbreviation differ),
% followed by the question put to the reader. The font is scaled up by 1.16
% while the picture and its nodes are scaled down by 0.6.
{\scalefont{1.16}
\begin{tikzpicture}[scale=0.6,every node/.style={scale=0.6}]
% Record 1: emphasised title, abbreviated first names.
\node[align=left] (l) {\emph{Inferring types over zones with an Abstract Domain} \\
X. Rival, T. Cheng.};
% Record 2: title-cased title, full names in the opposite order.
\node[align=left] at (0,-3.5) (r) {Inferring Types over Zones with an Abstract Domain \\
Tie Cheng, Xavier Rival};
\node at (0, -7) {Are these two papers identical?};
\end{tikzpicture}
}
}
\column{0.33}
\block{Similarity clustering}{
Author names are ambiguous, because researchers
may share the same name or change names. To reduce this
ambiguity, we cluster papers involving similar author names
to identify which were written by the same person.
We use a \textbf{semi-supervised} approach based on a binary
classifier trained to predict whether similar names in two
papers refer to the same person. This defines a graph where
connected components correspond to researchers.
\vspace{1cm}
\hspace{-2.4cm}
\includegraphics[scale=0.8]{figures/clustering.eps}
{\small Spanning tree created by the classifier for \emph{Zhen Zhang}}
Our classifier is trained on a set of 2035 manually annotated
papers where authors have been identified. We randomly
generate positive and negative training pairs of example
papers to train a Support Vector Machine with linear kernel.
\vspace{1.2cm}
% Worked example for the classifier: two bibliographic records sharing the
% author name "Xavier Rival". The matching title fragment and the matching
% venue are boxed in both records and joined by overlay curves.
% Fix: the second author of the first record is "Bor-Yuh Evan Chang"
% (it previously read "Changi").
{\scalefont{0.75}
\hspace{-.1\linewidth}
\begin{minipage}{1.2\linewidth}
% remember picture on every picture, so that the overlay at the end can refer
% to nodes created by the inline \tikz pictures.
\tikzstyle{every picture}+=[remember picture]
% na: a drawn rectangle with no inner padding (the boxed text fragments);
% nat: picture-level options removing inner and outer padding.
\tikzstyle{na} = [shape=rectangle, inner sep=0pt, draw]
\tikzstyle{nat} = [outer sep=0pt,inner sep=0pt]
\noindent \emph{\tikz[baseline={([yshift={-\ht\strutbox}]word1a.north)},nat]
\node[na] (word1a) {\strut An Abstract Domain\,\null};
Combinator for Separately
~\tikz\node (fly1) {};\\
Conjoining Memory Abstractions
}\\
Antoine Toubhans, Bor-Yuh Evan Chang,
\textbf{Xavier Rival}.~\tikz\node (fly2) {};\\
\tikz[baseline={([yshift={-\ht\strutbox}]word2a.north)},nat]
\node[na] (word2a) {\strut Lecture Notes in Computer Science\,\null};, 2014
\medskip
\noindent \emph{Inferring Types over Zones with
\tikz[baseline={([yshift={-\ht\strutbox}]word1b.north)},nat]
\node[na] (word1b) {\strut \null\,an Abstract Domain\,\null};}\\
Tie Cheng, \textbf{Xavier Rival}.\\
\tikz[baseline={([yshift={-\ht\strutbox}]word2b.north)},nat]
\node[na] (word2b) {\strut Lecture Notes in Computer Science\,\null};, 2012
% Overlay links: a curve between the two boxed title fragments, steered by
% control points computed from the empty helper nodes fly1 / fly2, and an arc
% between the two boxed venues.
% NOTE(review): the ($ ... $) coordinate arithmetic needs the TikZ calc
% library, which this file does not load explicitly -- presumably the class
% provides it; confirm.
\begin{tikzpicture}[overlay]
\draw[thick](word1a.north east) .. controls ($(fly1.north east) + (1.8, 1.8)$)
and ($(fly2.east) + (1.5, 0)$) .. (word1b.north east);
\path[thick](word2a.south west) edge [out=220, in=140] (word2b.north west);
\end{tikzpicture}
\end{minipage}
}
Our features use unigram language models to model the similarity
between metadata fields, and name similarity heuristics to compare
authors.
}
\column{0.33}
\block{Relevance estimation}{
Once the papers are clustered, we assess their relevance for
particular researchers. We learn a topic model for each department
and use it to score each paper. The score is averaged over each
connected component in the graph and attributed to the target
researcher if the overall score for the component is positive.
\vspace{-1.2cm}
}
\block{Results}{
We tested our system by gathering the publications of 642 researchers
from the University of Cambridge. The results show that
many publications could be made freely available but cannot be found
in the repositories we cover.
\vspace{0.4cm}
\hspace{-2cm}
% Paper counts per category (rows: OA / Permissive / Not permissive / Unknown)
% split into Available and Unavailable. In every row Overall is
% Available + Unavailable, and the Total row is the sum of each column.
% Fix: the two counts of the "Not permissive" row were swapped (1176 / 82).
% With that order the Available and Unavailable columns added up to 7707 and
% 8505 rather than the stated totals 6613 and 9599; with 82 / 1176 every row
% and every column is consistent.
\begin{tabular}{r c c c}
& Available & Unavailable & Overall \\
\hline
OA & 1952 & 0 & 1952 \\
Permissive & 2499 & \textbf{5654} & 8153 \\
Not permissive & 82 & 1176 & 1258 \\
Unknown & 2080 & 2769 & 4849 \\
\hline
Total & 6613 & 9599 & 16212 \\
\hline
\end{tabular}
\vspace{0.8cm}
The list of papers annotated with their status can be browsed online:
\vspace{1.3cm}
\hspace{-2cm}
\includegraphics{figures/rendered_papers.eps}
\vspace{-2cm}
}
\block{References}{
% Make sure this entry appears in the list (it is also cited in the first block).
\nocite{weiler2011authormagic}
% Blank the bibliography heading text; the block title serves as the heading.
\renewcommand\refname{}
% Hand-tuned offset pulling the list upwards (presumably over the space left
% by the now-empty heading -- confirm against the rendered poster).
\vspace{-2cm}
% Reference list set in \small, "plain" style, entries from references.bib.
{\small
\bibliographystyle{plain}
\bibliography{references}
}
}
\end{columns}
\end{document}