-
Notifications
You must be signed in to change notification settings - Fork 0
/
bibtex2json.js
240 lines (204 loc) · 7.43 KB
/
bibtex2json.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
"use strict"
// Usage: node bibtex2json.js filename.bib
// Requires that parse-bibtex.js is in the same directory as bibtex2json.js
var filepath = __dirname + "/" + process.argv[2];
var bib = require("fs").readFileSync(filepath).toString();
var parse = require(__dirname + "/parse-bibtex.js");
var parsed = parse(bib);
//Make an array of orig bibtex entries as strings
var origBibtex = bib.trim().split("@");
origBibtex.splice(0,1);
origBibtex.forEach(function(entry,index,origBibtex){
//splice removes the @ sign so I add it back to the bibtex entry.
origBibtex[index] = "@" + entry;
})
//console.log(origBibtex);
//Dictionary for converting latex special character codes into utf-8
//equivalents
var specialCharsDict = {
'{\\"o}': 'ö',
'\\"{o}': 'ö',
"\\\x27{e}": 'é',
"{\\\x27e}": 'é',
"\\'{c}": 'ć',
'{\\"a}': 'ä',
'\\"{a}': 'ä',
'\\&': '&'
};
var specialTitleWordsDict = {
': a': ': A',
'3d': '3D'
}
var confJournNamesDict = {
'User Interface Software and Technology': 'User Interface Software and Technology (UIST)',
'Computer Supported Cooperative Work': 'Computer Supported Cooperative Work (CSCW)',
'ACM Trans. Graph.': 'ACM Transactions on Graphics (MISSING SHORTNAME)',
'Visualization and Computer Graphics': 'IEEE Trans. on Visualization and Computer Graphics (MISSING SHORTNAME)',
'Comput. Graph. Forum': 'Computer Graphics Forum (EGSR)',
'SIGCHI Conference on Human Factors in Computing Systems': 'ACM Human Factors in Computing Systems (CHI)',
'Commun. ACM': 'Communications of the ACM (CACM)',
'ACM Trans. Comput.-Hum. Interact.': 'ACM Trans. on Computer-Human Interaction',
'Graphics Interface': 'Graphics Interface',
'Applied Perception in Graphics and Visualization': 'Applied Perception in Graphics and Visualization (APGV)',
'Visual Analytics Science and Technology': 'Visual Analytics Science and Technology (VAST)',
'Eurographics Conference on Rendering Techniques':'Eurographics Conference on Rendering Techniques (EGSR)',
'Computer Vision and Pattern Recognition': 'Computer Vision and Pattern Recognition (CVPR)',
'Advanced Visual Interfaces': 'Advanced Visual Interfaces (AVI)',
'International Conference on Human-Computer Interaction': 'International Conference on Human-Computer Interaction (INTERACT)',
'International Conference on Document Analysis and Recognition': 'International Conference on Document Analysis and Recognition (ICDAR)',
'Conference on Visualization': 'IEEE Visualization',
'Symposium on Computer Animation': 'Symposium on Computer Animation (SCA)',
'Interactive 3D Graphics': 'ACM Symposium on Interactive 3D Graphics (I3D)'
}
var monthsDict = {
JANUARY: "Jan",
FEBRUARY: "Feb",
MARCH: "Mar",
APRIL: "Apr",
MAY: "May",
JUNE: "Jun",
JULY: "Jul",
AUGUST: "Aug",
SEPTEMBER: "Sep",
OCTOBER: "Oct",
NOVEMBER: "Nov",
DECEMBER: "Dec",
JAN: "Jan",
FEB: "Feb",
MAR: "Mar",
APR: "Apr",
// MAY: "May",
JUN: "Jun",
JUL: "Jul",
AUG: "Aug",
SEP: "Sep",
OCT: "Oct",
NOV: "Nov",
DEC: "Dec"
};
String.prototype.toTitleCase = function() {
var i, j, str, lowers, uppers;
str = this.replace(/([^\W_]+[^\s-]*) */g, function(txt) {
return txt.charAt(0).toUpperCase() + txt.substr(1).toLowerCase();
});
// Certain minor words should be left lowercase unless
// they are the first or last words in the string
lowers = ['A', 'An', 'The', 'And', 'But', 'Or', 'For', 'Nor', 'As', 'At',
'By', 'For', 'From', 'In', 'Into', 'Near', 'Of', 'On', 'Onto', 'To', 'With'];
for (i = 0, j = lowers.length; i < j; i++)
str = str.replace(new RegExp('\\s' + lowers[i] + '\\s', 'g'),
function(txt) {
return txt.toLowerCase();
});
for (var stwkey in specialTitleWordsDict) {
str = str.replace(stwkey,specialTitleWordsDict[stwkey]);
}
// Certain words such as initialisms or acronyms should be left uppercase
uppers = ['Id', 'Tv'];
for (i = 0, j = uppers.length; i < j; i++)
str = str.replace(new RegExp('\\b' + uppers[i] + '\\b', 'g'),
uppers[i].toUpperCase());
return str;
}
var finalPubsList = [];
function escapeRegExp(string) {
return string.replace(/([.*+?^=!:${}()|\[\]\/\\])/g, "\\$1");
}
// set up iterator for origBibtex
var origBibIdx = 0;
for (var key in parsed) {
// parse-bibtex always makes an extra @comments entry at end of pubs list.
// We skip processing this entry.
if(key == '@comments') continue;
var pub = parsed[key];
var finalPub = {};
finalPub["bibtex"] = origBibtex[origBibIdx];
origBibIdx++;
// Start by copying over all fields from pub to finalpub
for (var pubkey in pub) {
// finalPub[pubkey.toLowerCase()] = pub[pubkey];
// During first copy replace special latex chacters with utf-8 equivs.
var value = pub[pubkey];
for (var sckey in specialCharsDict) {
value = value.replace(new RegExp(escapeRegExp(sckey), 'g'),
specialCharsDict[sckey]);
}
for (var cjkey in confJournNamesDict) {
if(value.search(cjkey) > -1) {
value = confJournNamesDict[cjkey];
}
}
finalPub[pubkey.toLowerCase()] = value;
}
// Reformat TITLE field to titleCase (see function above)
var title = finalPub["title"];
var finalTitle = title;
if (title) {
finalTitle = title.toTitleCase();
finalPub["title"] = finalTitle;
} else {
finalPub["title"] = "MISSING TITLE";
}
// Reformat AUTHOR field into an array of authors in "first last" format.
//
var authors = finalPub["author"];
var finalAuthorsList = [];
if (authors) {
var authorsArray = authors.split(" and ");
// Check if format if "last, first" or "first last"
// If using comma seprated format, reformat to "first last"
for (var i=0; i < authorsArray.length; i++) {
var fullname = authorsArray[i];
var namesplit = fullname.split(", ");
if (namesplit.length == 2) {
fullname = namesplit[1] + " " + namesplit[0];
}
finalAuthorsList[i] = fullname;
}
finalPub["author"] = finalAuthorsList;
} else {
finalPub["author"] = "MISSING AUTHORS";
}
// Reformat month into three letter month name
var month = finalPub["month"];
var finalMonth = month;
if (month) {
if (monthsDict[month.toUpperCase()]) {
finalMonth = monthsDict[month.toUpperCase()];
}
finalPub["month"] = finalMonth;
} else {
finalPub["month"] = "MISSING MONTH";
}
// Reformat pages to turn "--" to "-"
var pages = finalPub["pages"];
var finalPages = pages;
if (pages) {
var pageslist = pages.split("--");
if (pageslist.length > 1) {
finalPages = pageslist[0] + "-" + pageslist[1];
finalPub["pages"] = finalPages;
}
} else {
finalPub["pages"] = "MISSING PAGES";
}
// Reformat keywords as a list
var keywordsList = finalPub["keywords"];
var finalKeywords = keywordsList;
if (keywordsList) {
finalPub["keywords"] = keywordsList.split(", ");
} else {
finalPub["keywords"] = ["MISSING KEYWORDS"];
}
//Add in fields for (keywords, url, thumbnail, award, quicklinks)
//with value MISSING
finalPub["url"] = "MISSING URL";
finalPub["thumbnail"] = "MISSING THUMBNAIL";
finalPub["award"] = "MISSING AWARD";
finalPub["quicklinks"] = [ { "type": "MISSING QLINK", "url": "MISSING QLINK" } ];
// console.log(finalPub);
// console.log(" ");
finalPubsList.push(finalPub);
}
//console.log(parsed);
console.log(JSON.stringify(finalPubsList, null, " "));