Skip to content

Commit

Permalink
decode uri components in doi when possible (#28)
Browse files Browse the repository at this point in the history
Also, drop a trailing bracket or parenthesis from the matched DOI when there is
an unclosed opening one before it, so that the previously skipped tests can be enabled and pass.

Fixes: zotero/zotero#3218
  • Loading branch information
abaevbog authored Aug 5, 2024
1 parent eab0922 commit 98fd154
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 6 deletions.
18 changes: 14 additions & 4 deletions test/tests/utilitiesTest.js
Original file line number Diff line number Diff line change
Expand Up @@ -93,13 +93,23 @@ describe("Zotero.Utilities", function() {
assert.equal(cleanDOI(`Foo bar ${doi}. Foo bar`), doi);
});

// FIXME
it.skip("should parse a DOI in parentheses", function () {
it("should parse a DOI with URL-encoded < and >", function () {
const encodedUri = "10.1002/1096-9128(200005)12:6%3C375::AID-CPE480%3E3.0.CO;2-M";
const expected = "10.1002/1096-9128(200005)12:6<375::AID-CPE480>3.0.CO;2-M";
assert.equal(cleanDOI(`Foo bar ${encodedUri}. Foo bar`), expected);
});

it("should parse a DOI URL with encoded characters", function () {
const encodedUri = "https://doi.org/10.1002/1096-9128(200005)12:6%3C375::AID-CPE480%3E3.0.CO;2-M";
const expected = "10.1002/1096-9128(200005)12:6<375::AID-CPE480>3.0.CO;2-M";
assert.equal(cleanDOI(`Foo bar ${encodedUri}. Foo bar`), expected);
});

it("should parse a DOI in parentheses", function () {
assert.equal(cleanDOI(`Foo bar (${doi}) foo bar`), doi);
});

// FIXME
it.skip("should parse a DOI in brackets", function () {
it("should parse a DOI in brackets", function () {
assert.equal(cleanDOI(`Foo bar [${doi}] foo bar`), doi);
});
});
Expand Down
33 changes: 31 additions & 2 deletions utilities.js
Original file line number Diff line number Diff line change
Expand Up @@ -482,9 +482,38 @@ var Utilities = {
if(typeof(x) != "string") {
throw new Error("cleanDOI: argument must be a string");
}

// If it's a URL, decode it
if (x.match(/^https?:/)) {
x = decodeURIComponent(x);
}
// Even if it's not a URL, decode %3C followed by %3E as < >
var openingPos = x.indexOf("%3C");
if (openingPos != -1 && openingPos < x.indexOf("%3E")) {
x = x.replace(/%3C/g, "<");
x = x.replace(/%3E/g, ">");
}
var doi = x.match(/10(?:\.[0-9]{4,})?\/[^\s]*[^\s\.,]/);
return doi ? doi[0] : null;
if (!doi) {
return null;
}
var result = doi[0];

// Check if the DOI ends with a bracket
var trailingBracket = result.slice(-1);
if ([']', ')', '}'].includes(trailingBracket)) {
// Check the portion of the string before the matched DOI for an unclosed bracket
let beforeDOI = x.slice(0, doi.index);
let openingBracket = {
']': '[',
')': '(',
'}': '{'
}[trailingBracket];
if (beforeDOI.lastIndexOf(openingBracket) > beforeDOI.lastIndexOf(trailingBracket)) {
// Remove the trailing bracket from the DOI
result = result.slice(0, -1);
}
}
return result;
},

/**
Expand Down

0 comments on commit 98fd154

Please sign in to comment.