cleanDOI: decode percent-encoded DOIs and strip an enclosing bracket

* A string that starts with http: or https: is percent-decoded. A malformed
  escape leaves the string unchanged instead of throwing URIError.
* In any other string, %3C followed later by %3E (hex digits in either case)
  is decoded to < and >.
* A trailing ], ) or } is dropped from the matched DOI when the text before
  the DOI contains the corresponding opening bracket after the last
  occurrence of that closing bracket.
* The previously skipped parentheses/brackets tests are enabled and tests for
  the encoded forms are added.

diff --git a/test/tests/utilitiesTest.js b/test/tests/utilitiesTest.js
index bb949d9..fa4e00a 100644
--- a/test/tests/utilitiesTest.js
+++ b/test/tests/utilitiesTest.js
@@ -93,13 +93,27 @@ describe("Zotero.Utilities", function() {
 			assert.equal(cleanDOI(`Foo bar ${doi}. Foo bar`), doi);
 		});
 
-		// FIXME
-		it.skip("should parse a DOI in parentheses", function () {
+		it("should parse a DOI with encoded < and >", function () {
+			const encodedUri = "10.1002/1096-9128(200005)12:6%3C375::AID-CPE480%3E3.0.CO;2-M";
+			const expected = "10.1002/1096-9128(200005)12:6<375::AID-CPE480>3.0.CO;2-M";
+			assert.equal(cleanDOI(`Foo bar ${encodedUri}. Foo bar`), expected);
+		});
+
+		it("should parse a DOI with url encoded params", function () {
+			const encodedUri = "https://doi.org/10.1002/1096-9128(200005)12:6%3C375::AID-CPE480%3E3.0.CO;2-M";
+			const expected = "10.1002/1096-9128(200005)12:6<375::AID-CPE480>3.0.CO;2-M";
+			assert.equal(cleanDOI(`Foo bar ${encodedUri}. Foo bar`), expected);
+		});
+
+		it("should not throw on a URL with a malformed percent-escape", function () {
+			assert.equal(cleanDOI("https://doi.org/10.1000/xyz%"), "10.1000/xyz%");
+		});
+
+		it("should parse a DOI in parentheses", function () {
 			assert.equal(cleanDOI(`Foo bar (${doi}) foo bar`), doi);
 		});
 
-		// FIXME
-		it.skip("should parse a DOI in brackets", function () {
+		it("should parse a DOI in brackets", function () {
 			assert.equal(cleanDOI(`Foo bar [${doi}] foo bar`), doi);
 		});
 	});
diff --git a/utilities.js b/utilities.js
index b54aaf0..2e142b2 100644
--- a/utilities.js
+++ b/utilities.js
@@ -482,9 +482,43 @@ var Utilities = {
 		if(typeof(x) != "string") {
 			throw new Error("cleanDOI: argument must be a string");
 		}
-		
+		// If it's a URL, percent-decode it
+		if (x.match(/^https?:/)) {
+			try {
+				x = decodeURIComponent(x);
+			}
+			catch (e) {
+				// A malformed escape (e.g. a lone "%") throws URIError; keep the raw string
+				if (e.name !== "URIError") throw e;
+			}
+		}
+		// Even if it's not a URL, decode %3C followed by %3E as < > (hex digits in either case)
+		var ltIndex = x.search(/%3C/i);
+		if (ltIndex >= 0 && ltIndex < x.search(/%3E/i)) {
+			x = x.replace(/%3C/gi, "<").replace(/%3E/gi, ">");
+		}
 		var doi = x.match(/10(?:\.[0-9]{4,})?\/[^\s]*[^\s\.,]/);
-		return doi ? doi[0] : null;
+		if (!doi) {
+			return null;
+		}
+		var result = doi[0];
+
+		// Check if the DOI ends with a bracket
+		const trailingBracket = result.slice(-1);
+		if ([']', ')', '}'].includes(trailingBracket)) {
+			// Check the portion of the string before the matched DOI for an unclosed bracket
+			const beforeDOI = x.slice(0, doi.index);
+			const openingBracket = {
+				']': '[',
+				')': '(',
+				'}': '{'
+			}[trailingBracket];
+			if (beforeDOI.lastIndexOf(openingBracket) > beforeDOI.lastIndexOf(trailingBracket)) {
+				// Remove the trailing bracket from the DOI
+				result = result.slice(0, -1);
+			}
+		}
+		return result;
 	},
 
 	/**