From b63434715720395f60337dc522a22635ecf11cce Mon Sep 17 00:00:00 2001 From: Bogdan Abaev Date: Wed, 6 Sep 2023 15:19:05 -0400 Subject: [PATCH 1/5] decode uri components in doi when possible Also, attempt to drop the trailing bracket or paren if there is an unclosed opening one before the DOI, so that the existing bracket/parenthesis tests no longer need to be skipped and now pass. Fixes: zotero/zotero#3218 --- test/tests/utilitiesTest.js | 18 ++++++++++++++---- utilities.js | 32 ++++++++++++++++++++++++++++++-- 2 files changed, 44 insertions(+), 6 deletions(-) diff --git a/test/tests/utilitiesTest.js b/test/tests/utilitiesTest.js index bb949d9..fa4e00a 100644 --- a/test/tests/utilitiesTest.js +++ b/test/tests/utilitiesTest.js @@ -93,13 +93,23 @@ describe("Zotero.Utilities", function() { assert.equal(cleanDOI(`Foo bar ${doi}. Foo bar`), doi); }); - // FIXME - it.skip("should parse a DOI in parentheses", function () { + it("should parse a DOI with encoded < and >", function () { + const encodedUri = "10.1002/1096-9128(200005)12:6%3C375::AID-CPE480%3E3.0.CO;2-M"; + const expected = "10.1002/1096-9128(200005)12:6<375::AID-CPE480>3.0.CO;2-M"; + assert.equal(cleanDOI(`Foo bar ${encodedUri}. Foo bar`), expected); + }); + + it("should parse a DOI with url encoded params", function () { + const encodedUri = "https://doi.org/10.1002/1096-9128(200005)12:6%3C375::AID-CPE480%3E3.0.CO;2-M"; + const expected = "10.1002/1096-9128(200005)12:6<375::AID-CPE480>3.0.CO;2-M"; + assert.equal(cleanDOI(`Foo bar ${encodedUri}. 
Foo bar`), expected); + }); + + it("should parse a DOI in parentheses", function () { assert.equal(cleanDOI(`Foo bar (${doi}) foo bar`), doi); }); - // FIXME - it.skip("should parse a DOI in brackets", function () { + it("should parse a DOI in brackets", function () { assert.equal(cleanDOI(`Foo bar [${doi}] foo bar`), doi); }); }); diff --git a/utilities.js b/utilities.js index b54aaf0..2e142b2 100644 --- a/utilities.js +++ b/utilities.js @@ -482,9 +482,37 @@ var Utilities = { if(typeof(x) != "string") { throw new Error("cleanDOI: argument must be a string"); } - + // If it's a url, decode it + if (x.match(/^https?:/)) { + x = decodeURIComponent(x); + } + // Even if it's not a URL decode %3C followed by %3E as < > + if (x.indexOf("%3C") < x.indexOf("%3E") && x.indexOf("%3C") >= 0) { + x = x.replace(/%3C/g, "<"); + x = x.replace(/%3E/g, ">"); + } var doi = x.match(/10(?:\.[0-9]{4,})?\/[^\s]*[^\s\.,]/); - return doi ? doi[0] : null; + if (!doi) { + return null; + } + var result = doi[0]; + + // Check if the DOI ends with a bracket + const trailingBracket = result.slice(-1); + if ([']', ')', '}'].includes(trailingBracket)) { + // Check the portion of the string before the matched DOI for an unclosed bracket + const beforeDOI = x.slice(0, doi.index); + const openingBracket = { + ']': '[', + ')': '(', + '}': '{' + }[trailingBracket]; + if (beforeDOI.lastIndexOf(openingBracket) > beforeDOI.lastIndexOf(trailingBracket)) { + // Remove the trailing bracket from the DOI + result = result.slice(0, -1); + } + } + return result; }, /** From c7b216e47f7ed3d3a8f281c024335fce162a2e2a Mon Sep 17 00:00:00 2001 From: Dan Stillman Date: Mon, 5 Aug 2024 00:25:24 -0400 Subject: [PATCH 2/5] Only check for substring once --- utilities.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/utilities.js b/utilities.js index 2e142b2..21f36ec 100644 --- a/utilities.js +++ b/utilities.js @@ -487,7 +487,8 @@ var Utilities = { x = decodeURIComponent(x); } // Even if it's not a 
URL decode %3C followed by %3E as < > - if (x.indexOf("%3C") < x.indexOf("%3E") && x.indexOf("%3C") >= 0) { + var openingPos = x.indexOf("%3C"); + if (openingPos != -1 && openingPos < x.indexOf("%3E")) { x = x.replace(/%3C/g, "<"); x = x.replace(/%3E/g, ">"); } From 14f5096dd96448bc2285704445b50cc90885c37d Mon Sep 17 00:00:00 2001 From: Dan Stillman Date: Mon, 5 Aug 2024 00:25:45 -0400 Subject: [PATCH 3/5] Style and comment tweaks --- test/tests/utilitiesTest.js | 2 +- utilities.js | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/test/tests/utilitiesTest.js b/test/tests/utilitiesTest.js index fa4e00a..2cc21c7 100644 --- a/test/tests/utilitiesTest.js +++ b/test/tests/utilitiesTest.js @@ -99,7 +99,7 @@ describe("Zotero.Utilities", function() { assert.equal(cleanDOI(`Foo bar ${encodedUri}. Foo bar`), expected); }); - it("should parse a DOI with url encoded params", function () { + it("should parse a DOI URL with encoded characters", function () { const encodedUri = "https://doi.org/10.1002/1096-9128(200005)12:6%3C375::AID-CPE480%3E3.0.CO;2-M"; const expected = "10.1002/1096-9128(200005)12:6<375::AID-CPE480>3.0.CO;2-M"; assert.equal(cleanDOI(`Foo bar ${encodedUri}. 
Foo bar`), expected); diff --git a/utilities.js b/utilities.js index 21f36ec..14ea081 100644 --- a/utilities.js +++ b/utilities.js @@ -499,11 +499,11 @@ var Utilities = { var result = doi[0]; // Check if the DOI ends with a bracket - const trailingBracket = result.slice(-1); + var trailingBracket = result.slice(-1); if ([']', ')', '}'].includes(trailingBracket)) { // Check the portion of the string before the matched DOI for an unclosed bracket - const beforeDOI = x.slice(0, doi.index); - const openingBracket = { + let beforeDOI = x.slice(0, doi.index); + let openingBracket = { ']': '[', ')': '(', '}': '{' From 830babb11d5721e12f74a893be0dc3aa820e0e2b Mon Sep 17 00:00:00 2001 From: Dan Stillman Date: Mon, 5 Aug 2024 00:26:34 -0400 Subject: [PATCH 4/5] Clarify comment --- test/tests/utilitiesTest.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/tests/utilitiesTest.js b/test/tests/utilitiesTest.js index 2cc21c7..e714291 100644 --- a/test/tests/utilitiesTest.js +++ b/test/tests/utilitiesTest.js @@ -93,7 +93,7 @@ describe("Zotero.Utilities", function() { assert.equal(cleanDOI(`Foo bar ${doi}. Foo bar`), doi); }); - it("should parse a DOI with encoded < and >", function () { + it("should parse a DOI with URL-encoded < and >", function () { const encodedUri = "10.1002/1096-9128(200005)12:6%3C375::AID-CPE480%3E3.0.CO;2-M"; const expected = "10.1002/1096-9128(200005)12:6<375::AID-CPE480>3.0.CO;2-M"; assert.equal(cleanDOI(`Foo bar ${encodedUri}. 
Foo bar`), expected); From b5f3a1b622155bcf3f0206164d59b2850bdb5d21 Mon Sep 17 00:00:00 2001 From: Dan Stillman Date: Mon, 5 Aug 2024 00:27:16 -0400 Subject: [PATCH 5/5] Tweak comments --- utilities.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/utilities.js b/utilities.js index 14ea081..c1c67ca 100644 --- a/utilities.js +++ b/utilities.js @@ -482,11 +482,11 @@ var Utilities = { if(typeof(x) != "string") { throw new Error("cleanDOI: argument must be a string"); } - // If it's a url, decode it + // If it's a URL, decode it if (x.match(/^https?:/)) { x = decodeURIComponent(x); } - // Even if it's not a URL decode %3C followed by %3E as < > + // Even if it's not a URL, decode %3C followed by %3E as < > var openingPos = x.indexOf("%3C"); if (openingPos != -1 && openingPos < x.indexOf("%3E")) { x = x.replace(/%3C/g, "<");