From 2371723baea3f6ebf093297d57c4459b49cb1f53 Mon Sep 17 00:00:00 2001 From: swifterslb Date: Mon, 3 Apr 2023 16:28:00 +0200 Subject: [PATCH 01/17] FAO Publications: Update for new layout (#2975) --- FAO Publications.js | 904 ++++++++++++++++++++++++++++---------------- 1 file changed, 586 insertions(+), 318 deletions(-) diff --git a/FAO Publications.js b/FAO Publications.js index 75c5249e24e..b5e7a456865 100644 --- a/FAO Publications.js +++ b/FAO Publications.js @@ -2,14 +2,14 @@ "translatorID": "4883f662-29df-44ad-959e-27c9d036d165", "label": "FAO Publications", "creator": "Bin Liu ", - "target": "^https?://www\\.fao\\.org/(documents|publications)/", - "minVersion": "3.0", + "target": "^https?://www\\.fao\\.org/(publications|documents)/", + "minVersion": "5.0", "maxVersion": "", "priority": 100, "inRepository": true, "translatorType": 4, "browserSupport": "gcsibv", - "lastUpdated": "2021-08-31 04:00:00" + "lastUpdated": "2023-04-01 16:44:37" } /* @@ -30,11 +30,17 @@ */ function detectWeb(doc, url) { // Just differentiate single and multiple. - // Identify item type (book or conferencePaper) based on "fdr_label" class. - if (url.includes('card')) { + if (url.includes('/card/')) { let isConferencePaper = false; let confMetaName = ['اسم الاجتماع', '会议名称', 'Meeting Name', 'Nom de la réunion', 'Название мероприятия', 'Nombre de la reunión']; - let labelArray = doc.querySelectorAll('.fdr_label'); + let labelArray = []; + if (url.includes('/publications/')) { + labelArray = doc.querySelectorAll('.fdr_label'); // Identify item type (book or conferencePaper) based on "fdr_label" class. + } + else if (url.includes('/documents/')) { + labelArray = doc.querySelectorAll('.fw-bold'); // Identify item type (book or conferencePaper) based on "fw-bold" class. + // Page layout for meeting documents is not functioning properly at "documents" pages (e.g. https://www.fao.org/documents/card/en/c/ND423EN/ and http://www.fao.org/documents/card/zh/c/mw246ZH/ ). Keep the code for now because it doesn't interfere with books and meeting documents are very few. + } for (let i = 0; i < labelArray.length; i++) { for (let j = 0; j < confMetaName.length; j++) { isConferencePaper = labelArray[i].innerText.includes(confMetaName[j]); @@ -62,21 +68,85 @@ function detectWeb(doc, url) { return false; } -function cleanMeta(str) { - // clean meta fields obtained from page +function cleanMetaPub(str) { + // clean meta fields obtained from page for "publications" pages if (str.includes(';') === false) { return str.slice(str.indexOf(':') + 2); } else { - var strArray = str.slice(str.indexOf(':') + 2).split(';'); + let strArray = str.slice(str.indexOf(':') + 2).split(';'); + return strArray; + } +} + +function cleanMetaDoc(str) { + // clean meta fields obtained from page for "documents" pages + if (str.includes(';') === false) { + return str; + } + else { + let strArray = str.split(';').filter(String); // split by semicolon and remove empty elements return strArray; } } +function getLang(str) { + // language: 2 or 3 letters following ISO 639 + // indicated by the last 1-3 letters in PDF file name (langCode) + // One good example is the various language versions of http://www.fao.org/publications/card/en/c/I2801E + let langCode, lang = ''; + let matches = str.match(/([a-z]+)\.pdf$/i); + if (matches) { + langCode = matches[1]; + } + // In the new PDF naming scheme, langCode follows ISO 639. + if (langCode.length > 1) { + lang = langCode.toLowerCase(); + } + // In the old PDF naming scheme, langCode is one lower/upper case letter and only differentiates between the 6 UN languages. + else if ((langCode == 'a') || (langCode == 'A')) { + lang = 'ar'; + } + else if ((langCode == 'c') || (langCode == 'C')) { + lang = 'zh'; + } + else if ((langCode == 'e') || (langCode == 'E')) { + lang = 'en'; + } + else if ((langCode == 'f') || (langCode == 'F')) { + lang = 'fr'; + } + else if ((langCode == 'r') || (langCode == 'R')) { + lang = 'ru'; + } + else if ((langCode == 's') || (langCode == 'S')) { + lang = 'es'; + } + else { // Other languages are usually designated 'o'. Using 'else' just to be safe. + lang = 'other'; + } + return lang; +} + function scrape(doc, url) { var newItem = new Z.Item(); + var abs, existingMeta = {}; + var textVariable = { // declarations for metadata names as appeared in document pages in different languages + date: ['سنة النشر', '出版年份', 'Year of publication', 'Année de publication', 'Год издания', 'Fecha de publicación'], + publisher: ['الناشر', '出版方', 'Publisher', 'Éditeur', 'Издатель', 'Editor'], + place: ['مكان النشر', '出版地点', 'Place of publication', 'Lieu de publication', 'Место публикации', 'Lugar de publicacion'], + pages: ['الصفحات', '页数', 'Pages', 'Страницы', 'Páginas'], + ISBN: ['الرقم الدولي الموحد للكتاب', 'ISBN'], + author: ['الكاتب', '作者', 'Author', 'Auteur', 'Автор', 'Autor'], + seriesTitle: ['العنوان التسلسي', '系列标题', 'Serial Title', 'Titre de la série', 'Название серии', 'Título de la serie'], + seriesNumber: ['رقم المسلسل', '系列号码', 'Series number', 'Numéro de série', 'Серийный номер', 'Número de serie'], + conference: ['اسم الاجتماع', '会议名称', 'Meeting Name', 'Nom de la réunion', 'Название мероприятия', 'Nombre de la reunión'] + }; + var metaText = []; + var DOIMatch, pdfUrl, mainTitle, subTitle, metaResult, conferenceWeb = ''; + var DOILead = 'https://doi.org/'; - if (url.includes('card')) { + if (url.includes('/card/')) { // attach document card URL and snapshot // TEMP: Disable at least until we have post-JS snapshots /* newItem.attachments.push({ @@ -85,235 +155,365 @@ function scrape(doc, url) { mimeType: 'text/html', snapshot: true }); */ + if (url.includes('/publications/')) { + //* ********* Begin fixed-location variables ********** - //* ********* Begin fixed-location variables ********** - - // Some variables always appear and appear at the same location in all document pages. + // Some variables always appear and appear at the same location in all document pages. - // abstract - var abs = doc.getElementById("mainContentN0"); - // The childrens of `abs` are the label "Abstract:" in a strong-tag, - // the abstract in several p-tags or text nodes directly, and possibly - // a note about other languages which begins also with a strong-tag. - if (abs) { - var children = abs.childNodes; - var abstractFound = false; - for (let child of children) { - if (child.tagName == "STRONG" || (child.nodeType == 1 && ZU.xpathText(child, './/strong'))) { - if (abstractFound) { - break; // stop when another strong tag is found + // abstract + abs = doc.getElementById("mainContentN0"); + // The childrens of `abs` are the label "Abstract:" in a strong-tag, + // the abstract in several p-tags or text nodes directly, and possibly + // a note about other languages which begins also with a strong-tag. + if (abs) { + let children = abs.childNodes; + let abstractFound = false; + for (let child of children) { + if (child.tagName == "STRONG" || (child.nodeType == Node.ELEMENT_NODE && text(child, 'strong'))) { + if (abstractFound) { + break; // stop when another strong tag is found + } + else { + abstractFound = true; + continue; // exclude the label "Abstract" + } } - else { - abstractFound = true; - continue; // exclude the label "Abstract" + if (newItem.abstractNote) { + if (newItem.abstractNote.slice(-1) !== "\n") { + newItem.abstractNote += "\n\n"; + } + newItem.abstractNote += child.textContent; } - } - if (newItem.abstractNote) { - if (newItem.abstractNote.slice(-1) !== "\n") { - newItem.abstractNote += "\n\n"; + else { + newItem.abstractNote = child.textContent; } - newItem.abstractNote += child.textContent; } - else { - newItem.abstractNote = child.textContent; + // DOI: Some docs contain DOI as a separate paragraph in abs field + if (abs.innerText.includes(DOILead)) { + DOIMatch = abs.innerText.match(/https:\/\/doi\.org\/(.+)/i); + newItem.DOI = DOIMatch[1]; } } - // DOI: Some docs contain DOI as a separate paragraph in abs field - var DOILead = 'https://doi.org/'; - if (abs.innerText.includes(DOILead)) { - var DOIMatch = abs.innerText.match(/https:\/\/doi\.org\/(.+)/i); - newItem.DOI = DOIMatch[1]; + + // attach PDF: PDF link in innerHTML of "dynafef_det" class. + pdfUrl = attr(doc, '.dynafef_det a[href$=".pdf"]', 'href'); + newItem.attachments.push({ + url: pdfUrl, + title: 'Full Text PDF', + mimeType: 'application/pdf' + }); + + // url + newItem.url = url; + + //language + newItem.language = getLang(pdfUrl); + + // title: use colon to connect main title and subtitle (if subtitle exists) + mainTitle = text(doc, '#headerN0 > h1'); + subTitle = text(doc, 'h4.csc-firstHeader'); + if (!subTitle) { + newItem.title = mainTitle; + } + else if ((newItem.language == 'zh') || (newItem.language == 'ja')) { + newItem.title = mainTitle + ':' + subTitle; + } + else { + newItem.title = mainTitle + ': ' + subTitle; } - } - // attach PDF - var pdfUrl = ZU.xpath(doc, '//*[@id="mainRightN0"]/div[2]/a')[0].href; - newItem.attachments.push({ - url: pdfUrl, - title: 'Full Text PDF', - mimeType: 'application/pdf' - }); - // url - newItem.url = url; - // language: 2 or 3 letters following ISO 639 - // indicated by the last 1-3 letters in PDF file name (langCode) - // One good example is the various language versions of http://www.fao.org/publications/card/en/c/I2801E - var langCode = ''; - var matches = pdfUrl.match(/([a-z]+)\.pdf$/i); - if (matches) { - langCode = matches[1]; - } - // In the new PDF naming scheme, langCode follows ISO 639. - if (langCode.length > 1) { - newItem.language = langCode.toLowerCase(); - } - // In the old PDF naming scheme, langCode is one lower/upper case letter and only differentiates between the 6 UN languages. - else if ((langCode == 'a') || (langCode == 'A')) { - newItem.language = 'ar'; - } - else if ((langCode == 'c') || (langCode == 'C')) { - newItem.language = 'zh'; - } - else if ((langCode == 'e') || (langCode == 'E')) { - newItem.language = 'en'; - } - else if ((langCode == 'f') || (langCode == 'F')) { - newItem.language = 'fr'; - } - else if ((langCode == 'r') || (langCode == 'R')) { - newItem.language = 'ru'; - } - else if ((langCode == 's') || (langCode == 'S')) { - newItem.language = 'es'; - } - else { // Other languages are usually designated 'o'. Using 'else' just to be safe. - newItem.language = 'other'; - } - // title: use colon to connect main title and subtitle (if subtitle exists) - var mainTitle = ZU.xpathText(doc, '//*[@id="headerN0"]/h1'); - var subTitle = ZU.xpathText(doc, '//h4[@class="csc-firstHeader h1"]'); - if (!subTitle) { - newItem.title = mainTitle; - } - else if ((newItem.language == 'zh') || (newItem.language == 'ja')) { - newItem.title = mainTitle + ':' + subTitle; - } - else { - newItem.title = mainTitle + ': ' + subTitle; - } - //* ********* End fixed-location variables ********** - - - //* ********* Begin dynamic-location variables ********** - - // Variables that appear neither in all document pages nor at same positions in the pages. - var metaText = ZU.xpath(doc, '//*[@id="mainN0"]')[0].innerText.split('\n'); // scrape text of meta area and split into an array based on line breaks. - // get what variables are listed in the page, save to object existingMeta - var textVariable = { // declarations for metadata names as appeared in document pages in different languages - date: ['سنة النشر', '出版年份', 'Year of publication', 'Année de publication', 'Год издания', 'Fecha de publicación'], - publisher: ['الناشر', '出版方', 'Publisher', 'Éditeur', 'Издатель', 'Editor'], - place: ['مكان النشر', '出版地点', 'Place of publication', 'Lieu de publication', 'Место публикации', 'Lugar de publicacion'], - pages: ['الصفحات', '页数', 'Pages', 'Страницы', 'Páginas'], - ISBN: ['الرقم الدولي الموحد للكتاب', 'ISBN'], - author: ['الكاتب', '作者', 'Author', 'Auteur', 'Автор', 'Autor'], - seriesTitle: ['العنوان التسلسي', '系列标题', 'Serial Title', 'Titre de la série', 'Название серии', 'Título de la serie'], - seriesNumber: ['رقم المسلسل', '系列号码', 'Series number', 'Numéro de série', 'Серийный номер', 'Número de serie'], - conference: ['اسم الاجتماع', '会议名称', 'Meeting Name', 'Nom de la réunion', 'Название мероприятия', 'Nombre de la reunión'], - tags: ['المعجم الكلمات الموضوع', 'AGROVOC', 'Agrovoc', 'АГРОВОК'] - }; - var existingMeta = {}; - for (let i = 0; i < metaText.length; i++) { - for (let key in textVariable) { - for (let j = 0; j < textVariable[key].length; j++) { - if (metaText[i].includes(textVariable[key][j])) { - existingMeta[key] = metaText[i]; + //* ********* End fixed-location variables ********** + + + //* ********* Begin dynamic-location variables ********** + + // Variables that appear neither in all document pages nor at same positions in the pages. + // scrape text of meta area and split into an array based on line breaks. + metaText = text(doc, '#fdr_label').split('\n'); + // get what variables are listed in the page, save to object existingMeta + for (let i = 0; i < metaText.length; i++) { + for (let key in textVariable) { + for (let j = 0; j < textVariable[key].length; j++) { + if (metaText[i].includes(textVariable[key][j])) { + existingMeta[key] = metaText[i]; + } } } } + + for (let key in existingMeta) { + metaResult = cleanMetaPub(existingMeta[key]); + + // date + if (key.includes('date')) { + newItem.date = metaResult; + } + // publisher + if (key.includes('publisher')) { + newItem.publisher = metaResult; + } + // place + if (key.includes('place')) { + newItem.place = metaResult; + } + // number of pages + if (key.includes('pages')) { + newItem.numPages = metaResult.match(/\d+/)[0]; + } + // ISBN + if (key.includes('ISBN')) { + newItem.ISBN = ZU.cleanISBN(metaResult, false); + } + // author(s): whether there is one or more authors; whether last and first name are separated by ',' (if not, use single-field mode). + if (key.includes('author')) { + if (Array.isArray(metaResult)) { // If there are more than 1 authors, metaResult returns an array. + for (let i = 0; i < metaResult.length; i++) { + if (metaResult[i].includes(',')) { + newItem.creators.push(ZU.cleanAuthor(metaResult[i], 'author', true)); + } + else { + newItem.creators.push({ + lastName: metaResult[i], + creatorType: 'author', + fieldMode: 1 + }); + } + } + } + else if (metaResult.includes(',')) { + newItem.creators.push(ZU.cleanAuthor(metaResult, 'author', true)); + } + else { + newItem.creators.push({ + lastName: metaResult, + creatorType: 'author', + fieldMode: 1 + }); + } + } + // tag (Agrovoc) + if (key.includes('tags')) { + for (var i = 0; i < metaResult.length; i++) { + newItem.tags[i] = metaResult[i].trim(); + } + } + // seriesTitle + if (key.includes('seriesTitle')) { + newItem.series = metaResult; + } + // seriesNumber + if (key.includes('seriesNumber')) { + newItem.seriesNumber = metaResult; + } + // conferenceName: save for later conditions. + if (key.includes('conference')) { + conferenceWeb = metaResult[0]; + newItem.conferenceName = conferenceWeb; + } + } + + // If there's no publisher, use 'FAO' as publisher. + if (!newItem.publisher) { + newItem.publisher = 'FAO'; + } + // If there's no place, use 'Rome, Italy' as place. + if (!newItem.place) { + newItem.place = 'Rome, Italy'; + } + // If there's no author, use 'FAO' as author. + if (!newItem.creators.length) { + newItem.creators.push({ + lastName: 'FAO', + creatorType: 'author', + fieldMode: 1 + }); + } + // If conference exists in document page, the itemType is 'conferencePaper'; otherwise it's 'book'. + if (conferenceWeb) { + newItem.itemType = 'conferencePaper'; + } + else { + newItem.itemType = 'book'; + } + //* ********* End dynamic-location variables ********** } + if (url.includes('documents')) { + //* ********* Begin fixed-location variables ********** + + // Some variables always appear and appear at the same location in all document pages. - for (let key in existingMeta) { - var metaResult = cleanMeta(existingMeta[key]); + // abstract + abs = doc.getElementsByClassName("_card-body-info-center")[0]; + // abstractNote should be all text before the class "others-info". See example: https://www.fao.org/documents/card/en/c/ca8466en + var otherInfo = abs.querySelectorAll(".others-info")[0]; + var keywords = abs.querySelectorAll(".tags-list")[0]; // "KEYWORDS:" + tags + newItem.abstractNote = (abs.innerText.replace(otherInfo.innerText, '').replace(keywords.innerText, '')).trim(); - // date - if (key.includes('date')) { - newItem.date = metaResult; + // tags: class="badge" within abs + var tags = abs.querySelectorAll(".badge"); + for (let i = 0; i < tags.length; i++) { + newItem.tags[i] = tags[i].innerText.trim(); } - // publisher - if (key.includes('publisher')) { - newItem.publisher = metaResult; + + // attach PDF: PDF link in innerHTML of "_card-buttons-downloads" class. + pdfUrl = (doc.getElementsByClassName("_card-buttons-downloads")[0].innerHTML).match(/http\S*\.pdf/gi)[0]; + newItem.attachments.push({ + url: pdfUrl, + title: 'Full Text PDF', + mimeType: 'application/pdf' + }); + + // url + newItem.url = url; + + // language: 2 or 3 letters following ISO 639 + newItem.language = getLang(pdfUrl); + + // title: use colon to connect main title and subtitle (if subtitle exists) + mainTitle = doc.getElementsByClassName("page-title")[0].innerText; + var subTitleElement = doc.getElementsByClassName("sub-title"); + if (subTitleElement.length == '0') { // If there's no sub-title class in the web page, subTitleElement is an empty HTMLCollection with “0” (string, not number) as the length attribute. + newItem.title = mainTitle; } - // place - if (key.includes('place')) { - newItem.place = metaResult; + else if ((newItem.language == 'zh') || (newItem.language == 'ja')) { + newItem.title = mainTitle + ':' + subTitleElement[0].innerText; } - // number of pages - if (key.includes('pages')) { - newItem.numPages = metaResult.match(/\d+/)[0]; + else { + newItem.title = mainTitle + ': ' + subTitleElement[0].innerText; } - // ISBN - if (key.includes('ISBN')) { - newItem.ISBN = ZU.cleanISBN(metaResult, false); + + //* ********* End fixed-location variables ********** + + + //* ********* Begin dynamic-location variables ********** + + // Variables that appear neither in all document pages nor at same positions in the pages. + metaText = doc.getElementsByClassName("_card-body-info-left")[0].innerText; + + // DOI + if (metaText.includes(DOILead)) { + DOIMatch = metaText.match(/https:\/\/doi\.org\/(.+)/i); + newItem.DOI = DOIMatch[1]; } - // author(s): whether there is one or more authors; whether last and first name are separated by ',' (if not, use single-field mode). - if (key.includes('author')) { - if (Array.isArray(metaResult)) { // If there are more than 1 authors, metaResult returns an array. - for (let i = 0; i < metaResult.length; i++) { - if (metaResult[i].includes(',')) { - newItem.creators.push(ZU.cleanAuthor(metaResult[i], 'author', true)); + + // scrape text of meta area and split into an array based on line breaks. + var metaTextArr = metaText.split('\n'); + // get what variables are listed in the page, save to object existingMeta + for (let i = 0; i < metaTextArr.length; i++) { + for (let key in textVariable) { + for (let j = 0; j < textVariable[key].length; j++) { + if (metaTextArr[i].includes(textVariable[key][j])) { + existingMeta[key] = metaTextArr[i + 1]; // In metaTextArr, the value of a meta field always appears at the next element of the meta. } - else { - newItem.creators.push({ - lastName: metaResult[i], - creatorType: 'author', - fieldMode: 1 - }); + } + } + } + + for (let key in existingMeta) { + metaResult = cleanMetaDoc(existingMeta[key]); + + // date + if (key.includes('date')) { + newItem.date = metaResult; + } + // publisher + if (key.includes('publisher')) { + if (Array.isArray(metaResult)) { // differentiate between multiple (array) and single (string) + newItem.publisher = metaResult.join(', '); + } + else { + newItem.publisher = metaResult; + } + } + // place + if (key.includes('place')) { // differentiate between multiple (array) and single (string) + if (Array.isArray(metaResult)) { + newItem.publisher = metaResult.join(', '); + } + else { + newItem.publisher = metaResult; + } + } + // number of pages + if (key.includes('pages')) { + newItem.numPages = metaResult.match(/\d+/)[0]; + } + // ISBN + if (key.includes('ISBN')) { + newItem.ISBN = ZU.cleanISBN(metaResult, false); + } + // author(s): whether there is one or more authors; whether last and first name are separated by ',' (if not, use single-field mode). + if (key.includes('author')) { + if (Array.isArray(metaResult)) { // If there are more than 1 authors, metaResult returns an array. + for (let i = 0; i < metaResult.length; i++) { + if (metaResult[i].includes(',')) { + newItem.creators.push(ZU.cleanAuthor(metaResult[i], 'author', true)); + } + else { + newItem.creators.push({ + lastName: metaResult[i], + creatorType: 'author', + fieldMode: 1 + }); + } } } + else if (metaResult.includes(',')) { + newItem.creators.push(ZU.cleanAuthor(metaResult, 'author', true)); + } + else { + newItem.creators.push({ + lastName: metaResult, + creatorType: 'author', + fieldMode: 1 + }); + } } - else if (metaResult.includes(',')) { - newItem.creators.push(ZU.cleanAuthor(metaResult, 'author', true)); + // seriesTitle + if (key.includes('seriesTitle')) { + newItem.series = metaResult; } - else { - newItem.creators.push({ - lastName: metaResult, - creatorType: 'author', - fieldMode: 1 - }); + // seriesNumber + if (key.includes('seriesNumber')) { + newItem.seriesNumber = metaResult; } - } - // tag (Agrovoc) - if (key.includes('tags')) { - for (var i = 0; i < metaResult.length; i++) { - newItem.tags[i] = metaResult[i].trim(); + // conferenceName + if (key.includes('conference')) { + newItem.conferenceName = metaResult[0]; } } - // seriesTitle - if (key.includes('seriesTitle')) { - newItem.series = metaResult; + // If there's no publisher, use 'FAO' as publisher. + if (!newItem.publisher) { + newItem.publisher = 'FAO'; } - // seriesNumber: extract the number. - if (key.includes('seriesNumber')) { - newItem.seriesNumber = (metaResult.match(/\d+/) || [])[0]; + // If there's no place, use 'Rome, Italy' as place. + if (!newItem.place) { + newItem.place = 'Rome, Italy'; } - // conferenceName: save for later conditions. - if (key.includes('conference')) { - var conferenceWeb = metaResult[0]; - newItem.conferenceName = conferenceWeb; + // If there's no author, use 'FAO' as author. + if (!newItem.creators.length) { + newItem.creators.push({ + lastName: 'FAO', + creatorType: 'author', + fieldMode: 1 + }); } + // If conference exists in document page, the itemType is 'conferencePaper'; otherwise it's 'book'. + if (newItem.conferenceName) { + newItem.itemType = 'conferencePaper'; + } + else { + newItem.itemType = 'book'; + } + //* ********* End dynamic-location variables ********** } - - // If there's no publisher, use 'FAO' as publisher. - if (!newItem.publisher) { - newItem.publisher = 'FAO'; - } - // If there's no place, use 'Rome, Italy' as place. - if (!newItem.place) { - newItem.place = 'Rome, Italy'; - } - // If there's no author, use 'FAO' as author. - if (!newItem.creators.length) { - newItem.creators.push({ - lastName: 'FAO', - creatorType: 'author', - fieldMode: 1 - }); - } - // If conference exists in document page, the itemType is 'conferencePaper'; otherwise it's 'book'. - if (conferenceWeb) { - newItem.itemType = 'conferencePaper'; - } - else { - newItem.itemType = 'book'; - } - //* ********* End dynamic-location variables ********** } newItem.complete(); } - // get items from a multiple-item page -function getSearchResults(doc, checkOnly) { +// Multiple-item searching is no longer provided. +/*function getSearchResults(doc, checkOnly) { var items = {}; var found = false; var rows = ZU.xpath(doc, '//*[@class="item-image"]'); @@ -326,24 +526,23 @@ function getSearchResults(doc, checkOnly) { items[href] = title; } return found ? items : false; -} +}*/ function doWeb(doc, url) { - if (detectWeb(doc, url) == "multiple") { - Z.selectItems(getSearchResults(doc, false), function (items) { - if (!items) { - return; - } - var articles = []; - for (var i in items) { - articles.push(i); - } - ZU.processDocuments(articles, scrape); - }); - } - else { - scrape(doc, url); - } + // if (detectWeb(doc, url) == "multiple") { + // Z.selectItems(getSearchResults(doc, false), function (items) { + // if (!items) { + // return; + // } + // var articles = []; + // for (var i in items) {// articles.push(i); + // } + // ZU.processDocuments(articles, scrape); + // }); + // } + // else { + scrape(doc, url); + // } } // Note on test cases: Because the pages use dynamic elements (which is also why the translator doesn't work for multiple item pages), automatic test in Scaffold doesn't work. Every time a test is needed, use "New Web" to manually add it. @@ -352,12 +551,12 @@ function doWeb(doc, url) { var testCases = [ { "type": "web", - "url": "http://www.fao.org/documents/card/en/c/ca8466en", + "url": "https://www.fao.org/documents/card/en?details=cc0461en", "defer": true, "items": [ { "itemType": "book", - "title": "Responding to the impact of the COVID-19 outbreak on food value chains through efficient logistics", + "title": "The State of World Fisheries and Aquaculture 2022: Towards Blue Transformation", "creators": [ { "lastName": "FAO", @@ -365,15 +564,18 @@ var testCases = [ "fieldMode": 1 } ], - "date": "2020", - "ISBN": "9789251323717", - "abstractNote": "Measures implemented around the world to contain the COVID-19 pandemic have entailed a severe reduction not only in the transportation of goods and services that rely on transport, but also in the migration of labour domestically and internationally. Workers are less available reflecting both disruptions in transportation systems and restrictions to stop the transmission of the disease, within and across borders. \n\nThe Food and Agriculture Organization of the United Nations (FAO) urges countries to maintain functioning food value chains to avoid food shortages, following practices that are being proven to work. This note summarizes some practices that could be useful for governments and the private sector to maintain critical logistical elements in food value chain.\n\nRevised 26 April 2020.\n\nSee the full list of policy briefs related to COVID-19\n\n.", + "date": "2022", + "ISBN": "9789251363645", + "abstractNote": "The 2022 edition of The State of World Fisheries and Aquaculture coincides with the launch of the Decade of Action to deliver the Global Goals, the United Nations Decade of Ocean Science for Sustainable Development and the United Nations Decade on Ecosystem Restoration. It presents how these and other equally important United Nations events, such as the International Year of Artisanal Fisheries and Aquaculture (IYAFA 2022), are being integrated and supported through Blue Transformation, a priority area of FAO’s new Strategic Framework 2022–2031 designed to accelerate achievement of the 2030 Agenda for Sustainable Development in food and agriculture.\n\nThe concept of Blue Transformation emerged from the Thirty-fourth Session of the FAO Committee on Fisheries in February 2021, and in particular the Declaration for Sustainable Fisheries and Aquaculture, which was negotiated and endorsed by all FAO Members. The Declaration calls for support for “an evolving and positive vision for fisheries and aquaculture in the twenty first century, where the sector is fully recognized for its contribution to fighting poverty, hunger and malnutrition.” In this context, Part 1 of this edition of The State of World Fisheries and Aquaculture reviews the world status of fisheries and aquaculture, while Parts 2 and 3 are devoted to Blue Transformation and its pillars on intensifying and expanding aquaculture, improving fisheries management and innovating fisheries and aquaculture value chains. Blue Transformation emphasizes the need for forward-looking and bold actions to be launched or accelerated in coming years to achieve the objectives of the Declaration and in support of the 2030 Agenda. Part 4 covers current and high-impact emerging issues – COVID-19, climate change and gender equality – that require thorough consideration for transformative steps and preparedness to secure sustainable, efficient and equitable fisheries and aquaculture, and finally draws some outlook on future trends based on projections.\n\nThe State of World Fisheries and Aquaculture aims to provide objective, reliable and up-to-date information to a wide audience – policymakers, managers, scientists, stakeholders and indeed everyone interested in the fisheries and aquaculture sector.", "language": "en", "libraryCatalog": "FAO Publications", - "numPages": "4", + "numPages": "266", "place": "Rome, Italy", "publisher": "FAO", - "url": "http://www.fao.org/documents/card/en/c/ca8466en", + "series": "The State of World Fisheries and Aquaculture (SOFIA)", + "seriesNumber": "2022", + "shortTitle": "The State of World Fisheries and Aquaculture 2022", + "url": "https://www.fao.org/documents/card/en?details=cc0461en", "attachments": [ { "title": "Full Text PDF", @@ -382,16 +584,28 @@ var testCases = [ ], "tags": [ { - "tag": "Coronavirus" + "tag": "aquaculture production" }, { - "tag": "agrifood sector" + "tag": "climate change adaptation" }, { - "tag": "infectious diseases" + "tag": "fish trade" }, { - "tag": "logistics" + "tag": "fishery management" + }, + { + "tag": "fishery production" + }, + { + "tag": "fishery resources" + }, + { + "tag": "gender equality" + }, + { + "tag": "sustainable fisheries" }, { "tag": "value chains" @@ -404,35 +618,95 @@ var testCases = [ }, { "type": "web", - "url": "http://www.fao.org/documents/card/en/c/ca8751en/", + "url": "https://www.fao.org/publications/card/en?details=cc0461en", "defer": true, "items": [ { "itemType": "book", - "title": "Blockchain application in seafood value chains", + "title": "The State of World Fisheries and Aquaculture 2022: Towards Blue Transformation", "creators": [ { - "firstName": "F.", - "lastName": "Blaha", - "creatorType": "author" + "lastName": "FAO", + "creatorType": "author", + "fieldMode": 1 + } + ], + "date": "2022", + "ISBN": "9789251363645", + "abstractNote": "The 2022 edition of The State of World Fisheries and Aquaculture coincides with the launch of the Decade of Action to deliver the Global Goals, the United Nations Decade of Ocean Science for Sustainable Development and the United Nations Decade on Ecosystem Restoration. It presents how these and other equally important United Nations events, such as the International Year of Artisanal Fisheries and Aquaculture (IYAFA 2022), are being integrated and supported through Blue Transformation, a priority area of FAO’s new Strategic Framework 2022–2031 designed to accelerate achievement of the 2030 Agenda for Sustainable Development in food and agriculture. \n\nThe concept of Blue Transformation emerged from the Thirty-fourth Session of the FAO Committee on Fisheries in February 2021, and in particular the Declaration for Sustainable Fisheries and Aquaculture, which was negotiated and endorsed by all FAO Members. The Declaration calls for support for “an evolving and positive vision for fisheries and aquaculture in the twenty first century, where the sector is fully recognized for its contribution to fighting poverty, hunger and malnutrition.” In this context, Part 1 of this edition of The State of World Fisheries and Aquaculture reviews the world status of fisheries and aquaculture, while Parts 2 and 3 are devoted to Blue Transformation and its pillars on intensifying and expanding aquaculture, improving fisheries management and innovating fisheries and aquaculture value chains. Blue Transformation emphasizes the need for forward-looking and bold actions to be launched or accelerated in coming years to achieve the objectives of the Declaration and in support of the 2030 Agenda. Part 4 covers current and high-impact emerging issues – COVID-19, climate change and gender equality – that require thorough consideration for transformative steps and preparedness to secure sustainable, efficient and equitable fisheries and aquaculture, and finally draws some outlook on future trends based on projections. \n\nThe State of World Fisheries and Aquaculture aims to provide objective, reliable and up-to-date information to a wide audience – policymakers, managers, scientists, stakeholders and indeed everyone interested in the fisheries and aquaculture sector.\n\nThe following complementary information is available:\n\nRead online the full digital reportSee the interactive storyRead the In Brief\n\nHelp us improve your reading experience\n\nLast updated date 19/08/2022", + "language": "other", + "libraryCatalog": "FAO Publications", + "numPages": "266", + "place": "Rome, Italy", + "publisher": "FAO", + "series": "The State of World Fisheries and Aquaculture (SOFIA)", + "seriesNumber": "2022", + "shortTitle": "The State of World Fisheries and Aquaculture 2022", + "url": "https://www.fao.org/publications/card/en?details=cc0461en", + "attachments": [ + { + "title": "Full Text PDF", + "mimeType": "application/pdf" + } + ], + "tags": [ + { + "tag": "aquaculture production" }, { - "firstName": "K.", - "lastName": "Katafono", - "creatorType": "author" + "tag": "climate change adaptation" + }, + { + "tag": "fish trade" + }, + { + "tag": "fishery management" + }, + { + "tag": "fishery production" + }, + { + "tag": "fishery resources" + }, + { + "tag": "gender equality" + }, + { + "tag": "sustainable fisheries" + }, + { + "tag": "value chains" + } + ], + "notes": [], + "seeAlso": [] + } + ] + }, + { + "type": "web", + "url": "https://www.fao.org/documents/card/en/c/ca8466en", + "defer": true, + "items": [ + { + "itemType": "book", + "title": "Responding to the impact of the COVID-19 outbreak on food value chains through efficient logistics", + "creators": [ + { + "lastName": "FAO", + "creatorType": "author", + "fieldMode": 1 } ], "date": "2020", - "ISBN": "9789251324530", - "abstractNote": "Innovation through information and communication technologies is a key enabler in transforming food systems and holds great potential to achieve the Sustainable Development Goals. Recent developments, such as mobile technologies, smart networks, drones, remote-sensing, distributed computing, as well as disruptive technologies, such as blockchain, the Internet of things and artificial intelligence, are serving as the premise for a “digital revolution” whereby management of resources can potentially be highly optimized, intelligent and anticipatory. This publication establishes chain traceability as the substrate over which digital solutions need to operate. It provides a comprehensive introduction to blockchain, and covers smart contracts, explores how they relate to blockchain with an example of their use in seafood value chains, and then examines major development and operational considerations for blockchain applications. The publication also analyses the seafood supply chain with considerations on flag, coastal, port, processing and market States. It identifies general control elements (critical tracking events and corresponding key data elements) that form the basis for traceability monitoring and acquisition, and summarizes suitability for blockchain. It also investigates considerations for legality, transparency, species fraud and food safety.", + "ISBN": "9789251323717", + "abstractNote": "Measures implemented around the world to contain the COVID-19 pandemic have entailed a severe reduction not only in the transportation of goods and services that rely on transport, but also in the migration of labour domestically and internationally. Workers are less available reflecting both disruptions in transportation systems and restrictions to stop the transmission of the disease, within and across borders.\n\nThe Food and Agriculture Organization of the United Nations (FAO) urges countries to maintain functioning food value chains to avoid food shortages, following practices that are being proven to work. This note summarizes some practices that could be useful for governments and the private sector to maintain critical logistical elements in food value chain.", "language": "en", "libraryCatalog": "FAO Publications", - "numPages": "56", + "numPages": "4", "place": "Rome, Italy", "publisher": "FAO", - "series": "FAO Fisheries and Aquaculture Circular", - "seriesNumber": "1207", - "url": "http://www.fao.org/documents/card/en/c/ca8751en/", + "url": "https://www.fao.org/documents/card/en/c/ca8466en", "attachments": [ { "title": "Full Text PDF", @@ -441,22 +715,19 @@ var testCases = [ ], "tags": [ { - "tag": "analysis" - }, - { - "tag": "blockchain technology" + "tag": "Coronavirus" }, { - "tag": "fisheries" + "tag": "agrifood sector" }, { - "tag": "food production" + "tag": "infectious diseases" }, { - "tag": "food systems" + "tag": "logistics" }, { - "tag": "traceability" + "tag": "value chains" } ], "notes": [], @@ -466,33 +737,35 @@ var testCases = [ }, { "type": "web", - "url": "http://www.fao.org/documents/card/en/c/I9069EN", + "url": "https://www.fao.org/documents/card/en/c/ca8751en/", "defer": true, "items": [ { "itemType": "book", - "title": "Republic of Moldova Value Chain Gap Analysis", + "title": "Blockchain application in seafood value chains", "creators": [ { - "firstName": "J.", - "lastName": "O'Connell", + "firstName": "F.", + "lastName": "Blaha", "creatorType": "author" }, { - "firstName": "P.", - "lastName": "Kiparisov", + "firstName": "K.", + "lastName": "Katafono", "creatorType": "author" } ], - "date": "2018", - "ISBN": "9789251304839", - "abstractNote": "Agriculture and food industry sectors have a major importance for the Moldovan economy. The Republic of Moldova has one of the highest share of rural population among the countries in Europe and Central Asia, and its agriculture sector significantly contributes to the country’s gross domestic product.\n\nThis work is a part of a series of studies on the value chain development gaps and the environment for doing business for farmers. The goal of this study is to try to consolidate the information on countrywide value chain development gathered from various open sources and based on materials developed in a field mission by FAO officers with an emphasis on the plum and berry value chains. The authors did not aim at close examination of the selected value chains; rather, this paper is a general overview that will be a reference point for future field work in the country.\n\nTo get the results, the authors analysed the legislative history related to value chains, collected materials and statistics from open sources, conducted a field mission and interviewed stakeholders.\n\nThe first part of the report observes the overall situation in the Republic of Moldova with a focus on the agriculture sector, reviewing related legislation, the environment for doing business for farmers, and trade. The paper examines existing support measures for agriculture and covers the banking sector and trade policy. The second part examines value chain actors and overviews the selected value chains of plums and berries. The final part provides recommendations.", + "date": "2020", + "ISBN": "9789251324530", + "abstractNote": "Innovation through information and communication technologies is a key enabler in transforming food systems and holds great potential to achieve the Sustainable Development Goals. Recent developments, such as mobile technologies, smart networks, drones, remote-sensing, distributed computing, as well as disruptive technologies, such as blockchain, the Internet of things and artificial intelligence, are serving as the premise for a “digital revolution” whereby management of resources can potentially be highly optimized, intelligent and anticipatory. This publication establishes chain traceability as the substrate over which digital solutions need to operate. It provides a comprehensive introduction to blockchain, and covers smart contracts, explores how they relate to blockchain with an example of their use in seafood value chains, and then examines major development and operational considerations for blockchain applications. The publication also analyses the seafood supply chain with considerations on flag, coastal, port, processing and market States. It identifies general control elements (critical tracking events and corresponding key data elements) that form the basis for traceability monitoring and acquisition, and summarizes suitability for blockchain. It also investigates considerations for legality, transparency, species fraud and food safety.", "language": "en", "libraryCatalog": "FAO Publications", - "numPages": "65", - "place": "Budapest, Hungary", + "numPages": "56", + "place": "Rome, Italy", "publisher": "FAO", - "url": "http://www.fao.org/documents/card/en/c/I9069EN", + "series": "FAO Fisheries and Aquaculture Circular", + "seriesNumber": "No. 1207", + "url": "https://www.fao.org/documents/card/en/c/ca8751en/", "attachments": [ { "title": "Full Text PDF", @@ -501,28 +774,22 @@ var testCases = [ ], "tags": [ { - "tag": "Republic of Moldova" - }, - { - "tag": "agricultural sector" - }, - { - "tag": "data analysis" + "tag": "analysis" }, { - "tag": "economic analysis" + "tag": "blockchain technology" }, { - "tag": "economic infrastructure" + "tag": "fisheries" }, { - "tag": "economic situation" + "tag": "food production" }, { - "tag": "research" + "tag": "food systems" }, { - "tag": "supply chain" + "tag": "traceability" } ], "notes": [], @@ -532,7 +799,7 @@ var testCases = [ }, { "type": "web", - "url": "http://www.fao.org/documents/card/en/c/ca7988en/", + "url": "https://www.fao.org/documents/card/en/c/ca7988en/", "defer": true, "items": [ { @@ -554,7 +821,7 @@ var testCases = [ "place": "Rome, Italy", "publisher": "FAO", "shortTitle": "FAO publications catalogue 2020", - "url": "http://www.fao.org/documents/card/en/c/ca7988en/", + "url": "https://www.fao.org/documents/card/en/c/ca7988en/", "attachments": [ { "title": "Full Text PDF", @@ -582,7 +849,7 @@ var testCases = [ }, { "type": "web", - "url": "http://www.fao.org/publications/card/fr/c/77dbd058-8dd4-4295-af77-23f6b28cc683/", + "url": "https://www.fao.org/documents/card/fr/c/77dbd058-8dd4-4295-af77-23f6b28cc683/", "defer": true, "items": [ { @@ -602,15 +869,15 @@ var testCases = [ ], "date": "2016", "ISBN": "9789252094890", - "abstractNote": "Ce livre nous emmène au cœur des zones de forêts denses et sahéliennes de l’Afrique centrale, un écosystème précieux et essentiel à la vie quotidienne de ses habitants, représentant l’un des trois principaux ensembles boisés tropicaux de la planète. Dix pays (Burundi, Cameroun, Congo, Gabon, Guinée Equatoriale, République Centrafricaine, République Démocratique du Congo, Rwanda, Sao Tomé & Principe, Tchad) abritent ces forêts et savanes, riches d’importantes ressources naturelles. Ils ont en com mun une longue histoire liée à la colonisation, suivie d'une expérience de coopération multiforme depuis les indépendances qui évolue incontestablement vers une intégration économique et monétaire. De nos jours, alors que les équilibres séculaires entre l’homme et la nature semblent ébranlés, que la sécurité alimentaire, la lutte contre la pauvreté et la préservation de la biodiversité et des ressources forestières sont devenus des enjeux mondiaux ; à l’heure où la croissance démographique non m aîtrisée fragilise le maintien des écosystèmes forestiers tout en accentuant les conflits liés à la recherche d’espace vital, le phénomène des changements climatiques vient davantage sonder le génie créateur des populations forestières dans la préservation et la gestion durable de la forêt et des produits forestiers non ligneux (PFNL) qui en sont issus. Cette publication est l’œuvre du personnel technique de la FAO, avec la contribution des partenaires internationaux et locaux engagés dans l’évo lution des PFNL. Elle est un document précieux consacré au développement des peuples par la promotion des PFNL en Afrique centrale en vue du renforcement de la sécurité alimentaire et la lutte contre la pauvreté. \n\n Voir aussi la sommaire en version anglais", + "abstractNote": "Ce livre nous emmène au cœur des zones de forêts denses et sahéliennes de l’Afrique centrale, un écosystème précieux et essentiel à la vie quotidienne de ses habitants, représentant l’un des trois principaux ensembles boisés tropicaux de la planète. Dix pays (Burundi, Cameroun, Congo, Gabon, Guinée Equatoriale, République Centrafricaine, République Démocratique du Congo, Rwanda, Sao Tomé & Principe, Tchad) abritent ces forêts et savanes, riches d’importantes ressources naturelles. Ils ont en com mun une longue histoire liée à la colonisation, suivie d'une expérience de coopération multiforme depuis les indépendances qui évolue incontestablement vers une intégration économique et monétaire. De nos jours, alors que les équilibres séculaires entre l’homme et la nature semblent ébranlés, que la sécurité alimentaire, la lutte contre la pauvreté et la préservation de la biodiversité et des ressources forestières sont devenus des enjeux mondiaux ; à l’heure où la croissance démographique non m aîtrisée fragilise le maintien des écosystèmes forestiers tout en accentuant les conflits liés à la recherche d’espace vital, le phénomène des changements climatiques vient davantage sonder le génie créateur des populations forestières dans la préservation et la gestion durable de la forêt et des produits forestiers non ligneux (PFNL) qui en sont issus. Cette publication est l’œuvre du personnel technique de la FAO, avec la contribution des partenaires internationaux et locaux engagés dans l’évo lution des PFNL. Elle est un document précieux consacré au développement des peuples par la promotion des PFNL en Afrique centrale en vue du renforcement de la sécurité alimentaire et la lutte contre la pauvreté.\n\nVoir aussi la sommaire en version anglais", "language": "fr", "libraryCatalog": "FAO Publications", "numPages": "251", "place": "Rome, Italy", "publisher": "FAO", "series": "Produits Forestiers Non-Ligneux", - "seriesNumber": "21", - "url": "http://www.fao.org/publications/card/fr/c/77dbd058-8dd4-4295-af77-23f6b28cc683/", + "seriesNumber": "No. 21", + "url": "https://www.fao.org/documents/card/fr/c/77dbd058-8dd4-4295-af77-23f6b28cc683/", "attachments": [ { "title": "Full Text PDF", @@ -662,7 +929,7 @@ var testCases = [ }, { "type": "web", - "url": "http://www.fao.org/publications/card/zh/c/mw246ZH/", + "url": "https://www.fao.org/publications/card/zh/c/mw246ZH/", "defer": true, "items": [ { @@ -682,7 +949,7 @@ var testCases = [ "libraryCatalog": "FAO Publications", "place": "Rome, Italy", "publisher": "FAO", - "url": "http://www.fao.org/publications/card/zh/c/mw246ZH/", + "url": "https://www.fao.org/publications/card/zh/c/mw246ZH/", "attachments": [ { "title": "Full Text PDF", @@ -749,12 +1016,12 @@ var testCases = [ }, { "type": "web", - "url": "http://www.fao.org/publications/card/en/c/5014f143-be17-4b58-b90e-f1c6bef344a0/", + "url": "https://www.fao.org/documents/card/ar/c/c6c2c8d7-3683-53a7-ab58-ce480c65f36c/", "defer": true, "items": [ { "itemType": "book", - "title": "Climate-Smart Agriculture: A Call for Action: Synthesis of the Asia-Pacific Regional Workshop Bangkok, Thailand, 18 to 20 June 2015", + "title": "الخطوط التوجيهية الطوعية بشأن الحوكمة المسؤولة لحيازة الأراضي ومصايد الأسماك والغابات في سياق الأمن الغذائي الوطني", "creators": [ { "lastName": "FAO", @@ -762,17 +1029,15 @@ var testCases = [ "fieldMode": 1 } ], - "date": "2015", - "ISBN": "9789251088630", - "abstractNote": "This publication is a summary of the workshop held in Bangkok, Thailand from 18 to 20 June 2015 to promote the mainstreaming and up-scaling of Climate-Smart Agriculture in the region. Included in the report are successful case studies that agriculturists have been practicing as a means to address food security under adverse circumstances.", - "language": "en", + "date": "2012", + "ISBN": "9789256072771", + "abstractNote": "هذه الخطوط التوجيهية هي أول صكّ عالمي شامل خاص بالحيازات وإدارتها يُعدّ من خلال مفاوضات حكومية دولية. وتضع هذه الخطوط التوجيهية مبادئ ومعايير مقبولة دولياً للممارسات المسؤولة لاستخدام الأراضي ومصايد الأسماك والغابات وللتحكّم بها. وهي تعطي توجيهات لتحسين الأطر القانونية والتنظيمية والمتصلة بالسياسات التي تنظّم حقوق الحيازة ولزيادة شفافية نظم الحيازة وإدارتها ولتعزيز القدرات والإجراءات التي تتخذها الأجهزة العامة ومؤسسات القطاع الخاص ومنظمات المجتمع المدني وجميع المعنيين بالحيازات وإد ارتها. وتُدرج هذه الخطوط التوجيهية إدارة الحيازات ضمن السياق الوطني للأمن الغذائي وهي تسعى إلى المساهمة في الإعمال المطرد للحق في غذاء كافٍ والقضاء على الفقر وحماية البيئة وتحقيق التنمية الاجتماعية والاقتصادية المستدامة.", + "language": "ar", "libraryCatalog": "FAO Publications", - "numPages": "106", + "numPages": "40", "place": "Rome, Italy", - "publisher": "FAO Regional Office for Asia and the Pacific", - "series": "RAP Publication", - "shortTitle": "Climate-Smart Agriculture", - "url": "http://www.fao.org/publications/card/en/c/5014f143-be17-4b58-b90e-f1c6bef344a0/", + "publisher": "FAO", + "url": "https://www.fao.org/documents/card/ar/c/c6c2c8d7-3683-53a7-ab58-ce480c65f36c/", "attachments": [ { "title": "Full Text PDF", @@ -781,28 +1046,25 @@ var testCases = [ ], "tags": [ { - "tag": "climate-smart agriculture" - }, - { - "tag": "forestry" + "tag": "guidelines" }, { - "tag": "market gardens" + "tag": "أمن غذائي" }, { - "tag": "meetings" + "tag": "إقتصاديات الغابة" }, { - "tag": "sustainable agriculture" + "tag": "اقتصاد الصيد" }, { - "tag": "sustainable development" + "tag": "الحكم" }, { - "tag": "urban farmers" + "tag": "النوع الاجتماعي" }, { - "tag": "water harvesting" + "tag": "حيازة الأراضي" } ], "notes": [], @@ -812,12 +1074,12 @@ var testCases = [ }, { "type": "web", - "url": "http://www.fao.org/publications/card/ar/c/c6c2c8d7-3683-53a7-ab58-ce480c65f36c/", + "url": "https://www.fao.org/documents/card/en/c/5014f143-be17-4b58-b90e-f1c6bef344a0/", "defer": true, "items": [ { "itemType": "book", - "title": "الخطوط التوجيهية الطوعية بشأن الحوكمة المسؤولة لحيازة الأراضي ومصايد الأسماك والغابات في سياق الأمن الغذائي الوطني", + "title": "Climate-Smart Agriculture: A Call for Action: Synthesis of the Asia-Pacific Regional Workshop Bangkok, Thailand, 18 to 20 June 2015", "creators": [ { "lastName": "FAO", @@ -825,14 +1087,17 @@ var testCases = [ "fieldMode": 1 } ], - "date": "2012", - "abstractNote": "هذه الخطوط التوجيهية هي أول صكّ عالمي شامل خاص بالحيازات وإدارتها يُعدّ من خلال مفاوضات حكومية دولية. وتضع هذه الخطوط التوجيهية مبادئ ومعايير مقبولة دولياً للممارسات المسؤولة لاستخدام الأراضي ومصايد الأسماك والغابات وللتحكّم بها. وهي تعطي توجيهات لتحسين الأطر القانونية والتنظيمية والمتصلة بالسياسات التي تنظّم حقوق الحيازة ولزيادة شفافية نظم الحيازة وإدارتها ولتعزيز القدرات والإجراءات التي تتخذها الأجهزة العامة ومؤسسات القطاع الخاص ومنظمات المجتمع المدني وجميع المعنيين بالحيازات وإد ارتها. وتُدرج هذه الخطوط التوجيهية إدارة الحيازات ضمن السياق الوطني للأمن الغذائي وهي تسعى إلى المساهمة في الإعمال المطرد للحق في غذاء كافٍ والقضاء على الفقر وحماية البيئة وتحقيق التنمية الاجتماعية والاقتصادية المستدامة.", - "language": "ar", + "date": "2015", + "ISBN": "9789251088630", + "abstractNote": "This publication is a summary of the workshop held in Bangkok, Thailand from 18 to 20 June 2015 to promote the mainstreaming and up-scaling of Climate-Smart Agriculture in the region. Included in the report are successful case studies that agriculturists have been practicing as a means to address food security under adverse circumstances.", + "language": "en", "libraryCatalog": "FAO Publications", - "numPages": "40", + "numPages": "106", "place": "Rome, Italy", - "publisher": "FAO", - "url": "http://www.fao.org/publications/card/ar/c/c6c2c8d7-3683-53a7-ab58-ce480c65f36c/", + "publisher": "FAO Regional Office for Asia and the Pacific", + "series": "RAP Publication", + "shortTitle": "Climate-Smart Agriculture", + "url": "https://www.fao.org/documents/card/en/c/5014f143-be17-4b58-b90e-f1c6bef344a0/", "attachments": [ { "title": "Full Text PDF", @@ -841,25 +1106,28 @@ var testCases = [ ], "tags": [ { - "tag": "null" + "tag": "climate-smart agriculture" + }, + { + "tag": "forestry" }, { - "tag": "null" + "tag": "market gardens" }, { - "tag": "null" + "tag": "meetings" }, { - "tag": "null" + "tag": "sustainable agriculture" }, { - "tag": "أمن غذائي" + "tag": "sustainable development" }, { - "tag": "الحكم" + "tag": "urban farmers" }, { - "tag": "حيازة الأراضي" + "tag": "water harvesting" } ], "notes": [], From b3dc7f92b82ce99fc8e89d0a738fd6ebdbdf4448 Mon Sep 17 00:00:00 2001 From: zoe-translates <116055375+zoe-translates@users.noreply.github.com> Date: Mon, 3 Apr 2023 19:43:37 +0000 Subject: [PATCH 02/17] CI: Update extension button selector (#3003) --- .ci/pull-request-check/selenium-test.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.ci/pull-request-check/selenium-test.js b/.ci/pull-request-check/selenium-test.js index 2076a96ec2a..37da3a04c97 100755 --- a/.ci/pull-request-check/selenium-test.js +++ b/.ci/pull-request-check/selenium-test.js @@ -113,10 +113,10 @@ var allPassed = false; // No API to retrieve extension ID. Hacks, sigh. await driver.get("chrome://system/"); - await driver.wait(until.elementLocated({id: 'extensions-value-btn'}), 60*1000); + await driver.wait(until.elementLocated({id: 'btn-extensions-value'}), 60*1000); // Chrome 89+ has the extension list expanded by default try { - let extBtn = await driver.findElement({css: '#extensions-value-btn'}); + let extBtn = await driver.findElement({css: '#btn-extensions-value'}); await extBtn.click(); } catch (e) {} let contentElem = await driver.findElement({css: '#content'}); From e6c28bf9164199217d63b0eeef8b750aa83e87bc Mon Sep 17 00:00:00 2001 From: Abe Jellinek Date: Wed, 5 Apr 2023 11:26:39 -0400 Subject: [PATCH 03/17] Newspapers.com: Support /article/ pages, improve metadata https://forums.zotero.org/discussion/comment/432139/#Comment_432139 --- newspapers.com.js | 178 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 139 insertions(+), 39 deletions(-) diff --git a/newspapers.com.js b/newspapers.com.js index dd7caf6a490..abfa0f20ae1 100644 --- a/newspapers.com.js +++ b/newspapers.com.js @@ -2,14 +2,14 @@ "translatorID": "22dd8e35-02da-4968-b306-6efe0779a48d", "label": "newspapers.com", "creator": "Peter Binkley", - "target": "^https?://www\\.newspapers\\.com/clip/", + "target": "^https?://[^/]+\\.newspapers\\.com/(clip|article)/", "minVersion": "3.0", "maxVersion": "", "priority": 100, "inRepository": true, "translatorType": 4, "browserSupport": "gcsibv", - "lastUpdated": "2020-10-29 03:32:09" + "lastUpdated": "2023-04-05 15:26:20" } /* @@ -39,23 +39,17 @@ function detectWeb(_doc, _url) { return "newspaperArticle"; } +function doWeb(doc, url) { + if (url.includes('/clip/')) { + scrapeClip(doc, url); + } + else { + scrapeArticle(doc, url); + } +} -function doWeb(doc, _url) { +function scrapeClip(doc, url) { var newItem = new Zotero.Item("newspaperArticle"); - var scripts = doc.getElementsByTagName("script"); - var json = ''; - var jsonre = /var staPageDetail = JSON.parse\((.+?)\);/; - for (var i = 0; i < scripts.length; i++) { - var arr = scripts[i].textContent.match(jsonre); - if (arr) { - json = arr[1]; - break; - } - } - - // one JSON.parse to unstringify the json string, and one to parse it into an object - // the replace fixes escaped apostrophes in the source, which JSON.parse considers invalid - var details = JSON.parse(JSON.parse(json.replace(/\\'/g, "'"))); var metaArr = {}; var metaTags = doc.getElementsByTagName("meta"); @@ -64,9 +58,9 @@ function doWeb(doc, _url) { metaArr[metaTag.getAttribute("property")] = metaTag.getAttribute("content"); } } - newItem.title = details.citation.title; + newItem.title = text(doc, '#mainContent h1') || text(doc, '[itemprop="about"]'); // remove the unnecessary xid param - newItem.url = details.citation.url.replace(/\?xid=[0-9]*$/, ""); + newItem.url = attr(doc, 'link[rel="canonical"]', 'href'); /* The user can append the author to the title with a forward slash @@ -82,41 +76,69 @@ function doWeb(doc, _url) { newItem.creators.push(Zotero.Utilities.cleanAuthor(author, "author")); } } - - newItem.abstractNote = details.media.note; - var uniqueID = newItem.url.match(/\/clip\/(\d+)/)[1]; - var pdfurl = "https://www.newspapers.com/clippings/download/?id=" + uniqueID; - newItem.attachments.push({ - title: "Full Text PDF", - mimeType: "application/pdf", - url: pdfurl - }); - - newItem.publicationTitle = details.source.publisherName; + newItem.publicationTitle = text(doc, '[itemprop="name"]'); // details["source"]["title"] gives a string like // "Newspapers.com - The Akron Beacon Journal - 1939-10-30 - Page Page 15" - var editiontokens = details.source.title.replace(/ - /g, "|").split("|"); - if (editiontokens.length == 3) { // there's an edition label - newItem.edition = editiontokens[1]; - } - newItem.pages = editiontokens.slice(-1)[0].replace(/Page/g, ''); - newItem.date = details.source.publishedDate; - newItem.place = details.source.publishedLocation; + newItem.pages = text(doc, '[itemprop="position"]').replace(/Page/g, ''); + newItem.date = ZU.strToISO(text(doc, '[itemprop="dateCreated"]')); + newItem.place = text(doc, '[itemprop="locationCreated"]'); + + newItem.attachments.push(makeImageAttachment(url)); + newItem.attachments.push(makePDFAttachment(url)); // handle empty title if (newItem.title === "") { - newItem.title = "Clipped From " + newItem.publicationTitle; + newItem.title = "Article clipped from " + newItem.publicationTitle + ""; } newItem.complete(); } +function scrapeArticle(doc, url) { + let item = new Zotero.Item('newspaperArticle'); + let json = JSON.parse(text(doc, 'script[type="application/ld+json"]')); + + item.publicationTitle = json.publisher && ZU.unescapeHTML(json.publisher.legalName); + item.title = ZU.trimInternal(ZU.unescapeHTML(json.about)) + || 'Article clipped from ' + item.publicationTitle + ''; + item.abstractNote = ZU.unescapeHTML(json.text); + item.place = ZU.unescapeHTML(json.locationCreated); + item.date = json.datePublished; + item.pages = json.pageStart && ZU.unescapeHTML(json.pageStart.replace('Page', '')); + item.url = attr(doc, 'link[rel="canonical"]', 'href'); + item.attachments.push(makeImageAttachment(url)); + item.attachments.push(makePDFAttachment(url)); + + item.complete(); +} + +function getID(url) { + return url.match(/\/(\d+)/)[1]; +} + +function makePDFAttachment(url) { + return { + title: 'Full Text PDF', + mimeType: 'application/pdf', + url: 'https://www.newspapers.com/clippings/download/?id=' + getID(url) + }; +} + +function makeImageAttachment(url) { + return { + title: 'Image', + mimeType: 'image/jpeg', + url: 'https://img.newspapers.com/img/img?clippingId=' + getID(url) + }; +} + /** BEGIN TEST CASES **/ var testCases = [ { "type": "web", "url": "https://www.newspapers.com/clip/7960447/my-day-eleanor-roosevelt/", + "detectedItemType": "newspaperArticle", "items": [ { "itemType": "newspaperArticle", @@ -135,6 +157,10 @@ var testCases = [ "publicationTitle": "The Akron Beacon Journal", "url": "https://www.newspapers.com/clip/7960447/my-day-eleanor-roosevelt/", "attachments": [ + { + "title": "Image", + "mimeType": "image/jpeg" + }, { "title": "Full Text PDF", "mimeType": "application/pdf" @@ -149,10 +175,11 @@ var testCases = [ { "type": "web", "url": "https://www.newspapers.com/clip/18535448/the-sunday-leader/", + "detectedItemType": "newspaperArticle", "items": [ { "itemType": "newspaperArticle", - "title": "Clipped From The Sunday Leader", + "title": "Article clipped from The Sunday Leader", "creators": [], "date": "1887-07-17", "libraryCatalog": "newspapers.com", @@ -161,6 +188,10 @@ var testCases = [ "publicationTitle": "The Sunday Leader", "url": "https://www.newspapers.com/clip/18535448/the-sunday-leader/", "attachments": [ + { + "title": "Image", + "mimeType": "image/jpeg" + }, { "title": "Full Text PDF", "mimeType": "application/pdf" @@ -175,6 +206,7 @@ var testCases = [ { "type": "web", "url": "https://www.newspapers.com/clip/31333699/driven-from-governors-office-ohio/", + "detectedItemType": "newspaperArticle", "items": [ { "itemType": "newspaperArticle", @@ -187,6 +219,74 @@ var testCases = [ "publicationTitle": "Rushville Republican", "url": "https://www.newspapers.com/clip/31333699/driven-from-governors-office-ohio/", "attachments": [ + { + "title": "Image", + "mimeType": "image/jpeg" + }, + { + "title": "Full Text PDF", + "mimeType": "application/pdf" + } + ], + "tags": [], + "notes": [], + "seeAlso": [] + } + ] + }, + { + "type": "web", + "url": "https://www.newspapers.com/article/the-times-picayune-telegraphed-to-the-ne/120087578/", + "detectedItemType": "newspaperArticle", + "items": [ + { + "itemType": "newspaperArticle", + "title": "Telegraphed to the New Orleans Picayune. Latest from Charleston. Fort Sumter Returns Fire", + "creators": [], + "date": "1861-04-13", + "abstractNote": "Telegraphed to the New Orleans Picayune. LATEST FROM CHARLESTON. FORT SUMTER RETflUS FIRE. SULLI VAN12AND MORRIS ISLAND BATTERIES AT WORK. BREACH MADE IN FORT SUMTER. War Vessels Reported Outside. By the Southwestern Line. Charleston, April 12. The batteries of Sullivan's Island, Morris Island and other points opened fire on Fort Sumter at half - past four o'clock this morning. Fort Sumter returned the fire. A brisk cannonading is being kept up. There is no infoimation from the seaboard. The military are under arms. The whole population is on the streets, and the harbor is filled with anxious spectators. SECONB DISPATCH. The Moating battery is doing good service. Up to eleven o clock there has been no loea on our side. Fort Sumter replied at 7 o'clock this morning, and has kept up an astonishing fire ever since. Stevens's battery is slightly injured. Three sbejls are fired per minute. Four hundred, in all, have fallen. A breach is expected to be made in Fort Sumter to - morrow. Major Anderson's fire is principally directed I against the floating battery. j War vessels are reported outside the harbor. Only two soldiers are wounded on Salli - ! van's Island. The range is more perfect from the land batteries. Every shot tells. It ia thought from Mnjor Anderson's fire thai he haa more men than was supposed. Fort Sumter will succumb by to - morrow. It is raining at Charleston, but there - is no cessation of the batteries. A continuous steady fire on both sides is beinc kept up. The cutter Harriet Lane, and the steam gnu boat Crntader, are reported olf the bar, but have not entered the harbor. The War Department have as yet no official diepatches. (Jen. Beauregard was at the batteries all day. , The Government expects Fort Sumter to succumb to - morrow. third dispatch The firing continued all day. Two of Fort Sumter's guns are silenced, and it is reported a breach has been made through the southeast wall. No casualty has yet happened to any of the forces. Only seven of the nineteen batteries have opened fire on Fort Sumter. The remainder are held ready for the expected fleet. Two thousand men reached the city this morning and immediately embarked for Morris Island. FOURTH DI fAT H. Charleston, April 10, 11 P. M. Tne bombardment of Fort Saniter is going on every twenty minutes from the mortars It is supposed Major Anderson is resting his men for the night. Three vessels of war are reported outside tho bar. They cannot get in on account of the roughness of the sea. No one has as yet received any injury. The floating battery works admirably well. Every inlet to the harbor is well guarded. Our forces are having a lively time of it.", + "libraryCatalog": "newspapers.com", + "pages": "4", + "place": "New Orleans, Louisiana", + "publicationTitle": "The Times-Picayune", + "url": "https://www.newspapers.com/article/the-times-picayune-telegraphed-to-the-ne/120087578/", + "attachments": [ + { + "title": "Image", + "mimeType": "image/jpeg" + }, + { + "title": "Full Text PDF", + "mimeType": "application/pdf" + } + ], + "tags": [], + "notes": [], + "seeAlso": [] + } + ] + }, + { + "type": "web", + "url": "https://nydailynews.newspapers.com/article/daily-news/121098969/", + "detectedItemType": "newspaperArticle", + "items": [ + { + "itemType": "newspaperArticle", + "title": "Article clipped from Daily News", + "creators": [], + "date": "1965-02-26", + "abstractNote": "Donavena 8-5 Choice; Can He Kayo Folley? By Jim McCulIey Professional oddsmakers, otherwise referred to as bookies, evidently are counting on Oscar (Ringo) Bona- vena to flatten Zora Folley the Garden. Otherwise, why young thumper from Argentina an 8-5 favorite over the ringwise No. 5 heavyweight contender from Arizona? Only two fighters in Folley's 13-year career have outpointed him England's Henry Cooper, in London, and big Ernie Terrell, in New York. Five men have stopped Zora, however. IT DOESN'T SEEM possible Bonavena, with only eight pro fights under his belt, could win a decision over tonight's 32-year-old opponent. Oscar has stopped seven of his eight opponents, however, and, of course, does have a powerful body and a punishing punch in either mitt. The fight mob is really puzzled over this fight. Some of those well versed in fisticuffs can't understand how the odds-bodkins can make 22-year-old Bonavena such a big favorite. Some 10,000 fans are expected to come see for themselves and put another $40,- 000 into the current boxing revival. \"I KNOW FOLLEY dogs it at times,\" said a former heavyweight contender, who did not want to be named because he is now an official with the boxing commission. ''But Bonavena is a real novice compared to Zora. It seems to me Folley should be a big favorite, but then the kid does have a punch and he is game. It's possible he can reach Folley and knock him out.\" The price, for Folley backers, Is most enticing. \"I CAN'T RESIST the price,\" said a knowledgeable fight man who has been known to wager a bob now and then when the figures are right. \"Know something, 1 think it will be down close to pick 'em before they get into the ring.\" One thing is certain. Folley can't lose another fight in New York at this time, or he is through as a top contender. He is going for a payday on the gamble that he can go the distance with Oscar; and there is a chance he might stop the young man, too, though nobody has done that yet. RIGHT NOW, FOLLEY is unbeaten in hi3 last six bouts since losing to Terrell here July 27, '63. In that span he has whipped George Chuvalo, easily, and has recorded a draw with European champion Karl Maldenbfrger in Germany. Zora's overall record stands 68-7-4, for 79 professional fights, and includes 38 knockouts, some proof that he can punch as well as box. Only opponent to go the route (10 rounds) with Bonavena was Dick Wipperman, last Nov. 13 here. Oscar came back to the Garden a month later and knocked out Billy Stephan in six. The South American still is unranked among the big boys, but a win tonight will put him up there where he can start hollering. History shows heavyweights do mature a lot quicker than the lighter men, and Oscar may ev.en. be an unusual young fighter, v . . ( . r in 10 rounds or less tonight at would they continue to list the - Vlsic lliv ;.. Vnn-t ST!rt -. lM. FEB. 26, 1958 ZDhe BOSTON CELTICS WOM THEIR SECOMt STRAIGHT N.&.A. EASTERN CROWN BY DOWNING DETROIT, 106-99, AS &1LL RUSSELL COUTftOLU&THE BOARDS. BOBCOUSYAN& BILL SHAfeMAH EACH SCORED 18 POIWTS. Lincoln Downs Results 1ST Clmp.: 4-np: 5 f.: off 1:33. Ravenala Prince (Garry)5.ti0 4i 2 SO Mission Bound (Parker) 6.10 .'i.8'1 Favorite Act (Bradley) K.MI T-l:02, Also Lord Culpeper. Your Reporter, Deacon Shnne. Prmrie Rose. Rinsr Shut, Fearless Leader, ilaryg Gilt. Soft Glance. 2D Clmg-.; 4-np: 7 f.: off 2:00. Idle Threats (Allan) 4 no 2 SO Grey Whirl (Giovanni) 3.40 3.00 Good Effort (Maeda) B.20 T-1:32t4. Also Greek Paire. Inquisition. Frozen North, Fast Bid. Foxy Sway. (Daily Double. 8-1, Paid :!.\". liOl 3D Clm?:3yrs:mdns:5 f :off 2 :2!) . Dogrwood Pateh(MaRia)7.ai) o.no 4.20 I.L Abie K. t Bradley) 13. NO U.KO Peaceful T. (Donahue) H.uO T.-l:t)3. Also Doe I.ark. AlHnx. Miss Pilot. Sum Bomb. Fast Bell. Greek Action, Win Joe. Dont Btatne Babe. 4TH Clmar.: 4-up; 7 t.: off 2:58. Irish Dotty (Bradley) 4.4D 3.20 2. SO Sibling- (Allan) 9.80 6.20 Brimstone Road (Row an 6. Of) T.-l :35 . Also Stahlstown. Emerson Hill. Patti Dowd. Ou The Lawn. Sieve H.. Game Start. Set. 5TH Clma:.: 3-up: 8 t.; off 3:254. Ancient Queen (Lamonte)-4.80 3. no 2.40 Wlwndilly (Merrier) 3 20 2 .So Lady Mink (Bradley) 2.80 T-l:02. Alio Mandolas. Lady Rhody. O. K. Debbie. Jury Verdict. Swift Salonga. Mix n Match. La Calvados. 6TH Clm?: 3-4 yrs; 5 f: off 3:52. Tessie Tansor(Davern)12.60 o.BO 5.00 French Line (Myers) 4.80 5 40 Captain Bronze (Allan) 10. hi) T.-l:02 9i. Alyso Rosie Anirel. Lony-bridge Lu Lu. Star Status, Toute Ma Vie. Tompkins County. 7TH Alw.: 3-4-yos.: 5 fur. off 4:20. Lories Honey (Hole) 24.20 20 3.8\" Rndoon (Clinch) 2.40 2.40 Presta Sun (Gamb'della) 5.00 T.-l:03. Also Green Toea. Anthony Scarfo. Prince O Morn. Captain Lockitup. Caronia. 8TH Clmr.: 4-up: 1 m.: off 4:48. ratcount (Alberts) 13.HO 5 HO 4.20 Lone Peak (Rodriguez) 5.60 3 flu Kilda (Ledezma) 3.40 T-l:48Si. Also Hue or Spank. Carb-anrel, Whitey. Wild Desire. 9TH Clmg-: 41iip: 1 m: off 5:16. Oportscaster (Allan) 20.80 8.KO 7.20 Waste Of Time(Miller) 49.20 2B.20 Da.vFromDallas(G's'do) 20.40 T.-1:5H4. Also Symboleer, Dandy Randy. Sea Tread. My Buyer. Cosmic Rule. Busted Budeet. Another Take, Presented. (Twin Double 8-1 8-3 Paid $3.51 1.20) , Att, 4,744. Handle $364,968. ' r think ( ConraoLf) THEY'LL 7t1-fT EVER SvSXv. ' C COME J uAV' BE A LOHG JfeTV", + "libraryCatalog": "newspapers.com", + "pages": "60", + "place": "New York, New York", + "publicationTitle": "Daily News", + "url": "https://www.newspapers.com/article/daily-news/121098969/", + "attachments": [ + { + "title": "Image", + "mimeType": "image/jpeg" + }, { "title": "Full Text PDF", "mimeType": "application/pdf" From 66f5fa4395da1d1e37bf2327380a274b811eb15a Mon Sep 17 00:00:00 2001 From: OCDkirby <59982409+OCDkirby@users.noreply.github.com> Date: Thu, 6 Apr 2023 11:27:54 -0500 Subject: [PATCH 04/17] Create Lexis+ Scraper Translator --- Lexis+.js | 79 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) create mode 100644 Lexis+.js diff --git a/Lexis+.js b/Lexis+.js new file mode 100644 index 00000000000..e0bd8931f6f --- /dev/null +++ b/Lexis+.js @@ -0,0 +1,79 @@ +{ + "translatorID": "419638d9-9049-44ad-ba08-fa54ed24b5e6", + "label": "Lexis+", + "creator": "Brandon F", + "target": "https://plus.lexis.*/", + "minVersion": "5.0", + "maxVersion": "", + "priority": 100, + "inRepository": true, + "translatorType": 4, + "browserSupport": "gcsibv", + "lastUpdated": "2023-04-06 16:12:39" +} + +function scrape(doc, url) { + var namespace = doc.documentElement.namespaceURI; + var nsResolver = namespace ? function(prefix) { + if (prefix == "x" ) return namespace; else return null; + } : null; + + if (detectWeb(doc, url) == "case") + { + + } + else if (detectWeb(doc, url) == "statute") + { + + } +} + +function detectWeb(doc, url) { + Zotero.debug("Title: " + doc.title) + if (doc.title.match(/.*results.*/)) { + return "multiple" + } + else if (doc.title.match(/\d+\s[a-zA-Z\. ]+\s§\s\d+/)) + { + return "statute" + } + else if (doc.title.match(/\d+\s[a-zA-Z0-9\. ]+\s\d+/)) + { + return "case" + } +} + +function doWeb(doc, url) { + var namespace = doc.documentElement.namespaceURI; + var nsResolver = namespace ? function(prefix) { + if (prefix == "x" ) return namespace; else return null; + } : null; + + var casesOrStatutes = new Array(); + var items = new Object(); + var nextTitle; + + if (detectWeb(doc, url) == "multiple") { + var titles = doc.evaluate('(//a[@class="titleLink"])', + doc, nsResolver, XPathResult.ANY_TYPE, null); + while (nextTitle = titles.iterateNext()) { + // TODO format this a little, maybe add a year parenthetical + items[nextTitle.href] = nextTitle.textContent; + } + + items = Zotero.selectItems(items); + for(var i in items) { + casesOrStatutes.push(i); + } + } + else + { + casesOrStatutes = [url] + } + + Zotero.Utilities.processDocuments(casesOrStatutes, scrape, function(){Zotero.done();}); + Zotero.wait(); +}/** BEGIN TEST CASES **/ +var testCases = [ +] +/** END TEST CASES **/ From 1c18dd8af4937e9c4e4bc13193388809b44c15ad Mon Sep 17 00:00:00 2001 From: OCDkirby <59982409+OCDkirby@users.noreply.github.com> Date: Thu, 6 Apr 2023 14:39:27 -0500 Subject: [PATCH 05/17] Lexis+: add case/statute scraping --- Lexis+.js | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 62 insertions(+), 5 deletions(-) diff --git a/Lexis+.js b/Lexis+.js index e0bd8931f6f..abbd86a9fd6 100644 --- a/Lexis+.js +++ b/Lexis+.js @@ -9,7 +9,7 @@ "inRepository": true, "translatorType": 4, "browserSupport": "gcsibv", - "lastUpdated": "2023-04-06 16:12:39" + "lastUpdated": "2023-04-06 19:35:50" } function scrape(doc, url) { @@ -20,24 +20,70 @@ function scrape(doc, url) { if (detectWeb(doc, url) == "case") { + var newCase = new Zotero.Item("case"); + newCase.url = doc.location.href; + + var xPathofTitle = doc.evaluate('//h1[@id="SS_DocumentTitle"]', + doc, nsResolver, XPathResult.ANY_TYPE, null); + newCase.title = xPathofTitle.iterateNext().textContent; + var xPathofCitation = doc.evaluate('//span[@class="active-reporter"]', + doc, nsResolver, XPathResult.ANY_TYPE, null); + var citation = xPathofCitation.iterateNext().textContent; + newCase.reporterVolume = citation.substring(0, citation.indexOf(' ')); + newCase.reporter = citation.substring(citation.indexOf(' ') + 1, citation.lastIndexOf(' ')); + newCase.firstPage = citation.substring(citation.lastIndexOf(' ') + 1); + + var xPathofCourt = doc.evaluate('(//p[@class="SS_DocumentInfo"])[1]', + doc, nsResolver, XPathResult.ANY_TYPE, null); + newCase.court = xPathofCourt.iterateNext().textContent; + + var xPathofDate = doc.evaluate('//span[@class="date"]', + doc, nsResolver, XPathResult.ANY_TYPE, null); + newCase.dateDecided = xPathofDate.iterateNext().textContent; + + newCase.complete(); } else if (detectWeb(doc, url) == "statute") { + var newStatute = new Zotero.Item("statute"); + newStatute.url = doc.location.href; + + var xPathofTitle = doc.evaluate('//h1[@id="SS_DocumentTitle"]', + doc, nsResolver, XPathResult.ANY_TYPE, null); + var title = xPathofTitle.iterateNext().textContent; + newStatute.title = title; + newStatute.codeNumber = title.substring(0, title.indexOf(' ')); + var isolation = title.substring(title.indexOf(' '), title.lastIndexOf(' ')); // isolate reporter and section symbol + newStatute.code = isolation.substring(0, isolation.lastIndexOf(' ')); + newStatute.section = title.substring(title.lastIndexOf(' ') + 1); + + var xPathofInfo = doc.evaluate('//p[@class="SS_DocumentInfo"]', + doc, nsResolver, XPathResult.ANY_TYPE, null); + var info = xPathofInfo.iterateNext().textContent; + isolation = info.substring(info.search(/\d+-\d+/)); // isolate public law number on the frontend + newStatute.publicLawNumber = isolation.substring(0, isolation.indexOf(' ')).replace(/(^,)|(,$)/g, ''); + newStatute.session = newStatute.publicLawNumber.substring(0, newStatute.publicLawNumber.indexOf('-')); + isolation = info.substring(info.search( + /\b(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)/ + )) // isolate date on the frontend + newStatute.dateEnacted = isolation.substring(0, isolation.search(/[1-2][0-9][0-9][0-9]/) + 4); + Zotero.debug(isolation) + + newStatute.complete(); } } function detectWeb(doc, url) { - Zotero.debug("Title: " + doc.title) if (doc.title.match(/.*results.*/)) { return "multiple" } - else if (doc.title.match(/\d+\s[a-zA-Z\. ]+\s§\s\d+/)) + else if (doc.title.match(/\d+\s[a-zA-Z\. ]+\s§\s\d+/)) // Match: ... 42 U.S.C.S. § 230 ... { return "statute" } - else if (doc.title.match(/\d+\s[a-zA-Z0-9\. ]+\s\d+/)) + else if (doc.title.match(/\d+\s[a-zA-Z0-9\. ]+\s\d+/)) // Match: ... 5 U.S. 137 ... { return "case" } @@ -56,9 +102,20 @@ function doWeb(doc, url) { if (detectWeb(doc, url) == "multiple") { var titles = doc.evaluate('(//a[@class="titleLink"])', doc, nsResolver, XPathResult.ANY_TYPE, null); + var dates = doc.evaluate('(//span[contains(@class,"metaDataItem")])', + doc, nsResolver, XPathResult.ANY_TYPE, null) + var nextDate; + dates.iterateNext(); // First court name + nextDate = dates.iterateNext(); // First date is [2] + dates.iterateNext(); // First citation + while (nextTitle = titles.iterateNext()) { // TODO format this a little, maybe add a year parenthetical - items[nextTitle.href] = nextTitle.textContent; + items[nextTitle.href] = nextTitle.textContent + "(" + nextDate.textContent + ")"; + + dates.iterateNext(); // Court name + nextDate = dates.iterateNext(); // Every 3 items is a date + dates.iterateNext(); // Citation } items = Zotero.selectItems(items); From 9ad8a5846fff84ec02ec31987b9c281629ab2cf5 Mon Sep 17 00:00:00 2001 From: OCDkirby <59982409+OCDkirby@users.noreply.github.com> Date: Thu, 6 Apr 2023 17:08:36 -0500 Subject: [PATCH 06/17] Lexis+: Treat session laws as a subtype of statute --- Lexis+.js | 100 ++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 85 insertions(+), 15 deletions(-) diff --git a/Lexis+.js b/Lexis+.js index abbd86a9fd6..0197d3b4a18 100644 --- a/Lexis+.js +++ b/Lexis+.js @@ -2,14 +2,14 @@ "translatorID": "419638d9-9049-44ad-ba08-fa54ed24b5e6", "label": "Lexis+", "creator": "Brandon F", - "target": "https://plus.lexis.*/", + "target": "^https://plus.lexis.*/", "minVersion": "5.0", "maxVersion": "", "priority": 100, "inRepository": true, "translatorType": 4, "browserSupport": "gcsibv", - "lastUpdated": "2023-04-06 19:35:50" + "lastUpdated": "2023-04-06 21:58:14" } function scrape(doc, url) { @@ -54,22 +54,89 @@ function scrape(doc, url) { var title = xPathofTitle.iterateNext().textContent; newStatute.title = title; - newStatute.codeNumber = title.substring(0, title.indexOf(' ')); - var isolation = title.substring(title.indexOf(' '), title.lastIndexOf(' ')); // isolate reporter and section symbol - newStatute.code = isolation.substring(0, isolation.lastIndexOf(' ')); - newStatute.section = title.substring(title.lastIndexOf(' ') + 1); - var xPathofInfo = doc.evaluate('//p[@class="SS_DocumentInfo"]', - doc, nsResolver, XPathResult.ANY_TYPE, null); + doc, nsResolver, XPathResult.ANY_TYPE, null); var info = xPathofInfo.iterateNext().textContent; - isolation = info.substring(info.search(/\d+-\d+/)); // isolate public law number on the frontend - newStatute.publicLawNumber = isolation.substring(0, isolation.indexOf(' ')).replace(/(^,)|(,$)/g, ''); - newStatute.session = newStatute.publicLawNumber.substring(0, newStatute.publicLawNumber.indexOf('-')); + isolation = info.substring(info.search( /\b(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)/ - )) // isolate date on the frontend + )) // isolate date on the frontend newStatute.dateEnacted = isolation.substring(0, isolation.search(/[1-2][0-9][0-9][0-9]/) + 4); - Zotero.debug(isolation) + + if (title.match(/[Aa][cC][tT]/) || + title.match(/[Oo][Ff]\s[1-2][0-9][0-9][0-9]/)) // session law, not codified statute + { + // BB 21st ed. requires parallel cite to Pub. L. No. and Stat. for session laws + var statutesAtLarge, publicLawNo; + var xPathofActiveReporter = doc.evaluate('//a[@class="SS_ActiveRptr"]', + doc, nsResolver, XPathResult.ANY_TYPE, null); + var potentialReporter = xPathofActiveReporter.iterateNext(); + if (potentialReporter) // Sometimes Lexis is weird and doesn't give an ActiveRptr + { + if (potentialReporter.textContent.match(/[sS]tat\./)) + statutesAtLarge = potentialReporter.textContent; + else if (potentialReporter.textContent.match(/[pP]ub\./) || + potentialReporter.textContent.match(/[pP]\.[lL]\./)) + publicLawNo = potentialReporter.textContent; + } + + var xPathofNonPaginatedReporter = doc.evaluate('//span[@class="SS_NonPaginatedRptr"]', + doc, nsResolver, XPathResult.ANY_TYPE, null); + var nextReporter; + while (nextReporter = xPathofNonPaginatedReporter.iterateNext()) + { + if (nextReporter.textContent.match(/[sS]tat\./)) + statutesAtLarge = nextReporter.textContent; + else if (nextReporter.textContent.match(/[pP]ub\./) || + nextReporter.textContent.match(/[pP]\.[lL]\./)) + publicLawNo = nextReporter.textContent; + } + + // Turn publicLawNo into the public law fields + if (publicLawNo.match(/\d+-\d+/)) // Ex. P.L. 115-164 + { + var numPos = publicLawNo.search(/\d+-\d+/) + newStatute.publicLawNumber = publicLawNo.substring( + numPos, + publiclawNo.substring(numPos + 1).indexOf(' ')); // Gets 115-164 + + newStatute.session = newStatute.publicLawNumber.substring(0, newStatute.publicLawNumber.indexOf('-')); + } + else // Ex. 115 P.L. 164 or 115 Pub. L. No. 164 + { + newStatute.session = publicLawNo.substring(0, publicLawNo.indexOf(' ')); + newStatute.publicLawNumber = newStatute.session + '-' + publicLawNo.substring(publicLawNo.lastIndexOf(' ') + 1); + } + + // Turn statutesAtLarge into the code#/code/section fields + // TODO in styles, check for "Stat." as the code, and if so, don't append a section symbol + newStatute.codeNumber = statutesAtLarge.substring(0, statutesAtLarge.indexOf(' ')); + newStatute.code = "Stat."; + newStatute.section = statutesAtLarge.substring(statutesAtLarge.lastIndexOf(' ') + 1); + + + } + else + { + if (title.match(/^\d+/)) // Starts with digit, organized by title, ex. 47 U.S.C.S. § 230 + { + newStatute.codeNumber = title.substring(0, title.indexOf(' ')); + var isolation = title.substring(title.indexOf(' '), title.lastIndexOf(' ')); // isolate code and section symbol + newStatute.code = isolation.substring(0, isolation.lastIndexOf(' ')); + newStatute.section = title.substring(title.lastIndexOf(' ') + 1); + } + else // Starts with letter, organized by code, ex. Tex. Bus. & Com. Code § 26.01 + { + newStatute.code = title.substring(0, title.lastIndexOf('§') - 1); + newStatute.section = title.substring(title.lastIndexOf(' ') + 1); + } + + var isolation = info.substring(info.search(/\d+-\d+/)); // isolate public law number on the frontend + newStatute.publicLawNumber = isolation.substring(0, isolation.indexOf(' ')).replace(/(^,)|(,$)/g, ''); + newStatute.session = newStatute.publicLawNumber.substring(0, newStatute.publicLawNumber.indexOf('-')); + } + + newStatute.extra = info; // Since the info section is all over the place, just dump the whole thing in for manual cite checks newStatute.complete(); } @@ -79,7 +146,9 @@ function detectWeb(doc, url) { if (doc.title.match(/.*results.*/)) { return "multiple" } - else if (doc.title.match(/\d+\s[a-zA-Z\. ]+\s§\s\d+/)) // Match: ... 42 U.S.C.S. § 230 ... + else if (doc.title.match(/[a-zA-Z\. ]+\s§\s\d+/) || + doc.title.match(/[aA][cC][tT]/) || + doc.title.match(/[pP]\.[lL]\./)) // Match: ... Tex. Bus. & Com. Code § 26.01 ... { return "statute" } @@ -87,6 +156,7 @@ function detectWeb(doc, url) { { return "case" } + // TODO secondary sources } function doWeb(doc, url) { @@ -100,6 +170,7 @@ function doWeb(doc, url) { var nextTitle; if (detectWeb(doc, url) == "multiple") { + // TODO check what type of element it is (currently only working for 'cases' searches) var titles = doc.evaluate('(//a[@class="titleLink"])', doc, nsResolver, XPathResult.ANY_TYPE, null); var dates = doc.evaluate('(//span[contains(@class,"metaDataItem")])', @@ -110,7 +181,6 @@ function doWeb(doc, url) { dates.iterateNext(); // First citation while (nextTitle = titles.iterateNext()) { - // TODO format this a little, maybe add a year parenthetical items[nextTitle.href] = nextTitle.textContent + "(" + nextDate.textContent + ")"; dates.iterateNext(); // Court name From d4cfcf2459cad5e43b9f5b48d3698da65bdc8102 Mon Sep 17 00:00:00 2001 From: OCDkirby <59982409+OCDkirby@users.noreply.github.com> Date: Fri, 7 Apr 2023 10:39:56 -0500 Subject: [PATCH 07/17] Lexis+: Begin refactoring to new API --- Lexis+.js | 171 ++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 120 insertions(+), 51 deletions(-) diff --git a/Lexis+.js b/Lexis+.js index 0197d3b4a18..70ba972d609 100644 --- a/Lexis+.js +++ b/Lexis+.js @@ -9,12 +9,84 @@ "inRepository": true, "translatorType": 4, "browserSupport": "gcsibv", - "lastUpdated": "2023-04-06 21:58:14" + "lastUpdated": "2023-04-07 14:01:13" } +/* + ***** BEGIN LICENSE BLOCK ***** + + Copyright © 2022 YOUR_NAME <- TODO + + This file is part of Zotero. + + Zotero is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Zotero is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with Zotero. If not, see . + + ***** END LICENSE BLOCK ***** +*/ + + +// function detectWeb(doc, url) { +// // TODO: adjust the logic here +// if (url.includes('/article/')) { +// return 'newspaperArticle'; +// } +// else if (getSearchResults(doc, true)) { +// return 'multiple'; +// } +// return false; +// } + +// function getSearchResults(doc, checkOnly) { +// var items = {}; +// var found = false; +// // TODO: adjust the CSS selector +// var rows = doc.querySelectorAll('h2 > a.title[href*="/article/"]'); +// for (let row of rows) { +// // TODO: check and maybe adjust +// let href = row.href; +// // TODO: check and maybe adjust +// let title = ZU.trimInternal(row.textContent); +// if (!href || !title) continue; +// if (checkOnly) return true; +// found = true; +// items[href] = title; +// } +// return found ? items : false; +// } + +// async function doWeb(doc, url) { +// if (detectWeb(doc, url) == 'multiple') { +// let items = await Zotero.selectItems(getSearchResults(doc, false)); +// if (!items) return; +// for (let url of Object.keys(items)) { +// await scrape(await requestDocument(url)); +// } +// } +// else { +// await scrape(doc, url); +// } +// } + +// async function scrape(doc, url = doc.location.href) { +// // TODO: implement or add a scrape function template +// } + + + function scrape(doc, url) { var namespace = doc.documentElement.namespaceURI; - var nsResolver = namespace ? function(prefix) { + var nsResolver = namespace ? function([prefix]) { if (prefix == "x" ) return namespace; else return null; } : null; @@ -23,24 +95,19 @@ function scrape(doc, url) { var newCase = new Zotero.Item("case"); newCase.url = doc.location.href; - var xPathofTitle = doc.evaluate('//h1[@id="SS_DocumentTitle"]', - doc, nsResolver, XPathResult.ANY_TYPE, null); - newCase.title = xPathofTitle.iterateNext().textContent; + newCase.title = text(doc, 'h1#SS_DocumentTitle'); - var xPathofCitation = doc.evaluate('//span[@class="active-reporter"]', - doc, nsResolver, XPathResult.ANY_TYPE, null); - var citation = xPathofCitation.iterateNext().textContent; + var xPathofCitation = ZU.xpath(doc, '//span[@class="active-reporter"]'); + var citation = xPathofCitation[0].textContent; newCase.reporterVolume = citation.substring(0, citation.indexOf(' ')); newCase.reporter = citation.substring(citation.indexOf(' ') + 1, citation.lastIndexOf(' ')); newCase.firstPage = citation.substring(citation.lastIndexOf(' ') + 1); - var xPathofCourt = doc.evaluate('(//p[@class="SS_DocumentInfo"])[1]', - doc, nsResolver, XPathResult.ANY_TYPE, null); - newCase.court = xPathofCourt.iterateNext().textContent; + var xPathofCourt = ZU.xpath(doc, '(//p[@class="SS_DocumentInfo"])[1]', nsResolver); + newCase.court = xPathofCourt[0].textContent; - var xPathofDate = doc.evaluate('//span[@class="date"]', - doc, nsResolver, XPathResult.ANY_TYPE, null); - newCase.dateDecided = xPathofDate.iterateNext().textContent; + var xPathofDate = ZU.xpath(doc, '//span[@class="date"]', nsResolver); + newCase.dateDecided = xPathofDate[0].textContent; newCase.complete(); } @@ -49,14 +116,12 @@ function scrape(doc, url) { var newStatute = new Zotero.Item("statute"); newStatute.url = doc.location.href; - var xPathofTitle = doc.evaluate('//h1[@id="SS_DocumentTitle"]', - doc, nsResolver, XPathResult.ANY_TYPE, null); - var title = xPathofTitle.iterateNext().textContent; + var xPathofTitle = ZU.xpath(doc, '//h1[@id="SS_DocumentTitle"]', nsResolver); + var title = xPathofTitle[0].textContent; newStatute.title = title; - var xPathofInfo = doc.evaluate('//p[@class="SS_DocumentInfo"]', - doc, nsResolver, XPathResult.ANY_TYPE, null); - var info = xPathofInfo.iterateNext().textContent; + var xPathofInfo = ZU.xpath(doc, '//p[@class="SS_DocumentInfo"]', nsResolver); + var info = xPathofInfo[0].textContent; isolation = info.substring(info.search( /\b(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)/ @@ -68,11 +133,11 @@ function scrape(doc, url) { { // BB 21st ed. requires parallel cite to Pub. L. No. and Stat. for session laws var statutesAtLarge, publicLawNo; - var xPathofActiveReporter = doc.evaluate('//a[@class="SS_ActiveRptr"]', - doc, nsResolver, XPathResult.ANY_TYPE, null); - var potentialReporter = xPathofActiveReporter.iterateNext(); - if (potentialReporter) // Sometimes Lexis is weird and doesn't give an ActiveRptr + var xPathofActiveReporter = ZU.xpath(doc, '//a[@class="SS_ActiveRptr"]', nsResolver); + if (xPathofActiveReporter.length > 0) // Sometimes Lexis is weird and doesn't give an ActiveRptr { + var potentialReporter = xPathofActiveReporter[0]; + Zotero.debug(potentialReporter.textContent); if (potentialReporter.textContent.match(/[sS]tat\./)) statutesAtLarge = potentialReporter.textContent; else if (potentialReporter.textContent.match(/[pP]ub\./) || @@ -80,16 +145,16 @@ function scrape(doc, url) { publicLawNo = potentialReporter.textContent; } - var xPathofNonPaginatedReporter = doc.evaluate('//span[@class="SS_NonPaginatedRptr"]', - doc, nsResolver, XPathResult.ANY_TYPE, null); - var nextReporter; - while (nextReporter = xPathofNonPaginatedReporter.iterateNext()) + var xPathofNonPaginatedReporter = ZU.xpath(doc, '//span[@class="SS_NonPaginatedRptr"]', nsResolver); + + for (var i = 0; i < xPathofNonPaginatedReporter.length; i++) { - if (nextReporter.textContent.match(/[sS]tat\./)) - statutesAtLarge = nextReporter.textContent; - else if (nextReporter.textContent.match(/[pP]ub\./) || - nextReporter.textContent.match(/[pP]\.[lL]\./)) - publicLawNo = nextReporter.textContent; + var nextReporter = xPathofNonPaginatedReporter[i].textContent; + if (nextReporter.match(/[sS]tat\./)) + statutesAtLarge = nextReporter; + else if (nextReporter.match(/[pP]ub\./) || + nextReporter.match(/[pP]\.[lL]\./)) + publicLawNo = nextReporter; } // Turn publicLawNo into the public law fields @@ -98,7 +163,7 @@ function scrape(doc, url) { var numPos = publicLawNo.search(/\d+-\d+/) newStatute.publicLawNumber = publicLawNo.substring( numPos, - publiclawNo.substring(numPos + 1).indexOf(' ')); // Gets 115-164 + publicLawNo.substring(numPos + 1).indexOf(' ')); // Gets 115-164 newStatute.session = newStatute.publicLawNumber.substring(0, newStatute.publicLawNumber.indexOf('-')); } @@ -144,17 +209,18 @@ function scrape(doc, url) { function detectWeb(doc, url) { if (doc.title.match(/.*results.*/)) { - return "multiple" + Zotero.debug("multiple"); + return "multiple"; } else if (doc.title.match(/[a-zA-Z\. ]+\s§\s\d+/) || doc.title.match(/[aA][cC][tT]/) || doc.title.match(/[pP]\.[lL]\./)) // Match: ... Tex. Bus. & Com. Code § 26.01 ... { - return "statute" + return "statute"; } else if (doc.title.match(/\d+\s[a-zA-Z0-9\. ]+\s\d+/)) // Match: ... 5 U.S. 137 ... { - return "case" + return "case"; } // TODO secondary sources } @@ -167,30 +233,33 @@ function doWeb(doc, url) { var casesOrStatutes = new Array(); var items = new Object(); - var nextTitle; + var nextTitle; if (detectWeb(doc, url) == "multiple") { // TODO check what type of element it is (currently only working for 'cases' searches) - var titles = doc.evaluate('(//a[@class="titleLink"])', - doc, nsResolver, XPathResult.ANY_TYPE, null); - var dates = doc.evaluate('(//span[contains(@class,"metaDataItem")])', - doc, nsResolver, XPathResult.ANY_TYPE, null) + var titles = ZU.xpath(doc, '//a[@class="titleLink"]', nsResolver); + var dates = ZU.xpath(doc, '(//span[contains(@class,"metaDataItem")])', nsResolver); var nextDate; - dates.iterateNext(); // First court name - nextDate = dates.iterateNext(); // First date is [2] - dates.iterateNext(); // First citation + var dateOffset = 1; + + // dates[0] is first court name + nextDate = dates[dateOffset]; + dateOffset += 3; + // dates[2] is first citation - while (nextTitle = titles.iterateNext()) { + for (var i = 0; i < titles.length; i++) { + nextTitle = titles[i]; items[nextTitle.href] = nextTitle.textContent + "(" + nextDate.textContent + ")"; - dates.iterateNext(); // Court name - nextDate = dates.iterateNext(); // Every 3 items is a date - dates.iterateNext(); // Citation + // dates[0] is court name + nextDate = dates[dateOffset]; + dateOffset += 3; + // dates[2] is a citation } items = Zotero.selectItems(items); - for(var i in items) { - casesOrStatutes.push(i); + for(var e in items) { + casesOrStatutes.push(e); } } else From 9ebace2d3c8cf14bf5c9d9ae808745fb079592d1 Mon Sep 17 00:00:00 2001 From: OCDkirby <59982409+OCDkirby@users.noreply.github.com> Date: Fri, 7 Apr 2023 10:55:20 -0500 Subject: [PATCH 08/17] Lexis+: Conform to template format --- Lexis+.js | 185 +++++++++++++++++++++--------------------------------- 1 file changed, 72 insertions(+), 113 deletions(-) diff --git a/Lexis+.js b/Lexis+.js index 70ba972d609..95508cb6ed7 100644 --- a/Lexis+.js +++ b/Lexis+.js @@ -35,56 +35,78 @@ ***** END LICENSE BLOCK ***** */ +function detectWeb(doc, url) { + if (doc.title.match(/.*results.*/)) { + Zotero.debug("multiple"); + return "multiple"; + } + else if (doc.title.match(/[a-zA-Z\. ]+\s§\s\d+/) || + doc.title.match(/[aA][cC][tT]/) || + doc.title.match(/[pP]\.[lL]\./)) // Match: ... Tex. Bus. & Com. Code § 26.01 ... + { + return "statute"; + } + else if (doc.title.match(/\d+\s[a-zA-Z0-9\. ]+\s\d+/)) // Match: ... 5 U.S. 137 ... + { + return "case"; + } + // TODO secondary sources + + return false; +} + +function getSearchResults(doc, url) { + var namespace = doc.documentElement.namespaceURI; + var nsResolver = namespace ? function(prefix) { + if (prefix == "x" ) return namespace; else return null; + } : null; + + var casesOrStatutes = new Array(); + var items = new Object(); + var nextTitle; + + if (detectWeb(doc, url) == "multiple") { + // TODO check what type of element it is (currently only working for 'cases' searches) + var titles = ZU.xpath(doc, '//a[@class="titleLink"]', nsResolver); + var dates = ZU.xpath(doc, '(//span[contains(@class,"metaDataItem")])', nsResolver); + var nextDate; + var dateOffset = 1; + + // dates[0] is first court name + nextDate = dates[dateOffset]; + dateOffset += 3; + // dates[2] is first citation + + for (var i = 0; i < titles.length; i++) { + nextTitle = titles[i]; + items[nextTitle.href] = nextTitle.textContent + "(" + nextDate.textContent + ")"; + + // dates[0] is court name + nextDate = dates[dateOffset]; + dateOffset += 3; + // dates[2] is a citation + + return items; + } + } + + return false; +} + +async function doWeb(doc, url) { + if (detectWeb(doc, url) == 'multiple') { + let items = await Zotero.selectItems(getSearchResults(doc, url)); + if (!items) return; + for (let url of Object.keys(items)) { + await scrape(await requestDocument(url)); + } + } + else { + await scrape(doc, url); + } +} -// function detectWeb(doc, url) { -// // TODO: adjust the logic here -// if (url.includes('/article/')) { -// return 'newspaperArticle'; -// } -// else if (getSearchResults(doc, true)) { -// return 'multiple'; -// } -// return false; -// } - -// function getSearchResults(doc, checkOnly) { -// var items = {}; -// var found = false; -// // TODO: adjust the CSS selector -// var rows = doc.querySelectorAll('h2 > a.title[href*="/article/"]'); -// for (let row of rows) { -// // TODO: check and maybe adjust -// let href = row.href; -// // TODO: check and maybe adjust -// let title = ZU.trimInternal(row.textContent); -// if (!href || !title) continue; -// if (checkOnly) return true; -// found = true; -// items[href] = title; -// } -// return found ? items : false; -// } - -// async function doWeb(doc, url) { -// if (detectWeb(doc, url) == 'multiple') { -// let items = await Zotero.selectItems(getSearchResults(doc, false)); -// if (!items) return; -// for (let url of Object.keys(items)) { -// await scrape(await requestDocument(url)); -// } -// } -// else { -// await scrape(doc, url); -// } -// } - -// async function scrape(doc, url = doc.location.href) { -// // TODO: implement or add a scrape function template -// } - - - -function scrape(doc, url) { +async function scrape(doc, url) { var namespace = doc.documentElement.namespaceURI; var nsResolver = namespace ? function([prefix]) { if (prefix == "x" ) return namespace; else return null; @@ -178,8 +200,6 @@ function scrape(doc, url) { newStatute.codeNumber = statutesAtLarge.substring(0, statutesAtLarge.indexOf(' ')); newStatute.code = "Stat."; newStatute.section = statutesAtLarge.substring(statutesAtLarge.lastIndexOf(' ') + 1); - - } else { @@ -207,69 +227,8 @@ function scrape(doc, url) { } } -function detectWeb(doc, url) { - if (doc.title.match(/.*results.*/)) { - Zotero.debug("multiple"); - return "multiple"; - } - else if (doc.title.match(/[a-zA-Z\. ]+\s§\s\d+/) || - doc.title.match(/[aA][cC][tT]/) || - doc.title.match(/[pP]\.[lL]\./)) // Match: ... Tex. Bus. & Com. Code § 26.01 ... - { - return "statute"; - } - else if (doc.title.match(/\d+\s[a-zA-Z0-9\. ]+\s\d+/)) // Match: ... 5 U.S. 137 ... - { - return "case"; - } - // TODO secondary sources -} - -function doWeb(doc, url) { - var namespace = doc.documentElement.namespaceURI; - var nsResolver = namespace ? function(prefix) { - if (prefix == "x" ) return namespace; else return null; - } : null; - - var casesOrStatutes = new Array(); - var items = new Object(); - var nextTitle; - - if (detectWeb(doc, url) == "multiple") { - // TODO check what type of element it is (currently only working for 'cases' searches) - var titles = ZU.xpath(doc, '//a[@class="titleLink"]', nsResolver); - var dates = ZU.xpath(doc, '(//span[contains(@class,"metaDataItem")])', nsResolver); - var nextDate; - var dateOffset = 1; - - // dates[0] is first court name - nextDate = dates[dateOffset]; - dateOffset += 3; - // dates[2] is first citation - - for (var i = 0; i < titles.length; i++) { - nextTitle = titles[i]; - items[nextTitle.href] = nextTitle.textContent + "(" + nextDate.textContent + ")"; - - // dates[0] is court name - nextDate = dates[dateOffset]; - dateOffset += 3; - // dates[2] is a citation - } - - items = Zotero.selectItems(items); - for(var e in items) { - casesOrStatutes.push(e); - } - } - else - { - casesOrStatutes = [url] - } - Zotero.Utilities.processDocuments(casesOrStatutes, scrape, function(){Zotero.done();}); - Zotero.wait(); -}/** BEGIN TEST CASES **/ +/** BEGIN TEST CASES **/ var testCases = [ ] /** END TEST CASES **/ From e1f018fb15ac8b2c80672028c8364cff786f0956 Mon Sep 17 00:00:00 2001 From: OCDkirby <59982409+OCDkirby@users.noreply.github.com> Date: Fri, 7 Apr 2023 11:05:56 -0500 Subject: [PATCH 09/17] Lexis+: Fix search results premature termination --- Lexis+.js | 56 +++++++++++++++++++++++++++---------------------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/Lexis+.js b/Lexis+.js index 95508cb6ed7..d715d0d35c8 100644 --- a/Lexis+.js +++ b/Lexis+.js @@ -9,35 +9,34 @@ "inRepository": true, "translatorType": 4, "browserSupport": "gcsibv", - "lastUpdated": "2023-04-07 14:01:13" + "lastUpdated": "2023-04-07 16:00:59" } /* - ***** BEGIN LICENSE BLOCK ***** + ***** BEGIN LICENSE BLOCK ***** - Copyright © 2022 YOUR_NAME <- TODO + Copyright © 2022 YOUR_NAME <- TODO - This file is part of Zotero. + This file is part of Zotero. - Zotero is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. + Zotero is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. - Zotero is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. + Zotero is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. - You should have received a copy of the GNU Affero General Public License - along with Zotero. If not, see . + You should have received a copy of the GNU Affero General Public License + along with Zotero. If not, see . - ***** END LICENSE BLOCK ***** + ***** END LICENSE BLOCK ***** */ function detectWeb(doc, url) { if (doc.title.match(/.*results.*/)) { - Zotero.debug("multiple"); return "multiple"; } else if (doc.title.match(/[a-zA-Z\. ]+\s§\s\d+/) || @@ -70,24 +69,25 @@ function getSearchResults(doc, url) { var titles = ZU.xpath(doc, '//a[@class="titleLink"]', nsResolver); var dates = ZU.xpath(doc, '(//span[contains(@class,"metaDataItem")])', nsResolver); var nextDate; - var dateOffset = 1; - - // dates[0] is first court name + var dateOffset = 1; + + // dates[0] is first court name nextDate = dates[dateOffset]; - dateOffset += 3; + dateOffset += 3; // dates[2] is first citation - + Zotero.debug(titles.length); for (var i = 0; i < titles.length; i++) { - nextTitle = titles[i]; + Zotero.debug(titles[i].textContent); + nextTitle = titles[i]; items[nextTitle.href] = nextTitle.textContent + "(" + nextDate.textContent + ")"; // dates[0] is court name nextDate = dates[dateOffset]; - dateOffset += 3; + dateOffset += 3; // dates[2] is a citation - - return items; } + + return items; } return false; @@ -158,8 +158,8 @@ async function scrape(doc, url) { var xPathofActiveReporter = ZU.xpath(doc, '//a[@class="SS_ActiveRptr"]', nsResolver); if (xPathofActiveReporter.length > 0) // Sometimes Lexis is weird and doesn't give an ActiveRptr { - var potentialReporter = xPathofActiveReporter[0]; - Zotero.debug(potentialReporter.textContent); + var potentialReporter = xPathofActiveReporter[0]; + Zotero.debug(potentialReporter.textContent); if (potentialReporter.textContent.match(/[sS]tat\./)) statutesAtLarge = potentialReporter.textContent; else if (potentialReporter.textContent.match(/[pP]ub\./) || @@ -171,7 +171,7 @@ async function scrape(doc, url) { for (var i = 0; i < xPathofNonPaginatedReporter.length; i++) { - var nextReporter = xPathofNonPaginatedReporter[i].textContent; + var nextReporter = xPathofNonPaginatedReporter[i].textContent; if (nextReporter.match(/[sS]tat\./)) statutesAtLarge = nextReporter; else if (nextReporter.match(/[pP]ub\./) || From 778c8b4fc5e407fea39694bfea677f6ed0a2c866 Mon Sep 17 00:00:00 2001 From: OCDkirby <59982409+OCDkirby@users.noreply.github.com> Date: Fri, 7 Apr 2023 14:27:19 -0500 Subject: [PATCH 10/17] Lexis+: Finish API refactor --- Lexis+.js | 135 ++++++++++++++++++++++++++++-------------------------- 1 file changed, 71 insertions(+), 64 deletions(-) diff --git a/Lexis+.js b/Lexis+.js index d715d0d35c8..0a214d1556f 100644 --- a/Lexis+.js +++ b/Lexis+.js @@ -1,7 +1,7 @@ { "translatorID": "419638d9-9049-44ad-ba08-fa54ed24b5e6", "label": "Lexis+", - "creator": "Brandon F", + "creator": "bfahrenfort", "target": "^https://plus.lexis.*/", "minVersion": "5.0", "maxVersion": "", @@ -9,13 +9,13 @@ "inRepository": true, "translatorType": 4, "browserSupport": "gcsibv", - "lastUpdated": "2023-04-07 16:00:59" + "lastUpdated": "2023-04-07 19:21:13" } /* ***** BEGIN LICENSE BLOCK ***** - Copyright © 2022 YOUR_NAME <- TODO + Copyright © 2023 Brandon Fahrenfort This file is part of Zotero. @@ -40,8 +40,8 @@ function detectWeb(doc, url) { return "multiple"; } else if (doc.title.match(/[a-zA-Z\. ]+\s§\s\d+/) || - doc.title.match(/[aA][cC][tT]/) || - doc.title.match(/[pP]\.[lL]\./)) // Match: ... Tex. Bus. & Com. Code § 26.01 ... + doc.title.match(/act/i) || + doc.title.match(/p\.l\./i)) // Match: ... Tex. Bus. & Com. Code § 26.01 ... { return "statute"; } @@ -55,39 +55,33 @@ function detectWeb(doc, url) { } function getSearchResults(doc, url) { - var namespace = doc.documentElement.namespaceURI; - var nsResolver = namespace ? function(prefix) { - if (prefix == "x" ) return namespace; else return null; - } : null; - var casesOrStatutes = new Array(); var items = new Object(); var nextTitle; if (detectWeb(doc, url) == "multiple") { // TODO check what type of element it is (currently only working for 'cases' searches) - var titles = ZU.xpath(doc, '//a[@class="titleLink"]', nsResolver); - var dates = ZU.xpath(doc, '(//span[contains(@class,"metaDataItem")])', nsResolver); + var titles = doc.querySelectorAll('a.titleLink'); + var dates = doc.querySelectorAll('span.metaDataItem'); // Not technically only dates, but that's all I use it for atm var nextDate; var dateOffset = 1; // dates[0] is first court name - nextDate = dates[dateOffset]; + nextDate = dates[dateOffset]; dateOffset += 3; // dates[2] is first citation - Zotero.debug(titles.length); for (var i = 0; i < titles.length; i++) { - Zotero.debug(titles[i].textContent); - nextTitle = titles[i]; + Zotero.debug(titles[i].textContent); + nextTitle = titles[i]; items[nextTitle.href] = nextTitle.textContent + "(" + nextDate.textContent + ")"; // dates[0] is court name nextDate = dates[dateOffset]; - dateOffset += 3; + dateOffset += 3; // dates[2] is a citation } - - return items; + + return items; } return false; @@ -107,11 +101,6 @@ async function doWeb(doc, url) { } async function scrape(doc, url) { - var namespace = doc.documentElement.namespaceURI; - var nsResolver = namespace ? function([prefix]) { - if (prefix == "x" ) return namespace; else return null; - } : null; - if (detectWeb(doc, url) == "case") { var newCase = new Zotero.Item("case"); @@ -119,17 +108,20 @@ async function scrape(doc, url) { newCase.title = text(doc, 'h1#SS_DocumentTitle'); - var xPathofCitation = ZU.xpath(doc, '//span[@class="active-reporter"]'); - var citation = xPathofCitation[0].textContent; + var citation = text(doc, 'span.active-reporter'); newCase.reporterVolume = citation.substring(0, citation.indexOf(' ')); newCase.reporter = citation.substring(citation.indexOf(' ') + 1, citation.lastIndexOf(' ')); newCase.firstPage = citation.substring(citation.lastIndexOf(' ') + 1); - var xPathofCourt = ZU.xpath(doc, '(//p[@class="SS_DocumentInfo"])[1]', nsResolver); - newCase.court = xPathofCourt[0].textContent; + newCase.court = text(doc, 'p.SS_DocumentInfo', 0); + + newCase.dateDecided = text(doc, 'span.date'); - var xPathofDate = ZU.xpath(doc, '//span[@class="date"]', nsResolver); - newCase.dateDecided = xPathofDate[0].textContent; + var docket = text(doc, 'p.SS_DocumentInfo', 2); + if (docket.match(/^no\./i) || + docket.match(/^\d+/) || + docket.match(/^case no\./i)) + newCase.docketNumber = docket; // This won't be in perfect cite form, shouldn't be a hassle unless you're citing dozens of memorandum opinions newCase.complete(); } @@ -138,44 +130,40 @@ async function scrape(doc, url) { var newStatute = new Zotero.Item("statute"); newStatute.url = doc.location.href; - var xPathofTitle = ZU.xpath(doc, '//h1[@id="SS_DocumentTitle"]', nsResolver); - var title = xPathofTitle[0].textContent; - newStatute.title = title; + var title = text(doc, 'h1#SS_DocumentTitle'); // Saves some lines to have a temp here + newStatute.title = title; - var xPathofInfo = ZU.xpath(doc, '//p[@class="SS_DocumentInfo"]', nsResolver); - var info = xPathofInfo[0].textContent; + var info = text(doc, 'p.SS_DocumentInfo'); isolation = info.substring(info.search( - /\b(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)/ + /\b(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)/i )) // isolate date on the frontend newStatute.dateEnacted = isolation.substring(0, isolation.search(/[1-2][0-9][0-9][0-9]/) + 4); - if (title.match(/[Aa][cC][tT]/) || - title.match(/[Oo][Ff]\s[1-2][0-9][0-9][0-9]/)) // session law, not codified statute + if (title.match(/act/i) || + title.match(/of\s[1-2][0-9][0-9][0-9]/i)) // Session law, not codified statute { // BB 21st ed. requires parallel cite to Pub. L. No. and Stat. for session laws var statutesAtLarge, publicLawNo; - var xPathofActiveReporter = ZU.xpath(doc, '//a[@class="SS_ActiveRptr"]', nsResolver); - if (xPathofActiveReporter.length > 0) // Sometimes Lexis is weird and doesn't give an ActiveRptr + var potentialReporter = text(doc, 'a.SS_ActiveRptr'); + if (potentialReporter) // Sometimes Lexis is weird and doesn't give an ActiveRptr { - var potentialReporter = xPathofActiveReporter[0]; - Zotero.debug(potentialReporter.textContent); - if (potentialReporter.textContent.match(/[sS]tat\./)) + if (potentialReporter.textContent.match(/stat\./i)) statutesAtLarge = potentialReporter.textContent; - else if (potentialReporter.textContent.match(/[pP]ub\./) || - potentialReporter.textContent.match(/[pP]\.[lL]\./)) + else if (potentialReporter.textContent.match(/pub\./i) || + potentialReporter.textContent.match(/p\.l\./i)) publicLawNo = potentialReporter.textContent; } - var xPathofNonPaginatedReporter = ZU.xpath(doc, '//span[@class="SS_NonPaginatedRptr"]', nsResolver); + var otherReporters = doc.querySelectorAll('span.SS_NonPaginatedRptr'); - for (var i = 0; i < xPathofNonPaginatedReporter.length; i++) + for (var i = 0; i < otherReporters.length; i++) { - var nextReporter = xPathofNonPaginatedReporter[i].textContent; - if (nextReporter.match(/[sS]tat\./)) + var nextReporter = otherReporters[i].textContent; + if (nextReporter.match(/stat\./i)) statutesAtLarge = nextReporter; - else if (nextReporter.match(/[pP]ub\./) || - nextReporter.match(/[pP]\.[lL]\./)) + else if (nextReporter.match(/pub\./i) || + nextReporter.match(/p\.l\./i)) publicLawNo = nextReporter; } @@ -183,9 +171,7 @@ async function scrape(doc, url) { if (publicLawNo.match(/\d+-\d+/)) // Ex. P.L. 115-164 { var numPos = publicLawNo.search(/\d+-\d+/) - newStatute.publicLawNumber = publicLawNo.substring( - numPos, - publicLawNo.substring(numPos + 1).indexOf(' ')); // Gets 115-164 + newStatute.publicLawNumber = publicLawNo.substring(numPos, publicLawNo.substring(numPos + 1).indexOf(' ')); // Gets 115-164 newStatute.session = newStatute.publicLawNumber.substring(0, newStatute.publicLawNumber.indexOf('-')); } @@ -201,24 +187,45 @@ async function scrape(doc, url) { newStatute.code = "Stat."; newStatute.section = statutesAtLarge.substring(statutesAtLarge.lastIndexOf(' ') + 1); } - else + else // Codified statute { if (title.match(/^\d+/)) // Starts with digit, organized by title, ex. 47 U.S.C.S. § 230 { - newStatute.codeNumber = title.substring(0, title.indexOf(' ')); - var isolation = title.substring(title.indexOf(' '), title.lastIndexOf(' ')); // isolate code and section symbol - newStatute.code = isolation.substring(0, isolation.lastIndexOf(' ')); - newStatute.section = title.substring(title.lastIndexOf(' ') + 1); + // Sadly, named groups aren't working + let groups = title.match(/^(\d+)\s([a-zA-Z0-9\. ]+) § ([0-9\.\(\)a-zA-Z]+)/); + newStatute.codeNumber = groups[1]; + newStatute.code = groups[2]; + newStatute.section = groups[3]; } else // Starts with letter, organized by code, ex. Tex. Bus. & Com. Code § 26.01 { - newStatute.code = title.substring(0, title.lastIndexOf('§') - 1); - newStatute.section = title.substring(title.lastIndexOf(' ') + 1); + let groups = title.match(/^([a-zA-Z\. ]+) § ([0-9\.\(\)a-zA-Z]+)/); + newStatute.code = groups[1]; + newStatute.section = groups[2]; } - var isolation = info.substring(info.search(/\d+-\d+/)); // isolate public law number on the frontend - newStatute.publicLawNumber = isolation.substring(0, isolation.indexOf(' ')).replace(/(^,)|(,$)/g, ''); - newStatute.session = newStatute.publicLawNumber.substring(0, newStatute.publicLawNumber.indexOf('-')); + // No way to tell which will be present + var pL = info.match(/p\.l\. (\d+-\d+)/i); + var pubLaw = info.match(/pub\. law (\d+-\d+)/i); + var pubLawNo = info.match(/pub\. law no\. (\d+-\d+)/i); + var publicLaw = info.match(/public law (\d+-\d+)/i); + var publicLawNo = info.match(/public law no\. (\d+-\d+)/i); + var publicLawNumber = info.match(/public law number (\d+-\d+)/i); + if (pL) + newStatute.publicLawNumber = pL[1]; + if (pubLaw) + newStatute.publicLawNumber = pubLaw[1]; + if (pubLawNo) + newStatute.publicLawNumber = pubLawNo[1]; + if (publicLaw) + newStatute.publicLawNumber = publicLaw[1]; + if (publicLawNo) + newStatute.publicLawNumber = publicLawNo[1]; + if (publicLawNumber) + newStatute.publicLawNumber = publicLawNumber[1]; + + if (newStatute.publicLawNumber) + newStatute.session = newStatute.publicLawNumber.substring(0, newStatute.publicLawNumber.indexOf('-')); } newStatute.extra = info; // Since the info section is all over the place, just dump the whole thing in for manual cite checks From 8dac993db83788ecab4b9a90924969df4acb05be Mon Sep 17 00:00:00 2001 From: OCDkirby <59982409+OCDkirby@users.noreply.github.com> Date: Fri, 7 Apr 2023 14:36:55 -0500 Subject: [PATCH 11/17] Lexis+: Formatting --- Lexis+.js | 413 +++++++++++++++++++++++++++--------------------------- 1 file changed, 206 insertions(+), 207 deletions(-) diff --git a/Lexis+.js b/Lexis+.js index 0a214d1556f..a342d9e2041 100644 --- a/Lexis+.js +++ b/Lexis+.js @@ -1,237 +1,236 @@ { - "translatorID": "419638d9-9049-44ad-ba08-fa54ed24b5e6", - "label": "Lexis+", - "creator": "bfahrenfort", - "target": "^https://plus.lexis.*/", - "minVersion": "5.0", - "maxVersion": "", - "priority": 100, - "inRepository": true, - "translatorType": 4, - "browserSupport": "gcsibv", - "lastUpdated": "2023-04-07 19:21:13" + "translatorID": "419638d9-9049-44ad-ba08-fa54ed24b5e6", + "label": "Lexis+", + "creator": "bfahrenfort", + "target": "^https://plus.lexis.*/", + "minVersion": "5.0", + "maxVersion": "", + "priority": 100, + "inRepository": true, + "translatorType": 4, + "browserSupport": "gcsibv", + "lastUpdated": "2023-04-07 19:21:13" } /* - ***** BEGIN LICENSE BLOCK ***** + ***** BEGIN LICENSE BLOCK ***** - Copyright © 2023 Brandon Fahrenfort + Copyright © 2023 Brandon Fahrenfort - This file is part of Zotero. + This file is part of Zotero. - Zotero is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. + Zotero is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. - Zotero is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. + Zotero is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. - You should have received a copy of the GNU Affero General Public License - along with Zotero. If not, see . + You should have received a copy of the GNU Affero General Public License + along with Zotero. If not, see . - ***** END LICENSE BLOCK ***** + ***** END LICENSE BLOCK ***** */ function detectWeb(doc, url) { - if (doc.title.match(/.*results.*/)) { - return "multiple"; - } - else if (doc.title.match(/[a-zA-Z\. ]+\s§\s\d+/) || - doc.title.match(/act/i) || - doc.title.match(/p\.l\./i)) // Match: ... Tex. Bus. & Com. Code § 26.01 ... - { - return "statute"; - } - else if (doc.title.match(/\d+\s[a-zA-Z0-9\. ]+\s\d+/)) // Match: ... 5 U.S. 137 ... - { - return "case"; - } - // TODO secondary sources + if (doc.title.match(/.*results.*/)) { + return "multiple"; + } + else if (doc.title.match(/[a-zA-Z\. ]+\s§\s\d+/) || + doc.title.match(/act/i) || + doc.title.match(/p\.l\./i)) // Match: ... Tex. Bus. & Com. Code § 26.01 ... + { + return "statute"; + } + else if (doc.title.match(/\d+\s[a-zA-Z0-9\. ]+\s\d+/)) // Match: ... 5 U.S. 137 ... + { + return "case"; + } + // TODO secondary sources return false; } function getSearchResults(doc, url) { - var casesOrStatutes = new Array(); - var items = new Object(); - var nextTitle; - - if (detectWeb(doc, url) == "multiple") { - // TODO check what type of element it is (currently only working for 'cases' searches) - var titles = doc.querySelectorAll('a.titleLink'); - var dates = doc.querySelectorAll('span.metaDataItem'); // Not technically only dates, but that's all I use it for atm - var nextDate; - var dateOffset = 1; - - // dates[0] is first court name - nextDate = dates[dateOffset]; - dateOffset += 3; - // dates[2] is first citation - for (var i = 0; i < titles.length; i++) { - Zotero.debug(titles[i].textContent); - nextTitle = titles[i]; - items[nextTitle.href] = nextTitle.textContent + "(" + nextDate.textContent + ")"; - - // dates[0] is court name - nextDate = dates[dateOffset]; - dateOffset += 3; - // dates[2] is a citation - } - - return items; + var casesOrStatutes = new Array(); + var items = new Object(); + var nextTitle; + + if (detectWeb(doc, url) == "multiple") { + // TODO check what type of element it is (currently only working for 'cases' searches) + var titles = doc.querySelectorAll('a.titleLink'); + var dates = doc.querySelectorAll('span.metaDataItem'); // Not technically only dates, but that's all I use it for atm + var nextDate; + var dateOffset = 1; + + // dates[0] is first court name + nextDate = dates[dateOffset]; + dateOffset += 3; + // dates[2] is first citation + for (var i = 0; i < titles.length; i++) { + nextTitle = titles[i]; + items[nextTitle.href] = nextTitle.textContent + "(" + nextDate.textContent + ")"; + + // dates[0] is court name + nextDate = dates[dateOffset]; + + // dates[2] is a citation + } + + return items; } return false; } async function doWeb(doc, url) { - if (detectWeb(doc, url) == 'multiple') { - let items = await Zotero.selectItems(getSearchResults(doc, url)); - if (!items) return; - for (let url of Object.keys(items)) { - await scrape(await requestDocument(url)); - } - } - else { - await scrape(doc, url); - } + if (detectWeb(doc, url) == 'multiple') { + let items = await Zotero.selectItems(getSearchResults(doc, url)); + if (!items) return; + for (let url of Object.keys(items)) { + await scrape(await requestDocument(url)); + } + } + else { + await scrape(doc, url); + } } async function scrape(doc, url) { - if (detectWeb(doc, url) == "case") - { - var newCase = new Zotero.Item("case"); - newCase.url = doc.location.href; - - newCase.title = text(doc, 'h1#SS_DocumentTitle'); - - var citation = text(doc, 'span.active-reporter'); - newCase.reporterVolume = citation.substring(0, citation.indexOf(' ')); - newCase.reporter = citation.substring(citation.indexOf(' ') + 1, citation.lastIndexOf(' ')); - newCase.firstPage = citation.substring(citation.lastIndexOf(' ') + 1); - - newCase.court = text(doc, 'p.SS_DocumentInfo', 0); - - newCase.dateDecided = text(doc, 'span.date'); - - var docket = text(doc, 'p.SS_DocumentInfo', 2); - if (docket.match(/^no\./i) || - docket.match(/^\d+/) || - docket.match(/^case no\./i)) - newCase.docketNumber = docket; // This won't be in perfect cite form, shouldn't be a hassle unless you're citing dozens of memorandum opinions - - newCase.complete(); - } - else if (detectWeb(doc, url) == "statute") - { - var newStatute = new Zotero.Item("statute"); - newStatute.url = doc.location.href; - - var title = text(doc, 'h1#SS_DocumentTitle'); // Saves some lines to have a temp here - newStatute.title = title; - - var info = text(doc, 'p.SS_DocumentInfo'); - - isolation = info.substring(info.search( - /\b(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)/i - )) // isolate date on the frontend - newStatute.dateEnacted = isolation.substring(0, isolation.search(/[1-2][0-9][0-9][0-9]/) + 4); - - if (title.match(/act/i) || - title.match(/of\s[1-2][0-9][0-9][0-9]/i)) // Session law, not codified statute - { - // BB 21st ed. requires parallel cite to Pub. L. No. and Stat. for session laws - var statutesAtLarge, publicLawNo; - var potentialReporter = text(doc, 'a.SS_ActiveRptr'); - if (potentialReporter) // Sometimes Lexis is weird and doesn't give an ActiveRptr - { - if (potentialReporter.textContent.match(/stat\./i)) - statutesAtLarge = potentialReporter.textContent; - else if (potentialReporter.textContent.match(/pub\./i) || - potentialReporter.textContent.match(/p\.l\./i)) - publicLawNo = potentialReporter.textContent; - } - - var otherReporters = doc.querySelectorAll('span.SS_NonPaginatedRptr'); - - for (var i = 0; i < otherReporters.length; i++) - { - var nextReporter = otherReporters[i].textContent; - if (nextReporter.match(/stat\./i)) - statutesAtLarge = nextReporter; - else if (nextReporter.match(/pub\./i) || - nextReporter.match(/p\.l\./i)) - publicLawNo = nextReporter; - } - - // Turn publicLawNo into the public law fields - if (publicLawNo.match(/\d+-\d+/)) // Ex. P.L. 115-164 - { - var numPos = publicLawNo.search(/\d+-\d+/) - newStatute.publicLawNumber = publicLawNo.substring(numPos, publicLawNo.substring(numPos + 1).indexOf(' ')); // Gets 115-164 - - newStatute.session = newStatute.publicLawNumber.substring(0, newStatute.publicLawNumber.indexOf('-')); - } - else // Ex. 115 P.L. 164 or 115 Pub. L. No. 164 - { - newStatute.session = publicLawNo.substring(0, publicLawNo.indexOf(' ')); - newStatute.publicLawNumber = newStatute.session + '-' + publicLawNo.substring(publicLawNo.lastIndexOf(' ') + 1); - } - - // Turn statutesAtLarge into the code#/code/section fields - // TODO in styles, check for "Stat." as the code, and if so, don't append a section symbol - newStatute.codeNumber = statutesAtLarge.substring(0, statutesAtLarge.indexOf(' ')); - newStatute.code = "Stat."; - newStatute.section = statutesAtLarge.substring(statutesAtLarge.lastIndexOf(' ') + 1); - } - else // Codified statute - { - if (title.match(/^\d+/)) // Starts with digit, organized by title, ex. 47 U.S.C.S. § 230 - { - // Sadly, named groups aren't working - let groups = title.match(/^(\d+)\s([a-zA-Z0-9\. ]+) § ([0-9\.\(\)a-zA-Z]+)/); - newStatute.codeNumber = groups[1]; - newStatute.code = groups[2]; - newStatute.section = groups[3]; - } - else // Starts with letter, organized by code, ex. Tex. Bus. & Com. Code § 26.01 - { - let groups = title.match(/^([a-zA-Z\. ]+) § ([0-9\.\(\)a-zA-Z]+)/); - newStatute.code = groups[1]; - newStatute.section = groups[2]; - } - - // No way to tell which will be present - var pL = info.match(/p\.l\. (\d+-\d+)/i); - var pubLaw = info.match(/pub\. law (\d+-\d+)/i); - var pubLawNo = info.match(/pub\. law no\. (\d+-\d+)/i); - var publicLaw = info.match(/public law (\d+-\d+)/i); - var publicLawNo = info.match(/public law no\. (\d+-\d+)/i); - var publicLawNumber = info.match(/public law number (\d+-\d+)/i); - if (pL) - newStatute.publicLawNumber = pL[1]; - if (pubLaw) - newStatute.publicLawNumber = pubLaw[1]; - if (pubLawNo) - newStatute.publicLawNumber = pubLawNo[1]; - if (publicLaw) - newStatute.publicLawNumber = publicLaw[1]; - if (publicLawNo) - newStatute.publicLawNumber = publicLawNo[1]; - if (publicLawNumber) - newStatute.publicLawNumber = publicLawNumber[1]; - - if (newStatute.publicLawNumber) - newStatute.session = newStatute.publicLawNumber.substring(0, newStatute.publicLawNumber.indexOf('-')); - } - - newStatute.extra = info; // Since the info section is all over the place, just dump the whole thing in for manual cite checks - - newStatute.complete(); - } + if (detectWeb(doc, url) == "case") + { + var newCase = new Zotero.Item("case"); + newCase.url = doc.location.href; + + newCase.title = text(doc, 'h1#SS_DocumentTitle'); + + var citation = text(doc, 'span.active-reporter'); + newCase.reporterVolume = citation.substring(0, citation.indexOf(' ')); + newCase.reporter = citation.substring(citation.indexOf(' ') + 1, citation.lastIndexOf(' ')); + newCase.firstPage = citation.substring(citation.lastIndexOf(' ') + 1); + + newCase.court = text(doc, 'p.SS_DocumentInfo', 0); + + newCase.dateDecided = text(doc, 'span.date'); + + var docket = text(doc, 'p.SS_DocumentInfo', 2); + if (docket.match(/^no\./i) || + docket.match(/^\d+/) || + docket.match(/^case no\./i)) + newCase.docketNumber = docket; // This won't be in perfect cite form, shouldn't be a hassle unless you're citing dozens of memorandum opinions + + newCase.complete(); + } + else if (detectWeb(doc, url) == "statute") + { + var newStatute = new Zotero.Item("statute"); + newStatute.url = doc.location.href; + + var title = text(doc, 'h1#SS_DocumentTitle'); // Saves some lines to have a temp here + newStatute.title = title; + + var info = text(doc, 'p.SS_DocumentInfo'); + + var isolation = info.substring(info.search( + /\b(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)/i + )) // isolate date on the frontend + newStatute.dateEnacted = isolation.substring(0, isolation.search(/[1-2][0-9][0-9][0-9]/) + 4); + + if (title.match(/act/i) || + title.match(/of\s[1-2][0-9][0-9][0-9]/i)) // Session law, not codified statute + { + // BB 21st ed. requires parallel cite to Pub. L. No. and Stat. for session laws + var statutesAtLarge, publicLawNo; + var potentialReporter = text(doc, 'a.SS_ActiveRptr'); + if (potentialReporter) // Sometimes Lexis is weird and doesn't give an ActiveRptr + { + if (potentialReporter.textContent.match(/stat\./i)) + statutesAtLarge = potentialReporter.textContent; + else if (potentialReporter.textContent.match(/pub\./i) || + potentialReporter.textContent.match(/p\.l\./i)) + publicLawNo = potentialReporter.textContent; + } + + var otherReporters = doc.querySelectorAll('span.SS_NonPaginatedRptr'); + + for (var i = 0; i < otherReporters.length; i++) + { + var nextReporter = otherReporters[i].textContent; + if (nextReporter.match(/stat\./i)) + statutesAtLarge = nextReporter; + else if (nextReporter.match(/pub\./i) || + nextReporter.match(/p\.l\./i)) + publicLawNo = nextReporter; + } + + // Turn publicLawNo into the public law fields + if (publicLawNo.match(/\d+-\d+/)) // Ex. P.L. 115-164 + { + var numPos = publicLawNo.search(/\d+-\d+/) + newStatute.publicLawNumber = publicLawNo.substring(numPos, publicLawNo.substring(numPos + 1).indexOf(' ')); // Gets 115-164 + + newStatute.session = newStatute.publicLawNumber.substring(0, newStatute.publicLawNumber.indexOf('-')); + } + else // Ex. 115 P.L. 164 or 115 Pub. L. No. 164 + { + newStatute.session = publicLawNo.substring(0, publicLawNo.indexOf(' ')); + newStatute.publicLawNumber = newStatute.session + '-' + publicLawNo.substring(publicLawNo.lastIndexOf(' ') + 1); + } + + // Turn statutesAtLarge into the code#/code/section fields + // TODO in styles, check for "Stat." as the code, and if so, don't append a section symbol + newStatute.codeNumber = statutesAtLarge.substring(0, statutesAtLarge.indexOf(' ')); + newStatute.code = "Stat."; + newStatute.section = statutesAtLarge.substring(statutesAtLarge.lastIndexOf(' ') + 1); + } + else // Codified statute + { + if (title.match(/^\d+/)) // Starts with digit, organized by title, ex. 47 U.S.C.S. § 230 + { + // Sadly, named groups aren't working + let groups = title.match(/^(\d+)\s([a-zA-Z0-9\. ]+) § ([0-9\.\(\)a-zA-Z]+)/); + newStatute.codeNumber = groups[1]; + newStatute.code = groups[2]; + newStatute.section = groups[3]; + } + else // Starts with letter, organized by code, ex. Tex. Bus. & Com. Code § 26.01 + { + let groups = title.match(/^([a-zA-Z\. ]+) § ([0-9\.\(\)a-zA-Z]+)/); + newStatute.code = groups[1]; + newStatute.section = groups[2]; + } + + // No way to tell which will be present + var pL = info.match(/p\.l\. (\d+-\d+)/i); + var pubLaw = info.match(/pub\. law (\d+-\d+)/i); + var pubLawNo = info.match(/pub\. law no\. (\d+-\d+)/i); + var publicLaw = info.match(/public law (\d+-\d+)/i); + var publicLawNo = info.match(/public law no\. (\d+-\d+)/i); + var publicLawNumber = info.match(/public law number (\d+-\d+)/i); + if (pL) + newStatute.publicLawNumber = pL[1]; + if (pubLaw) + newStatute.publicLawNumber = pubLaw[1]; + if (pubLawNo) + newStatute.publicLawNumber = pubLawNo[1]; + if (publicLaw) + newStatute.publicLawNumber = publicLaw[1]; + if (publicLawNo) + newStatute.publicLawNumber = publicLawNo[1]; + if (publicLawNumber) + newStatute.publicLawNumber = publicLawNumber[1]; + + if (newStatute.publicLawNumber) + newStatute.session = newStatute.publicLawNumber.substring(0, newStatute.publicLawNumber.indexOf('-')); + } + + newStatute.extra = info; // Since the info section is all over the place, just dump the whole thing in for manual cite checks + + newStatute.complete(); + } } From 71e02a78d3d1629e7cc236435a33846e15b671f8 Mon Sep 17 00:00:00 2001 From: Sebastian Karcher Date: Fri, 7 Apr 2023 16:10:05 -0400 Subject: [PATCH 12/17] Fix automated linting --- Lexis+.js | 365 ++++++++++++++++++++++++++---------------------------- 1 file changed, 175 insertions(+), 190 deletions(-) diff --git a/Lexis+.js b/Lexis+.js index a342d9e2041..19b56fe8f5e 100644 --- a/Lexis+.js +++ b/Lexis+.js @@ -1,15 +1,15 @@ { - "translatorID": "419638d9-9049-44ad-ba08-fa54ed24b5e6", - "label": "Lexis+", - "creator": "bfahrenfort", - "target": "^https://plus.lexis.*/", - "minVersion": "5.0", - "maxVersion": "", - "priority": 100, - "inRepository": true, - "translatorType": 4, - "browserSupport": "gcsibv", - "lastUpdated": "2023-04-07 19:21:13" + "translatorID": "419638d9-9049-44ad-ba08-fa54ed24b5e6", + "label": "Lexis+", + "creator": "bfahrenfort", + "target": "^https://plus.lexis.*/", + "minVersion": "5.0", + "maxVersion": "", + "priority": 100, + "inRepository": true, + "translatorType": 4, + "browserSupport": "gcsibv", + "lastUpdated": "2023-04-07 19:21:13" } /* @@ -36,201 +36,186 @@ */ function detectWeb(doc, url) { - if (doc.title.match(/.*results.*/)) { - return "multiple"; - } - else if (doc.title.match(/[a-zA-Z\. ]+\s§\s\d+/) || - doc.title.match(/act/i) || - doc.title.match(/p\.l\./i)) // Match: ... Tex. Bus. & Com. Code § 26.01 ... - { - return "statute"; - } - else if (doc.title.match(/\d+\s[a-zA-Z0-9\. ]+\s\d+/)) // Match: ... 5 U.S. 137 ... - { - return "case"; - } - // TODO secondary sources - - return false; + if (doc.title.match(/.*results.*/)) { + return "multiple"; + } + else if (doc.title.match(/[a-zA-Z\. ]+\s§\s\d+/) + || doc.title.match(/act/i) + || doc.title.match(/p\.l\./i)) // Match: ... Tex. Bus. & Com. Code § 26.01 ... + { + return "statute"; + } + else if (doc.title.match(/\d+\s[a-zA-Z0-9\. ]+\s\d+/)) // Match: ... 5 U.S. 137 ... + { + return "case"; + } + // TODO secondary sources + + return false; } function getSearchResults(doc, url) { - var casesOrStatutes = new Array(); - var items = new Object(); - var nextTitle; - - if (detectWeb(doc, url) == "multiple") { - // TODO check what type of element it is (currently only working for 'cases' searches) - var titles = doc.querySelectorAll('a.titleLink'); - var dates = doc.querySelectorAll('span.metaDataItem'); // Not technically only dates, but that's all I use it for atm - var nextDate; - var dateOffset = 1; + var casesOrStatutes = new Array(); + var items = new Object(); + var nextTitle; + + if (detectWeb(doc, url) == "multiple") { + // TODO check what type of element it is (currently only working for 'cases' searches) + var titles = doc.querySelectorAll('a.titleLink'); + var dates = doc.querySelectorAll('span.metaDataItem'); // Not technically only dates, but that's all I use it for atm + var nextDate; + var dateOffset = 1; - // dates[0] is first court name - nextDate = dates[dateOffset]; - dateOffset += 3; - // dates[2] is first citation - for (var i = 0; i < titles.length; i++) { - nextTitle = titles[i]; - items[nextTitle.href] = nextTitle.textContent + "(" + nextDate.textContent + ")"; + // dates[0] is first court name + nextDate = dates[dateOffset]; + dateOffset += 3; + // dates[2] is first citation + for (var i = 0; i < titles.length; i++) { + nextTitle = titles[i]; + items[nextTitle.href] = nextTitle.textContent + "(" + nextDate.textContent + ")"; - // dates[0] is court name - nextDate = dates[dateOffset]; + // dates[0] is court name + nextDate = dates[dateOffset]; - // dates[2] is a citation - } + // dates[2] is a citation + } - return items; - } + return items; + } - return false; + return false; } async function doWeb(doc, url) { - if (detectWeb(doc, url) == 'multiple') { - let items = await Zotero.selectItems(getSearchResults(doc, url)); - if (!items) return; - for (let url of Object.keys(items)) { - await scrape(await requestDocument(url)); - } - } - else { - await scrape(doc, url); - } + if (detectWeb(doc, url) == 'multiple') { + let items = await Zotero.selectItems(getSearchResults(doc, url)); + if (!items) return; + for (let url of Object.keys(items)) { + await scrape(await requestDocument(url)); + } + } + else { + await scrape(doc, url); + } } async function scrape(doc, url) { - if (detectWeb(doc, url) == "case") - { - var newCase = new Zotero.Item("case"); - newCase.url = doc.location.href; + if (detectWeb(doc, url) == "case") { + var newCase = new Zotero.Item("case"); + newCase.url = doc.location.href; - newCase.title = text(doc, 'h1#SS_DocumentTitle'); - - var citation = text(doc, 'span.active-reporter'); - newCase.reporterVolume = citation.substring(0, citation.indexOf(' ')); - newCase.reporter = citation.substring(citation.indexOf(' ') + 1, citation.lastIndexOf(' ')); - newCase.firstPage = citation.substring(citation.lastIndexOf(' ') + 1); - - newCase.court = text(doc, 'p.SS_DocumentInfo', 0); - - newCase.dateDecided = text(doc, 'span.date'); - - var docket = text(doc, 'p.SS_DocumentInfo', 2); - if (docket.match(/^no\./i) || - docket.match(/^\d+/) || - docket.match(/^case no\./i)) - newCase.docketNumber = docket; // This won't be in perfect cite form, shouldn't be a hassle unless you're citing dozens of memorandum opinions - - newCase.complete(); - } - else if (detectWeb(doc, url) == "statute") - { - var newStatute = new Zotero.Item("statute"); - newStatute.url = doc.location.href; - - var title = text(doc, 'h1#SS_DocumentTitle'); // Saves some lines to have a temp here - newStatute.title = title; - - var info = text(doc, 'p.SS_DocumentInfo'); - - var isolation = info.substring(info.search( - /\b(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)/i - )) // isolate date on the frontend - newStatute.dateEnacted = isolation.substring(0, isolation.search(/[1-2][0-9][0-9][0-9]/) + 4); - - if (title.match(/act/i) || - title.match(/of\s[1-2][0-9][0-9][0-9]/i)) // Session law, not codified statute - { - // BB 21st ed. requires parallel cite to Pub. L. No. and Stat. for session laws - var statutesAtLarge, publicLawNo; - var potentialReporter = text(doc, 'a.SS_ActiveRptr'); - if (potentialReporter) // Sometimes Lexis is weird and doesn't give an ActiveRptr - { - if (potentialReporter.textContent.match(/stat\./i)) - statutesAtLarge = potentialReporter.textContent; - else if (potentialReporter.textContent.match(/pub\./i) || - potentialReporter.textContent.match(/p\.l\./i)) - publicLawNo = potentialReporter.textContent; - } - - var otherReporters = doc.querySelectorAll('span.SS_NonPaginatedRptr'); + newCase.title = text(doc, 'h1#SS_DocumentTitle'); + + var citation = text(doc, 'span.active-reporter'); + newCase.reporterVolume = citation.substring(0, citation.indexOf(' ')); + newCase.reporter = citation.substring(citation.indexOf(' ') + 1, citation.lastIndexOf(' ')); + newCase.firstPage = citation.substring(citation.lastIndexOf(' ') + 1); + + newCase.court = text(doc, 'p.SS_DocumentInfo', 0); + + newCase.dateDecided = text(doc, 'span.date'); + + var docket = text(doc, 'p.SS_DocumentInfo', 2); + if (docket.match(/^no\./i) + || docket.match(/^\d+/) + || docket.match(/^case no\./i)) newCase.docketNumber = docket; // This won't be in perfect cite form, shouldn't be a hassle unless you're citing dozens of memorandum opinions + + newCase.complete(); + } + else if (detectWeb(doc, url) == "statute") { + var newStatute = new Zotero.Item("statute"); + newStatute.url = doc.location.href; + + var title = text(doc, 'h1#SS_DocumentTitle'); // Saves some lines to have a temp here + newStatute.title = title; + + var info = text(doc, 'p.SS_DocumentInfo'); + + var isolation = info.substring(info.search( + /\b(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)/i + )); // isolate date on the frontend + newStatute.dateEnacted = isolation.substring(0, isolation.search(/[1-2][0-9][0-9][0-9]/) + 4); + + if (title.match(/act/i) + || title.match(/of\s[1-2][0-9][0-9][0-9]/i)) // Session law, not codified statute + { + // BB 21st ed. requires parallel cite to Pub. L. No. and Stat. for session laws + var statutesAtLarge, publicLawNo; + var potentialReporter = text(doc, 'a.SS_ActiveRptr'); + if (potentialReporter) // Sometimes Lexis is weird and doesn't give an ActiveRptr + { + if (potentialReporter.textContent.match(/stat\./i)) statutesAtLarge = potentialReporter.textContent; + else if (potentialReporter.textContent.match(/pub\./i) + || potentialReporter.textContent.match(/p\.l\./i)) publicLawNo = potentialReporter.textContent; + } + + var otherReporters = doc.querySelectorAll('span.SS_NonPaginatedRptr'); - for (var i = 0; i < otherReporters.length; i++) - { - var nextReporter = otherReporters[i].textContent; - if (nextReporter.match(/stat\./i)) - statutesAtLarge = nextReporter; - else if (nextReporter.match(/pub\./i) || - nextReporter.match(/p\.l\./i)) - publicLawNo = nextReporter; - } - - // Turn publicLawNo into the public law fields - if (publicLawNo.match(/\d+-\d+/)) // Ex. P.L. 115-164 - { - var numPos = publicLawNo.search(/\d+-\d+/) - newStatute.publicLawNumber = publicLawNo.substring(numPos, publicLawNo.substring(numPos + 1).indexOf(' ')); // Gets 115-164 - - newStatute.session = newStatute.publicLawNumber.substring(0, newStatute.publicLawNumber.indexOf('-')); - } - else // Ex. 115 P.L. 164 or 115 Pub. L. No. 164 - { - newStatute.session = publicLawNo.substring(0, publicLawNo.indexOf(' ')); - newStatute.publicLawNumber = newStatute.session + '-' + publicLawNo.substring(publicLawNo.lastIndexOf(' ') + 1); - } - - // Turn statutesAtLarge into the code#/code/section fields - // TODO in styles, check for "Stat." as the code, and if so, don't append a section symbol - newStatute.codeNumber = statutesAtLarge.substring(0, statutesAtLarge.indexOf(' ')); - newStatute.code = "Stat."; - newStatute.section = statutesAtLarge.substring(statutesAtLarge.lastIndexOf(' ') + 1); - } - else // Codified statute - { - if (title.match(/^\d+/)) // Starts with digit, organized by title, ex. 47 U.S.C.S. § 230 - { - // Sadly, named groups aren't working - let groups = title.match(/^(\d+)\s([a-zA-Z0-9\. ]+) § ([0-9\.\(\)a-zA-Z]+)/); - newStatute.codeNumber = groups[1]; - newStatute.code = groups[2]; - newStatute.section = groups[3]; - } - else // Starts with letter, organized by code, ex. Tex. Bus. & Com. Code § 26.01 - { - let groups = title.match(/^([a-zA-Z\. ]+) § ([0-9\.\(\)a-zA-Z]+)/); - newStatute.code = groups[1]; - newStatute.section = groups[2]; - } - - // No way to tell which will be present - var pL = info.match(/p\.l\. (\d+-\d+)/i); - var pubLaw = info.match(/pub\. law (\d+-\d+)/i); - var pubLawNo = info.match(/pub\. law no\. (\d+-\d+)/i); - var publicLaw = info.match(/public law (\d+-\d+)/i); - var publicLawNo = info.match(/public law no\. (\d+-\d+)/i); - var publicLawNumber = info.match(/public law number (\d+-\d+)/i); - if (pL) - newStatute.publicLawNumber = pL[1]; - if (pubLaw) - newStatute.publicLawNumber = pubLaw[1]; - if (pubLawNo) - newStatute.publicLawNumber = pubLawNo[1]; - if (publicLaw) - newStatute.publicLawNumber = publicLaw[1]; - if (publicLawNo) - newStatute.publicLawNumber = publicLawNo[1]; - if (publicLawNumber) - newStatute.publicLawNumber = publicLawNumber[1]; - - if (newStatute.publicLawNumber) - newStatute.session = newStatute.publicLawNumber.substring(0, newStatute.publicLawNumber.indexOf('-')); - } - - newStatute.extra = info; // Since the info section is all over the place, just dump the whole thing in for manual cite checks - - newStatute.complete(); - } + for (var i = 0; i < otherReporters.length; i++) { + var nextReporter = otherReporters[i].textContent; + if (nextReporter.match(/stat\./i)) statutesAtLarge = nextReporter; + else if (nextReporter.match(/pub\./i) + || nextReporter.match(/p\.l\./i)) publicLawNo = nextReporter; + } + + // Turn publicLawNo into the public law fields + if (publicLawNo.match(/\d+-\d+/)) // Ex. P.L. 115-164 + { + var numPos = publicLawNo.search(/\d+-\d+/); + newStatute.publicLawNumber = publicLawNo.substring(numPos, publicLawNo.substring(numPos + 1).indexOf(' ')); // Gets 115-164 + + newStatute.session = newStatute.publicLawNumber.substring(0, newStatute.publicLawNumber.indexOf('-')); + } + else // Ex. 115 P.L. 164 or 115 Pub. L. No. 164 + { + newStatute.session = publicLawNo.substring(0, publicLawNo.indexOf(' ')); + newStatute.publicLawNumber = newStatute.session + '-' + publicLawNo.substring(publicLawNo.lastIndexOf(' ') + 1); + } + + // Turn statutesAtLarge into the code#/code/section fields + // TODO in styles, check for "Stat." as the code, and if so, don't append a section symbol + newStatute.codeNumber = statutesAtLarge.substring(0, statutesAtLarge.indexOf(' ')); + newStatute.code = "Stat."; + newStatute.section = statutesAtLarge.substring(statutesAtLarge.lastIndexOf(' ') + 1); + } + else // Codified statute + { + if (title.match(/^\d+/)) // Starts with digit, organized by title, ex. 47 U.S.C.S. § 230 + { + // Sadly, named groups aren't working + let groups = title.match(/^(\d+)\s([a-zA-Z0-9\. ]+) § ([0-9\.\(\)a-zA-Z]+)/); + newStatute.codeNumber = groups[1]; + newStatute.code = groups[2]; + newStatute.section = groups[3]; + } + else // Starts with letter, organized by code, ex. Tex. Bus. & Com. Code § 26.01 + { + let groups = title.match(/^([a-zA-Z\. ]+) § ([0-9\.\(\)a-zA-Z]+)/); + newStatute.code = groups[1]; + newStatute.section = groups[2]; + } + + // No way to tell which will be present + var pL = info.match(/p\.l\. (\d+-\d+)/i); + var pubLaw = info.match(/pub\. law (\d+-\d+)/i); + var pubLawNo = info.match(/pub\. law no\. (\d+-\d+)/i); + var publicLaw = info.match(/public law (\d+-\d+)/i); + var publicLawNo = info.match(/public law no\. (\d+-\d+)/i); + var publicLawNumber = info.match(/public law number (\d+-\d+)/i); + if (pL) newStatute.publicLawNumber = pL[1]; + if (pubLaw) newStatute.publicLawNumber = pubLaw[1]; + if (pubLawNo) newStatute.publicLawNumber = pubLawNo[1]; + if (publicLaw) newStatute.publicLawNumber = publicLaw[1]; + if (publicLawNo) newStatute.publicLawNumber = publicLawNo[1]; + if (publicLawNumber) newStatute.publicLawNumber = publicLawNumber[1]; + + if (newStatute.publicLawNumber) newStatute.session = newStatute.publicLawNumber.substring(0, newStatute.publicLawNumber.indexOf('-')); + } + + newStatute.extra = info; // Since the info section is all over the place, just dump the whole thing in for manual cite checks + + newStatute.complete(); + } } From b3fb837e2b9e7b6f83ac785a10bd3f8e20061fde Mon Sep 17 00:00:00 2001 From: OCDkirby <59982409+OCDkirby@users.noreply.github.com> Date: Fri, 7 Apr 2023 15:36:43 -0500 Subject: [PATCH 13/17] Lexis+: Linting --- Lexis+.js | 38 ++++++++++++++------------------------ 1 file changed, 14 insertions(+), 24 deletions(-) diff --git a/Lexis+.js b/Lexis+.js index 19b56fe8f5e..98d73be9789 100644 --- a/Lexis+.js +++ b/Lexis+.js @@ -35,18 +35,16 @@ ***** END LICENSE BLOCK ***** */ -function detectWeb(doc, url) { +function detectWeb(doc, _url) { if (doc.title.match(/.*results.*/)) { return "multiple"; } - else if (doc.title.match(/[a-zA-Z\. ]+\s§\s\d+/) + else if (doc.title.match(/[a-zA-Z. ]+\s§\s\d+/) || doc.title.match(/act/i) - || doc.title.match(/p\.l\./i)) // Match: ... Tex. Bus. & Com. Code § 26.01 ... - { + || doc.title.match(/p\.l\./i)) { // Match: ... Tex. Bus. & Com. Code § 26.01 ... return "statute"; } - else if (doc.title.match(/\d+\s[a-zA-Z0-9\. ]+\s\d+/)) // Match: ... 5 U.S. 137 ... - { + else if (doc.title.match(/\d+\s[a-zA-Z0-9. ]+\s\d+/)) { // Match: ... 5 U.S. 137 ... return "case"; } // TODO secondary sources @@ -55,8 +53,7 @@ function detectWeb(doc, url) { } function getSearchResults(doc, url) { - var casesOrStatutes = new Array(); - var items = new Object(); + var items = {}; var nextTitle; if (detectWeb(doc, url) == "multiple") { @@ -137,13 +134,11 @@ async function scrape(doc, url) { newStatute.dateEnacted = isolation.substring(0, isolation.search(/[1-2][0-9][0-9][0-9]/) + 4); if (title.match(/act/i) - || title.match(/of\s[1-2][0-9][0-9][0-9]/i)) // Session law, not codified statute - { + || title.match(/of\s[1-2][0-9][0-9][0-9]/i)) { // Session law, not codified statute // BB 21st ed. requires parallel cite to Pub. L. No. and Stat. for session laws var statutesAtLarge, publicLawNo; var potentialReporter = text(doc, 'a.SS_ActiveRptr'); - if (potentialReporter) // Sometimes Lexis is weird and doesn't give an ActiveRptr - { + if (potentialReporter) { // Sometimes Lexis is weird and doesn't give an ActiveRptr if (potentialReporter.textContent.match(/stat\./i)) statutesAtLarge = potentialReporter.textContent; else if (potentialReporter.textContent.match(/pub\./i) || potentialReporter.textContent.match(/p\.l\./i)) publicLawNo = potentialReporter.textContent; @@ -159,15 +154,13 @@ async function scrape(doc, url) { } // Turn publicLawNo into the public law fields - if (publicLawNo.match(/\d+-\d+/)) // Ex. P.L. 115-164 - { + if (publicLawNo.match(/\d+-\d+/)) { // Ex. P.L. 115-164 var numPos = publicLawNo.search(/\d+-\d+/); newStatute.publicLawNumber = publicLawNo.substring(numPos, publicLawNo.substring(numPos + 1).indexOf(' ')); // Gets 115-164 newStatute.session = newStatute.publicLawNumber.substring(0, newStatute.publicLawNumber.indexOf('-')); } - else // Ex. 115 P.L. 164 or 115 Pub. L. No. 164 - { + else { // Ex. 115 P.L. 164 or 115 Pub. L. No. 164 newStatute.session = publicLawNo.substring(0, publicLawNo.indexOf(' ')); newStatute.publicLawNumber = newStatute.session + '-' + publicLawNo.substring(publicLawNo.lastIndexOf(' ') + 1); } @@ -178,19 +171,16 @@ async function scrape(doc, url) { newStatute.code = "Stat."; newStatute.section = statutesAtLarge.substring(statutesAtLarge.lastIndexOf(' ') + 1); } - else // Codified statute - { - if (title.match(/^\d+/)) // Starts with digit, organized by title, ex. 47 U.S.C.S. § 230 - { + else { // Codified statute + if (title.match(/^\d+/)) { // Starts with digit, organized by title, ex. 47 U.S.C.S. § 230 // Sadly, named groups aren't working - let groups = title.match(/^(\d+)\s([a-zA-Z0-9\. ]+) § ([0-9\.\(\)a-zA-Z]+)/); + let groups = title.match(/^(\d+)\s([a-zA-Z0-9. ]+) § ([0-9.()a-zA-Z]+)/); newStatute.codeNumber = groups[1]; newStatute.code = groups[2]; newStatute.section = groups[3]; } - else // Starts with letter, organized by code, ex. Tex. Bus. & Com. Code § 26.01 - { - let groups = title.match(/^([a-zA-Z\. ]+) § ([0-9\.\(\)a-zA-Z]+)/); + else { // Starts with letter, organized by code, ex. Tex. Bus. & Com. Code § 26.01 + let groups = title.match(/^([a-zA-Z. ]+) § ([0-9.()a-zA-Z]+)/); newStatute.code = groups[1]; newStatute.section = groups[2]; } From 515dea68314e296bf191a2a0e45fac6fd9bbc4e0 Mon Sep 17 00:00:00 2001 From: OCDkirby <59982409+OCDkirby@users.noreply.github.com> Date: Fri, 7 Apr 2023 15:40:37 -0500 Subject: [PATCH 14/17] Lexis+: Linting 2 --- Lexis+.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lexis+.js b/Lexis+.js index 98d73be9789..ddf8f5d9768 100644 --- a/Lexis+.js +++ b/Lexis+.js @@ -190,7 +190,7 @@ async function scrape(doc, url) { var pubLaw = info.match(/pub\. law (\d+-\d+)/i); var pubLawNo = info.match(/pub\. law no\. (\d+-\d+)/i); var publicLaw = info.match(/public law (\d+-\d+)/i); - var publicLawNo = info.match(/public law no\. (\d+-\d+)/i); + publicLawNo = info.match(/public law no\. (\d+-\d+)/i); var publicLawNumber = info.match(/public law number (\d+-\d+)/i); if (pL) newStatute.publicLawNumber = pL[1]; if (pubLaw) newStatute.publicLawNumber = pubLaw[1]; From 6f0d94e440e4fee6f906a2b7a89378f41eebdb2d Mon Sep 17 00:00:00 2001 From: OCDkirby <59982409+OCDkirby@users.noreply.github.com> Date: Fri, 7 Apr 2023 15:46:22 -0500 Subject: [PATCH 15/17] Lexis+: Linting 3 --- Lexis+.js | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/Lexis+.js b/Lexis+.js index ddf8f5d9768..ffdadbdc0d5 100644 --- a/Lexis+.js +++ b/Lexis+.js @@ -40,8 +40,8 @@ function detectWeb(doc, _url) { return "multiple"; } else if (doc.title.match(/[a-zA-Z. ]+\s§\s\d+/) - || doc.title.match(/act/i) - || doc.title.match(/p\.l\./i)) { // Match: ... Tex. Bus. & Com. Code § 26.01 ... + || doc.title.match(/act/i) + || doc.title.match(/p\.l\./i)) { // Match: ... Tex. Bus. & Com. Code § 26.01 ... return "statute"; } else if (doc.title.match(/\d+\s[a-zA-Z0-9. ]+\s\d+/)) { // Match: ... 5 U.S. 137 ... @@ -62,7 +62,7 @@ function getSearchResults(doc, url) { var dates = doc.querySelectorAll('span.metaDataItem'); // Not technically only dates, but that's all I use it for atm var nextDate; var dateOffset = 1; - + // dates[0] is first court name nextDate = dates[dateOffset]; dateOffset += 3; @@ -70,10 +70,10 @@ function getSearchResults(doc, url) { for (var i = 0; i < titles.length; i++) { nextTitle = titles[i]; items[nextTitle.href] = nextTitle.textContent + "(" + nextDate.textContent + ")"; - + // dates[0] is court name nextDate = dates[dateOffset]; - + // dates[2] is a citation } @@ -100,7 +100,7 @@ async function scrape(doc, url) { if (detectWeb(doc, url) == "case") { var newCase = new Zotero.Item("case"); newCase.url = doc.location.href; - + newCase.title = text(doc, 'h1#SS_DocumentTitle'); var citation = text(doc, 'span.active-reporter'); @@ -114,8 +114,10 @@ async function scrape(doc, url) { var docket = text(doc, 'p.SS_DocumentInfo', 2); if (docket.match(/^no\./i) - || docket.match(/^\d+/) - || docket.match(/^case no\./i)) newCase.docketNumber = docket; // This won't be in perfect cite form, shouldn't be a hassle unless you're citing dozens of memorandum opinions + || docket.match(/^\d+/) + || docket.match(/^case no\./i)) { + newCase.docketNumber = docket; // This won't be in perfect cite form, shouldn't be a hassle unless you're citing dozens of memorandum opinions + } newCase.complete(); } @@ -134,23 +136,27 @@ async function scrape(doc, url) { newStatute.dateEnacted = isolation.substring(0, isolation.search(/[1-2][0-9][0-9][0-9]/) + 4); if (title.match(/act/i) - || title.match(/of\s[1-2][0-9][0-9][0-9]/i)) { // Session law, not codified statute + || title.match(/of\s[1-2][0-9][0-9][0-9]/i)) { // Session law, not codified statute // BB 21st ed. requires parallel cite to Pub. L. No. and Stat. for session laws var statutesAtLarge, publicLawNo; var potentialReporter = text(doc, 'a.SS_ActiveRptr'); if (potentialReporter) { // Sometimes Lexis is weird and doesn't give an ActiveRptr if (potentialReporter.textContent.match(/stat\./i)) statutesAtLarge = potentialReporter.textContent; else if (potentialReporter.textContent.match(/pub\./i) - || potentialReporter.textContent.match(/p\.l\./i)) publicLawNo = potentialReporter.textContent; + || potentialReporter.textContent.match(/p\.l\./i)) { + publicLawNo = potentialReporter.textContent; + } } var otherReporters = doc.querySelectorAll('span.SS_NonPaginatedRptr'); - + for (var i = 0; i < otherReporters.length; i++) { var nextReporter = otherReporters[i].textContent; if (nextReporter.match(/stat\./i)) statutesAtLarge = nextReporter; else if (nextReporter.match(/pub\./i) - || nextReporter.match(/p\.l\./i)) publicLawNo = nextReporter; + || nextReporter.match(/p\.l\./i)) { + publicLawNo = nextReporter; + } } // Turn publicLawNo into the public law fields From 651e915247d397d91e0eef5e18531ddaca420ca5 Mon Sep 17 00:00:00 2001 From: OCDkirby <59982409+OCDkirby@users.noreply.github.com> Date: Fri, 7 Apr 2023 15:50:06 -0500 Subject: [PATCH 16/17] Lexis+: Linting 4 --- Lexis+.js | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Lexis+.js b/Lexis+.js index ffdadbdc0d5..76678e405a3 100644 --- a/Lexis+.js +++ b/Lexis+.js @@ -70,10 +70,10 @@ function getSearchResults(doc, url) { for (var i = 0; i < titles.length; i++) { nextTitle = titles[i]; items[nextTitle.href] = nextTitle.textContent + "(" + nextDate.textContent + ")"; - + // dates[0] is court name nextDate = dates[dateOffset]; - + // dates[2] is a citation } @@ -149,7 +149,7 @@ async function scrape(doc, url) { } var otherReporters = doc.querySelectorAll('span.SS_NonPaginatedRptr'); - + for (var i = 0; i < otherReporters.length; i++) { var nextReporter = otherReporters[i].textContent; if (nextReporter.match(/stat\./i)) statutesAtLarge = nextReporter; From 1fa2aa1db0c403673ec662562b4cdb64b3d67792 Mon Sep 17 00:00:00 2001 From: OCDkirby <59982409+OCDkirby@users.noreply.github.com> Date: Fri, 7 Apr 2023 16:59:17 -0500 Subject: [PATCH 17/17] Lexis+: Properly detect code-title-categorized statutes --- Lexis+.js | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Lexis+.js b/Lexis+.js index 76678e405a3..0c31afb2b7b 100644 --- a/Lexis+.js +++ b/Lexis+.js @@ -9,7 +9,7 @@ "inRepository": true, "translatorType": 4, "browserSupport": "gcsibv", - "lastUpdated": "2023-04-07 19:21:13" + "lastUpdated": "2023-04-07 21:55:44" } /* @@ -76,7 +76,7 @@ function getSearchResults(doc, url) { // dates[2] is a citation } - + return items; } @@ -100,7 +100,7 @@ async function scrape(doc, url) { if (detectWeb(doc, url) == "case") { var newCase = new Zotero.Item("case"); newCase.url = doc.location.href; - + newCase.title = text(doc, 'h1#SS_DocumentTitle'); var citation = text(doc, 'span.active-reporter'); @@ -186,7 +186,7 @@ async function scrape(doc, url) { newStatute.section = groups[3]; } else { // Starts with letter, organized by code, ex. Tex. Bus. & Com. Code § 26.01 - let groups = title.match(/^([a-zA-Z. ]+) § ([0-9.()a-zA-Z]+)/); + let groups = title.match(/^([a-zA-Z&. ]+) § ([0-9.()a-zA-Z]+)/); newStatute.code = groups[1]; newStatute.section = groups[2]; } @@ -216,6 +216,6 @@ async function scrape(doc, url) { /** BEGIN TEST CASES **/ -var testCases = [ +var testCases = [ ] /** END TEST CASES **/