Skip to content

Commit

Permalink
支持新版全文阅读、修复文档抓取时丢失volume
Browse files Browse the repository at this point in the history
  • Loading branch information
jiaojiaodubai committed Sep 12, 2024
1 parent 408c495 commit 23f0777
Showing 1 changed file with 25 additions and 30 deletions.
55 changes: 25 additions & 30 deletions CNKI.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@
"translatorID": "5c95b67b-41c5-4f55-b71a-48d5d7183063",
"label": "CNKI",
"creator": "Aurimas Vinckevicius, Xingzhong Lin, jiaojiaodubai",
"target": "https?://.*?(cnki\\.net)?/(kns8?s?|kcms2?|KNavi|KX?Reader)",
"target": "https?://.*?(cnki\\.net)?/(kns8?s?|kcms2?|KNavi|xmlRead)/",
"minVersion": "3.0",
"maxVersion": "",
"priority": 150,
"inRepository": true,
"translatorType": 12,
"browserSupport": "gcsibv",
"lastUpdated": "2024-09-12 10:08:09"
"lastUpdated": "2024-09-12 14:41:19"
}

/*
Expand Down Expand Up @@ -537,6 +537,9 @@ async function scrape(doc, itemKey = { url: '', cite: '', cookieName: '', downlo
await translator.translate();
}
else {
if (/\/xmlRead\//i.test(url)) {
doc = await requestDocument(strChild(doc, 'a.details', 'href'));
}
try {
// While debugging, you can manually throw an error here to steer the program into the inner code path
// throw new Error('debug');
Expand Down Expand Up @@ -800,7 +803,7 @@ async function scrapeDoc(doc, itemKey) {
case 'journalArticle': {
const pubInfo = ZU.trimInternal(innerText(doc, '.top-tip'));
newItem.publicationTitle = tryMatch(pubInfo, /^(.+?)\./, 1).trim().replace(/\(([\u4e00-\u9fff]*)\)$/, '($1)');
newItem.volume = tryMatch(pubInfo, /,\s?0*([1-9]\d*)\(/, 1);
newItem.volume = tryMatch(pubInfo, /,\s?0*([1-9]\d*)\s*\(/, 1);
newItem.issue = tryMatch(pubInfo, /\(([A-Z]?\d*)\)/i, 1).replace(/0*(\d+)/, '$1');
newItem.pages = labels.get(['页码', '頁碼', 'Page$']);
newItem.date = tryMatch(pubInfo, /\.\s?(\d{4})/, 1);
Expand Down Expand Up @@ -1503,38 +1506,30 @@ function addAttachments(item, doc, url, itemKey) {
// If you want CAJ files instead of PDF files, set keepPDF on the line below to false
let keepPDF = Z.getHiddenPref('CNKIPDF');
if (keepPDF === undefined) keepPDF = true;
if (/KX?Reader/.test(url)) {
// The legal status of patent is shown in the picture on webpage.
if (item.itemType == 'patent') {
item.attachments.push({
title: 'Snapshot',
document: doc
});
}
else {
// The legal status of patent is shown in the picture on webpage.
if (item.itemType == 'patent') {
item.attachments.push({
title: 'Snapshot',
document: doc
});
}
const pdfLink = strChild(doc, 'a[id^="pdfDown"]', 'href');
Z.debug(`get PDF Link:\n${pdfLink}`);
const cajLink = strChild(doc, 'a#cajDown', 'href') || itemKey.downloadlink || strChild(doc, 'a[href*="bar/download"]', 'href');
Z.debug(`get CAJ link:\n${cajLink}`);
if (keepPDF && pdfLink) {
item.attachments.push({
title: 'Full Text PDF',
mimeType: 'application/pdf',
url: pdfLink
});
}
else if (cajLink) {
item.attachments.push({
title: 'Full Text CAJ',
mimeType: 'application/caj',
url: cajLink
});
}
const pdfLink = strChild(doc, 'a[id^="pdfDown"]', 'href');
Z.debug(`get PDF Link:\n${pdfLink}`);
const cajLink = strChild(doc, 'a#cajDown', 'href') || itemKey.downloadlink || strChild(doc, 'a[href*="bar/download"]', 'href');
Z.debug(`get CAJ link:\n${cajLink}`);
if (keepPDF && pdfLink) {
item.attachments.push({
title: 'Full Text PDF',
mimeType: 'application/pdf',
url: pdfLink
});
}
else if (cajLink) {
item.attachments.push({
title: 'Full Text CAJ',
mimeType: 'application/caj',
url: cajLink
});
}
}

Expand Down

0 comments on commit 23f0777

Please sign in to comment.