From 44aa011ff4f297c5f9cd83774abb1eed6bdff3f0 Mon Sep 17 00:00:00 2001 From: Andvari <31068367+dzx-dzx@users.noreply.github.com> Date: Tue, 5 Sep 2023 23:43:20 +0800 Subject: [PATCH 01/13] fix(route): (Maybe) fix the stheadline feed occasionally dropping details. (#13206) --- lib/v2/stheadline/std/realtime.js | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/lib/v2/stheadline/std/realtime.js b/lib/v2/stheadline/std/realtime.js index 8d79004bd37139..bfca3488a381a2 100644 --- a/lib/v2/stheadline/std/realtime.js +++ b/lib/v2/stheadline/std/realtime.js @@ -11,7 +11,7 @@ module.exports = async (ctx) => { const { data: response } = await got(url); const $ = cheerio.load(response); - const items = $(`${category === '即時' ? '.moreNews > .col-md-4' : ''} .media-body > .my-2 > a`) + let items = $(`${category === '即時' ? '.moreNews > .col-md-4' : ''} .media-body > .my-2 > a`) .toArray() .map((item) => { item = $(item); @@ -22,17 +22,18 @@ module.exports = async (ctx) => { }; }); - await Promise.all( + items = await Promise.all( items.map((item) => ctx.cache.tryGet(item.link, async () => { const { data: response } = await got(item.link); const $ = cheerio.load(response); - item.description = $('.paragraphs').html(); - item.pubDate = timezone(parseDate($('.content .date').text()), +8); - item.category = [$('nav .nav-item.active a')?.text()?.trim(), ...$("meta[name='keyword']").attr('content').split(',')]; - - return item; + return { + ...item, + description: $('.paragraphs').html(), + pubDate: timezone(parseDate($('.content .date').text()), +8), + category: [$('nav .nav-item.active a')?.text()?.trim(), ...$("meta[name='keyword']").attr('content').split(',')], + }; }) ) ); From 54f7b4591e7f3343f97bee6d21546310a59d154e Mon Sep 17 00:00:00 2001 From: qyx596 Date: Tue, 5 Sep 2023 10:51:05 -0500 Subject: [PATCH 02/13] fix(route): Fix the BBC Chinese 404 page not found. (#13205) * fix: BBC News rss fetching error * refactor: migrate to v2 --------- --- lib/router.js | 2 +- lib/{routes => v2}/bbc/index.js | 8 ++++---- lib/v2/bbc/maintainer.js | 4 ++++ lib/v2/bbc/radar.js | 11 +++++++++++ lib/v2/bbc/router.js | 3 +++ lib/{routes => v2}/bbc/utils.js | 0 website/docs/routes/traditional-media.md | 16 ++++++++++++---- 7 files changed, 35 insertions(+), 9 deletions(-) rename lib/{routes => v2}/bbc/index.js (86%) create mode 100644 lib/v2/bbc/maintainer.js create mode 100644 lib/v2/bbc/radar.js create mode 100644 lib/v2/bbc/router.js rename lib/{routes => v2}/bbc/utils.js (100%) diff --git a/lib/router.js b/lib/router.js index 6d1ce134a6b8e6..4a818131d90578 100644 --- a/lib/router.js +++ b/lib/router.js @@ -442,7 +442,7 @@ router.get('/rs05/rs05', lazyloadRouteHandler('./routes/rs05/rs05')); router.get('/qutoutiao/category/:cid', lazyloadRouteHandler('./routes/qutoutiao/category')); // BBC -router.get('/bbc/:site?/:channel?', lazyloadRouteHandler('./routes/bbc/index')); +// router.get('/bbc/:site?/:channel?', lazyloadRouteHandler('./routes/bbc/index')); // 看雪 router.get('/pediy/topic/:category?/:type?', lazyloadRouteHandler('./routes/pediy/topic')); diff --git a/lib/routes/bbc/index.js b/lib/v2/bbc/index.js similarity index 86% rename from lib/routes/bbc/index.js rename to lib/v2/bbc/index.js index 71fb375aacb4f9..acb02c637aec30 100644 --- a/lib/routes/bbc/index.js +++ b/lib/v2/bbc/index.js @@ -16,9 +16,9 @@ module.exports = async (ctx) => { title = 'BBC News 中文网'; if (!channel) { - feed = await parser.parseURL('http://www.bbc.co.uk/zhongwen/simp/index.xml'); + feed = await parser.parseURL('https://www.bbc.co.uk/zhongwen/simp/index.xml'); } else { - feed = await parser.parseURL(`http://www.bbc.co.uk/zhongwen/simp/${channel}/index.xml`); + feed = await parser.parseURL(`https://www.bbc.co.uk/zhongwen/simp/${channel}/index.xml`); } break; @@ -26,9 +26,9 @@ module.exports = async (ctx) => { title = 'BBC News 中文網'; if (!channel) { - feed = await parser.parseURL('http://www.bbc.co.uk/zhongwen/trad/index.xml'); + feed = await parser.parseURL('https://www.bbc.co.uk/zhongwen/trad/index.xml'); } else { - feed = await parser.parseURL(`http://www.bbc.co.uk/zhongwen/trad/${channel}/index.xml`); + feed = await parser.parseURL(`https://www.bbc.co.uk/zhongwen/trad/${channel}/index.xml`); } link = 'https://www.bbc.com/zhongwen/trad'; break; diff --git a/lib/v2/bbc/maintainer.js b/lib/v2/bbc/maintainer.js new file mode 100644 index 00000000000000..485c161be01cc9 --- /dev/null +++ b/lib/v2/bbc/maintainer.js @@ -0,0 +1,4 @@ +module.exports = { + '/:channel': ['HenryQW', 'DIYgod'], + '/:lang/:channel?': ['HenryQW'], +}; diff --git a/lib/v2/bbc/radar.js b/lib/v2/bbc/radar.js new file mode 100644 index 00000000000000..8a61aadc53706b --- /dev/null +++ b/lib/v2/bbc/radar.js @@ -0,0 +1,11 @@ +module.exports = { + 'bbc.com': { + _name: 'BBC', + '.': [ + { + title: 'News', + docs: 'https://docs.rsshub.app/routes/traditional-media#bbc', + }, + ], + }, +}; diff --git a/lib/v2/bbc/router.js b/lib/v2/bbc/router.js new file mode 100644 index 00000000000000..d0b16d259d2d79 --- /dev/null +++ b/lib/v2/bbc/router.js @@ -0,0 +1,3 @@ +module.exports = (router) => { + router.get('/:site?/:channel?', require('./index')); +}; diff --git a/lib/routes/bbc/utils.js b/lib/v2/bbc/utils.js similarity index 100% rename from lib/routes/bbc/utils.js rename to lib/v2/bbc/utils.js diff --git a/website/docs/routes/traditional-media.md b/website/docs/routes/traditional-media.md index 3aa4b9d8f1386c..be1c56501af22d 100644 --- a/website/docs/routes/traditional-media.md +++ b/website/docs/routes/traditional-media.md @@ -124,7 +124,7 @@ There is no RSS source for Al Jazeera Chinese, returning homepage content by def ## BBC {#bbc} -### BBC {#bbc-bbc} +### News {#bbc-news} @@ -136,11 +136,19 @@ Support major channels, refer to [BBC RSS feeds](https://www.bbc.co.uk/news/1062 -### BBC Chinese {#bbc-bbc-chinese} +### BBC 中文网 {#bbc-bbc-zhong-wen-wang} - + -See [BBC 中文网](/routes/traditional-media#bbc-bbc-zhong-wen-wang). +支持大部分频道,频道名称见 [BBC 中文网 RSS 服务](https://www.bbc.com/zhongwen/simp/services/2009/09/000000_rss)。 + +简体版: + +- 频道,如金融财经 `https://www.bbc.co.uk/zhongwen/simp/business/index.xml` 则为 `/bbc/chinese/business`. + +繁體版: + +- 频道,如金融財經 `https://www.bbc.co.uk/zhongwen/trad/business/index.xml` 则为 `/bbc/traditionalchinese/business`. From 31b56187c0f349e7232fb97befef95d9af06cb43 Mon Sep 17 00:00:00 2001 From: Andvari <31068367+dzx-dzx@users.noreply.github.com> Date: Tue, 5 Sep 2023 23:57:42 +0800 Subject: [PATCH 03/13] feat(route): use API for reuters. (#13190) * feat(route): use reuters api. * Fix typo in doc. * Refining * Update common.js * Update common.js * Update common.js * Update common.js * Update common.js * Refactor the code, and apologize for my stubbornness. --- lib/v2/reuters/common.js | 75 ++++++++++++++++-------- website/docs/routes/traditional-media.md | 6 +- 2 files changed, 52 insertions(+), 29 deletions(-) diff --git a/lib/v2/reuters/common.js b/lib/v2/reuters/common.js index f72250926cd16d..2bfa07bc23eb47 100644 --- a/lib/v2/reuters/common.js +++ b/lib/v2/reuters/common.js @@ -9,31 +9,54 @@ module.exports = async (ctx) => { const topic = ctx.params.topic ?? (category === 'authors' ? 'reuters' : ''); const limit = ctx.query.limit ? parseInt(ctx.query.limit) : 20; - const rootUrl = 'https://www.reuters.com'; - const currentUrl = topic ? `${rootUrl}/${category}/${topic}/` : `${rootUrl}/${category}/`; - const response = await got(currentUrl); - const $ = cheerio.load(response.data); - - let items = $('.media-story-card__body__3tRWy a.media-story-card__heading__eqhp9, a.svelte-pxbp38, a.svelte-11dknnx, a.svelte-e21rsn') - .slice(0, limit) - .toArray() - .map((item) => { - item = $(item); - item.find('span.visually-hidden__hidden__2qXMW').remove(); + const MUST_FETCH_BY_TOPICS = ['authors']; + const section_id = `/${category}/${topic ? `${topic}/` : ''}`; + const { title, description, rootUrl, response } = await (async () => { + if (!MUST_FETCH_BY_TOPICS.includes(category)) { + const rootUrl = 'https://www.reuters.com/pf/api/v3/content/fetch/articles-by-section-alias-or-id-v1'; + const response = await got(rootUrl, { + searchParams: { + query: JSON.stringify({ + offset: 0, + size: limit, + section_id, + website: 'reuters', + }), + }, + }).json(); return { - title: item.text(), - link: new URL(item.prop('href'), rootUrl).href, + title: response.result.section.title, + description: response.result.section.section_about, + rootUrl, + response, }; - }); - if (!items.length) { - const metadata = $('script#fusion-metadata').html(); - const metadataObj = JSON.parse(metadata.match(/Fusion.globalContent=(\{[\s\S]*?});/)[1]); - const articles = metadataObj.arcResult?.articles ?? metadataObj.result?.articles ?? []; - items = articles.map((article) => ({ - title: article.title, - link: rootUrl + article.canonical_url, - })); - } + } else { + const rootUrl = 'https://www.reuters.com/pf/api/v3/content/fetch/articles-by-topic-v1'; + const response = await got(rootUrl, { + searchParams: { + query: JSON.stringify({ + offset: 0, + size: limit, + topic_url: section_id, + website: 'reuters', + }), + }, + }).json(); + + return { + title: `${response.result.topics[0].name} | Reuters`, + description: response.result.topics[0].entity_id, + rootUrl, + response, + }; + } + })(); + + let items = response.result.articles.map((e) => ({ + title: e.title, + link: new URL(e.canonical_url, rootUrl).href, + })); + items = await Promise.all( items.map((item) => ctx.cache.tryGet(item.link, async () => { @@ -88,10 +111,10 @@ module.exports = async (ctx) => { ); ctx.state.data = { - title: $('head title').text(), - description: $('head meta[name=description]').attr('content'), + title, + description, image: 'https://www.reuters.com/pf/resources/images/reuters/logo-vertical-default-512x512.png?d=116', - link: currentUrl, + link: `https://www.reuters.com${section_id}`, item: items, }; }; diff --git a/website/docs/routes/traditional-media.md b/website/docs/routes/traditional-media.md index be1c56501af22d..089a1ba5803141 100644 --- a/website/docs/routes/traditional-media.md +++ b/website/docs/routes/traditional-media.md @@ -544,13 +544,13 @@ Parameters can be obtained from the official website, for instance: | All | Aerospace & Defense | Autos & Transportation | Energy | Environment | Finance | Healthcare & Pharmaceuticals | Media & Telecom | Retail & Consumer | Sustainable Business | Charged | Future of Health | Future of Money | Take Five | Reuters Impact | | --- | ------------------- | ---------------------- | ------ | ----------- | ------- | ---------------------------- | --------------- | ----------------- | -------------------- | ------- | ---------------- | --------------- | --------- | -------------- | - | | aerospace-defense | autos-transportation | energy | environment | finance | healthcare-pharmaceuticals | media-telecom | retail-consumer | sustainable-business | charged | future-of-health | futrue-of-money | take-five | reuters-impact | + | | aerospace-defense | autos-transportation | energy | environment | finance | healthcare-pharmaceuticals | media-telecom | retail-consumer | sustainable-business | charged | future-of-health | future-of-money | take-five | reuters-impact | - `legal/:topic`: - | All | Goverment | Legal Industry | Litigation | Transaction | + | All | Government | Legal Industry | Litigation | Transactional | | --- | --------- | -------------- | ---------- | ----------- | - | | goverment | legalindustry | litigation | transaction | + | | government | legalindustry | litigation | transactional | - `authors/:topic`: From d717d190bce3239ff37d0282d3df975e44af5466 Mon Sep 17 00:00:00 2001 From: Andvari <31068367+dzx-dzx@users.noreply.github.com> Date: Wed, 6 Sep 2023 01:34:38 +0800 Subject: [PATCH 04/13] feat(route): add category to cna & other improvements (#13216) * feat(route): add category to cna. * Include `carouseList` articles. * Update index.js * fix: use web api --------- --- lib/v2/cna/index.js | 57 +++++++++++++++++++++++++-------------------- 1 file changed, 32 insertions(+), 25 deletions(-) diff --git a/lib/v2/cna/index.js b/lib/v2/cna/index.js index d33fb28ef0f63d..0e26bf1c27b7e5 100644 --- a/lib/v2/cna/index.js +++ b/lib/v2/cna/index.js @@ -5,31 +5,30 @@ const timezone = require('@/utils/timezone'); module.exports = async (ctx) => { const id = ctx.params.id || 'aall'; - - let rootUrl; - - if (/^\d+$/.test(id)) { - rootUrl = `https://www.cna.com.tw/topic/newstopic/${id}.aspx`; - } else { - rootUrl = `https://www.cna.com.tw/list/${id}.aspx`; - } - const response = await got({ - method: 'get', - url: rootUrl, + const isTopic = /^\d+$/.test(id); + const limit = ctx.query.limit ? parseInt(ctx.query.limit, 10) : 20; + + const { data: response } = await got({ + method: 'post', + url: `https://www.cna.com.tw/cna2018api/api/${isTopic ? 'WTopic' : 'WNewsList'}`, + json: { + action: '0', + category: isTopic ? 'newstopic' : id, + tno: isTopic ? id : undefined, + pagesize: limit, + pageidx: 1, + }, }); - const $ = cheerio.load(response.data); - const list = $('.mainList li a div h2') - .slice(0, ctx.query.limit ? parseInt(ctx.query.limit) : 10) - .toArray() - .map((item) => { - item = $(item); - return { - title: item.text(), - link: item.parents('a').attr('href'), - pubDate: timezone(parseDate(item.next().text()), +8), - }; - }); + const { + ResultData: { MetaData: metadata }, + ResultData: resultData, + } = response; + const list = (isTopic ? resultData.Topic.NewsItems : resultData.Items).slice(0, limit).map((item) => ({ + title: item.HeadLine, + link: item.PageUrl, + pubDate: timezone(parseDate(item.CreateTime), +8), + })); const items = await Promise.all( list.map((item) => @@ -42,6 +41,12 @@ module.exports = async (ctx) => { const topImage = content('.fullPic').html(); item.description = (topImage === null ? '' : topImage) + content('.paragraph').eq(0).html(); + item.category = [ + ...content("meta[property='article:tag']") + .get() + .map((e) => e.attribs.content), + content('.active > a').text(), + ]; return item; }) @@ -49,8 +54,10 @@ module.exports = async (ctx) => { ); ctx.state.data = { - title: $('title').text(), - link: rootUrl, + title: metadata.Title, + description: metadata.Description, + link: metadata.CanonicalUrl, + image: metadata.Image, item: items, }; }; From b129b565b6874364acf6bc1585c407246cb39dfe Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 6 Sep 2023 06:33:55 +0800 Subject: [PATCH 05/13] chore(deps): bump @sentry/node from 7.66.0 to 7.67.0 (#13219) * chore(deps): bump @sentry/node from 7.66.0 to 7.67.0 Bumps [@sentry/node](https://github.com/getsentry/sentry-javascript) from 7.66.0 to 7.67.0. - [Release notes](https://github.com/getsentry/sentry-javascript/releases) - [Changelog](https://github.com/getsentry/sentry-javascript/blob/develop/CHANGELOG.md) - [Commits](https://github.com/getsentry/sentry-javascript/compare/7.66.0...7.67.0) --- updated-dependencies: - dependency-name: "@sentry/node" dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] * chore: fix pnpm install --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- package.json | 2 +- pnpm-lock.yaml | 44 ++++++++++++++++++++++---------------------- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/package.json b/package.json index 4770e13dca12ba..e0248eca84fb0f 100644 --- a/package.json +++ b/package.json @@ -86,7 +86,7 @@ "@koa/router": "12.0.0", "@notionhq/client": "2.2.13", "@postlight/parser": "2.2.3", - "@sentry/node": "7.66.0", + "@sentry/node": "7.67.0", "aes-js": "3.1.2", "art-template": "4.13.2", "bbcodejs": "0.0.4", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 2c1fac099db243..791a65e13d071d 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -15,8 +15,8 @@ dependencies: specifier: 2.2.3 version: 2.2.3 '@sentry/node': - specifier: 7.66.0 - version: 7.66.0 + specifier: 7.67.0 + version: 7.67.0 aes-js: specifier: 3.1.2 version: 3.1.2 @@ -1153,33 +1153,33 @@ packages: selderee: 0.11.0 dev: false - /@sentry-internal/tracing@7.66.0: - resolution: {integrity: sha512-3vCgC2hC3T45pn53yTDVcRpHoJTBxelDPPZVsipAbZnoOVPkj7n6dNfDhj3I3kwWCBPahPkXmE+R4xViR8VqJg==} + /@sentry-internal/tracing@7.67.0: + resolution: {integrity: sha512-+3wpnzW2HczPlZsp1pWtdOavBKLK/tu1qDEg+blqLfW7b/qZZ8hqQ+A+2mEWRLgWfIoGZ8t4U84nN4tzDXv+nQ==} engines: {node: '>=8'} dependencies: - '@sentry/core': 7.66.0 - '@sentry/types': 7.66.0 - '@sentry/utils': 7.66.0 + '@sentry/core': 7.67.0 + '@sentry/types': 7.67.0 + '@sentry/utils': 7.67.0 tslib: 2.6.2 dev: false - /@sentry/core@7.66.0: - resolution: {integrity: sha512-WMAEPN86NeCJ1IT48Lqiz4MS5gdDjBwP4M63XP4msZn9aujSf2Qb6My5uT87AJr9zBtgk8MyJsuHr35F0P3q1w==} + /@sentry/core@7.67.0: + resolution: {integrity: sha512-apk0WHnFJTHX86TvN4LOa2GBfguKwvV94WsssyizMi4qurGN2V0I8ZUmlypjBxvMY9MOBZ/2LwgYPf3U1QeE5g==} engines: {node: '>=8'} dependencies: - '@sentry/types': 7.66.0 - '@sentry/utils': 7.66.0 + '@sentry/types': 7.67.0 + '@sentry/utils': 7.67.0 tslib: 2.6.2 dev: false - /@sentry/node@7.66.0: - resolution: {integrity: sha512-PxqIqLr4Sh5xcDfECiBQ4PuZ7v8yTgLhaRkruWrZPYxQrcJFPkwbFkw/IskzVnhT2VwXUmeWEIlRMQKBJ0t83A==} + /@sentry/node@7.67.0: + resolution: {integrity: sha512-QRPLdHp00HMTSpanonf4MfAcWiWHSnnCaNT0at0SJlpPf1z1Ch8FXxFl5CKwcbYyMCQDBxsZ2tdQwjrmPdMDcA==} engines: {node: '>=8'} dependencies: - '@sentry-internal/tracing': 7.66.0 - '@sentry/core': 7.66.0 - '@sentry/types': 7.66.0 - '@sentry/utils': 7.66.0 + '@sentry-internal/tracing': 7.67.0 + '@sentry/core': 7.67.0 + '@sentry/types': 7.67.0 + '@sentry/utils': 7.67.0 cookie: 0.4.2 https-proxy-agent: 5.0.1 lru_map: 0.3.3 @@ -1188,16 +1188,16 @@ packages: - supports-color dev: false - /@sentry/types@7.66.0: - resolution: {integrity: sha512-uUMSoSiar6JhuD8p7ON/Ddp4JYvrVd2RpwXJRPH1A4H4Bd4DVt1mKJy1OLG6HdeQv39XyhB1lPZckKJg4tATPw==} + /@sentry/types@7.67.0: + resolution: {integrity: sha512-GV/Hxdsp/hes1YQGPGgSUG1IHRNQVJMnCfYcpuZtZI6CvNJ+7qNOLkdmC/xGFwfpYH9kYsFBvmGsmeC6yUENYA==} engines: {node: '>=8'} dev: false - /@sentry/utils@7.66.0: - resolution: {integrity: sha512-9GYUVgXjK66uXXcLXVMXVzlptqMtq1eJENCuDeezQiEFrNA71KkLDg00wESp+LL+bl3wpVTBApArpbF6UEG5hQ==} + /@sentry/utils@7.67.0: + resolution: {integrity: sha512-OstjIfAD0xPWVnIRzoAtFBW+YTmxix4h3ehgtFxhA4VJHkC9KXchaTNwk+nlRy/nx4phx5vW9p7YWhO3kJBJmA==} engines: {node: '>=8'} dependencies: - '@sentry/types': 7.66.0 + '@sentry/types': 7.67.0 tslib: 2.6.2 dev: false From 8806b7a1b93332d6d50123bf4b955daa7e5b5dbe Mon Sep 17 00:00:00 2001 From: Tony Date: Wed, 6 Sep 2023 11:43:20 +0000 Subject: [PATCH 06/13] test: extend timeout for /test/httperror --- test/middleware/onerror.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/middleware/onerror.js b/test/middleware/onerror.js index 1d6850cd055687..6047fd52e0e8c0 100644 --- a/test/middleware/onerror.js +++ b/test/middleware/onerror.js @@ -31,7 +31,7 @@ describe('httperror', () => { expect(response.text).toMatch( /Response code 404 \(Not Found\): target website might be blocking our access, you can host your own RSSHub instance<\/a> for a better usability\./ ); - }); + }, 10000); }); describe('RequestInProgressError', () => { From caa8800641751779450935b1b0805bb9c69e3f18 Mon Sep 17 00:00:00 2001 From: Andvari <31068367+dzx-dzx@users.noreply.github.com> Date: Wed, 6 Sep 2023 21:08:34 +0800 Subject: [PATCH 07/13] fix(route): prevent backspace character from breaking JSON parsing in udn (#13220) * fix(route): prevent backspace character from breaking JSON parsing in udn. * Update breaking-news.js --- lib/v2/udn/breaking-news.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/v2/udn/breaking-news.js b/lib/v2/udn/breaking-news.js index 6d1b68d0ff9a2e..1e636a211699be 100644 --- a/lib/v2/udn/breaking-news.js +++ b/lib/v2/udn/breaking-news.js @@ -30,7 +30,7 @@ module.exports = async (ctx) => { } const $ = cheerio.load(result.data); - const metadata = $('script[type="application/ld+json"]').eq(0).text().trim(); + const metadata = $('script[type="application/ld+json"]').eq(0).text().trim().replaceAll(/[\b]/g, ""); const data = metadata.startsWith('[') ? JSON.parse(metadata)[0] : JSON.parse(metadata); // e.g. https://udn.com/news/story/7331/6576320 const content = $('.article-content__editor'); From 0d69ffde651144b788c264022464b634501d64b9 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 6 Sep 2023 13:10:57 +0000 Subject: [PATCH 08/13] style: auto format --- lib/v2/udn/breaking-news.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/v2/udn/breaking-news.js b/lib/v2/udn/breaking-news.js index 1e636a211699be..c894c311d5001e 100644 --- a/lib/v2/udn/breaking-news.js +++ b/lib/v2/udn/breaking-news.js @@ -30,7 +30,7 @@ module.exports = async (ctx) => { } const $ = cheerio.load(result.data); - const metadata = $('script[type="application/ld+json"]').eq(0).text().trim().replaceAll(/[\b]/g, ""); + const metadata = $('script[type="application/ld+json"]').eq(0).text().trim().replaceAll(/[\b]/g, ''); const data = metadata.startsWith('[') ? JSON.parse(metadata)[0] : JSON.parse(metadata); // e.g. https://udn.com/news/story/7331/6576320 const content = $('.article-content__editor'); From 02491add100bfd3b01a6d2da727a5356ca246f89 Mon Sep 17 00:00:00 2001 From: Ethan Shen <42264778+nczitzk@users.noreply.github.com> Date: Thu, 7 Sep 2023 00:12:23 +0800 Subject: [PATCH 09/13] =?UTF-8?q?feat(route):=20add=20=E7=90=86=E6=83=B3?= =?UTF-8?q?=E7=94=9F=E6=B4=BB=E5=AE=9E=E9=AA=8C=E5=AE=A4=20(#13207)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat(route): add 理想生活实验室 * fix docs * update lib/v2/toodaylab/index.js --------- --- lib/v2/toodaylab/index.js | 99 ++++++++++++++++++++++++++++++++ lib/v2/toodaylab/maintainer.js | 7 +++ lib/v2/toodaylab/radar.js | 37 ++++++++++++ lib/v2/toodaylab/router.js | 3 + website/docs/routes/new-media.md | 40 +++++++++++++ 5 files changed, 186 insertions(+) create mode 100644 lib/v2/toodaylab/index.js create mode 100644 lib/v2/toodaylab/maintainer.js create mode 100644 lib/v2/toodaylab/radar.js create mode 100644 lib/v2/toodaylab/router.js diff --git a/lib/v2/toodaylab/index.js b/lib/v2/toodaylab/index.js new file mode 100644 index 00000000000000..fda14bfecb76ac --- /dev/null +++ b/lib/v2/toodaylab/index.js @@ -0,0 +1,99 @@ +const got = require('@/utils/got'); +const cheerio = require('cheerio'); +const timezone = require('@/utils/timezone'); +const { parseDate, parseRelativeDate } = require('@/utils/parse-date'); + +module.exports = async (ctx) => { + const { params = 'posts' } = ctx.params; + const limit = ctx.query.limit ? parseInt(ctx.query.limit, 10) : 30; + + const isHot = params === 'hot'; + + const rootUrl = 'https://www.toodaylab.com'; + const currentUrl = new URL(isHot ? 'posts' : params, rootUrl).href; + + const { data: response } = await got(currentUrl); + + const $ = cheerio.load(response); + + let items = isHot + ? $('div.hot-list a') + .slice(0, limit) + .toArray() + .map((item) => { + item = $(item); + + return { + title: item.find('div.hot-item p').text(), + link: new URL(item.prop('href'), rootUrl).href, + }; + }) + : $('div.single-post') + .slice(0, limit) + .toArray() + .map((item) => { + item = $(item); + + const a = item.find('p.title a'); + + const pubDate = item + .find('div.left-infos p') + .text() + .trim() + .split(/\/\/\s/) + .pop(); + + return { + title: a.text(), + link: new URL(a.prop('href'), rootUrl).href, + description: item.find('p.excerpt').html(), + author: item.find('div.left-infos p a').text().trim(), + pubDate: timezone(/年|月|日/.test(pubDate) ? parseDate(pubDate, ['YYYY年M月D日 HH:mm', 'M月D日 HH:mm']) : parseRelativeDate(pubDate), +8), + }; + }); + + items = await Promise.all( + items.map((item) => + ctx.cache.tryGet(item.link, async () => { + const { data: detailResponse } = await got(item.link); + + const content = cheerio.load(detailResponse); + + const pubDate = content('div.left-infos p') + .text() + .trim() + .split(/\/\/\s/) + .pop(); + + item.title = content('h1').text() || item.title; + item.description = content('div.post-content').html() ?? item.description; + item.author = content('div.left-infos p a').text().trim() ?? item.author; + item.category = content('div.right-infos a') + .slice(1) + .toArray() + .map((c) => content(c).text().replace(/#/, '')); + item.pubDate = item.pubDate ?? timezone(/年|月|日/.test(pubDate) ? parseDate(pubDate, ['YYYY年M月D日 HH:mm', 'M月D日 HH:mm']) : parseRelativeDate(pubDate), +8); + item.upvotes = content('#like_count').text() ? parseInt(content('#like_count').text(), 10) : 0; + item.comments = parseInt(content('div.right-infos a').first().text(), 10) || 0; + + return item; + }) + ) + ); + + const title = $('title').text().split(/\s-/)[0]; + const icon = $('link[rel="apple-touch-icon"]').last().prop('href'); + + ctx.state.data = { + item: items, + title: isHot ? title.replace(/[^|]+/, '最热 ') : title, + link: currentUrl, + description: $('meta[name="description"]').prop('content'), + language: $('html').prop('lang'), + image: $('h3.logo a img').prop('src'), + icon, + logo: icon, + subtitle: $('meta[name="keywords"]').prop('content'), + author: $('h3.logo a img').prop('alt'), + }; +}; diff --git a/lib/v2/toodaylab/maintainer.js b/lib/v2/toodaylab/maintainer.js new file mode 100644 index 00000000000000..f7e886857a4548 --- /dev/null +++ b/lib/v2/toodaylab/maintainer.js @@ -0,0 +1,7 @@ +module.exports = { + '/column/:id': ['nczitzk'], + '/field/:id': ['nczitzk'], + '/hot': ['nczitzk'], + '/posts': ['nczitzk'], + '/topic/:id': ['nczitzk'], +}; diff --git a/lib/v2/toodaylab/radar.js b/lib/v2/toodaylab/radar.js new file mode 100644 index 00000000000000..80a45f2b77b687 --- /dev/null +++ b/lib/v2/toodaylab/radar.js @@ -0,0 +1,37 @@ +module.exports = { + 'toodaylab.com': { + _name: '理想生活实验室', + '.': [ + { + title: '滚动', + docs: 'https://docs.rsshub.app/routes/new-media#li-xiang-sheng-huo-shi-yan-shi-gun-dong', + source: ['/posts'], + target: '/toodaylab/posts', + }, + { + title: '最热', + docs: 'https://docs.rsshub.app/routes/new-media#li-xiang-sheng-huo-shi-yan-shi-zui-re', + source: ['/posts'], + target: '/toodaylab/hot', + }, + { + title: '专栏', + docs: 'https://docs.rsshub.app/routes/new-media#li-xiang-sheng-huo-shi-yan-shi-zhuan-lan', + source: ['/column/:id'], + target: '/toodaylab/column/:id', + }, + { + title: '领域', + docs: 'https://docs.rsshub.app/routes/new-media#li-xiang-sheng-huo-shi-yan-shi-ling-yu', + source: ['/field/:id'], + target: '/toodaylab/field/:id', + }, + { + title: '话题', + docs: 'https://docs.rsshub.app/routes/new-media#li-xiang-sheng-huo-shi-yan-shi-hua-ti', + source: ['/topic/:id'], + target: '/toodaylab/topic/:id', + }, + ], + }, +}; diff --git a/lib/v2/toodaylab/router.js b/lib/v2/toodaylab/router.js new file mode 100644 index 00000000000000..a8cead003da8cd --- /dev/null +++ b/lib/v2/toodaylab/router.js @@ -0,0 +1,3 @@ +module.exports = (router) => { + router.get('/:params*', require('./')); +}; diff --git a/website/docs/routes/new-media.md b/website/docs/routes/new-media.md index 059223fdcddfaf..45d7fb7a0a5581 100644 --- a/website/docs/routes/new-media.md +++ b/website/docs/routes/new-media.md @@ -3384,6 +3384,46 @@ column 为 third 时可选的 category: +## 理想生活实验室 {#li-xiang-sheng-huo-shi-yan-shi} + +### 滚动 {#li-xiang-sheng-huo-shi-yan-shi-gun-dong} + + + +### 最热 {#li-xiang-sheng-huo-shi-yan-shi-zui-re} + + + +### 专栏 {#li-xiang-sheng-huo-shi-yan-shi-zhuan-lan} + + + +| 专题 | 攻略 | +| ---- | ---- | +| 299 | 300 | + + + +### 领域 {#li-xiang-sheng-huo-shi-yan-shi-ling-yu} + + + +| 快消 | 时尚 | 智能 | 娱乐 | 运动 | 生活 | 设计 | 出行 | +| ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | +| 308 | 307 | 306 | 305 | 304 | 303 | 302 | 301 | + + + +### 话题 {#li-xiang-sheng-huo-shi-yan-shi-hua-ti} + + + +| 今日消费资讯 | 实验室带你过周末 | 实验室带你过假期 | 每日一图 | 每周一书 | 实验室数字 | 新鲜社会人 | 实验室TV | +| ------------ | ---------------- | ---------------- | -------- | -------- | ---------- | ---------- | -------- | +| 309 | 37 | 40 | 32 | 33 | 310 | 316 | 476 | + + + ## 链捕手 ChainCatcher {#lian-bu-shou-chaincatcher} ### 首页 {#lian-bu-shou-chaincatcher-shou-ye} From bb524fbb1990de54275dad964b464fd5945843b1 Mon Sep 17 00:00:00 2001 From: Tony Date: Thu, 7 Sep 2023 00:48:14 +0800 Subject: [PATCH 10/13] fix(route): douyin live (#13225) --- lib/v2/douyin/live.js | 59 ++++++++++++++++++++++++++++++++----------- 1 file changed, 44 insertions(+), 15 deletions(-) diff --git a/lib/v2/douyin/live.js b/lib/v2/douyin/live.js index e22ee7e6b967ca..a6cc9cdfa7603b 100644 --- a/lib/v2/douyin/live.js +++ b/lib/v2/douyin/live.js @@ -1,5 +1,6 @@ const config = require('@/config').value; -const { getOriginAvatar, universalGet } = require('./utils'); +const { getOriginAvatar } = require('./utils'); +const logger = require('@/utils/logger'); module.exports = async (ctx) => { const { rid } = ctx.params; @@ -9,31 +10,59 @@ module.exports = async (ctx) => { const pageUrl = `https://live.douyin.com/${rid}`; - const renderData = await ctx.cache.tryGet(`douyin:live:${rid}`, () => universalGet(pageUrl, 'live', ctx), config.cache.routeExpire, false); + const renderData = await ctx.cache.tryGet( + `douyin:live:${rid}`, + async () => { + let roomInfo; + const browser = await require('@/utils/puppeteer')(); + const page = await browser.newPage(); + await page.setRequestInterception(true); - if (renderData[Object.keys(renderData).find((k) => renderData[k].status_code)].status_code !== '0') { - throw `Status code ${renderData[Object.keys(renderData).find((k) => renderData[k].status_code)].status_code}`; + page.on('request', (request) => { + request.resourceType() === 'document' || request.resourceType() === 'script' || request.resourceType() === 'xhr' ? request.continue() : request.abort(); + }); + page.on('response', async (response) => { + const request = response.request(); + if (request.url().includes('/webcast/room/web/enter')) { + roomInfo = await response.json(); + } + }); + logger.debug(`Requesting ${pageUrl}`); + await page.goto(pageUrl, { + waitUntil: 'networkidle2', + }); + browser.close(); + + return roomInfo; + }, + config.cache.routeExpire, + false + ); + + if (renderData.status_code !== 0) { + throw Error(`Status code ${renderData.status_code}`); } - const roomInfo = renderData.app.initialState.roomStore.roomInfo; - const nickname = roomInfo.anchor.nickname; - const userAvatar = roomInfo.anchor.avatar_thumb.url_list[0]; + const roomInfo = renderData.data.data[0]; + const nickname = roomInfo.owner.nickname; + const userAvatar = roomInfo.owner.avatar_thumb.url_list[0]; const items = []; - if (roomInfo.roomId) { - if (roomInfo.room.status === 2) { + if (roomInfo.id_str) { + if (roomInfo.status === 2) { items.push({ - title: `开播:${roomInfo.room.title}`, + title: `开播:${roomInfo.title}`, + description: ``, link: pageUrl, author: nickname, - guid: roomInfo.roomId, // roomId is unique for each live event + guid: roomInfo.id_str, // roomId is unique for each live event }); - } else if (roomInfo.room.status === 4) { + } else if (roomInfo.status === 4) { items.push({ - title: `当前直播已结束,期待下一场:${roomInfo.room.title}`, - link: `https://www.douyin.com/user/${roomInfo.anchor.sec_uid}`, + title: `当前直播已结束,期待下一场:${roomInfo.title}`, + link: `https://www.douyin.com/user/${roomInfo.owner.sec_uid}`, author: nickname, - guid: roomInfo.roomId, + guid: roomInfo.id_str, }); } } From fa27a51ebffadedef35981aea1b8e8d792aff073 Mon Sep 17 00:00:00 2001 From: JimenezLi <75196426+JimenezLi@users.noreply.github.com> Date: Thu, 7 Sep 2023 04:34:35 +0800 Subject: [PATCH 11/13] fix(route): fix bjfu encoding (#13223) * fix(route): fix bjfu encoding * fix: use cache.tryGet --------- --- lib/v2/bjfu/grs.js | 58 +++++++++++++-------------------- lib/v2/bjfu/it/index.js | 16 ++++++---- lib/v2/bjfu/it/utils.js | 67 ++++++++++++++++++++++----------------- lib/v2/bjfu/jwc/index.js | 11 ++----- lib/v2/bjfu/jwc/utils.js | 55 +++++++++++++++----------------- lib/v2/bjfu/kjc.js | 63 +++++++++++++++--------------------- lib/v2/bjfu/news/index.js | 21 ++++++------ lib/v2/bjfu/news/utils.js | 36 ++++++++++----------- 8 files changed, 152 insertions(+), 175 deletions(-) diff --git a/lib/v2/bjfu/grs.js b/lib/v2/bjfu/grs.js index 8ccab5e06cc8b3..fd61c43f4ef9a1 100644 --- a/lib/v2/bjfu/grs.js +++ b/lib/v2/bjfu/grs.js @@ -1,60 +1,46 @@ const got = require('@/utils/got'); const cheerio = require('cheerio'); -const iconv = require('iconv-lite'); +const { parseDate } = require('@/utils/parse-date'); +const timezone = require('@/utils/timezone'); module.exports = async (ctx) => { const url = 'http://graduate.bjfu.edu.cn/pygl/pydt/index.html'; - const response = await got.get(url, { - responseType: 'buffer', - }); - const data = iconv.decode(response.data, 'gb2312'); + const response = await got.get(url); + const data = response.data; const $ = cheerio.load(data); const list = $('.itemList li') - .slice(0, 10) - .map((i, e) => { + .slice(0, 11) + .toArray() + .map((e) => { const element = $(e); const title = element.find('li a').attr('title'); const link = element.find('li a').attr('href'); - const date = new Date( - element - .find('li a') - .text() - .match(/\d{4}-\d{2}-\d{2}/) - ); - const timeZone = 8; - const serverOffset = date.getTimezoneOffset() / 60; - const pubDate = new Date(date.getTime() - 60 * 60 * 1000 * (timeZone + serverOffset)).toUTCString(); + const date = element + .find('li a') + .text() + .match(/\d{4}-\d{2}-\d{2}/); + const pubDate = timezone(parseDate(date), 8); return { title, - description: '', link: 'http://graduate.bjfu.edu.cn/pygl/pydt/' + link, author: '北京林业大学研究生院培养动态', pubDate, }; - }) - .get(); + }); const result = await Promise.all( - list.map(async (item) => { - const link = item.link; + list.map((item) => + ctx.cache.tryGet(item.link, async () => { + const itemReponse = await got.get(item.link); + const data = itemReponse.data; + const itemElement = cheerio.load(data); - const cache = await ctx.cache.get(link); - if (cache) { - return Promise.resolve(JSON.parse(cache)); - } + item.description = itemElement('.articleTxt').html(); - const itemReponse = await got.get(link, { - responseType: 'buffer', - }); - const data = iconv.decode(itemReponse.data, 'gb2312'); - const itemElement = cheerio.load(data); - - item.description = itemElement('.articleTxt').html(); - - ctx.cache.set(link, JSON.stringify(item)); - return item; - }) + return item; + }) + ) ); ctx.state.data = { diff --git a/lib/v2/bjfu/it/index.js b/lib/v2/bjfu/it/index.js index 5ce783dea0f0b5..50bfd092ff7d00 100644 --- a/lib/v2/bjfu/it/index.js +++ b/lib/v2/bjfu/it/index.js @@ -1,7 +1,7 @@ const got = require('@/utils/got'); const cheerio = require('cheerio'); const util = require('./utils'); -const iconv = require('iconv-lite'); // 转码 +const iconv = require('iconv-lite'); module.exports = async (ctx) => { const type = ctx.params.type; @@ -27,16 +27,18 @@ module.exports = async (ctx) => { const response = await got({ method: 'get', - responseType: 'buffer', // 转码 + responseType: 'buffer', url: base, }); - const data = iconv.decode(response.data, 'gb2312'); // 转码 - const $ = cheerio.load(data); - - // const list = $('div[item-content]').slice(0, 10).get(); + const data = response.data; + let $ = cheerio.load(iconv.decode(data, 'utf-8')); + const charset = $('meta[charset]').attr('charset'); + if (charset?.toLowerCase() !== 'utf-8') { + $ = cheerio.load(iconv.decode(data, charset ?? 'utf-8')); + } - const list = $('.item-content').get(); + const list = $('.item-content').toArray(); const result = await util.ProcessFeed(base, list, ctx.cache); // 感谢@hoilc指导 diff --git a/lib/v2/bjfu/it/utils.js b/lib/v2/bjfu/it/utils.js index ded31bf02a3835..eafc5fb3a47a8a 100644 --- a/lib/v2/bjfu/it/utils.js +++ b/lib/v2/bjfu/it/utils.js @@ -1,62 +1,71 @@ const got = require('@/utils/got'); const cheerio = require('cheerio'); -const iconv = require('iconv-lite'); // 转码 +const iconv = require('iconv-lite'); const { parseDate } = require('@/utils/parse-date'); const timezone = require('@/utils/timezone'); // 完整文章页 async function load(link) { - const response = await got.get(link, { - responseType: 'buffer', - }); + let response; + try { + response = await got.get(link, { + responseType: 'buffer', + }); + } catch (e) { + return { description: '' }; + } - const data = iconv.decode(response.data, 'gb2312'); // 转码 + const data = response.data; // 不用转码 // 加载文章内容 - const $ = cheerio.load(data); - - // 解析日期 - const pubDate = timezone( - parseDate( - $('.template-head-info') - .text() - .match(/\d{4}-\d{2}-\d{2}/) - ), - +8 - ); + let $ = cheerio.load(iconv.decode(data, 'utf-8')); + const charset = $('meta[charset]').attr('charset'); + if (charset?.toLowerCase() !== 'utf-8') { + $ = cheerio.load(iconv.decode(data, charset ?? 'utf-8')); + } // 提取内容 - const description = $('.template-body').html(); + const description = ($('.template-body').length ? $('.template-body').html() : '') + ($('.template-tail').length ? $('.template-tail').html() : ''); // 返回解析的结果 - return { description, pubDate }; + return { description }; } const ProcessFeed = (base, list, caches) => // 使用 Promise.all() 进行 async 并发 Promise.all( // 遍历每一篇文章 - list.map(async (item) => { + list.map((item) => { const $ = cheerio.load(item); const $title = $('a'); // 还原相对链接为绝对链接 const itemUrl = new URL($title.attr('href'), base).href; // 感谢@hoilc指导 - // 列表上提取到的信息 - const single = { - title: $title.text(), - link: itemUrl, - author: '北林信息', - guid: itemUrl, - }; + // 解析日期 + const pubDate = timezone( + parseDate( + $('span') + .text() + .match(/\d{4}-\d{2}-\d{2}/) + ), + +8 + ); // 使用tryGet方法从缓存获取内容。 // 当缓存中无法获取到链接内容的时候,则使用load方法加载文章内容。 - const other = await caches.tryGet(itemUrl, () => load(itemUrl)); + return caches.tryGet(itemUrl, async () => { + const { description } = await load(itemUrl); - // 合并解析后的结果集作为该篇文章最终的输出结果 - return { ...single, ...other }; + // 列表上提取到的信息 + return { + title: $title.text(), + link: itemUrl, + author: '北林信息', + description, + pubDate, + }; + }); }) ); module.exports = { diff --git a/lib/v2/bjfu/jwc/index.js b/lib/v2/bjfu/jwc/index.js index d11b75adbff707..6d195029c7e1e3 100644 --- a/lib/v2/bjfu/jwc/index.js +++ b/lib/v2/bjfu/jwc/index.js @@ -1,16 +1,11 @@ const got = require('@/utils/got'); const cheerio = require('cheerio'); const util = require('./utils'); -const iconv = require('iconv-lite'); // 转码 module.exports = async (ctx) => { const type = ctx.params.type; let title, path; switch (type) { - case 'jwkx': - title = '教务快讯'; - path = 'jwkx/'; - break; case 'jgdt': title = '教改动态'; path = 'jgdt/'; @@ -27,6 +22,7 @@ module.exports = async (ctx) => { title = '图片新闻'; path = 'tpxw/'; break; + case 'jwkx': default: title = '教务快讯'; path = 'jwkx/'; @@ -35,14 +31,13 @@ module.exports = async (ctx) => { const response = await got({ method: 'get', - responseType: 'buffer', // 转码 url: base, }); - const data = iconv.decode(response.data, 'gb2312'); // 转码 + const data = response.data; // 不用转码 const $ = cheerio.load(data); - const list = $('.list_c li').slice(0, 10).get(); + const list = $('.list_c li').slice(0, 15).toArray(); const result = await util.ProcessFeed(base, list, ctx.cache); // 感谢@hoilc指导 diff --git a/lib/v2/bjfu/jwc/utils.js b/lib/v2/bjfu/jwc/utils.js index a443fd9c0810b9..e997c6540a03b3 100644 --- a/lib/v2/bjfu/jwc/utils.js +++ b/lib/v2/bjfu/jwc/utils.js @@ -1,61 +1,58 @@ const got = require('@/utils/got'); const cheerio = require('cheerio'); -const iconv = require('iconv-lite'); // 转码 const { parseDate } = require('@/utils/parse-date'); const timezone = require('@/utils/timezone'); // 完整文章页 async function load(link) { - const response = await got.get(link, { - responseType: 'buffer', - }); + const response = await got.get(link); - const data = iconv.decode(response.data, 'gb2312'); // 转码 + const data = response.data; // 不用转码 // 加载文章内容 const $ = cheerio.load(data); - // 解析日期 - const pubDate = timezone( - parseDate( - $('div #con_djl') - .text() - .match(/\d{4}-\d{2}-\d{2}/) - ), - +8 - ); - // 提取内容 - const description = $('#con_c').html(); + const description = ($('#con_c').length ? $('#con_c').html() : '') + ($('#con_fujian').length ? $('#con_fujian').html() : ''); // 返回解析的结果 - return { description, pubDate }; + return { description }; } const ProcessFeed = (base, list, caches) => Promise.all( // 遍历每一篇文章 - list.map(async (item) => { + list.map((item) => { const $ = cheerio.load(item); const $title = $('a'); // 还原相对链接为绝对链接 const itemUrl = new URL($title.attr('href'), base).href; // 感谢@hoilc指导 - // 列表上提取到的信息 - const single = { - title: $title.text(), - link: itemUrl, - author: '北林教务处', - guid: itemUrl, - }; + // 解析日期 + const pubDate = timezone( + parseDate( + $('.datetime') + .text() + .match(/\d{4}-\d{2}-\d{2}/) + ), + +8 + ); // 使用tryGet方法从缓存获取内容。 // 当缓存中无法获取到链接内容的时候,则使用load方法加载文章内容。 - const other = await caches.tryGet(itemUrl, () => load(itemUrl)); - - // 合并解析后的结果集作为该篇文章最终的输出结果 - return { ...single, ...other }; + return caches.tryGet(itemUrl, async () => { + const { description } = await load(itemUrl); + + // 列表上提取到的信息 + return { + title: $title.text(), + link: itemUrl, + author: '北林教务处', + description, + pubDate, + }; + }); }) ); module.exports = { diff --git a/lib/v2/bjfu/kjc.js b/lib/v2/bjfu/kjc.js index dd54226c71c775..0e2a5f1ff907c7 100644 --- a/lib/v2/bjfu/kjc.js +++ b/lib/v2/bjfu/kjc.js @@ -1,60 +1,47 @@ const got = require('@/utils/got'); const cheerio = require('cheerio'); -const iconv = require('iconv-lite'); +const { parseDate } = require('@/utils/parse-date'); +const timezone = require('@/utils/timezone'); module.exports = async (ctx) => { const url = 'http://kyc.bjfu.edu.cn/tztg/index.html'; - const response = await got.get(url, { - responseType: 'buffer', - }); - const data = iconv.decode(response.data, 'gb2312'); + const response = await got.get(url); + const data = response.data; const $ = cheerio.load(data); const list = $('.ll_con_r_b li') - .slice(0, 10) - .map((i, e) => { + .slice(0, 15) + .toArray() + .map((e) => { const element = $(e); const title = element.find('.ll_con_r_b_title a').text(); const link = element.find('a').attr('href'); - const date = new Date( - element - .find('.ll_con_r_b_time') - .text() - .match(/\d{4}-\d{2}-\d{2}/) - ); - const timeZone = 8; - const serverOffset = date.getTimezoneOffset() / 60; - const pubDate = new Date(date.getTime() - 60 * 60 * 1000 * (timeZone + serverOffset)).toUTCString(); + const date = element + .find('.ll_con_r_b_time') + .text() + .match(/\d{4}-\d{2}-\d{2}/); + const pubDate = timezone(parseDate(date), 8); return { title, - description: '', link: 'http://kyc.bjfu.edu.cn/tztg/' + link, author: '北京林业大学科技处通知公告', pubDate, }; - }) - .get(); + }); const result = await Promise.all( - list.map(async (item) => { - const link = item.link; - - const cache = await ctx.cache.get(link); - if (cache) { - return Promise.resolve(JSON.parse(cache)); - } - - const itemReponse = await got.get(link, { - responseType: 'buffer', - }); - const data = iconv.decode(itemReponse.data, 'gb2312'); - const itemElement = cheerio.load(data); - - item.description = itemElement('#a_con_l_con').html(); - - ctx.cache.set(link, JSON.stringify(item)); - return item; - }) + list.map((item) => + ctx.cache.tryGet(item.link, async () => { + const itemReponse = await got.get(item.link); + const data = itemReponse.data; + const itemElement = cheerio.load(data); + + item.description = itemElement('#a_con_l_con').html(); + item.title = item.title.includes('...') ? itemElement('#a_con_l_title').text() : item.title; + + return item; + }) + ) ); ctx.state.data = { diff --git a/lib/v2/bjfu/news/index.js b/lib/v2/bjfu/news/index.js index c72c46ce826e49..0d0d2f1477c84a 100644 --- a/lib/v2/bjfu/news/index.js +++ b/lib/v2/bjfu/news/index.js @@ -1,16 +1,12 @@ const got = require('@/utils/got'); const cheerio = require('cheerio'); const util = require('./utils'); -const iconv = require('iconv-lite'); // 转码 +const iconv = require('iconv-lite'); module.exports = async (ctx) => { const type = ctx.params.type; let title, path; switch (type) { - case 'lsyw': - title = '绿色要闻'; - path = 'lsyw/'; - break; case 'xydt': title = '校园动态'; path = 'lsxy/'; @@ -27,6 +23,7 @@ module.exports = async (ctx) => { title = '一周排行'; path = 'yzph/'; break; + case 'lsyw': default: title = '绿色要闻'; path = 'lsyw/'; @@ -35,14 +32,20 @@ module.exports = async (ctx) => { const response = await got({ method: 'get', - responseType: 'buffer', // 转码 + responseType: 'buffer', url: base, }); - const data = iconv.decode(response.data, 'gb2312'); // 转码 - const $ = cheerio.load(data); + const data = response.data; + let $ = cheerio.load(iconv.decode(data, 'utf-8')); + const charset = $('meta[http-equiv="Content-Type"]') + .attr('content') + .match(/charset=(.*)/)?.[1]; + if (charset?.toLowerCase() !== 'utf-8') { + $ = cheerio.load(iconv.decode(data, charset ?? 'utf-8')); + } - const list = $('.news_ul li').slice(0, 10).get(); + const list = $('.news_ul li').slice(0, 12).toArray(); const result = await util.ProcessFeed(base, list, ctx.cache); // 感谢@hoilc指导 diff --git a/lib/v2/bjfu/news/utils.js b/lib/v2/bjfu/news/utils.js index 1d9851484ab1b2..88eeaab01439ff 100644 --- a/lib/v2/bjfu/news/utils.js +++ b/lib/v2/bjfu/news/utils.js @@ -1,16 +1,13 @@ const got = require('@/utils/got'); const cheerio = require('cheerio'); -const iconv = require('iconv-lite'); // 转码 const { parseDate } = require('@/utils/parse-date'); const timezone = require('@/utils/timezone'); // 完整文章页 async function load(link) { - const response = await got.get(link, { - responseType: 'buffer', - }); + const response = await got.get(link); - const data = iconv.decode(response.data, 'gb2312'); // 转码 + const data = response.data; // 不用转码 // 加载文章内容 const $ = cheerio.load(data); @@ -27,35 +24,36 @@ async function load(link) { // 提取内容 const description = $('.article_con').html(); + const title = $('h2').text(); // 返回解析的结果 - return { description, pubDate }; + return { description, pubDate, title }; } const ProcessFeed = (base, list, caches) => Promise.all( // 遍历每一篇文章 - list.map(async (item) => { + list.map((item) => { const $ = cheerio.load(item); const $title = $('a'); // 还原相对链接为绝对链接 const itemUrl = new URL($title.attr('href'), base).href; // 感谢@hoilc指导 - // 列表上提取到的信息 - const single = { - title: $title.text(), - link: itemUrl, - author: '绿色新闻网', - guid: itemUrl, - }; - // 使用tryGet方法从缓存获取内容。 // 当缓存中无法获取到链接内容的时候,则使用load方法加载文章内容。 - const other = await caches.tryGet(itemUrl, () => load(itemUrl)); - - // 合并解析后的结果集作为该篇文章最终的输出结果 - return { ...single, ...other }; + return caches.tryGet(itemUrl, async () => { + const { description, pubDate, title } = await load(itemUrl); + + // 列表上提取到的信息 + return { + title: $title.text().includes('...') ? title : $title.text(), + link: itemUrl, + author: '绿色新闻网', + description, + pubDate, + }; + }); }) ); module.exports = { From 0dd782417b9c44e5cbc49738e0470ba9ef4fe0f8 Mon Sep 17 00:00:00 2001 From: Tony Date: Thu, 7 Sep 2023 05:22:44 +0800 Subject: [PATCH 12/13] fix(route): douyin live (#13227) --- lib/v2/douyin/live.js | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/lib/v2/douyin/live.js b/lib/v2/douyin/live.js index a6cc9cdfa7603b..c60cd9ba706bb3 100644 --- a/lib/v2/douyin/live.js +++ b/lib/v2/douyin/live.js @@ -44,8 +44,9 @@ module.exports = async (ctx) => { } const roomInfo = renderData.data.data[0]; - const nickname = roomInfo.owner.nickname; - const userAvatar = roomInfo.owner.avatar_thumb.url_list[0]; + const roomOwner = renderData.data.user; + const nickname = roomOwner.nickname; + const userAvatar = roomOwner.avatar_thumb.url_list[0]; const items = []; if (roomInfo.id_str) { @@ -60,7 +61,7 @@ module.exports = async (ctx) => { } else if (roomInfo.status === 4) { items.push({ title: `当前直播已结束,期待下一场:${roomInfo.title}`, - link: `https://www.douyin.com/user/${roomInfo.owner.sec_uid}`, + link: `https://www.douyin.com/user/${roomOwner.sec_uid}`, author: nickname, guid: roomInfo.id_str, }); From 0663a9d9f5cb569e6f72746c858f04acaca4fe01 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 7 Sep 2023 05:34:21 +0800 Subject: [PATCH 13/13] chore(deps): bump @sentry/node from 7.67.0 to 7.68.0 (#13226) * chore(deps): bump @sentry/node from 7.67.0 to 7.68.0 Bumps [@sentry/node](https://github.com/getsentry/sentry-javascript) from 7.67.0 to 7.68.0. - [Release notes](https://github.com/getsentry/sentry-javascript/releases) - [Changelog](https://github.com/getsentry/sentry-javascript/blob/develop/CHANGELOG.md) - [Commits](https://github.com/getsentry/sentry-javascript/compare/7.67.0...7.68.0) --- updated-dependencies: - dependency-name: "@sentry/node" dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] * chore: fix pnpm install --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- package.json | 2 +- pnpm-lock.yaml | 44 ++++++++++++++++++++++---------------------- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/package.json b/package.json index e0248eca84fb0f..b951ce0ad67cb2 100644 --- a/package.json +++ b/package.json @@ -86,7 +86,7 @@ "@koa/router": "12.0.0", "@notionhq/client": "2.2.13", "@postlight/parser": "2.2.3", - "@sentry/node": "7.67.0", + "@sentry/node": "7.68.0", "aes-js": "3.1.2", "art-template": "4.13.2", "bbcodejs": "0.0.4", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 791a65e13d071d..e1308a19cc36d9 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -15,8 +15,8 @@ dependencies: specifier: 2.2.3 version: 2.2.3 '@sentry/node': - specifier: 7.67.0 - version: 7.67.0 + specifier: 7.68.0 + version: 7.68.0 aes-js: specifier: 3.1.2 version: 3.1.2 @@ -1153,33 +1153,33 @@ packages: selderee: 0.11.0 dev: false - /@sentry-internal/tracing@7.67.0: - resolution: {integrity: sha512-+3wpnzW2HczPlZsp1pWtdOavBKLK/tu1qDEg+blqLfW7b/qZZ8hqQ+A+2mEWRLgWfIoGZ8t4U84nN4tzDXv+nQ==} + /@sentry-internal/tracing@7.68.0: + resolution: {integrity: sha512-nNKS/q21+Iqzxs2K7T/l3dZi8Z9s/uxsAazpk2AYhFzx9mFnPj1Xfe3dgbFoygNifE+IrpUuldr6D5HQamTDPQ==} engines: {node: '>=8'} dependencies: - '@sentry/core': 7.67.0 - '@sentry/types': 7.67.0 - '@sentry/utils': 7.67.0 + '@sentry/core': 7.68.0 + '@sentry/types': 7.68.0 + '@sentry/utils': 7.68.0 tslib: 2.6.2 dev: false - /@sentry/core@7.67.0: - resolution: {integrity: sha512-apk0WHnFJTHX86TvN4LOa2GBfguKwvV94WsssyizMi4qurGN2V0I8ZUmlypjBxvMY9MOBZ/2LwgYPf3U1QeE5g==} + /@sentry/core@7.68.0: + resolution: {integrity: sha512-mT3ObBWgvAky/QF3dZy4KBoXbRXbNsD6evn+mYi9UEeIZQ5NpnQYDEp78mapiEjI/TAHZIhTIuaBhj1Jk0qUUA==} engines: {node: '>=8'} dependencies: - '@sentry/types': 7.67.0 - '@sentry/utils': 7.67.0 + '@sentry/types': 7.68.0 + '@sentry/utils': 7.68.0 tslib: 2.6.2 dev: false - /@sentry/node@7.67.0: - resolution: {integrity: sha512-QRPLdHp00HMTSpanonf4MfAcWiWHSnnCaNT0at0SJlpPf1z1Ch8FXxFl5CKwcbYyMCQDBxsZ2tdQwjrmPdMDcA==} + /@sentry/node@7.68.0: + resolution: {integrity: sha512-gtcHoi6Xu6Iu8MpPgKJA4E0nozqLvYF0fKtt+27T0QBzWioO6lkxSQkKGWMyJGL0AmpLCex0E28fck/rlbt0LA==} engines: {node: '>=8'} dependencies: - '@sentry-internal/tracing': 7.67.0 - '@sentry/core': 7.67.0 - '@sentry/types': 7.67.0 - '@sentry/utils': 7.67.0 + '@sentry-internal/tracing': 7.68.0 + '@sentry/core': 7.68.0 + '@sentry/types': 7.68.0 + '@sentry/utils': 7.68.0 cookie: 0.4.2 https-proxy-agent: 5.0.1 lru_map: 0.3.3 @@ -1188,16 +1188,16 @@ packages: - supports-color dev: false - /@sentry/types@7.67.0: - resolution: {integrity: sha512-GV/Hxdsp/hes1YQGPGgSUG1IHRNQVJMnCfYcpuZtZI6CvNJ+7qNOLkdmC/xGFwfpYH9kYsFBvmGsmeC6yUENYA==} + /@sentry/types@7.68.0: + resolution: {integrity: sha512-5J2pH1Pjx/029zTm3CNY9MaE8Aui81nG7JCtlMp7uEfQ//9Ja4d4Sliz/kV4ARbkIKUZerSgaRAm3xCy5XOXLg==} engines: {node: '>=8'} dev: false - /@sentry/utils@7.67.0: - resolution: {integrity: sha512-OstjIfAD0xPWVnIRzoAtFBW+YTmxix4h3ehgtFxhA4VJHkC9KXchaTNwk+nlRy/nx4phx5vW9p7YWhO3kJBJmA==} + /@sentry/utils@7.68.0: + resolution: {integrity: sha512-NecnQegvKARyeFmBx7mYmbI17mTvjARWs1nfzY5jhPyNc3Zk4M3bQsgIdnJ1t+jo93UYudlNND7hxhDzjcBAVg==} engines: {node: '>=8'} dependencies: - '@sentry/types': 7.67.0 + '@sentry/types': 7.68.0 tslib: 2.6.2 dev: false