Skip to content

Commit

Permalink
Merge pull request #49 from DIYgod/master
Browse files Browse the repository at this point in the history
[pull] master from diygod:master
  • Loading branch information
pull[bot] authored Sep 7, 2023
2 parents 74ed004 + 0663a9d commit fad9788
Show file tree
Hide file tree
Showing 28 changed files with 535 additions and 285 deletions.
2 changes: 1 addition & 1 deletion lib/router.js
Original file line number Diff line number Diff line change
Expand Up @@ -442,7 +442,7 @@ router.get('/rs05/rs05', lazyloadRouteHandler('./routes/rs05/rs05'));
router.get('/qutoutiao/category/:cid', lazyloadRouteHandler('./routes/qutoutiao/category'));

// BBC
router.get('/bbc/:site?/:channel?', lazyloadRouteHandler('./routes/bbc/index'));
// router.get('/bbc/:site?/:channel?', lazyloadRouteHandler('./routes/bbc/index'));

// 看雪
router.get('/pediy/topic/:category?/:type?', lazyloadRouteHandler('./routes/pediy/topic'));
Expand Down
8 changes: 4 additions & 4 deletions lib/routes/bbc/index.js → lib/v2/bbc/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,19 +16,19 @@ module.exports = async (ctx) => {
title = 'BBC News 中文网';

if (!channel) {
feed = await parser.parseURL('http://www.bbc.co.uk/zhongwen/simp/index.xml');
feed = await parser.parseURL('https://www.bbc.co.uk/zhongwen/simp/index.xml');
} else {
feed = await parser.parseURL(`http://www.bbc.co.uk/zhongwen/simp/${channel}/index.xml`);
feed = await parser.parseURL(`https://www.bbc.co.uk/zhongwen/simp/${channel}/index.xml`);
}
break;

case 'traditionalchinese':
title = 'BBC News 中文網';

if (!channel) {
feed = await parser.parseURL('http://www.bbc.co.uk/zhongwen/trad/index.xml');
feed = await parser.parseURL('https://www.bbc.co.uk/zhongwen/trad/index.xml');
} else {
feed = await parser.parseURL(`http://www.bbc.co.uk/zhongwen/trad/${channel}/index.xml`);
feed = await parser.parseURL(`https://www.bbc.co.uk/zhongwen/trad/${channel}/index.xml`);
}
link = 'https://www.bbc.com/zhongwen/trad';
break;
Expand Down
4 changes: 4 additions & 0 deletions lib/v2/bbc/maintainer.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
module.exports = {
'/:channel': ['HenryQW', 'DIYgod'],
'/:lang/:channel?': ['HenryQW'],
};
11 changes: 11 additions & 0 deletions lib/v2/bbc/radar.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
module.exports = {
'bbc.com': {
_name: 'BBC',
'.': [
{
title: 'News',
docs: 'https://docs.rsshub.app/routes/traditional-media#bbc',
},
],
},
};
3 changes: 3 additions & 0 deletions lib/v2/bbc/router.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
module.exports = (router) => {
router.get('/:site?/:channel?', require('./index'));
};
File renamed without changes.
58 changes: 22 additions & 36 deletions lib/v2/bjfu/grs.js
Original file line number Diff line number Diff line change
@@ -1,60 +1,46 @@
const got = require('@/utils/got');
const cheerio = require('cheerio');
const iconv = require('iconv-lite');
const { parseDate } = require('@/utils/parse-date');
const timezone = require('@/utils/timezone');

module.exports = async (ctx) => {
const url = 'http://graduate.bjfu.edu.cn/pygl/pydt/index.html';
const response = await got.get(url, {
responseType: 'buffer',
});
const data = iconv.decode(response.data, 'gb2312');
const response = await got.get(url);
const data = response.data;
const $ = cheerio.load(data);
const list = $('.itemList li')
.slice(0, 10)
.map((i, e) => {
.slice(0, 11)
.toArray()
.map((e) => {
const element = $(e);
const title = element.find('li a').attr('title');
const link = element.find('li a').attr('href');
const date = new Date(
element
.find('li a')
.text()
.match(/\d{4}-\d{2}-\d{2}/)
);
const timeZone = 8;
const serverOffset = date.getTimezoneOffset() / 60;
const pubDate = new Date(date.getTime() - 60 * 60 * 1000 * (timeZone + serverOffset)).toUTCString();
const date = element
.find('li a')
.text()
.match(/\d{4}-\d{2}-\d{2}/);
const pubDate = timezone(parseDate(date), 8);

return {
title,
description: '',
link: 'http://graduate.bjfu.edu.cn/pygl/pydt/' + link,
author: '北京林业大学研究生院培养动态',
pubDate,
};
})
.get();
});

const result = await Promise.all(
list.map(async (item) => {
const link = item.link;
list.map((item) =>
ctx.cache.tryGet(item.link, async () => {
const itemReponse = await got.get(item.link);
const data = itemReponse.data;
const itemElement = cheerio.load(data);

const cache = await ctx.cache.get(link);
if (cache) {
return Promise.resolve(JSON.parse(cache));
}
item.description = itemElement('.articleTxt').html();

const itemReponse = await got.get(link, {
responseType: 'buffer',
});
const data = iconv.decode(itemReponse.data, 'gb2312');
const itemElement = cheerio.load(data);

item.description = itemElement('.articleTxt').html();

ctx.cache.set(link, JSON.stringify(item));
return item;
})
return item;
})
)
);

ctx.state.data = {
Expand Down
16 changes: 9 additions & 7 deletions lib/v2/bjfu/it/index.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
const got = require('@/utils/got');
const cheerio = require('cheerio');
const util = require('./utils');
const iconv = require('iconv-lite'); // 转码
const iconv = require('iconv-lite');

module.exports = async (ctx) => {
const type = ctx.params.type;
Expand All @@ -27,16 +27,18 @@ module.exports = async (ctx) => {

const response = await got({
method: 'get',
responseType: 'buffer', // 转码
responseType: 'buffer',
url: base,
});

const data = iconv.decode(response.data, 'gb2312'); // 转码
const $ = cheerio.load(data);

// const list = $('div[item-content]').slice(0, 10).get();
const data = response.data;
let $ = cheerio.load(iconv.decode(data, 'utf-8'));
const charset = $('meta[charset]').attr('charset');
if (charset?.toLowerCase() !== 'utf-8') {
$ = cheerio.load(iconv.decode(data, charset ?? 'utf-8'));
}

const list = $('.item-content').get();
const list = $('.item-content').toArray();

const result = await util.ProcessFeed(base, list, ctx.cache); // 感谢@hoilc指导

Expand Down
67 changes: 38 additions & 29 deletions lib/v2/bjfu/it/utils.js
Original file line number Diff line number Diff line change
@@ -1,62 +1,71 @@
const got = require('@/utils/got');
const cheerio = require('cheerio');
const iconv = require('iconv-lite'); // 转码
const iconv = require('iconv-lite');
const { parseDate } = require('@/utils/parse-date');
const timezone = require('@/utils/timezone');

// 完整文章页
async function load(link) {
const response = await got.get(link, {
responseType: 'buffer',
});
let response;
try {
response = await got.get(link, {
responseType: 'buffer',
});
} catch (e) {
return { description: '' };
}

const data = iconv.decode(response.data, 'gb2312'); // 转码
const data = response.data; // 不用转码

// 加载文章内容
const $ = cheerio.load(data);

// 解析日期
const pubDate = timezone(
parseDate(
$('.template-head-info')
.text()
.match(/\d{4}-\d{2}-\d{2}/)
),
+8
);
let $ = cheerio.load(iconv.decode(data, 'utf-8'));
const charset = $('meta[charset]').attr('charset');
if (charset?.toLowerCase() !== 'utf-8') {
$ = cheerio.load(iconv.decode(data, charset ?? 'utf-8'));
}

// 提取内容
const description = $('.template-body').html();
const description = ($('.template-body').length ? $('.template-body').html() : '') + ($('.template-tail').length ? $('.template-tail').html() : '');

// 返回解析的结果
return { description, pubDate };
return { description };
}

const ProcessFeed = (base, list, caches) =>
// 使用 Promise.all() 进行 async 并发
Promise.all(
// 遍历每一篇文章
list.map(async (item) => {
list.map((item) => {
const $ = cheerio.load(item);

const $title = $('a');
// 还原相对链接为绝对链接
const itemUrl = new URL($title.attr('href'), base).href; // 感谢@hoilc指导

// 列表上提取到的信息
const single = {
title: $title.text(),
link: itemUrl,
author: '北林信息',
guid: itemUrl,
};
// 解析日期
const pubDate = timezone(
parseDate(
$('span')
.text()
.match(/\d{4}-\d{2}-\d{2}/)
),
+8
);

// 使用tryGet方法从缓存获取内容。
// 当缓存中无法获取到链接内容的时候,则使用load方法加载文章内容。
const other = await caches.tryGet(itemUrl, () => load(itemUrl));
return caches.tryGet(itemUrl, async () => {
const { description } = await load(itemUrl);

// 合并解析后的结果集作为该篇文章最终的输出结果
return { ...single, ...other };
// 列表上提取到的信息
return {
title: $title.text(),
link: itemUrl,
author: '北林信息',
description,
pubDate,
};
});
})
);
module.exports = {
Expand Down
11 changes: 3 additions & 8 deletions lib/v2/bjfu/jwc/index.js
Original file line number Diff line number Diff line change
@@ -1,16 +1,11 @@
const got = require('@/utils/got');
const cheerio = require('cheerio');
const util = require('./utils');
const iconv = require('iconv-lite'); // 转码

module.exports = async (ctx) => {
const type = ctx.params.type;
let title, path;
switch (type) {
case 'jwkx':
title = '教务快讯';
path = 'jwkx/';
break;
case 'jgdt':
title = '教改动态';
path = 'jgdt/';
Expand All @@ -27,6 +22,7 @@ module.exports = async (ctx) => {
title = '图片新闻';
path = 'tpxw/';
break;
case 'jwkx':
default:
title = '教务快讯';
path = 'jwkx/';
Expand All @@ -35,14 +31,13 @@ module.exports = async (ctx) => {

const response = await got({
method: 'get',
responseType: 'buffer', // 转码
url: base,
});

const data = iconv.decode(response.data, 'gb2312'); // 转码
const data = response.data; // 不用转码
const $ = cheerio.load(data);

const list = $('.list_c li').slice(0, 10).get();
const list = $('.list_c li').slice(0, 15).toArray();

const result = await util.ProcessFeed(base, list, ctx.cache); // 感谢@hoilc指导

Expand Down
Loading

0 comments on commit fad9788

Please sign in to comment.