forked from DIYgod/RSSHub
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(routes/shu): add routes for SHU's Int'l Dept, Grad School, and Campus Highlights. (DIYgod#17730)
* feat(routes/shu): add routes for SHU's Int'l Dept, Grad School, and Campus Highlights - Corrected the root URL in `index.ts`. - Added routes for: - SHU's International Department (Int'l Dept). - Graduate School (Grad School). - Campus Highlights. - Noted the unavailability of the policy in `jwb.ts` with a comment in `index.ts`. * Update lib/routes/shu/index.ts Co-authored-by: Tony <[email protected]> * Update lib/routes/shu/jwb.ts Co-authored-by: Tony <[email protected]> * Apply camelCase to variable names across the project. * Refactor: change to use detailed request format for GET request. * feat: refine content extraction and fix gs.shu.edu.cn issues - Refactored content extraction to focus on specific descriptions. - Added exception handling for inaccessible gs1.shu.edu.cn links. - Fixed bug where gs.shu.edu.cn content could not be retrieved. - Fixed Code scanning/ESLint warning: replaced disallowed syntax with .toArray(). * fix: Resolve ESLint warnings and errors * Update lib/routes/shu/xykd.ts Co-authored-by: Tony <[email protected]> * fix: Resolve ESLint warnings and errors again * fix: Resolve ESLint warnings and errors ---------
- Loading branch information
Showing
6 changed files
with
365 additions
and
49 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
import { Route } from '@/types'; | ||
import cache from '@/utils/cache'; | ||
import got from '@/utils/got'; | ||
import { load } from 'cheerio'; // [email protected] | ||
import { parseDate } from '@/utils/parse-date'; | ||
import timezone from '@/utils/timezone'; | ||
|
||
/**
 * Feed categories offered by this route, keyed by the `:type` path parameter.
 * Each entry holds the feed title and the listing page that gets scraped.
 */
const noticeType: Record<string, { title: string; url: string }> = {
    tzgg: {
        title: '上海大学国际部港澳台-通知公告',
        url: 'https://global.shu.edu.cn/cd/tzgg/3.htm',
    },
};
|
||
/**
 * Route registration for `/global/:type?`.
 *
 * `type` is optional; the description table lists the accepted value (`tzgg`).
 * None of the optional capabilities in `features` are used by this route.
 */
export const route: Route = {
    path: '/global/:type?',
    categories: ['university'],
    example: '/shu/global/tzgg',
    parameters: { type: '分类,默认为通知公告' },
    features: {
        requireConfig: false,
        requirePuppeteer: false,
        antiCrawler: false,
        supportBT: false,
        supportPodcast: false,
        supportScihub: false,
    },
    // Radar rule: maps the site root to this route with the default category.
    radar: [
        {
            source: ['global.shu.edu.cn/'],
            target: '/global',
        },
    ],
    name: '国际部港澳台办公室',
    maintainers: ['GhhG123'],
    handler,
    url: 'global.shu.edu.cn/',
    description: `| 通知公告 |
| -------- |
| tzgg |`,
};
|
||
/**
 * Builds the feed for a notice listing on global.shu.edu.cn.
 *
 * Reads the optional `type` path parameter (falling back to `tzgg`), scrapes
 * the matching listing page, then loads every linked article to fill in its
 * HTML body.
 *
 * @param ctx - Request context; only `ctx.req.param('type')` is read.
 * @returns Feed data with `title`, `description`, `link` and `item`.
 * @throws Error when `type` is not one of the keys of `noticeType`.
 */
async function handler(ctx) {
    const type = ctx.req.param('type') ?? 'tzgg';
    const rootUrl = 'https://global.shu.edu.cn';

    // Reject unsupported categories up front. An own-property check is used so
    // inherited keys such as `constructor` do not slip through and end up as
    // an undefined request URL.
    if (!Object.prototype.hasOwnProperty.call(noticeType, type)) {
        throw new Error(`Unknown type "${type}". Supported types: ${Object.keys(noticeType).join(', ')}`);
    }
    const { title: feedTitle, url: listUrl } = noticeType[type];

    // Fetch and parse the listing page.
    const response = await got({
        method: 'get',
        url: listUrl,
    });
    const $ = load(response.data);

    // One <li> per notice: the anchor holds title and link, the <span> the date.
    const list = $('div.only-list1 ul li')
        .toArray()
        .map((el) => {
            const item = $(el);
            const anchor = item.find('a');
            const rawLink = anchor.attr('href');
            const pubDate = item.find('span').text().trim();

            return {
                title: anchor.text().trim(),
                // Resolve relative hrefs against the site root; fall back to the root when absent.
                link: rawLink ? new URL(rawLink, rootUrl).href : rootUrl,
                // NOTE(review): presumably treats the parsed date as UTC+8 — confirm against the timezone helper.
                pubDate: timezone(parseDate(pubDate, 'YYYY年MM月DD日'), +8),
                // The listing page offers no summary; filled in from the article page below.
                description: '',
            };
        });

    // Load each article (cached by its link) and extract the content area.
    const items = await Promise.all(
        list.map((item) =>
            cache.tryGet(item.link, async () => {
                const detailResponse = await got({
                    method: 'get',
                    url: item.link,
                });
                const content = load(detailResponse.data);

                // Fall back to a fixed placeholder when the content container is missing.
                item.description = content('#vsb_content_2 .v_news_content').html() || '内容无法提取';

                return item;
            })
        )
    );

    return {
        title: feedTitle,
        description: feedTitle,
        link: listUrl,
        item: items,
    };
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,106 @@ | ||
import { Route } from '@/types'; | ||
import cache from '@/utils/cache'; | ||
import got from '@/utils/got'; | ||
import { load } from 'cheerio'; // [email protected] | ||
import { parseDate } from '@/utils/parse-date'; | ||
import timezone from '@/utils/timezone'; | ||
|
||
/**
 * Feed categories offered by this route, keyed by the `:type` path parameter.
 * Each entry holds the feed title and the listing page that gets scraped.
 */
const noticeType: Record<string, { title: string; url: string }> = {
    // General news
    zhxw: {
        title: '上海大学研究生院-综合新闻',
        url: 'https://gs.shu.edu.cn/xwlb/zh.htm',
    },
    // Original author's note for this category ("local // BUG"): a request to
    // https://gs1.shu.edu.cn:8080/py/KCBInfo.asp failed with "TypeError: fetch failed".
    pygl: {
        title: '上海大学研究生院-培养管理',
        url: 'https://gs.shu.edu.cn/xwlb/py.htm',
    },
    gjjl: {
        title: '上海大学研究生院-国际交流',
        url: 'https://gs.shu.edu.cn/xwlb/gjjl.htm',
    },
};
|
||
/**
 * Route registration for `/gs/:type?`.
 *
 * `type` is optional; the description table lists the accepted values
 * (`zhxw`, `pygl`, `gjjl`). None of the optional capabilities in `features`
 * are used by this route.
 */
export const route: Route = {
    path: '/gs/:type?',
    categories: ['university'],
    example: '/shu/gs/zhxw',
    // The handler falls back to `zhxw` (综合新闻), so the documented default must say so.
    parameters: { type: '分类,默认为综合新闻' },
    features: {
        requireConfig: false,
        requirePuppeteer: false,
        antiCrawler: false,
        supportBT: false,
        supportPodcast: false,
        supportScihub: false,
    },
    // Radar rule: maps the site root to this route with the default category.
    radar: [
        {
            source: ['gs.shu.edu.cn/'],
            target: '/gs',
        },
    ],
    name: '研究生院',
    maintainers: ['GhhG123'],
    handler,
    url: 'gs.shu.edu.cn/',
    description: `| 综合新闻 | 培养管理 | 国际交流 |
| -------- | --------- | --------- |
| zhxw | pygl | gjjl |`,
};
|
||
/**
 * Builds the feed for a news listing on gs.shu.edu.cn.
 *
 * Reads the optional `type` path parameter (falling back to `zhxw`), scrapes
 * the matching listing page, then loads every linked article to fill in its
 * HTML body. Links on the `gs1.shu.edu.cn` host are not fetched.
 *
 * @param ctx - Request context; only `ctx.req.param('type')` is read.
 * @returns Feed data with `title`, `description`, `link` and `item`.
 * @throws Error when `type` is not one of the keys of `noticeType`.
 */
async function handler(ctx) {
    const type = ctx.req.param('type') ?? 'zhxw';
    const rootUrl = 'https://gs.shu.edu.cn';

    // Reject unsupported categories up front. An own-property check is used so
    // inherited keys such as `constructor` do not slip through and end up as
    // an undefined request URL.
    if (!Object.prototype.hasOwnProperty.call(noticeType, type)) {
        throw new Error(`Unknown type "${type}". Supported types: ${Object.keys(noticeType).join(', ')}`);
    }
    const { title: feedTitle, url: listUrl } = noticeType[type];

    // Fetch and parse the listing page.
    const response = await got({
        method: 'get',
        url: listUrl,
    });
    const $ = load(response.data);

    // One <tr id="line_u17_N"> per news entry.
    const list = $('tr[id^="line_u17_"]')
        .toArray()
        .map((el) => {
            const row = $(el);
            const anchor = row.find('a');
            const cells = row.find('td');
            const rawLink = anchor.attr('href');
            const dateText = cells.eq(1).text().trim();

            return {
                title: anchor.text().trim(),
                // Resolve relative hrefs against the site root; fall back to the root when absent.
                link: rawLink ? new URL(rawLink, rootUrl).href : rootUrl,
                // NOTE(review): presumably treats the parsed date as UTC+8 — confirm against the timezone helper.
                pubDate: timezone(parseDate(dateText, 'YYYY/MM/DD HH:mm:ss'), +8),
                // Text of the third cell (visit count or other info per the original author);
                // replaced by the article body when that can be extracted.
                description: cells.eq(2).text().trim(),
            };
        });

    // Load each article (cached by its link) and extract the content area.
    const items = await Promise.all(
        list.map((item) =>
            cache.tryGet(item.link, async () => {
                // Per the original author, gs1.shu.edu.cn is only reachable from the campus
                // network, so those links get a placeholder instead of a failing request.
                // `hostname` excludes the port, so gs1.shu.edu.cn:8080 matches as well.
                if (new URL(item.link).hostname === 'gs1.shu.edu.cn') {
                    item.description = 'gs1.shu.edu.cn, 无法直接获取';
                    return item;
                }

                const detailResponse = await got({
                    method: 'get',
                    url: item.link,
                });
                const content = load(detailResponse.data);

                // Keep the listing-page text when the content container is missing.
                item.description = content('#vsb_content .v_news_content').html() || item.description;

                return item;
            })
        )
    );

    return {
        title: feedTitle,
        description: feedTitle,
        link: listUrl,
        item: items,
    };
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,23 +1,20 @@ | ||
import { Route } from '@/types'; | ||
import cache from '@/utils/cache'; | ||
import got from '@/utils/got'; | ||
import { load } from 'cheerio'; | ||
import { load } from 'cheerio'; // [email protected] | ||
import { parseDate } from '@/utils/parse-date'; | ||
import timezone from '@/utils/timezone'; | ||
|
||
const host = 'https://www.shu.edu.cn/'; | ||
const alias = new Map([ | ||
['news', 'zhxw'], // 综合新闻 | ||
['research', 'kydt1'], // 科研动态 | ||
['kydt', 'kydt1'], // 科研动态 | ||
['notice', 'tzgg'], // 通知公告 | ||
['important', 'zyxw'], // 重要新闻 | ||
]); | ||
const noticeType = { | ||
tzgg: { title: '上海大学 - 通知公告', url: 'https://www.shu.edu.cn/tzgg.htm' }, | ||
zyxw: { title: '上海大学 - 重要新闻', url: 'https://www.shu.edu.cn/zyxw.htm' }, | ||
}; | ||
|
||
export const route: Route = { | ||
path: '/:type?', | ||
path: '/news/:type?', | ||
categories: ['university'], | ||
example: '/shu/news', | ||
parameters: { type: '消息类型,默认为`news`' }, | ||
example: '/shu/news/tzgg', | ||
parameters: { type: '分类,默认为通知公告' }, | ||
features: { | ||
requireConfig: false, | ||
requirePuppeteer: false, | ||
|
@@ -28,50 +25,70 @@ export const route: Route = { | |
}, | ||
radar: [ | ||
{ | ||
source: ['www.shu.edu.cn/:type'], | ||
target: '/:type', | ||
source: ['www.shu.edu.cn/'], | ||
target: '/news', | ||
}, | ||
], | ||
name: '官网信息', | ||
maintainers: ['lonelyion'], | ||
name: '官网通知公告', | ||
maintainers: ['lonelyion', 'GhhG123'], | ||
handler, | ||
description: `| 综合新闻 | 科研动态 | 通知公告 | 重要新闻 | | ||
| -------- | -------- | -------- | --------- | | ||
| news | research | notice | important |`, | ||
url: 'www.shu.edu.cn/', | ||
description: `| 通知公告 | 重要新闻 | | ||
| -------- | --------- | | ||
| tzgg | zyxw |`, | ||
}; | ||
|
||
async function handler(ctx) { | ||
const type = ctx.req.param('type') || 'news'; | ||
const link = `https://www.shu.edu.cn/${alias.get(type) || type}.htm`; | ||
const respond = await got.get(link); | ||
const $ = load(respond.data); | ||
const title = $('title').text(); | ||
const list = $('.ej_main .list') | ||
.find('li') | ||
.slice(0, 5) | ||
const type = ctx.req.param('type') ?? 'tzgg'; | ||
const rootUrl = 'https://www.shu.edu.cn'; | ||
|
||
// 发起 HTTP GET 请求 | ||
const response = await got({ | ||
method: 'get', | ||
|
||
/* headers: { | ||
'user-agent': UA, | ||
cookie: await getCookie(ctx), | ||
}, */ | ||
url: noticeType[type].url, | ||
}); | ||
|
||
const $ = load(response.data); | ||
|
||
const list = $('div.list ul li') // 以下获取信息需要根据网页结构定制 | ||
// For cheerio 1.x.x . The item parameter in the .map callback is now explicitly typed as a Cheerio<Element>, not just Element. --fixed | ||
.toArray() | ||
.map((ele) => ({ | ||
title: $(ele).find('.bt').text(), | ||
link: new URL($(ele).find('a').attr('href'), host).href, | ||
date: $(ele).find('.sj').text(), | ||
})); | ||
.map((el) => { | ||
const item = $(el); // Wrap `el` in a Cheerio object | ||
const rawLink = item.find('a').attr('href'); | ||
return { | ||
title: item.find('p.bt').text().trim(), | ||
link: rawLink ? new URL(rawLink, rootUrl).href : rootUrl, | ||
pubDate: timezone(parseDate(item.find('p.sj').text().trim(), 'YYYY.MM.DD'), +8), | ||
description: item.find('p.zy').text().trim(), | ||
}; | ||
}); | ||
|
||
const all = await Promise.all( | ||
const items = await Promise.all( | ||
list.map((item) => | ||
cache.tryGet(item.link, async () => { | ||
const response = await got.get(item.link); | ||
const $ = load(response.data); | ||
item.author = $('.xx>:nth-child(2)').text().trim().slice(3); // 投稿:xxx | ||
item.pubDate = parseDate(item.date, 'YYYY.MM.DD'); | ||
item.description = $('.v_news_content').html() || item.title; | ||
const detailResponse = await got({ | ||
method: 'get', | ||
url: item.link | ||
}); | ||
const content = load(detailResponse.data); | ||
|
||
item.description = content('#vsb_content .v_news_content').html() || item.description; | ||
|
||
return item; | ||
}) | ||
) | ||
); | ||
|
||
return { | ||
title, | ||
link, | ||
item: all, | ||
title: noticeType[type].title, | ||
description: noticeType[type].title, | ||
link: noticeType[type].url, | ||
item: items, | ||
}; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.