diff --git a/.vscode/launch.json b/.vscode/launch.json index c9c0267e..ded85f45 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -41,11 +41,11 @@ { "type": "node", "request": "launch", - "name": "debug", + "name": "debug-command", "program": "${workspaceFolder}/src/ace.ts", "args": [ // 抓取数据 - "Command:Demo" + "debug" ], "outFiles": ["${workspaceFolder}/**/*.js"] } diff --git a/README.md b/README.md index 2126fca7..f7903290 100644 --- a/README.md +++ b/README.md @@ -16,25 +16,23 @@ # 软件下载 -最新版本: 1.1.0 -[点击下载](http://stablog.bookflaneur.cn/%E7%A8%B3%E9%83%A8%E8%90%BD%20Setup%201.1.0.exe) +最新版本: 1.2.0 +[点击下载-win](http://stablog.bookflaneur.cn/release/%E7%A8%B3%E9%83%A8%E8%90%BD%20Setup%201.2.0.exe) 使用截图: 软件界面: -![软件界面](http://ww1.sinaimg.cn/large/6671cfa8ly1g86clapkgxj211y0omt9r.jpg) +![软件界面](http://stablog.bookflaneur.cn//img/%E4%BD%BF%E7%94%A8%E6%88%AA%E5%9B%BE.png) 输出的html电子书: -![输出的html电子书](http://ww1.sinaimg.cn/large/6671cfa8ly1g86clb89ixj20rg0qjgs0.jpg) +![输出的html电子书](http://stablog.bookflaneur.cn//img/%E8%BE%93%E5%87%BA%E7%9A%84html%E7%94%B5%E5%AD%90%E4%B9%A6.png) 输出的pdf电子书: -![输出的pdf电子书](http://ww1.sinaimg.cn/large/6671cfa8ly1g86clba2g8j20tv0kbgr8.jpg) +![输出的pdf电子书](http://stablog.bookflaneur.cn//img/%E8%BE%93%E5%87%BA%E7%9A%84pdf%E7%94%B5%E5%AD%90%E4%B9%A6.png) ## 项目动机 -鉴于知乎世风日下, 还禁了项目前身[知乎助手](https://github.com/YaoZeyuan/zhihuhelp_with_node)的推广. 作者决定转战微博, 造福社会 - 严肃保护以[@Aioros先生](https://weibo.com/u/6646798696?refer_flag=0000015010_&from=feed&loc=nickname&sudaref=www.weibo.com&is_all=1)为代表的珍稀野生读物博主 @@ -46,9 +44,9 @@ 3. 备份完成后会自动打开电子书输出目录, `稳部落输出的电子书`目录下, 即为电子书, 其中, `html`目录下为html格式书籍, `index.html`为目录页. `pdf`下为pdf输出位置 -1. ![登录微博](http://ww1.sinaimg.cn/large/6671cfa8ly1g86clauxc7j211g0kydje.jpg) -2. ![配置任务](http://ww1.sinaimg.cn/large/6671cfa8ly1g86claim2zj211j0ohabb.jpg) -3. ![查看电子书](http://ww1.sinaimg.cn/large/6671cfa8ly1g86clad6r8j20ni0ce3zb.jpg) +1. ![登录微博](http://stablog.bookflaneur.cn//img/%E7%99%BB%E5%BD%95%E5%BE%AE%E5%8D%9A.jfif) +2. ![配置任务](http://stablog.bookflaneur.cn//img/%E4%BD%BF%E7%94%A8%E6%AD%A5%E9%AA%A4-6.png) +3. ![查看电子书](http://stablog.bookflaneur.cn//img/%E6%9F%A5%E7%9C%8B%E8%BE%93%E5%87%BA%E7%9A%84%E7%94%B5%E5%AD%90%E4%B9%A6.jfif) # 配置项说明 @@ -74,10 +72,6 @@ 问: 为什么备份这么慢? 为什么每次都要等20s之后才抓取下一条微博? 答: 新浪对爬虫进行了严格限制, 20s抓一次是我试验多次之后, 可以安全备份微博数据的最短间隔. 作为为非开发人员提供的备份工具, **稳定**第一🎵 -问: 为啥只有Windows版? 我是Mac用户, 什么时候会有Mac版? 会不会有Android版? iOS版? -答: 因为作者用的是Windows, 没有Mac...以及, 由于这本身是一个业余项目, 所以不会开发Android/iOS. 有兴趣的同学可以比照源代码自行开发. - - # 代码规范 @@ -112,13 +106,16 @@ 1. 打包时会向dist目录中复制一份node_modules目录, 导致npm run 时优先从dist中获取node_module信息, 导致无法启动 1. 因此, 打包结束后需要将dist里的node_modules目录删掉, 以免影响后续开发工作 3. 电子书封面分辨率为: 100 * 130(宽*高) -4. commit信息规范 => - | 关键字 | 功能 | - | ------ | ------------- | - | feat | 添加新功能 | - | format | 调整代码格式 | - | fix | 修复错误 | - | doc | 修订文档/注释 | + + +## commit信息规范 + +| 关键字 | 功能 | +| ------ | ------------- | +| feat | 添加新功能 | +| format | 调整代码格式 | +| fix | 修复错误 | +| doc | 修订文档/注释 | # 开发指南 @@ -154,6 +151,4 @@ # 支持作者 -![老铁,谢谢了](http://ww1.sinaimg.cn/large/6671cfa8ly1g7y4zur0juj20ri16s41d.jpg) - -[致谢列表](https://www.easy-mock.com/mock/5d9b49fc896b9432186c1fa5/stablog/thank_you/list) +![老铁,谢谢了](http://stablog.bookflaneur.cn//img/%E8%80%81%E9%93%81%E8%B0%A2%E8%B0%A2%E4%BA%86.jfif) \ No newline at end of file diff --git a/changelog.md b/changelog.md new file mode 100644 index 00000000..fe6e4eb9 --- /dev/null +++ b/changelog.md @@ -0,0 +1,6 @@ +# 1.2.0 + +1. 支持浏览已备份微博数据 +2. 解决微博排序异常问题 +3. 解决无法展示微博文章问题 +4. 日志记录超过10w行, 自动清空 \ No newline at end of file diff --git a/gui/public/index.html b/gui/public/index.html index bbc86a56..f0411de8 100644 --- a/gui/public/index.html +++ b/gui/public/index.html @@ -3,6 +3,8 @@ + + diff --git a/gui/src/view/App.vue b/gui/src/view/App.vue index dff30ee5..7942cd46 100644 --- a/gui/src/view/App.vue +++ b/gui/src/view/App.vue @@ -6,6 +6,9 @@ + + + @@ -31,6 +34,7 @@ import Log from './log/index.vue' import Helper from './helper/index.vue' import Donate from './donate/index.vue' + import Manage from './manage/index.vue' import _ from 'lodash' export default { @@ -41,6 +45,7 @@ Login, Helper, Donate, + Manage, }, data(){ return { @@ -52,6 +57,7 @@ login:'login', helper:'helper', donate:'donate', + manage:'manage', } }, // 页面数据 diff --git a/gui/src/view/customer_task/index.vue b/gui/src/view/customer_task/index.vue index 92b99755..7e499cc9 100644 --- a/gui/src/view/customer_task/index.vue +++ b/gui/src/view/customer_task/index.vue @@ -245,7 +245,7 @@ export default Vue.extend({ mergeCount: 1000, fetchStartAtPageNo: 0, fetchEndAtPageNo: 100000, - outputStartAtMs: moment('2015-01-01 00:00:00').unix() * 1000, + outputStartAtMs: moment('2011-01-01 00:00:00').unix() * 1000, outputEndAtMs: moment() .add(1, 'year') diff --git a/gui/src/view/helper/index.vue b/gui/src/view/helper/index.vue index 6acaa259..a161f242 100644 --- a/gui/src/view/helper/index.vue +++ b/gui/src/view/helper/index.vue @@ -4,27 +4,26 @@ name="github-readme" id="github-readme" src="https://github.com/YaoZeyuan/stablog/blob/master/README.md" - style > diff --git a/gui/src/view/log/index.vue b/gui/src/view/log/index.vue index 0d73ea85..264fb244 100644 --- a/gui/src/view/log/index.vue +++ b/gui/src/view/log/index.vue @@ -1,61 +1,64 @@ diff --git a/package.json b/package.json index a939864d..c9874c7a 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "stablog", - "version": "1.1.0", + "version": "1.2.0", "description": "稳部落. 专业备份微blog, 老铁, 稳!", "repository": { "type": "git", @@ -72,7 +72,7 @@ "element-ui": "^2.12.0", "image-size": "^0.8.3", "json5": "^2.1.0", - "knex": "^0.19.5", + "knex": "^0.20.1", "lodash": "^4.17.5", "md5": "^2.2.1", "moment": "^2.24.0", diff --git a/src/ace.ts b/src/ace.ts index 20bd404c..c7d5428c 100644 --- a/src/ace.ts +++ b/src/ace.ts @@ -12,6 +12,7 @@ import ace from '@adonisjs/ace' const registedCommandList = [ './command/demo', // 命令demo + './command/debug', // 专业debug命令 './command/init_env', // 初始化运行环境 './command/dispatch_task', // 分发任务 diff --git a/src/api/weibo.ts b/src/api/weibo.ts index 00fa107e..94a9bbeb 100644 --- a/src/api/weibo.ts +++ b/src/api/weibo.ts @@ -2,6 +2,7 @@ import _ from 'lodash' import Base from '~/src/api/base' import * as TypeWeibo from '~/src/type/namespace/weibo' import Util from '~/src/library/util/common' +import moment from 'moment' /** * 用户信息部分容器 id @@ -153,19 +154,26 @@ export default class Weibo extends Base { /** * 获取微博文章, 获取不到返回空对象 */ - static async asyncGetWeiboArticle(url: string) { - let responseHtml = await Base.http.get(url) - let json: TypeWeibo.TypeWeiboArticleRecord - try { - let scriptContent = responseHtml.split('')[1] - let rawJsContent = scriptContent.split('')[0] - let rawJson = jsContent.split('var $render_data = [')[1] - let jsonStr = rawJson.split('][0] || {};')[0] - json = JSON.parse(jsonStr) - } catch (e) { - json = {} - } + static async asyncGetWeiboArticle(articleId: string) { + let apiUrl = `https://card.weibo.com/article/m/aj/detail?id=${articleId}&_t=${moment().unix()}` + let articleUrl = `https://card.weibo.com/article/m/show/id/${articleId}` + + let response = await Base.http.get(apiUrl, { + headers: { + Referer: articleUrl, + }, + }) + let json: TypeWeibo.TypeWeiboArticleRecord = _.get(response, ['data'], {}) + // try { + // let scriptContent = responseHtml.split('')[1] + // let rawJsContent = scriptContent.split('')[0] + // let rawJson = jsContent.split('var $render_data = [')[1] + // let jsonStr = rawJson.split('][0] || {};')[0] + // json = JSON.parse(jsonStr) + // } catch (e) { + // json = {} + // } return json } diff --git a/src/command/debug.ts b/src/command/debug.ts new file mode 100644 index 00000000..5d10090c --- /dev/null +++ b/src/command/debug.ts @@ -0,0 +1,23 @@ +import Base from '~/src/command/base' +import MBlog from '~/src/model/mblog' +import fs from 'fs' + +class CommandDebug extends Base { + static get signature() { + return ` + debug + ` + } + + static get description() { + return '专业Debug' + } + + async execute() { + let result = await MBlog.asyncGetWeiboDistribution('1221171697') + this.log(`debug it`) + console.log(result) + } +} + +export default CommandDebug diff --git a/src/command/fetch/customer.ts b/src/command/fetch/customer.ts index 818ceda2..e2df247f 100644 --- a/src/command/fetch/customer.ts +++ b/src/command/fetch/customer.ts @@ -13,6 +13,34 @@ import CommonUtil from '~/src/library/util/common' import * as TypeWeibo from '~/src/type/namespace/weibo' import Util from '~/src/library/util/common' +/** + * 解析微博文章id,方便构造api, 抓取文章内容 + * @param rawUrl + */ +function getArticleId(rawUrl = '') { + if (!rawUrl) { + return '' + } + // 需要多次解析,才能将url完全解码成正常文本 + let decodeUrl = unescape(unescape(unescape(rawUrl))) + if (!decodeUrl) { + return '' + } + let rawArticleUrl = decodeUrl.split('url=')[1] + if (!rawArticleUrl) { + return '' + } + let baseArticleUrl = rawArticleUrl.split('?')[0] // url => 'https://card.weibo.com/article/m/show/id/2309404446645566701785' + if (!baseArticleUrl) { + return '' + } + let articleId = baseArticleUrl.split('show/id/')[1] + if (!articleId) { + return '' + } + return articleId +} + class FetchCustomer extends Base { fetchStartAtPageNo = 0 fetchEndAtPageNo = 10000 @@ -156,8 +184,8 @@ class FetchCustomer extends Base { ) { // 转发的是微博文章 let pageInfo = rawMblog.mblog.retweeted_status.page_info - let articleUrl = pageInfo.page_url - let articleRecord = await ApiWeibo.asyncGetWeiboArticle(articleUrl) + let articleId = getArticleId(pageInfo.page_url) + let articleRecord = await ApiWeibo.asyncGetWeiboArticle(articleId) if (_.isEmpty(articleRecord)) { // 文章详情获取失败, 不储存该记录 continue @@ -168,8 +196,8 @@ class FetchCustomer extends Base { if (rawMblog.mblog.page_info && rawMblog.mblog.page_info.type === 'article') { // 文章类型为微博文章 let pageInfo = rawMblog.mblog.page_info - let articleUrl = pageInfo.page_url - let articleRecord = await ApiWeibo.asyncGetWeiboArticle(articleUrl) + let articleId = getArticleId(pageInfo.page_url) + let articleRecord = await ApiWeibo.asyncGetWeiboArticle(articleId) if (_.isEmpty(articleRecord)) { // 文章详情获取失败, 不储存该记录 continue diff --git a/src/command/generate/customer.ts b/src/command/generate/customer.ts index 71c277bc..3fdca31e 100644 --- a/src/command/generate/customer.ts +++ b/src/command/generate/customer.ts @@ -105,12 +105,9 @@ class GenerateCustomer extends Base { ) mblogList.sort((a, b) => { // 先进行排序 - let aSortBy = a.created_timestamp_at - let bSortBy = b.created_timestamp_at - if (a.created_timestamp_at === b.created_timestamp_at) { - aSortBy = parseInt(a.id) - bSortBy = parseInt(b.id) - } + // 根据接口 https://m.weibo.cn/feed/friends?max_id=4448802586999203 可以确认, id为确认时间线的关键 + let aSortBy = parseInt(a.idstr, 10) + let bSortBy = parseInt(b.idstr, 10) if (this.CUSTOMER_CONFIG_postAtOrderBy === 'asc') { return aSortBy! - bSortBy! } else { @@ -371,12 +368,12 @@ class GenerateCustomer extends Base { let dayIndex = 0 for (let weiboDayRecord of weiboDayList) { dayIndex++ - this.log(`正在处理第${dayIndex}/${weiboDayList.length}批微博记录`) + this.log(`将网页渲染为pdf, 正在处理第${dayIndex}/${weiboDayList.length}卷微博记录`) let weiboIndex = 0 for (let weiboRecord of weiboDayRecord.weiboList) { weiboIndex++ this.log( - `正在处理第${dayIndex}/${weiboDayList.length}批下,第${weiboIndex}/${weiboDayRecord.weiboList.length}条微博`, + `正在处理第${dayIndex}/${weiboDayList.length}卷中,第${weiboIndex}/${weiboDayRecord.weiboList.length}条微博`, ) let content = WeiboView.render([weiboRecord]) content = this.processContent(content) @@ -391,15 +388,15 @@ class GenerateCustomer extends Base { let imageBuffer = await page.screenshot({ type: 'jpeg', quality: 60, fullPage: true, omitBackground: true }) if (imageBuffer.length < 1000) { // 图片渲染失败 - this.log(`第${dayIndex}/${weiboDayList.length}条微博渲染失败, 自动跳过`) + this.log(`第${weiboIndex}/${weiboDayRecord.weiboList.length}条微博渲染失败, 自动跳过`) continue } else { - this.log(`第${dayIndex}/${weiboDayList.length}条微博渲染成功`) + this.log(`第${weiboIndex}/${weiboDayRecord.weiboList.length}条微博渲染成功`) let size = await imageSize.imageSize(imageBuffer) let { width, height } = size this.log(`图片size=>`, { width, height }) if (!width || width <= 0 || !height || height <= 0) { - this.log(`第${dayIndex}/${weiboDayList.length}条微博截图捕获失败, 自动跳过`) + this.log(`第${weiboIndex}/${weiboDayRecord.weiboList.length}条微博截图捕获失败, 自动跳过`) continue } diff --git a/src/index.ts b/src/index.ts index aeba4601..e8f06023 100644 --- a/src/index.ts +++ b/src/index.ts @@ -5,6 +5,8 @@ import ConfigHelperUtil from '~/src/library/util/config_helper' import PathConfig from '~/src/config/path' import Logger from '~/src/library/logger' import DispatchTaskCommand from '~/src/command/dispatch_task' +import MUser from '~/src/model/mblog_user' +import MBlog from '~/src/model/mblog' import fs from 'fs' import _ from 'lodash' @@ -48,10 +50,12 @@ function createWindow() { Menu.setApplicationMenu(null) } + const { screen } = Electron + const { width, height } = screen.getPrimaryDisplay().workAreaSize // Create the browser window. mainWindow = new BrowserWindow({ - width: 1366, - height: 768, + width, + height, // 自动隐藏菜单栏 autoHideMenuBar: true, // 窗口的默认标题 @@ -101,6 +105,9 @@ function createWindow() { }) global.pathConfig = PathConfig + // 向html代码注入MUser, 方便查询 + global.mUser = MUser + global.mBlog = MBlog } // This method will be called when Electron has finished diff --git a/src/model/mblog.ts b/src/model/mblog.ts index ae200cfb..5fc40416 100644 --- a/src/model/mblog.ts +++ b/src/model/mblog.ts @@ -11,6 +11,18 @@ type TypeMblogRecord = { post_publish_at: number } +type BlogDistributionMap = Map< + string, + { + date: string + key: string + type: 'year' | 'month' | 'day' + startAt: number + count: number + childrenMap: BlogDistributionMap + } +> + export default class Mblog extends Base { static TABLE_NAME = `total_mblog` static TABLE_COLUMN = [`id`, `author_uid`, `raw_json`] @@ -43,6 +55,104 @@ export default class Mblog extends Base { mblogRecordList.push(mblogRecord) } } + + // 按发布时间(id)排序 + mblogRecordList.sort((a, b) => { + // 先进行排序 + // 根据接口 https://m.weibo.cn/feed/friends?max_id=4448802586999203 可以确认, id为确认时间线的关键 + let aSortBy = parseInt(a.idstr, 10) + let bSortBy = parseInt(b.idstr, 10) + return aSortBy! - bSortBy! + }) + return mblogRecordList } + + /** + * 获取数据库中的微博记录数分布 + * @param uid + */ + static async asyncGetWeiboDistribution(uid: string): Promise { + let postPublishAtList = >await this.db + .select(`post_publish_at`) + .from(this.TABLE_NAME) + .where('author_uid', '=', uid) + .orderBy(`post_publish_at`, 'desc') + .catch(() => { + return [] + }) + let distributionMap: Map>> = new Map() + + for (let item of postPublishAtList) { + let publishAt = item.post_publish_at + let YYYY = moment.unix(publishAt).format('YYYY') + let MM = moment.unix(publishAt).format('MM') + let DD = moment.unix(publishAt).format('DD') + + if (distributionMap.has(YYYY) === false) { + distributionMap.set(YYYY, new Map()) + } + let yearMap = distributionMap.get(YYYY)! + if (yearMap.has(MM) === false) { + yearMap.set(MM, new Map()) + } + let monthMap = yearMap.get(MM)! + if (monthMap.has(DD) === false) { + monthMap.set(DD, []) + } + let dayList = monthMap.get(DD)! + dayList.push(publishAt) + monthMap.set(DD, dayList) + yearMap.set(MM, monthMap) + distributionMap.set(YYYY, yearMap) + } + + let newYearMap: BlogDistributionMap = new Map() + for (let year of distributionMap.keys()) { + let yearCounter = 0 + let yearMap = distributionMap.get(year)! + let newMonthMap: BlogDistributionMap = new Map() + for (let month of yearMap.keys()) { + let monthCounter = 0 + let monthMap = yearMap.get(month)! + let newDayMap: BlogDistributionMap = new Map() + for (let day of monthMap.keys()) { + let dayCounter = monthMap.get(day)!.length + monthCounter += dayCounter + newDayMap.set(`${day}日`, { + date: `${day}日`, + key: `${year}-${month}-${day}`, + type: 'day', + startAt: moment(`${year}-${month}-${day}`, 'YYYY-MM-DD') + .startOf('day') + .unix(), + count: dayCounter, + childrenMap: new Map(), + }) + } + yearCounter += monthCounter + newMonthMap.set(`${month}月`, { + date: `${month}月`, + key: `${year}-${month}`, + type: 'month', + startAt: moment(`${year}-${month}-01`, 'YYYY-MM-DD') + .startOf('day') + .unix(), + count: monthCounter, + childrenMap: newDayMap, + }) + } + newYearMap.set(`${year}年`, { + date: `${year}年`, + key: `${year}`, + type: 'year', + startAt: moment(`${year}-01-01`, 'YYYY-MM-DD') + .startOf('day') + .unix(), + count: yearCounter, + childrenMap: newMonthMap, + }) + } + return newYearMap + } } diff --git a/src/model/mblog_user.ts b/src/model/mblog_user.ts index 82468d1c..c363222d 100644 --- a/src/model/mblog_user.ts +++ b/src/model/mblog_user.ts @@ -13,6 +13,24 @@ export default class MblogUser extends Base { static TABLE_NAME = `total_user` static TABLE_COLUMN = [`author_uid`, `raw_json`] + /** + * 获取数据库中的用户列表 + * @param author_uid + */ + static async asyncGetUserList(): Promise { + let rawRecordList = >await this.db + .select(this.TABLE_COLUMN) + .from(this.TABLE_NAME) + .catch(() => { + return [] + }) + let recordList = [] + for (let rawItem of rawRecordList) { + let record = JSON.parse(rawItem.raw_json) + recordList.push(record) + } + return recordList + } /** * 从数据库中获取微博记录列表 * @param id diff --git a/src/type/namespace/weibo.d.ts b/src/type/namespace/weibo.d.ts index c8da8462..bbb19c98 100644 --- a/src/type/namespace/weibo.d.ts +++ b/src/type/namespace/weibo.d.ts @@ -210,7 +210,7 @@ export type TypePageInfo = { page_pic: { url: 'https://wx4.sinaimg.cn/wap720/48c999f1gy1g6uza6a6qlj20d407egm6.jpg' } - page_url: 'https://media.weibo.cn/article?object_id=1022%3A2309404414789865570479&extparam=lmid--4414789868083942&luicode=10000011&lfid=2304131221171697_-_WEIBO_SECOND_PROFILE_WEIBO&id=2309404414789865570479' + page_url: 'https://m.weibo.cn/feature/applink?scheme=sinaweibo%3A%2F%2Farticlebrowser%3Fobject_id%3D1022%253A2309404446645566701785%26url%3Dhttps%253A%252F%252Fcard.weibo.com%252Farticle%252Fm%252Fshow%252Fid%252F2309404446645566701785%253F_wb_client_%253D1%26extparam%3Dlmid--4446645569803228&luicode=10000011&lfid=2304131913094142_-_WEIBO_SECOND_PROFILE_WEIBO' page_title: '香港反对派推促的美国反华法案对香港经济的影响?' content1: '香港反对派推促的美国反华法案对香港经济的影响?' content2: ''