From d37cb7fda11fc7314c1ef30a0c1e0d2519861c79 Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Tue, 26 Sep 2023 20:48:01 +0800 Subject: [PATCH 1/6] fix published date in chinese not parsed correctly --- packages/readabilityjs/Readability.js | 40 +++++++++++++++++++++------ 1 file changed, 31 insertions(+), 9 deletions(-) diff --git a/packages/readabilityjs/Readability.js b/packages/readabilityjs/Readability.js index 9fc6e6e889..a77b314498 100644 --- a/packages/readabilityjs/Readability.js +++ b/packages/readabilityjs/Readability.js @@ -51,16 +51,31 @@ const extractPublishedDateFromAuthor = (author)=> { return [null, null]; } const authorName = author.replace(/^by\s+/i, ''); - const regex = /(January|February|March|April|May|June|July|August|September|Octrober|November|December)\s\d{1,2},\s\d{2,4}/; - if (!regex.test(author)) { - return [authorName, null]; + const regexes = [ + /(January|February|March|April|May|June|July|August|September|Octrober|November|December)\s\d{1,2},\s\d{2,4}/i, + /(\d{2,4})年(\d{1,2})月(\d{1,2})日/, + ]; + + // English date + if (regexes[0].test(author)) { + const match = author.match(regex) || []; + return [authorName.replace(regex, ''), match[0]]; } + // Chinese date + if (regexes[1].test(author)) { + const match = author.match(regex); + if (match) { + const year = parseInt(match[1], 10); + const month = parseInt(match[2], 10) - 1; // January is 0 in JavaScript Date + const day = parseInt(match[3], 10); + + const publishedAt = new Date(year, month, day); + return [authorName.replace(regex, ''), publishedAt]; + } + } - const matchedDates = author.match(regex) || []; - const publishedAt = matchedDates[0]; - - return [authorName.replace(regex, ''), publishedAt]; + return [authorName, null]; }; /** @@ -204,7 +219,8 @@ Readability.prototype = { DATES_REGEXPS: [ /([0-9]{4}[-\/]?((0[13-9]|1[012])[-\/]?(0[1-9]|[12][0-9]|30)|(0[13578]|1[02])[-\/]?31|02[-\/]?(0[1-9]|1[0-9]|2[0-8]))|([0-9]{2}(([2468][048]|[02468][48])|[13579][26])|([13579][26]|[02468][048]|0[0-9]|1[0-6])00)[-\/]?02[-\/]?29)/i, /(((0[13-9]|1[012])[-/]?(0[1-9]|[12][0-9]|30)|(0[13578]|1[02])[-/]?31|02[-/]?(0[1-9]|1[0-9]|2[0-8]))[-/]?[0-9]{4}|02[-/]?29[-/]?([0-9]{2}(([2468][048]|[02468][48])|[13579][26])|([13579][26]|[02468][048]|0[0-9]|1[0-6])00))/i, - /(((0[1-9]|[12][0-9]|30)[-/]?(0[13-9]|1[012])|31[-/]?(0[13578]|1[02])|(0[1-9]|1[0-9]|2[0-8])[-/]?02)[-/]?[0-9]{4}|29[-/]?02[-/]?([0-9]{2}(([2468][048]|[02468][48])|[13579][26])|([13579][26]|[02468][048]|0[0-9]|1[0-6])00))/i + /(((0[1-9]|[12][0-9]|30)[-/]?(0[13-9]|1[012])|31[-/]?(0[13578]|1[02])|(0[1-9]|1[0-9]|2[0-8])[-/]?02)[-/]?[0-9]{4}|29[-/]?02[-/]?([0-9]{2}(([2468][048]|[02468][48])|[13579][26])|([13579][26]|[02468][048]|0[0-9]|1[0-6])00))/i, + /\d{2,4}年\d{1,2}月\d{1,2}日/ ] }, @@ -1085,9 +1101,15 @@ Readability.prototype = { && this._isValidPublishedDate(node.textContent) ) { try { - if (isNaN(publishedDateParsed)) + if (isNaN(publishedDateParsed) && dateRegExpFound) { // Trying to parse the Date from the found by REGEXP string publishedDateParsed = new Date(dateRegExpFound[0]) + if (isNaN(publishedDateParsed)) { + // Trying to parse the Chinese date + publishedDateParsed = new Date(dateRegExpFound[0].replace(/年|月/g, '-').replace(/日/g, '')) + } + } + if (!isNaN(publishedDateParsed) && !this._articlePublishedDate) this._articlePublishedDate = publishedDateParsed } From 60b7d500a29a93c59ac40400f9e52c3647ecee43 Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Tue, 26 Sep 2023 21:41:27 +0800 Subject: [PATCH 2/6] fix long published date not parsed correctly --- packages/readabilityjs/Readability.js | 41 ++++++++++++++------------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/packages/readabilityjs/Readability.js b/packages/readabilityjs/Readability.js index a77b314498..353270c1cb 100644 --- a/packages/readabilityjs/Readability.js +++ b/packages/readabilityjs/Readability.js @@ -51,27 +51,25 @@ const extractPublishedDateFromAuthor = (author)=> { return [null, null]; } const authorName = author.replace(/^by\s+/i, ''); - const regexes = [ - /(January|February|March|April|May|June|July|August|September|Octrober|November|December)\s\d{1,2},\s\d{2,4}/i, - /(\d{2,4})年(\d{1,2})月(\d{1,2})日/, - ]; + const regex = /(January|February|March|April|May|June|July|August|September|October|November|December)\s\d{1,2},\s\d{2,4}/i; + const chineseDateRegex = /(\d{2,4})年(\d{1,2})月(\d{1,2})日/; // English date - if (regexes[0].test(author)) { + if (regex.test(author)) { const match = author.match(regex) || []; return [authorName.replace(regex, ''), match[0]]; } // Chinese date - if (regexes[1].test(author)) { - const match = author.match(regex); + if (chineseDateRegex.test(author)) { + const match = author.match(chineseDateRegex); if (match) { const year = parseInt(match[1], 10); const month = parseInt(match[2], 10) - 1; // January is 0 in JavaScript Date const day = parseInt(match[3], 10); const publishedAt = new Date(year, month, day); - return [authorName.replace(regex, ''), publishedAt]; + return [authorName.replace(chineseDateRegex, ''), publishedAt]; } } @@ -220,8 +218,9 @@ Readability.prototype = { /([0-9]{4}[-\/]?((0[13-9]|1[012])[-\/]?(0[1-9]|[12][0-9]|30)|(0[13578]|1[02])[-\/]?31|02[-\/]?(0[1-9]|1[0-9]|2[0-8]))|([0-9]{2}(([2468][048]|[02468][48])|[13579][26])|([13579][26]|[02468][048]|0[0-9]|1[0-6])00)[-\/]?02[-\/]?29)/i, /(((0[13-9]|1[012])[-/]?(0[1-9]|[12][0-9]|30)|(0[13578]|1[02])[-/]?31|02[-/]?(0[1-9]|1[0-9]|2[0-8]))[-/]?[0-9]{4}|02[-/]?29[-/]?([0-9]{2}(([2468][048]|[02468][48])|[13579][26])|([13579][26]|[02468][048]|0[0-9]|1[0-6])00))/i, /(((0[1-9]|[12][0-9]|30)[-/]?(0[13-9]|1[012])|31[-/]?(0[13578]|1[02])|(0[1-9]|1[0-9]|2[0-8])[-/]?02)[-/]?[0-9]{4}|29[-/]?02[-/]?([0-9]{2}(([2468][048]|[02468][48])|[13579][26])|([13579][26]|[02468][048]|0[0-9]|1[0-6])00))/i, - /\d{2,4}年\d{1,2}月\d{1,2}日/ - ] + ], + LONG_DATE_REGEXP: /(Jan(uary)?|Feb(ruary)?|Mar(ch)?|Apr(il)?|May|Jun(e)?|Jul(y)?|Aug(ust)?|Sep(tember)?|Oct(ober)?|Nov(ember)?|Dec(ember)?)\s\d{1,2}(?:st|nd|rd|th)?(,)?\s\d{2,4}/i, + CHINESE_DATE_REGEXP: /\d{2,4}年\d{1,2}月\d{1,2}日/, }, UNLIKELY_ROLES: ["menu", "menubar", "complementary", "navigation", "alert", "alertdialog", "dialog"], @@ -1083,8 +1082,16 @@ Readability.prototype = { // we don't want to check for dates in the URL's if (node.tagName.toLowerCase() === 'a') return // Searching for the real date in the text content - let dateRegExpFound = this.REGEXPS.DATES_REGEXPS.find(regexp => regexp.test(node.textContent.trim())) - dateRegExpFound && (dateRegExpFound = dateRegExpFound.exec(node.textContent.trim())) + const content = node.textContent.trim() + let dateFound + const dateRegExpFound = this.REGEXPS.DATES_REGEXPS.find(regexp => regexp.test(content)) + if (dateRegExpFound) { + dateFound = dateRegExpFound.exec(content)[0] + } else if (this.REGEXPS.LONG_DATE_REGEXP.test(content)) { + dateFound = this.REGEXPS.LONG_DATE_REGEXP.exec(content)[0].replace(/st|nd|rd|th/i, '') + } else if (this.REGEXPS.CHINESE_DATE_REGEXP.test(content)) { + dateFound = this.REGEXPS.CHINESE_DATE_REGEXP.exec(content)[0].replace(/年|月/g, '-').replace(/日/g, '') + } let publishedDateParsed try { @@ -1097,17 +1104,13 @@ Readability.prototype = { ((this._someNodeAttribute(node, ({ value, name }) => { if (/href|uri|url/i.test(name)) return false; return this.REGEXPS.publishedDate.test(value) - }) || dateRegExpFound) || (/date/i.test(matchString) && !isNaN(publishedDateParsed))) + }) || dateFound) || (/date/i.test(matchString) && !isNaN(publishedDateParsed))) && this._isValidPublishedDate(node.textContent) ) { try { - if (isNaN(publishedDateParsed) && dateRegExpFound) { + if (isNaN(publishedDateParsed)) { // Trying to parse the Date from the found by REGEXP string - publishedDateParsed = new Date(dateRegExpFound[0]) - if (isNaN(publishedDateParsed)) { - // Trying to parse the Chinese date - publishedDateParsed = new Date(dateRegExpFound[0].replace(/年|月/g, '-').replace(/日/g, '')) - } + publishedDateParsed = new Date(dateFound) } if (!isNaN(publishedDateParsed) && !this._articlePublishedDate) From 33992133285e309b77c8a6f04e3a40ecba139e56 Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Wed, 27 Sep 2023 15:32:42 +0800 Subject: [PATCH 3/6] add test cases from economist and caixin --- packages/readabilityjs/Readability.js | 4 + .../test-pages/caixin/expected-metadata.json | 11 + .../test/test-pages/caixin/expected.html | 45 + .../test/test-pages/caixin/source.html | 2268 +++++++++++++++++ .../test/test-pages/caixin/url.txt | 1 + .../economist/expected-metadata.json | 12 + .../test/test-pages/economist/expected.html | 39 + .../test/test-pages/economist/source.html | 1385 ++++++++++ .../test/test-pages/economist/url.txt | 1 + 9 files changed, 3766 insertions(+) create mode 100644 packages/readabilityjs/test/test-pages/caixin/expected-metadata.json create mode 100644 packages/readabilityjs/test/test-pages/caixin/expected.html create mode 100644 packages/readabilityjs/test/test-pages/caixin/source.html create mode 100644 packages/readabilityjs/test/test-pages/caixin/url.txt create mode 100644 packages/readabilityjs/test/test-pages/economist/expected-metadata.json create mode 100644 packages/readabilityjs/test/test-pages/economist/expected.html create mode 100644 packages/readabilityjs/test/test-pages/economist/source.html create mode 100644 packages/readabilityjs/test/test-pages/economist/url.txt diff --git a/packages/readabilityjs/Readability.js b/packages/readabilityjs/Readability.js index 353270c1cb..08ed199595 100644 --- a/packages/readabilityjs/Readability.js +++ b/packages/readabilityjs/Readability.js @@ -1073,6 +1073,10 @@ Readability.prototype = { }, _checkPublishedDate: function (node, matchString) { + if (this._articlePublishedDate) { + return false; + } + // Skipping meta tags if (node.tagName.toLowerCase() === 'meta') return // return published date if the class name is 'omnivore-published-date' which we added when we scraped the article diff --git a/packages/readabilityjs/test/test-pages/caixin/expected-metadata.json b/packages/readabilityjs/test/test-pages/caixin/expected-metadata.json new file mode 100644 index 0000000000..d18524c51e --- /dev/null +++ b/packages/readabilityjs/test/test-pages/caixin/expected-metadata.json @@ -0,0 +1,11 @@ +{ + "title": "途虎养车港交所挂牌 腾讯为最大外部股东", + "byline": "文|财新 余聪", + "dir": null, + "excerpt": "途虎养车 腾讯国内汽车服务市场高度分散,2022年,途虎养车取得汽车服务收入115亿元,市场份额0.9%", + "siteName": "fakehost", + "previewImage": "https://img.caixin.com/2023-09-26/169572084568190_560_373.jpg", + "publishedDate": "2023-09-26T00:00:00.000Z", + "language": "English", + "readerable": true +} diff --git a/packages/readabilityjs/test/test-pages/caixin/expected.html b/packages/readabilityjs/test/test-pages/caixin/expected.html new file mode 100644 index 0000000000..feb2717e52 --- /dev/null +++ b/packages/readabilityjs/test/test-pages/caixin/expected.html @@ -0,0 +1,45 @@ +
+
+
+

途虎养车港交所挂牌 腾讯为最大外部股东 +

+ + +
+ +

文|财新 余聪

+

2023年09月26日 17:22

+ + + +

试听

+
+

国内汽车服务市场高度分散,2022年,途虎养车取得汽车服务收入115亿元,市场份额0.9%

+
+
+

  【财新网】9月26日,汽车服务平台途虎养车正式在港交所主板挂牌上市。途虎养车( 09690.HK )上市发行价为28港元/股,此前公司披露的发行价区间为28港元/股至31港元/股,即实际发行价为区间下限。当日,途虎养车收报29.5港元/股,较发行价涨5.36%,市值为239.6亿港元。

+

  途虎养车上市不易。途虎养车2022年1月即在港交所递表,2022年8月、2023年3月两次重新递交上市申请材料,终于在2023年8月23日通过聆讯。

+
+ + +
+

+

登录 后获取已订阅的阅读权限

+ + + + + + +
+
+

+
+ + +

  推荐进入财新数据库,可随时查阅公司股价走势、结构人员变化等投资信息。

+

责任编辑:屈运栩 | 版面编辑:刘潇(ZN028)

+
+
\ No newline at end of file diff --git a/packages/readabilityjs/test/test-pages/caixin/source.html b/packages/readabilityjs/test/test-pages/caixin/source.html new file mode 100644 index 0000000000..82ba1f9fae --- /dev/null +++ b/packages/readabilityjs/test/test-pages/caixin/source.html @@ -0,0 +1,2268 @@ + + + + + + + + + + + 途虎养车港交所挂牌 腾讯为最大外部股东_财新网_财新网 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+
+ + +
+
+
+ + +
财新传媒 + + +
+
+
+
+ 财新网 > 汽车 > 正文 +
+ +
+ +
+
+ +
+
+ + +
+ +
+ +
+
+ +
+ + + + +
+
+

+ 途虎养车港交所挂牌 腾讯为最大外部股东 +

+ +
+ +
+ 文|财新 余聪 +
+ 2023年09月26日 17:22 + + + 试听 +
+
+ 国内汽车服务市场高度分散,2022年,途虎养车取得汽车服务收入115亿元,市场份额0.9% +
+
+
+
+
+ +
+
+ 上海,一处途虎养车门店。途虎养车2022年1月即在港交所递表,2022年8月、2023年3月两次重新递交上市申请材料,终于在2023年8月23日通过聆讯。图:Qilai Shen/视觉中国 +
+
+
+
+ + +
+ + +
+

+   【财新网】9月26日,汽车服务平台途虎养车正式在港交所主板挂牌上市。途虎养车( 09690.HK )上市发行价为28港元/股,此前公司披露的发行价区间为28港元/股至31港元/股,即实际发行价为区间下限。当日,途虎养车收报29.5港元/股,较发行价涨5.36%,市值为239.6亿港元。 +

+

+   途虎养车上市不易。途虎养车2022年1月即在港交所递表,2022年8月、2023年3月两次重新递交上市申请材料,终于在2023年8月23日通过聆讯。 +

+
+
+
+
+ + +
+
+ +
+
+ + +
+ 登录 后获取已订阅的阅读权限 +
+
+
+ 财新通会员
+ 可畅读全文 +
订阅/会员升级 +
+
+
+
+
+
+ 请朋友免费读财新 +
+
+
+
+ +
+
+
+ + + + +
+
+
+ + +
+ +
+
+ +
+
+ +
+
+
+
+
+
+ +
+ +
+

+   推荐进入财新数据库,可随时查阅公司股价走势、结构人员变化等投资信息。 +

+
+
+
+ 责任编辑:屈运栩 | 版面编辑:刘潇(ZN028) +
+ + +
+ +
+ +
+
+ 话题: +
+
+ #港交所+关注 +
+
+ #腾讯+关注 +
+
+ #京东+关注 +
+
+
+ +
+ +
+ +
+ +
+
+ + + +
+ +
+ + + + +
+ + +
+ +
+ +
+

+ 图片推荐 +

+
+ + +
+ +
+
+ +
+
+ + + + +
+ +
+
+ +
+ +
+ +
+ +
+
+ +
+
+ + +
+
+
+ 财新网主编精选版电邮 + 样例 +
+
+ 财新网新闻版电邮全新升级!财新网主编精心编写,每个工作日定时投递,篇篇重磅,可信可引。 +
+
+ 订阅 +
+
+
+ + + +
+ + + + + + +
+

+ 视频 +

+
+ +
+
 + + + + +
+
+
+ +
+
+ + + + + + +
+ + + + + + + + + + + + + + + + +
+
+
+

+ +

+
+ +
+ + + diff --git a/packages/readabilityjs/test/test-pages/caixin/url.txt b/packages/readabilityjs/test/test-pages/caixin/url.txt new file mode 100644 index 0000000000..dbae9da787 --- /dev/null +++ b/packages/readabilityjs/test/test-pages/caixin/url.txt @@ -0,0 +1 @@ +https://www.caixin.com/2023-09-26/102112537.html \ No newline at end of file diff --git a/packages/readabilityjs/test/test-pages/economist/expected-metadata.json b/packages/readabilityjs/test/test-pages/economist/expected-metadata.json new file mode 100644 index 0000000000..c2b5d1cdf2 --- /dev/null +++ b/packages/readabilityjs/test/test-pages/economist/expected-metadata.json @@ -0,0 +1,12 @@ +{ + "title": "Could the 14th Amendment bar Donald Trump from becoming president again?", + "byline": null, + "dir": null, + "excerpt": "Some conservative legal scholars think so—but the idea is a long shot", + "siteName": "The Economist", + "siteIcon": "http://fakehost/favicon.ico", + "previewImage": "https://www.economist.com/img/b/1280/720/90/media-assets/image/20230923_BLP505.jpg", + "publishedDate": "2023-09-19T16:00:00.000Z", + "language": "English", + "readerable": true +} diff --git a/packages/readabilityjs/test/test-pages/economist/expected.html b/packages/readabilityjs/test/test-pages/economist/expected.html new file mode 100644 index 0000000000..9b66b5b0de --- /dev/null +++ b/packages/readabilityjs/test/test-pages/economist/expected.html @@ -0,0 +1,39 @@ +
+
+
+
+
+
+

Some conservative legal scholars think so—but the idea is a long shot 

+
+
+
+ Donald Trump speaks on the stage at South Dakota Republican party rally in Rapid City +
+ image: Reuters +
+
+
+
+
+

+ DONALD TRUMP’S campaign for re-election is dogged with legal woes. The former president faces the prospect of four criminal trials on felony charges, which will overlap with the Republican primary season and the general-election campaign. But another type of legal trouble could further complicate his return to the White House. +

+

America’s constitution—which Mr Trump swore to uphold on January 20th 2017—includes a provision barring people who have taken such an oath from holding federal office if they have “engaged in insurrection or rebellion” against the country or “given aid or comfort to the enemies thereof”. This language, found in Section 3 of the 14th Amendment, was ratified after the civil war to prevent former Confederate rebels from having a hand in running the country they had tried to saw in half. The disqualification clause has seen something of a renaissance. A year ago, Couy Griffin, then a county commissioner in New Mexico, was removed from office by a state judge for engaging in insurrection  at the Capitol on January 6th. But could this constitutional provision really thwart Mr Trump’s quest for a second presidential term?

+
+
+

The Economist today +

+

Handpicked stories, in your inbox

+

A daily newsletter with the best of our journalism

+
+

+

+
+
+
+
+
+
+
+
\ No newline at end of file diff --git a/packages/readabilityjs/test/test-pages/economist/source.html b/packages/readabilityjs/test/test-pages/economist/source.html new file mode 100644 index 0000000000..33239ec9a9 --- /dev/null +++ b/packages/readabilityjs/test/test-pages/economist/source.html @@ -0,0 +1,1385 @@ + + + + + + + Could the 14th Amendment bar Donald Trump from becoming president again? + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+
+
+
+
+
+
+
+
+
+ + + The Economist + + + Skip to content +
+ +
+
+ Subscribe +
+ +
+
+ + +
+ +
+
+ +
+ +
+
+
+
+
+
+
+
+
+
+ +
+
+
+
+
+
+ +

+ Could the 14th Amendment bar Donald Trump from becoming president again? +

+

+ Some conservative legal scholars think so—but the idea is a long shot  +

+
+
+
+
+
+ Donald Trump speaks on the stage at South Dakota Republican party rally in Rapid City +
+ image: Reuters +
+
+
+
+
+
+
+
+
+
+ +
+
+
+
+
+ +
+
+
+
+
+
+
+
+

+ DONALD TRUMP’S campaign for re-election is dogged with legal woes. The former president faces the prospect of four criminal trials on felony charges, which will overlap with the Republican primary season and the general-election campaign. But another type of legal trouble could further complicate his return to the White House. +

+

+ America’s constitution—which Mr Trump swore to uphold on January 20th 2017—includes a provision barring people who have taken such an oath from holding federal office if they have “engaged in insurrection or rebellion” against the country or “given aid or comfort to the enemies thereof”. This language, found in Section 3 of the 14th Amendment, was ratified after the civil war to prevent former Confederate rebels from having a hand in running the country they had tried to saw in half. The disqualification clause has seen something of a renaissance. A year ago, Couy Griffin, then a county commissioner in New Mexico, was removed from office by a state judge for engaging in insurrection  at the Capitol on January 6th. But could this constitutional provision really thwart Mr Trump’s quest for a second presidential term? +

+
+
+
+ +
+
+
+ +
+
+
+
+
+
+
+ +
Reuse this content +
+
+
+
+
+
+
+
+ + + + + + + + + + + + + + + + + + + + +
+
+
+ The Economist today +
+

+ Handpicked stories, in your inbox +

+
+

+ A daily newsletter with the best of our journalism +

+
+
+
+
+
+ +
+
+
+ +
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+

+ More from The Economist explains +

+
+
+
+
+
+
+ +
+
+

+ What is America’s farm bill, and why does it matter? +

+

+ It has transformed the agriculture industry and given millions of Americans food security +

+
+
+
+
+
+
+ +
+
+

+ Why Poland is halting its supply of weapons to Ukraine +

+

+ A row over duty-free grain has escalated rapidly—but Poland’s government is also posturing +

+
+
+
+
+
+ +
+
+

+ What is Khalistan, the independent homeland some Sikhs yearn for? +

+

+ The separatist movement is now largely propagated from abroad +

+
+
+
+
+
+
+
+
+
+
+
+ +
+
+
+
+ + + + + + + + + + + + + +
+ +
+ + diff --git a/packages/readabilityjs/test/test-pages/economist/url.txt b/packages/readabilityjs/test/test-pages/economist/url.txt new file mode 100644 index 0000000000..0e967ebe7d --- /dev/null +++ b/packages/readabilityjs/test/test-pages/economist/url.txt @@ -0,0 +1 @@ +https://www.economist.com/the-economist-explains/2023/09/20/could-the-14th-amendment-bar-donald-trump-from-becoming-president-again \ No newline at end of file From 0ccf332ab02c279397bbf45a292ef1fa1eed1a4f Mon Sep 17 00:00:00 2001 From: sywhb Date: Wed, 27 Sep 2023 07:33:55 +0000 Subject: [PATCH 4/6] Update generated html --- packages/readabilityjs/test/index.html | 652 +++++++++--------- .../test/test-pages/caixin/distiller.html | 20 + .../test/test-pages/economist/distiller.html | 9 + 3 files changed, 361 insertions(+), 320 deletions(-) create mode 100644 packages/readabilityjs/test/test-pages/caixin/distiller.html create mode 100644 packages/readabilityjs/test/test-pages/economist/distiller.html diff --git a/packages/readabilityjs/test/index.html b/packages/readabilityjs/test/index.html index 8f4ec6a1e5..c1eb5fe9b5 100644 --- a/packages/readabilityjs/test/index.html +++ b/packages/readabilityjs/test/index.html @@ -14,70 +14,88 @@ diff --git a/packages/readabilityjs/test/test-pages/caixin/distiller.html b/packages/readabilityjs/test/test-pages/caixin/distiller.html new file mode 100644 index 0000000000..adae61ed12 --- /dev/null +++ b/packages/readabilityjs/test/test-pages/caixin/distiller.html @@ -0,0 +1,20 @@ +

+   【财新网】9月26日,汽车服务平台途虎养车正式在港交所主板挂牌上市。途虎养车( 09690.HK )上市发行价为28港元/股,此前公司披露的发行价区间为28港元/股至31港元/股,即实际发行价为区间下限。当日,途虎养车收报29.5港元/股,较发行价涨5.36%,市值为239.6亿港元。 +

+   途虎养车上市不易。途虎养车2022年1月即在港交所递表,2022年8月、2023年3月两次重新递交上市申请材料,终于在2023年8月23日通过聆讯。 +

+ + +
+ 后获取已订阅的阅读权限 +
+ 财新通会员
+ 可畅读全文 +
+

+   推荐进入财新数据库,可随时查阅公司股价走势、结构人员变化等投资信息。 +

+ 责任编辑:屈运栩 | 版面编辑:刘潇(ZN028) +
+ 话题: +
\ No newline at end of file diff --git a/packages/readabilityjs/test/test-pages/economist/distiller.html b/packages/readabilityjs/test/test-pages/economist/distiller.html new file mode 100644 index 0000000000..a174e09511 --- /dev/null +++ b/packages/readabilityjs/test/test-pages/economist/distiller.html @@ -0,0 +1,9 @@ +

+ Some conservative legal scholars think so—but the idea is a long shot  +

Donald Trump speaks on the stage at South Dakota Republican party rally in Rapid City
image: Reuters
+ +

+ D

ONALD TRUMP’S

campaign for re-election is dogged with legal woes. The former president faces the prospect of four criminal trials on felony charges, which will overlap with the Republican primary season and the general-election campaign. But another type of legal trouble could further complicate his return to the White House. +

+ America’s constitution—which Mr Trump swore to uphold on January 20th 2017—includes a provision barring people who have taken such an oath from holding federal office if they have “engaged in insurrection or rebellion” against the country or “given aid or comfort to the enemies thereof”. This language, found in Section 3 of the 14th Amendment, was ratified after the civil war to prevent former Confederate rebels from having a hand in running the country they had tried to saw in half. The disqualification clause has seen something of a renaissance. A year ago, Couy Griffin, then a county commissioner in New Mexico, was removed from office by a state judge for engaging in insurrection  at the Capitol on January 6th. But could this constitutional provision really thwart Mr Trump’s quest for a second presidential term? +

\ No newline at end of file From 55e274a32c1627d81fe2205d09e6a176b3d95ff8 Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Thu, 28 Sep 2023 10:34:05 +0800 Subject: [PATCH 5/6] better match of published date and avoid removing date string which is not a published date --- packages/readabilityjs/Readability.js | 10 +- .../test/test-pages/caixin/distiller.html | 20 - .../test-pages/caixin/expected-metadata.json | 11 - .../test/test-pages/caixin/expected.html | 45 - .../test/test-pages/caixin/source.html | 2268 ----------------- .../test/test-pages/caixin/url.txt | 1 - .../milkroad/expected-metadata.json | 3 +- .../test/test-pages/milkroad/expected.html | 541 ++-- .../test-pages/nytimes/expected-metadata.json | 2 + .../test/test-pages/nytimes/expected.html | 2 - .../test/test-pages/telegr.ph/expected.html | 6 - 11 files changed, 273 insertions(+), 2636 deletions(-) delete mode 100644 packages/readabilityjs/test/test-pages/caixin/distiller.html delete mode 100644 packages/readabilityjs/test/test-pages/caixin/expected-metadata.json delete mode 100644 packages/readabilityjs/test/test-pages/caixin/expected.html delete mode 100644 packages/readabilityjs/test/test-pages/caixin/source.html delete mode 100644 packages/readabilityjs/test/test-pages/caixin/url.txt diff --git a/packages/readabilityjs/Readability.js b/packages/readabilityjs/Readability.js index 08ed199595..abdbfa8d0e 100644 --- a/packages/readabilityjs/Readability.js +++ b/packages/readabilityjs/Readability.js @@ -219,8 +219,8 @@ Readability.prototype = { /(((0[13-9]|1[012])[-/]?(0[1-9]|[12][0-9]|30)|(0[13578]|1[02])[-/]?31|02[-/]?(0[1-9]|1[0-9]|2[0-8]))[-/]?[0-9]{4}|02[-/]?29[-/]?([0-9]{2}(([2468][048]|[02468][48])|[13579][26])|([13579][26]|[02468][048]|0[0-9]|1[0-6])00))/i, /(((0[1-9]|[12][0-9]|30)[-/]?(0[13-9]|1[012])|31[-/]?(0[13578]|1[02])|(0[1-9]|1[0-9]|2[0-8])[-/]?02)[-/]?[0-9]{4}|29[-/]?02[-/]?([0-9]{2}(([2468][048]|[02468][48])|[13579][26])|([13579][26]|[02468][048]|0[0-9]|1[0-6])00))/i, ], - LONG_DATE_REGEXP: /(Jan(uary)?|Feb(ruary)?|Mar(ch)?|Apr(il)?|May|Jun(e)?|Jul(y)?|Aug(ust)?|Sep(tember)?|Oct(ober)?|Nov(ember)?|Dec(ember)?)\s\d{1,2}(?:st|nd|rd|th)?(,)?\s\d{2,4}/i, - CHINESE_DATE_REGEXP: /\d{2,4}年\d{1,2}月\d{1,2}日/, + LONG_DATE_REGEXP: /^(Jan(uary)?|Feb(ruary)?|Mar(ch)?|Apr(il)?|May|Jun(e)?|Jul(y)?|Aug(ust)?|Sep(tember)?|Oct(ober)?|Nov(ember)?|Dec(ember)?)\s\d{1,2}(?:st|nd|rd|th)?(,)?\s\d{2,4}$/i, + CHINESE_DATE_REGEXP: /^\d{2,4}年\d{1,2}月\d{1,2}日$/, }, UNLIKELY_ROLES: ["menu", "menubar", "complementary", "navigation", "alert", "alertdialog", "dialog"], @@ -1073,10 +1073,6 @@ Readability.prototype = { }, _checkPublishedDate: function (node, matchString) { - if (this._articlePublishedDate) { - return false; - } - // Skipping meta tags if (node.tagName.toLowerCase() === 'meta') return // return published date if the class name is 'omnivore-published-date' which we added when we scraped the article @@ -1100,7 +1096,7 @@ Readability.prototype = { let publishedDateParsed try { // Trying to parse the Date from the content itself - publishedDateParsed = new Date(node.textContent.trim()) + publishedDateParsed = new Date(content) } catch (error) { } if ( diff --git a/packages/readabilityjs/test/test-pages/caixin/distiller.html b/packages/readabilityjs/test/test-pages/caixin/distiller.html deleted file mode 100644 index adae61ed12..0000000000 --- a/packages/readabilityjs/test/test-pages/caixin/distiller.html +++ /dev/null @@ -1,20 +0,0 @@ -

-   【财新网】9月26日,汽车服务平台途虎养车正式在港交所主板挂牌上市。途虎养车( 09690.HK )上市发行价为28港元/股,此前公司披露的发行价区间为28港元/股至31港元/股,即实际发行价为区间下限。当日,途虎养车收报29.5港元/股,较发行价涨5.36%,市值为239.6亿港元。 -

-   途虎养车上市不易。途虎养车2022年1月即在港交所递表,2022年8月、2023年3月两次重新递交上市申请材料,终于在2023年8月23日通过聆讯。 -

- - -
- 后获取已订阅的阅读权限 -
- 财新通会员
- 可畅读全文 -
-

-   推荐进入财新数据库,可随时查阅公司股价走势、结构人员变化等投资信息。 -

- 责任编辑:屈运栩 | 版面编辑:刘潇(ZN028) -
- 话题: -
\ No newline at end of file diff --git a/packages/readabilityjs/test/test-pages/caixin/expected-metadata.json b/packages/readabilityjs/test/test-pages/caixin/expected-metadata.json deleted file mode 100644 index d18524c51e..0000000000 --- a/packages/readabilityjs/test/test-pages/caixin/expected-metadata.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "title": "途虎养车港交所挂牌 腾讯为最大外部股东", - "byline": "文|财新 余聪", - "dir": null, - "excerpt": "途虎养车 腾讯国内汽车服务市场高度分散,2022年,途虎养车取得汽车服务收入115亿元,市场份额0.9%", - "siteName": "fakehost", - "previewImage": "https://img.caixin.com/2023-09-26/169572084568190_560_373.jpg", - "publishedDate": "2023-09-26T00:00:00.000Z", - "language": "English", - "readerable": true -} diff --git a/packages/readabilityjs/test/test-pages/caixin/expected.html b/packages/readabilityjs/test/test-pages/caixin/expected.html deleted file mode 100644 index feb2717e52..0000000000 --- a/packages/readabilityjs/test/test-pages/caixin/expected.html +++ /dev/null @@ -1,45 +0,0 @@ -
-
-
-

途虎养车港交所挂牌 腾讯为最大外部股东 -

- - -
- -

文|财新 余聪

-

2023年09月26日 17:22

- - - -

试听

-
-

国内汽车服务市场高度分散,2022年,途虎养车取得汽车服务收入115亿元,市场份额0.9%

-
-
-

  【财新网】9月26日,汽车服务平台途虎养车正式在港交所主板挂牌上市。途虎养车( 09690.HK )上市发行价为28港元/股,此前公司披露的发行价区间为28港元/股至31港元/股,即实际发行价为区间下限。当日,途虎养车收报29.5港元/股,较发行价涨5.36%,市值为239.6亿港元。

-

  途虎养车上市不易。途虎养车2022年1月即在港交所递表,2022年8月、2023年3月两次重新递交上市申请材料,终于在2023年8月23日通过聆讯。

-
- - -
-

-

登录 后获取已订阅的阅读权限

- - - - - - -
-
-

-
- - -

  推荐进入财新数据库,可随时查阅公司股价走势、结构人员变化等投资信息。

-

责任编辑:屈运栩 | 版面编辑:刘潇(ZN028)

-
-
\ No newline at end of file diff --git a/packages/readabilityjs/test/test-pages/caixin/source.html b/packages/readabilityjs/test/test-pages/caixin/source.html deleted file mode 100644 index 82ba1f9fae..0000000000 --- a/packages/readabilityjs/test/test-pages/caixin/source.html +++ /dev/null @@ -1,2268 +0,0 @@ - - - - - - - - - - - 途虎养车港交所挂牌 腾讯为最大外部股东_财新网_财新网 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
-
- - -
-
-
- - -
财新传媒 - - -
-
-
-
- 财新网 > 汽车 > 正文 -
- -
- -
-
- -
-
- - -
- -
- -
-
- -
- - - - -
-
-

- 途虎养车港交所挂牌 腾讯为最大外部股东 -

- -
- -
- 文|财新 余聪 -
- 2023年09月26日 17:22 - - - 试听 -
-
- 国内汽车服务市场高度分散,2022年,途虎养车取得汽车服务收入115亿元,市场份额0.9% -
-
-
-
-
- -
-
- 上海,一处途虎养车门店。途虎养车2022年1月即在港交所递表,2022年8月、2023年3月两次重新递交上市申请材料,终于在2023年8月23日通过聆讯。图:Qilai Shen/视觉中国 -
-
-
-
- - -
- - -
-

-   【财新网】9月26日,汽车服务平台途虎养车正式在港交所主板挂牌上市。途虎养车( 09690.HK )上市发行价为28港元/股,此前公司披露的发行价区间为28港元/股至31港元/股,即实际发行价为区间下限。当日,途虎养车收报29.5港元/股,较发行价涨5.36%,市值为239.6亿港元。 -

-

-   途虎养车上市不易。途虎养车2022年1月即在港交所递表,2022年8月、2023年3月两次重新递交上市申请材料,终于在2023年8月23日通过聆讯。 -

-
-
-
-
- - -
-
- -
-
- - -
- 登录 后获取已订阅的阅读权限 -
-
-
- 财新通会员
- 可畅读全文 -
订阅/会员升级 -
-
-
-
-
-
- 请朋友免费读财新 -
-
-
-
- -
-
-
- - - - -
-
-
- - -
- -
-
- -
-
- -
-
-
-
-
-
- -
- -
-

-   推荐进入财新数据库,可随时查阅公司股价走势、结构人员变化等投资信息。 -

-
-
-
- 责任编辑:屈运栩 | 版面编辑:刘潇(ZN028) -
- - -
- -
- -
-
- 话题: -
-
- #港交所+关注 -
-
- #腾讯+关注 -
-
- #京东+关注 -
-
-
- -
- -
- -
- -
-
- - - -
- -
- - - - -
- - -
- -
- -
-

- 图片推荐 -

-
- - -
- -
-
- -
-
- - - - -
- -
-
- -
- -
- -
- -
-
- -
-
- - -
-
-
- 财新网主编精选版电邮 - 样例 -
-
- 财新网新闻版电邮全新升级!财新网主编精心编写,每个工作日定时投递,篇篇重磅,可信可引。 -
-
- 订阅 -
-
-
- - - -
- - - - - - -
-

- 视频 -

-
- -
-
 - - - - -
-
-
- -
-
- - - - - - -
- - - - - - - - - - - - - - - - -
-
-
-

- -

-
- -
- - - diff --git a/packages/readabilityjs/test/test-pages/caixin/url.txt b/packages/readabilityjs/test/test-pages/caixin/url.txt deleted file mode 100644 index dbae9da787..0000000000 --- a/packages/readabilityjs/test/test-pages/caixin/url.txt +++ /dev/null @@ -1 +0,0 @@ -https://www.caixin.com/2023-09-26/102112537.html \ No newline at end of file diff --git a/packages/readabilityjs/test/test-pages/milkroad/expected-metadata.json b/packages/readabilityjs/test/test-pages/milkroad/expected-metadata.json index 13d56b5f8b..33cb834b53 100644 --- a/packages/readabilityjs/test/test-pages/milkroad/expected-metadata.json +++ b/packages/readabilityjs/test/test-pages/milkroad/expected-metadata.json @@ -6,6 +6,7 @@ "siteName": "fakehost", "siteIcon": "https://media.beehiiv.net/uploads/publication/logo/654e9594-184c-4884-8e02-e6e58a3a6871/thumb_Untitled__1000_x_1000_px___2_.png", "previewImage": "https://media.beehiiv.net/uploads/asset/file/30564/Screenshot_2022-04-08_115750.png", - "publishedDate": null, + "publishedDate": "2022-04-08T16:00:00.000Z", + "language": "English", "readerable": true } diff --git a/packages/readabilityjs/test/test-pages/milkroad/expected.html b/packages/readabilityjs/test/test-pages/milkroad/expected.html index 8fd76a6414..3d0cafc221 100644 --- a/packages/readabilityjs/test/test-pages/milkroad/expected.html +++ b/packages/readabilityjs/test/test-pages/milkroad/expected.html @@ -1,282 +1,273 @@ -
+
-
+
+

+

+

+ GM. This is the Milk Road, we cut the crypto sandwich into triangles, just the way you like it.  +

+

+ Today's estimated read time: 4 minutes & 7 seconds +

+

We’re skipping price action today, because we have a LOT of juicy stuff to go over. 

+

Juicy? What happened?

+

Day 2 of The Bitcoin Conference was fire, so I watched all the talks and summarized all the important stuff for you.

+

+

+

+ What happened:  +

+

Peter Thiel (co–founder of Paypal, and first investor in Facebook) got on stage and gave a crazy talk. 

+

+ I kept a running diary of my thoughts during his talk +

+

+ 0:00 - Wow, they got Peter Thiel to talk. *gets pen & notepad out* +

+

+ 0:04 - They start by playing a video clip of Peter talking at a conference in 1999. He’s making 2 predictions:  +

-

- By Shaan Puri & Ben Levy -

-

-

+
    +
  1. It’s 1999, but he’s predicting that 1B people will have cell phones connected to the internet in 5 years (he’s right, but it took about 10 years instead of 5)
  2. +
  3. People will have money on their cell phone. It will be a currency that isn’t controlled by their local government (sounds like crypto, without saying crypto) 
  4. +
-
-

-

-

- GM. This is the Milk Road, we cut the crypto sandwich into triangles, just the way you like it.  -

-

- Today's estimated read time: 4 minutes & 7 seconds -

-

We’re skipping price action today, because we have a LOT of juicy stuff to go over. 

-

Juicy? What happened?

-

Day 2 of The Bitcoin Conference was fire, so I watched all the talks and summarized all the important stuff for you.

-

-

-

- What happened:  -

-

Peter Thiel (co–founder of Paypal, and first investor in Facebook) got on stage and gave a crazy talk. 

-

- I kept a running diary of my thoughts during his talk -

-

- 0:00 - Wow, they got Peter Thiel to talk. *gets pen & notepad out* -

-

- 0:04 - They start by playing a video clip of Peter talking at a conference in 1999. He’s making 2 predictions:  -

-
-
    -
  1. It’s 1999, but he’s predicting that 1B people will have cell phones connected to the internet in 5 years (he’s right, but it took about 10 years instead of 5)
  2. -
  3. People will have money on their cell phone. It will be a currency that isn’t controlled by their local government (sounds like crypto, without saying crypto) 
  4. -
-
-

- 2:40 - OK, the real Peter is on stage now. Crowd cheers. They sound a little tipsy even though it’s 11am.. Nice. This is why we do conferences in Miami.  -

-

He starts off with a bold move. He’s waving a stack of $100 bills in the air.

-

He asks: “what is money?,” “this always gets people's attention,” “it’s not good as toilet paper. It’s not wallpaper. But people want it. Do you want it? Come get it!”

-

-

-

- 3:16 - He throws the wad of cash at someone in the crowd then laughs as they fight over it: “I thought you guys were supposed to be Bitcon maximalists.” The joke lands. -

-

- 3:30 - He talks about the early days of PayPal. Shows a picture of him and Elon Musk (they were both CEOs of Paypal early on). Pretty wild to think about how much talent was in 1 startup.  -

-

Here’s who worked at Paypal: 

-
-
    -
  • Peter - created PayPal, first investor in Facebook
  • -
  • Elon - CEO of Tesla, SpaceX
  • -
  • The founders of YouTube
  • -
  • The founder of LinkedIn
  • -
  • The founder of Yelp 
  • -
  • The founder of Kiva.org 
  • -
  • The founder of Yammer
  • -
  • The COO of Square
  • -
  • The future CEO of Reddit
  • -
-
-

This is called the PayPal mafia^. What a killer roster of talent. OK, back to Peter’s talk: 

-

- 3:55 - He shows a slide from the first pitch deck of PayPal. The initial idea was a lot like Bitcoin. A currency that lived outside of the banking system. He said “we didn’t know anything” back then, but they had big ambitions. -

-

-

-

- 4:12 - But that was really hard. So within a couple years, they had created PayPal. Which worked with the banking system instead of replacing it. This was more practical, but less ambitious. It was just a payment system, not a new kind of money.  -

-

- 6:49 - Payment system vs. Money. Why does it matter?  -

-

Peter points out an important thing about money. Velocity. 

-

The simple idea is that people will spend their “less valuable” money first. Back in the day, people would hoard gold, and spend silver. 

-

Today, people will save Bitcoin, and spend dollars.

-

This means some money moves “fast” throughout the economy (hopping from person to person). To most people, that makes a currency more valuable! It’s being used! Must be more valuable, right?

-

The velocity theory says that’s wrong. The slower moving money will end up being higher priced. Nobody is ‘holding’ the fast money, so it doesn’t go up in price (sellers willing to sell). 

-

Anyways, Peter’s belief is that Bitcoin is “slow money” and therefore, it’s valuable money. He says Ethereum is a payments network, high velocity, and lower value.

-

-

-

If Peter’s right, Bitcoin will end up a lot more valuable than ETH over time. (Editor’s note: I totally disagree. I think he’s wrong in thinking ETH is a payments network)

-

- 7:36 - OK, this is where the talk starts getting aggressive.  -

-

He puts up this slide. The crowd goes wild. Crypto is so tribal.

-

He is trying to talk but you can barely hear him, because the red-blooded-american crowd is going wild for no reason.

-

-

-

- 8:13 - He says Bitcoin is like gold (a store of value). And ETH is like Visa (a payments network).  -

-

He points out that Gold has a total market value of $12T, and the biggest payment network (Visa) is worth 26x less. 

-

If Peter is right, Bitcoin has more upside than ETH (again, I disagree,but let’s let Peter talk. He’s the billionaire chess grandmaster. I’m just a hairy dude with a free newsletter)

-

- 11:26 - He puts up this slide showing that back in the day (1980) Gold was equal to the total value of all stocks. (Both were $2.5T)  -

-

Fast forward to today, and stocks are worth 10x the total value of gold.

-

-

-

His point is that thinking of Bitcoin as “digital gold” is underselling how big it can be. Perhaps it will be on par with the S&P 500 (which today is a way that people store their wealth). 

-

- 12:49 - Bitcoin is $43k today, where does it go?  -

-

“Bitcoin is the most honest market in the world. It went from $5k to $50K in the last 2 years, showing us that inflation was real, and the central banks are bankrupt. This is the end of the fiat money regime.” 

-

“Mr. Powell should be extremely grateful to Bitcoin. It’s the last warning they are going to get. They have chosen to ignore it, and will pay the consequences for it in the years ahead”

-

- 14:00 - Who are the enemies of Bitcoin? Who is holding it back from a 10x or 100x rise? -

-

- Bitcoin has 3 enemy types:  -

-

- Enemy #1 - The Sociopathic Grandpa from Omaha  -

-

-

-

He’s honest about his hate for Bitcoin. He’s incentivized to hate Bitcoin for 2 reasons: 

-
-
    -
  1. He’s a winner of the current system 
  2. -
  3. He’s a money manager. If all people need to do is “buy bitcoin and chill” for a decade, all money managers are out of business
  4. -
-
-

- Enemy #2 - The NYC Bankers  -

-

Like Jamie Dimon from JP Morgan. 

-

For years they called crypto “worthless.” Now it is undeniable, and they are losing customers, so they’re pivoting to offering crypto assets. 

-

-

-

- Enemy #3 - Nameless, Faceless Bureaucrats -

-

These people hide behind a more passive-aggressive tone. They say things like “I see huge opportunity in blockchain” 

-

Anyone who says they are “pro-blockchain” is actually “anti-bitcoin” according to Peter. 

-

- 20:36- The Grand Finale -

-

-

-

He ended with this hilarious slide design. He calls these enemies the “gerontocracy” (aka old rich). I gotta say, whoever photoshopped this slide is my hero. Pure art. Genius. Like Picasso in his prime. 

-

He says it’s OLD vs. YOUTH. And calls bitcoin a revolutionary youth movement. His final line: “We have to leave this conference and take over the world.”   -

-

- *adds take-over-the-world to my to-do list* -

-

- The Milk Road’s Take:  -

-

I’ve listened to Peter Thiel for over a decade. The guy is super smart and loves to go against the grain. He’s great at doing this, painting an enemy, and getting people’s attention with bold claims. 

-

On one hand, I love the conviction he has for Bitcoin, and think that it’s cool he’s been predicting this since 1999. 

-

On the other hand, I think he’s wrong about Ethereum, and I find the whole tribal / “us against the world” thing from the conference to be a turnoff. The macho attitude just feels very forced when nerds do it. 

-

I mean look at this. Michael Saylor came out for his talk and hit the crowd with the 1999 “Raise the Roof” move 🤣

-

-

-

- That wasn’t all for Day 2… -

-

- Jack Mallers, the founder of Strike, announced they have partnered with the POS  companies that power huge stores like Starbucks, McDonalds, Walmart & Walgreens to allow anyone to pay in-store with Bitcoin over the lightning network. Every credit card swipe has a 3% fee and is slow to settle. Lightning is essentially free (~0% fee) and settles instantly.  -

-

- Cash App announces their new crypto services that will 1) let users auto-invest a percent of their paychecks into bitcoin and 2) round payments up to the nearest dollar to buy bitcoin with the difference (kinda like Acorns).  -

-

- Robinhood announced they’re rolling out their new crypto wallet to 2M users. -

-

- Ricardo Salinas, a Mexican billionaire, says his liquid portfolio is 60% Bitcoin. In 2020, Bitcoin only made up about 10% of his portfolio.  That’s what we call growth, baby! -

-

-

-

-

- TODAY'S MILK ROAD IS BROUGHT TO YOU BY CRYPTOTRADER.TAX -

-

-

When it comes to taxes, there are two types of people. 

-
-
    -
  1. Those who do taxes early (also known as psychopaths)
  2. -
  3. The rest of us who wait until the last minute
  4. -
-
-

With less than 2 weeks left to file taxes, we’ve partnered with Cryptotrader.tax (soon to be CoinLedger) to make sure you got everything you need.

-

- A common misconception: -

-

“I don’t get taxed until I cash out to fiat”. 

-

We hate to be the bearer of bad news, but you do in fact realize taxable events and capital gains when trading crypto > crypto.

-

It sucks, but that's the law for ya.

-

-

-

-

- FUNDING FRIDAY -

-

-

- Over $700M was raised in Web3 companies and funds this week! Who got the bread this time? -

-

- Dank Bank got $4.2M to let people buy & sell Memes as NFTs. Sounds silly, but…Meme’s are a part of culture. They are globally recognizable. Maybe memes will be collected as other works of culture & art?  -

-

- Fractal got $35M to build a new NFT gaming marketplace. This is Justin Kan’s new startup (previously JustinTV, Twitch). Worth keeping an eye on -

-

- Leap got $10.2M to build out a “super” wallet for the Terra ecosystem that can show off your NFTs, track the value of your wallet, and integrate with the most popular Terra protocols -

-

- GOALS got $15M to build the “FIFA of Web3” utilizing NFTs. The cool thing here is you’ll own all the assets in the game - from the players to the cleats they wear - and it can all be sold or traded. They still need to build an awesome game though.. -

-

- Hivemapper got $18M to build a decentralized Google Maps by giving drivers digital tokens to put dashcams in their cars. You know we love easy ways to make money. (They’re leading the wave of IRL → tokens that we talked about here). -

-

Dive into our full database of companies that have fundraised here!

-

-

-

-

- SIT BACK AND RELAX THIS WEEKEND WITH THE VITALIK SPECIAL -

-

-
-

Send a video of you drinking a Vitalik cocktail to @milkroaddaily on Twitter and we’ll retweet it!

-

-

-

-

-

See ya Monday!

-

- Reviews from the Road: -

-

- Warm... -

-

-

-

- Getting warmer... -

-

-

-

- Ahhh there it is... -

-

-

-

- What'd you think of today's email? -

- -

-

+

+ 2:40 - OK, the real Peter is on stage now. Crowd cheers. They sound a little tipsy even though it’s 11am.. Nice. This is why we do conferences in Miami.  +

+

He starts off with a bold move. He’s waving a stack of $100 bills in the air.

+

He asks: “what is money?,” “this always gets people's attention,” “it’s not good as toilet paper. It’s not wallpaper. But people want it. Do you want it? Come get it!”

+

+

+

+ 3:16 - He throws the wad of cash at someone in the crowd then laughs as they fight over it: “I thought you guys were supposed to be Bitcon maximalists.” The joke lands. +

+

+ 3:30 - He talks about the early days of PayPal. Shows a picture of him and Elon Musk (they were both CEOs of Paypal early on). Pretty wild to think about how much talent was in 1 startup.  +

+

Here’s who worked at Paypal: 

+
+
    +
  • Peter - created PayPal, first investor in Facebook
  • +
  • Elon - CEO of Tesla, SpaceX
  • +
  • The founders of YouTube
  • +
  • The founder of LinkedIn
  • +
  • The founder of Yelp 
  • +
  • The founder of Kiva.org 
  • +
  • The founder of Yammer
  • +
  • The COO of Square
  • +
  • The future CEO of Reddit
  • +
+
+

This is called the PayPal mafia^. What a killer roster of talent. OK, back to Peter’s talk: 

+

+ 3:55 - He shows a slide from the first pitch deck of PayPal. The initial idea was a lot like Bitcoin. A currency that lived outside of the banking system. He said “we didn’t know anything” back then, but they had big ambitions. +

+

+

+

+ 4:12 - But that was really hard. So within a couple years, they had created PayPal. Which worked with the banking system instead of replacing it. This was more practical, but less ambitious. It was just a payment system, not a new kind of money.  +

+

+ 6:49 - Payment system vs. Money. Why does it matter?  +

+

Peter points out an important thing about money. Velocity. 

+

The simple idea is that people will spend their “less valuable” money first. Back in the day, people would hoard gold, and spend silver. 

+

Today, people will save Bitcoin, and spend dollars.

+

This means some money moves “fast” throughout the economy (hopping from person to person). To most people, that makes a currency more valuable! It’s being used! Must be more valuable, right?

+

The velocity theory says that’s wrong. The slower moving money will end up being higher priced. Nobody is ‘holding’ the fast money, so it doesn’t go up in price (sellers willing to sell). 

+

Anyways, Peter’s belief is that Bitcoin is “slow money” and therefore, it’s valuable money. He says Ethereum is a payments network, high velocity, and lower value.

+

+

+

If Peter’s right, Bitcoin will end up a lot more valuable than ETH over time. (Editor’s note: I totally disagree. I think he’s wrong in thinking ETH is a payments network)

+

+ 7:36 - OK, this is where the talk starts getting aggressive.  +

+

He puts up this slide. The crowd goes wild. Crypto is so tribal.

+

He is trying to talk but you can barely hear him, because the red-blooded-american crowd is going wild for no reason.

+

+

+

+ 8:13 - He says Bitcoin is like gold (a store of value). And ETH is like Visa (a payments network).  +

+

He points out that Gold has a total market value of $12T, and the biggest payment network (Visa) is worth 26x less. 

+

If Peter is right, Bitcoin has more upside than ETH (again, I disagree,but let’s let Peter talk. He’s the billionaire chess grandmaster. I’m just a hairy dude with a free newsletter)

+

+ 11:26 - He puts up this slide showing that back in the day (1980) Gold was equal to the total value of all stocks. (Both were $2.5T)  +

+

Fast forward to today, and stocks are worth 10x the total value of gold.

+

+

+

His point is that thinking of Bitcoin as “digital gold” is underselling how big it can be. Perhaps it will be on par with the S&P 500 (which today is a way that people store their wealth). 

+

+ 12:49 - Bitcoin is $43k today, where does it go?  +

+

“Bitcoin is the most honest market in the world. It went from $5k to $50K in the last 2 years, showing us that inflation was real, and the central banks are bankrupt. This is the end of the fiat money regime.” 

+

“Mr. Powell should be extremely grateful to Bitcoin. It’s the last warning they are going to get. They have chosen to ignore it, and will pay the consequences for it in the years ahead”

+

+ 14:00 - Who are the enemies of Bitcoin? Who is holding it back from a 10x or 100x rise? +

+

+ Bitcoin has 3 enemy types:  +

+

+ Enemy #1 - The Sociopathic Grandpa from Omaha  +

+

+

+

He’s honest about his hate for Bitcoin. He’s incentivized to hate Bitcoin for 2 reasons: 

+
+
    +
  1. He’s a winner of the current system 
  2. +
  3. He’s a money manager. If all people need to do is “buy bitcoin and chill” for a decade, all money managers are out of business
  4. +
+
+

+ Enemy #2 - The NYC Bankers  +

+

Like Jamie Dimon from JP Morgan. 

+

For years they called crypto “worthless.” Now it is undeniable, and they are losing customers, so they’re pivoting to offering crypto assets. 

+

+

+

+ Enemy #3 - Nameless, Faceless Bureaucrats +

+

These people hide behind a more passive-aggressive tone. They say things like “I see huge opportunity in blockchain” 

+

Anyone who says they are “pro-blockchain” is actually “anti-bitcoin” according to Peter. 

+

+ 20:36- The Grand Finale +

+

+

+

He ended with this hilarious slide design. He calls these enemies the “gerontocracy” (aka old rich). I gotta say, whoever photoshopped this slide is my hero. Pure art. Genius. Like Picasso in his prime. 

+

He says it’s OLD vs. YOUTH. And calls bitcoin a revolutionary youth movement. His final line: “We have to leave this conference and take over the world.”   +

+

+ *adds take-over-the-world to my to-do list* +

+

+ The Milk Road’s Take:  +

+

I’ve listened to Peter Thiel for over a decade. The guy is super smart and loves to go against the grain. He’s great at doing this, painting an enemy, and getting people’s attention with bold claims. 

+

On one hand, I love the conviction he has for Bitcoin, and think that it’s cool he’s been predicting this since 1999. 

+

On the other hand, I think he’s wrong about Ethereum, and I find the whole tribal / “us against the world” thing from the conference to be a turnoff. The macho attitude just feels very forced when nerds do it. 

+

I mean look at this. Michael Saylor came out for his talk and hit the crowd with the 1999 “Raise the Roof” move 🤣

+

+

+

+ That wasn’t all for Day 2… +

+

+ Jack Mallers, the founder of Strike, announced they have partnered with the POS  companies that power huge stores like Starbucks, McDonalds, Walmart & Walgreens to allow anyone to pay in-store with Bitcoin over the lightning network. Every credit card swipe has a 3% fee and is slow to settle. Lightning is essentially free (~0% fee) and settles instantly.  +

+

+ Cash App announces their new crypto services that will 1) let users auto-invest a percent of their paychecks into bitcoin and 2) round payments up to the nearest dollar to buy bitcoin with the difference (kinda like Acorns).  +

+

+ Robinhood announced they’re rolling out their new crypto wallet to 2M users. +

+

+ Ricardo Salinas, a Mexican billionaire, says his liquid portfolio is 60% Bitcoin. In 2020, Bitcoin only made up about 10% of his portfolio.  That’s what we call growth, baby! +

+

+

+

+

+ TODAY'S MILK ROAD IS BROUGHT TO YOU BY CRYPTOTRADER.TAX +

+

+

When it comes to taxes, there are two types of people. 

+
+
    +
  1. Those who do taxes early (also known as psychopaths)
  2. +
  3. The rest of us who wait until the last minute
  4. +
+
+

With less than 2 weeks left to file taxes, we’ve partnered with Cryptotrader.tax (soon to be CoinLedger) to make sure you got everything you need.

+

+ A common misconception: +

+

“I don’t get taxed until I cash out to fiat”. 

+

We hate to be the bearer of bad news, but you do in fact realize taxable events and capital gains when trading crypto > crypto.

+

It sucks, but that's the law for ya.

+

+

+

+

+ FUNDING FRIDAY +

+

+

+ Over $700M was raised in Web3 companies and funds this week! Who got the bread this time? +

+

+ Dank Bank got $4.2M to let people buy & sell Memes as NFTs. Sounds silly, but…Meme’s are a part of culture. They are globally recognizable. Maybe memes will be collected as other works of culture & art?  +

+

+ Fractal got $35M to build a new NFT gaming marketplace. This is Justin Kan’s new startup (previously JustinTV, Twitch). Worth keeping an eye on +

+

+ Leap got $10.2M to build out a “super” wallet for the Terra ecosystem that can show off your NFTs, track the value of your wallet, and integrate with the most popular Terra protocols +

+

+ GOALS got $15M to build the “FIFA of Web3” utilizing NFTs. The cool thing here is you’ll own all the assets in the game - from the players to the cleats they wear - and it can all be sold or traded. They still need to build an awesome game though.. +

+

+ Hivemapper got $18M to build a decentralized Google Maps by giving drivers digital tokens to put dashcams in their cars. You know we love easy ways to make money. (They’re leading the wave of IRL → tokens that we talked about here). +

+

Dive into our full database of companies that have fundraised here!

+

+

+

+

+ SIT BACK AND RELAX THIS WEEKEND WITH THE VITALIK SPECIAL +

+

+
+

Send a video of you drinking a Vitalik cocktail to @milkroaddaily on Twitter and we’ll retweet it!

+

+

+

+

+

See ya Monday!

+

+ Reviews from the Road: +

+

+ Warm... +

+

+

+

+ Getting warmer... +

+

+

+

+ Ahhh there it is... +

+

+

+

+ What'd you think of today's email? +

+ +

+

-
\ No newline at end of file +
\ No newline at end of file diff --git a/packages/readabilityjs/test/test-pages/nytimes/expected-metadata.json b/packages/readabilityjs/test/test-pages/nytimes/expected-metadata.json index 882499fc20..c1154ff074 100644 --- a/packages/readabilityjs/test/test-pages/nytimes/expected-metadata.json +++ b/packages/readabilityjs/test/test-pages/nytimes/expected-metadata.json @@ -4,7 +4,9 @@ "dir": null, "excerpt": "Critics see the recent behavior of Brazil’s president — polarizing in the best of times — as an unnerving sign of a flailing leader. His strategy, if there is one, is difficult to discern.", "siteName": "fakehost", + "siteIcon": "http://fakehost/vi-assets/static-assets/favicon-d2483f10ef688e6f89e23806b9700298.ico", "previewImage": "https://static01.nyt.com/images/2021/03/31/world/31brazil/31brazil-facebookJumbo.jpg", "publishedDate": "2021-03-31T23:26:15.000Z", + "language": "English", "readerable": true } diff --git a/packages/readabilityjs/test/test-pages/nytimes/expected.html b/packages/readabilityjs/test/test-pages/nytimes/expected.html index 251653e4cf..9bd33619e5 100644 --- a/packages/readabilityjs/test/test-pages/nytimes/expected.html +++ b/packages/readabilityjs/test/test-pages/nytimes/expected.html @@ -19,8 +19,6 @@
-

-

diff --git a/packages/readabilityjs/test/test-pages/telegr.ph/expected.html b/packages/readabilityjs/test/test-pages/telegr.ph/expected.html index 76dd1b9d08..ea73a0088c 100644 --- a/packages/readabilityjs/test/test-pages/telegr.ph/expected.html +++ b/packages/readabilityjs/test/test-pages/telegr.ph/expected.html @@ -1,12 +1,6 @@
-
-
- -
-
@me
From f0abdd654a4926ea957def305573eff37fc03f5f Mon Sep 17 00:00:00 2001 From: sywhb Date: Thu, 28 Sep 2023 02:35:06 +0000 Subject: [PATCH 6/6] Update generated html --- packages/readabilityjs/test/index.html | 670 ++++++++++++------------- 1 file changed, 332 insertions(+), 338 deletions(-) diff --git a/packages/readabilityjs/test/index.html b/packages/readabilityjs/test/index.html index c1eb5fe9b5..22cb8dbef3 100644 --- a/packages/readabilityjs/test/index.html +++ b/packages/readabilityjs/test/index.html @@ -14,52 +14,88 @@