Skip to content

Commit

Permalink
fix: update tests to remove dayjs where possible, update formats
Browse files Browse the repository at this point in the history
  • Loading branch information
touchRED committed Mar 27, 2023
1 parent 98b8f69 commit 6a5f892
Show file tree
Hide file tree
Showing 29 changed files with 43 additions and 81 deletions.
4 changes: 1 addition & 3 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -132,16 +132,14 @@
},
"bundleDependencies": [
"jquery",
"moment-timezone",
"browser-request"
],
"browser": {
"main": "./dist/mercury.web.js",
"cheerio": "./src/shims/cheerio-query",
"jquery": "./node_modules/jquery/dist/jquery.min.js",
"postman-request": "browser-request",
"iconv-lite": "./src/shims/iconv-lite",
"moment-timezone": "./node_modules/moment-timezone/builds/moment-timezone-with-data-2012-2022.min.js"
"iconv-lite": "./src/shims/iconv-lite"
},
"husky": {
"hooks": {
Expand Down
16 changes: 11 additions & 5 deletions src/cleaners/date-published.js
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,17 @@ export function createDate(dateString, timezone, format) {
}

if (timezone) {
return format
? dayjs.tz(dateString, format, timezone)
: dayjs.tz(new Date(dateString), timezone);
try {
return format
? dayjs.tz(dateString, format, timezone)
: dayjs.tz(dayjs(dateString).format('YYYY-MM-DD HH:mm:ss'), timezone);
} catch (error) {
// return an intentionally invalid dayjs object,
// in case the input needs to be cleaned first
return dayjs('');
}
}
return format ? dayjs(dateString, format) : dayjs(new Date(dateString));
return format ? dayjs(dateString, format) : dayjs(dateString);
}

// Take a date published string, and hopefully return a date out of
Expand All @@ -70,7 +76,7 @@ export default function cleanDatePublished(

if (!date.isValid()) {
dateString = cleanDateString(dateString);
date = createDate(dateString, timezone, format);
date = createDate(dateString, timezone);
}

return date.isValid() ? date.toISOString() : null;
Expand Down
3 changes: 1 addition & 2 deletions src/extractors/custom/clinicaltrials.gov/index.test.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import assert from 'assert';
import URL from 'url';
import cheerio from 'cheerio';
import moment from 'moment-timezone';

import Mercury from 'mercury';
import getExtractor from 'extractors/get-extractor';
Expand Down Expand Up @@ -59,7 +58,7 @@ describe('ClinicaltrialsGovExtractor', () => {

// Update these values with the expected values from
// the article.
assert.equal(moment(date_published).format('YYYY-MM-DD'), '2018-11-21');
assert.equal(date_published, '2018-11-21T05:00:00.000Z');
});

it('returns the content', async () => {
Expand Down
2 changes: 0 additions & 2 deletions src/extractors/custom/fortune.com/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,6 @@ export const FortuneComExtractor = {

date_published: {
selectors: ['.MblGHNMJ'],

timezone: 'UTC',
},

lead_image_url: {
Expand Down
4 changes: 1 addition & 3 deletions src/extractors/custom/genius.com/index.test.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import assert from 'assert';
import URL from 'url';
import cheerio from 'cheerio';
import moment from 'moment';

import Mercury from 'mercury';
import getExtractor from 'extractors/get-extractor';
Expand Down Expand Up @@ -51,11 +50,10 @@ describe('GeniusComExtractor', () => {
// To pass this test, fill out the date_published selector
// in ./src/extractors/custom/genius.com/index.js.
const { date_published } = await result;
const newDatePublished = moment(date_published).format();

// Update these values with the expected values from
// the article.
assert.equal(newDatePublished.split('T')[0], '1984-06-25');
assert.equal(date_published, '1984-06-25T04:00:00.000Z');
});

it('returns the lead_image_url', async () => {
Expand Down
3 changes: 1 addition & 2 deletions src/extractors/custom/news.nationalgeographic.com/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,7 @@ export const NewsNationalgeographicComExtractor = {

date_published: {
selectors: [['meta[name="article:published_time"]', 'value']],
format: 'ddd MMM DD HH:mm:ss zz YYYY',
timezone: 'EST',
timezone: 'America/New_York',
},

dek: {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ describe('NewsNationalgeographicComExtractor', () => {

// Update these values with the expected values from
// the article.
assert.equal(date_published, '2015-08-03T17:45:00.000Z');
assert.equal(date_published, '2015-08-03T16:45:00.000Z');
});

it('returns the dek', async () => {
Expand Down
7 changes: 1 addition & 6 deletions src/extractors/custom/people.com/index.test.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import assert from 'assert';
import URL from 'url';
import cheerio from 'cheerio';
import moment from 'moment-timezone';

import Mercury from 'mercury';
import getExtractor from 'extractors/get-extractor';
Expand Down Expand Up @@ -55,13 +54,9 @@ describe('PeopleComExtractor', () => {
// in ./src/extractors/custom/people.com/index.js.
const { date_published } = await result;

const new_date_published = moment(date_published)
.format()
.split('T')[0];

// Update these values with the expected values from
// the article.
assert.equal(new_date_published, '2016-12-12');
assert.equal(date_published, '2016-12-12T14:22:00.000Z');
});

it('returns the lead_image_url', async () => {
Expand Down
6 changes: 1 addition & 5 deletions src/extractors/custom/pitchfork.com/index.test.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import assert from 'assert';
import URL from 'url';
import cheerio from 'cheerio';
import moment from 'moment-timezone';

import Mercury from 'mercury';
import getExtractor from 'extractors/get-extractor';
Expand Down Expand Up @@ -41,11 +40,8 @@ describe('PitchforkComExtractor', () => {

it('returns the date_published', async () => {
const { date_published } = await result;
const new_date_published = moment(date_published)
.format()
.split('T')[0];

assert.equal(new_date_published, '2019-06-07');
assert.equal(date_published, '2019-06-07T04:00:00.000Z');
});

it('returns the dek', async () => {
Expand Down
4 changes: 1 addition & 3 deletions src/extractors/custom/takagi-hiromitsu.jp/index.test.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import assert from 'assert';
import URL from 'url';
import cheerio from 'cheerio';
import moment from 'moment';

import Mercury from 'mercury';
import getExtractor from 'extractors/get-extractor';
Expand Down Expand Up @@ -57,11 +56,10 @@ describe('TakagihiromitsuJpExtractor', () => {
// To pass this test, fill out the date_published selector
// in ./src/extractors/custom/takagi-hiromitsu.jp/index.js.
const { date_published } = await result;
const newDatePublished = moment(date_published).format();

// Update these values with the expected values from
// the article.
assert.equal(newDatePublished.split('T')[0], '2019-02-17');
assert.equal(date_published, '2019-02-17T14:34:06.000Z');
});

it('returns the dek', async () => {
Expand Down
1 change: 1 addition & 0 deletions src/extractors/custom/www.chicagotribune.com/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ export const WwwChicagotribuneComExtractor = {

date_published: {
selectors: ['time'],
timezone: 'America/Chicago',
},

lead_image_url: {
Expand Down
6 changes: 1 addition & 5 deletions src/extractors/custom/www.chicagotribune.com/index.test.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import assert from 'assert';
import URL from 'url';
import cheerio from 'cheerio';
import moment from 'moment-timezone';

import Mercury from 'mercury';
import getExtractor from 'extractors/get-extractor';
Expand Down Expand Up @@ -55,13 +54,10 @@ describe('WwwChicagotribuneComExtractor', () => {
// To pass this test, fill out the date_published selector
// in ./src/extractors/custom/www.chicagotribune.com/index.js.
const { date_published } = await result;
const new_date_published = moment(date_published)
.format()
.split('T')[0];

// Update these values with the expected values from
// the article.
assert.equal(new_date_published, '2016-12-13');
assert.equal(date_published, '2016-12-13T21:45:00.000Z');
});

it('returns the lead_image_url', async () => {
Expand Down
2 changes: 1 addition & 1 deletion src/extractors/custom/www.infoq.com/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ export const WwwInfoqComExtractor = {

date_published: {
selectors: ['.article__readTime.date'],
format: 'YYYY年MM月DD日',
format: 'YYYY[年]M[月]D[日]',
timezone: 'Asia/Tokyo',
},

Expand Down
3 changes: 1 addition & 2 deletions src/extractors/custom/www.macrumors.com/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,7 @@ export const WwwMacrumorsComExtractor = {

date_published: {
selectors: [['time', 'datetime']],

timezone: 'America/Los_Angeles',
// timezone: 'America/Los_Angeles',
},

dek: {
Expand Down
4 changes: 1 addition & 3 deletions src/extractors/custom/www.msn.com/index.test.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import assert from 'assert';
import URL from 'url';
import cheerio from 'cheerio';
import moment from 'moment';

import Mercury from 'mercury';
import getExtractor from 'extractors/get-extractor';
Expand Down Expand Up @@ -58,11 +57,10 @@ describe('MSNExtractor', () => {
// To pass this test, fill out the date_published selector
// in ./src/extractors/custom/www.msn.com/index.js.
const { date_published } = await result;
const newDatePublished = moment(date_published).format();

// Update these values with the expected values from
// the article.
assert.equal(newDatePublished.split('T')[0], '2016-09-21');
assert.equal(date_published.split('T')[0], '2016-09-21');
});

it('returns the lead_image_url', async () => {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import assert from 'assert';
import URL from 'url';
import cheerio from 'cheerio';
import moment from 'moment-timezone';

import Mercury from 'mercury';
import getExtractor from 'extractors/get-extractor';
Expand Down Expand Up @@ -44,13 +43,10 @@ describe('WwwNationalgeographicComExtractor', () => {
// To pass this test, fill out the date_published selector
// in ./src/extractors/custom/www.nationalgeographic.com/index.js.
const { date_published } = await result;
const new_date_published = moment(date_published)
.format()
.split('T')[0];

// Update these values with the expected values from
// the article.
assert.equal(new_date_published, '2016-12-15');
assert.equal(date_published.split('T')[0], '2016-12-15');
});

it('returns the dek', async () => {
Expand Down
2 changes: 1 addition & 1 deletion src/extractors/custom/www.nbcnews.com/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ export const WwwNbcnewsComExtractor = {
'.flag_article-wrapper time',
],

timezone: 'America/New_York',
// timezone: 'America/New_York',
},

lead_image_url: {
Expand Down
2 changes: 1 addition & 1 deletion src/extractors/custom/www.nbcnews.com/index.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ describe('WwwNbcnewsComExtractor', () => {

// Update these values with the expected values from
// the article.
assert.equal(date_published, '2016-12-13T23:06:00.000Z');
assert.equal(date_published, '2016-12-13T18:06:00.000Z');
});

it('returns the lead_image_url', async () => {
Expand Down
6 changes: 1 addition & 5 deletions src/extractors/custom/www.nydailynews.com/index.test.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import assert from 'assert';
import URL from 'url';
import cheerio from 'cheerio';
import moment from 'moment-timezone';

import Mercury from 'mercury';
import getExtractor from 'extractors/get-extractor';
Expand Down Expand Up @@ -55,14 +54,11 @@ describe('WwwNydailynewsComExtractor', () => {
// To pass this test, fill out the date_published selector
// in ./src/extractors/custom/www.nydailynews.com/index.js.
const { date_published } = await result;
const new_date_published = moment(date_published)
.format()
.split('T')[0];

// Update these values with the expected values from
// the article.

assert.equal(new_date_published, '2016-12-16');
assert.equal(date_published.split('T')[0], '2016-12-16');
});

it('returns the lead_image_url', async () => {
Expand Down
2 changes: 1 addition & 1 deletion src/extractors/custom/www.ossnews.jp/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ export const WwwOssnewsJpExtractor = {

date_published: {
selectors: ['p.fs12'],
format: 'YYYY年MM月DD日 HH:mm',
format: 'YYYY[年]M[月]D[日] HH:mm',
timezone: 'Asia/Tokyo',
},

Expand Down
2 changes: 1 addition & 1 deletion src/extractors/custom/www.phoronix.com/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ export const WwwPhoronixComExtractor = {
date_published: {
selectors: ['.author'],
// 1 June 2019 at 08:34 PM EDT
format: 'D MMMM YYYY at hh:mm',
format: 'D MMMM YYYY [at] hh:mm A',
timezone: 'America/New_York',
},

Expand Down
2 changes: 1 addition & 1 deletion src/extractors/custom/www.phoronix.com/index.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ describe('WwwPhoronixComExtractor', () => {

// Update these values with the expected values from
// the article.
assert.equal(date_published, '2019-06-01T12:34:00.000Z');
assert.equal(date_published, '2019-06-02T00:34:00.000Z');
});

it('returns the dek', async () => {
Expand Down
6 changes: 1 addition & 5 deletions src/extractors/custom/www.politico.com/index.test.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import assert from 'assert';
import URL from 'url';
import cheerio from 'cheerio';
import moment from 'moment-timezone';

import Mercury from 'mercury';
import getExtractor from 'extractors/get-extractor';
Expand Down Expand Up @@ -55,13 +54,10 @@ describe('PoliticoExtractor', () => {
// To pass this test, fill out the date_published selector
// in ./src/extractors/custom/www.politico.com/index.js.
const { date_published } = await result;
const new_date_published = moment(date_published)
.format()
.split('T')[0];

// Update these values with the expected values from
// the article.
assert.equal(new_date_published, '2016-10-04');
assert.equal(date_published.split('T')[0], '2016-10-04');
});

it('returns the lead_image_url', async () => {
Expand Down
2 changes: 0 additions & 2 deletions src/extractors/custom/www.prospectmagazine.co.uk/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,6 @@ export const WwwProspectmagazineCoUkExtractor = {

date_published: {
selectors: [['meta[name="article:published_time"]', 'value'], '.post-info'],

timezone: 'Europe/London',
},

dek: {
Expand Down
Loading

0 comments on commit 6a5f892

Please sign in to comment.