diff --git a/.eslintrc b/.eslintrc index 3240625..6fb31c5 100644 --- a/.eslintrc +++ b/.eslintrc @@ -6,6 +6,7 @@ "no-console": "warn", "typescript/no-unused-vars": "error", "typescript/explicit-member-accessibility": "error", + "typescript/member-ordering": "error", "no-only-tests/no-only-tests": "error" } } diff --git a/package-lock.json b/package-lock.json index 2d4f7eb..0689200 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1305,7 +1305,7 @@ }, "ansi-escapes": { "version": "3.1.0", - "resolved": "http://registry.npmjs.org/ansi-escapes/-/ansi-escapes-3.1.0.tgz", + "resolved": "https://registry.npmjs.org/ansi-escapes/-/ansi-escapes-3.1.0.tgz", "integrity": "sha512-UgAb8H9D41AQnu/PbWlCofQVcnV4Gs2bBJi9eZPxfU/hgglFh3SMDMENRIqdr7H6XFnXdoknctFByVsCOotTVw==", "dev": true }, @@ -1440,7 +1440,7 @@ }, "util": { "version": "0.10.3", - "resolved": "http://registry.npmjs.org/util/-/util-0.10.3.tgz", + "resolved": "https://registry.npmjs.org/util/-/util-0.10.3.tgz", "integrity": "sha1-evsa/lCAUkZInj23/g7TeTNqwPk=", "dev": true, "requires": { @@ -1678,7 +1678,7 @@ }, "browserify-aes": { "version": "1.2.0", - "resolved": "http://registry.npmjs.org/browserify-aes/-/browserify-aes-1.2.0.tgz", + "resolved": "https://registry.npmjs.org/browserify-aes/-/browserify-aes-1.2.0.tgz", "integrity": "sha512-+7CHXqGuspUn/Sl5aO7Ea0xWGAtETPXNSAjHo48JfLdPWcMng33Xe4znFvQweqc/uzk5zSOI3H52CYnjCfb5hA==", "dev": true, "requires": { @@ -1715,7 +1715,7 @@ }, "browserify-rsa": { "version": "4.0.1", - "resolved": "http://registry.npmjs.org/browserify-rsa/-/browserify-rsa-4.0.1.tgz", + "resolved": "https://registry.npmjs.org/browserify-rsa/-/browserify-rsa-4.0.1.tgz", "integrity": "sha1-IeCr+vbyApzy+vsTNWenAdQTVSQ=", "dev": true, "requires": { @@ -1760,7 +1760,7 @@ }, "buffer": { "version": "4.9.1", - "resolved": "http://registry.npmjs.org/buffer/-/buffer-4.9.1.tgz", + "resolved": "https://registry.npmjs.org/buffer/-/buffer-4.9.1.tgz", "integrity": "sha1-bRu2AbB6TvztlwlBMgkwJ8lbwpg=", "dev": true, "requires": { @@ -1806,7 +1806,7 @@ }, "cacache": { "version": "10.0.4", - "resolved": "http://registry.npmjs.org/cacache/-/cacache-10.0.4.tgz", + "resolved": "https://registry.npmjs.org/cacache/-/cacache-10.0.4.tgz", "integrity": "sha512-Dph0MzuH+rTQzGPNT9fAnrPmMmjKfST6trxJeK7NQuHRaVw24VzPRWTmg9MpcwOVQZO0E1FBICUlFeNaKPIfHA==", "dev": true, "requires": { @@ -2200,7 +2200,7 @@ }, "create-hash": { "version": "1.2.0", - "resolved": "http://registry.npmjs.org/create-hash/-/create-hash-1.2.0.tgz", + "resolved": "https://registry.npmjs.org/create-hash/-/create-hash-1.2.0.tgz", "integrity": "sha512-z00bCGNHDG8mHAkP7CtT1qVu+bFQUPjYq/4Iv3C3kWjTFV10zIjfSoeqXo9Asws8gwSHDGj/hl2u4OGIjapeCg==", "dev": true, "requires": { @@ -2213,7 +2213,7 @@ }, "create-hmac": { "version": "1.1.7", - "resolved": "http://registry.npmjs.org/create-hmac/-/create-hmac-1.1.7.tgz", + "resolved": "https://registry.npmjs.org/create-hmac/-/create-hmac-1.1.7.tgz", "integrity": "sha512-MJG9liiZ+ogc4TzUwuvbER1JRdgvUFSB5+VR/g5h82fGaIRWMWddtKBHi7/sVhfjQZ6SehlyhvQYrcYkaUIpLg==", "dev": true, "requires": { @@ -2257,7 +2257,7 @@ }, "css-select": { "version": "1.2.0", - "resolved": "http://registry.npmjs.org/css-select/-/css-select-1.2.0.tgz", + "resolved": "https://registry.npmjs.org/css-select/-/css-select-1.2.0.tgz", "integrity": "sha1-KzoRBTnFNV8c2NMUYj6HCxIeyFg=", "requires": { "boolbase": "~1.0.0", @@ -2411,7 +2411,7 @@ }, "diffie-hellman": { "version": "5.0.3", - "resolved": "http://registry.npmjs.org/diffie-hellman/-/diffie-hellman-5.0.3.tgz", + "resolved": "https://registry.npmjs.org/diffie-hellman/-/diffie-hellman-5.0.3.tgz", "integrity": "sha512-kqag/Nl+f3GwyK25fhUMYj81BUOrZ9IuJsjIcDE5icNM9FJHAVm3VcUDxdLPoQtTuUylWm6ZIknYJwwaPxsUzg==", "dev": true, "requires": { @@ -2467,7 +2467,7 @@ "dependencies": { "domelementtype": { "version": "1.1.3", - "resolved": "http://registry.npmjs.org/domelementtype/-/domelementtype-1.1.3.tgz", + "resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-1.1.3.tgz", "integrity": "sha1-vSh3PiZCiBrsUVRJJCmcXNgiGFs=" } } @@ -2850,7 +2850,7 @@ }, "events": { "version": "1.1.1", - "resolved": "http://registry.npmjs.org/events/-/events-1.1.1.tgz", + "resolved": "https://registry.npmjs.org/events/-/events-1.1.1.tgz", "integrity": "sha1-nr23Y1rQmccNzEwqH1AEKI6L2SQ=", "dev": true }, @@ -3763,7 +3763,7 @@ }, "get-stream": { "version": "3.0.0", - "resolved": "http://registry.npmjs.org/get-stream/-/get-stream-3.0.0.tgz", + "resolved": "https://registry.npmjs.org/get-stream/-/get-stream-3.0.0.tgz", "integrity": "sha1-jpQ9E1jcN1VQVOy+LtsFqhdO3hQ=", "dev": true }, @@ -4174,7 +4174,7 @@ }, "is-builtin-module": { "version": "1.0.0", - "resolved": "http://registry.npmjs.org/is-builtin-module/-/is-builtin-module-1.0.0.tgz", + "resolved": "https://registry.npmjs.org/is-builtin-module/-/is-builtin-module-1.0.0.tgz", "integrity": "sha1-VAVy0096wxGfj3bDDLwbHgN6/74=", "dev": true, "requires": { @@ -4392,7 +4392,7 @@ }, "json5": { "version": "0.5.1", - "resolved": "http://registry.npmjs.org/json5/-/json5-0.5.1.tgz", + "resolved": "https://registry.npmjs.org/json5/-/json5-0.5.1.tgz", "integrity": "sha1-Hq3nrMASA0rYTiOWdn6tn6VJWCE=", "dev": true }, @@ -4730,7 +4730,7 @@ }, "mkdirp": { "version": "0.5.1", - "resolved": "http://registry.npmjs.org/mkdirp/-/mkdirp-0.5.1.tgz", + "resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-0.5.1.tgz", "integrity": "sha1-MAV0OOrGz3+MR2fzhkjWaX11yQM=", "requires": { "minimist": "0.0.8" @@ -5412,7 +5412,7 @@ }, "os-tmpdir": { "version": "1.0.2", - "resolved": "http://registry.npmjs.org/os-tmpdir/-/os-tmpdir-1.0.2.tgz", + "resolved": "https://registry.npmjs.org/os-tmpdir/-/os-tmpdir-1.0.2.tgz", "integrity": "sha1-u+Z0BseaqFxc/sdm/lc0VV36EnQ=", "dev": true }, @@ -5465,7 +5465,7 @@ }, "parse-asn1": { "version": "5.1.1", - "resolved": "http://registry.npmjs.org/parse-asn1/-/parse-asn1-5.1.1.tgz", + "resolved": "https://registry.npmjs.org/parse-asn1/-/parse-asn1-5.1.1.tgz", "integrity": "sha512-KPx7flKXg775zZpnp9SxJlz00gTd4BmJ2yJufSc44gMCRrRQ7NSzAcSJQfifuOLgW6bEi+ftrALtsgALeB2Adw==", "dev": true, "requires": { @@ -5510,7 +5510,7 @@ }, "path-is-absolute": { "version": "1.0.1", - "resolved": "http://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz", + "resolved": "https://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz", "integrity": "sha1-F0uSaHNVNP+8es5r9TpanhtcX18=" }, "path-is-inside": { @@ -6008,7 +6008,7 @@ }, "safe-regex": { "version": "1.1.0", - "resolved": "http://registry.npmjs.org/safe-regex/-/safe-regex-1.1.0.tgz", + "resolved": "https://registry.npmjs.org/safe-regex/-/safe-regex-1.1.0.tgz", "integrity": "sha1-QKNmnzsHfR6UPURinhV91IAjvy4=", "dev": true, "requires": { @@ -6094,7 +6094,7 @@ }, "sha.js": { "version": "2.4.11", - "resolved": "http://registry.npmjs.org/sha.js/-/sha.js-2.4.11.tgz", + "resolved": "https://registry.npmjs.org/sha.js/-/sha.js-2.4.11.tgz", "integrity": "sha512-QMEp5B7cftE7APOjk5Y6xgrbWu+WkLVQwk8JNjZ8nKRciZaByEW6MubieAiToS7+dwvrjGhH8jRXz3MVd0AYqQ==", "dev": true, "requires": { @@ -6382,7 +6382,7 @@ }, "stream-browserify": { "version": "2.0.1", - "resolved": "http://registry.npmjs.org/stream-browserify/-/stream-browserify-2.0.1.tgz", + "resolved": "https://registry.npmjs.org/stream-browserify/-/stream-browserify-2.0.1.tgz", "integrity": "sha1-ZiZu5fm9uZQKTkUUyvtDu3Hlyds=", "dev": true, "requires": { @@ -6467,7 +6467,7 @@ }, "strip-ansi": { "version": "3.0.1", - "resolved": "http://registry.npmjs.org/strip-ansi/-/strip-ansi-3.0.1.tgz", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-3.0.1.tgz", "integrity": "sha1-ajhfuIU9lS1f8F0Oiq+UJ43GPc8=", "dev": true, "requires": { @@ -6476,7 +6476,7 @@ }, "strip-eof": { "version": "1.0.0", - "resolved": "http://registry.npmjs.org/strip-eof/-/strip-eof-1.0.0.tgz", + "resolved": "https://registry.npmjs.org/strip-eof/-/strip-eof-1.0.0.tgz", "integrity": "sha1-u0P/VZim6wXYm1n80SnJgzE2Br8=", "dev": true }, @@ -7660,7 +7660,7 @@ }, "webpack-node-externals": { "version": "1.7.2", - "resolved": "http://registry.npmjs.org/webpack-node-externals/-/webpack-node-externals-1.7.2.tgz", + "resolved": "https://registry.npmjs.org/webpack-node-externals/-/webpack-node-externals-1.7.2.tgz", "integrity": "sha512-ajerHZ+BJKeCLviLUUmnyd5B4RavLF76uv3cs6KNuO8W+HuQaEs0y0L7o40NQxdPy5w0pcv8Ew7yPUAQG0UdCg==", "dev": true }, @@ -7705,7 +7705,7 @@ }, "wrap-ansi": { "version": "2.1.0", - "resolved": "http://registry.npmjs.org/wrap-ansi/-/wrap-ansi-2.1.0.tgz", + "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-2.1.0.tgz", "integrity": "sha1-2Pw9KE3QV5T+hJc8rs3Rz4JP3YU=", "dev": true, "requires": { diff --git a/src/scraper/scrape-step/downloader/implementations/http.ts b/src/scraper/scrape-step/downloader/implementations/http.ts index cd96626..f8ff975 100644 --- a/src/scraper/scrape-step/downloader/implementations/http.ts +++ b/src/scraper/scrape-step/downloader/implementations/http.ts @@ -31,11 +31,6 @@ type FetchFunction = ( export class Downloader extends AbstractDownloader { private urlTemplate: ReturnType private headerTemplates: Map> - private verifyResponseOk = (response: Fetch.Response, url: string) => { - if (!response.ok) { - throw new Error(`status ${response.status} for ${url}`) - } - } private fetcher: FetchFunction public constructor(config: ScrapeConfig, options: Options, tools: Tools) { @@ -80,6 +75,11 @@ export class Downloader extends AbstractDownloader { return this.fetcher(downloadId, downloadData) } + private verifyResponseOk = (response: Fetch.Response, url: string) => { + if (!response.ok) { + throw new Error(`status ${response.status} for ${url}`) + } + } private downloadToFileAndMemory: FetchFunction = async ( downloadId, [url, fetchOptions] diff --git a/src/scraper/scrape-step/index.ts b/src/scraper/scrape-step/index.ts index 4a9479a..b0c124f 100644 --- a/src/scraper/scrape-step/index.ts +++ b/src/scraper/scrape-step/index.ts @@ -1,6 +1,5 @@ import * as Rx from 'rxjs' import * as ops from 'rxjs/operators' -import VError from 'verror' import { downloaderClassFactory } from './downloader' import { parserClassFactory } from './parser' import { incrementer } from './incrementer' @@ -69,6 +68,22 @@ class ScrapeStep extends AbstractScrapeStep { scrapeNextChild ) } + + public run: typeof AbstractScrapeStep.prototype.run = ( + parentValues: ParsedValue[] + ): Rx.Observable => + Rx.from(parentValues).pipe( + ops.flatMap(this.incrementObservableFunction), + ops.catchError(wrapError(`scraper '${this.scraperName}'`)), + ops.flatMap( + parsedValues => + this.children.length + ? this.children.map(child => child.run(parsedValues)) + : [Rx.of(parsedValues)] + ), + ops.mergeAll() + ) + private downloadParseFunction: DownloadParseFunction = async ( { parsedValue: value, id: parentId }, incrementIndex @@ -115,20 +130,6 @@ class ScrapeStep extends AbstractScrapeStep { return parsedValuesWithId } } - public run: typeof AbstractScrapeStep.prototype.run = ( - parentValues: ParsedValue[] - ): Rx.Observable => - Rx.from(parentValues).pipe( - ops.flatMap(this.incrementObservableFunction), - ops.catchError(wrapError(`scraper '${this.scraperName}'`)), - ops.flatMap( - parsedValues => - this.children.length - ? this.children.map(child => child.run(parsedValues)) - : [Rx.of(parsedValues)] - ), - ops.mergeAll() - ) } export { ScrapeStep, IdentityScrapeStep } diff --git a/src/scraper/scrape-step/parser/implementations/html.ts b/src/scraper/scrape-step/parser/implementations/html.ts index d0abaa1..0ff65ea 100644 --- a/src/scraper/scrape-step/parser/implementations/html.ts +++ b/src/scraper/scrape-step/parser/implementations/html.ts @@ -8,6 +8,12 @@ import { Tools } from '../../../../tools' export class Parser extends AbstractParser { private parser: (value: string) => string[] + public constructor(config: ScrapeConfig, options: Options, tools: Tools) { + super(config, options, tools) + this.parser = this.attribute ? this.selectAttrVals : this.selectTextVals + } + protected parse = (value: string) => this.parser(value) + private selectTextVals = (value: string) => { const $ = cheerio.load(value) const values: string[] = [] @@ -26,10 +32,4 @@ export class Parser extends AbstractParser { }) return values } - protected parse = (value: string) => this.parser(value) - - public constructor(config: ScrapeConfig, options: Options, tools: Tools) { - super(config, options, tools) - this.parser = this.attribute ? this.selectAttrVals : this.selectTextVals - } } diff --git a/src/tools/emitter.ts b/src/tools/emitter.ts index 5d0fb89..589cd76 100644 --- a/src/tools/emitter.ts +++ b/src/tools/emitter.ts @@ -12,13 +12,6 @@ const scraperEvents = { } class ScraperEmitter { - private emitter: EventEmitter - private name: string - - public constructor(name: string, emitter: EventEmitter) { - this.emitter = emitter - this.name = name - } public emit = { queued: (id: number) => { this.emitter.emit(`${this.name}:${scraperEvents.QUEUED}`, id) @@ -41,6 +34,13 @@ class ScraperEmitter { this.emitter.emit(`${this.name}:${scraperEvents.COMPLETE}`, id) } } + private emitter: EventEmitter + private name: string + + public constructor(name: string, emitter: EventEmitter) { + this.emitter = emitter + this.name = name + } } const events = { @@ -59,10 +59,21 @@ type EmitterEmit = ( class Emitter { public emitter: EventEmitter - private scrapers: { [scraper: string]: ScraperEmitter } = {} - private hasListenerFor = (eventName: string): boolean => - this.emitter.listenerCount(eventName) !== 0 + public emit = { + done: () => { + this.emitter.emit(events.DONE) + }, + error: (error: Error) => { + this.emitter.emit(events.ERROR, error) + } + } + public on = { + stop: (callback: () => void) => { + this.emitter.on(events.STOP, callback) + } + } + private scrapers: { [scraper: string]: ScraperEmitter } = {} public constructor(config: Config) { this.emitter = new EventEmitter() @@ -72,25 +83,13 @@ class Emitter { this.scrapers[name] = new ScraperEmitter(name, this.emitter) } } - public scraper = (name: string) => this.scrapers[name] public getBoundOn = (): EmitterOn => this.emitter.on.bind(this.emitter) public getBoundEmit = (): EmitterEmit => this.emitter.emit.bind(this.emitter) public getRxEventStream = (eventName: string) => Rx.fromEvent(this.emitter, eventName) - public emit = { - done: () => { - this.emitter.emit(events.DONE) - }, - error: (error: Error) => { - this.emitter.emit(events.ERROR, error) - } - } - public on = { - stop: (callback: () => void) => { - this.emitter.on(events.STOP, callback) - } - } + private hasListenerFor = (eventName: string): boolean => + this.emitter.listenerCount(eventName) !== 0 } export { Emitter } diff --git a/src/tools/logger.ts b/src/tools/logger.ts index cc1bfe6..28c0cec 100644 --- a/src/tools/logger.ts +++ b/src/tools/logger.ts @@ -12,12 +12,12 @@ const serializers = { parsedValuesWithId: (values: ParsedValue[]) => values.map(v => v.parsedValue) } class Logger { - private logger: bunyan - private scrapers: { [scraperName: string]: bunyan } public debug: typeof bunyan.prototype.debug public info: typeof bunyan.prototype.info public warn: typeof bunyan.prototype.warn public error: typeof bunyan.prototype.error + private logger: bunyan + private scrapers: { [scraperName: string]: bunyan } public constructor(options: OptionsInit, flatOptions: FlatOptions) { this.logger = bunyan.createLogger({ diff --git a/src/tools/queue/index.ts b/src/tools/queue/index.ts index 20a2730..55abc60 100644 --- a/src/tools/queue/index.ts +++ b/src/tools/queue/index.ts @@ -46,19 +46,6 @@ class Queue { this.queuePromise = taskObservable.toPromise() } - private executor = (): Promise => { - const taskWithCallback = this.queue.pop() - if (!taskWithCallback) { - throw new TypeError('queue popped an undefined task.') - } - return taskWithCallback() - } - - private wrapTask = (task: Task, callback: ErrorCallback): Task => () => - task() - .then(value => callback(undefined, value)) - .catch(error => callback(error, undefined)) - // returns a promise that resolves or rejects according to the promise passed in public add = (task: () => Promise, priority: number): Promise => { return new Promise((resolve, reject) => { @@ -83,5 +70,18 @@ class Queue { public toPromise() { return this.queuePromise } + + private executor = (): Promise => { + const taskWithCallback = this.queue.pop() + if (!taskWithCallback) { + throw new TypeError('queue popped an undefined task.') + } + return taskWithCallback() + } + + private wrapTask = (task: Task, callback: ErrorCallback): Task => () => + task() + .then(value => callback(undefined, value)) + .catch(error => callback(error, undefined)) } export { Queue } diff --git a/src/tools/queue/priority-queue.ts b/src/tools/queue/priority-queue.ts index e8de1cd..f3e6fc1 100644 --- a/src/tools/queue/priority-queue.ts +++ b/src/tools/queue/priority-queue.ts @@ -4,9 +4,9 @@ * e.g. priority one is popped before priority zero */ export class PriorityQueue { + public length = 0 private priorities: number[] = [] private queue: { [priority: number]: T[] } = {} - public length = 0 // all priorities used in the push function are defined in the constructor public constructor(availablePriorities: number[]) { diff --git a/src/tools/store/index.ts b/src/tools/store/index.ts index 69facc1..7e8d8f7 100644 --- a/src/tools/store/index.ts +++ b/src/tools/store/index.ts @@ -8,10 +8,10 @@ import { Transaction } from 'better-sqlite3' import { OptionsInit } from '../../settings/options/types' class Store { + public qs: ReturnType private config: Config private flatConfig: FlatConfig private database: Database - public qs: ReturnType public constructor(config: Config, { folder }: OptionsInit) { this.config = config