Skip to content

Commit

Permalink
✨ Add row structure validator
Browse files Browse the repository at this point in the history
  • Loading branch information
Tomas2D committed Oct 5, 2022
1 parent 25e5a8d commit 2e50f49
Show file tree
Hide file tree
Showing 6 changed files with 78 additions and 16 deletions.
21 changes: 18 additions & 3 deletions src/parseTable.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { FullParserSettings } from './types';
import { extraColsMapperFactory, diffFromSource } from './helpers';
import { FullParserSettings, RowValidationPolicy } from './types';
import { diffFromSource, extraColsMapperFactory } from './helpers';
import { ElementHandle } from 'puppeteer';
import { InvalidColumnError, MissingRequiredColumnsError } from './errors';
import { InvalidColumnError, InvalidSettingsError, MissingRequiredColumnsError } from './errors';

export function parseTableFactory(settings: FullParserSettings) {
const extraColsMapper = extraColsMapperFactory(settings.extraCols);
Expand Down Expand Up @@ -31,6 +31,20 @@ export function parseTableFactory(settings: FullParserSettings) {
return settings.rowValuesAsArray ? headerRow : headerRow.join(settings.csvSeparator);
};

/**
 * Creates the predicate that decides whether a parsed row has an acceptable
 * shape, based on `settings.rowValidationPolicy`.
 *
 * - `NONE`: every row is accepted.
 * - `NON_EMPTY`: a row is accepted when it contains at least one cell.
 * - `EXACT_MATCH`: a row is accepted when its cell count equals the number of
 *   keys in `allowedIndexes`.
 *
 * @param allowedIndexes - Mapping whose key count defines the expected number
 *   of cells for the `EXACT_MATCH` policy.
 * @returns A predicate taking the row's cell values.
 * @throws InvalidSettingsError when the configured policy is not one of the
 *   known values.
 */
const getRowStructureValidator = (allowedIndexes: Record<string, number>) => {
  switch (settings.rowValidationPolicy) {
    case RowValidationPolicy.NONE:
      return () => true;

    case RowValidationPolicy.NON_EMPTY:
      return (cells: string[]) => cells.length > 0;

    case RowValidationPolicy.EXACT_MATCH: {
      // Counted once up front so the predicate itself only compares lengths.
      const expectedCellCount = Object.keys(allowedIndexes).length;
      return (cells: string[]) => cells.length === expectedCellCount;
    }

    default:
      throw new InvalidSettingsError('Unknown mode for the "rowValidationPolicy"');
  }
};

const filterSortCols =
(allowedIndexes: Record<string, number>) =>
(row: ElementHandle): Promise<string[]> =>
Expand Down Expand Up @@ -126,6 +140,7 @@ export function parseTableFactory(settings: FullParserSettings) {
};

const finalRows = (await Promise.all(bodyRows.map(filterSortCols(allowedIndexes))))
.filter(getRowStructureValidator(allowedIndexes))
.map((row) => extraColsMapper(row, 'data'))
.filter((row, index, rows) => settings.rowValidator(row, getColumnIndex, index, rows))
.map((row) => row.map((cell, index) => settings.colParser(cell, index, getColumnIndex)))
Expand Down
8 changes: 7 additions & 1 deletion src/settings.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
import { FullParserSettings, ParserSettings, ParserSettingsOptional } from './types';
import {
FullParserSettings,
ParserSettings,
ParserSettingsOptional,
RowValidationPolicy,
} from './types';
import { InvalidSettingsError } from './errors';
import { omitUndefined } from './helpers';

Expand All @@ -7,6 +12,7 @@ export const defaultSettings: ParserSettingsOptional = {
withHeader: true,
csvSeparator: ';',
newLine: '\n',
rowValidationPolicy: RowValidationPolicy.NON_EMPTY,
rowValidator: () => true,
rowTransform: () => {},
asArray: false,
Expand Down
7 changes: 7 additions & 0 deletions src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,19 @@ export interface ExtraCol {

export type GetColumnIndexType = (colName: string) => number;

/**
 * Policy that selects how the structure (cell count) of each parsed body row
 * is checked before the row is processed further.
 */
export enum RowValidationPolicy {
/** No structural check: every row is accepted. */
NONE = 'NONE',
/** A row is accepted when it has at least one cell. Used as the default setting. */
NON_EMPTY = 'NON_EMPTY',
/** A row is accepted only when its cell count equals the number of allowed column indexes. */
EXACT_MATCH = 'EXACT_MATCH',
}

export type ParserSettingsOptional = {
temporaryColNames: string[];
extraCols: ExtraCol[];
withHeader: boolean;
csvSeparator: string;
newLine: string;
rowValidationPolicy: RowValidationPolicy;
rowValidator: (
row: string[],
getColumnIndex: GetColumnIndexType,
Expand Down
4 changes: 4 additions & 0 deletions test/assets/1.html
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
<td>332</td>
<td>2015</td>
</tr>
<tr class='empty-row'></tr>
<tr>
<td>Alfa Romeo Giulia</td>
<td>500</td>
Expand All @@ -34,6 +35,9 @@
<td>120</td>
<td>2012</td>
</tr>
<tr class='empty-row'>
<td></td>
</tr>
</tbody>
</table>

Expand Down
2 changes: 1 addition & 1 deletion test/helpers.test.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { omitUndefined } from '../src/helpers';
import { ParserSettingsOptional } from '../src/types';
import type { ParserSettingsOptional } from '../src/types';

describe('Helper utils', () => {
it('Removes undefined properties', () => {
Expand Down
52 changes: 41 additions & 11 deletions test/index.test.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import { Server } from 'http';
import { promisify } from 'util';
import { createServer, getBaseUrl } from './createServer';
import { launch, Browser, Page } from 'puppeteer';
import tableParser from '../src';
import { Browser, launch, Page } from 'puppeteer';
import tableParser, { RowValidationPolicy } from '../src';

describe('Basic parsing', () => {
let server: Server;
Expand Down Expand Up @@ -128,6 +128,9 @@ describe('Basic parsing', () => {
"120",
"2012",
],
Array [
"",
],
]
`);
});
Expand Down Expand Up @@ -278,7 +281,8 @@ describe('Basic parsing', () => {
2021-03-15;Audi S5
2021-03-15;Alfa Romeo Giulia
2021-03-15;BMW X3
2021-03-15;Skoda Octavia"
2021-03-15;Skoda Octavia
2021-03-15;"
`);
});

Expand Down Expand Up @@ -329,6 +333,7 @@ describe('Basic parsing', () => {

const data = await tableParser(page, {
selector: 'table',
rowValidationPolicy: RowValidationPolicy.EXACT_MATCH,
allowedColNames: {
'Car Name': 'car',
'Some non existing column': 'non-existing',
Expand Down Expand Up @@ -361,6 +366,7 @@ describe('Basic parsing', () => {
selector: 'table',
allowedColNames: {
'Car Name': 'car',
'Horse Powers': 'hp',
},
extraCols: [
{
Expand All @@ -371,20 +377,44 @@ describe('Basic parsing', () => {
colName: 'ex2',
data: 'ex2',
},
],
});

expect(data).toMatchInlineSnapshot(`
"car;hp;ex1;ex2
Audi S5;332;ex1;ex2
Alfa Romeo Giulia;500;ex1;ex2
BMW X3;215;ex1;ex2
Skoda Octavia;120;ex1;ex2
;ex1;ex2"
`);
});

it('Handles filtering partial rows', async () => {
await page.goto(`${getBaseUrl()}/1.html`);

const data = await tableParser(page, {
selector: 'table',
rowValidationPolicy: RowValidationPolicy.EXACT_MATCH,
allowedColNames: {
'Car Name': 'car',
'Horse Powers': 'hp',
'Manufacture Year': 'year',
},
extraCols: [
{
colName: 'ex0',
data: 'ex0',
position: 0,
colName: 'sellerId',
data: '123',
},
],
});

expect(data).toMatchInlineSnapshot(`
"ex0;car;ex1;ex2
ex0;Audi S5;ex1;ex2
ex0;Alfa Romeo Giulia;ex1;ex2
ex0;BMW X3;ex1;ex2
ex0;Skoda Octavia;ex1;ex2"
"car;hp;year;sellerId
Audi S5;332;2015;123
Alfa Romeo Giulia;500;2020;123
BMW X3;215;2017;123
Skoda Octavia;120;2012;123"
`);
});
});

0 comments on commit 2e50f49

Please sign in to comment.