Skip to content

Commit

Permalink
refactor(parser): improved removal of unit numbers from $subject and $admin
Browse files Browse the repository at this point in the history
  • Loading branch information
missinglink committed Sep 3, 2020
1 parent 80e10c1 commit ac26263
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 2 deletions.
5 changes: 3 additions & 2 deletions sanitizer/_text_pelias_parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -137,9 +137,10 @@ function parse (clean) {
// ' VVVV NN SSSSSSS AAAAAA PPPPP '
let mask = solution.mask(t);

- // the entire input text as seen by the parser with any postcode classification(s) removed
+ // the entire input text as seen by the parser with any postcode and unit
+ // classification(s) removed
  let body = t.span.body.split('')
- .map((c, i) => (mask[i] !== 'P') ? c : ' ')
+ .map((c, i) => !/[PU]/.test(mask[i]) ? c : ' ')
.join('');

// same as $body above but with consecutive whitespace squashed and trimmed.
Expand Down
10 changes: 10 additions & 0 deletions test/unit/sanitizer/_text_pelias_parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -419,6 +419,16 @@ module.exports.tests.text_parser = function (test, common) {
cases.push(['e8 1dn', { subject: 'e8 1dn' }, true]);
// cases.push(['london e8 1dn', { subject: 'e8 1dn' }, true]); // issue

// unit number between address and locality
cases.push(['7750 Kennedy Rd #2A Markham', {
subject: '7750 Kennedy Rd',
housenumber: '7750',
street: 'Kennedy Rd',
unit: '#2A',
locality: 'Markham',
admin: 'Markham',
}]);

cases.forEach(testcase => {
let input = testcase[0];
let expected = testcase[1];
Expand Down

0 comments on commit ac26263

Please sign in to comment.