Skip to content

Commit

Permalink
feat(trim): Trim spans greater than 140 characters (#58)
Browse files Browse the repository at this point in the history
  • Loading branch information
Joxit authored Sep 13, 2019
1 parent cd87ea4 commit d5126ca
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 0 deletions.
3 changes: 3 additions & 0 deletions tokenization/Span.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
const Graph = require('./Graph')
// upper bound on the number of characters kept in a span body;
// setBody() slices any longer body down to this length
const MAX_SPAN_LENGTH = 140

class Span {
constructor (body, start) {
Expand All @@ -13,6 +14,8 @@ class Span {
// update the token body
setBody (body) {
this.body = body || ''
if (this.body.length > MAX_SPAN_LENGTH) { this.body = this.body.slice(0, MAX_SPAN_LENGTH) }

this.norm = this.body.toLowerCase() // normalized body
this.end = this.start + this.body.length

Expand Down
13 changes: 13 additions & 0 deletions tokenization/Span.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,19 @@ module.exports.tests.setBody = (test) => {
t.true(span.contains.final.period)
t.end()
})
// a body longer than 140 characters (two sentences separated by a newline)
// must be truncated, so the span is expected to end at offset 140
test('setBody: trim text when greater than 140 characters with spaces', (t) => {
let span = new Span(`Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.
Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.`)
t.equals(span.start, 0)
t.equals(span.end, 140) // end === start + truncated body length
t.end()
})
// boundary case: a body sitting exactly at the limit must be kept whole
// NOTE(review): the literal is expected to be exactly 140 characters long — confirm if it is ever edited
test(`setBody: do not trim text when it's 140 characters`, (t) => {
let span = new Span(`LoremipsumdolorsitametconsecteturadipiscingelitseddoeiusmodtemporincididuntutlaboreetdoloremagnaaliquaUtenimadminimveniamquisnostrudexercita`)
t.equals(span.start, 0)
t.equals(span.end, 140) // nothing was cut: end equals the full body length
t.end()
})
}

module.exports.tests.intersects = (test) => {
Expand Down
12 changes: 12 additions & 0 deletions tokenization/Tokenizer.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,18 @@ module.exports.tests.computeCoverage = (test) => {
t.equal(30, tok.coverage)
t.end()
})
// tokenizing an input longer than 140 characters must yield a coverage below 140
// NOTE(review): the exact value 111 presumably depends on the whitespace inside the
// template literal (including any indentation on its second line) — confirm against
// the original file before changing the literal's formatting
test('computeCoverage: trim text when greater than 140 characters with spaces', (t) => {
let tok = new Tokenizer(`Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.
Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.`)
t.ok(tok.coverage < 140) // loose guard: coverage can never reach the untrimmed length
t.equal(tok.coverage, 111) // exact expected coverage for this input
t.end()
})
// boundary case: a whitespace-free input sitting exactly at the limit is not trimmed,
// so the reported coverage is expected to be the full 140
// NOTE(review): relies on the literal being exactly 140 characters — confirm if it is ever edited
test(`computeCoverage: do not trim text when it's 140 characters`, (t) => {
let tok = new Tokenizer(`LoremipsumdolorsitametconsecteturadipiscingelitseddoeiusmodtemporincididuntutlaboreetdoloremagnaaliquaUtenimadminimveniamquisnostrudexercita`)
t.equal(tok.coverage, 140)
t.end()
})
}

module.exports.all = (tape, common) => {
Expand Down

0 comments on commit d5126ca

Please sign in to comment.