-
Notifications
You must be signed in to change notification settings - Fork 1
/
parser_test.go
59 lines (55 loc) · 1.61 KB
/
parser_test.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
package plaintext
import (
"github.com/huantt/plaintext-extractor/html"
"github.com/huantt/plaintext-extractor/markdown"
"github.com/stretchr/testify/assert"
"testing"
)
// TestParseHtml checks that the extractor built by NewHtmlExtractor turns each
// HTML input in the table into the expected plain text.
func TestParseHtml(t *testing.T) {
	tests := []struct {
		input    string
		expected string
	}{
		{`<div>This is a <a href="https://example.com">link</a></div>`, "This is a link"},
	}
	for _, tc := range tests {
		extractor := NewHtmlExtractor()
		output, err := extractor.PlainText(tc.input)
		// assert.* only records a failure and returns false; it does not stop
		// the test. Skip the case on failure so the dereference below cannot
		// panic on a nil result and abort the whole test run.
		if !assert.NoError(t, err, "input: %q", tc.input) ||
			!assert.NotNil(t, output, "input: %q", tc.input) {
			continue
		}
		assert.Equal(t, tc.expected, *output, "input: %q", tc.input)
	}
}
// TestParseMarkdown checks that the extractor built by NewMarkdownExtractor
// turns each Markdown input in the table into the expected plain text.
//
// The sample input contains an unmatched backtick ("`not code"); the expected
// output keeps that backtick verbatim.
func TestParseMarkdown(t *testing.T) {
	tests := []struct {
		input    string
		expected string
	}{
		{"# H1 \n*italic* **bold** `code` `not code [link](https://example.com) ![image](https://image.com/image.png) ~~strikethrough~~", "H1 \nitalic bold code `not code link image strikethrough"},
	}
	for _, tc := range tests {
		extractor := NewMarkdownExtractor()
		output, err := extractor.PlainText(tc.input)
		// assert.* only records a failure and returns false; it does not stop
		// the test. Skip the case on failure so the dereference below cannot
		// panic on a nil result and abort the whole test run.
		if !assert.NoError(t, err, "input: %q", tc.input) ||
			!assert.NotNil(t, output, "input: %q", tc.input) {
			continue
		}
		assert.Equal(t, tc.expected, *output, "input: %q", tc.input)
	}
}
// TestMultipleExtractors checks an extractor built by NewExtractor from the
// markdown and HTML PlainText functions (passed in that order) against inputs
// that mix both syntaxes.
func TestMultipleExtractors(t *testing.T) {
	tests := []struct {
		input    string
		expected string
	}{
		{"<div> html </div> *markdown*", "html markdown"},
		{"<div> *markdown in html* </div>", "markdown in html"},
	}
	for _, tc := range tests {
		// Extractors are rebuilt for every case, so no case depends on state
		// left behind by an earlier one.
		markdownExtractor := markdown.NewExtractor()
		htmlExtractor := html.NewExtractor()
		extractor := NewExtractor(markdownExtractor.PlainText, htmlExtractor.PlainText)
		output, err := extractor.PlainText(tc.input)
		// assert.* only records a failure and returns false; it does not stop
		// the test. Skip the case on failure so the dereference below cannot
		// panic on a nil result and abort the whole test run.
		if !assert.NoError(t, err, "input: %q", tc.input) ||
			!assert.NotNil(t, output, "input: %q", tc.input) {
			continue
		}
		assert.Equal(t, tc.expected, *output, "input: %q", tc.input)
	}
}