-
Notifications
You must be signed in to change notification settings - Fork 0
/
tree_query.go
81 lines (64 loc) · 1.64 KB
/
tree_query.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
package scraper
import (
"bytes"
"reflect"
"strings"
"github.com/andybalholm/cascadia"
"golang.org/x/net/html"
)
// Query parses query with cascadia.Parse and, on success, returns the result
// of cascadia.QueryAll for n and the parsed selector.
//
// If query cannot be parsed, the parse error is discarded and an empty,
// non-nil slice is returned.
func Query(n *html.Node, query string) []*html.Node {
	if parsed, parseErr := cascadia.Parse(query); parseErr == nil {
		return cascadia.QueryAll(n, parsed)
	}
	// Invalid selector: callers get an empty result rather than an error.
	return make([]*html.Node, 0)
}
// Attr returns the value of the first attribute of n whose key equals name.
// It returns "" when n has no attribute with that key.
func Attr(n *html.Node, name string) string {
	for i := range n.Attr {
		if n.Attr[i].Key != name {
			continue
		}
		return n.Attr[i].Val
	}
	return ""
}
// queryFilter is a node predicate whose check is selected by kind and
// parameterised through options. Its Match method is what gets handed to
// cascadia.Filter in AttrContains.
type queryFilter struct {
// kind names the check to run; Match only recognises "isAttrContains".
kind string
// options holds the parameters for the selected kind. The "isAttrContains"
// check reads the "attrName" and "value" keys and expects string values.
options map[string]interface{}
}
// matchIsAttrContains reports whether the attribute of n named by the
// "attrName" option contains the "value" option as a substring.
//
// Both options must be present and hold values of type string; otherwise it
// returns false. An attribute that is absent from n is read as "" by Attr, so
// such a node only matches when the "value" option is itself "".
func (f *queryFilter) matchIsAttrContains(n *html.Node) bool {
	// Compare reflect.Type values directly rather than their String() form:
	// reflect.TypeOf returns nil for a nil interface value, and calling
	// String on that nil Type would panic. With a direct comparison an
	// option stored as nil is simply rejected like any other non-string.
	stringType := reflect.TypeOf("")

	attrNameRaw, ok := f.options["attrName"]
	if !ok || reflect.TypeOf(attrNameRaw) != stringType {
		return false
	}
	valueRaw, ok := f.options["value"]
	if !ok || reflect.TypeOf(valueRaw) != stringType {
		return false
	}

	// The type checks above guarantee these assertions cannot panic.
	attrValue := Attr(n, attrNameRaw.(string))
	return strings.Contains(attrValue, valueRaw.(string))
}
// Match reports whether n satisfies the filter. It dispatches on f.kind:
// "isAttrContains" is delegated to matchIsAttrContains, and every other kind
// matches nothing.
func (f *queryFilter) Match(n *html.Node) bool {
	switch f.kind {
	case "isAttrContains":
		return f.matchIsAttrContains(n)
	default:
		return false
	}
}
// newQueryFilterIsAttrContains builds a queryFilter of kind "isAttrContains"
// whose options carry attrName under "attrName" and value under "value".
func newQueryFilterIsAttrContains(attrName string, value string) *queryFilter {
	return &queryFilter{
		kind: "isAttrContains",
		options: map[string]interface{}{
			"attrName": attrName,
			"value":    value,
		},
	}
}
// AttrContains passes nodes through cascadia.Filter using an "isAttrContains"
// filter built from attrName and value, and returns the filtered slice. The
// filter accepts a node when its attrName attribute contains value as a
// substring.
func AttrContains(nodes []*html.Node, attrName string, value string) []*html.Node {
	return cascadia.Filter(nodes, newQueryFilterIsAttrContains(attrName, value))
}
// TreeToHTML renders n with html.Render into an in-memory buffer and returns
// the buffer's contents.
//
// Any error reported by html.Render is discarded; the returned bytes are
// whatever the buffer holds once Render returns.
func TreeToHTML(n *html.Node) []byte {
	var out bytes.Buffer
	// The signature has no error result, so the render error is dropped here.
	_ = html.Render(&out, n)
	return out.Bytes()
}
// HtmlToTree parses raw with html.Parse and returns the resulting node
// together with any error html.Parse reports.
func HtmlToTree(raw []byte) (*html.Node, error) {
	src := bytes.NewReader(raw)
	return html.Parse(src)
}