-
Notifications
You must be signed in to change notification settings - Fork 118
/
fingerprint_body.go
156 lines (137 loc) · 3.71 KB
/
fingerprint_body.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
package wappalyzer
import (
"bytes"
"unsafe"
"golang.org/x/net/html"
)
// checkBody runs the body-based fingerprint checks over an HTML document.
//
// The raw body is first matched as a whole (htmlPart). The document is then
// tokenized and two kinds of tags are inspected:
//   - <script> start tags: the src attribute value is matched (scriptPart)
//   - <meta> start or self-closing tags: name/content are matched (metaPart)
//
// All matches are accumulated and returned once the tokenizer reports
// html.ErrorToken (end of input or a tokenization error).
func (s *Wappalyze) checkBody(body []byte) []matchPartResult {
	// Whole-document pass; the string view shares memory with body.
	matches := append(
		[]matchPartResult(nil),
		s.fingerprints.matchString(unsafeToString(body), htmlPart)...,
	)

	z := html.NewTokenizer(bytes.NewReader(body))
	for {
		kind := z.Next()
		if kind == html.ErrorToken {
			return matches
		}
		if kind != html.StartTagToken && kind != html.SelfClosingTagToken {
			continue
		}

		tag := z.Token()
		switch {
		case tag.Data == "meta":
			// For meta tags only the name and content attributes matter.
			if name, content, ok := getMetaNameAndContent(tag); ok {
				matches = append(
					matches,
					s.fingerprints.matchKeyValueString(name, content, metaPart)...,
				)
			}
		case tag.Data == "script" && kind == html.StartTagToken:
			// Self-closing script tags are intentionally not inspected here.
			if src, ok := getScriptSource(tag); ok {
				matches = append(
					matches,
					s.fingerprints.matchString(src, scriptPart)...,
				)
				continue
			}
			// No source to check: step over the token that follows the tag,
			// which should be the inline script contents.
			//
			// TODO: JS requires a running VM, for checking properties. Only
			// possible with headless for now :(
			// The consumed text would otherwise be matched against jsPart.
			_ = z.Next()
		}
	}
}
// getTitle returns the text of the first <title> element in body that has
// text content. It returns an empty string when the document has no such
// element.
//
// Only the first title is used: later <title> start tags (for example the
// ones nested in inline SVG) must not replace the document title, and there
// is no reason to keep tokenizing once the title is known.
func (s *Wappalyze) getTitle(body []byte) string {
	tokenizer := html.NewTokenizer(bytes.NewReader(body))
	for {
		switch tokenizer.Next() {
		case html.ErrorToken:
			// End of input or tokenizer error without finding a title.
			return ""
		case html.StartTagToken:
			if tokenizer.Token().Data != "title" {
				continue
			}
			// The token right after the opening tag carries the title text.
			// Anything else (e.g. the end tag of an empty <title></title>)
			// means this element has no text, so keep looking.
			if tokenizer.Next() != html.TextToken {
				continue
			}
			return tokenizer.Token().Data
		}
	}
}
// getMetaNameAndContent extracts the values of the "name" and "content"
// attributes from a meta tag token.
//
// The boolean result is false only when the token carries fewer than
// keyValuePairLength attributes; otherwise it is true and each returned
// string is the attribute value, or empty if that attribute is absent.
// When an attribute key is repeated, the last occurrence wins.
func getMetaNameAndContent(token html.Token) (string, string, bool) {
	attrs := token.Attr
	if len(attrs) < keyValuePairLength {
		return "", "", false
	}

	var metaName, metaContent string
	for i := range attrs {
		if attrs[i].Key == "name" {
			metaName = attrs[i].Val
		} else if attrs[i].Key == "content" {
			metaContent = attrs[i].Val
		}
	}
	return metaName, metaContent, true
}
// getScriptSource returns the value of the src attribute of a script tag
// token.
//
// The boolean result reports whether a src attribute is present at all. A
// script tag that only carries other attributes (type, async, nonce, ...)
// yields ("", false), so callers can treat it as an inline script instead of
// matching an empty source string. When src is repeated, the last occurrence
// wins.
func getScriptSource(token html.Token) (string, bool) {
	var (
		source string
		found  bool
	)
	for _, attr := range token.Attr {
		if attr.Key == "src" {
			source, found = attr.Val, true
		}
	}
	return source, found
}
// unsafeToString reinterprets a byte slice as a string without copying the
// bytes: the returned string shares the slice's backing array.
//
// NOTE: Only use this when it is certain that the underlying array will not
// be modified while the string is in use, since strings are expected to be
// immutable.
//
// Reference - https://github.com/golang/go/issues/25484
func unsafeToString(data []byte) string {
	// View the slice header through a string-header pointer and read it back.
	header := unsafe.Pointer(&data)
	return *(*string)(header)
}