-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.go
148 lines (124 loc) · 3.84 KB
/
main.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
// USAGE:
//
// root@workstation:~/tools/punfurl# echo "https://support.google.com/google-ads/answer/2472708?hl=en-GB" | go run ~/tools/punfurl/main.go
// https://support.google.com/google-ads
// https://support.google.com/answer
// https://support.google.com/google-ads/answer
// https://support.google.com/2472708
// https://support.google.com/google-ads/2472708
// https://support.google.com/answer/2472708
// https://support.google.com/google-ads/answer/2472708
// https://support.google.com
//
package main
import (
"bufio"
"flag"
"fmt"
"os"
"strings"
"net/url"
"regexp"
)
// main reads URLs from standard input, one per line. For each URL it prints
// scheme://host followed by every order-preserving combination of the URL's
// path segments, and finally the bare scheme://host.
//
// Flags:
//
//	-v, -verbose  report lines that fail to parse on standard error
//
// Lines containing a base64 data URI are skipped, as are individual path
// segments that contain a dot. Duplicate output lines are not suppressed.
// The process exits with status 1 if reading standard input fails.
func main() {
	var verbose bool
	flag.BoolVar(&verbose, "v", false, "")
	flag.BoolVar(&verbose, "verbose", false, "")
	flag.Parse()

	// Compiled once, outside the read loop, rather than per input line.
	dataURI := regexp.MustCompile(`data\:\w+\/\w+\;base64`)

	scanner := bufio.NewScanner(os.Stdin)
	scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024) // accept lines up to 1 MiB

	isSlash := func(c rune) bool { return c == '/' }

	for scanner.Scan() {
		line := scanner.Text()

		// base64 data URIs are usually inline images that we don't want.
		if dataURI.MatchString(line) {
			continue
		}

		u, err := parseURL(line)
		if err != nil {
			if verbose {
				fmt.Fprintf(os.Stderr, "parse failure: %s\n", err)
			}
			continue
		}

		// Split the path on slashes and drop any segment containing a dot;
		// what is left feeds the combination stage below.
		segments := strings.FieldsFunc(u.Path, isSlash)
		routes := make([]string, 0, len(segments))
		for _, seg := range segments {
			if !strings.Contains(seg, ".") {
				routes = append(routes, seg)
			}
		}

		// NOTE: duplicate suppression is not performed here; a seen-map based
		// check that used to live at this point is disabled.

		base := u.Scheme + "://" + u.Host

		// The power set keeps the relative position of the path segments while
		// giving a succinct variation of each one, which is useful for fuzzing.
		for _, combo := range PowerSet(routes) {
			if len(combo) > 0 {
				fmt.Println(base + "/" + strings.Join(combo, "/"))
			}
		}

		// Print the host on its own so we don't lose it.
		fmt.Println(base)
	}

	if err := scanner.Err(); err != nil {
		fmt.Fprintln(os.Stderr, "reading standard input:", err)
		// Signal the truncated run to callers instead of exiting 0.
		os.Exit(1)
	}
}
// parseURL parses raw as a URL and returns the resulting *url.URL or any
// error that occurred.
//
// If the parsed URL has no scheme, "http://" is prepended and the string is
// re-parsed. The same retry is attempted when the first parse fails and raw
// contains no "://": scheme-less input such as "192.168.0.1:8080/admin" is
// rejected by url.Parse ("first path segment in URL cannot contain colon")
// yet parses fine once a scheme is supplied. If the retry fails as well, the
// error from the first parse is returned.
//
// Adapted from https://github.com/tomnomnom/unfurl/blob/master/main.go
func parseURL(raw string) (*url.URL, error) {
	u, err := url.Parse(raw)
	if err != nil {
		if !strings.Contains(raw, "://") {
			if retried, retryErr := url.Parse("http://" + raw); retryErr == nil {
				return retried, nil
			}
		}
		return nil, err
	}

	if u.Scheme == "" {
		return url.Parse("http://" + raw)
	}

	return u, nil
}
// copyAndAppendString returns a newly allocated slice holding the elements of
// slice followed by elem. The input slice's backing array is never written to.
func copyAndAppendString(slice []string, elem string) []string {
	out := make([]string, len(slice)+1)
	copy(out, slice)
	out[len(slice)] = elem
	return out
}
// PowerSet returns every subset of s, the empty subset included, with the
// elements of each subset kept in the same relative order as in s. That
// ordering is what makes it useful for things like API paths.
//
// A nil input yields nil; a non-nil input of length n yields 2^n subsets.
func PowerSet(s []string) [][]string {
	if s == nil {
		return nil
	}

	subsets := [][]string{{}}
	for _, elem := range s {
		// Extend only the subsets that existed before elem was considered;
		// the ones added during this pass already contain it.
		existing := len(subsets)
		for i := 0; i < existing; i++ {
			subsets = append(subsets, copyAndAppendString(subsets[i], elem))
		}
	}
	return subsets
}