From 25ca68cbebcafbde1de333e5761f3b430cb05e14 Mon Sep 17 00:00:00 2001
From: Ilya Milov
Date: Tue, 16 May 2023 16:39:52 +0200
Subject: [PATCH] Add provider field in json output

---
 cmd/gau/main.go                          |  9 +++---
 pkg/output/output.go                     | 29 ++++++++++---------
 pkg/providers/commoncrawl/commoncrawl.go |  8 +++--
 pkg/providers/otx/otx.go                 |  9 ++++--
 pkg/providers/providers.go               |  4 ++-
 pkg/providers/urlscan/urlscan.go         | 11 +++++--
 pkg/providers/wayback/wayback.go         |  9 ++++--
 runner/flags/flags.go                    | 37 +++++++++++++++++-------
 runner/runner.go                         | 16 +++++-----
 9 files changed, 87 insertions(+), 45 deletions(-)

diff --git a/cmd/gau/main.go b/cmd/gau/main.go
index f1182bc..37dc5ed 100644
--- a/cmd/gau/main.go
+++ b/cmd/gau/main.go
@@ -2,13 +2,14 @@ package main
 
 import (
 	"bufio"
+	"io"
+	"os"
+	"sync"
+
 	"github.com/lc/gau/v2/pkg/output"
 	"github.com/lc/gau/v2/runner"
 	"github.com/lc/gau/v2/runner/flags"
 	log "github.com/sirupsen/logrus"
-	"io"
-	"os"
-	"sync"
 )
 
 func main() {
@@ -36,7 +37,7 @@ func main() {
 		log.Warn(err)
 	}
 
-	results := make(chan string)
+	results := make(chan output.Result)
 	var out io.Writer
 
 	// Handle results in background
diff --git a/pkg/output/output.go b/pkg/output/output.go
index a5bdbe7..1f17ba8 100644
--- a/pkg/output/output.go
+++ b/pkg/output/output.go
@@ -1,24 +1,27 @@
 package output
 
 import (
-	jsoniter "github.com/json-iterator/go"
-	"github.com/valyala/bytebufferpool"
 	"io"
 	"net/url"
 	"path"
 	"strings"
+
+	jsoniter "github.com/json-iterator/go"
+	"github.com/valyala/bytebufferpool"
 )
 
-type JSONResult struct {
-	Url string `json:"url"`
+// Result of lookup from providers.
+type Result struct {
+	URL      string `json:"url,omitempty"`
+	Provider string `json:"provider,omitempty"`
 }
 
-func WriteURLs(writer io.Writer, results <-chan string, blacklistMap map[string]struct{}, RemoveParameters bool) error {
+func WriteURLs(writer io.Writer, results <-chan Result, blacklistMap map[string]struct{}, RemoveParameters bool) error {
 	lastURL := make(map[string]struct{})
 	for result := range results {
 		buf := bytebufferpool.Get()
 		if len(blacklistMap) != 0 {
-			u, err := url.Parse(result)
+			u, err := url.Parse(result.URL)
 			if err != nil {
 				continue
 			}
@@ -32,19 +35,19 @@ func WriteURLs(writer io.Writer, results <-chan string, blacklistMap map[string]
 			}
 		}
 		if RemoveParameters {
-			u, err := url.Parse(result)
+			u, err := url.Parse(result.URL)
 			if err != nil {
 				continue
 			}
 			if _, ok := lastURL[u.Host+u.Path]; ok {
 				continue
 			} else {
-				lastURL[u.Host+u.Path] = struct{}{} ;
+				lastURL[u.Host+u.Path] = struct{}{}
 			}
 		}
 
-		buf.B = append(buf.B, []byte(result)...)
+		buf.B = append(buf.B, []byte(result.URL)...)
 		buf.B = append(buf.B, "\n"...)
 
 		_, err := writer.Write(buf.B)
 		if err != nil {
@@ -55,12 +58,11 @@ func WriteURLs(writer io.Writer, results <-chan string, blacklistMap map[string]
 	return nil
 }
 
-func WriteURLsJSON(writer io.Writer, results <-chan string, blacklistMap map[string]struct{}, RemoveParameters bool) {
-	var jr JSONResult
+func WriteURLsJSON(writer io.Writer, results <-chan Result, blacklistMap map[string]struct{}, RemoveParameters bool) {
 	enc := jsoniter.NewEncoder(writer)
 	for result := range results {
 		if len(blacklistMap) != 0 {
-			u, err := url.Parse(result)
+			u, err := url.Parse(result.URL)
 			if err != nil {
 				continue
 			}
@@ -73,8 +75,7 @@ func WriteURLsJSON(writer io.Writer, results <-chan string, blacklistMap map[str
 				}
 			}
 		}
-		jr.Url = result
-		if err := enc.Encode(jr); err != nil {
+		if err := enc.Encode(result); err != nil {
 			// todo: handle this error
 			continue
 		}
diff --git a/pkg/providers/commoncrawl/commoncrawl.go b/pkg/providers/commoncrawl/commoncrawl.go
index 920a71b..2fb54e6 100644
--- a/pkg/providers/commoncrawl/commoncrawl.go
+++ b/pkg/providers/commoncrawl/commoncrawl.go
@@ -9,6 +9,7 @@ import (
 
 	jsoniter "github.com/json-iterator/go"
 	"github.com/lc/gau/v2/pkg/httpclient"
+	"github.com/lc/gau/v2/pkg/output"
 	"github.com/lc/gau/v2/pkg/providers"
 	"github.com/sirupsen/logrus"
 )
@@ -55,7 +56,7 @@ func (c *Client) Name() string {
 // Fetch fetches all urls for a given domain and sends them to a channel.
 // It returns an error should one occur.
-func (c *Client) Fetch(ctx context.Context, domain string, results chan string) error {
+func (c *Client) Fetch(ctx context.Context, domain string, results chan output.Result) error {
 	p, err := c.getPagination(domain)
 	if err != nil {
 		return err
 	}
@@ -93,7 +94,10 @@ paginate:
 					return fmt.Errorf("received an error from commoncrawl: %s", res.Error)
 				}
 
-				results <- res.URL
+				results <- output.Result{
+					URL:      res.URL,
+					Provider: Name,
+				}
 			}
 		}
 	}
diff --git a/pkg/providers/otx/otx.go b/pkg/providers/otx/otx.go
index 6a32359..849f214 100644
--- a/pkg/providers/otx/otx.go
+++ b/pkg/providers/otx/otx.go
@@ -3,9 +3,11 @@ package otx
 import (
 	"context"
 	"fmt"
+
 	"github.com/bobesa/go-domain-util/domainutil"
 	jsoniter "github.com/json-iterator/go"
 	"github.com/lc/gau/v2/pkg/httpclient"
+	"github.com/lc/gau/v2/pkg/output"
 	"github.com/lc/gau/v2/pkg/providers"
 	"github.com/sirupsen/logrus"
 )
@@ -45,7 +47,7 @@ func (c *Client) Name() string {
 	return Name
 }
 
-func (c *Client) Fetch(ctx context.Context, domain string, results chan string) error {
+func (c *Client) Fetch(ctx context.Context, domain string, results chan output.Result) error {
 paginate:
 	for page := 1; ; page++ {
 		select {
@@ -66,7 +68,10 @@ paginate:
 			}
 
 			for _, entry := range result.URLList {
-				results <- entry.URL
+				results <- output.Result{
+					URL:      entry.URL,
+					Provider: Name,
+				}
 			}
 
 			if !result.HasNext {
diff --git a/pkg/providers/providers.go b/pkg/providers/providers.go
index 0766359..7247cc1 100644
--- a/pkg/providers/providers.go
+++ b/pkg/providers/providers.go
@@ -2,6 +2,8 @@ package providers
 
 import (
 	"context"
+
+	"github.com/lc/gau/v2/pkg/output"
 	"github.com/valyala/fasthttp"
 )
 
@@ -9,7 +11,7 @@ const Version = `2.1.2`
 
 // Provider is a generic interface for all archive fetchers
 type Provider interface {
-	Fetch(ctx context.Context, domain string, results chan string) error
+	Fetch(ctx context.Context, domain string, results chan output.Result) error
 	Name() string
 }
 
diff --git a/pkg/providers/urlscan/urlscan.go b/pkg/providers/urlscan/urlscan.go
index 46b58a4..c935caf 100644
--- a/pkg/providers/urlscan/urlscan.go
+++ b/pkg/providers/urlscan/urlscan.go
@@ -4,11 +4,13 @@ import (
 	"bytes"
 	"context"
 	"fmt"
+	"strings"
+
 	jsoniter "github.com/json-iterator/go"
 	"github.com/lc/gau/v2/pkg/httpclient"
+	"github.com/lc/gau/v2/pkg/output"
 	"github.com/lc/gau/v2/pkg/providers"
 	"github.com/sirupsen/logrus"
-	"strings"
 )
 
 const (
@@ -32,7 +34,7 @@ func New(c *providers.Config) *Client {
 func (c *Client) Name() string {
 	return Name
 }
-func (c *Client) Fetch(ctx context.Context, domain string, results chan string) error {
+func (c *Client) Fetch(ctx context.Context, domain string, results chan output.Result) error {
 	var searchAfter string
 	var header httpclient.Header
 
@@ -73,7 +75,10 @@ paginate:
 			total := len(result.Results)
 			for i, res := range result.Results {
 				if res.Page.Domain == domain || (c.config.IncludeSubdomains && strings.HasSuffix(res.Page.Domain, domain)) {
-					results <- res.Page.URL
+					results <- output.Result{
+						URL:      res.Page.URL,
+						Provider: Name,
+					}
 				}
 
 				if i == total-1 {
diff --git a/pkg/providers/wayback/wayback.go b/pkg/providers/wayback/wayback.go
index 3b73c79..e24b3bb 100644
--- a/pkg/providers/wayback/wayback.go
+++ b/pkg/providers/wayback/wayback.go
@@ -3,8 +3,10 @@ package wayback
 import (
 	"context"
 	"fmt"
+
 	jsoniter "github.com/json-iterator/go"
 	"github.com/lc/gau/v2/pkg/httpclient"
+	"github.com/lc/gau/v2/pkg/output"
 	"github.com/lc/gau/v2/pkg/providers"
 	"github.com/sirupsen/logrus"
 )
@@ -38,7 +40,7 @@ type waybackResult [][]string
 // Fetch fetches all urls for a given domain and sends them to a channel.
 // It returns an error should one occur.
-func (c *Client) Fetch(ctx context.Context, domain string, results chan string) error {
+func (c *Client) Fetch(ctx context.Context, domain string, results chan output.Result) error {
 	pages, err := c.getPagination(domain)
 	if err != nil {
 		return fmt.Errorf("failed to fetch wayback pagination: %s", err)
 	}
@@ -73,7 +75,10 @@ func (c *Client) Fetch(ctx context.Context, domain string, results chan string)
 			for i, entry := range result {
 				// Skip first result by default
 				if i != 0 {
-					results <- entry[0]
+					results <- output.Result{
+						URL:      entry[0],
+						Provider: Name,
+					}
 				}
 			}
 		}
diff --git a/runner/flags/flags.go b/runner/flags/flags.go
index 4af2025..3808aff 100644
--- a/runner/flags/flags.go
+++ b/runner/flags/flags.go
@@ -4,17 +4,22 @@ import (
 	"crypto/tls"
 	"flag"
 	"fmt"
+	"net/url"
+	"os"
+	"path/filepath"
+	"strings"
+	"time"
+
 	"github.com/lc/gau/v2/pkg/providers"
+	"github.com/lc/gau/v2/pkg/providers/commoncrawl"
+	"github.com/lc/gau/v2/pkg/providers/otx"
+	"github.com/lc/gau/v2/pkg/providers/urlscan"
+	"github.com/lc/gau/v2/pkg/providers/wayback"
 	"github.com/lynxsecurity/pflag"
 	"github.com/lynxsecurity/viper"
 	log "github.com/sirupsen/logrus"
 	"github.com/valyala/fasthttp"
 	"github.com/valyala/fasthttp/fasthttpproxy"
-	"net/url"
-	"os"
-	"path/filepath"
-	"strings"
-	"time"
 )
 
 type URLScanConfig struct {
@@ -101,7 +106,14 @@
 	pflag.Uint("retries", 0, "retries for HTTP client")
 	pflag.String("proxy", "", "http proxy to use")
 	pflag.StringSlice("blacklist", []string{}, "list of extensions to skip")
-	pflag.StringSlice("providers", []string{}, "list of providers to use (wayback,commoncrawl,otx,urlscan)")
+	pflag.StringSlice(
+		"providers",
+		[]string{},
+		fmt.Sprintf(
+			"list of providers to use (%s,%s,%s,%s)",
+			wayback.Name, commoncrawl.Name, otx.Name, urlscan.Name,
+		),
+	)
 	pflag.Bool("subs", false, "include subdomains of target domain")
 	pflag.Bool("fp", false, "remove different parameters of the same endpoint")
 	pflag.Bool("verbose", false, "show verbose output")
@@ -168,10 +180,15 @@ func (o *Options) DefaultConfig() *Config {
 		MaxRetries:        5,
 		IncludeSubdomains: false,
 		RemoveParameters:  false,
-		Providers:         []string{"wayback", "commoncrawl", "otx", "urlscan"},
-		Blacklist:         []string{},
-		JSON:              false,
-		Outfile:           "",
+		Providers: []string{
+			commoncrawl.Name,
+			otx.Name,
+			urlscan.Name,
+			wayback.Name,
+		},
+		Blacklist: []string{},
+		JSON:      false,
+		Outfile:   "",
 	}
 
 	o.getFlagValues(c)
diff --git a/runner/runner.go b/runner/runner.go
index d4c88a5..24b0cbd 100644
--- a/runner/runner.go
+++ b/runner/runner.go
@@ -3,13 +3,15 @@ package runner
 import (
 	"context"
 	"fmt"
+	"sync"
+
+	"github.com/lc/gau/v2/pkg/output"
 	"github.com/lc/gau/v2/pkg/providers"
 	"github.com/lc/gau/v2/pkg/providers/commoncrawl"
 	"github.com/lc/gau/v2/pkg/providers/otx"
 	"github.com/lc/gau/v2/pkg/providers/urlscan"
 	"github.com/lc/gau/v2/pkg/providers/wayback"
 	"github.com/sirupsen/logrus"
-	"sync"
 )
 
 type Runner struct {
@@ -30,14 +32,14 @@ func (r *Runner) Init(c *providers.Config, providerMap ProvidersMap) error {
 
 	for name, filters := range providerMap {
 		switch name {
-		case "urlscan":
+		case urlscan.Name:
 			r.providers = append(r.providers, urlscan.New(c))
-		case "otx":
+		case otx.Name:
 			o := otx.New(c)
 			r.providers = append(r.providers, o)
-		case "wayback":
+		case wayback.Name:
 			r.providers = append(r.providers, wayback.New(c, filters))
-		case "commoncrawl":
+		case commoncrawl.Name:
 			cc, err := commoncrawl.New(c, filters)
 			if err != nil {
 				return fmt.Errorf("error instantiating commoncrawl: %v\n", err)
@@ -50,7 +52,7 @@ func (r *Runner) Init(c *providers.Config, providerMap ProvidersMap) error {
 }
 
 // Starts starts the worker
-func (r *Runner) Start(domains chan string, results chan string) {
+func (r *Runner) Start(domains chan string, results chan output.Result) {
 	for i := uint(0); i < r.config.Threads; i++ {
 		r.wg.Add(1)
 		go func() {
@@ -66,7 +68,7 @@ func (r *Runner) Wait() {
 }
 
 // worker checks to see if the context is finished and executes the fetching process for each provider
-func (r *Runner) worker(ctx context.Context, domains chan string, results chan string) {
+func (r *Runner) worker(ctx context.Context, domains chan string, results chan output.Result) {
 work:
 	for {
 		select {