diff --git a/.gitignore b/.gitignore index 0517a76..9e69057 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ dev_app_* -config.yaml \ No newline at end of file +config.yaml +*_darwin.bin +*_amd64.bin diff --git a/test_cf_cache/Readme.md b/test_cf_cache/Readme.md new file mode 100644 index 0000000..80a1a3e --- /dev/null +++ b/test_cf_cache/Readme.md @@ -0,0 +1,20 @@ +## CloudFlare Cache Tester +#### This tool scrapes a URL to gather the local static asset files, such as images, CSS and JS, and returns the relevant headers and the status code +
### Usage +``` +Usage of ./cfcache_darwin.bin: + -asset string + The type of the assets (default "all") + Choices: ['css', 'js', 'images', 'all'] + -url string + The site URL +``` + +### Examples +``` +./cfcache_darwin.bin --url=https://site.tld +./cfcache_darwin.bin --url=https://site.tld --asset=css +./cfcache_darwin.bin --url=https://site.tld --asset=js +./cfcache_darwin.bin --url=https://site.tld --asset=images +``` \ No newline at end of file diff --git a/test_cf_cache/assets.go b/test_cf_cache/assets.go new file mode 100644 index 0000000..23f2c8e --- /dev/null +++ b/test_cf_cache/assets.go @@ -0,0 +1,62 @@ +package main + +import ( + "fmt" + + "github.com/PuerkitoBio/goquery" + "github.com/RamyAllam/golang_journey/test_cf_cache/scrape" +) + +func generateImagesList(document *goquery.Document, siteUrl *string) []string { + assets := scrape.GetImages(document) + assetsList := filterUrls(*siteUrl, assets) + + if len(assetsList) > 0 { + fmt.Println("Images List:") + + for i, v := range assetsList { + i += 1 + fmt.Printf("%d) %s\n", i, v) + } + } else { + fmt.Println("Images List: Empty") + } + + return assetsList +} + +func generateCSSList(document *goquery.Document, siteUrl *string) []string { + assets := scrape.GetCss(document) + assetsList := filterUrls(*siteUrl, assets) + + if len(assetsList) > 0 { + fmt.Println("CSS List:") + + for i, v := range assetsList { + i += 1 + fmt.Printf("%d) %s\n", i, v) + } + } else 
{ + fmt.Println("CSS List: Empty") + } + + return assetsList +} + +func generateJSList(document *goquery.Document, siteUrl *string) []string { + assets := scrape.GetJs(document) + assetsList := filterUrls(*siteUrl, assets) + + if len(assetsList) > 0 { + fmt.Println("JS List:") + + for i, v := range assetsList { + i += 1 + fmt.Printf("%d) %s\n", i, v) + } + } else { + fmt.Println("JS List: Empty") + } + + return assetsList +} diff --git a/test_cf_cache/cloudflare/http.go b/test_cf_cache/cloudflare/http.go new file mode 100644 index 0000000..7ce5cd4 --- /dev/null +++ b/test_cf_cache/cloudflare/http.go @@ -0,0 +1,20 @@ +package cloudflare + +import ( + "net/http" + "time" +) + +func GetResponseData(url string, timeoutLimit time.Duration) (http.Header, int) { + client := http.Client{ + Timeout: timeoutLimit * time.Second, + } + + r, err := client.Get(url) + + if err != nil { + panic(err) + } + + return r.Header, r.StatusCode +} diff --git a/test_cf_cache/cloudflare/report.go b/test_cf_cache/cloudflare/report.go new file mode 100644 index 0000000..cf1455e --- /dev/null +++ b/test_cf_cache/cloudflare/report.go @@ -0,0 +1,15 @@ +package cloudflare + +import "fmt" + +func Report(siteUrl string) { + headers, code := GetResponseData(siteUrl, 10) + + fmt.Println("Status Code:", code) + fmt.Println("Cf-Cache-Status:", headers.Get("Cf-Cache-Status")) + fmt.Println("Cache-Control:", headers.Get("Cache-Control")) + fmt.Println("ki-Cache-Tag:", headers.Get("ki-Cache-Tag")) + fmt.Println("ki-edge:", headers.Get("ki-edge")) + fmt.Println("X-Edge-Location-Klb:", headers.Get("X-Edge-Location-Klb")) + fmt.Println("CF-Ray:", headers.Get("CF-Ray")) +} diff --git a/test_cf_cache/go.mod b/test_cf_cache/go.mod new file mode 100644 index 0000000..617aa3c --- /dev/null +++ b/test_cf_cache/go.mod @@ -0,0 +1,5 @@ +module github.com/RamyAllam/golang_journey/test_cf_cache + +go 1.16 + +require github.com/PuerkitoBio/goquery v1.7.0 diff --git a/test_cf_cache/go.sum b/test_cf_cache/go.sum new file 
mode 100644 index 0000000..ec140c2 --- /dev/null +++ b/test_cf_cache/go.sum @@ -0,0 +1,10 @@ +github.com/PuerkitoBio/goquery v1.7.0 h1:O5SP3b9JWqMSVMG69zMfj577zwkSNpxrFf7ybS74eiw= +github.com/PuerkitoBio/goquery v1.7.0/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc= +github.com/andybalholm/cascadia v1.1.0 h1:BuuO6sSfQNFRu1LppgbD25Hr2vLYW25JvxHs5zzsLTo= +github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20200202094626-16171245cfb2 h1:CCH4IOTTfewWjGOlSp+zGcjutRKlBEZQ6wTn8ozI/nI= +golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= diff --git a/test_cf_cache/main.go b/test_cf_cache/main.go new file mode 100644 index 0000000..117fc21 --- /dev/null +++ b/test_cf_cache/main.go @@ -0,0 +1,82 @@ +package main + +import ( + "flag" + "fmt" + "log" + + "github.com/RamyAllam/golang_journey/test_cf_cache/cloudflare" + "github.com/RamyAllam/golang_journey/test_cf_cache/scrape" +) + +func main() { + + banner := + ` + _____ ______ _____ _ _____ _ + / __ \| ___| / __ \ | | |_ _| | | + | / \/| |_ | / \/ __ _ ___| |__ ___ | | ___ ___| |_ ___ _ __ + | | | _| | | / _- |/ __| '_ \ / _ \ | |/ _ \/ __| __/ _ \ '__| + | \__/\| | | \__/\ (_| | (__| | | | __/ | | __/\__ \ || __/ | + \____/\_| \____/\__,_|\___|_| |_|\___| \_/\___||___/\__\___|_| + ` + + fmt.Println(banner) + + assetType := flag.String( + "asset", "all", "The type of the assets", + ) + + siteUrl := flag.String( + "url", "", "The site URL", + ) + + flag.Parse() + + if len(*siteUrl) < 5 { + 
 log.Fatal("Please enter a valid URL") + } + + // Create a goquery document from the HTTP Response + document, err := scrape.GetDocument(*siteUrl) + + if err != nil { + log.Fatal("Error loading HTTP Response Body", err) + } + + if (*assetType) == "images" || (*assetType) == "all" { + fmt.Println() + imagesList := generateImagesList(document, siteUrl) + + for _, v := range imagesList { + fmt.Println("Testing URL: ", v) + cloudflare.Report(v) + fmt.Println("----------------------") + } + + } + + if (*assetType) == "css" || (*assetType) == "all" { + fmt.Println() + + cssList := generateCSSList(document, siteUrl) + for _, v := range cssList { + fmt.Println("Testing URL: ", v) + cloudflare.Report(v) + fmt.Println("----------------------") + } + } + + if (*assetType) == "js" || (*assetType) == "all" { + fmt.Println() + + jsList := generateJSList(document, siteUrl) + + for _, v := range jsList { + fmt.Println("Testing URL: ", v) + cloudflare.Report(v) + fmt.Println("----------------------") + } + } + +} diff --git a/test_cf_cache/parser.go b/test_cf_cache/parser.go new file mode 100644 index 0000000..8323660 --- /dev/null +++ b/test_cf_cache/parser.go @@ -0,0 +1,68 @@ +package main + +import ( + "fmt" + "net/url" + "strings" +) + +func filterUrls(srcUrl string, destUrls []string) []string { + /* + This function checks the list of the scraped URLs + and makes sure they are all of the source site and not external ones + Params: + - srcUrl: The URL that the user specifies (string) + - destUrls: The list of the scraped URLs ([]string) + Terms: + - Source URL: The URL that the user specifies + - Dest URL: The URLs we gather from the scraping process + */ + var results []string + + /* + Parse the source URL + */ + srcUrlParsed, err := url.Parse(srcUrl) + if err != nil { + panic(err) + } + + // Get the host attribute of the source URL + srcHost := srcUrlParsed.Host + srcScheme := srcUrlParsed.Scheme + + /* + Parse the destination URL + */ + // Loop through the slice of the dest 
URLs + for _, v := range destUrls { + vDestParsed, err := url.Parse(v) + if err != nil { + panic(err) + } + + // Get the host attribute of the destination URL + // Check if the values are all for the same site, not external site + destHost := vDestParsed.Host + + if srcHost == destHost { + results = append(results, v) + } + + // Skip values for external URLs and a mixed content + // Ex. //cdn-images.mailchimp.com/embedcode/classic-10_7.css + if strings.HasPrefix(v, "//") { + continue + } + + // Handle Relative URLs + // Ex. css/modern.css + if !strings.HasPrefix(v, "http") { + fullRelativeUrl := fmt.Sprintf("%s://%s/%s", srcScheme, srcHost, v) + results = append(results, fullRelativeUrl) + } + + } + + return results +} diff --git a/test_cf_cache/scrape/css.go b/test_cf_cache/scrape/css.go new file mode 100644 index 0000000..eb8a2a6 --- /dev/null +++ b/test_cf_cache/scrape/css.go @@ -0,0 +1,30 @@ +package scrape + +import ( + "github.com/PuerkitoBio/goquery" +) + +func GetCss(document *goquery.Document) []string { + var urls []string + + // Find URLs + document.Find("link").Each(func(i int, element *goquery.Selection) { + // Get the rel attribute + value, exists := element.Attr("rel") + + if exists { + // Check if the rel is stylesheet + if value == "stylesheet" { + + // Get the href value + value, exists = element.Attr("href") + + if exists { + urls = append(urls, value) + } + } + } + }) + + return urls +} diff --git a/test_cf_cache/scrape/document.go b/test_cf_cache/scrape/document.go new file mode 100644 index 0000000..6297653 --- /dev/null +++ b/test_cf_cache/scrape/document.go @@ -0,0 +1,25 @@ +package scrape + +import ( + "log" + "net/http" + + "github.com/PuerkitoBio/goquery" +) + +func GetDocument(url string) (*goquery.Document, error) { + + // Make HTTP Request + response, err := http.Get(url) + + if err != nil { + log.Fatal(err) + } + + defer response.Body.Close() + + // Create a goquery document from the HTTP Response + document, err := 
goquery.NewDocumentFromReader(response.Body) + + return document, err +} diff --git a/test_cf_cache/scrape/images.go b/test_cf_cache/scrape/images.go new file mode 100644 index 0000000..3f4edb4 --- /dev/null +++ b/test_cf_cache/scrape/images.go @@ -0,0 +1,20 @@ +package scrape + +import ( + "github.com/PuerkitoBio/goquery" +) + +func GetImages(document *goquery.Document) []string { + var urls []string + + // Find tags + document.Find("img").Each(func(i int, element *goquery.Selection) { + // Get the src value + value, exists := element.Attr("src") + if exists { + urls = append(urls, value) + } + }) + + return urls +} diff --git a/test_cf_cache/scrape/js.go b/test_cf_cache/scrape/js.go new file mode 100644 index 0000000..e85b9e9 --- /dev/null +++ b/test_cf_cache/scrape/js.go @@ -0,0 +1,22 @@ +package scrape + +import ( + "github.com/PuerkitoBio/goquery" +) + +func GetJs(document *goquery.Document) []string { + var urls []string + + // Find tags + document.Find("script").Each(func(i int, element *goquery.Selection) { + + // Get the src value + value, exists := element.Attr("src") + + if exists { + urls = append(urls, value) + } + }) + + return urls +}