Skip to content

Commit

Permalink
Push: test_cf_cache App
Browse files Browse the repository at this point in the history
  • Loading branch information
RamyAllam committed Jun 21, 2021
1 parent 31b338a commit 2297390
Show file tree
Hide file tree
Showing 13 changed files with 382 additions and 1 deletion.
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
dev_app_*
config.yaml
config.yaml
*_darwin.bin
*_amd64.bin
20 changes: 20 additions & 0 deletions test_cf_cache/Readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
## CloudFlare Cache Tester
#### This tool scrapes a URL to gather its local static asset files (images, CSS and JS), then reports the status code and the relevant cache headers for each of them

### Usage
```
Usage of ./cfcache_darwin.bin:
-asset string
The type of the assets (default "all")
Choices: ['css', 'js', 'images', 'all']
-url string
The site URL
```

### Examples
```
./cfcache_darwin.bin --url=https://site.tld
./cfcache_darwin.bin --url=https://site.tld --asset=css
./cfcache_darwin.bin --url=https://site.tld --asset=js
./cfcache_darwin.bin --url=https://site.tld --asset=images
```
62 changes: 62 additions & 0 deletions test_cf_cache/assets.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
package main

import (
"fmt"

"github.com/PuerkitoBio/goquery"
"github.com/RamyAllam/golang_journey/test_cf_cache/scrape"
)

// generateImagesList gathers the image URLs scraped from document, passes
// them through filterUrls together with *siteUrl, prints the surviving URLs as
// a numbered list (or an "Empty" notice when there are none) and returns them.
func generateImagesList(document *goquery.Document, siteUrl *string) []string {
	scraped := scrape.GetImages(document)
	images := filterUrls(*siteUrl, scraped)

	// Nothing survived the filter: say so and hand back the empty result.
	if len(images) == 0 {
		fmt.Println("Images List: Empty")
		return images
	}

	fmt.Println("Images List:")
	for idx := range images {
		// The printed numbering is 1-based.
		fmt.Printf("%d) %s\n", idx+1, images[idx])
	}

	return images
}

// generateCSSList gathers the stylesheet URLs scraped from document, passes
// them through filterUrls together with *siteUrl, prints the surviving URLs as
// a numbered list (or an "Empty" notice when there are none) and returns them.
func generateCSSList(document *goquery.Document, siteUrl *string) []string {
	scraped := scrape.GetCss(document)
	stylesheets := filterUrls(*siteUrl, scraped)

	// Nothing survived the filter: say so and hand back the empty result.
	if len(stylesheets) == 0 {
		fmt.Println("CSS List: Empty")
		return stylesheets
	}

	fmt.Println("CSS List:")
	for idx := range stylesheets {
		// The printed numbering is 1-based.
		fmt.Printf("%d) %s\n", idx+1, stylesheets[idx])
	}

	return stylesheets
}

// generateJSList gathers the script URLs scraped from document, passes them
// through filterUrls together with *siteUrl, prints the surviving URLs as a
// numbered list (or an "Empty" notice when there are none) and returns them.
func generateJSList(document *goquery.Document, siteUrl *string) []string {
	scraped := scrape.GetJs(document)
	scripts := filterUrls(*siteUrl, scraped)

	// Nothing survived the filter: say so and hand back the empty result.
	if len(scripts) == 0 {
		fmt.Println("JS List: Empty")
		return scripts
	}

	fmt.Println("JS List:")
	for idx := range scripts {
		// The printed numbering is 1-based.
		fmt.Printf("%d) %s\n", idx+1, scripts[idx])
	}

	return scripts
}
20 changes: 20 additions & 0 deletions test_cf_cache/cloudflare/http.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package cloudflare

import (
"net/http"
"time"
)

// GetResponseData performs a GET request against url and returns the response
// headers together with the HTTP status code.
//
// timeoutLimit is interpreted as a number of seconds: it is multiplied by
// time.Second to build the client timeout, so callers pass a plain count
// (for example 10) rather than an already-scaled duration.
//
// The function panics if the request cannot be completed.
func GetResponseData(url string, timeoutLimit time.Duration) (http.Header, int) {
	client := http.Client{
		Timeout: timeoutLimit * time.Second,
	}

	r, err := client.Get(url)
	if err != nil {
		panic(err)
	}
	// Only the headers and the status code are used. The body must still be
	// closed, otherwise every call leaks the underlying connection.
	defer r.Body.Close()

	return r.Header, r.StatusCode
}
15 changes: 15 additions & 0 deletions test_cf_cache/cloudflare/report.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
package cloudflare

import "fmt"

// Report requests siteUrl through GetResponseData (passing 10 as the timeout
// limit) and prints the status code followed by a fixed set of cache-related
// response headers, one "Name: value" line each.
func Report(siteUrl string) {
	headers, code := GetResponseData(siteUrl, 10)

	fmt.Println("Status Code:", code)

	// Each header is printed under exactly the name it is looked up by.
	reported := []string{
		"Cf-Cache-Status",
		"Cache-Control",
		"ki-Cache-Tag",
		"ki-edge",
		"X-Edge-Location-Klb",
		"CF-Ray",
	}
	for _, name := range reported {
		fmt.Println(name+":", headers.Get(name))
	}
}
5 changes: 5 additions & 0 deletions test_cf_cache/go.mod
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
module github.com/RamyAllam/golang_journey/test_cf_cache

go 1.16

require github.com/PuerkitoBio/goquery v1.7.0
10 changes: 10 additions & 0 deletions test_cf_cache/go.sum
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
github.com/PuerkitoBio/goquery v1.7.0 h1:O5SP3b9JWqMSVMG69zMfj577zwkSNpxrFf7ybS74eiw=
github.com/PuerkitoBio/goquery v1.7.0/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc=
github.com/andybalholm/cascadia v1.1.0 h1:BuuO6sSfQNFRu1LppgbD25Hr2vLYW25JvxHs5zzsLTo=
github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20200202094626-16171245cfb2 h1:CCH4IOTTfewWjGOlSp+zGcjutRKlBEZQ6wTn8ozI/nI=
golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
82 changes: 82 additions & 0 deletions test_cf_cache/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
package main

import (
"flag"
"fmt"
"log"

"github.com/RamyAllam/golang_journey/test_cf_cache/cloudflare"
"github.com/RamyAllam/golang_journey/test_cf_cache/scrape"
)

// main parses the command-line flags, scrapes the page given by --url and
// prints a cache report for every asset of the type selected with --asset
// ("images", "css", "js" or "all").
func main() {
	banner := `
_____ ______ _____ _ _____ _
/ __ \| ___| / __ \ | | |_ _| | |
| / \/| |_ | / \/ __ _ ___| |__ ___ | | ___ ___| |_ ___ _ __
| | | _| | | / _- |/ __| '_ \ / _ \ | |/ _ \/ __| __/ _ \ '__|
| \__/\| | | \__/\ (_| | (__| | | | __/ | | __/\__ \ || __/ |
\____/\_| \____/\__,_|\___|_| |_|\___| \_/\___||___/\__\___|_|
`

	fmt.Println(banner)

	assetType := flag.String(
		"asset", "all", "The type of the assets",
	)

	siteUrl := flag.String(
		"url", "", "The site URL",
	)

	flag.Parse()

	if len(*siteUrl) < 5 {
		log.Fatal("Please enter a valid URL")
	}

	// Reject unknown asset types before doing any network work. An
	// unrecognised value would otherwise match none of the branches below
	// and the tool would exit without printing anything.
	switch *assetType {
	case "images", "css", "js", "all":
	default:
		log.Fatalf("Unknown asset type %q, choices are: css, js, images, all", *assetType)
	}

	// Create a goquery document from the HTTP Response
	document, err := scrape.GetDocument(*siteUrl)
	if err != nil {
		// Fatalf with an explicit separator: log.Fatal would print the
		// message and the error glued together without a space.
		log.Fatalf("Error loading HTTP Response Body: %v", err)
	}

	wantAll := *assetType == "all"

	if wantAll || *assetType == "images" {
		fmt.Println()
		reportAssets(generateImagesList(document, siteUrl))
	}

	if wantAll || *assetType == "css" {
		fmt.Println()
		reportAssets(generateCSSList(document, siteUrl))
	}

	if wantAll || *assetType == "js" {
		fmt.Println()
		reportAssets(generateJSList(document, siteUrl))
	}
}

// reportAssets prints a cache report, followed by a separator line, for each
// URL in urls.
func reportAssets(urls []string) {
	for _, u := range urls {
		fmt.Println("Testing URL: ", u)
		cloudflare.Report(u)
		fmt.Println("----------------------")
	}
}
68 changes: 68 additions & 0 deletions test_cf_cache/parser.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
package main

import (
"fmt"
"net/url"
"strings"
)

// filterUrls keeps the scraped URLs that belong to the source site and
// returns them as absolute URLs.
//
// Params:
//   - srcUrl: the URL that the user specifies (string)
//   - destUrls: the list of the scraped URLs ([]string)
//
// Every scraped URL is resolved against srcUrl, so relative paths
// ("css/modern.css"), root-relative paths ("/css/modern.css") and
// protocol-relative references ("//site.tld/app.js") all become complete
// URLs. A resolved URL is kept only when its scheme is http or https and its
// host matches the host of srcUrl; external hosts and non-HTTP references
// such as "data:" URIs are dropped. A scraped value that cannot be parsed is
// reported on standard output and skipped.
//
// The function panics if srcUrl itself cannot be parsed.
func filterUrls(srcUrl string, destUrls []string) []string {
	var results []string

	// Parse the source URL; it is the base every scraped URL is resolved
	// against.
	base, err := url.Parse(srcUrl)
	if err != nil {
		panic(err)
	}

	for _, v := range destUrls {
		ref, err := url.Parse(strings.TrimSpace(v))
		if err != nil {
			// One malformed attribute on the page must not abort the
			// whole run.
			fmt.Printf("Skipping malformed URL %q: %v\n", v, err)
			continue
		}

		resolved := base.ResolveReference(ref)

		// Only http(s) resources can be requested by the tool.
		if resolved.Scheme != "http" && resolved.Scheme != "https" {
			continue
		}

		// Skip external sites. Host names are case-insensitive.
		if !strings.EqualFold(resolved.Host, base.Host) {
			continue
		}

		results = append(results, resolved.String())
	}

	return results
}
30 changes: 30 additions & 0 deletions test_cf_cache/scrape/css.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
package scrape

import (
"github.com/PuerkitoBio/goquery"
)

// GetCss returns the href value of every <link> element in document whose rel
// attribute is exactly "stylesheet". Links without an href are ignored.
func GetCss(document *goquery.Document) []string {
	var hrefs []string

	document.Find("link").Each(func(_ int, link *goquery.Selection) {
		// Only stylesheet links are of interest.
		rel, hasRel := link.Attr("rel")
		if !hasRel || rel != "stylesheet" {
			return
		}

		if href, hasHref := link.Attr("href"); hasHref {
			hrefs = append(hrefs, href)
		}
	})

	return hrefs
}
25 changes: 25 additions & 0 deletions test_cf_cache/scrape/document.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
package scrape

import (
"log"
"net/http"

"github.com/PuerkitoBio/goquery"
)

// GetDocument downloads url and parses the response body into a goquery
// document.
//
// A failed request is returned as an error instead of terminating the
// process, so the caller decides how to react. A response with a non-2xx
// status is still parsed, but a warning is logged because the body is then
// probably an error page.
func GetDocument(url string) (*goquery.Document, error) {
	// Make HTTP Request
	response, err := http.Get(url)
	if err != nil {
		return nil, err
	}
	defer response.Body.Close()

	if response.StatusCode < 200 || response.StatusCode > 299 {
		log.Printf("warning: %s responded with status %s", url, response.Status)
	}

	// Create a goquery document from the HTTP Response
	return goquery.NewDocumentFromReader(response.Body)
}
20 changes: 20 additions & 0 deletions test_cf_cache/scrape/images.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package scrape

import (
"github.com/PuerkitoBio/goquery"
)

// GetImages returns the src value of every <img> element in document that has
// a src attribute, in the order goquery visits them.
func GetImages(document *goquery.Document) []string {
	var sources []string

	document.Find("img").Each(func(_ int, img *goquery.Selection) {
		src, found := img.Attr("src")
		if !found {
			// Images without a src attribute contribute nothing.
			return
		}
		sources = append(sources, src)
	})

	return sources
}
22 changes: 22 additions & 0 deletions test_cf_cache/scrape/js.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
package scrape

import (
"github.com/PuerkitoBio/goquery"
)

// GetJs returns the src value of every <script> element in document that has
// a src attribute; scripts without one are skipped.
func GetJs(document *goquery.Document) []string {
	var sources []string

	document.Find("script").Each(func(_ int, script *goquery.Selection) {
		src, found := script.Attr("src")
		if !found {
			// No src attribute, so there is no URL to collect.
			return
		}
		sources = append(sources, src)
	})

	return sources
}

0 comments on commit 2297390

Please sign in to comment.