diff --git a/archiver_test.go b/archiver_test.go new file mode 100644 index 0000000..3c26cd1 --- /dev/null +++ b/archiver_test.go @@ -0,0 +1,210 @@ +package obelisk + +import ( + "context" + "net/http" + "net/http/httptest" + "testing" + + "github.com/stretchr/testify/assert" +) + +//func servefiles() { +//} + +func TestArchiver_Validate(t *testing.T) { + arc := &Archiver{ + Cache: nil, + UserAgent: "", + MaxConcurrentDownload: 0, + isValidated: false, + dlSemaphore: nil, + Transport: nil, + RequestTimeout: 0, + httpClient: nil, + } + + arc.Validate() + + if arc.Cache == nil { + t.Error("Cache should not be nil") + } + + if arc.UserAgent == "" { + t.Error("UserAgent should not be empty") + } + + if arc.MaxConcurrentDownload <= 0 { + t.Error("MaxConcurrentDownload should be greater than 0") + } + + if !arc.isValidated { + t.Error("isValidated should be true") + } + + if arc.dlSemaphore == nil { + t.Error("dlSemaphore should not be nil") + } + + if arc.Transport == nil { + t.Error("Transport should not be nil") + } + + if arc.httpClient == nil { + t.Error("httpClient should not be nil") + } +} + +func TestArchiver_Archive(t *testing.T) { + fs := http.FileServer(http.Dir("./testdata/")) + + // start a test server with the file server handler + server := httptest.NewServer(fs) + + defer server.Close() + + archiver := &Archiver{ + Cache: nil, + UserAgent: "", + MaxConcurrentDownload: 0, + isValidated: true, + dlSemaphore: nil, + Transport: nil, + RequestTimeout: 0, + httpClient: &http.Client{}, + } + + // Create a mock request + req := Request{ + URL: server.URL, + } + + // Call the Archive method and capture the result + result, contentType, err := archiver.Archive(context.Background(), req) + + // Check if there was an error + if err != nil { + t.Errorf("Unexpected error: %v", err) + } + + // Check if the result is not empty + if len(result) == 0 { + t.Errorf("Empty result") + } + + // Check if the content type is not empty + if contentType == "" { + t.Errorf("Empty content type") + } + + t.Run("Test isvalidURL", func(t *testing.T) { + archiver.isValidated = false + result, contentType, err = archiver.Archive(context.Background(), req) + assert.Equal(t, []byte(nil), result) + assert.Equal(t, "", contentType) + assert.Error(t, err) + assert.Contains(t, err.Error(), "archiver hasn't been validated") + }) + t.Run("Test url is empty", func(t *testing.T) { + archiver.isValidated = true + req.URL = "" + result, contentType, err = archiver.Archive(context.Background(), req) + assert.Equal(t, []byte(nil), result) + assert.Equal(t, "", contentType) + assert.Error(t, err) + assert.Contains(t, err.Error(), "request url is not specified") + }) + t.Run("Test not valid url ", func(t *testing.T) { + req.URL = "notValidURL" + result, contentType, err = archiver.Archive(context.Background(), req) + assert.Equal(t, []byte(nil), result) + assert.Equal(t, "", contentType) + assert.Error(t, err) + assert.Contains(t, err.Error(), "url \"notValidURL\" is not valid") + }) +} + +func TestTransform(t *testing.T) { + // Create a new instance of the Archiver struct + //arc := &Archiver{} + + arc := &Archiver{ + Cache: nil, + UserAgent: "", + MaxConcurrentDownload: 0, + isValidated: true, + dlSemaphore: nil, + Transport: nil, + RequestTimeout: 0, + httpClient: &http.Client{}, + } + // Test case 1: No WrapDirectory specified + uri := "https://raw.githubusercontent.com/go-shiori/obelisk/master/docs/readme/logo.png" + content := []byte("image content") + contentType := "image/jpeg" + + result := arc.transform(uri, content, contentType) + expected := createDataURL(content, contentType) + + if result != expected { + t.Errorf("Expected %s, but got %s", expected, result) + } + + // Test case 2: WrapDirectory specified + arc.WrapDirectory = "/path/to/directory" + + result = arc.transform(uri, content, contentType) + expected = "data:image/jpeg;base64,aW1hZ2UgY29udGVudA==" + + if result != expected { + t.Errorf("Expected %s, but got %s", expected, result) + } +} + +func TestStore(t *testing.T) { + arc := &Archiver{ + WrapDirectory: "/tmp/some", + } + + // Test case 1: Empty URI + path, rel, err := arc.store("") + if err != nil { + t.Errorf("Unexpected error: %v", err) + } + if path != "" || rel != "" { + t.Errorf("Expected empty path and rel, got path: %s, rel: %s", path, rel) + } + + // Test case 2: Valid URI + uri := "http://example.com/statics/css/foo.css" + path, rel, err = arc.store(uri) + if err != nil { + t.Errorf("Unexpected error: %v", err) + } + expectedPath := "/tmp/some/statics/css/foo.css" + expectedRel := "/statics/css/foo.css" + if path != expectedPath || rel != expectedRel { + t.Errorf("Expected path: %s, rel: %s, got path: %s, rel: %s", expectedPath, expectedRel, path, rel) + } + + // Test case 3: Invalid URI + uri = "invalid uri" + path, rel, err = arc.store(uri) + if err == nil { + t.Errorf("Expected error, got nil") + } + if path != "" || rel != "" { + t.Errorf("Expected empty path and rel, got path: %s, rel: %s", path, rel) + } + + // Test case 4: Error creating directory + arc.WrapDirectory = "/nonexistent" + uri = "http://example.com/statics/css/foo.css" + path, rel, err = arc.store(uri) + if err == nil { + t.Errorf("Expected error, got nil") + } + if path != "" || rel != "" { + t.Errorf("Expected empty path and rel, got path: %s, rel: %s", path, rel) + } +} diff --git a/go.mod b/go.mod index 39603bb..9c4f6cf 100644 --- a/go.mod +++ b/go.mod @@ -9,17 +9,22 @@ require ( github.com/pkg/errors v0.9.1 github.com/sirupsen/logrus v1.9.3 github.com/spf13/cobra v1.8.0 + github.com/stretchr/testify v1.8.4 github.com/tdewolff/parse/v2 v2.7.11 golang.org/x/net v0.20.0 golang.org/x/sync v0.6.0 + gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect ) require ( github.com/andybalholm/cascadia v1.3.2 // indirect + github.com/davecgh/go-spew v1.1.1 // indirect github.com/gogs/chardet v0.0.0-20211120154057-b7413eaefb8f // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect + github.com/kr/pretty v0.3.1 // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect github.com/spf13/pflag v1.0.5 // indirect - github.com/stretchr/testify v1.8.4 // indirect golang.org/x/sys v0.16.0 // indirect golang.org/x/text v0.14.0 // indirect ) diff --git a/go.sum b/go.sum index 1e1d6ce..c3f3839 100644 --- a/go.sum +++ b/go.sum @@ -3,6 +3,7 @@ github.com/andybalholm/cascadia v1.3.2/go.mod h1:7gtRlve5FxPPgIgX36uWBX58OdBsSS6 github.com/cenkalti/backoff/v4 v4.2.1 h1:y4OZtCnogmCPw98Zjyt5a6+QwPLGkiQsYW5oUqylYbM= github.com/cenkalti/backoff/v4 v4.2.1/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= github.com/cpuguy83/go-md2man/v2 v2.0.3/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= +github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -14,10 +15,17 @@ github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2 github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= github.com/kennygrant/sanitize v1.2.4 h1:gN25/otpP5vAsO2djbMhF/LQX6R7+O1TB4yv8NzpJ3o= github.com/kennygrant/sanitize v1.2.4/go.mod h1:LGsjYYtgxbetdg5owWB2mpgUL6e2nfw2eObZ0u0qvak= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8= +github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= @@ -77,6 +85,8 @@ golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo= +gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/log_test.go b/log_test.go new file mode 100644 index 0000000..989c521 --- /dev/null +++ b/log_test.go @@ -0,0 +1,57 @@ +package obelisk + +import ( + "bytes" + "os" + "testing" + + "github.com/sirupsen/logrus" + "github.com/stretchr/testify/assert" +) + +func TestArchiver_LogURL(t *testing.T) { + arc := &Archiver{ + EnableLog: true, + EnableVerboseLog: true, + } + + url := "https://example.com" + parentURL := "https://parent.com" + isCached := true + + // Capture log output + var logOutput bytes.Buffer + logrus.SetOutput(&logOutput) + + arc.logURL(url, parentURL, isCached) + assert.Contains(t, logOutput.String(), url) + + // clear log output + logrus.SetOutput(os.Stdout) + + arc = &Archiver{ + EnableLog: false, + EnableVerboseLog: false, + } + arc.logURL(url, parentURL, isCached) + assert.Contains(t, logOutput.String(), url) + +} + +func TestArchiver_LogURLdisable(t *testing.T) { + arc := &Archiver{ + EnableLog: false, + EnableVerboseLog: false, + } + + url := "https://example.com" + parentURL := "https://parent.com" + isCached := true + + // Capture log output + var logOutput bytes.Buffer + logrus.SetOutput(&logOutput) + + arc.logURL(url, parentURL, isCached) + assert.NotContains(t, logOutput.String(), url) +} diff --git a/process-html_test.go b/process-html_test.go new file mode 100644 index 0000000..b02b68b --- /dev/null +++ b/process-html_test.go @@ -0,0 +1,91 @@ +package obelisk + +import ( + "context" + nurl "net/url" + "strings" + "testing" + + "github.com/go-shiori/dom" + "github.com/stretchr/testify/assert" + "golang.org/x/net/html" +) + +func TestProcessHTML(t *testing.T) { + arc := &Archiver{} + + // Create a mock + + t.Run("is fragment off", func(t *testing.T) { + input := strings.NewReader("

Hello, World!

") + baseURL, _ := nurl.Parse("https://example.com") + result, err := arc.processHTML(context.Background(), input, baseURL, false) + if err != nil { + t.Errorf("Unexpected error: %v", err) + } + + // Check if the result is correct + expectedResult := "

Hello, World!

" + assert.Equal(t, expectedResult, result) + }) + + t.Run("is fragment on", func(t *testing.T) { + input := strings.NewReader("

Hello, World!

") + baseURL, _ := nurl.Parse("https://example.com") + result, err := arc.processHTML(context.Background(), input, baseURL, true) + if err != nil { + t.Errorf("Unexpected error: %v", err) + } + + // Check if the result is correct + expectedResult := "

Hello, World!

" + assert.Equal(t, expectedResult, result) + }) +} + +func TestConvertNoScriptToDiv(t *testing.T) { + // Create a sample HTML document + htmlContent := ` + + + + + +
+

This second noscript element.

+
+ + + ` + + // Parse the HTML document + doc, err := html.Parse(strings.NewReader(htmlContent)) + assert.NoError(t, err) + + // Create an instance of the Archiver struct + arc := &Archiver{} + + // Call the convertNoScriptToDiv function + arc.convertNoScriptToDiv(doc, true) + + // Assert that the noscript element has been replaced with a div element + divs := dom.GetElementsByTagName(doc, "div") + + assert.Equal(t, 3, len(divs)) + + // Assert that the div element has the correct attribute + div1 := divs[0] + div2 := divs[1] + div3 := divs[2] + attr1 := dom.GetAttribute(div1, "data-obelisk-noscript") + attr2 := dom.GetAttribute(div2, "data-obelisk-noscript") + attr3 := dom.GetAttribute(div3, "change") + assert.Equal(t, "true", attr1) + assert.Equal(t, "true", attr2) + // other div attr should not affect + assert.Equal(t, "false", attr3) +} diff --git a/scripts/test.sh b/scripts/test.sh new file mode 100644 index 0000000..08b65a9 --- /dev/null +++ b/scripts/test.sh @@ -0,0 +1,14 @@ +#!/bin/bash + +# Check if gotestfmt is installed +if ! [ -x "$(command -v gotestfmt)" ]; then + echo "gotestfmt not found. Using test standard output." +fi + +# if gotestfmt is installed, run with it +if [ -x "$(command -v gotestfmt)" ]; then + set -o pipefail + go test ${SOURCE_FILES} ${GO_TEST_FLAGS} -json | gotestfmt ${GOTESTFMT_FLAGS} +else + go test ${SOURCE_FILES} ${GO_TEST_FLAGS} +fi diff --git a/testdata/index.html b/testdata/index.html new file mode 100644 index 0000000..098f596 --- /dev/null +++ b/testdata/index.html @@ -0,0 +1,46 @@ + + + Example Domain + + + + + + + + +
+

Example Domain

+

This domain is for use in illustrative examples in documents. You may use this + domain in literature without prior coordination or asking for permission.

+

More information...

+
+ + + \ No newline at end of file diff --git a/utils_test.go b/utils_test.go new file mode 100644 index 0000000..64cad5f --- /dev/null +++ b/utils_test.go @@ -0,0 +1,98 @@ +package obelisk + +import ( + "fmt" + "net/url" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestIsValitdURL(t *testing.T) { + dataURL := "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAIAAAACCAYAAABytg0kAAAAEklEQVQIW2P8z8AARAwMjDAGACwBA/+8RVWvAAAAAElFTkSuQmCC" + rawURL := "https://google.com/page#fragment?utm_source=google&utm_medium=cpc&utm_campaign=summer_sale" + expected := []byte("TextforTest") + // Parse the URL + parsedURL, err := url.Parse(rawURL) + if err != nil { + fmt.Println("Failed to parse URL:", err) + return + } + contentType := "text/plain" + expectedResult := "data:text/plain;base64,VGV4dGZvclRlc3Q=" + + t.Run("Test isvalidURL", func(t *testing.T) { + result := isValidURL("https://github.com/go-shiori/obelisk") + result2 := isValidURL("itIsNotAURL") + assert.True(t, result) + assert.False(t, result2) + }) + + t.Run("Test Create Absolute URL", func(t *testing.T) { + + resultdataURL := createAbsoluteURL(dataURL, parsedURL) + resultRelativePath := createAbsoluteURL("/it/is/relarivepath", parsedURL) + resulacualturl := createAbsoluteURL("https://bing.com", parsedURL) + resulAcualtURLWithfragment := createAbsoluteURL(rawURL, parsedURL) + resulWithoutURL := createAbsoluteURL("", parsedURL) + resulWithfragment := createAbsoluteURL("#bar", parsedURL) + + assert.Equal(t, dataURL, resultdataURL) + assert.Equal(t, "https://google.com/it/is/relarivepath", resultRelativePath) + assert.Equal(t, "https://bing.com", resulacualturl) + assert.Equal(t, "https://google.com/page%23fragment", resulAcualtURLWithfragment) + assert.Equal(t, "", resulWithoutURL) + assert.Equal(t, "#bar", resulWithfragment) + + }) + t.Run("Test create dataURL", func(t *testing.T) { + result := createDataURL(expected, contentType) + assert.Equal(t, expectedResult, result) + }) + t.Run("s2b", func(t *testing.T) { + result := s2b("TextforTest") + assert.Equal(t, expected, result) + }) + t.Run("b2s", func(t *testing.T) { + result := b2s(expected) + assert.Equal(t, "TextforTest", result) + }) +} + +func TestSanitizeStyleURL(t *testing.T) { + tests := []struct { + name string + input string + expected string + }{ + { + name: "Test with url()", + input: "url('https://example.com/')", + expected: "https://example.com/", + }, + { + name: "Test with double quotes", + input: "\"https://example.com/\"", + expected: "https://example.com/", + }, + { + name: "Test with single quotes", + input: "'https://example.com/'", + expected: "https://example.com/", + }, + { + name: "Test with no quotes", + input: "https://example.com/", + expected: "https://example.com/", + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + result := sanitizeStyleURL(test.input) + if result != test.expected { + t.Errorf("Expected %s, but got %s", test.expected, result) + } + }) + } +}