Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

#332 fix pdf consumer #333

Merged
merged 5 commits into from
Oct 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 15 additions & 9 deletions components/consumers/pdf/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,11 +1,17 @@
FROM golang:latest
FROM golang:1.22-bookworm

WORKDIR /playwright
RUN go mod init github.com/smithy-security/pdf-consumer && \
go get -u github.com/playwright-community/playwright-go && \
go run github.com/playwright-community/playwright-go/cmd/playwright@latest install --with-deps
ARG EXECUTABLE_SRC_PATH
ARG EXECUTABLE_TARGET_PATH
ENV EXECUTABLE_TARGET_PATH=${EXECUTABLE_TARGET_PATH}
COPY ${EXECUTABLE_SRC_PATH} /app/${EXECUTABLE_TARGET_PATH}

ENV PATH="${PATH}:/go/pkg/mod/github.com/playwright-community"
COPY components/consumers/pdf/pdf /playwright/pdf
COPY components/consumers/pdf/default.html /playwright/default.html
ENTRYPOINT ["/playwright/pdf"]
# Install playwright cli with right version for later use
RUN go install github.com/playwright-community/playwright-go/cmd/[email protected] && \
apt-get update && apt-get install -y ca-certificates tzdata &&\
playwright install chromium --with-deps &&\
ln -s /app/${EXECUTABLE_TARGET_PATH} /bin/component


COPY components/consumers/pdf/default.html /app/components/consumers/pdf/default.html
WORKDIR /app/components/consumers/pdf
ENTRYPOINT ["/app/components/consumers/pdf/pdf"]
25 changes: 25 additions & 0 deletions components/consumers/pdf/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
.PHONY: container publish

CONTAINER_REPO=
SMITHY_VERSION=
SOURCE_CODE_REPO=
BUILD_ARCHITECTURE=

DOCKER=docker

container:
$(eval workdir:=$(shell mktemp -d /tmp/pdf.XXXXXX))
northdpole marked this conversation as resolved.
Show resolved Hide resolved
mkdir -p ${workdir}/components/consumers && \
cp -r ../../../bin/components/consumers/pdf ${workdir}/components/consumers && \
cp default.html ${workdir}/components/consumers/pdf && \
$(DOCKER) build --tag $(CONTAINER_REPO)/components/consumers/pdf:$(SMITHY_VERSION) \
andream16 marked this conversation as resolved.
Show resolved Hide resolved
--file Dockerfile \
--platform "$(BUILD_ARCHITECTURE)" \
--build-arg EXECUTABLE_SRC_PATH=components/consumers/pdf/$(BUILD_ARCHITECTURE)/pdf \
--build-arg EXECUTABLE_TARGET_PATH=components/consumers/pdf/pdf \
$$([ "${SOURCE_CODE_REPO}" != "" ] && echo "--label=org.opencontainers.image.source=${SOURCE_CODE_REPO}" ) \
${workdir} 1>&2 && \
rm -rf ${workdir}

# publish pushes the components/consumers/pdf image tagged $(SMITHY_VERSION)
# to $(CONTAINER_REPO) using the $(DOCKER) CLI; command output is redirected
# to stderr.
publish:
$(DOCKER) push $(CONTAINER_REPO)/components/consumers/pdf:$(SMITHY_VERSION) 1>&2
117 changes: 48 additions & 69 deletions components/consumers/pdf/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,20 +8,17 @@
package main

import (
"bytes"
"flag"
"fmt"
"html/template"
"log"
"log/slog"
"os"
"path/filepath"

"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/session"
"github.com/aws/aws-sdk-go/service/s3/s3manager"
"github.com/playwright-community/playwright-go"

"github.com/smithy-security/smithy/components/consumers"
playwright "github.com/smithy-security/smithy/pkg/playwright"
s3client "github.com/smithy-security/smithy/pkg/s3"
)

var (
Expand All @@ -31,119 +28,101 @@ var (
)

func main() {

flag.StringVar(&bucket, "bucket", "", "s3 bucket name")
flag.StringVar(&region, "region", "", "s3 bucket region")
flag.StringVar(&reportTemplate, "template", "", "report html template location")

if err := consumers.ParseFlags(); err != nil {
log.Fatal(err)
}

if bucket == "" {
log.Fatal("bucket is empty, you need to provide a bucket value")
}

if region == "" {
log.Fatal("region is empty, you need to provide a region value")
}

var responses any
var scanID string
if consumers.Raw {
r, err := consumers.LoadToolResponse()
if err != nil {
log.Fatal("could not load raw results, file malformed: ", err)
}
responses = r
scanID = r[0].ScanInfo.ScanUuid
} else {
r, err := consumers.LoadEnrichedToolResponse()
if err != nil {
log.Fatal("could not load enriched results, file malformed: ", err)
}
responses = r
}
result, err := buildPdf(responses)
if err != nil {
log.Fatal(err)
scanID = r[0].OriginalResults.ScanInfo.ScanUuid
}

if err = sendToS3(result, bucket, region); err != nil {
log.Fatal(err)
cleanupRun := func(msg string, cleanup func() error) {
if err := cleanup(); err != nil {
slog.Error(msg, "error", err)
}
}
}

func sendToS3(filename, bucket, region string) error {
sess, err := session.NewSession(&aws.Config{Region: aws.String(region)})
pw, err := playwright.NewClient()
if err != nil {
return fmt.Errorf("unable to start session with AWS API: %w", err)
log.Fatalf("could not launch playwright: %s", err)
}

// filename is statically defined above
//#nosec:G304
data, err := os.ReadFile(filename) //#nosec:G304
defer cleanupRun("could not stop Playwright: %w", pw.Stop)

client, err := s3client.NewClient(region)
if err != nil {
return fmt.Errorf("could not open file: %w", err)
log.Fatal(err)
}

if err := run(responses, scanID, pw, client); err != nil {
log.Fatal(err)
}
}

uploader := s3manager.NewUploader(sess)
_, err = uploader.Upload(&s3manager.UploadInput{
Bucket: aws.String(bucket),
Key: aws.String(filename),
Body: bytes.NewReader(data),
})
func run(responses any, s3FilenamePostfix string, pw playwright.Wrapper, s3Wrapper s3client.Wrapper) error {
slog.Info("reading pdf")
resultFilename, pdfBytes, err := buildPdf(responses, pw)
if err != nil {
return fmt.Errorf("unable to upload %q to %q: %w", filename, bucket, err)
return err
}

fmt.Printf("Successfully uploaded %q to %q\n", filename, bucket)
return nil
slog.Info("uploading pdf to s3", slog.String("filename", resultFilename), slog.String("bucket", bucket), slog.String("region", region))
return s3Wrapper.UpsertFile(resultFilename, bucket, s3FilenamePostfix, pdfBytes)
}

func buildPdf(data any) (string, error) {
tmpl := template.Must(template.ParseFiles("default.html"))
cleanupRun := func(msg string, cleanup func() error) {
if err := cleanup(); err != nil {
log.Printf(msg, err)
}
func buildPdf(data any, pw playwright.Wrapper) (string, []byte, error) {
tmpl, err := template.ParseFiles("default.html")
if err != nil {
return "", nil, err
}

currentPath, err := os.Getwd()
if err != nil {
return "", fmt.Errorf("could not get current working directory: %w", err)
return "", nil, fmt.Errorf("could not get current working directory: %w", err)
}

reportHTMLPath := filepath.Join(currentPath, "report.html")
//#nosec: G304
f, err := os.OpenFile(reportHTMLPath, os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0o600) //#nosec: G304
if err != nil {
return "", fmt.Errorf("could not open report.html: %w", err)
return "", nil, fmt.Errorf("could not open report.html: %w", err)
}
defer cleanupRun("could not close file: %w", f.Close)

if err = tmpl.Execute(f, data); err != nil {
return "", fmt.Errorf("could not apply data to template: %w", err)
}

pw, err := playwright.Run()
if err != nil {
return "", fmt.Errorf("could not launch playwright: %w", err)
}
defer cleanupRun("could not stop Playwrigh: %w", pw.Stop)

browser, err := pw.Chromium.Launch()
if err != nil {
return "", fmt.Errorf("could not launch Chromium: %w", err)
}
defer cleanupRun("could not close browser: %w", browser.Close)
andream16 marked this conversation as resolved.
Show resolved Hide resolved

context, err := browser.NewContext()
if err != nil {
return "", fmt.Errorf("could not create context: %w", err)
return "", nil, fmt.Errorf("could not apply data to template: %w", err)
}

page, err := context.NewPage()
reportPage := fmt.Sprintf("file:///%s", reportHTMLPath)
pdfBytes, err := pw.GetPDFOfPage(reportPage, reportHTMLPath)
if err != nil {
return "", fmt.Errorf("could not create page: %w", err)
}
return "", nil, fmt.Errorf("could not generate pdf from page %s, err: %w", reportPage, err)
northdpole marked this conversation as resolved.
Show resolved Hide resolved

reportPage := fmt.Sprintf("file:///%s", reportHTMLPath)
if _, err = page.Goto(reportPage); err != nil {
return "", fmt.Errorf("could not goto page %s in the browser: %w", reportPage, err)
}

_, err = page.PDF(playwright.PagePdfOptions{
Path: playwright.String(reportHTMLPath),
})

return reportHTMLPath, err
return reportHTMLPath, pdfBytes, err
}
62 changes: 62 additions & 0 deletions components/consumers/pdf/main_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
// Package main of the pdf consumer implements a simple consumer that applies
// a Go template to a smithy scan, converts the result to PDF, and uploads the
// result to the S3 bucket passed as an argument.
//
// The consumer expects the environment variables
//
//	AWS_ACCESS_KEY_ID
//	AWS_SECRET_ACCESS_KEY
//
// to be set, along with the "bucket" and "region" arguments.
package main

import (
"testing"

"github.com/stretchr/testify/require"

v1 "github.com/smithy-security/smithy/api/proto/v1"

playwright "github.com/smithy-security/smithy/pkg/playwright/mock"
s3mock "github.com/smithy-security/smithy/pkg/s3/mock"
"github.com/smithy-security/smithy/pkg/testdata"
)

// Test_run drives run with mocked playwright and S3 clients and asserts that
// both the PDF-generation callback and the S3 upsert callback are invoked.
func Test_run(t *testing.T) {
	pwMock, err := playwright.NewMockClient()
	require.NoError(t, err)

	// Flags flipped by the mock callbacks so the test can observe the calls.
	var pdfRendered, uploaded bool

	pdfPayload := []byte("this is a pdf")
	pwMock.GetPDFOfPageCallBack = func(_, _ string) ([]byte, error) {
		pdfRendered = true
		return pdfPayload, nil
	}

	s3Mock, err := s3mock.NewMockClient("")
	require.NoError(t, err)
	s3Mock.UpsertCallback = func(_, _ string, _ []byte) error {
		uploaded = true
		return nil
	}

	input := []v1.EnrichedLaunchToolResponse{testdata.EnrichedLaunchToolResponse}
	err = run(input, "", pwMock, s3Mock)
	require.NoError(t, err)
	require.True(t, pdfRendered)
	require.True(t, uploaded)
}

// Test_buildPdf calls buildPdf with a mocked playwright client and asserts
// that the mock's PDF callback is invoked and that the bytes it returns are
// handed back to the caller unchanged.
func Test_buildPdf(t *testing.T) {
	mockClient, err := playwright.NewMockClient()
	require.NoError(t, err)

	called := false
	expected := []byte("this is a pdf")
	mockClient.GetPDFOfPageCallBack = func(s1, s2 string) ([]byte, error) {
		called = true
		return expected, nil
	}

	_, result, err := buildPdf([]v1.EnrichedLaunchToolResponse{testdata.EnrichedLaunchToolResponse}, mockClient)
	require.NoError(t, err)
	require.True(t, called)
	// testify's signature is Equal(t, expected, actual); keeping that order
	// makes a failure report label the two values correctly.
	require.Equal(t, expected, result)
}
4 changes: 2 additions & 2 deletions components/consumers/pdf/task.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ spec:
type: string
- name: consumer-pdf-template-location
type: string
default: "/playwright/default.html"
default: "/app/components/consumers/pdf/default.html"

workspaces:
- name: output
Expand All @@ -35,7 +35,7 @@ spec:
value: "$(params.consumer-pdf-s3-access-key-id)"
- name: AWS_SECRET_ACCESS_KEY
value: "$(params.consumer-pdf-s3-secret-access-key)"
command: ["/playwright/pdf"]
command: ["/app/components/consumers/pdf/pdf"]
args:
[
"-in",
Expand Down
24 changes: 12 additions & 12 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -21,16 +21,16 @@ require (
github.com/olekukonko/tablewriter v0.0.0-20170122224234-a0225b3f23b5
github.com/owenrumney/go-sarif/v2 v2.1.2
github.com/package-url/packageurl-go v0.1.0
github.com/playwright-community/playwright-go v0.3900.1
github.com/playwright-community/playwright-go v0.4702.0
github.com/spf13/cobra v1.8.0
github.com/spf13/viper v1.11.0
github.com/stretchr/testify v1.9.0
github.com/tektoncd/pipeline v0.57.0
github.com/trivago/tgo v1.0.7
go.mongodb.org/mongo-driver v1.10.0
golang.org/x/crypto v0.18.0
golang.org/x/crypto v0.23.0
golang.org/x/oauth2 v0.16.0
golang.org/x/sync v0.6.0
golang.org/x/sync v0.7.0
google.golang.org/api v0.156.0
google.golang.org/protobuf v1.35.1
helm.sh/helm/v3 v3.14.4
Expand Down Expand Up @@ -61,12 +61,12 @@ require (
github.com/census-instrumentation/opencensus-proto v0.4.1 // indirect
github.com/cespare/xxhash/v2 v2.2.0 // indirect
github.com/cyphar/filepath-securejoin v0.2.4 // indirect
github.com/danwakefield/fnmatch v0.0.0-20160403171240-cbb64ac3d964 // indirect
github.com/deckarep/golang-set/v2 v2.6.0 // indirect
github.com/emicklei/go-restful/v3 v3.11.0 // indirect
github.com/evanphx/json-patch v5.7.0+incompatible // indirect
github.com/evanphx/json-patch/v5 v5.6.0 // indirect
github.com/felixge/httpsnoop v1.0.4 // indirect
github.com/go-jose/go-jose/v3 v3.0.1 // indirect
github.com/go-jose/go-jose/v3 v3.0.3 // indirect
github.com/go-kit/log v0.2.1 // indirect
github.com/go-logfmt/logfmt v0.5.1 // indirect
github.com/go-logr/logr v1.4.1 // indirect
Expand Down Expand Up @@ -140,12 +140,12 @@ require (
go.starlark.net v0.0.0-20230525235612-a134d8f9ddca // indirect
go.uber.org/multierr v1.11.0 // indirect
go.uber.org/zap v1.26.0 // indirect
golang.org/x/exp v0.0.0-20230515195305-f3d0a9c9a5cc // indirect
golang.org/x/mod v0.14.0 // indirect
golang.org/x/net v0.20.0 // indirect
golang.org/x/term v0.16.0 // indirect
golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842 // indirect
golang.org/x/mod v0.17.0 // indirect
golang.org/x/net v0.25.0 // indirect
golang.org/x/term v0.20.0 // indirect
golang.org/x/time v0.5.0 // indirect
golang.org/x/tools v0.16.1 // indirect
golang.org/x/tools v0.21.0 // indirect
golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 // indirect
gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect
google.golang.org/appengine v1.6.8 // indirect
Expand Down Expand Up @@ -190,8 +190,8 @@ require (
github.com/spf13/pflag v1.0.5
github.com/subosito/gotenv v1.2.0 // indirect
go.uber.org/atomic v1.10.0 // indirect
golang.org/x/sys v0.16.0 // indirect
golang.org/x/text v0.14.0 // indirect
golang.org/x/sys v0.20.0 // indirect
golang.org/x/text v0.15.0 // indirect
gopkg.in/ini.v1 v1.66.4 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)
Loading