Skip to content

Commit

Permalink
address #332, make the pdf consumer play well with the rest of the bu…
Browse files Browse the repository at this point in the history
…ild system
  • Loading branch information
northdpole committed Oct 24, 2024
1 parent 7078b4b commit a5b58fc
Show file tree
Hide file tree
Showing 6 changed files with 369 additions and 79 deletions.
18 changes: 9 additions & 9 deletions components/consumers/pdf/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
FROM golang:latest
FROM golang:1.22-bookworm

WORKDIR /playwright
RUN go mod init github.com/smithy-security/pdf-consumer && \
go get -u github.com/playwright-community/playwright-go && \
go run github.com/playwright-community/playwright-go/cmd/playwright@latest install --with-deps
# Install playwright cli with right version for later use
RUN go install github.com/playwright-community/playwright-go/cmd/[email protected] && \
apt-get update && apt-get install -y ca-certificates tzdata &&\
playwright install chromium --with-deps

ENV PATH="${PATH}:/go/pkg/mod/github.com/playwright-community"
COPY components/consumers/pdf/pdf /playwright/pdf
COPY components/consumers/pdf/default.html /playwright/default.html
ENTRYPOINT ["/playwright/pdf"]
COPY components/consumers/pdf/linux/amd64/pdf /application/pdf
COPY components/consumers/pdf/default.html /application/default.html
WORKDIR /application
ENTRYPOINT ["/application/pdf"]
22 changes: 22 additions & 0 deletions components/consumers/pdf/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Build configuration for the pdf consumer image.
# Both targets below are commands, not files.
.PHONY: container publish

# Registry/repository prefix of the image name (used in the image tag).
CONTAINER_REPO=
# Version used as the image tag.
SMITHY_VERSION=
# Optional: when non-empty it is attached to the image as the
# org.opencontainers.image.source label.
SOURCE_CODE_REPO=
# Contents of .custom_image when that file exists, otherwise "scratch".
# NOTE(review): not referenced by any target in this Makefile - confirm whether it is still needed.
PRODUCER_AGGREGATOR_BASE_IMAGE=$(shell test -e .custom_image && cat .custom_image || echo "scratch")

# Container CLI used to build and push the image.
DOCKER=docker

# container stages the prebuilt pdf binary directory and default.html in a
# throwaway build context under /tmp and builds the consumer image from it.
# The EXIT trap removes the staging directory whether the build succeeds or
# fails; the recipe's exit status is still that of the last command executed.
# All build output is redirected to stderr.
container:
	$(eval workdir:=$(shell mktemp -d /tmp/pdf.XXXXXX))
	trap 'rm -rf "${workdir}"' EXIT; \
	mkdir -p ${workdir}/components/consumers && \
	cp -r ../../../bin/components/consumers/pdf ${workdir}/components/consumers && \
	cp default.html ${workdir}/components/consumers/pdf && \
	$(DOCKER) build --tag $(CONTAINER_REPO)/components/consumers/pdf:$(SMITHY_VERSION) \
		--file Dockerfile \
		$$([ "${SOURCE_CODE_REPO}" != "" ] && echo "--label=org.opencontainers.image.source=${SOURCE_CODE_REPO}" ) \
		${workdir} 1>&2

# publish pushes the image tag produced by the container target to the
# registry; push output is redirected to stderr.
publish:
$(DOCKER) push $(CONTAINER_REPO)/components/consumers/pdf:$(SMITHY_VERSION) 1>&2
111 changes: 43 additions & 68 deletions components/consumers/pdf/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,20 +8,19 @@
package main

import (
"bytes"
"flag"
"fmt"
"html/template"
"log"
"log/slog"
"os"
"path/filepath"

"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/session"
"github.com/aws/aws-sdk-go/service/s3/s3manager"
"github.com/playwright-community/playwright-go"
"github.com/go-errors/errors"

"github.com/smithy-security/smithy/components/consumers"
playwright "github.com/smithy-security/smithy/pkg/playwright"
s3client "github.com/smithy-security/smithy/pkg/s3"
)

var (
Expand All @@ -31,13 +30,23 @@ var (
)

func main() {

flag.StringVar(&bucket, "bucket", "", "s3 bucket name")
flag.StringVar(&region, "region", "", "s3 bucket region")
flag.StringVar(&reportTemplate, "template", "", "report html template location")

if err := consumers.ParseFlags(); err != nil {
log.Fatal(err)
}

if bucket == "" {
log.Fatal("bucket is empty, you need to provide a bucket value")
}

if region == "" {
log.Fatal("region is empty, you need to provide a region value")
}

var responses any
if consumers.Raw {
r, err := consumers.LoadToolResponse()
Expand All @@ -52,98 +61,64 @@ func main() {
}
responses = r
}
result, err := buildPdf(responses)
if err != nil {
log.Fatal(err)
}

if err = sendToS3(result, bucket, region); err != nil {
log.Fatal(err)
cleanupRun := func(msg string, cleanup func() error) {
if err := cleanup(); err != nil {
slog.Error(msg, "error", err)
}
}
}

func sendToS3(filename, bucket, region string) error {
sess, err := session.NewSession(&aws.Config{Region: aws.String(region)})
pw, err := playwright.NewClient()
if err != nil {
return fmt.Errorf("unable to start session with AWS API: %w", err)
log.Fatalf("could not launch playwright: %s", err)
}

// filename is statically defined above
//#nosec:G304
data, err := os.ReadFile(filename) //#nosec:G304
defer cleanupRun("could not stop Playwright: %w", pw.Stop)

client, err := s3client.NewClient(region)
if err != nil {
return fmt.Errorf("could not open file: %w", err)
log.Fatal(err)
}

uploader := s3manager.NewUploader(sess)
_, err = uploader.Upload(&s3manager.UploadInput{
Bucket: aws.String(bucket),
Key: aws.String(filename),
Body: bytes.NewReader(data),
})
if err := run(responses, pw, client); err != nil {
log.Fatal(err)
}
}

func run(responses any, pw playwright.Wrapper, s3Wrapper s3client.Wrapper) error {
slog.Info("reading pdf")
resultFilename, pdfBytes, err := buildPdf(responses, pw)
if err != nil {
return fmt.Errorf("unable to upload %q to %q: %w", filename, bucket, err)
return err
}

fmt.Printf("Successfully uploaded %q to %q\n", filename, bucket)
return nil
slog.Info("uploading pdf to s3", slog.String("filename", resultFilename))
return s3Wrapper.UpsertFile(resultFilename, bucket, pdfBytes)
}

func buildPdf(data any) (string, error) {
func buildPdf(data any, pw playwright.Wrapper) (string, []byte, error) {
tmpl := template.Must(template.ParseFiles("default.html"))
cleanupRun := func(msg string, cleanup func() error) {
if err := cleanup(); err != nil {
log.Printf(msg, err)
}
}

currentPath, err := os.Getwd()
if err != nil {
return "", fmt.Errorf("could not get current working directory: %w", err)
return "", nil, errors.Errorf("could not get current working directory: %w", err)
}

reportHTMLPath := filepath.Join(currentPath, "report.html")
//#nosec: G304
f, err := os.OpenFile(reportHTMLPath, os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0o600) //#nosec: G304
if err != nil {
return "", fmt.Errorf("could not open report.html: %w", err)
return "", nil, errors.Errorf("could not open report.html: %w", err)
}
defer cleanupRun("could not close file: %w", f.Close)

if err = tmpl.Execute(f, data); err != nil {
return "", fmt.Errorf("could not apply data to template: %w", err)
return "", nil, errors.Errorf("could not apply data to template: %w", err)
}

pw, err := playwright.Run()
if err != nil {
return "", fmt.Errorf("could not launch playwright: %w", err)
}
defer cleanupRun("could not stop Playwrigh: %w", pw.Stop)

browser, err := pw.Chromium.Launch()
if err != nil {
return "", fmt.Errorf("could not launch Chromium: %w", err)
}
defer cleanupRun("could not close browser: %w", browser.Close)

context, err := browser.NewContext()
reportPage := fmt.Sprintf("file:///%s", reportHTMLPath)
pdfBytes, err := pw.GetPDFOfPage(reportPage, reportHTMLPath)
if err != nil {
return "", fmt.Errorf("could not create context: %w", err)
}
return "", nil, errors.Errorf("could not generate pdf from page %s, err: %w", reportPage, err)

page, err := context.NewPage()
if err != nil {
return "", fmt.Errorf("could not create page: %w", err)
}

reportPage := fmt.Sprintf("file:///%s", reportHTMLPath)
if _, err = page.Goto(reportPage); err != nil {
return "", fmt.Errorf("could not goto page %s in the browser: %w", reportPage, err)
}

_, err = page.PDF(playwright.PagePdfOptions{
Path: playwright.String(reportHTMLPath),
})

return reportHTMLPath, err
return reportHTMLPath, pdfBytes, err
}
62 changes: 62 additions & 0 deletions components/consumers/pdf/main_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
// Package main of the pdf consumer implements a simple consumer that applies
// a Go template to a smithy scan, converts the result to PDF and then
// uploads it to the S3 bucket passed as an argument.
//
// The consumer expects the environment variables
//
//	AWS_ACCESS_KEY_ID
//	AWS_SECRET_ACCESS_KEY
//
// to be set, and the "bucket" and "region" arguments to be passed.
package main

import (
"testing"

"github.com/stretchr/testify/require"

v1 "github.com/smithy-security/smithy/api/proto/v1"

playwright "github.com/smithy-security/smithy/pkg/playwright/mock"
s3mock "github.com/smithy-security/smithy/pkg/s3/mock"
"github.com/smithy-security/smithy/pkg/testdata"
)

// Test_run drives run with mocked playwright and S3 clients and checks that
// both are invoked and that the PDF bytes produced by the playwright mock are
// the bytes handed to the S3 mock.
func Test_run(t *testing.T) {
	mockClient, err := playwright.NewMockClient()
	require.NoError(t, err)

	pdfCalled := false
	expected := []byte("this is a pdf")
	mockClient.GetPDFOfPageCallBack = func(_, _ string) ([]byte, error) {
		pdfCalled = true
		return expected, nil
	}

	mockS3Client, err := s3mock.NewMockClient("")
	require.NoError(t, err)

	s3Called := false
	var uploaded []byte
	mockS3Client.UpsertCallback = func(_, _ string, b []byte) error {
		s3Called = true
		// Keep what was uploaded so it can be compared after run returns.
		uploaded = b
		return nil
	}

	err = run([]v1.EnrichedLaunchToolResponse{testdata.EnrichedLaunchToolResponse}, mockClient, mockS3Client)
	require.NoError(t, err)
	require.True(t, pdfCalled)
	require.True(t, s3Called)
	require.Equal(t, expected, uploaded)
}

// Test_buildPdf checks that buildPdf calls the playwright wrapper and returns
// exactly the PDF bytes the wrapper produced.
func Test_buildPdf(t *testing.T) {
	mockClient, err := playwright.NewMockClient()
	require.NoError(t, err)

	called := false
	expected := []byte("this is a pdf")
	mockClient.GetPDFOfPageCallBack = func(_, _ string) ([]byte, error) {
		called = true
		return expected, nil
	}

	_, result, err := buildPdf([]v1.EnrichedLaunchToolResponse{testdata.EnrichedLaunchToolResponse}, mockClient)
	require.NoError(t, err)
	require.True(t, called)
	// testify expects (expected, actual); the order matters for failure output.
	require.Equal(t, expected, result)
}
Loading

0 comments on commit a5b58fc

Please sign in to comment.