From b1e216248f870a76788cb52f6ecaade5760576ca Mon Sep 17 00:00:00 2001 From: sg Date: Mon, 2 Sep 2024 19:57:06 +0100 Subject: [PATCH] fix #332 by updating the pdf consumer to work with the rest of the project --- components/consumers/pdf/Dockerfile | 26 ++++++++----- components/consumers/pdf/Makefile | 22 +++++++++++ components/consumers/pdf/main.go | 58 ++++++++++++++++++----------- components/consumers/pdf/task.yaml | 4 +- 4 files changed, 77 insertions(+), 33 deletions(-) create mode 100644 components/consumers/pdf/Makefile diff --git a/components/consumers/pdf/Dockerfile b/components/consumers/pdf/Dockerfile index 3946bbbef..d51ac9850 100644 --- a/components/consumers/pdf/Dockerfile +++ b/components/consumers/pdf/Dockerfile @@ -1,11 +1,19 @@ -FROM golang:latest -WORKDIR /playwright -RUN go mod init github.com/ocurity/pdf-consumer &&\ - go get -u github.com/playwright-community/playwright-go &&\ - go run github.com/playwright-community/playwright-go/cmd/playwright@latest install --with-deps +# Stage 2: Build +FROM golang:1.22 as builder -ENV PATH="${PATH}:/go/pkg/mod/github.com/playwright-community" -COPY components/consumers/pdf/pdf /playwright/pdf -COPY components/consumers/pdf/default.html /playwright/default.html -ENTRYPOINT ["/playwright/pdf"] \ No newline at end of file +# Install playwright cli with right version for later use +RUN go install github.com/playwright-community/playwright-go/cmd/playwright@latest + +# Stage 3: Final +FROM ubuntu:oracular +COPY --from=builder /go/bin/playwright / +RUN apt-get update && apt-get install -y ca-certificates tzdata \ + # Install dependencies and all browsers (or specify one) + && /playwright install chromium --with-deps \ + && rm -rf /var/lib/apt/lists/* + +COPY components/consumers/pdf/pdf /application/pdf +COPY components/consumers/pdf/default.html /application/default.html +WORKDIR /application +ENTRYPOINT ["/application/pdf"] diff --git a/components/consumers/pdf/Makefile b/components/consumers/pdf/Makefile new 
file mode 100644 index 000000000..4eec1e55c --- /dev/null +++ b/components/consumers/pdf/Makefile @@ -0,0 +1,22 @@ +.PHONY: container publish + +CONTAINER_REPO= +DRACON_VERSION= +SOURCE_CODE_REPO= +PRODUCER_AGGREGATOR_BASE_IMAGE=$(shell test -e .custom_image && cat .custom_image || echo "scratch") + +DOCKER=docker + +container: + $(eval workdir:=$(shell mktemp -d /tmp/pdf.XXXXXX)) + mkdir -p ${workdir}/components/consumers && \ + cp -r ../../../bin/components/consumers/pdf ${workdir}/components/consumers && \ + cp default.html ${workdir}/components/consumers/pdf && \ + $(DOCKER) build --tag $(CONTAINER_REPO)/components/consumers/pdf:$(DRACON_VERSION) \ + --file Dockerfile \ + $$([ "${SOURCE_CODE_REPO}" != "" ] && echo "--label=org.opencontainers.image.source=${SOURCE_CODE_REPO}" ) \ + ${workdir} 1>&2 && \ + rm -rf ${workdir} + +publish: + $(DOCKER) push $(CONTAINER_REPO)/components/consumers/pdf:$(DRACON_VERSION) 1>&2 diff --git a/components/consumers/pdf/main.go b/components/consumers/pdf/main.go index 7ed9561ae..93ae0e1c6 100644 --- a/components/consumers/pdf/main.go +++ b/components/consumers/pdf/main.go @@ -13,12 +13,15 @@ import ( "fmt" "html/template" "log" + "log/slog" "os" "path/filepath" + "strings" "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/aws/session" "github.com/aws/aws-sdk-go/service/s3/s3manager" + "github.com/go-errors/errors" playwright "github.com/playwright-community/playwright-go" "github.com/ocurity/dracon/components/consumers" @@ -52,27 +55,27 @@ func main() { } responses = r } - result, err := buildPdf(responses) + result, pdfBytes, err := buildPdf(responses) if err != nil { log.Fatal(err) } - if err = sendToS3(result, bucket, region); err != nil { + if err = sendToS3(result, bucket, region, pdfBytes); err != nil { log.Fatal(err) } } -func sendToS3(filename, bucket, region string) error { +func sendToS3(filename, bucket, region string, pdfBytes []byte) error { sess, err := session.NewSession(&aws.Config{Region: 
aws.String(region)}) if err != nil { - return fmt.Errorf("unable to start session with AWS API: %w", err) + return errors.Errorf("unable to start session with AWS API: %w", err) } // filename is statically defined above //#nosec:G304 data, err := os.ReadFile(filename) //#nosec:G304 if err != nil { - return fmt.Errorf("could not open file: %w", err) + return errors.Errorf("could not open file: %w", err) } uploader := s3manager.NewUploader(sess) @@ -82,68 +85,79 @@ func sendToS3(filename, bucket, region string) error { Body: bytes.NewReader(data), }) if err != nil { - return fmt.Errorf("unable to upload %q to %q: %w", filename, bucket, err) + return errors.Errorf("unable to upload %s to %s: %w", filename, bucket, err) } - fmt.Printf("Successfully uploaded %q to %q\n", filename, bucket) + pdfFilename := strings.TrimSuffix(filename, ".html") + ".pdf" + _, err = uploader.Upload(&s3manager.UploadInput{ + Bucket: aws.String(bucket), + Key: aws.String(pdfFilename), + Body: bytes.NewReader(pdfBytes), + }) + if err != nil { + return errors.Errorf("unable to upload %s to %s: %w", pdfFilename, bucket, err) + } + + slog.Info("uploaded successfully", "filename", filename, "pdf filename", pdfFilename, "bucket", bucket) return nil } -func buildPdf(data any) (string, error) { +func buildPdf(data any) (string, []byte, error) { tmpl := template.Must(template.ParseFiles("default.html")) cleanupRun := func(msg string, cleanup func() error) { if err := cleanup(); err != nil { - log.Printf(msg, err) + slog.Error(msg, "error", err) } } currentPath, err := os.Getwd() if err != nil { - return "", fmt.Errorf("could not get current working directory: %w", err) + return "", nil, errors.Errorf("could not get current working directory: %w", err) } reportHTMLPath := filepath.Join(currentPath, "report.html") //#nosec: G304 f, err := os.OpenFile(reportHTMLPath, os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0o600) //#nosec: G304 if err != nil { - return "", fmt.Errorf("could not open report.html: 
%w", err) + return "", nil, errors.Errorf("could not open report.html: %w", err) } defer cleanupRun("could not close file: %w", f.Close) if err = tmpl.Execute(f, data); err != nil { - return "", fmt.Errorf("could not apply data to template: %w", err) + return "", nil, errors.Errorf("could not apply data to template: %w", err) } pw, err := playwright.Run() if err != nil { - return "", fmt.Errorf("could not launch playwright: %w", err) + return "", nil, errors.Errorf("could not launch playwright: %w", err) } - defer cleanupRun("could not stop Playwrigh: %w", pw.Stop) + defer cleanupRun("could not stop Playwright: %w", pw.Stop) browser, err := pw.Chromium.Launch() if err != nil { - return "", fmt.Errorf("could not launch Chromium: %w", err) + return "", nil, errors.Errorf("could not launch Chromium: %w", err) } - defer cleanupRun("could not close browser: %w", browser.Close) - context, err := browser.NewContext() if err != nil { - return "", fmt.Errorf("could not create context: %w", err) + return "", nil, errors.Errorf("could not create context: %w", err) } page, err := context.NewPage() if err != nil { - return "", fmt.Errorf("could not create page: %w", err) + return "", nil, errors.Errorf("could not create page: %w", err) } reportPage := fmt.Sprintf("file:///%s", reportHTMLPath) if _, err = page.Goto(reportPage); err != nil { - return "", fmt.Errorf("could not goto page %s in the browser: %w", reportPage, err) + return "", nil, errors.Errorf("could not goto page %s in the browser: %w", reportPage, err) } - _, err = page.PDF(playwright.PagePdfOptions{ + pdfBytes, err := page.PDF(playwright.PagePdfOptions{ Path: playwright.String(reportHTMLPath), }) + if err != nil { + return "", nil, errors.Errorf("could not generate pdf from page %s, err: %w", reportPage, err) - return reportHTMLPath, err + } + return reportHTMLPath, pdfBytes, err } diff --git a/components/consumers/pdf/task.yaml b/components/consumers/pdf/task.yaml index e59102a47..5c4a923d1 100644 --- 
a/components/consumers/pdf/task.yaml +++ b/components/consumers/pdf/task.yaml @@ -21,7 +21,7 @@ spec: type: string - name: consumer-pdf-template-location type: string - default: "/playwright/default.html" + default: "/application/default.html" workspaces: - name: output @@ -35,7 +35,7 @@ spec: value: "$(params.consumer-pdf-s3-access-key-id)" - name: AWS_SECRET_ACCESS_KEY value: "$(params.consumer-pdf-s3-secret-access-key)" - command: ["/playwright/pdf"] + command: ["/application/pdf"] args: [ "-in",