Rename the first-page PDF preview tool from pdf2png to pdf2jpeg and serve the
preview as image/jpeg instead of image/png.

Ghostscript still renders page 1 to a temporary PNG; ImageMagick convert then
turns it into a size-bounded JPEG that is kept in the /tmp/pdf2jpeg cache.

Notes on programs/pdf2jpeg:
 * The size bound is written ${JPEGSIZE}kb. In a shell script $(JPEGSIZE) is
   command substitution (it would try to run a command named JPEGSIZE), not a
   variable reference as it is in a Makefile.
 * convert only runs when the cached JPEG is missing, and the PNG is removed
   with rm -f: the PNG is deleted after every conversion, so it does not
   exist on a cache hit and an unconditional convert would abort under set -e.

NOTE(review): this patch was recovered from a copy whose line breaks had been
collapsed. Hunk line counts match the @@ headers, but leading whitespace and
the position of blank context lines were reconstructed; if a hunk is rejected,
retry with "git apply --ignore-whitespace" and confirm against the tree.

diff --git a/programs/Makefile b/programs/Makefile
index 365f1cb..b118d0a 100644
--- a/programs/Makefile
+++ b/programs/Makefile
@@ -6,7 +6,7 @@ GCCINFRA=ftp://gcc.gnu.org/pub/gcc/infrastructure/
 THREADS=4
 OUT=/opt/laundry
 
-install: $(OUT)/bin/pdf2png $(OUT)/bin/gs $(OUT)/bin/pdf2txt $(OUT)/bin/pdf2pdfa $(OUT)/bin/checksum $(OUT)/sbin/firejail $(OUT)/bin/access-test $(OUT)/bin/sandbox-access-test.sh $(OUT)/bin/clean-caches $(OUT)/bin/lein $(OUT)/bin/convert
+install: $(OUT)/bin/pdf2jpeg $(OUT)/bin/gs $(OUT)/bin/pdf2txt $(OUT)/bin/pdf2pdfa $(OUT)/bin/checksum $(OUT)/sbin/firejail $(OUT)/bin/access-test $(OUT)/bin/sandbox-access-test.sh $(OUT)/bin/clean-caches $(OUT)/bin/lein $(OUT)/bin/convert
 
 $(OUT)/bin/asan: $(OUT)/bin/gcc
 	mkdir -p $(OUT)/bin
@@ -18,9 +18,9 @@ $(OUT)/bin/asan++: $(OUT)/bin/asan
 	cat $(OUT)/bin/asan | sed -e 's/gcc/g++/' > $(OUT)/bin/asan++
 	chmod +x $(OUT)/bin/asan++
 
-$(OUT)/bin/pdf2png: pdf2png
+$(OUT)/bin/pdf2jpeg: pdf2jpeg
 	mkdir -p $(OUT)/bin
-	cp pdf2png $(OUT)/bin
+	cp pdf2jpeg $(OUT)/bin
 
 $(OUT)/bin/clean-caches: clean-caches
 	mkdir -p $(OUT)/bin
diff --git a/programs/clean-caches b/programs/clean-caches
index cf0ff61..8f0c1c4 100755
--- a/programs/clean-caches
+++ b/programs/clean-caches
@@ -5,7 +5,7 @@ set -e
 
 NOW=$(date +%s)
 
-CACHES="/tmp/pdf2pdfa /tmp/pdf2png"
+CACHES="/tmp/pdf2pdfa /tmp/pdf2jpeg"
 LOGFILE=$HOME/clean-caches.log
 NREMOVED=0
 
diff --git a/programs/pdf2png b/programs/pdf2jpeg
similarity index 67%
rename from programs/pdf2png
rename to programs/pdf2jpeg
index 08412b9..93cbc38 100755
--- a/programs/pdf2png
+++ b/programs/pdf2jpeg
@@ -5,8 +5,9 @@ set -e
 TOOLBIN=$(dirname $0)
 INPUT=$1
 OUTPUT=$2
+JPEGSIZE=100
 
-CACHE=/tmp/pdf2png
+CACHE=/tmp/pdf2jpeg
 
 export LD_LIBRARY_PATH=$(dirname $TOOLBIN)/lib64
 
@@ -17,7 +18,7 @@ CSUM=$(sha1sum "$INPUT" | sed -e 's/ .*//')
 cp "$INPUT" $CACHE/$CSUM.pdf
 
 # some room for race here
-test -f $CACHE/$CSUM.png || firejail \
+test -f $CACHE/$CSUM.jpeg || firejail \
    --noprofile \
    --private \
    --read-write=$CACHE \
@@ -29,5 +30,11 @@ test -f $CACHE/$CSUM.png || firejail \
    --shell=none \
    $TOOLBIN/gs -dSAFER -dBATCH -dNOPAUSE -sDEVICE=png16m -dGraphicsAlphaBits=4 -dTextAlphaBits=4 -sPageList=1 -sOutputFile="$CACHE/$CSUM.png" "$CACHE/$CSUM.pdf"
 
-cp $CACHE/$CSUM.png "$OUTPUT"
+# on a cache miss: bound size and convert to jpeg (not scaled yet)
+test -f $CACHE/$CSUM.jpeg || $TOOLBIN/convert -define jpeg:extent=${JPEGSIZE}kb $CACHE/$CSUM.png $CACHE/$CSUM.jpeg
+
+# remove temporary png (-f: it does not exist on a cache hit)
+rm -f $CACHE/$CSUM.png
+
+cp $CACHE/$CSUM.jpeg "$OUTPUT"
 
diff --git a/src/laundry/pdf.clj b/src/laundry/pdf.clj
index d8d91e0..d6ff0f8 100644
--- a/src/laundry/pdf.clj
+++ b/src/laundry/pdf.clj
@@ -49,15 +49,15 @@
       (not-ok "pdf2txt conversion failed"))))
 
 ;; previewer of first page
-(s/defn api-pdf2png [env, tempfile :- java.io.File]
+(s/defn api-pdf2jpeg [env, tempfile :- java.io.File]
   (let [path (.getAbsolutePath tempfile)
-        out (str (.getAbsolutePath tempfile) ".png")
-        res (sh (str (:tools env) "/bin/pdf2png") path out)]
+        out (str (.getAbsolutePath tempfile) ".jpeg")
+        res (sh (str (:tools env) "/bin/pdf2jpeg") path out)]
     (.delete tempfile)
     (if (= (:exit res) 0)
       (content-type
        (ok (temp-file-input-stream out))
-       "image/png")
+       "image/jpeg")
       (do
         (warn "pdf preview failed: " res)
         (not-ok "pdf preview failed")))))
@@ -74,7 +74,7 @@
             filename (:filename file)]
         (info "PDF previewer received " filename "(" (:size file) "b)")
         (.deleteOnExit tempfile) ;; cleanup if VM is terminated
-        (api-pdf2png env tempfile)))
+        (api-pdf2jpeg env tempfile)))
 
     (POST "/pdf2txt" []
       :summary "attempt to convert a PDF file to TXT"
diff --git a/test/test.sh b/test/test.sh
index 98003a5..ad16dbc 100755
--- a/test/test.sh
+++ b/test/test.sh
@@ -69,14 +69,14 @@ test_pdf2txt() {
    echo ", ok"
 }
 
-test_pdf2png() {
-   echo -n "Testing pdf2png:"
+test_pdf2jpeg() {
+   echo -n "Testing pdf2jpeg:"
    test -x $FIREJAIL || { echo "no firejail - skipping "; return 0; }
    echo -n " converting"
-   curl -sf -F file=@test/testcases/hypno.pdf -X POST "$HOST/pdf/pdf-preview" > tmp/response.png || die "conversion failed"
+   curl -sf -F file=@test/testcases/hypno.pdf -X POST "$HOST/pdf/pdf-preview" > tmp/response.jpeg || die "conversion failed"
    echo -n ", checking"
-   file tmp/response.png | grep -q 'PNG' || die "tmp/response.png does not look like a png file"
-   echo ", ok (response is png)"
+   file tmp/response.jpeg | grep -q 'JPEG' || die "tmp/response.jpeg does not look like a jpeg file"
+   echo ", ok (response is jpeg)"
 }
 
 test_pdf2pdfa() {
@@ -127,7 +127,7 @@ main() {
    # Actual tests
    test_checksum
    test_pdf2txt
-   test_pdf2png
+   test_pdf2jpeg
    test_pdf2pdfa
 
    echo "Tests OK"