From d33c8caa713e45361af66bc96a86f32cce15b3a5 Mon Sep 17 00:00:00 2001
From: Raman Gupta
Date: Wed, 10 Mar 2021 22:07:03 -0500
Subject: [PATCH] performance: pnmtops to temp file

For some reason when piping the output of pnmtops directly to ps2pdf,
conversion is very slow (especially for color scans). When writing to an
intermediate temporary file, conversion is fast.

Before:

________________________________________________________
Executed in   19.20 secs   fish           external
   usr time    4.01 secs    0.32 millis    4.01 secs
   sys time   20.63 secs    5.30 millis   20.62 secs

After:

________________________________________________________
Executed in  368.06 millis    fish           external
   usr time  378.06 millis    0.00 millis  378.06 millis
   sys time  100.00 millis    2.79 millis   97.21 millis

An improvement of over 50 times!

This should resolve #19.
---
Review notes (this section is ignored when the patch is applied):

 * The temporary PostScript file is written to $IMAGE_DIR/${IMAGE_FILE}.ps,
   so the cleanup line now removes that same path. Previously it tested
   ${IMAGE_FILE}.ps but removed $PP_PREFIX${IMAGE_FILE}.ps, which leaves the
   temp file behind whenever PP_PREFIX is non-empty.
 * ps2pdf writes to ${IMAGE_FILE%.*}.pdf again, the name used before this
   patch and by the tesseract branch; the change is meant to be
   performance-only.
 * Line breaks and leading whitespace were reconstructed. If the context
   indentation does not match the target tree, apply with
   `git apply --ignore-whitespace`. The blob id on the index line predates
   the two fixes above.

 scan_perpage | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/scan_perpage b/scan_perpage
index f7adfdf..a1793a5 100755
--- a/scan_perpage
+++ b/scan_perpage
@@ -59,6 +59,11 @@ IMAGE_PATH=$1
 IMAGE_DIR=$(dirname $1)
 IMAGE_FILE=$(basename $1)
 
+TIMEVERBOSE=
+if [[ $VERBOSE == 1 ]]; then
+  TIMEVERBOSE=time
+fi
+
 process_page() {
   log ""
   log "-------------------------------------------------------------------------------"
@@ -80,14 +85,16 @@ process_page() {
       if [[ $VERBOSE == 1 ]]; then
         UNPAPERVERBOSE="-v"
       fi
-      #runconstrained unpaper $UNPAPERVERBOSE --no-mask-scan --overwrite --dpi $RESOLUTION --no-blackfilter $IMAGE_FILE $PP_PREFIX$IMAGE_FILE | logstdout
-      runconstrained unpaper $UNPAPERVERBOSE --overwrite --dpi $RESOLUTION $IMAGE_PATH $IMAGE_DIR/$PP_PREFIX$IMAGE_FILE | logstdout
+      #runconstrained $TIMEVERBOSE unpaper $UNPAPERVERBOSE --no-mask-scan --overwrite --dpi $RESOLUTION --no-blackfilter $IMAGE_FILE $PP_PREFIX$IMAGE_FILE | logstdout
+      runconstrained $TIMEVERBOSE unpaper $UNPAPERVERBOSE --overwrite --dpi $RESOLUTION $IMAGE_PATH $IMAGE_DIR/$PP_PREFIX$IMAGE_FILE | logstdout
     fi
     if [[ $SEARCHABLE == 1 ]]; then
       log "Converting image data to searchable pdf..."
       # tesseract uses the input's DPI header, we need to convert to a format that supports this (like tiff)
-      runconstrained convert -density ${RESOLUTION}x${RESOLUTION} -units PixelsPerInch $IMAGE_DIR/$PP_PREFIX$IMAGE_FILE $IMAGE_DIR/$PP_PREFIX${IMAGE_FILE}.tiff | logstdout
-      runconstrained tesseract $IMAGE_DIR/$PP_PREFIX${IMAGE_FILE}.tiff $IMAGE_DIR/${IMAGE_FILE%.*} -l $LANGUAGE pdf | logstdout
+      log "...Running convert"
+      runconstrained $TIMEVERBOSE convert -density ${RESOLUTION}x${RESOLUTION} -units PixelsPerInch $IMAGE_DIR/$PP_PREFIX$IMAGE_FILE $IMAGE_DIR/$PP_PREFIX${IMAGE_FILE}.tiff | logstdout
+      log "...Running tesseract"
+      runconstrained $TIMEVERBOSE tesseract $IMAGE_DIR/$PP_PREFIX${IMAGE_FILE}.tiff $IMAGE_DIR/${IMAGE_FILE%.*} -l $LANGUAGE pdf | logstdout
       [[ -f $IMAGE_DIR/$PP_PREFIX${IMAGE_FILE}.tiff ]] && rm $IMAGE_DIR/$PP_PREFIX${IMAGE_FILE}.tiff
     else
       log "Converting image data to pdf..."
@@ -101,9 +108,11 @@ process_page() {
       if [[ $VERBOSE = 1 && ! "$(pnmtops -verbose 2>&1 < /dev/null)" =~ "unrecognized option" ]]; then
         PNMVERBOSE="-verbose"
       fi
-      log "Using page options: $PAGEOPTS"
-      runconstrained pnmtops $PNMVERBOSE $PAGEOPTS $IMAGE_DIR/$PP_PREFIX$IMAGE_FILE | ps2pdf $PS2PDF_OPTS - > $IMAGE_DIR/${IMAGE_FILE%.*}.pdf | logstdout
-      [[ -f $IMAGE_DIR/$PP_PREFIX${IMAGE_FILE}.ps ]] && rm $IMAGE_DIR/$PP_PREFIX${IMAGE_FILE}.ps
+      log "...Running pnmtops on $IMAGE_DIR/$PP_PREFIX$IMAGE_FILE using page options: $PAGEOPTS"
+      runconstrained $TIMEVERBOSE pnmtops $PNMVERBOSE $PAGEOPTS $IMAGE_DIR/$PP_PREFIX$IMAGE_FILE > $IMAGE_DIR/${IMAGE_FILE}.ps | logstdout
+      log "...Running ps2pdf on $IMAGE_DIR/${IMAGE_FILE}.ps"
+      runconstrained $TIMEVERBOSE ps2pdf $PS2PDF_OPTS $IMAGE_DIR/${IMAGE_FILE}.ps $IMAGE_DIR/${IMAGE_FILE%.*}.pdf | logstdout
+      [[ -f $IMAGE_DIR/${IMAGE_FILE}.ps ]] && rm $IMAGE_DIR/${IMAGE_FILE}.ps
     fi
   else
     log "Skipping empty page $IMAGE_FILE with white percentage $PERCENTAGE_WHITE"