Skip to content

Commit

Permalink
Optimize implementation of DDFAPD #16
Browse files Browse the repository at this point in the history
1) Replace Scala `for` loops with the `cfor` macro
2) Optimize some ImageJ operations on Processor, like duplicate
3) Add a JMH-based benchmark

As a result, the DDFAPD execution time is cut roughly in half
  • Loading branch information
jpsacha committed May 3, 2021
1 parent 7f0ab1f commit f236066
Show file tree
Hide file tree
Showing 11 changed files with 1,402 additions and 42 deletions.
1 change: 1 addition & 0 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ lazy val ijp_debayer2sx_core = project.in(file("ijp-debayer2sx-core"))
description := "IJP DeBayer2SX Core",
commonSettings,
libraryDependencies += "com.beachape" %% "enumeratum" % "1.6.1",
libraryDependencies += "io.github.metarank" %% "cfor" % "0.2"
)

lazy val ijp_debayer2sx_plugins = project.in(file("ijp-debayer2sx-plugins"))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ package ij_plugins.debayer2sx

import ij.ImageStack
import ij.plugin.filter.Convolver
import ij.process.{Blitter, ByteProcessor, ColorProcessor, FloatProcessor}
import ij.process._
import ij_plugins.debayer2sx.process.{FR, add, copyRanges, _}

import java.awt.Rectangle
Expand Down Expand Up @@ -173,9 +173,9 @@ object DDFAPD {
)

// f1=filtImg(h1+[0 0 1 0 0],G0,1);
val f11 = filtImg(add(h1, Array(0f, 0f, 1f, 0f, 0f)), G0.convertToFloatProcessor(), 1)
val f11 = filtImg(add(h1, Array(0f, 0f, 1f, 0f, 0f)), G0, 1)
// f2 = filtImg(h0, R1, 1)
val f21 = filtImg(h0, R1.convertToFloatProcessor(), 1)
val f21 = filtImg(h0, R1, 1)

// Gh=zeros(m,n);
val Gh = new FloatProcessor(w, h)
Expand All @@ -195,9 +195,9 @@ object DDFAPD {
copyRanges(G1, Range(1, w, 2), FR, /* = */ bay, Range(1, w, 2), Range(1, h, 2))

// f1=filtImg(h1+[0,0,1,0,0],G1,1);
val f12 = filtImg(add(h1, Array(0f, 0f, 1f, 0f, 0f)), G1.convertToFloatProcessor(), 1)
val f12 = filtImg(add(h1, Array(0f, 0f, 1f, 0f, 0f)), G1, 1)
// f2=filtImg(h0,B0,1);
val f22 = filtImg(h0, B0.convertToFloatProcessor(), 1)
val f22 = filtImg(h0, B0, 1)
// Gh(2:2:m,:)=f1+f2;
copyRanges(Gh, FR, Range(1, h, 2), /* = */ f12 + f22, FR, FR)
Gh
Expand All @@ -223,9 +223,9 @@ object DDFAPD {
copyRanges(B1, FR, Range(1, h, 2), /* = */ bay, Range(0, w, 2), Range(1, h, 2))

// f1=filtImg([0 0 1 0 0]+h1,G0,2);
val f11 = filtImg(add(Array(0f, 0f, 1f, 0f, 0f), h1), G0.convertToFloatProcessor(), 2)
val f11 = filtImg(add(Array(0f, 0f, 1f, 0f, 0f), h1), G0, 2)
// f2=filtImg(h0,B1,2);
val f21 = filtImg(h0, B1.convertToFloatProcessor(), 2)
val f21 = filtImg(h0, B1, 2)

// Gv(:,1:2:n)=f1+f2;
copyRanges(Gv, Range(0, w, 2), FR, /* = */ f11 + f21, FR, FR)
Expand All @@ -243,9 +243,9 @@ object DDFAPD {
copyRanges(G1, FR, Range(1, h, 2), /* = */ bay, Range(1, w, 2), Range(1, h, 2))

// f1=filtImg(h1+[0 0 1 0 0],G1,2);
val f12 = filtImg(add(h1, Array(0f, 0f, 1f, 0f, 0f)), G1.convertToFloatProcessor(), 2)
val f12 = filtImg(add(h1, Array(0f, 0f, 1f, 0f, 0f)), G1, 2)
// f2=filtImg(h0,R0,2);
val f22 = filtImg(h0, R0.convertToFloatProcessor(), 2)
val f22 = filtImg(h0, R0, 2)

// Gv(:,2:2:n)=f1+f2;
copyRanges(Gv, Range(1, w, 2), FR, /* = */ f12 + f22, FR, FR)
Expand Down Expand Up @@ -275,24 +275,53 @@ object DDFAPD {

// y=conv2(1,h,xx,'valid');
new Convolver().convolveFloat1D(xx, hh, hh.length, 1, 1)
xx.setRoi(new Rectangle(B, 0, w, h))
xx.crop().asInstanceOf[FloatProcessor]
val cropROI = new Rectangle(B, 0, w, h)
// xx.setRoi(cropROI)
// xx.crop().asInstanceOf[FloatProcessor]
crop(xx, cropROI)
} else if (dir == 2) {
// Add mirroring of the borders
// xx = [x(1+B:-1:2,:); x; x(m-1:-1:m-B,:)];
val xx = mirrorBorderHeight(x: FloatProcessor, B)

// y=conv2(h,1,xx,'valid');
new Convolver().convolveFloat1D(xx, hh, 1, hh.length, 1)
xx.setRoi(new Rectangle(0, B, w, h))
xx.crop().asInstanceOf[FloatProcessor]
val cropROI = new Rectangle(0, B, w, h)
// xx.setRoi(cropROI)
// xx.crop().asInstanceOf[FloatProcessor]
crop(xx, cropROI)
} else {
throw new IllegalArgumentException("Invalid `dir` value:" + dir)
}

y
}

/**
  * Copy a rectangular region of `src` into a newly allocated `FloatProcessor`.
  *
  * Only pixel values are copied, one image row at a time.
  *
  * @param src source image; it is not modified
  * @param roi region to copy, in pixel coordinates of `src`; must lie entirely within `src`
  * @return new processor of size `roi.width` x `roi.height` holding the copied pixels
  * @throws IllegalArgumentException if `roi` is not fully contained within `src`
  */
private[this] def crop(src: FloatProcessor, roi: Rectangle): FloatProcessor = {
  import io.github.metarank.cfor._

  val srcWidth  = src.getWidth
  val srcHeight = src.getHeight
  val roiX      = roi.x
  val roiY      = roi.y
  val roiWidth  = roi.width
  val roiHeight = roi.height

  // Without this check a ROI that overhangs the right edge, but still fits in the pixel array,
  // would silently pick up pixels from the beginning of the following row.
  require(
    roiX >= 0 && roiY >= 0 && roiWidth >= 0 && roiHeight >= 0 &&
      roiX.toLong + roiWidth <= srcWidth && roiY.toLong + roiHeight <= srcHeight,
    s"ROI [x=$roiX, y=$roiY, width=$roiWidth, height=$roiHeight] must be contained in the " +
      s"source image [width=$srcWidth, height=$srcHeight]"
  )

  val srcPixels = src.getPixels.asInstanceOf[Array[Float]]
  val dst       = new FloatProcessor(roiWidth, roiHeight)
  val dstPixels = dst.getPixels.asInstanceOf[Array[Float]]

  // Each ROI row is contiguous in both pixel arrays, so it can be moved with a single bulk copy.
  cfor(0)(_ < roiHeight, _ + 1) { row =>
    System.arraycopy(srcPixels, (roiY + row) * srcWidth + roiX, dstPixels, row * roiWidth, roiWidth)
  }
  dst
}

/**
* Clip values in the image to the range specified by `bpp`.
*
Expand All @@ -304,12 +333,14 @@ object DDFAPD {
* @param ip image to check
* @param bpp bits per pixel
*/
private def checkImg(ip: FloatProcessor, bpp: Int): Unit = {
private[this] def checkImg(ip: FloatProcessor, bpp: Int): Unit = {
import io.github.metarank.cfor._

val maxVal = (math.pow(2, bpp) - 1).toFloat
val pixels = ip.getPixels.asInstanceOf[Array[Float]]

for (i <- pixels.indices) {
// for (i <- pixels.indices) {
cfor(0)(_ < pixels.length, _ + 1) { i =>
val v = pixels(i)
if (v > maxVal) {
pixels(i) = maxVal
Expand Down Expand Up @@ -922,9 +953,10 @@ object DDFAPD {
val xx1 = x(Range(1 + b - 1, 2 - 2, -1), FR)
val xx2 = x(Range(w - 1 - 1, w - b - 2, -1), FR)
val xx = new FloatProcessor(b + w + b, h)
xx.copyBits(xx1, 0, 0, Blitter.COPY)
xx.copyBits(x, b, 0, Blitter.COPY)
xx.copyBits(xx2, b + w, 0, Blitter.COPY)
val blitter = new FloatBlitter(xx)
blitter.copyBits(xx1, 0, 0, Blitter.COPY)
blitter.copyBits(x, b, 0, Blitter.COPY)
blitter.copyBits(xx2, b + w, 0, Blitter.COPY)

xx
}
Expand All @@ -937,9 +969,10 @@ object DDFAPD {
val xx1 = x(FR, Range(1 + b - 1, 2 - 2, -1))
val xx2 = x(FR, Range(h - 1 - 1, h - b - 2, -1))
val xx = new FloatProcessor(w, b + h + b)
xx.copyBits(xx1, 0, 0, Blitter.COPY)
xx.copyBits(x, 0, b, Blitter.COPY)
xx.copyBits(xx2, 0, b + h, Blitter.COPY)
val blitter = new FloatBlitter(xx)
blitter.copyBits(xx1, 0, 0, Blitter.COPY)
blitter.copyBits(x, 0, b, Blitter.COPY)
blitter.copyBits(xx2, 0, b + h, Blitter.COPY)

xx
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@

package ij_plugins.debayer2sx.process

import ij.process.{Blitter, FloatProcessor}
import ij.process.{Blitter, FloatBlitter, FloatProcessor}

import scala.language.implicitConversions

Expand Down Expand Up @@ -86,8 +86,9 @@ final class FloatProcessorMath(val fp: FloatProcessor) {
require(fp.getWidth == other.getWidth)
require(fp.getHeight == other.getHeight)

val r = fp.duplicate().asInstanceOf[FloatProcessor]
r.copyBits(other, 0, 0, Blitter.ADD)
// val r = fp.duplicate().asInstanceOf[FloatProcessor]
val r = duplicate(fp)
new FloatBlitter(r).copyBits(other, 0, 0, Blitter.ADD)
r
}

Expand All @@ -100,8 +101,9 @@ final class FloatProcessorMath(val fp: FloatProcessor) {
require(fp.getWidth == other.getWidth)
require(fp.getHeight == other.getHeight)

val r = fp.duplicate().asInstanceOf[FloatProcessor]
r.copyBits(other, 0, 0, Blitter.SUBTRACT)
// val r = fp.duplicate().asInstanceOf[FloatProcessor]
val r = duplicate(fp)
new FloatBlitter(r).copyBits(other, 0, 0, Blitter.SUBTRACT)
r
}

Expand All @@ -112,7 +114,8 @@ final class FloatProcessorMath(val fp: FloatProcessor) {
*/
def /(v: Double): FloatProcessor = {

val r = fp.duplicate().asInstanceOf[FloatProcessor]
// val r = fp.duplicate().asInstanceOf[FloatProcessor]
val r = duplicate(fp)
r.multiply(1 / v)
r
}
Expand All @@ -124,23 +127,39 @@ final class FloatProcessorMath(val fp: FloatProcessor) {
*/
def *(v: Double): FloatProcessor = {

val r = fp.duplicate().asInstanceOf[FloatProcessor]
// val r = fp.duplicate().asInstanceOf[FloatProcessor]
val r = duplicate(fp)
r.multiply(v)
r
}

@inline
private def slice(src: FloatProcessor, srcRangeX: Range, srcRangeY: Range): FloatProcessor = {
private[this] def slice(src: FloatProcessor, srcRangeX: Range, srcRangeY: Range): FloatProcessor = {
import io.github.metarank.cfor._

// bay(1:2:m,2:2:n)
val _srcRangeX = if (srcRangeX == FR) Range(0, src.getWidth) else srcRangeX
val _srcRangeY = if (srcRangeY == FR) Range(0, src.getHeight) else srcRangeY

val dst = new FloatProcessor(_srcRangeX.length, _srcRangeY.length)
for (x <- _srcRangeX; y <- _srcRangeY) {
val v = src.getf(x, y)
val dstX = (x - _srcRangeX.start) / _srcRangeX.step
val (xStart, xEnd, xStep) = sortedRangeParams(_srcRangeX)
val (yStart, yEnd, yStep) = sortedRangeParams(_srcRangeY)

val srcWidth = src.getWidth
val srcPixels = src.getPixels.asInstanceOf[Array[Float]]

val dstWidth = _srcRangeX.length
val dstHeight = _srcRangeY.length
val dst = new FloatProcessor(dstWidth, dstHeight)
val dstPixels = dst.getPixels.asInstanceOf[Array[Float]]

cfor(yStart)(_ < yEnd, _ + yStep) { y =>
val srcOffsetY = y * srcWidth
val dstY = (y - _srcRangeY.start) / _srcRangeY.step
dst.setf(dstX, dstY, v)
val dstOffsetY = dstY * dstWidth
cfor(xStart)(_ < xEnd, _ + xStep) { x =>
val dstX = (x - _srcRangeX.start) / _srcRangeX.step
dstPixels(dstX + dstOffsetY) = srcPixels(x + srcOffsetY)
}
}
dst
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,9 @@ package object process {
/** Marker object that represents the full range for a given processor. Similar to ":" in MATLAB. */
val FR = Range(Int.MinValue, Int.MaxValue)

implicit def wraRange(r: Range): RangeMath = new RangeMath(r)
implicit def wrapRange(r: Range): RangeMath = new RangeMath(r)

implicit def wraFloatProcessor(fp: FloatProcessor): FloatProcessorMath = new FloatProcessorMath(fp)
implicit def wrapFloatProcessor(fp: FloatProcessor): FloatProcessorMath = new FloatProcessorMath(fp)


def add(a: Array[Float], b: Array[Float]): Array[Float] = {
Expand Down Expand Up @@ -63,27 +63,52 @@ package object process {
* @param srcRangeX source X range
* @param srcRangeY source Y range
*/
def copyRanges(dstIP: ImageProcessor, dstRangeX: Range, dstRangeY: Range,
srcIP: ImageProcessor, srcRangeX: Range, srcRangeY: Range): Unit = {
final def copyRanges(dstIP: ImageProcessor, dstRangeX: Range, dstRangeY: Range,
srcIP: ImageProcessor, srcRangeX: Range, srcRangeY: Range): Unit = {
import io.github.metarank.cfor._

val _dstRangeX = if (dstRangeX == FR) Range(0, dstIP.getWidth) else dstRangeX
val _dstRangeY = if (dstRangeY == FR) Range(0, dstIP.getHeight) else dstRangeY
val _srcRangeX = if (srcRangeX == FR) Range(0, srcIP.getWidth) else srcRangeX
val _srcRangeY = if (srcRangeY == FR) Range(0, srcIP.getHeight) else srcRangeY

for (y <- _dstRangeY) {
val (dstXStart, dstXEnd, dstXStep) = sortedRangeParams(_dstRangeX)
val (dstYStart, dstYEnd, dstYStep) = sortedRangeParams(_dstRangeY)

cfor(dstYStart)(_ < dstYEnd, _ + dstYStep) { y =>
val indexY = (y - _dstRangeY.start) / _dstRangeY.step
val srcY = _srcRangeY.start + indexY * _srcRangeY.step
val srcYOffset = srcY * srcIP.getWidth
val dstYOffset = y * dstIP.getWidth

for (x <- _dstRangeX) {
cfor(dstXStart)(_ < dstXEnd, _ + dstXStep) { x =>
val indexX = (x - _dstRangeX.start) / _dstRangeX.step
val srcX = _srcRangeX.start + indexX * _srcRangeX.step

val v = srcIP.getf(srcX, srcY)
dstIP.setf(x, y, v)
val v = srcIP.getf(srcX + srcYOffset)
dstIP.setf(x + dstYOffset, v)
}
}
}


/**
  * Create a new `FloatProcessor` with the same width, height, and pixel values as `src`.
  *
  * Only the pixel array content is copied into the freshly allocated processor.
  *
  * @param src processor to copy; it is not modified
  * @return new processor holding a copy of the pixel values of `src`
  */
@inline
final def duplicate(src: FloatProcessor): FloatProcessor = {
  val copy = new FloatProcessor(src.getWidth, src.getHeight)
  val from = src.getPixels.asInstanceOf[Array[Float]]
  val to   = copy.getPixels.asInstanceOf[Array[Float]]
  // Bulk copy of the whole pixel buffer.
  Array.copy(from, 0, to, 0, from.length)
  copy
}


/**
  * Convert a `Range` into ascending, half-open loop bounds that visit exactly the same elements.
  *
  * The result `(start, end, step)` is meant for a loop of the form
  * `cfor(start)(_ < end, _ + step)`: `start` is the smallest element of the range,
  * `end` is one past the largest element, and `step` is always positive.
  *
  * The bounds are derived from the actual first and last elements of the range, so
  * descending ranges with a step other than -1 (for instance `Range(6, 0, -2)`)
  * and inclusive ranges are handled correctly.
  *
  * @param range range to convert; may be ascending, descending, inclusive, or empty
  * @return `(start, end, step)` with `step > 0`; for an empty range `start == end`,
  *         so the loop body is never executed
  */
@inline
final def sortedRangeParams(range: Range): (Int, Int, Int) = {
  if (range.isEmpty)
    (range.start, range.start, 1)
  else if (range.step > 0)
    (range.start, range.last + 1, range.step)
  else
    // Descending: the smallest element is the last one, the largest is the first one.
    (range.last, range.start + 1, -range.step)
}


}
1 change: 1 addition & 0 deletions ijp-debayer2sx-experimental/build.sbt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
enablePlugins(JmhPlugin)
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
/*
* Image/J Plugins
* Copyright (C) 2002-2021 Jarek Sacha
* Author's email: jpsacha at gmail [dot] com
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* Latest release available at https://github.com/ij-plugins/ijp-DeBayer2SX
*/

package ij_plugins.debayer2sx.experimental

import ij.ImageStack
import ij.process.FloatProcessor
import org.openjdk.jmh.annotations._

import java.util.concurrent.TimeUnit

/**
  * JMH benchmark comparing the current `DDFAPD.debayerGR` implementation with the
  * copy kept under `experimental.v_1_2_0`.
  *
  * Both benchmarks run on the same input image, created in [[setup]] with the
  * dimensions given by the `width` and `height` parameters.
  */
@State(Scope.Benchmark)
@BenchmarkMode(Array(Mode.AverageTime))
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@Warmup(iterations = 10, time = -1)
@Measurement(iterations = 10, time = -1)
@Fork(value = 1)
class JMHBenchmarkDDFAPD {
  /** Width of the benchmark input image, in pixels. */
  @Param(Array("2048"))
  var width: Int = _

  /** Height of the benchmark input image, in pixels. */
  @Param(Array("1536"))
  var height: Int = _

  // Assigned in `setup()`. It is deliberately not constructed here: at field-initialization
  // time `width` and `height` still hold their default values.
  var fp: FloatProcessor = _

  /** Allocate the input image using the configured `width` and `height`. */
  @Setup
  def setup(): Unit = {
    fp = new FloatProcessor(width, height)
  }

  /** Benchmark of the current implementation. */
  @Benchmark
  def DDFAPD_cfor(): ImageStack =
    ij_plugins.debayer2sx.DDFAPD.debayerGR(fp, 16, doRefine = true)

  /** Benchmark of the implementation kept in the `v_1_2_0` package, used as the baseline. */
  @Benchmark
  def DDFAPD_v_1_2_0(): ImageStack =
    ij_plugins.debayer2sx.experimental.v_1_2_0.core.DDFAPD.debayerGR(fp, 16, doRefine = true)
}
Loading

0 comments on commit f236066

Please sign in to comment.