-
Notifications
You must be signed in to change notification settings - Fork 49
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Performance: simplify and optimize kth-greatest computation (96% recall at 195 qps) (#616)
- Loading branch information
1 parent
504589b
commit ea383d8
Showing
11 changed files
with
183 additions
and
149 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,10 @@ | ||
|Model|Parameters|Recall|Queries per Second| | ||
|---|---|---|---| | ||
|eknn-l2lsh|L=100 k=4 w=1024 candidates=500 probes=0|0.378|337.457| | ||
|eknn-l2lsh|L=100 k=4 w=1024 candidates=1000 probes=0|0.446|281.828| | ||
|eknn-l2lsh|L=100 k=4 w=1024 candidates=500 probes=3|0.634|272.814| | ||
|eknn-l2lsh|L=100 k=4 w=1024 candidates=1000 probes=3|0.716|232.698| | ||
|eknn-l2lsh|L=100 k=4 w=2048 candidates=500 probes=0|0.767|303.686| | ||
|eknn-l2lsh|L=100 k=4 w=2048 candidates=1000 probes=0|0.846|254.121| | ||
|eknn-l2lsh|L=100 k=4 w=2048 candidates=500 probes=3|0.922|215.233| | ||
|eknn-l2lsh|L=100 k=4 w=2048 candidates=1000 probes=3|0.960|190.689| | ||
|eknn-l2lsh|L=100 k=4 w=1024 candidates=500 probes=0|0.379|353.162| | ||
|eknn-l2lsh|L=100 k=4 w=1024 candidates=1000 probes=0|0.447|295.007| | ||
|eknn-l2lsh|L=100 k=4 w=1024 candidates=500 probes=3|0.634|286.531| | ||
|eknn-l2lsh|L=100 k=4 w=1024 candidates=1000 probes=3|0.716|245.690| | ||
|eknn-l2lsh|L=100 k=4 w=2048 candidates=500 probes=0|0.767|312.826| | ||
|eknn-l2lsh|L=100 k=4 w=2048 candidates=1000 probes=0|0.846|265.204| | ||
|eknn-l2lsh|L=100 k=4 w=2048 candidates=500 probes=3|0.921|221.817| | ||
|eknn-l2lsh|L=100 k=4 w=2048 candidates=1000 probes=3|0.960|195.653| |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
28 changes: 28 additions & 0 deletions
28
elastiknn-lucene/src/main/java/com/klibisz/elastiknn/search/KthGreatestResult.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
package com.klibisz.elastiknn.search; | ||
|
||
public class KthGreatestResult { | ||
public final short kthGreatest; | ||
public final int numGreaterThan; | ||
public final int numNonZero; | ||
public KthGreatestResult(short kthGreatest, int numGreaterThan, int numNonZero) { | ||
this.kthGreatest = kthGreatest; | ||
this.numGreaterThan = numGreaterThan; | ||
this.numNonZero = numNonZero; | ||
} | ||
|
||
@Override | ||
public boolean equals(Object o) { | ||
if (o == this) { | ||
return true; | ||
} else if (!(o instanceof KthGreatestResult other)) { | ||
return false; | ||
} else { | ||
return kthGreatest == other.kthGreatest && numGreaterThan == other.numGreaterThan && numNonZero == other.numNonZero; | ||
} | ||
} | ||
|
||
@Override | ||
public String toString() { | ||
return String.format("KthGreatestResult(kthGreatest=%d, numGreaterThan=%d, numNonZero=%d)", kthGreatest, numGreaterThan, numNonZero); | ||
} | ||
} |
64 changes: 0 additions & 64 deletions
64
elastiknn-lucene/src/main/java/org/apache/lucene/search/KthGreatest.java
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
103 changes: 103 additions & 0 deletions
103
elastiknn-lucene/src/test/scala/com/klibisz/elastiknn/search/ArrayHitCounterSpec.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
package com.klibisz.elastiknn.search | ||
|
||
import org.scalatest.freespec.AnyFreeSpec | ||
import org.scalatest.matchers.should.Matchers | ||
|
||
import scala.util.Random | ||
|
||
/** Verifies `ArrayHitCounter` by comparing it against a simple map-backed `HitCounter`. */
final class ArrayHitCounterSpec extends AnyFreeSpec with Matchers {

  /** Map-backed `HitCounter` used as the source of truth; every key in `0 until capacity` starts at zero. */
  final class Reference(capacity: Int) extends HitCounter {

    // Pre-populate all keys so that lookups of untouched keys yield 0 rather than failing.
    private val counts: scala.collection.mutable.Map[Int, Short] = {
      val zero: Short = 0
      val initial = (0 until capacity).map(key => (key, zero))
      scala.collection.mutable.Map(initial: _*)
    }

    override def increment(key: Int): Unit = {
      val next = (counts(key) + 1).toShort
      counts(key) = next
    }

    override def increment(key: Int, count: Short): Unit = {
      val next = (counts(key) + count).toShort
      counts(key) = next
    }

    override def isEmpty: Boolean = counts.values.forall(_ <= 0)

    override def get(key: Int): Short = counts.apply(key)

    override def numHits(): Int = counts.count { case (_, hits) => hits > 0 }

    override def capacity(): Int = capacity

    override def minKey(): Int = counts.iterator.collect { case (key, hits) if hits > 0 => key }.min

    override def maxKey(): Int = counts.iterator.collect { case (key, hits) if hits > 0 => key }.max

    override def kthGreatest(k: Int): KthGreatestResult = {
      // Counts in descending order, so index k holds the kth greatest (zero-based).
      val descending = counts.values.toArray.sorted(Ordering[Short].reverse)
      val kth = descending(k)
      new KthGreatestResult(kth, descending.count(_ > kth), descending.count(_ != 0))
    }
  }

  "reference examples" - {
    "example 1" in {
      val counter = new Reference(10)
      counter.isEmpty shouldBe true
      counter.capacity() shouldBe 10

      // First hit on key 0.
      counter.get(0) shouldBe 0
      counter.increment(0)
      counter.get(0) shouldBe 1
      counter.numHits() shouldBe 1
      counter.minKey() shouldBe 0
      counter.maxKey() shouldBe 0

      // Bulk increment on key 5.
      counter.get(5) shouldBe 0
      counter.increment(5, 5)
      counter.get(5) shouldBe 5
      counter.numHits() shouldBe 2
      counter.minKey() shouldBe 0
      counter.maxKey() shouldBe 5

      // Two single increments on key 9.
      counter.get(9) shouldBe 0
      counter.increment(9)
      counter.get(9) shouldBe 1
      counter.increment(9)
      counter.get(9) shouldBe 2
      counter.numHits() shouldBe 3
      counter.minKey() shouldBe 0
      counter.maxKey() shouldBe 9

      val result = counter.kthGreatest(2)
      result.kthGreatest shouldBe 1
      result.numGreaterThan shouldBe 2
      result.numNonZero shouldBe 3
    }
  }

  "randomized comparison to reference" in {
    val seed = System.currentTimeMillis()
    val rng = new Random(seed)
    val numDocs = 60000
    val numMatches = numDocs / 2
    // Report the seed so a failing run can be investigated.
    info(s"Using seed $seed")
    (0 until 99).foreach { _ =>
      // All matching doc ids are drawn up front, before any per-doc counts are drawn.
      val docs = Vector.fill(numMatches)(rng.nextInt(numDocs))
      val expected = new Reference(numDocs)
      val actual = new ArrayHitCounter(numDocs)
      for (doc <- docs) {
        expected.increment(doc)
        actual.increment(doc)
        actual.get(doc) shouldBe expected.get(doc)
        val extra = rng.nextInt(10).toShort
        expected.increment(doc, extra)
        actual.increment(doc, extra)
        actual.get(doc) shouldBe expected.get(doc)
      }
      actual.minKey() shouldBe expected.minKey()
      actual.maxKey() shouldBe expected.maxKey()
      actual.numHits() shouldBe expected.numHits()
      val k = rng.nextInt(numDocs)
      val actualKth = actual.kthGreatest(k)
      val expectedKth = expected.kthGreatest(k)
      actualKth shouldBe expectedKth
    }
  }
}
Oops, something went wrong.