Skip to content

Commit

Permalink
Revise LSH parameter names (#105)
Browse files Browse the repository at this point in the history
Parameter names are now based on the conventions in the LSH Wikipedia article and material from Indyk et al.
Also improved some internal conventions, e.g. using BitBuffer to abstract bit operations.
  • Loading branch information
alexklibisz authored Jul 12, 2020
1 parent 8cf0156 commit 8bc2cae
Show file tree
Hide file tree
Showing 12 changed files with 260 additions and 164 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -180,13 +180,14 @@ package object benchmarks {
)
)
val lsh = for {
bitsProp <- Seq(0.1, 0.3, 0.5, 0.7, 0.9)
l <- 100 to 350 by 50
kProp <- Seq(0.01, 0.1, 0.20)
} yield
Experiment(
dataset,
Mapping.SparseBool(dataset.dims),
NearestNeighborsQuery.Exact(vecName, Similarity.Hamming),
Mapping.HammingLsh(dataset.dims, (bitsProp * dataset.dims).toInt),
Mapping.HammingLsh(dataset.dims, L = l, k = (kProp * dataset.dims).toInt),
for {
k <- ks
m <- Seq(1, 2, 10, 50)
Expand Down
5 changes: 5 additions & 0 deletions changelog.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
- Switched LSH parameter names to more canonical equivalents: `bands -> L`, `rows -> k`,
based on the [LSH wikipedia article](https://en.wikipedia.org/wiki/Locality-sensitive_hashing#LSH_algorithm_for_nearest_neighbor_search)
and material from Indyk et al., e.g. [these slides](http://people.csail.mit.edu/indyk/mmds.pdf).
- Added a `k` parameter to the Hamming LSH model, which lets you concatenate more than one bit to form a single hash value.
---
- Switched scala client to store the ID as a doc-value field. This avoids decompressing the document source
when reading results, which is about 40% faster on benchmarks for both exact and approx. search.
---
Expand Down
34 changes: 18 additions & 16 deletions client-python/elastiknn/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,8 +95,8 @@ def to_dict(self):
@dataclass(frozen=True)
class JaccardLsh(Base):
dims: int
bands: int
rows: int
K: int
l: int

def to_dict(self):
return {
Expand All @@ -105,15 +105,16 @@ def to_dict(self):
"model": "lsh",
"similarity": "jaccard",
"dims": self.dims,
"bands": self.bands,
"rows": self.rows
"K": self.K,
"l": self.l
}
}

@dataclass(frozen=True)
class HammingLsh(Base):
dims: int
bits: int
L: int
k: int

def to_dict(self):
return {
Expand All @@ -122,7 +123,8 @@ def to_dict(self):
"model": "lsh",
"similarity": "jaccard",
"dims": self.dims,
"bits": self.bits
"L": self.L,
"k": self.k
}
}

Expand All @@ -141,8 +143,8 @@ def to_dict(self):
@dataclass(frozen=True)
class AngularLsh(Base):
dims: int
bands: int
rows: int
K: int
l: int

def to_dict(self):
return {
Expand All @@ -151,17 +153,17 @@ def to_dict(self):
"model": "lsh",
"similarity": "angular",
"dims": self.dims,
"bands": self.bands,
"rows": self.rows
"K": self.K,
"l": self.l
}
}

@dataclass(frozen=True)
class L2LSH(Base):
dims: int
bands: int
rows: int
width: int
K: int
l: int
r: int

def to_dict(self):
return {
Expand All @@ -170,9 +172,9 @@ def to_dict(self):
"model": "lsh",
"similarity": "l2",
"dims": self.dims,
"bands": self.bands,
"rows": self.rows,
"width": self.width
"K": self.K,
"l": self.l,
"r": self.r
}
}

Expand Down
10 changes: 4 additions & 6 deletions core/src/main/scala/com/klibisz/elastiknn/api/package.scala
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
package com.klibisz.elastiknn

import jdk.jfr.Experimental

import scala.util.Random

package object api {
Expand Down Expand Up @@ -86,11 +84,11 @@ package object api {
object Mapping {
final case class SparseBool(dims: Int) extends Mapping
final case class SparseIndexed(dims: Int) extends Mapping
final case class JaccardLsh(dims: Int, bands: Int, rows: Int) extends Mapping
final case class HammingLsh(dims: Int, bits: Int) extends Mapping
final case class JaccardLsh(dims: Int, L: Int, k: Int) extends Mapping
final case class HammingLsh(dims: Int, L: Int, k: Int) extends Mapping
final case class DenseFloat(dims: Int) extends Mapping
final case class AngularLsh(dims: Int, bands: Int, rows: Int) extends Mapping
final case class L2Lsh(dims: Int, bands: Int, rows: Int, width: Int) extends Mapping
final case class AngularLsh(dims: Int, L: Int, k: Int) extends Mapping
final case class L2Lsh(dims: Int, L: Int, k: Int, r: Int) extends Mapping
}

sealed trait NearestNeighborsQuery {
Expand Down
Loading

0 comments on commit 8bc2cae

Please sign in to comment.