From fb5e4742216225ad71c6a95e60c934045cbf9125 Mon Sep 17 00:00:00 2001 From: Alex Klibisz <8015228+alexklibisz@users.noreply.github.com> Date: Sat, 23 Mar 2024 11:27:03 -0700 Subject: [PATCH] Remove usages of scala's Array so we can avoid the extra permissions --- ann-benchmarks/ann-benchmarks | 2 +- .../elastiknn/api/FloatArrayBuffer.java | 2 ++ .../klibisz/elastiknn/VectorMapperUtil.java | 9 ++++++++ .../plugin-metadata/plugin-security.policy | 1 - .../elastiknn/mapper/VectorMapper.scala | 9 +++++--- .../klibisz/elastiknn/query/ExactQuery.scala | 4 ++-- .../elastiknn/query/HashingQuery.scala | 21 +++++++++++++------ 7 files changed, 35 insertions(+), 13 deletions(-) create mode 100644 elastiknn-plugin/src/main/java/com/klibisz/elastiknn/VectorMapperUtil.java diff --git a/ann-benchmarks/ann-benchmarks b/ann-benchmarks/ann-benchmarks index c45bf6b2c..df8083ab9 160000 --- a/ann-benchmarks/ann-benchmarks +++ b/ann-benchmarks/ann-benchmarks @@ -1 +1 @@ -Subproject commit c45bf6b2c917eeb169426088fd6b82840f272e97 +Subproject commit df8083ab96464faa5ddda76da9667069ec5825b6 diff --git a/elastiknn-api4s/src/main/java/com/klibisz/elastiknn/api/FloatArrayBuffer.java b/elastiknn-api4s/src/main/java/com/klibisz/elastiknn/api/FloatArrayBuffer.java index c166c1137..274e84178 100644 --- a/elastiknn-api4s/src/main/java/com/klibisz/elastiknn/api/FloatArrayBuffer.java +++ b/elastiknn-api4s/src/main/java/com/klibisz/elastiknn/api/FloatArrayBuffer.java @@ -16,6 +16,7 @@ public class FloatArrayBuffer { private int index = 0; public FloatArrayBuffer() { +// System.out.printf("Starting at %d\n", nextInitialCapacity); this.array = new float[nextInitialCapacity]; } @@ -28,6 +29,7 @@ public void append(float f) { // this.array[index - 1] = f; // } if (index == this.array.length) { +// System.out.printf("Growing from %d to %d\n", this.array.length, this.array.length * 2); this.array = Arrays.copyOf(this.array, this.array.length * 2); } this.array[index++] = f; diff --git a/elastiknn-plugin/src/main/java/com/klibisz/elastiknn/VectorMapperUtil.java b/elastiknn-plugin/src/main/java/com/klibisz/elastiknn/VectorMapperUtil.java new file mode 100644 index 000000000..73ea3e920 --- /dev/null +++ b/elastiknn-plugin/src/main/java/com/klibisz/elastiknn/VectorMapperUtil.java @@ -0,0 +1,9 @@ +package com.klibisz.elastiknn; + +import org.elasticsearch.index.mapper.FieldMapper; + +public class VectorMapperUtil { + + public static FieldMapper.Parameter[] EMPTY_ARRAY_FIELD_MAPPER_PARAMETER = new FieldMapper.Parameter[0]; + +} diff --git a/elastiknn-plugin/src/main/plugin-metadata/plugin-security.policy b/elastiknn-plugin/src/main/plugin-metadata/plugin-security.policy index 1d287d1d9..e89913b9f 100644 --- a/elastiknn-plugin/src/main/plugin-metadata/plugin-security.policy +++ b/elastiknn-plugin/src/main/plugin-metadata/plugin-security.policy @@ -1,3 +1,2 @@ grant { - permission java.lang.RuntimePermission "getClassLoader"; }; diff --git a/elastiknn-plugin/src/main/scala/com/klibisz/elastiknn/mapper/VectorMapper.scala b/elastiknn-plugin/src/main/scala/com/klibisz/elastiknn/mapper/VectorMapper.scala index fc22f4108..5f65c8eac 100644 --- a/elastiknn-plugin/src/main/scala/com/klibisz/elastiknn/mapper/VectorMapper.scala +++ b/elastiknn-plugin/src/main/scala/com/klibisz/elastiknn/mapper/VectorMapper.scala @@ -30,7 +30,7 @@ object VectorMapper { else { val sorted = vec.sorted() // Sort for faster intersections on the query side. mapping match { - case Mapping.SparseBool(_) => Try(ExactQuery.index(field, sorted)) + case Mapping.SparseBool(_) => Try(Seq(ExactQuery.index(field, sorted))) case m: Mapping.JaccardLsh => Try(HashingQuery.index(field, luceneFieldType, sorted, modelCache(m).hash(vec.trueIndices, vec.totalIndices))) case m: Mapping.HammingLsh => @@ -51,7 +51,7 @@ object VectorMapper { Failure(ElastiknnException.vectorDimensions(vec.values.length, mapping.dims)) else mapping match { - case Mapping.DenseFloat(_) => Try(ExactQuery.index(field, vec)) + case Mapping.DenseFloat(_) => Try(Seq(ExactQuery.index(field, vec))) case m: Mapping.CosineLsh => Try(HashingQuery.index(field, luceneFieldType, vec, modelCache(m).hash(vec.values))) case m: Mapping.L2Lsh => Try(HashingQuery.index(field, luceneFieldType, vec, modelCache(m).hash(vec.values))) case m: Mapping.PermutationLsh => Try(HashingQuery.index(field, luceneFieldType, vec, modelCache(m).hash(vec.values))) @@ -138,6 +138,9 @@ abstract class VectorMapper[V <: Vec: XContentCodec.Decoder] { self => override def getMergeBuilder: FieldMapper.Builder = new Builder(simpleName(), mapping) } - override def getParameters: Array[FieldMapper.Parameter[_]] = Array.empty + override def getParameters: Array[FieldMapper.Parameter[_]] = + // This has to be defined in Java because scala's Array wrapper uses ClassTag, + // which requires the extra permission: java.lang.RuntimePermission "getClassLoader". + VectorMapperUtil.EMPTY_ARRAY_FIELD_MAPPER_PARAMETER } } diff --git a/elastiknn-plugin/src/main/scala/com/klibisz/elastiknn/query/ExactQuery.scala b/elastiknn-plugin/src/main/scala/com/klibisz/elastiknn/query/ExactQuery.scala index 70500d979..23b90857c 100644 --- a/elastiknn-plugin/src/main/scala/com/klibisz/elastiknn/query/ExactQuery.scala +++ b/elastiknn-plugin/src/main/scala/com/klibisz/elastiknn/query/ExactQuery.scala @@ -53,8 +53,8 @@ final class ExactQuery[V <: Vec, S <: StoredVec](field: String, queryVec: V, sim } object ExactQuery { - def index[V <: Vec: StoredVec.Encoder](field: String, vec: V): Seq[IndexableField] = { + def index[V <: Vec: StoredVec.Encoder](field: String, vec: V): IndexableField = { val storedVec = implicitly[StoredVec.Encoder[V]].apply(vec) - Seq(new BinaryDocValuesField(field, new BytesRef(storedVec))) + new BinaryDocValuesField(field, new BytesRef(storedVec)) } } diff --git a/elastiknn-plugin/src/main/scala/com/klibisz/elastiknn/query/HashingQuery.scala b/elastiknn-plugin/src/main/scala/com/klibisz/elastiknn/query/HashingQuery.scala index be189ab1b..f16919da5 100644 --- a/elastiknn-plugin/src/main/scala/com/klibisz/elastiknn/query/HashingQuery.scala +++ b/elastiknn-plugin/src/main/scala/com/klibisz/elastiknn/query/HashingQuery.scala @@ -11,6 +11,7 @@ import org.apache.lucene.util.BytesRef import org.elasticsearch.common.lucene.search.function.{CombineFunction, LeafScoreFunction, ScoreFunction} import java.util.Objects +import scala.collection.mutable.ListBuffer final class HashingQuery[V <: Vec, S <: StoredVec: Decoder]( field: String, @@ -52,10 +53,15 @@ final class HashingQuery[V <: Vec, S <: StoredVec: Decoder]( private val reader = ctx.reader() private val terms = reader.terms(field) private val termsEnum = terms.iterator() - private val postings = hashes.sorted.flatMap { h => - if (termsEnum.seekExact(new BytesRef(h.hash))) Some(termsEnum.postings(null, PostingsEnum.NONE)) - else None + private val postings: Seq[PostingsEnum] = { + val buf = new ListBuffer[PostingsEnum]() + hashes.sorted.foreach { h => + if (termsEnum.seekExact(new BytesRef(h.hash))) buf.prepend(termsEnum.postings(null, PostingsEnum.NONE)) + else None + } + buf.toList.reverse } + override def score(docId: Int, subQueryScore: Float): Double = { val intersection = postings.count { p => p.docID() != DocIdSetIterator.NO_MORE_DOCS && p.advance(docId) == docId } simFunc.maxScore * (intersection * 1d / hashes.length) @@ -84,8 +90,11 @@ object HashingQuery { fieldType: FieldType, vec: V, hashes: Array[HashAndFreq] - ): Seq[IndexableField] = ExactQuery.index(field, vec) ++ hashes.flatMap { h => - val f = new Field(field, h.hash, fieldType) - (0 until h.freq).map(_ => f) + ): Seq[IndexableField] = { + val buffer = ListBuffer.empty[IndexableField] + hashes.foreach { h => + (0 until h.freq).foreach(_ => buffer.prepend(new Field(field, h.hash, fieldType))) + } + buffer.prepend(ExactQuery.index(field, vec)).toList } }