diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 487546fd3df2..b6c11095d231 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -25,6 +25,8 @@ Optimizations * GITHUB##13425: Rewrite SortedNumericDocValuesRangeQuery to MatchNoDocsQuery when the upper bound is smaller than the lower bound. (Ioana Tagirta) +* GITHUB#13322: Implement Weight#count for vector values in the FieldExistsQuery. (Pan Guixin) + Bug Fixes --------------------- (No changes) diff --git a/lucene/core/src/java/org/apache/lucene/search/FieldExistsQuery.java b/lucene/core/src/java/org/apache/lucene/search/FieldExistsQuery.java index d783d99e0bb0..d7b70cf4aa35 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FieldExistsQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/FieldExistsQuery.java @@ -19,10 +19,12 @@ import java.io.IOException; import java.util.Objects; import org.apache.lucene.document.KnnFloatVectorField; +import org.apache.lucene.index.ByteVectorValues; import org.apache.lucene.index.DocValues; import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.FloatVectorValues; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReader; @@ -35,7 +37,7 @@ * org.apache.lucene.document.KnnByteVectorField} or a field that indexes norms or doc values. */ public class FieldExistsQuery extends Query { - private String field; + private final String field; /** Create a query that will match that have a value for the given {@code field}. */ public FieldExistsQuery(String field) { @@ -128,20 +130,7 @@ public Query rewrite(IndexSearcher indexSearcher) throws IOException { break; } } else if (fieldInfo.getVectorDimension() != 0) { // the field indexes vectors - final DocIdSetIterator vectorValues; - switch (fieldInfo.getVectorEncoding()) { - case FLOAT32: - vectorValues = leaf.getFloatVectorValues(field); - break; - case BYTE: - vectorValues = leaf.getByteVectorValues(field); - break; - default: - throw new IllegalArgumentException( - "unknown vector encoding=" + fieldInfo.getVectorEncoding()); - } - assert vectorValues != null : "unexpected null vector values"; - if (vectorValues != null && vectorValues.cost() != leaf.maxDoc()) { + if (getVectorValuesSize(fieldInfo, leaf) != leaf.maxDoc()) { allReadersRewritable = false; break; } @@ -253,7 +242,10 @@ public int count(LeafReaderContext context) throws IOException { } return super.count(context); - } else if (fieldInfo.getVectorDimension() != 0) { // the field indexes vectors + } else if (fieldInfo.hasVectorValues()) { // the field indexes vectors + if (reader.hasDeletions() == false) { + return getVectorValuesSize(fieldInfo, reader); + } return super.count(context); } else if (fieldInfo.getDocValuesType() != DocValuesType.NONE) { // the field indexes doc values @@ -300,4 +292,20 @@ private String buildErrorMsg(FieldInfo fieldInfo) { + fieldInfo.name + "' exists and indexes neither of these data structures"; } + + private int getVectorValuesSize(FieldInfo fi, LeafReader reader) throws IOException { + assert fi.name.equals(field); + switch (fi.getVectorEncoding()) { + case FLOAT32: + FloatVectorValues floatVectorValues = reader.getFloatVectorValues(field); + assert floatVectorValues != null : "unexpected null float vector values"; + return floatVectorValues.size(); + case BYTE: + ByteVectorValues byteVectorValues = reader.getByteVectorValues(field); + assert byteVectorValues != null : "unexpected null byte vector values"; + return byteVectorValues.size(); + default: + throw new IllegalArgumentException("unknown vector encoding=" + fi.getVectorEncoding()); + } + } } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestFieldExistsQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestFieldExistsQuery.java index 209ad510889b..125706a75f33 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestFieldExistsQuery.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestFieldExistsQuery.java @@ -43,7 +43,9 @@ import org.apache.lucene.tests.index.RandomIndexWriter; import org.apache.lucene.tests.util.LuceneTestCase; import org.apache.lucene.tests.util.TestUtil; +import org.apache.lucene.util.BitSet; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.VectorUtil; @@ -649,6 +651,39 @@ public void testKnnVectorAllDocsHaveField() throws IOException { } } + public void testDeleteKnnVector() throws IOException { + try (Directory dir = newDirectory(); + RandomIndexWriter iw = new RandomIndexWriter(random(), dir)) { + final int numDocs = atLeast(100); + + boolean allDocsHaveVector = random().nextBoolean(); + BitSet docWithVector = new FixedBitSet(numDocs); + for (int i = 0; i < numDocs; ++i) { + Document doc = new Document(); + if (allDocsHaveVector || random().nextBoolean()) { + doc.add(new KnnFloatVectorField("vector", randomVector(5))); + docWithVector.set(i); + } + doc.add(new StringField("id", Integer.toString(i), Store.NO)); + iw.addDocument(doc); + } + if (random().nextBoolean()) { + final int numDeleted = random().nextInt(numDocs) + 1; + for (int i = 0; i < numDeleted; ++i) { + iw.deleteDocuments(new Term("id", Integer.toString(i))); + docWithVector.clear(i); + } + } + + try (IndexReader reader = iw.getReader()) { + final IndexSearcher searcher = newSearcher(reader); + + final int count = searcher.count(new FieldExistsQuery("vector")); + assertEquals(docWithVector.cardinality(), count); + } + } + } + public void testKnnVectorConjunction() throws IOException { try (Directory dir = newDirectory(); RandomIndexWriter iw = new RandomIndexWriter(random(), dir)) {