From e6591e54de5c9c9bdc07a9a43fbc29046a1e8847 Mon Sep 17 00:00:00 2001 From: Sam Pullara Date: Thu, 30 May 2024 15:52:52 -0700 Subject: [PATCH 1/3] Replace BitSet with RoaringBitmap --- pom.xml | 5 +++ src/main/java/nbdfdb/FDBBitSet.java | 49 +++++++++++------------------ 2 files changed, 23 insertions(+), 31 deletions(-) diff --git a/pom.xml b/pom.xml index 2b52c84..2a15226 100644 --- a/pom.xml +++ b/pom.xml @@ -60,6 +60,11 @@ 4.11 test + + org.roaringbitmap + RoaringBitmap + 0.9.0 + diff --git a/src/main/java/nbdfdb/FDBBitSet.java b/src/main/java/nbdfdb/FDBBitSet.java index 8bf5cd2..d77c251 100644 --- a/src/main/java/nbdfdb/FDBBitSet.java +++ b/src/main/java/nbdfdb/FDBBitSet.java @@ -19,15 +19,17 @@ import com.apple.foundationdb.*; import com.apple.foundationdb.subspace.Subspace; +import org.roaringbitmap.RoaringBitmap; +import org.roaringbitmap.buffer.ImmutableRoaringBitmap; +import org.roaringbitmap.buffer.MutableRoaringBitmap; -import java.util.BitSet; +import java.nio.ByteBuffer; import java.util.concurrent.CompletableFuture; public class FDBBitSet { private final Database database; private final Subspace subspace; private final int blockSize; - private final int bitsPerBlock; private final byte[] allSetBytes; private final Range subspaceRange; @@ -35,10 +37,11 @@ protected FDBBitSet(Database database, Subspace subspace, int blockSize) { this.database = database; this.subspace = subspace; this.blockSize = blockSize; - bitsPerBlock = blockSize * 8; - BitSet allSet = new BitSet(bitsPerBlock); - allSet.set(0, bitsPerBlock); - allSetBytes = allSet.toByteArray(); + RoaringBitmap allSet = new RoaringBitmap(); + allSet.add(0L, blockSize * 8L); + ByteBuffer byteBuffer = ByteBuffer.allocate(allSet.serializedSizeInBytes()); + allSet.serialize(byteBuffer); + allSetBytes = byteBuffer.array(); subspaceRange = Range.startsWith(subspace.pack()); } @@ -50,37 +53,21 @@ public CompletableFuture set(long startBit, long endBit) { } protected void set(Transaction tx, long startBit, long endBit) { - long startBlock = startBit / bitsPerBlock; - long endBlock = endBit / bitsPerBlock; - BitSet bitSet = new BitSet(bitsPerBlock); - for (long block = startBlock; block <= endBlock; block++) { - bitSet.clear(); - if (block == startBlock) { - int startBitOffset = (int) (startBit % bitsPerBlock); - int endBitOffset = startBlock == endBlock ? (int) (endBit % bitsPerBlock) : bitsPerBlock - 1; - bitSet.set(startBitOffset, endBitOffset + 1); - byte[] bitBytes = bitSet.toByteArray(); - byte[] bytes = new byte[blockSize]; - System.arraycopy(bitBytes, 0, bytes, 0, bitBytes.length); - tx.mutate(MutationType.BIT_OR, subspace.get(block).pack(), bytes); - } else if (block == endBlock) { - int endBitOffset = (int) (endBit % bitsPerBlock); - bitSet.set(0, endBitOffset); - byte[] bitBytes = bitSet.toByteArray(); - byte[] bytes = new byte[blockSize]; - System.arraycopy(bitBytes, 0, bytes, 0, bitBytes.length); - tx.mutate(MutationType.BIT_OR, subspace.get(block).pack(), bytes); - } else { - tx.set(subspace.get(block).pack(), allSetBytes); - } - } + MutableRoaringBitmap bitSet = new MutableRoaringBitmap(); + bitSet.add(startBit, endBit); + ByteBuffer byteBuffer = ByteBuffer.allocate(bitSet.serializedSizeInBytes()); + bitSet.serialize(byteBuffer); + byte[] bytes = byteBuffer.array(); + tx.set(subspace.pack(), bytes); } public CompletableFuture count() { return database.runAsync(tx -> { long count = 0; for (KeyValue keyValue : tx.getRange(subspaceRange)) { - count += BitSet.valueOf(keyValue.getValue()).cardinality(); + ByteBuffer byteBuffer = ByteBuffer.wrap(keyValue.getValue()); + ImmutableRoaringBitmap bitSet = new ImmutableRoaringBitmap(byteBuffer); + count += bitSet.getLongCardinality(); } return CompletableFuture.completedFuture(count); }); From 48e56b5b6f66d5061a410b234fbe63aeb0d2b6ca Mon Sep 17 00:00:00 2001 From: Sam Pullara Date: Fri, 31 May 2024 12:36:47 -0700 Subject: [PATCH 2/3] upgrade to latest fdb --- pom.xml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 2a15226..232e3c3 100644 --- a/pom.xml +++ b/pom.xml @@ -28,10 +28,11 @@ http://maven.apache.org + - com.apple.cie.foundationdb + org.foundationdb fdb-java - 5.1.5 + 7.3.43 org.hdrhistogram From ad6045272daebd9802d7d6cb2b77ec7508f9d47d Mon Sep 17 00:00:00 2001 From: Sam Pullara Date: Fri, 31 May 2024 12:53:10 -0700 Subject: [PATCH 3/3] rebase and fix an off by one bug --- src/main/java/nbdfdb/FDBBitSet.java | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/main/java/nbdfdb/FDBBitSet.java b/src/main/java/nbdfdb/FDBBitSet.java index d77c251..e667f00 100644 --- a/src/main/java/nbdfdb/FDBBitSet.java +++ b/src/main/java/nbdfdb/FDBBitSet.java @@ -53,9 +53,12 @@ public CompletableFuture set(long startBit, long endBit) { } protected void set(Transaction tx, long startBit, long endBit) { + // TODO: need to do something if the bitset is too big for an FDB value MutableRoaringBitmap bitSet = new MutableRoaringBitmap(); - bitSet.add(startBit, endBit); - ByteBuffer byteBuffer = ByteBuffer.allocate(bitSet.serializedSizeInBytes()); + bitSet.add(startBit, endBit + 1); + int capacity = bitSet.serializedSizeInBytes(); + assert capacity <= 100_000; + ByteBuffer byteBuffer = ByteBuffer.allocate(capacity); bitSet.serialize(byteBuffer); byte[] bytes = byteBuffer.array(); tx.set(subspace.pack(), bytes);