Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add dup_sort_comparator #283

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 62 additions & 0 deletions heed/examples/custom-dupsort-comparator.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
use std::cmp::Ordering;
use std::error::Error;
use std::fs;
use std::path::Path;

use byteorder::BigEndian;
use heed::{DatabaseFlags, EnvOpenOptions};
use heed_traits::Comparator;
use heed_types::{Str, U128};

/// Comparator that orders values in descending order.
///
/// It reverses the lexicographic byte order of its operands. This matches a
/// descending *numeric* order only because the values stored in this example
/// are fixed-width big-endian integers (`U128<BigEndian>`).
enum DescendingIntCmp {}

impl Comparator for DescendingIntCmp {
    /// Compares `b` against `a` (operands swapped) to invert the byte order.
    fn compare(a: &[u8], b: &[u8]) -> Ordering {
        // `a` is already a `&[u8]`; borrowing it again is unnecessary.
        b.cmp(a)
    }
}

/// Creates a `DUP_SORT` database that uses `DescendingIntCmp` for its duplicate
/// values, fills it, and checks the iteration order.
fn main() -> Result<(), Box<dyn Error>> {
    // Start from a fresh environment directory; an error from the removal
    // (for instance when the directory does not exist yet) is deliberately ignored.
    let env_path = Path::new("target").join("custom-dupsort-cmp.mdb");
    let _ = fs::remove_dir_all(&env_path);
    fs::create_dir_all(&env_path)?;

    let env = unsafe {
        EnvOpenOptions::new()
            .map_size(10 * 1024 * 1024) // 10MB
            .max_dbs(3)
            .open(env_path)?
    };

    // The database is created and committed in a dedicated transaction.
    let mut create_txn = env.write_txn()?;
    let db = env
        .database_options()
        .types::<Str, U128<BigEndian>>()
        .flags(DatabaseFlags::DUP_SORT)
        .dup_sort_comparator::<DescendingIntCmp>()
        .create(&mut create_txn)?;
    create_txn.commit()?;

    let mut wtxn = env.write_txn()?;

    // Entries are inserted in this exact order: several values share the key "1".
    let entries = [("1", 1), ("1", 2), ("1", 3), ("2", 4), ("1", 5), ("0", 0)];
    for &(key, value) in &entries {
        db.put(&mut wtxn, key, &value)?;
    }

    // Keys must come back in lexicographic order and, for a given key,
    // values must come back in descending order.
    let expected = [("0", 0), ("1", 5), ("1", 3), ("1", 2), ("1", 1), ("2", 4)];
    let mut iter = db.iter(&wtxn)?;
    for &entry in &expected {
        assert_eq!(iter.next().transpose()?, Some(entry));
    }
    drop(iter);

    Ok(())
}
152 changes: 151 additions & 1 deletion heed/src/cookbook.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
//! - [Create Custom and Prefix Codecs](#create-custom-and-prefix-codecs)
//! - [Change the Environment Size Dynamically](#change-the-environment-size-dynamically)
//! - [Advanced Multithreaded Access of Entries](#advanced-multithreaded-access-of-entries)
//! - [Custom Key Comparator](#custom-key-comparator)
//! - [Custom Dupsort Comparator](#custom-dupsort-comparator)
//!
//! # Decode Values on Demand
//!
Expand Down Expand Up @@ -445,8 +447,156 @@
//! unsafe impl Sync for ImmutableMap<'_> {}
//! ```
//!
//! # Custom Key Comparator
//!
//! LMDB keys are sorted in lexicographic order by default. To change this behavior you can implement a custom [`Comparator`]
//! and provide it when creating the database.
//!
//! Under the hood this translates into a [`mdb_set_compare`] call.
//!
//! ```
//! use std::cmp::Ordering;
//! use std::error::Error;
//! use std::path::Path;
//! use std::{fs, str};
//!
//! use heed::EnvOpenOptions;
//! use heed_traits::Comparator;
//! use heed_types::{Str, Unit};
//!
//! enum StringAsIntCmp {}
//!
//! // This function takes two strings which represent integers,
//! // parses them into i32s and compares the parsed values.
//! // Therefore "-1000" < "-100" must be true even without '0' padding.
//! impl Comparator for StringAsIntCmp {
//! fn compare(a: &[u8], b: &[u8]) -> Ordering {
//! let a: i32 = str::from_utf8(a).unwrap().parse().unwrap();
//! let b: i32 = str::from_utf8(b).unwrap().parse().unwrap();
//! a.cmp(&b)
//! }
//! }
//!
//! fn main() -> Result<(), Box<dyn Error>> {
//! let env_path = Path::new("target").join("custom-key-cmp.mdb");
//!
//! let _ = fs::remove_dir_all(&env_path);
//!
//! fs::create_dir_all(&env_path)?;
//! let env = unsafe {
//! EnvOpenOptions::new()
//! .map_size(10 * 1024 * 1024) // 10MB
//! .max_dbs(3)
//! .open(env_path)?
//! };
//!
//! let mut wtxn = env.write_txn()?;
//! let db = env
//! .database_options()
//! .types::<Str, Unit>()
//! .key_comparator::<StringAsIntCmp>()
//! .create(&mut wtxn)?;
//! wtxn.commit()?;
//!
//! let mut wtxn = env.write_txn()?;
//!
//! // We fill our database with entries.
//! db.put(&mut wtxn, "-100000", &())?;
//! db.put(&mut wtxn, "-10000", &())?;
//! db.put(&mut wtxn, "-1000", &())?;
//! db.put(&mut wtxn, "-100", &())?;
//! db.put(&mut wtxn, "100", &())?;
//!
//! // We check that the keys are in numeric order ("-100000" < "-10000" < "-1000"...)
//! let mut iter = db.iter(&wtxn)?;
//! assert_eq!(iter.next().transpose()?, Some(("-100000", ())));
//! assert_eq!(iter.next().transpose()?, Some(("-10000", ())));
//! assert_eq!(iter.next().transpose()?, Some(("-1000", ())));
//! assert_eq!(iter.next().transpose()?, Some(("-100", ())));
//! assert_eq!(iter.next().transpose()?, Some(("100", ())));
//! drop(iter);
//!
//! Ok(())
//! }
//! ```
//!
//! # Custom Dupsort Comparator
//!
//! When using DUPSORT, LMDB sorts the values of a given key in lexicographic order by default. To change this behavior you can implement a custom [`Comparator`]
//! and provide it when creating the database.
//!
//! Under the hood this translates into a [`mdb_set_dupsort`] call.
//!
//! ```
//! use std::cmp::Ordering;
//! use std::error::Error;
//! use std::fs;
//! use std::path::Path;
//!
//! use byteorder::BigEndian;
//! use heed::{DatabaseFlags, EnvOpenOptions};
//! use heed_traits::Comparator;
//! use heed_types::{Str, U128};
//!
//! enum DescendingIntCmp {}
//!
//! impl Comparator for DescendingIntCmp {
//! fn compare(a: &[u8], b: &[u8]) -> Ordering {
//! b.cmp(&a)
//! }
//! }
//!
//! fn main() -> Result<(), Box<dyn Error>> {
//! let env_path = Path::new("target").join("custom-dupsort-cmp.mdb");
//!
//! let _ = fs::remove_dir_all(&env_path);
//!
//! fs::create_dir_all(&env_path)?;
//! let env = unsafe {
//! EnvOpenOptions::new()
//! .map_size(10 * 1024 * 1024) // 10MB
//! .max_dbs(3)
//! .open(env_path)?
//! };
//!
//! let mut wtxn = env.write_txn()?;
//! let db = env
//! .database_options()
//! .types::<Str, U128<BigEndian>>()
//! .flags(DatabaseFlags::DUP_SORT)
//! .dup_sort_comparator::<DescendingIntCmp>()
//! .create(&mut wtxn)?;
//! wtxn.commit()?;
//!
//! let mut wtxn = env.write_txn()?;
//!
//! // We fill our database with entries.
//! db.put(&mut wtxn, "1", &1)?;
//! db.put(&mut wtxn, "1", &2)?;
//! db.put(&mut wtxn, "1", &3)?;
//! db.put(&mut wtxn, "2", &4)?;
//! db.put(&mut wtxn, "1", &5)?;
//! db.put(&mut wtxn, "0", &0)?;
//!
//! // We check that the keys are in lexicographic order and that the values of each key are in descending order.
//! let mut iter = db.iter(&wtxn)?;
//! assert_eq!(iter.next().transpose()?, Some(("0", 0)));
//! assert_eq!(iter.next().transpose()?, Some(("1", 5)));
//! assert_eq!(iter.next().transpose()?, Some(("1", 3)));
//! assert_eq!(iter.next().transpose()?, Some(("1", 2)));
//! assert_eq!(iter.next().transpose()?, Some(("1", 1)));
//! assert_eq!(iter.next().transpose()?, Some(("2", 4)));
//! drop(iter);
//!
//! Ok(())
//! }
//! ```
//!

// To let cargo generate doc links
#![allow(unused_imports)]

use crate::{BytesDecode, BytesEncode, Database, EnvOpenOptions};
use crate::{
mdb::ffi::mdb_set_compare, mdb::ffi::mdb_set_dupsort, BytesDecode, BytesEncode, Comparator,
Database, EnvOpenOptions,
};
30 changes: 22 additions & 8 deletions heed/src/database.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,9 @@ use crate::*;
/// # Ok(()) }
/// ```
#[derive(Debug)]
pub struct DatabaseOpenOptions<'e, 'n, KC, DC, C = DefaultComparator> {
pub struct DatabaseOpenOptions<'e, 'n, KC, DC, C = DefaultComparator, CDUP = DefaultComparator> {
env: &'e Env,
types: marker::PhantomData<(KC, DC, C)>,
types: marker::PhantomData<(KC, DC, C, CDUP)>,
name: Option<&'n str>,
flags: AllDatabaseFlags,
}
Expand All @@ -74,7 +74,7 @@ impl<'e> DatabaseOpenOptions<'e, 'static, Unspecified, Unspecified> {
}
}

impl<'e, 'n, KC, DC, C> DatabaseOpenOptions<'e, 'n, KC, DC, C> {
impl<'e, 'n, KC, DC, C, CDUP> DatabaseOpenOptions<'e, 'n, KC, DC, C, CDUP> {
/// Change the type of the database.
///
/// The default types are [`Unspecified`] and require a call to [`Database::remap_types`]
Expand All @@ -90,7 +90,19 @@ impl<'e, 'n, KC, DC, C> DatabaseOpenOptions<'e, 'n, KC, DC, C> {
/// Change the customized key compare function of the database.
///
/// By default no customized compare function will be set when opening a database.
pub fn key_comparator<NC>(self) -> DatabaseOpenOptions<'e, 'n, KC, DC, NC> {
pub fn key_comparator<NC>(self) -> DatabaseOpenOptions<'e, 'n, KC, DC, NC, CDUP> {
DatabaseOpenOptions {
env: self.env,
types: Default::default(),
name: self.name,
flags: self.flags,
}
}

/// Change the customized dup sort compare function of the database.
///
/// By default no customized compare function will be set when opening a database.
pub fn dup_sort_comparator<NCDUP>(self) -> DatabaseOpenOptions<'e, 'n, KC, DC, C, NCDUP> {
DatabaseOpenOptions {
env: self.env,
types: Default::default(),
Expand Down Expand Up @@ -136,10 +148,11 @@ impl<'e, 'n, KC, DC, C> DatabaseOpenOptions<'e, 'n, KC, DC, C> {
KC: 'static,
DC: 'static,
C: Comparator + 'static,
CDUP: Comparator + 'static,
{
assert_eq_env_txn!(self.env, rtxn);

match self.env.raw_init_database::<C>(rtxn.txn.unwrap(), self.name, self.flags) {
match self.env.raw_init_database::<C, CDUP>(rtxn.txn.unwrap(), self.name, self.flags) {
Ok(dbi) => Ok(Some(Database::new(self.env.env_mut_ptr() as _, dbi))),
Err(Error::Mdb(e)) if e.not_found() => Ok(None),
Err(e) => Err(e),
Expand All @@ -160,24 +173,25 @@ impl<'e, 'n, KC, DC, C> DatabaseOpenOptions<'e, 'n, KC, DC, C> {
KC: 'static,
DC: 'static,
C: Comparator + 'static,
CDUP: Comparator + 'static,
{
assert_eq_env_txn!(self.env, wtxn);

let flags = self.flags | AllDatabaseFlags::CREATE;
match self.env.raw_init_database::<C>(wtxn.txn.txn.unwrap(), self.name, flags) {
match self.env.raw_init_database::<C, CDUP>(wtxn.txn.txn.unwrap(), self.name, flags) {
Ok(dbi) => Ok(Database::new(self.env.env_mut_ptr() as _, dbi)),
Err(e) => Err(e),
}
}
}

impl<KC, DC, C> Clone for DatabaseOpenOptions<'_, '_, KC, DC, C> {
// Written by hand, without any bound on `KC`, `DC`, `C` or `CDUP`.
// Cloning is a plain copy of `*self`, which relies on this type also being `Copy`.
impl<KC, DC, C, CDUP> Clone for DatabaseOpenOptions<'_, '_, KC, DC, C, CDUP> {
fn clone(&self) -> Self {
*self
}
}

impl<KC, DC, C> Copy for DatabaseOpenOptions<'_, '_, KC, DC, C> {}
// Written by hand so that no bound is required on the codec and comparator
// type parameters, which the struct only holds inside a `PhantomData`.
impl<KC, DC, C, CDUP> Copy for DatabaseOpenOptions<'_, '_, KC, DC, C, CDUP> {}

/// A typed database that accepts only the types it was created with.
///
Expand Down
25 changes: 18 additions & 7 deletions heed/src/env.rs
Original file line number Diff line number Diff line change
Expand Up @@ -606,7 +606,8 @@ impl Env {

let rtxn = self.read_txn()?;
// Open the main database
let dbi = self.raw_open_dbi::<DefaultComparator>(rtxn.txn.unwrap(), None, 0)?;
let dbi =
self.raw_open_dbi::<DefaultComparator, DefaultComparator>(rtxn.txn.unwrap(), None, 0)?;

// We're going to iterate on the unnamed database
let mut cursor = RoCursor::new(&rtxn, dbi)?;
Expand All @@ -619,9 +620,11 @@ impl Env {
let key = String::from_utf8(key.to_vec()).unwrap();
// Calling `ffi::db_stat` on a database instance does not involve key comparison
// in LMDB, so it's safe to specify a noop key compare function for it.
if let Ok(dbi) =
self.raw_open_dbi::<DefaultComparator>(rtxn.txn.unwrap(), Some(&key), 0)
{
if let Ok(dbi) = self.raw_open_dbi::<DefaultComparator, DefaultComparator>(
rtxn.txn.unwrap(),
Some(&key),
0,
) {
let mut stat = mem::MaybeUninit::uninit();
let mut txn = rtxn.txn.unwrap();
unsafe { mdb_result(ffi::mdb_stat(txn.as_mut(), dbi, stat.as_mut_ptr()))? };
Expand Down Expand Up @@ -697,19 +700,19 @@ impl Env {
options.create(wtxn)
}

pub(crate) fn raw_init_database<C: Comparator + 'static>(
pub(crate) fn raw_init_database<C: Comparator + 'static, CDUP: Comparator + 'static>(
&self,
raw_txn: NonNull<ffi::MDB_txn>,
name: Option<&str>,
flags: AllDatabaseFlags,
) -> Result<u32> {
match self.raw_open_dbi::<C>(raw_txn, name, flags.bits()) {
match self.raw_open_dbi::<C, CDUP>(raw_txn, name, flags.bits()) {
Ok(dbi) => Ok(dbi),
Err(e) => Err(e.into()),
}
}

fn raw_open_dbi<C: Comparator + 'static>(
fn raw_open_dbi<C: Comparator + 'static, CDUP: Comparator + 'static>(
&self,
mut raw_txn: NonNull<ffi::MDB_txn>,
name: Option<&str>,
Expand All @@ -733,6 +736,14 @@ impl Env {
Some(custom_key_cmp_wrapper::<C>),
))?;
}

if TypeId::of::<CDUP>() != TypeId::of::<DefaultComparator>() {
mdb_result(ffi::mdb_set_dupsort(
raw_txn,
dbi,
Some(custom_key_cmp_wrapper::<CDUP>),
))?;
}
};

Ok(dbi)
Expand Down
6 changes: 3 additions & 3 deletions heed/src/mdb/lmdb_ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@ pub use ffi::{
mdb_env_get_fd, mdb_env_get_flags, mdb_env_get_maxkeysize, mdb_env_info, mdb_env_open,
mdb_env_set_flags, mdb_env_set_mapsize, mdb_env_set_maxdbs, mdb_env_set_maxreaders,
mdb_env_stat, mdb_env_sync, mdb_filehandle_t, mdb_get, mdb_put, mdb_reader_check,
mdb_set_compare, mdb_stat, mdb_txn_abort, mdb_txn_begin, mdb_txn_commit, mdb_version,
MDB_cursor, MDB_dbi, MDB_env, MDB_stat, MDB_txn, MDB_val, MDB_CP_COMPACT, MDB_CURRENT,
MDB_RDONLY, MDB_RESERVE,
mdb_set_compare, mdb_set_dupsort, mdb_stat, mdb_txn_abort, mdb_txn_begin, mdb_txn_commit,
mdb_version, MDB_cursor, MDB_dbi, MDB_env, MDB_stat, MDB_txn, MDB_val, MDB_CP_COMPACT,
MDB_CURRENT, MDB_RDONLY, MDB_RESERVE,
};
use lmdb_master_sys as ffi;

Expand Down