From 2a41c470de91ffe1ad5255a4c60add594289b6ed Mon Sep 17 00:00:00 2001 From: Martin Clauss Date: Fri, 22 Mar 2024 22:14:26 +0100 Subject: [PATCH] Schema migration WIP Issue: #21 --- packages/isar_core/src/core/mod.rs | 1 + .../src/core/v3schema/collection_schema.rs | 444 ++++++++++++++++++ .../src/core/v3schema/index_schema.rs | 68 +++ .../src/core/v3schema/link_schema.rs | 17 + packages/isar_core/src/core/v3schema/mod.rs | 4 + .../isar_core/src/core/v3schema/property.rs | 32 ++ .../isar_core/src/native/schema_manager.rs | 26 +- 7 files changed, 586 insertions(+), 6 deletions(-) create mode 100644 packages/isar_core/src/core/v3schema/collection_schema.rs create mode 100644 packages/isar_core/src/core/v3schema/index_schema.rs create mode 100644 packages/isar_core/src/core/v3schema/link_schema.rs create mode 100644 packages/isar_core/src/core/v3schema/mod.rs create mode 100644 packages/isar_core/src/core/v3schema/property.rs diff --git a/packages/isar_core/src/core/mod.rs b/packages/isar_core/src/core/mod.rs index 178048e33..449ff0ff3 100644 --- a/packages/isar_core/src/core/mod.rs +++ b/packages/isar_core/src/core/mod.rs @@ -11,6 +11,7 @@ pub mod query_builder; pub mod reader; pub mod schema; mod ser; +pub mod v3schema; pub mod value; pub mod watcher; pub mod writer; diff --git a/packages/isar_core/src/core/v3schema/collection_schema.rs b/packages/isar_core/src/core/v3schema/collection_schema.rs new file mode 100644 index 000000000..d39a1851b --- /dev/null +++ b/packages/isar_core/src/core/v3schema/collection_schema.rs @@ -0,0 +1,444 @@ +use crate::core::error::IsarError; +use crate::core::error::Result; +use super::super::data_type::DataType; +use super::property::Property; +use super::index_schema::{IndexSchema, IndexType}; +use super::link_schema::LinkSchema; +use super::super::schema::PropertySchema; +use itertools::Itertools; +use serde::{Deserialize, Serialize}; + +//use super::schema_manager::SchemaManager; + +#[derive(Serialize, Deserialize, Clone, Debug, Eq)] +pub struct CollectionSchema { + pub(crate) name: String, + #[serde(default)] + pub(crate) embedded: bool, + pub(crate) properties: Vec, + #[serde(default)] + pub(crate) indexes: Vec, + #[serde(default)] + pub(crate) links: Vec, + #[serde(default)] + pub(crate) version: u8, +} + +fn schema_error(msg: &str) -> Result { + Err(IsarError::SchemaError { + message: msg.to_string(), + }) +} + +impl PartialEq for CollectionSchema { + fn eq(&self, other: &Self) -> bool { + self.name == other.name && self.embedded == other.embedded + } +} + +impl CollectionSchema { + pub fn new( + name: &str, + embedded: bool, + properties: Vec, + indexes: Vec, + links: Vec, + ) -> CollectionSchema { + CollectionSchema { + name: name.to_string(), + embedded, + properties, + indexes, + links, + version: 2, + } + } +/* + fn verify_name(name: &str) -> Result<()> { + if name.is_empty() { + Self::schema_error("Empty names are not allowed.") + } else if name.starts_with('_') { + Self::schema_error("Names must not begin with an underscore.") + } else { + Ok(()) + } + } + + + pub(crate) fn verify(&self, collections: &[CollectionSchema]) -> Result<()> { + Self::verify_name(&self.name)?; + + if self.embedded && (!self.links.is_empty() || !self.indexes.is_empty()) { + schema_error("Embedded objects must not have Links or Indexes.")?; + } + + let verify_target_col_exists = |col: &str, embedded: bool| -> Result<()> { + if !collections + .iter() + .any(|c| c.name == col && c.embedded == embedded) + { + schema_error("Target collection does not exist.")?; + } + Ok(()) + }; + + for property in &self.properties { + if let Some(name) = &property.name { + Self::verify_name(name)?; + } + + if property.data_type == DataType::Object || property.data_type == DataType::ObjectList + { + if let Some(target_col) = &property.target_col { + verify_target_col_exists(target_col, true)?; + } else { + schema_error("Object property must have a target collection.")?; + } + } else { + if property.target_col.is_some() { + schema_error("Target collection can only be set for object properties.")?; + } + } + } + + for link in &self.links { + Self::verify_name(&link.name)?; + verify_target_col_exists(&link.target_col, false)?; + } + + let property_names = self + .properties + .iter() + .unique_by(|p| p.name.as_ref().unwrap()); + if property_names.count() != self.properties.len() { + schema_error("Duplicate property name")?; + } + + let index_names = self.indexes.iter().unique_by(|i| i.name.as_str()); + if index_names.count() != self.indexes.len() { + schema_error("Duplicate index name")?; + } + + let link_names = self.links.iter().unique_by(|l| l.name.as_str()); + if link_names.count() != self.links.len() { + schema_error("Duplicate link name")?; + } + + for index in &self.indexes { + if index.properties.is_empty() { + schema_error("At least one property needs to be added to a valid index")?; + } else if index.properties.len() > 3 { + schema_error("No more than three properties may be used as a composite index")?; + } + + if !index.unique && index.replace { + schema_error("Only unique indexes can replace")?; + } + + for (i, index_property) in index.properties.iter().enumerate() { + let property = self + .properties + .iter() + .find(|p| p.name.as_ref() == Some(&index_property.name)); + if property.is_none() { + schema_error("IsarIndex property does not exist")?; + } + let property = property.unwrap(); + + if property.data_type == DataType::Object + || property.data_type == DataType::ObjectList + { + schema_error("Object and ObjectList cannot be indexed.")?; + } + + if property.data_type == DataType::Float + || property.data_type == DataType::Double + || property.data_type == DataType::FloatList + || property.data_type == DataType::DoubleList + { + if index_property.index_type == IndexType::Hash { + schema_error("Float values cannot be hashed.")?; + } else if i != index.properties.len() - 1 { + schema_error( + "Float indexes must only be at the end of a composite index.", + )?; + } + } + + if property.data_type.get_element_type().is_some() { + if index.properties.len() > 1 && index_property.index_type != IndexType::Hash { + schema_error("Composite list indexes are not supported.")?; + } + } else if property.data_type == DataType::String + && i != index.properties.len() - 1 + && index_property.index_type != IndexType::Hash + { + schema_error( + "Non-hashed string indexes must only be at the end of a composite index.", + )?; + } + + if property.data_type != DataType::String + && property.data_type.get_element_type().is_none() + && index_property.index_type == IndexType::Hash + { + schema_error("Only string and list indexes may be hashed")?; + } + if property.data_type != DataType::StringList + && index_property.index_type == IndexType::HashElements + { + schema_error("Only string list indexes may be use hash elements")?; + } + if property.data_type != DataType::String + && property.data_type != DataType::StringList + && index_property.case_sensitive + { + schema_error("Only String and StringList indexes may be case sensitive.")?; + } + } + } + + Ok(()) + } + + pub(crate) fn merge_properties(&mut self, existing: &Self) -> Result> { + let mut properties = existing.properties.clone(); + let mut removed_properties = vec![]; + + for property in &mut properties { + if property.name.is_some() && !self.properties.contains(property) { + removed_properties.push(property.name.take().unwrap()); + } + } + for property in &self.properties { + if !properties.contains(property) { + properties.push(property.clone()) + } + } + + self.properties = properties; + + Ok(removed_properties) + } + + pub fn get_properties(&self) -> Vec { + let mut properties = vec![]; + let mut offset = 2; + for property_schema in self.properties.iter() { + let property = property_schema.as_property(offset); + if let Some(property) = property { + properties.push(property); + } + offset += property_schema.data_type.get_static_size(); + } + + properties.sort_by(|a, b| a.name.cmp(&b.name)); + properties + } + + pub fn to_json_bytes(&self) -> Result> { + serde_json::to_vec(self).map_err(|_| IsarError::SchemaError { + message: "Could not serialize schema.".to_string(), + }) + }*/ +} + +/*#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_add_property_empty_name() { + let mut col = CollectionSchema::new("col"); + assert!(col.add_property("", DataType::Int).is_err()) + } + + #[test] + fn test_add_property_duplicate_name() { + let mut col = CollectionSchema::new("col"); + col.add_property("prop", DataType::Int).unwrap(); + assert!(col.add_property("prop", DataType::Int).is_err()) + } + + #[test] + fn test_add_property_same_type_wrong_order() { + let mut col = CollectionSchema::new("col"); + + col.add_property("b", DataType::Int).unwrap(); + assert!(col.add_property("a", DataType::Int).is_err()) + } + + #[test] + fn test_add_property_wrong_order() { + let mut col = CollectionSchema::new("col"); + + col.add_property("a", DataType::Long).unwrap(); + assert!(col.add_property("b", DataType::Int).is_err()) + } + + #[test] + fn test_add_index_without_properties() { + let mut col = CollectionSchema::new("col"); + + assert!(col.add_index(&[], false, false).is_err()) + } + + #[test] + fn test_add_index_with_non_existing_property() { + let mut col = CollectionSchema::new("col"); + col.add_property("prop1", DataType::Int).unwrap(); + + col.add_index(&["prop1"], false, false).unwrap(); + assert!(col.add_index(&["wrongprop"], false, false).is_err()) + } + + #[test] + fn test_add_index_with_illegal_data_type() { + let mut col = CollectionSchema::new("col"); + col.add_property("byte", DataType::Byte).unwrap(); + col.add_property("int", DataType::Int).unwrap(); + col.add_property("float", DataType::Float).unwrap(); + col.add_property("long", DataType::Long).unwrap(); + col.add_property("double", DataType::Double).unwrap(); + col.add_property("str", DataType::String).unwrap(); + col.add_property("byteList", DataType::ByteList).unwrap(); + col.add_property("intList", DataType::IntList).unwrap(); + + col.add_index(&["byte"], false, None, false).unwrap(); + col.add_index(&["int"], false, None, false).unwrap(); + col.add_index(&["float"], false, None, false).unwrap(); + col.add_index(&["long"], false, None, false).unwrap(); + col.add_index(&["double"], false, None, false).unwrap(); + col.add_index(&["str"], false, Some(StringIndexType::Value), false) + .unwrap(); + assert!(col.add_index(&["byteList"], false, false).is_err()); + assert!(col.add_index(&["intList"], false, false).is_err()); + } + + #[test] + fn test_add_index_too_many_properties() { + let mut col = CollectionSchema::new("col"); + col.add_property("prop1", DataType::Int).unwrap(); + col.add_property("prop2", DataType::Int).unwrap(); + col.add_property("prop3", DataType::Int).unwrap(); + col.add_property("prop4", DataType::Int).unwrap(); + + assert!(col + .add_index(&["prop1", "prop2", "prop3", "prop4"], false, false) + .is_err()) + } + + #[test] + fn test_add_duplicate_index() { + let mut col = CollectionSchema::new("col"); + col.add_property("prop1", DataType::Int).unwrap(); + col.add_property("prop2", DataType::Int).unwrap(); + + col.add_index(&["prop2"], false, false).unwrap(); + col.add_index(&["prop1", "prop2"], false, false).unwrap(); + assert!(col.add_index(&["prop1", "prop2"], false, false).is_err()); + assert!(col.add_index(&["prop1"], false, false).is_err()); + } + + #[test] + fn test_add_composite_index_with_non_hashed_string_in_the_middle() { + let mut col = CollectionSchema::new("col"); + col.add_property("int", DataType::Int).unwrap(); + col.add_property("str", DataType::String).unwrap(); + + col.add_index(&["int", "str"], false, false).unwrap(); + assert!(col.add_index(&["str", "int"], false, false).is_err()); + col.add_index(&["str", "int"], false, true).unwrap(); + } + + #[test] + fn test_properties_have_correct_offset() { + fn get_offsets(mut schema: CollectionSchema) -> Vec { + let mut get_id = || 1; + schema.update_with_existing_collections(&[], &mut get_id); + let col = schema.get_isar_collection(); + let mut offsets = vec![]; + for i in 0..schema.properties.len() { + let (_, p) = col.get_properties().get(i).unwrap(); + offsets.push(p.offset); + } + offsets + } + + let mut col = CollectionSchema::new("col"); + col.add_property("byte", DataType::Byte).unwrap(); + col.add_property("int", DataType::Int).unwrap(); + col.add_property("double", DataType::Double).unwrap(); + assert_eq!(get_offsets(col), vec![0, 2, 10]); + + let mut col = CollectionSchema::new("col"); + col.add_property("byte1", DataType::Byte).unwrap(); + col.add_property("byte2", DataType::Byte).unwrap(); + col.add_property("byte3", DataType::Byte).unwrap(); + col.add_property("str", DataType::String).unwrap(); + assert_eq!(get_offsets(col), vec![0, 1, 2, 10]); + + let mut col = CollectionSchema::new("col"); + col.add_property("byteList", DataType::ByteList).unwrap(); + col.add_property("intList", DataType::IntList).unwrap(); + col.add_property("doubleList", DataType::DoubleList) + .unwrap(); + assert_eq!(get_offsets(col), vec![2, 10, 18]); + } + + #[test] + fn update_with_no_existing_collection() { + let mut col = CollectionSchema::new("col"); + col.add_property("byte", DataType::Byte).unwrap(); + col.add_property("int", DataType::Int).unwrap(); + col.add_index(&["byte"], true, false).unwrap(); + col.add_index(&["int"], true, false).unwrap(); + + let mut counter = 0; + let mut get_id = || { + counter += 1; + counter + }; + col.update_with_existing_collections(&[], &mut get_id); + + assert_eq!(col.id, Some(1)); + assert_eq!(col.indexes[0].id, Some(2)); + assert_eq!(col.indexes[1].id, Some(3)); + } + + #[test] + fn update_with_existing_collection() { + let mut counter = 0; + let mut get_id = || { + counter += 1; + counter + }; + + let mut col1 = CollectionSchema::new("col"); + col1.add_property("byte", DataType::Byte).unwrap(); + col1.add_property("int", DataType::Int).unwrap(); + col1.add_index(&["byte"], true, false).unwrap(); + col1.add_index(&["int"], true, false).unwrap(); + + col1.update_with_existing_collections(&[], &mut get_id); + assert_eq!(col1.id, Some(1)); + assert_eq!(col1.indexes[0].id, Some(2)); + assert_eq!(col1.indexes[1].id, Some(3)); + + let mut col2 = CollectionSchema::new("col"); + col2.add_property("byte", DataType::Byte).unwrap(); + col2.add_property("int", DataType::Int).unwrap(); + col2.add_index(&["byte"], true, false).unwrap(); + col2.add_index(&["int", "byte"], true, false).unwrap(); + + col2.update_with_existing_collections(&[col1], &mut get_id); + assert_eq!(col2.id, Some(1)); + assert_eq!(col2.indexes[0].id, Some(2)); + assert_eq!(col2.indexes[1].id, Some(4)); + + let mut col3 = CollectionSchema::new("col3"); + col3.update_with_existing_collections(&[col2], &mut get_id); + assert_eq!(col3.id, Some(5)); + } +} +*/ diff --git a/packages/isar_core/src/core/v3schema/index_schema.rs b/packages/isar_core/src/core/v3schema/index_schema.rs new file mode 100644 index 000000000..8bf54d9c5 --- /dev/null +++ b/packages/isar_core/src/core/v3schema/index_schema.rs @@ -0,0 +1,68 @@ +//use crate::index::{IndexProperty, IsarIndex}; +//use crate::native::mdbx::db::Db; +use super::property::Property; +use itertools::Itertools; +use serde::{Deserialize, Serialize}; + +#[derive(Serialize, Deserialize, Copy, Clone, Eq, Debug, PartialEq)] +pub enum IndexType { + Value, + Hash, + HashElements, +} + +#[derive(Serialize, Deserialize, Clone, Eq, Debug, PartialEq)] +pub struct IndexPropertySchema { + pub(crate) name: String, + #[serde(rename = "type")] + pub(crate) index_type: IndexType, + #[serde(rename = "caseSensitive")] + pub(crate) case_sensitive: bool, +} + +impl IndexPropertySchema { + pub fn new(name: &str, index_type: IndexType, case_sensitive: bool) -> IndexPropertySchema { + IndexPropertySchema { + name: name.to_string(), + index_type, + case_sensitive, + } + } +} + +#[derive(Serialize, Deserialize, Clone, Eq, Debug, PartialEq)] +pub struct IndexSchema { + pub(crate) name: String, + pub(crate) properties: Vec, + pub(crate) unique: bool, + #[serde(default)] + pub(crate) replace: bool, +} + +impl IndexSchema { + pub fn new( + name: &str, + properties: Vec, + unique: bool, + replace: bool, + ) -> IndexSchema { + IndexSchema { + name: name.to_string(), + properties, + unique, + replace, + } + } + + /*pub(crate) fn as_index(&self, db: Db, properties: &[Property]) -> IsarIndex { + let index_properties = self + .properties + .iter() + .map(|ip| { + let property = properties.iter().find(|p| ip.name == *p.name).unwrap(); + IndexProperty::new(property.clone(), ip.index_type, ip.case_sensitive) + }) + .collect_vec(); + IsarIndex::new(&self.name, db, index_properties, self.unique, self.replace) + }*/ +} diff --git a/packages/isar_core/src/core/v3schema/link_schema.rs b/packages/isar_core/src/core/v3schema/link_schema.rs new file mode 100644 index 000000000..f32325425 --- /dev/null +++ b/packages/isar_core/src/core/v3schema/link_schema.rs @@ -0,0 +1,17 @@ +use serde::{Deserialize, Serialize}; + +#[derive(Serialize, Deserialize, Clone, Eq, Debug, PartialEq)] +pub struct LinkSchema { + pub(crate) name: String, + #[serde(rename = "target")] + pub(crate) target_col: String, +} + +impl LinkSchema { + pub fn new(name: &str, target_collection_name: &str) -> Self { + LinkSchema { + name: name.to_string(), + target_col: target_collection_name.to_string(), + } + } +} diff --git a/packages/isar_core/src/core/v3schema/mod.rs b/packages/isar_core/src/core/v3schema/mod.rs new file mode 100644 index 000000000..b0f53914d --- /dev/null +++ b/packages/isar_core/src/core/v3schema/mod.rs @@ -0,0 +1,4 @@ +pub mod collection_schema; +pub mod property; +pub mod index_schema; +pub mod link_schema; diff --git a/packages/isar_core/src/core/v3schema/property.rs b/packages/isar_core/src/core/v3schema/property.rs new file mode 100644 index 000000000..fd6c3843f --- /dev/null +++ b/packages/isar_core/src/core/v3schema/property.rs @@ -0,0 +1,32 @@ +use xxhash_rust::xxh3::xxh3_64; + +use super::super::data_type::DataType; + +#[derive(Clone, Eq, PartialEq)] +pub struct Property { + pub name: String, + pub data_type: DataType, + pub offset: usize, + pub target_id: Option, +} + +impl Property { + pub fn new(name: &str, data_type: DataType, offset: usize, target_id: Option<&str>) -> Self { + let target_id = target_id.map(|col| xxh3_64(col.as_bytes())); + Property { + name: name.to_string(), + data_type, + offset, + target_id, + } + } + + pub const fn debug(data_type: DataType, offset: usize) -> Self { + Property { + name: String::new(), + data_type, + offset, + target_id: None, + } + } +} diff --git a/packages/isar_core/src/native/schema_manager.rs b/packages/isar_core/src/native/schema_manager.rs index 363d61e11..b986c8728 100644 --- a/packages/isar_core/src/native/schema_manager.rs +++ b/packages/isar_core/src/native/schema_manager.rs @@ -16,15 +16,18 @@ pub(crate) fn perform_migration( env: &Arc, mut schemas: Vec, ) -> Result> { + println!("Perform migration"); let txn = NativeTxn::new(instance_id, env, true)?; let info_db = open_info_db(&txn)?; let existing_schemas = get_schemas(&txn, info_db)?; txn.commit()?; let schema_names = schemas.iter().map(|c| c.name.to_string()).collect_vec(); - + let existing_names = existing_schemas.iter().map(|c| c.name.to_string()).collect_vec(); + println!("Existing Schemas: {:?}", existing_names); let mut collections = vec![]; for schema in schemas.iter_mut() { + let existing_schema_index = existing_schemas.iter().position(|c| c.name == schema.name); let txn = NativeTxn::new(instance_id, env, true)?; @@ -111,11 +114,22 @@ fn get_schemas(txn: &NativeTxn, info_db: Db) -> Result> { let info_cursor = txn.get_cursor(info_db)?; let mut schemas = vec![]; for (_, bytes) in info_cursor.iter()? { - let col = - serde_json::from_slice::(bytes).map_err(|_| IsarError::SchemaError { - message: "Could not deserialize existing schema.".to_string(), - })?; - schemas.push(col); + if let Ok(collection) = serde_json::from_slice::(bytes) + { + println!("v4Col: {:?}", collection); + schemas.push(collection); + } else { + println!("Could not deserialize existing schema. Attempting fallback"); + if let Ok(collection) = serde_json::from_slice::(bytes) + { + println!("Fallback OK. Col: {:?}", collection); + //IsarSchema::new("test234", None, Vec::new(), Vec::new(), false) + let v4schema = IsarSchema::new(collection.name.as_str(), None, collection.properties, Vec::new(), collection.embedded); + schemas.push(v4schema); + } else { + println!("Fallback failed"); + } + } } Ok(schemas) }