From e44ca841b405d0a84b85c937ed35ff76475d4c96 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Mon, 12 Dec 2022 12:42:55 +0100 Subject: [PATCH] Remove soft deleted ids from ExternalDocumentIds during document import If the document import replaces a document using hard deletion --- milli/src/update/delete_documents.rs | 33 ++++++++++++++++++++----- milli/src/update/index_documents/mod.rs | 9 ++++--- 2 files changed, 33 insertions(+), 9 deletions(-) diff --git a/milli/src/update/delete_documents.rs b/milli/src/update/delete_documents.rs index 88ec784204..5edb9e5d98 100644 --- a/milli/src/update/delete_documents.rs +++ b/milli/src/update/delete_documents.rs @@ -34,6 +34,12 @@ pub struct DocumentDeletionResult { pub deleted_documents: u64, pub remaining_documents: u64, } +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct DetailedDocumentDeletionResult { + pub deleted_documents: u64, + pub remaining_documents: u64, + pub used_soft_deletion: bool, +} impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { pub fn new( @@ -68,8 +74,16 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { self.delete_document(docid); Some(docid) } - - pub fn execute(mut self) -> Result<DocumentDeletionResult> { + pub fn execute(self) -> Result<DocumentDeletionResult> { + let DetailedDocumentDeletionResult { + deleted_documents, + remaining_documents, + used_soft_deletion: _, + } = self.execute_inner()?; + + Ok(DocumentDeletionResult { deleted_documents, remaining_documents }) + } + pub(crate) fn execute_inner(mut self) -> Result<DetailedDocumentDeletionResult> { self.index.set_updated_at(self.wtxn, &OffsetDateTime::now_utc())?; // We retrieve the current documents ids that are in the database.
@@ -83,7 +97,11 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { if !soft_deleted_docids.is_empty() { ClearDocuments::new(self.wtxn, self.index).execute()?; } - return Ok(DocumentDeletionResult { deleted_documents: 0, remaining_documents: 0 }); + return Ok(DetailedDocumentDeletionResult { + deleted_documents: 0, + remaining_documents: 0, + used_soft_deletion: false, + }); } // We remove the documents ids that we want to delete @@ -95,9 +113,10 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { // to delete is exactly the number of documents in the database. if current_documents_ids_len == self.to_delete_docids.len() { let remaining_documents = ClearDocuments::new(self.wtxn, self.index).execute()?; - return Ok(DocumentDeletionResult { + return Ok(DetailedDocumentDeletionResult { deleted_documents: current_documents_ids_len, remaining_documents, + used_soft_deletion: false, }); } @@ -159,9 +178,10 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { && percentage_used_by_soft_deleted_documents < 10 { self.index.put_soft_deleted_documents_ids(self.wtxn, &soft_deleted_docids)?; - return Ok(DocumentDeletionResult { + return Ok(DetailedDocumentDeletionResult { deleted_documents: self.to_delete_docids.len(), remaining_documents: documents_ids.len(), + used_soft_deletion: true, }); } @@ -488,9 +508,10 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { &self.to_delete_docids, )?; - Ok(DocumentDeletionResult { + Ok(DetailedDocumentDeletionResult { deleted_documents: self.to_delete_docids.len(), remaining_documents: documents_ids.len(), + used_soft_deletion: false, }) } } diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index db6ffedc13..478a740659 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -210,7 +210,7 @@ where primary_key, fields_ids_map, field_distribution, - external_documents_ids, + mut external_documents_ids, new_documents_ids, replaced_documents_ids, documents_count, 
@@ -335,8 +335,11 @@ where deletion_builder.disable_soft_deletion(self.config.disable_soft_deletion); debug!("documents to delete {:?}", replaced_documents_ids); deletion_builder.delete_documents(&replaced_documents_ids); - let deleted_documents_count = deletion_builder.execute()?; - debug!("{} documents actually deleted", deleted_documents_count.deleted_documents); + let deleted_documents_result = deletion_builder.execute_inner()?; + debug!("{} documents actually deleted", deleted_documents_result.deleted_documents); + if !deleted_documents_result.used_soft_deletion { + external_documents_ids.delete_soft_deleted_documents_ids_from_fsts()?; + } } let index_documents_ids = self.index.documents_ids(self.wtxn)?;