From c608306589e1cb9576948506888db2772a4c96fc Mon Sep 17 00:00:00 2001 From: Damien Elmes Date: Mon, 21 Aug 2023 17:11:05 +1000 Subject: [PATCH] Detect non-learning starts earlier .retain() is fairly expensive, as it requires shuffling data about; better to filter the material before the array is created --- src/convertor.rs | 36 ++++++++++++------------------------ 1 file changed, 12 insertions(+), 24 deletions(-) diff --git a/src/convertor.rs b/src/convertor.rs index c4b9f03..9641b2a 100644 --- a/src/convertor.rs +++ b/src/convertor.rs @@ -10,7 +10,6 @@ struct RevlogEntry { id: i64, cid: i64, button_chosen: i32, - ease_factor: i64, review_kind: i64, delta_t: i32, i: usize, @@ -23,8 +22,7 @@ fn row_to_revlog_entry(row: &Row) -> Result { id: row.get(0)?, cid: row.get(1)?, button_chosen: row.get(2)?, - ease_factor: row.get(3)?, - review_kind: row.get(4).unwrap_or_default(), + review_kind: row.get(3).unwrap_or_default(), delta_t: 0, i: 0, r_history: vec![], @@ -58,7 +56,7 @@ fn read_collection() -> Vec { let current_timestamp = Utc::now().timestamp() * 1000; let query = format!( - "SELECT id, cid, ease, factor, type + "SELECT id, cid, ease, type FROM revlog WHERE type != 4 AND ivl > 0 @@ -110,7 +108,7 @@ fn extract_time_series_feature( mut entries: Vec, next_day_starts_at: i64, timezone: Tz, -) -> Vec { +) -> Option> { // Find the index of the first RevlogEntry in the last continuous group where review_kind = 0 // 寻找最后一组连续 review_kind = 0 的第一个 RevlogEntry 的索引 let mut index_to_keep = 0; @@ -185,7 +183,13 @@ fn extract_time_series_feature( entries.truncate(index_to_remove); } - entries + // we ignore cards that don't start in the learning state + if let Some(first) = entries.first() { + if first.review_kind == 1 { + return Some(entries) + } + } + None } fn convert_to_fsrs_items(revlogs: Vec>) -> Vec { @@ -216,29 +220,15 @@ fn convert_to_fsrs_items(revlogs: Vec>) -> Vec { .collect() } -fn remove_non_learning_first(revlogs_per_card: Vec>) -> Vec> { - let mut result = revlogs_per_card; - result.retain(|entries| { - if let Some(first_entry) = entries.first() { - first_entry.review_kind == 1 - } else { - false - } - }); - result -} - pub fn anki_to_fsrs() -> Vec { let revlogs = read_collection(); let revlogs_per_card = group_by_cid(revlogs); let extracted_revlogs_per_card: Vec> = revlogs_per_card .into_iter() - .map(|entries| extract_time_series_feature(entries, 4, Tz::Asia__Shanghai)) + .filter_map(|entries| extract_time_series_feature(entries, 4, Tz::Asia__Shanghai)) .collect(); - let filtered_revlogs_per_card = remove_non_learning_first(extracted_revlogs_per_card); - - convert_to_fsrs_items(filtered_revlogs_per_card) + convert_to_fsrs_items(extracted_revlogs_per_card) } #[test] @@ -252,8 +242,6 @@ fn test() { .map(|entries| extract_time_series_feature(entries, 4, Tz::Asia__Shanghai)) .collect(); - dbg!(&extracted_revlogs_per_card[0]); - extracted_revlogs_per_card = remove_non_learning_first(extracted_revlogs_per_card); dbg!(extracted_revlogs_per_card .iter() .map(|x| x.len())