From b9f9796bf0d58e85633db23968966b6634108e21 Mon Sep 17 00:00:00 2001 From: Linus Behrbohm Date: Thu, 3 Oct 2024 20:59:12 +0200 Subject: [PATCH] Fix labels not added by partition pass --- ngrams/src/graph/mod.rs | 16 ++++++++-- .../src/graph/partitions/container/builder.rs | 9 +++--- ngrams/src/graph/partitions/mod.rs | 32 ++++++++++++------- seqraph/src/graph/getters/advanced.rs | 5 ++- src/app.rs | 9 +++--- src/graph/mod.rs | 6 ++-- src/graph/vis.rs | 21 ++++++------ 7 files changed, 59 insertions(+), 39 deletions(-) diff --git a/ngrams/src/graph/mod.rs b/ngrams/src/graph/mod.rs index f44b005..74840c7 100644 --- a/ngrams/src/graph/mod.rs +++ b/ngrams/src/graph/mod.rs @@ -6,7 +6,7 @@ use itertools::Itertools; use ngram::NGram; use pretty_assertions::assert_eq; -use seqraph::{graph::Hypergraph, HashSet}; +use seqraph::{graph::{vertex::key::VertexKey, Hypergraph}, HashSet}; use serde::{Deserialize, Serialize}; use crate::graph::{ @@ -47,10 +47,20 @@ impl Corpus CORPUS_DIR.join(&self.name) } } -pub fn parse_corpus(corpus: Corpus) -> Hypergraph { +pub struct ParseResult { + pub graph: Hypergraph, + pub containment: Hypergraph, + pub labels: HashSet, +} +pub fn parse_corpus(corpus: Corpus) -> ParseResult { let mut image = LabellingCtx::from_corpus(&corpus); image.label_freq(); image.label_wrap(); - image.label_part() + let graph = image.label_part(); + ParseResult { + graph, + containment: image.vocab.containment, + labels: image.labels, + } } \ No newline at end of file diff --git a/ngrams/src/graph/partitions/container/builder.rs b/ngrams/src/graph/partitions/container/builder.rs index 98a4289..af9c7d0 100644 --- a/ngrams/src/graph/partitions/container/builder.rs +++ b/ngrams/src/graph/partitions/container/builder.rs @@ -70,9 +70,10 @@ impl<'a, 'b> PartitionLineBuilder<'a, 'b> offset: NonZeroUsize, ) { + let end_pos = self.end_pos(); let index = self.ctx.vocab.containment.get_vertex_subrange( self.ctx.root.vertex_key(), - self.pos..(self.pos + offset.get()), + end_pos..(end_pos + offset.get()), ); self.push_cell(index); } @@ -107,9 +108,9 @@ impl<'a, 'b> PartitionLineBuilder<'a, 'b> } pub fn end_pos(&self) -> usize { - let cell = self.line.last().unwrap(); - let cell_width = cell.width(); - self.pos + cell_width + self.pos + self.line.last().map(|cell| + cell.width() + ).unwrap_or_default() } pub fn close( mut self, diff --git a/ngrams/src/graph/partitions/mod.rs b/ngrams/src/graph/partitions/mod.rs index 17ac8ef..5109cd6 100644 --- a/ngrams/src/graph/partitions/mod.rs +++ b/ngrams/src/graph/partitions/mod.rs @@ -89,16 +89,17 @@ impl<'b> PartitionsCtx<'b> node: &NGramId, ) -> Vec { - if node.width() == 1 { - return Vec::new(); - } let entry = self.vocab.get_vertex(node).unwrap(); let container = PartitionContainer::from_entry(self, &entry); let pids: Vec<_> = std::iter::repeat_n((), container.len()) .map(|_| PatternId::default()) .collect(); + + ///// assert!(self.graph.contains_vertex(node.vertex_key())); + ///// + let err = format!( "Node not yet created {} in: {:#?}", node.vertex_key(), @@ -106,6 +107,7 @@ impl<'b> PartitionsCtx<'b> ); let parent_data = self.graph.get_vertex_mut(node.vertex_key()).expect(&err); + ///// assert!( match parent_data.width() { 0 => panic!("Invalid width of zero."), @@ -114,9 +116,11 @@ impl<'b> PartitionsCtx<'b> _ => !pids.is_empty(), } ); - // child patterns with indices in containment + ///// + // child patterns with indices in containment parent_data.children = pids.into_iter().zip(container).collect(); + // child locations parent in self.graph, children indices in self.vocab.containment let child_locations = parent_data .all_localized_children_iter() @@ -124,6 +128,7 @@ impl<'b> PartitionsCtx<'b> .map(|(l, c)| (l, *c)) .collect_vec(); + ///// assert_eq!( child_locations .iter() @@ -137,6 +142,7 @@ impl<'b> PartitionsCtx<'b> .sorted() .collect_vec(), ); + ///// // create child nodes in self.graph @@ -151,6 +157,7 @@ impl<'b> PartitionsCtx<'b> } else { + self.ctx.labels.insert(key); let mut builder = VertexDataBuilder::default(); builder.width(vi.width()); builder.key(key); @@ -176,10 +183,13 @@ impl<'b> PartitionsCtx<'b> .map(|c| { let entry = self.vocab.get_vertex(&c).unwrap(); let key = entry.data.vertex_key(); - //assert!( - // self.graph.contains_vertex(key), - // "{:#?}", entry.entry, - //); + assert!( + self.ctx.labels.contains(&key), + ); + assert!( + self.graph.contains_vertex(key), + "{:#?}", entry.entry, + ); NGramId::new( key, c.width(), @@ -206,9 +216,9 @@ impl<'b> PartitionsCtx<'b> let mut next_layer: Vec<_> = Default::default(); while let Some(node) = queue.pop_front() { - if !visited.contains(&node) - && self.labels.contains(&node) - //&& !self.vocab.leaves.contains(&node) + if (!visited.contains(&node) + && self.labels.contains(&node)) + || self.vocab.leaves.contains(&node) { next_layer.extend(self.on_node(&node)); visited.insert(node); diff --git a/seqraph/src/graph/getters/advanced.rs b/seqraph/src/graph/getters/advanced.rs index c440515..c53a249 100644 --- a/seqraph/src/graph/getters/advanced.rs +++ b/seqraph/src/graph/getters/advanced.rs @@ -82,9 +82,8 @@ impl Hypergraph { { let next = data.top_down_containment_nodes() .into_iter() - .map( - |(pos, c)| (c.vertex_index(), pos..pos + c.width()), //pos <= range.start || pos + c.width() >= range.end - ) + .map(|(pos, c)| (wrap.start + pos, c)) + .map(|(pos, c)| (c.vertex_index(), pos..pos + c.width())) .find_or_first(|(_, w)| { w.start == range.start || w.end == range.end }) diff --git a/src/app.rs b/src/app.rs index ccc511a..51e638d 100644 --- a/src/app.rs +++ b/src/app.rs @@ -195,11 +195,12 @@ impl eframe::App for App ui.text_edit_multiline(&mut self.graph.insert_text); if ui.button("Insert").clicked() { - let insert_text = self.graph.insert_text.clone(); + let insert_text = std::mem::take(&mut self.graph.insert_text); //self.read_task = Some(self.graph.read_text(insert_text, ctx)); - let graph = ngrams::graph::parse_corpus(ngrams::graph::Corpus::new("", [insert_text])); - self.graph.insert_text = String::new(); - self.graph.set_graph(graph); + let res = ngrams::graph::parse_corpus(ngrams::graph::Corpus::new("", [insert_text])); + + *self.graph.graph.write().unwrap() = res.graph; + *self.graph.labels.write().unwrap() = res.labels; } }); } diff --git a/src/graph/mod.rs b/src/graph/mod.rs index 8974bd8..cf27bd6 100644 --- a/src/graph/mod.rs +++ b/src/graph/mod.rs @@ -12,7 +12,7 @@ use petgraph::{ }, visit::EdgeRef, }; -use seqraph::graph::{Hypergraph, HypergraphRef}; +use seqraph::{graph::{vertex::key::VertexKey, Hypergraph, HypergraphRef}, HashSet}; pub mod vis; use tokio::task::JoinHandle; use vis::GraphVis; @@ -24,6 +24,7 @@ pub struct Graph pub graph: HypergraphRef, pub vis: Arc>, pub insert_text: String, + pub labels: Arc>>, } impl Default for Graph { @@ -45,7 +46,8 @@ impl Graph let new = Self { graph, vis, - insert_text: String::from("ababa"), + insert_text: String::from("aabbaabbaa"), + labels: Default::default(), }; let g = new.clone(); new.vis_mut().set_graph(g); diff --git a/src/graph/vis.rs b/src/graph/vis.rs index d71f007..ad334cb 100644 --- a/src/graph/vis.rs +++ b/src/graph/vis.rs @@ -1,16 +1,5 @@ use eframe::egui::{ - self, - vec2, - Frame, - Pos2, - Rect, - Response, - Shape, - Stroke, - Style, - Ui, - Vec2, - Window, + self, vec2, Color32, Frame, Pos2, Rect, Response, Shape, Stroke, Style, Ui, Vec2, Window }; #[allow(unused)] use petgraph::{ @@ -436,6 +425,14 @@ impl NodeVis //Window::new(&self.name) .vscroll(true) .default_width(80.0) + .frame( + Frame::window(&Style::default()) + .fill( + self.graph.labels.read().unwrap().contains(&self.key) + .then_some(Color32::from_rgb(10, 50, 10)) + .unwrap_or_default() + ) + ) .show(ui.ctx(), |ui| { ui.spacing_mut().item_spacing = Vec2::splat(0.0); self.child_patterns(ui, gvis)