Skip to content

Commit

Permalink
Abstract Visitor Tracking interface
Browse files Browse the repository at this point in the history
  • Loading branch information
Behrbohm authored and Behrbohm committed Nov 9, 2024
1 parent 36b0ff9 commit acdb937
Show file tree
Hide file tree
Showing 15 changed files with 284 additions and 116 deletions.
1 change: 1 addition & 0 deletions ngrams/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ pretty_assertions = "1.4.0"
serde = { version = "1.0.203", features = ["derive"] }
ciborium = "0.2"
lazy_static = "1.4.0"
derivative = "^2.2"


[dependencies.seqraph]
Expand Down
9 changes: 2 additions & 7 deletions ngrams/src/graph/labelling/frequency.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ use crate::graph::{
TopDown,
TraversalDirection,
}, pass::TraversalPass, queue::{Queue, SortedQueue}
}, utils::cover::FrequencyCover, vocabulary::{
}, utils::cover::frequency::FrequencyCover, vocabulary::{
entry::{
HasVertexEntries,
VertexCtx,
Expand Down Expand Up @@ -45,12 +45,10 @@ pub struct FrequencyCtx<'b>
#[deref]
#[deref_mut]
pub ctx: &'b mut LabellingCtx,
#[new(default)]
visited: <Self as TraversalPass>::Visited,
}

impl TraversalPass for FrequencyCtx<'_>
{
type Visited = ();
type Node = VertexKey;
type NextNode = NGramId;
type Queue = SortedQueue;
Expand All @@ -67,9 +65,6 @@ impl TraversalPass for FrequencyCtx<'_>
self.labels.extend(start.iter().map(HasVertexKey::vertex_key));
queue
}
fn visited(&mut self) -> &mut Self::Visited {
&mut self.visited
}
fn on_node(
&mut self,
node: &Self::Node,
Expand Down
19 changes: 11 additions & 8 deletions ngrams/src/graph/labelling/wrapper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ use crate::graph::{
TopDown,
TraversalDirection,
},
pass::TraversalPass, queue::{LayeredQueue, Queue},
}, utils::tree::ChildTree, vocabulary::{
pass::TraversalPass, queue::{LayeredQueue, Queue}, visited::Visited,
}, utils::cover::ChildCover, vocabulary::{
entry::VertexCtx,
NGramId,
ProcessStatus, Vocabulary,
Expand All @@ -44,22 +44,25 @@ pub struct WrapperCtx<'b>
#[deref_mut]
ctx: &'b mut LabellingCtx,
#[new(default)]
visited: <Self as TraversalPass>::Visited,
visited: <Self as Visited>::Collection,
}
// - run bottom up (all smaller nodes need to be fully labelled)
// - for each node x:
// - run top down to find the largest frequent children to cover whole range
// - label node x if there are multiple overlapping labelled child nodes

/// Visitor tracking for the wrapper pass.
///
/// The visited nodes are stored in a hash set keyed by this pass's
/// `TraversalPass::Node` type, held in the `visited` field of the context.
impl Visited for WrapperCtx<'_> {
    type Collection = HashSet<<Self as TraversalPass>::Node>;

    /// Hands out mutable access to the `visited` field.
    fn visited(&mut self) -> &mut Self::Collection {
        &mut self.visited
    }
}
impl TraversalPass for WrapperCtx<'_>
{
type Visited = HashSet<Self::Node>;
type Node = VertexKey;
type NextNode = VertexKey;
type Queue = LayeredQueue<Self>;
fn visited(&mut self) -> &mut Self::Visited {
&mut self.visited
}
fn start_queue(&mut self) -> Self::Queue {
BottomUp::starting_nodes(&self.vocab).into_iter()
.map(|ng| ng.key).collect()
Expand All @@ -77,7 +80,7 @@ impl TraversalPass for WrapperCtx<'_>

if !self.labels.contains(node)
{
let tree = ChildTree::from_entry(self.ctx, &entry);
let tree = ChildCover::from_key(self.ctx, entry.vertex_key());
if tree.any_intersect()
{
let key = entry.data.vertex_key();
Expand Down
13 changes: 7 additions & 6 deletions ngrams/src/graph/partitions/container/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ use crate::graph::{
}, traversal::direction::{
TopDown,
TraversalDirection,
}, utils::tree::ChildTree, vocabulary::{
}, utils::cover::ChildCover, vocabulary::{
entry::{
HasVertexEntries,
VertexCtx,
Expand Down Expand Up @@ -89,15 +89,16 @@ pub struct PartitionContainer
}
impl PartitionContainer
{
pub fn from_entry(
pub fn from_ngram(
ctx: &PartitionsCtx<'_>,
entry: &VertexCtx,
ngram: NGramId,
) -> Self
{
// find all largest children
let tree = ChildTree::from_entry(ctx, entry);
let tree = ChildCover::from_key(ctx, ngram.vertex_key());

assert!(
match entry.width()
match ngram.width()
{
1 => tree.is_empty(),
_ => !tree.is_empty(),
Expand All @@ -107,7 +108,7 @@ impl PartitionContainer
// build container with gaps
//let next = tree.iter().map(|(_, c)| c.vertex_index()).collect();
let ctx = NodePartitionCtx::new(
NGramId::new(entry.data.vertex_key(), entry.data.width()),
ngram,
ctx,
);
Self::from_child_list(&ctx, tree)
Expand Down
19 changes: 11 additions & 8 deletions ngrams/src/graph/partitions/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ use crate::graph::{
},
pass::TraversalPass, queue::Queue,
},
utils::tree::ChildTree,
utils::cover::ChildCover,
vocabulary::{
entry::{
HasVertexEntries,
Expand Down Expand Up @@ -54,7 +54,7 @@ use seqraph::{
HashSet,
};

use super::{traversal::queue::LayeredQueue, vocabulary::Vocabulary};
use super::{traversal::{queue::LayeredQueue, visited::Visited}, vocabulary::Vocabulary};

// - run top down (smaller nodes to label need to be found)
// - for each node x:
Expand All @@ -78,7 +78,7 @@ pub struct PartitionsCtx<'b>
#[deref_mut]
pub ctx: &'b mut LabellingCtx,
pub graph: Hypergraph,
visited: <Self as TraversalPass>::Visited,
visited: <Self as Visited>::Collection,
}

impl<'b> From<&'b mut LabellingCtx> for PartitionsCtx<'b> {
Expand All @@ -91,15 +91,18 @@ impl<'b> From<&'b mut LabellingCtx> for PartitionsCtx<'b> {
}
}
}
/// Visitor tracking for the partitions pass.
///
/// The visited nodes are stored in a hash set keyed by this pass's
/// `TraversalPass::Node` type, held in the `visited` field of the context.
impl Visited for PartitionsCtx<'_> {
    type Collection = HashSet<<Self as TraversalPass>::Node>;

    /// Hands out mutable access to the `visited` field.
    fn visited(&mut self) -> &mut Self::Collection {
        &mut self.visited
    }
}
impl TraversalPass for PartitionsCtx<'_>
{
type Visited = HashSet<Self::Node>;
type Node = NGramId;
type NextNode = NGramId;
type Queue = LayeredQueue<Self>;
fn visited(&mut self) -> &mut Self::Visited {
&mut self.visited
}
fn start_queue(&mut self) -> Self::Queue {
let queue = Self::Queue::from_iter(
TopDown::starting_nodes(&self.vocab)
Expand All @@ -119,8 +122,8 @@ impl TraversalPass for PartitionsCtx<'_>
node: &NGramId,
) -> Option<Vec<NGramId>>
{
let container = PartitionContainer::from_ngram(self, *node);
let entry = self.vocab.get_vertex(node).unwrap();
let container = PartitionContainer::from_entry(self, &entry);

let pids: Vec<_> = std::iter::repeat_n((), container.len())
.map(|_| PatternId::default())
Expand Down
7 changes: 2 additions & 5 deletions ngrams/src/graph/traversal/pass.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,20 +18,18 @@ use seqraph::graph::vertex::{
VertexIndex,
};

use super::{queue::Queue, visited::Visited};
use super::{queue::Queue, visited::{Visited, VisitorCollection}};
/// Bundle of bounds required of a traversal node: equality comparison,
/// hashing, cloning and `Debug` formatting.
///
/// The trait has no items of its own; it only names the bound set.
pub trait PassNode: Eq + PartialEq + Debug + Clone + Hash {}
// Blanket impl: any type that satisfies the bounds is a `PassNode`
// automatically, so the trait never has to be implemented by hand.
impl<N: Eq + PartialEq + Debug + Clone + Hash> PassNode for N {}

pub trait TraversalPass : Sized {
type Node: PassNode + Copy;
type NextNode: PassNode + Into<Self::Node>;
type Visited: Visited<Self>;
type Queue: Queue<Self>;
fn visited(&mut self) -> &mut Self::Visited;
fn start_queue(&mut self) -> Self::Queue;
fn on_node(&mut self, node: &Self::Node) -> Option<Vec<Self::NextNode>>;
fn node_condition(&mut self, node: Self::Node) -> bool {
self.visited().insert(node)
true
}
fn begin_run(&mut self) {}
fn finish_run(&mut self) {}
Expand All @@ -56,4 +54,3 @@ pub trait TraversalPass : Sized {
self.finish_run()
}
}

2 changes: 1 addition & 1 deletion ngrams/src/graph/traversal/queue.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use std::{collections::VecDeque, ops::{Deref, DerefMut}};

use itertools::Itertools;

use crate::graph::{utils::cover::FrequencyCover, vocabulary::{
use crate::graph::{utils::cover::frequency::FrequencyCover, vocabulary::{
entry::VertexCtx,
NGramId,
Vocabulary,
Expand Down
30 changes: 23 additions & 7 deletions ngrams/src/graph/traversal/visited.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,17 +20,33 @@ use seqraph::{
HashSet,
};

pub trait Visited<P: TraversalPass> {
fn insert(&mut self, node: <P as TraversalPass>::Node) -> bool;
use super::pass::PassNode;

/// A traversal pass that owns a collection of already-visited nodes.
///
/// This is an extension of `TraversalPass`: the collection is keyed by the
/// pass's own `Node` type.
pub trait Visited: TraversalPass {
/// Storage used to record visited nodes of type `Self::Node`.
type Collection: VisitorCollection<Self::Node>;
/// Returns mutable access to the pass's visited-node collection.
fn visited(&mut self) -> &mut Self::Collection;
}
/// A container into which nodes of type `N` can be recorded.
///
/// Implemented in this file for `HashSet<N>`, `&mut HashSet<N>` and `()`.
pub trait VisitorCollection<N: PassNode> {
/// Lifetime-parameterised companion type that must itself be a
/// `VisitorCollection<N>`; the impls in this file set it to
/// `&'t mut HashSet<N>` (hash-set impls) or `()` (unit impl).
/// NOTE(review): no code visible in this view consumes `Ref` — confirm it is needed.
type Ref<'t>: VisitorCollection<N> where N: 't;
/// Records `node` in the collection and returns a flag.
///
/// The hash-set impls forward the result of `HashSet::insert`
/// (presumably `true` when the node was not present before — verify against
/// the `HashSet` type in use); the `()` impl always returns `true`.
fn insert(&mut self, node: N) -> bool;
}

impl<P: TraversalPass> Visited<P> for HashSet<P::Node> {
fn insert(&mut self, node: <P as TraversalPass>::Node) -> bool {
HashSet::insert(self, node)
/// Owned hash set used as a visited-node collection.
impl<N: PassNode> VisitorCollection<N> for HashSet<N> {
    type Ref<'t>
        = &'t mut HashSet<N>
    where
        N: 't;

    /// Inserts `node` into the set and forwards the set's own return value.
    fn insert(&mut self, node: N) -> bool {
        // Call the inherent set method directly; this is the same operation
        // the `&mut HashSet<N>` implementation ends up performing.
        HashSet::insert(self, node)
    }
}
/// Mutable borrow of a hash set used as a visited-node collection.
impl<N: PassNode> VisitorCollection<N> for &mut HashSet<N> {
    type Ref<'t>
        = &'t mut HashSet<N>
    where
        N: 't;

    /// Inserts `node` into the borrowed set and forwards the set's return value.
    fn insert(&mut self, node: N) -> bool {
        // `self` is a reference to the borrow; reborrow the underlying set.
        let set: &mut HashSet<N> = &mut **self;
        HashSet::insert(set, node)
    }
}
impl<P: TraversalPass> Visited<P> for () {
fn insert(&mut self, node: <P as TraversalPass>::Node) -> bool {
/// No-op collection: stores nothing and accepts every node.
impl<N: PassNode> VisitorCollection<N> for () {
    type Ref<'t>
        = ()
    where
        N: 't;

    /// Ignores the node and always returns `true`.
    fn insert(&mut self, _node: N) -> bool {
        true
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,13 @@ use seqraph::{
graph::{
getters::vertex::VertexSet,
vertex::{
child::Child,
data::{
child::Child, data::{
VertexData,
VertexDataBuilder,
},
has_vertex_index::{
}, has_vertex_index::{
HasVertexIndex,
ToChild,
},
has_vertex_key::HasVertexKey,
wide::Wide,
VertexIndex,
}, has_vertex_key::HasVertexKey, key::VertexKey, wide::Wide, VertexIndex
},
Hypergraph,
},
Expand Down Expand Up @@ -55,7 +50,7 @@ use crate::graph::{
TopDown,
TraversalDirection,
},
pass::TraversalPass, queue::{LayeredQueue, Queue},
pass::TraversalPass, queue::{LayeredQueue, Queue}, visited::Visited,
},
vocabulary::{
entry::{
Expand All @@ -68,44 +63,47 @@ use crate::graph::{
},
};

use super::ChildTree;
#[derive(Debug, new)]
use super::ChildCover;

#[derive(Debug)]
pub struct ChildCoverPass<'a> {
pub ctx: &'a LabellingCtx,
pub root: &'a VertexCtx<'a>,
#[new(default)]
pub visited: <Self as TraversalPass>::Visited,
#[new(default)]
pub tree: ChildTree,
pub root: VertexKey,
pub cover: ChildCover,
}
impl<'a> ChildCoverPass<'a> {
pub fn new(ctx: &'a LabellingCtx, root: VertexKey) -> Self {
Self {
ctx,
root,
cover: Default::default(),
}
}
}
impl TraversalPass for ChildCoverPass<'_> {
type Visited = HashSet<Self::Node>;
type Node = (usize, NGramId);
type NextNode = (usize, NGramId);
type Queue = LayeredQueue<Self>;
fn visited(&mut self) -> &mut Self::Visited {
&mut self.visited
}
fn start_queue(&mut self) -> Self::Queue {
Self::Queue::from_iter(
TopDown::next_nodes(self.root)
TopDown::next_nodes(&self.ctx.vocab.expect_vertex(&self.root))
)
}
fn on_node(&mut self, node: &Self::Node) -> Option<Vec<Self::NextNode>> {
let &(off, node) = node;
// check if covered
if self.tree.any_covers(off, node)
if self.cover.any_covers(off, node)
{
None
}
else if self.ctx.labels.contains(&node)
{
self.tree.insert(off, node);
self.cover.insert(off, node);
None
}
else
{
let ne = self.root.vocab.get_vertex(&node).unwrap();
let ne = self.ctx.vocab.get_vertex(&node).unwrap();
Some(
TopDown::next_nodes(&ne)
.into_iter()
Expand Down
File renamed without changes.
Loading

0 comments on commit acdb937

Please sign in to comment.