Skip to content

Commit

Permalink
Refactor get vertex subrange
Browse files Browse the repository at this point in the history
  • Loading branch information
mankinskin committed Oct 2, 2024
1 parent 1341019 commit f89da4d
Show file tree
Hide file tree
Showing 16 changed files with 541 additions and 288 deletions.
17 changes: 10 additions & 7 deletions ngrams/src/graph/partitions/container/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,14 @@ use derive_more::{
};
use itertools::Itertools;
use ngram::NGram;
use seqraph::graph::vertex::{
child::Child,
has_vertex_index::HasVertexIndex,
has_vertex_key::HasVertexKey,
wide::Wide,
use seqraph::graph::{
vertex::{
child::Child,
has_vertex_index::HasVertexIndex,
has_vertex_key::HasVertexKey,
wide::Wide,
},
getters::vertex::VertexSet,
};
use std::{
cmp::{
Expand Down Expand Up @@ -67,8 +70,8 @@ impl<'a, 'b> PartitionLineBuilder<'a, 'b>
offset: NonZeroUsize,
)
{
let index = self.ctx.vocab.get_vertex_subrange(
&self.ctx.root.vertex_key(),
let index = self.ctx.vocab.containment.get_vertex_subrange(
self.ctx.root.vertex_key(),
self.pos..(self.pos + offset.get()),
);
self.push_cell(index);
Expand Down
116 changes: 115 additions & 1 deletion ngrams/src/graph/partitions/container/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use crate::graph::{
NodePartitionCtx,
PartitionsCtx,
},
vocabulary::NGramId,
vocabulary::{entry::VocabEntry, NGramId},
};
use builder::PartitionBuilder;
use derive_more::{
Expand All @@ -32,6 +32,43 @@ use std::{
num::NonZeroUsize,
};

use derive_new::new;
use std::collections::VecDeque;

use crate::graph::{
labelling::LabellingCtx,
traversal::{
TopDown,
TraversalPolicy,
},
vocabulary::{
entry::{
HasVertexEntries,
VertexCtx,
},
ProcessStatus,
},
};
use seqraph::{
graph::{
getters::vertex::VertexSet,
vertex::{
data::{
VertexData,
VertexDataBuilder,
},
has_vertex_index::{
HasVertexIndex,
ToChild,
},
VertexIndex,
},
Hypergraph,
},
HashMap,
HashSet,
};

#[derive(Debug, Copy, Clone)]
pub enum PartitionCell
{
Expand All @@ -49,6 +86,65 @@ impl PartitionCell
}
}
}
/// Collection of child nodes keyed by the offset at which each one starts.
///
/// Dereferences (mutably and immutably) to the inner map and can be iterated
/// both by value and by reference, yielding `(offset, node)` pairs.
#[derive(Debug, Deref, DerefMut, Default, IntoIterator)]
pub struct ChildTree
{
/// Start offset -> node placed at that offset.
/// NOTE(review): offsets appear to be relative to the entry the tree was
/// built from (they are accumulated while descending) -- confirm.
#[deref]
#[deref_mut]
#[into_iterator(owned, ref)]
entries: HashMap<usize, NGramId>,
}


impl ChildTree
{
    /// Collects the largest labelled descendants of `entry`.
    ///
    /// Descendants are visited breadth-first. A node that is labelled in
    /// `ctx.labels` is recorded at its offset; an unlabelled node is replaced
    /// by its own children (with offsets shifted by the node's offset).
    /// Nodes already visited at the same offset, or lying fully inside an
    /// already recorded node, are ignored.
    ///
    /// # Panics
    /// Panics if an unlabelled node cannot be looked up in `entry.vocab`.
    pub fn from_entry(
        ctx: &PartitionsCtx<'_>,
        entry: &VertexCtx<'_>,
    ) -> Self
    {
        let mut pending: VecDeque<_> =
            TopDown::next_nodes(entry).into_iter().collect();
        let mut tree: ChildTree = Default::default();
        let mut seen: HashSet<_> = Default::default();

        while let Some((off, node)) = pending.pop_front()
        {
            // `insert` reports `false` for an (offset, node) pair seen before
            if !seen.insert((off, node))
            {
                continue;
            }
            // nothing to do when a recorded node already spans this range
            if tree.any_covers(off, node)
            {
                continue;
            }
            if ctx.labels.contains(&node)
            {
                tree.insert(off, node);
                continue;
            }
            // unlabelled: descend, translating child offsets into the
            // coordinate space of the starting entry
            let child_entry = entry.vocab.get_vertex(&node).unwrap();
            let shifted = TopDown::next_nodes(&child_entry)
                .into_iter()
                .map(|(o, c)| (o + off, c));
            pending.extend(shifted);
        }
        tree
    }

    /// Returns `true` if some recorded node spans the whole range
    /// `off..off + node.width()`.
    pub fn any_covers(&self, off: usize, node: impl Wide) -> bool {
        for (&start, &covering) in self.iter() {
            let node_end = off + node.width();
            let covering_end = start + covering.width();
            if start <= off && node_end <= covering_end {
                return true;
            }
        }
        false
    }
}

#[derive(Debug, IntoIterator, Deref)]
pub struct PartitionContainer
Expand All @@ -57,6 +153,24 @@ pub struct PartitionContainer
}
impl PartitionContainer
{
/// Builds the partition container for `entry`.
///
/// First gathers the largest labelled children of `entry` via
/// [`ChildTree::from_entry`], then hands them to `from_child_list`
/// together with a node context rooted at `entry`.
///
/// # Panics
/// Panics if a width-1 entry has children in the tree, or if a wider
/// entry has none.
pub fn from_entry(ctx: &PartitionsCtx<'_>, entry: &VertexCtx) -> Self {
    let tree = ChildTree::from_entry(ctx, entry);

    // a single-width entry must have no children, anything wider at least one
    assert!(
        match entry.width() {
            1 => tree.is_empty(),
            _ => !tree.is_empty()
        }
    );

    // root id of the container: key and width taken from the entry's data
    let root = NGramId::new(entry.data.vertex_key(), entry.data.width());
    let node_ctx = NodePartitionCtx::new(root, ctx);
    Self::from_child_list(&node_ctx, tree)
}
pub fn from_child_list(
ctx: &NodePartitionCtx,
list: impl IntoIterator<Item = (usize, NGramId)>,
Expand Down
111 changes: 37 additions & 74 deletions ngrams/src/graph/partitions/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,15 @@ mod container;
use derive_more::{
Deref,
DerefMut,
IntoIterator,
};
use derive_new::new;
use itertools::Itertools;
use std::collections::VecDeque;

use crate::graph::{
labelling::LabellingCtx,
partitions::container::PartitionContainer,
partitions::container::{ChildTree, PartitionContainer},
traversal::{
TopDown,
TraversalPolicy,
Expand Down Expand Up @@ -82,69 +83,18 @@ impl<'b> PartitionsCtx<'b>
graph: Default::default(),
}
}
/// Finds the largest labelled children of `entry`, keyed by start offset.
///
/// Walks the descendants breadth-first: labelled nodes are recorded,
/// unlabelled nodes are expanded into their children with offsets shifted
/// by the node's own offset. Pairs already visited, or ranges already
/// spanned by a recorded node, are skipped.
fn child_tree(
    &self,
    entry: &VertexCtx,
) -> HashMap<usize, NGramId>
{
    let mut pending: VecDeque<_> =
        TopDown::next_nodes(entry).into_iter().collect();
    let mut tree: HashMap<usize, NGramId> = Default::default();
    let mut seen: HashSet<_> = Default::default();

    while let Some((off, node)) = pending.pop_front()
    {
        // `insert` reports `false` for an (offset, node) pair seen before
        if !seen.insert((off, node))
        {
            continue;
        }
        // is the node's range fully inside an already recorded node?
        let covered = tree.iter().any(|(&start, &probe)| {
            let node_end = off + node.width();
            let probe_end = start + probe.width();
            start <= off && node_end <= probe_end
        });
        if covered
        {
            continue;
        }
        if self.labels.contains(&node)
        {
            tree.insert(off, node);
            continue;
        }
        // unlabelled: descend, shifting child offsets by this node's offset
        let child_entry = entry.vocab.get_vertex(&node).unwrap();
        let shifted = TopDown::next_nodes(&child_entry)
            .into_iter()
            .map(|(o, c)| (o + off, c));
        pending.extend(shifted);
    }
    tree
}
fn on_node(
&mut self,
node: &NGramId,
) -> Vec<NGramId>
{
if node.width() == 1 {
return Vec::new();
}
let entry = self.vocab.get_vertex(node).unwrap();

// find all largest children
let tree = self.child_tree(&entry);

// build container with gaps
//let next = tree.iter().map(|(_, c)| c.vertex_index()).collect();
let ctx = NodePartitionCtx::new(
NGramId::new(entry.data.vertex_key(), entry.data.width()),
self,
);
let container = PartitionContainer::from_child_list(&ctx, tree);
//println!("{:#?}", container);
//print!("{}", container);

let container = PartitionContainer::from_entry(self, &entry);

let pids: Vec<_> = std::iter::repeat_n((), container.len())
.map(|_| PatternId::default())
.collect();
Expand All @@ -156,9 +106,17 @@ impl<'b> PartitionsCtx<'b>
);
let parent_data = self.graph.get_vertex_mut(node.vertex_key()).expect(&err);

assert!(
match parent_data.width() {
0 => panic!("Invalid width of zero."),
2 => pids.len() == 1,
1 => pids.is_empty(),
_ => !pids.is_empty(),
}
);
// child patterns with indices in containment
parent_data.children = pids.into_iter().zip(container.clone()).collect();

parent_data.children = pids.into_iter().zip(container).collect();
// child locations parent in self.graph, children indices in self.vocab.containment
let child_locations = parent_data
.all_localized_children_iter()
Expand All @@ -172,17 +130,18 @@ impl<'b> PartitionsCtx<'b>
.map(|(_, c)| c.vertex_index())
.sorted()
.collect_vec(),
container
parent_data.children
.iter()
.flatten()
.map(HasVertexIndex::vertex_index)
.flat_map(|(_, p)| p)
.map(|c| c.vertex_index())
.sorted()
.collect_vec(),
);


// create child nodes in self.graph
// set child parents and translate child indices to self.graph
for (loc, vi) in child_locations.into_iter()
for (loc, vi) in child_locations.iter().copied()
{
let key = self.vocab.containment.expect_key_for_index(vi);
let out_index = if let Ok(v) = self.graph.get_vertex_mut(key)
Expand All @@ -198,6 +157,7 @@ impl<'b> PartitionsCtx<'b>
let mut data = self.graph.finish_vertex_builder(builder);
assert!(data.key == key);
data.add_parent(loc);

// translate containment index to output index
if vi.width() > 1 {
self.graph.insert_vertex_data(data)
Expand All @@ -207,17 +167,19 @@ impl<'b> PartitionsCtx<'b>
};
self.graph.expect_child_mut_at(loc).index = out_index;
}
container
let parent_data = self.graph.get_vertex_mut(node.vertex_key()).expect(&err);
child_locations
.clone()
.into_iter()
.flatten()
.flat_map(|(_, p)| p)
.filter(|c| c.width() > 1)
.map(|c| {
let entry = self.vocab.get_vertex(&c).unwrap();
let key = entry.data.vertex_key();
assert!(
self.graph.contains_vertex(key),
"{:#?}", entry.entry,
);
//assert!(
// self.graph.contains_vertex(key),
// "{:#?}", entry.entry,
//);
NGramId::new(
key,
c.width(),
Expand All @@ -229,33 +191,34 @@ impl<'b> PartitionsCtx<'b>
{
println!("Partition Pass");
let mut queue: VecDeque<_> = TopDown::starting_nodes(&self.vocab);
//let mut n = 0;
for vk in queue.iter()
{
let entry = self.vocab.get_vertex(vk).unwrap();
let data = self.vocab.containment.expect_vertex(vk.vertex_key());
let mut builder = VertexDataBuilder::default();
builder.width(entry.data.width());
builder.width(data.width());
builder.key(**vk);
self.graph.insert_vertex_builder(builder);
}

while !queue.is_empty()
{
//println!("{}", n);
//n += 1;
let mut visited: HashSet<_> = Default::default();
let mut next_layer: Vec<_> = Default::default();
while let Some(node) = queue.pop_front()
{
if !visited.contains(&node)
&& self.labels.contains(&node)
&& !self.vocab.leaves.contains(&node)
//&& !self.vocab.leaves.contains(&node)
{
next_layer.extend(self.on_node(&node));
visited.insert(node);
}
}
queue.extend(next_layer)
}
self.vocab.roots.iter().for_each(|key| {
let _ = self.graph.vertex_key_string(key);
});
self.status = ProcessStatus::Partitions;
println!("{:#?}", &self.graph);
}
Expand Down
Loading

0 comments on commit f89da4d

Please sign in to comment.