Skip to content

Commit

Permalink
Use ChildDedupPass
Browse files Browse the repository at this point in the history
  • Loading branch information
mankinskin committed Nov 17, 2024
1 parent acdb937 commit ec81475
Show file tree
Hide file tree
Showing 16 changed files with 341 additions and 106 deletions.
8 changes: 2 additions & 6 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,10 @@
"build",
"--bin=graph_app",
"--package=graph_app"
],
"filter": {
"name": "graph_app",
"kind": "bin"
}
]
},
"args": [],
"cwd": "${workspaceFolder}"
"cwd": "${workspaceFolder}/graph_app"
}
]
}
16 changes: 10 additions & 6 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,17 @@ exclude = [
"./egui",
"./egui/crates/eframe",
"./tracing-egui",
"./petgraph"
]
members = [
"seqraph",
"ngrams",
"graph_app"
"./petgraph",
"./minimal_options",
"./graph_app",
"./ngrams",
"./justlog"
]
#members = [
# "seqraph",
# "ngrams",
# "graph_app"
#]

[profile.release]
opt-level = 2 # fast and small wasm
2 changes: 1 addition & 1 deletion justlog
Submodule justlog updated 1 files
+4 −4 Cargo.toml
16 changes: 9 additions & 7 deletions ngrams/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,22 +10,24 @@ test-hashing = []
[dependencies]

ngram = "0.1"
csv = "1"
csv = "1.3.1"
maplit = "^1"
tap = "^1"
itertools = "^0.13"
plotters = "^0.3"
derive_more = "^0.99"
derive-new = "^0.6"
derive_builder = "^0.20"
derive-new = "^0.7.0"
derive_builder = "^0.20.2"
range-ext = "0.3.0"
pretty_assertions = "1.4.0"
pretty_assertions = "1.4.1"
serde = { version = "1.0.203", features = ["derive"] }
ciborium = "0.2"
lazy_static = "1.4.0"
lazy_static = "1.5.0"
derivative = "^2.2"


[dependencies.derive_more]
version = "^1.0.0"
features = ["full"]

[dependencies.seqraph]
path = "../seqraph"

Expand Down
29 changes: 13 additions & 16 deletions ngrams/src/graph/labelling/frequency.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ impl TraversalPass for FrequencyCtx<'_>
for node in start.iter()
{
queue.extend_layer(
self.on_node(&node).unwrap_or_default()
self.on_node(node).unwrap_or_default()
);
}
self.labels.extend(start.iter().map(HasVertexKey::vertex_key));
Expand All @@ -70,21 +70,18 @@ impl TraversalPass for FrequencyCtx<'_>
node: &Self::Node,
) -> Option<Vec<Self::NextNode>>
{
if self.labels.contains(&node)
{
None
}
else
{
let entry = self.vocab.get_vertex(node).unwrap();
let next = self.entry_next(&entry);
if self.entry_is_frequent(&entry)
{
let key = entry.data.vertex_key();
self.labels.insert(key);
}
Some(next)
}
self.labels.contains(node)
.then_some(None)
.unwrap_or_else(|| {
let entry = self.vocab.get_vertex(node).unwrap();
let next = self.entry_next(&entry);
if self.entry_is_frequent(&entry)
{
let key = entry.data.vertex_key();
self.labels.insert(key);
}
Some(next)
})
}
fn begin_run(&mut self) {
println!("Frequency Pass");
Expand Down
10 changes: 5 additions & 5 deletions ngrams/src/graph/labelling/wrapper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ use crate::graph::{
TopDown,
TraversalDirection,
},
pass::TraversalPass, queue::{LayeredQueue, Queue}, visited::Visited,
pass::TraversalPass, queue::{LayeredQueue, Queue}, visited::VisitTracking,
}, utils::cover::ChildCover, vocabulary::{
entry::VertexCtx,
NGramId,
Expand All @@ -44,18 +44,18 @@ pub struct WrapperCtx<'b>
#[deref_mut]
ctx: &'b mut LabellingCtx,
#[new(default)]
visited: <Self as Visited>::Collection,
visited_mut: <Self as VisitTracking>::Collection,
}
// - run bottom up (all smaller nodes need to be fully labelled)
// - for each node x:
// - run top down to find the largest frequent children to cover whole range
// - label node x if there are multiple overlapping labelled child nodes

impl Visited for WrapperCtx<'_>
impl VisitTracking for WrapperCtx<'_>
{
type Collection = HashSet<<Self as TraversalPass>::Node>;
fn visited<'t>(&'t mut self) -> &'t mut <Self as Visited>::Collection {
&mut self.visited
fn visited_mut(&mut self) -> &mut <Self as VisitTracking>::Collection {
&mut self.visited_mut
}
}
impl TraversalPass for WrapperCtx<'_>
Expand Down
195 changes: 195 additions & 0 deletions ngrams/src/graph/partitions/collect.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@

use derive_more::{
Deref,
DerefMut,
IntoIterator,
};
use derive_new::new;
use itertools::Itertools;
use std::collections::VecDeque;

use crate::graph::{
labelling::LabellingCtx,
partitions::container::PartitionContainer,
traversal::{
direction::{
TopDown,
TraversalDirection,
},
pass::TraversalPass, queue::Queue,
},
utils::cover::ChildCover,
vocabulary::{
entry::{
HasVertexEntries,
VertexCtx,
},
NGramId,
ProcessStatus,
},
};
use seqraph::{
graph::{
getters::vertex::VertexSet,
vertex::{
child::Child,
data::{
VertexData,
VertexDataBuilder,
},
has_vertex_index::{
HasVertexIndex,
ToChild,
},
has_vertex_key::HasVertexKey,
key::VertexKey,
pattern::id::PatternId,
wide::Wide,
VertexIndex,
},
Hypergraph,
},
HashMap,
HashSet,
};

use crate::graph::{traversal::{queue::{LayeredQueue, LinearQueue}, visited::VisitTracking}, vocabulary::Vocabulary};


/// State carried by the accumulate traversal pass.
///
/// Wraps a mutable borrow of the [`LabellingCtx`] and owns the output
/// [`Hypergraph`] that the pass inserts vertices into, plus the collection
/// handed out by `VisitTracking::visited_mut`.
#[derive(Debug, Deref, DerefMut)]
pub struct AccumulateCtx<'b>
{
/// Borrowed labelling context; the derived `Deref`/`DerefMut` forward to
/// this field, so its members are reachable directly on `AccumulateCtx`.
#[deref]
#[deref_mut]
pub ctx: &'b mut LabellingCtx,
/// Output graph of the pass; starts out default-constructed.
pub result: Hypergraph,
/// Backing storage returned by `visited_mut`.
visited: <Self as VisitTracking>::Collection,
}

impl<'b> From<&'b mut LabellingCtx> for AccumulateCtx<'b> {
fn from(ctx: &'b mut LabellingCtx) -> Self
{
Self {
ctx,
result: Default::default(),
visited: Default::default(),
}
}
}
impl VisitTracking for AccumulateCtx<'_>
{
    /// Set of traversal nodes, keyed by the pass's `Node` type.
    type Collection = HashSet<<Self as TraversalPass>::Node>;

    /// Hands out mutable access to the `visited` field.
    fn visited_mut(&mut self) -> &mut Self::Collection {
        let Self { visited, .. } = self;
        visited
    }
}
/// Accumulate pass: seeds `result` with the top-down starting nodes of the
/// vocabulary and, once finished, marks the context as `ProcessStatus::Partitions`.
impl TraversalPass for AccumulateCtx<'_>
{
    type Node = NGramId;
    type NextNode = NGramId;
    type Queue = LinearQueue<Self>;

    /// Builds the initial queue from `TopDown::starting_nodes` and inserts one
    /// vertex into `result` per queued node, taking the width from the
    /// containment graph and the key from the queued id.
    fn start_queue(&mut self) -> Self::Queue {
        let start_nodes = TopDown::starting_nodes(&self.vocab);
        let queue = Self::Queue::from_iter(start_nodes);
        for id in queue.iter()
        {
            let source = self.vocab.containment.expect_vertex(id.vertex_key());
            let mut vertex = VertexDataBuilder::default();
            vertex.width(source.width());
            vertex.key(**id);
            self.result.insert_vertex_builder(vertex);
        }
        queue
    }

    /// Decides whether `node` is accepted by this pass.
    ///
    /// Accepts a node that is labelled and not yet in the visited set, or any
    /// node that is a vocabulary leaf. Only the leaf path inserts the node
    /// into the visited set.
    // NOTE(review): labelled non-leaf nodes are accepted without being marked
    // visited — confirm whether that is intended.
    fn node_condition(&mut self, node: Self::Node) -> bool {
        let unseen = !self.visited_mut().contains(&node);
        if unseen && self.labels.contains(&node) {
            return true;
        }
        if !self.vocab.leaves.contains(&node) {
            return false;
        }
        // Leaves are accepted whether or not they were already recorded.
        self.visited_mut().insert(node);
        true
    }

    /// Currently yields no follow-up nodes; the intended implementation is
    /// kept below, disabled.
    fn on_node(
        &mut self,
        node: &NGramId,
    ) -> Option<Vec<NGramId>>
    {
        //let container = PartitionContainer::from_ngram(self, *node);
        //let entry = self.vocab.get_vertex(node).unwrap();

        //let pids: Vec<_> = std::iter::repeat_n((), container.len())
        //    .map(|_| PatternId::default())
        //    .collect();

        //let parent_data = self.result.expect_vertex_mut(node.vertex_key());

        //// child patterns with indices in containment
        //parent_data.children = pids.into_iter().zip(container).collect();

        //// child locations parent in self.graph, children indices in self.vocab.containment
        //let child_locations = parent_data
        //    .all_localized_children_iter()
        //    .into_iter()
        //    .map(|(l, c)| (l, *c))
        //    .collect_vec();

        // create child nodes in self.graph
        // set child parents and translate child indices to self.graph
        //for (loc, vi) in child_locations.iter().copied()
        //{
        //    let key = self.vocab.containment.expect_key_for_index(vi);
        //    let out_index = if let Ok(v) = self.result.get_vertex_mut(key)
        //    {
        //        v.add_parent(loc);
        //        v.vertex_index()
        //    }
        //    else
        //    {
        //        let mut builder = VertexDataBuilder::default();
        //        builder.width(vi.width());
        //        builder.key(key);
        //        let mut data = self.result.finish_vertex_builder(builder);
        //        data.add_parent(loc);

        //        // translate containment index to output index
        //        let out = if vi.width() > 1 {
        //            self.result.insert_vertex_data(data)
        //        } else {
        //            self.result.insert_token_data(*self.vocab.containment.expect_token_by_key(&key), data)
        //        }.vertex_index();

        //        if !self.ctx.labels.contains(&key) {
        //            self.ctx.labels.insert(key);
        //            // TODO: Rerun frequency pass for subgraph of key
        //        }
        //        out
        //    };
        //    self.result.expect_child_mut_at(loc).index = out_index;
        //}
        //let next = child_locations
        //    .clone()
        //    .into_iter()
        //    .flat_map(|(_, p)| p)
        //    .filter(|c| c.width() > 1)
        //    .map(|c|
        //        NGramId::new(
        //            self.vocab.get_vertex(&c).unwrap().data.vertex_key(),
        //            c.width(),
        //        )
        //    )
        //    .collect();
        Some(Vec::new())
    }

    /// Announces the start of the pass on stdout.
    fn begin_run(&mut self) {
        println!("Accumulate Pass");
    }

    /// Calls `vertex_key_string` on `result` for every vocabulary root
    /// (discarding the value), sets the status to `Partitions`, and
    /// pretty-prints the result graph.
    fn finish_run(&mut self) {
        for root in self.vocab.roots.iter() {
            let _ = self.result.vertex_key_string(root);
        }
        self.status = ProcessStatus::Partitions;
        println!("{:#?}", &self.result);
    }
}
Loading

0 comments on commit ec81475

Please sign in to comment.