Skip to content

Commit

Permalink
pr -> pl
Browse files Browse the repository at this point in the history
  • Loading branch information
vigna committed Nov 1, 2023
1 parent b3b4cac commit e1792d7
Show file tree
Hide file tree
Showing 17 changed files with 138 additions and 117 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ sux = {git = "https://github.com/vigna/sux-rs" }
dsi-bitstream = {git = "https://github.com/vigna/dsi-bitstream-rs" }
# Bin dependencies
clap = { version = "4.1.6", features = ["derive"] }
dsi-progress-logger = "0.1.0"
dsi-progress-logger = "0.2.0"
log = "0.4.17"
stderrlog = "0.5.4"
rand = { version = "0.8.5", features = ["small_rng"] }
Expand Down
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# `webgraph`

A pure Rust implementation of the [WebGraph framework](https://webgraph.di.unimi.it/) for graph compression.

# Acknowledgments

This software has been partially supported by project SERICS (PE00000014) under the NRRP MUR program funded by the EU - NGEU,
and by project ANR COREGRAPHIE, grant ANR-20-CE23-0002 of the French Agence Nationale de la Recherche.
11 changes: 6 additions & 5 deletions examples/bv_bf_visit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
use anyhow::Result;
use bitvec::*;
use clap::Parser;
use dsi_progress_logger::ProgressLogger;
use dsi_progress_logger::*;
use std::collections::VecDeque;
use webgraph::prelude::*;
#[derive(Parser, Debug)]
Expand All @@ -32,10 +32,11 @@ pub fn main() -> Result<()> {
let mut visited = bitvec![0; num_nodes];
let mut queue = VecDeque::new();

let mut pl = ProgressLogger::default().display_memory();
pl.item_name = "node";
pl.local_speed = true;
pl.expected_updates = Some(num_nodes);
let mut pl = ProgressLogger::default();
pl.display_memory(true)
.item_name("node")
.local_speed(true)
.expected_updates(Some(num_nodes));
pl.start("Visiting graph...");

for start in 0..num_nodes {
Expand Down
13 changes: 7 additions & 6 deletions src/algorithms/bfs_order.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,14 @@
*/

use crate::traits::RandomAccessGraph;
use dsi_progress_logger::ProgressLogger;
use dsi_progress_logger::*;
use std::collections::VecDeque;
use sux::prelude::BitVec;

/// Iterator on all nodes of the graph in a BFS order
pub struct BfsOrder<'a, G: RandomAccessGraph> {
graph: &'a G,
pl: ProgressLogger<'static>,
pl: ProgressLogger,
visited: BitVec,
queue: VecDeque<usize>,
/// If the queue is empty, resume the BFS from that node.
Expand All @@ -25,10 +25,11 @@ pub struct BfsOrder<'a, G: RandomAccessGraph> {
impl<'a, G: RandomAccessGraph> BfsOrder<'a, G> {
pub fn new(graph: &G) -> BfsOrder<G> {
let num_nodes = graph.num_nodes();
let mut pl = ProgressLogger::default().display_memory();
pl.item_name = "node";
pl.local_speed = true;
pl.expected_updates = Some(num_nodes);
let mut pl = ProgressLogger::default();
pl.display_memory(true)
.item_name("node")
.local_speed(true)
.expected_updates(Some(num_nodes));
pl.start("Visiting graph in BFS order...");
BfsOrder {
graph,
Expand Down
41 changes: 22 additions & 19 deletions src/algorithms/llp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
use crate::prelude::*;
use crate::{invert_in_place, traits::*};
use anyhow::Result;
use dsi_progress_logger::ProgressLogger;
use dsi_progress_logger::*;
use epserde::prelude::*;

use lender::*;
Expand Down Expand Up @@ -54,28 +54,31 @@ pub fn layered_label_propagation(
.build()?;

// init the gamma progress logger
let mut gamma_pr = ProgressLogger::default().display_memory();
gamma_pr.item_name = "gamma";
gamma_pr.expected_updates = Some(gammas.len());
let mut gamma_pl = ProgressLogger::default();
gamma_pl
.display_memory(true)
.item_name("gamma")
.expected_updates(Some(gammas.len()));

// init the iteration progress logger
let mut iter_pr = ProgressLogger::default();
iter_pr.item_name = "update";
let mut iter_pl = ProgressLogger::default();
iter_pl.item_name("update");

// init the update progress logger
let mut update_pr = ProgressLogger::default();
update_pr.item_name = "node";
update_pr.local_speed = true;
update_pr.expected_updates = Some(num_nodes);
let mut update_pl = ProgressLogger::default();
update_pl
.item_name("node")
.local_speed(true)
.expected_updates(Some(num_nodes));

let seed = AtomicU64::new(seed);
let mut costs = Vec::with_capacity(gammas.len());

gamma_pr.start(format!("Running {} threads", num_threads));
gamma_pl.start(format!("Running {} threads", num_threads));

for (gamma_index, gamma) in gammas.iter().enumerate() {
// Reset mutable state for the next gamma
iter_pr.start(format!(
iter_pl.start(format!(
"Starting iterations with gamma={} ({}/{})...",
gamma,
gamma_index + 1,
Expand All @@ -88,7 +91,7 @@ pub fn layered_label_propagation(
.for_each(|x| x.store(true, Ordering::Relaxed));

for i in 0..max_iters {
update_pr.start(format!("Starting update {}...", i));
update_pl.start(format!("Starting update {}...", i));

update_perm.iter_mut().enumerate().for_each(|(i, x)| *x = i);
thread_pool.install(|| {
Expand Down Expand Up @@ -178,11 +181,11 @@ pub fn layered_label_propagation(
|delta_obj_func_0, delta_obj_func_1| delta_obj_func_0 + delta_obj_func_1,
&thread_pool,
granularity,
Some(&mut update_pr),
Some(&mut update_pl),
);

update_pr.done_with_count(num_nodes);
iter_pr.update_and_display();
update_pl.done_with_count(num_nodes);
iter_pl.update_and_display();

obj_func += delta_obj_func;
let gain = delta_obj_func / obj_func;
Expand All @@ -195,7 +198,7 @@ pub fn layered_label_propagation(
}
}

iter_pr.done();
iter_pl.done();

update_perm.iter_mut().enumerate().for_each(|(i, x)| *x = i);
// create sorted clusters by contiguous labels
Expand All @@ -220,10 +223,10 @@ pub fn layered_label_propagation(
let mut file = std::fs::File::create(format!("labels_{}.bin", gamma_index))?;
labels.serialize(&mut file)?;

gamma_pr.update_and_display();
gamma_pl.update_and_display();
}

gamma_pr.done();
gamma_pl.done();

// compute the indices that sort the gammas by cost
let mut gamma_indices = (0..costs.len()).collect::<Vec<_>>();
Expand Down
6 changes: 3 additions & 3 deletions src/algorithms/simplify.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use crate::graph::arc_list_graph;
use crate::traits::SequentialGraph;
use crate::utils::{BatchIterator, KMergeIters, SortPairs};
use anyhow::Result;
use dsi_progress_logger::ProgressLogger;
use dsi_progress_logger::*;
use itertools::{Dedup, Itertools};
use lender::*;
/// Make the graph undirected and remove self-loops
Expand All @@ -33,8 +33,8 @@ pub fn simplify(
let mut sorted = SortPairs::new(batch_size, dir.into_path())?;

let mut pl = ProgressLogger::default();
pl.item_name = "node";
pl.expected_updates = Some(graph.num_nodes());
pl.item_name("node")
.expected_updates(Some(graph.num_nodes()));
pl.start("Creating batches...");
// create batches of sorted edges
let mut iter = graph.iter();
Expand Down
6 changes: 3 additions & 3 deletions src/algorithms/transpose.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use crate::graph::arc_list_graph;
use crate::traits::SequentialGraph;
use crate::utils::{BatchIterator, KMergeIters, SortPairs};
use anyhow::Result;
use dsi_progress_logger::ProgressLogger;
use dsi_progress_logger::*;
use lender::*;
/// Transpose the graph and return a sequential graph view of it
#[allow(clippy::type_complexity)]
Expand All @@ -25,8 +25,8 @@ pub fn transpose(
let mut sorted = SortPairs::new(batch_size, dir.into_path())?;

let mut pl = ProgressLogger::default();
pl.item_name = "node";
pl.expected_updates = Some(graph.num_nodes());
pl.item_name("node")
.expected_updates(Some(graph.num_nodes()));
pl.start("Creating batches...");
// create batches of sorted edges
for_iter! { (src, succ) in graph.iter() =>
Expand Down
10 changes: 5 additions & 5 deletions src/bin/ascii_convert.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

use anyhow::Result;
use clap::Parser;
use dsi_progress_logger::ProgressLogger;
use dsi_progress_logger::*;
use lender::*;
use webgraph::traits::SequentialGraph;

Expand All @@ -27,9 +27,9 @@ pub fn main() -> Result<()> {
.unwrap();

let seq_graph = webgraph::graph::bvgraph::load_seq(&args.basename)?;
let mut pr = ProgressLogger::default().display_memory();
pr.item_name = "offset";
pr.start("Computing offsets...");
let mut pl = ProgressLogger::default();
pl.display_memory(true).item_name("offset");
pl.start("Computing offsets...");

let mut iter = seq_graph.iter();
while let Some((node_id, successors)) = iter.next() {
Expand All @@ -43,7 +43,7 @@ pub fn main() -> Result<()> {
);
}

pr.done();
pl.done();

Ok(())
}
33 changes: 18 additions & 15 deletions src/bin/build_eliasfano.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
use anyhow::{Context, Result};
use clap::Parser;
use dsi_bitstream::prelude::*;
use dsi_progress_logger::ProgressLogger;
use dsi_progress_logger::*;
use epserde::prelude::*;
use log::info;
use std::fs::File;
Expand Down Expand Up @@ -51,9 +51,10 @@ pub fn main() -> Result<()> {
let of_file_str = format!("{}.offsets", args.basename);
let of_file_path = std::path::Path::new(&of_file_str);

let mut pr = ProgressLogger::default().display_memory();
pr.expected_updates = Some(num_nodes);
pr.item_name = "offset";
let mut pl = ProgressLogger::default();
pl.display_memory(true)
.item_name("offset")
.expected_updates(Some(num_nodes));

// if the offsets file exists, read it to build Elias-Fano
if of_file_path.exists() {
Expand All @@ -62,48 +63,50 @@ pub fn main() -> Result<()> {
// create a bit reader on the file
let mut reader = BufBitReader::<BE, _>::new(<WordAdapter<u32, _>>::new(of_file));
// progress bar
pr.start("Translating offsets to EliasFano...");
pl.start("Translating offsets to EliasFano...");
// read the gamma-coded gaps from the offsets file and push the cumulative offsets
let mut offset = 0;
for _node_id in 0..num_nodes + 1 {
// write where
offset += reader.read_gamma()?;
efb.push(offset as _)?;
// decode the next nodes so we know where the next node_id starts
pr.light_update();
pl.light_update();
}
} else {
info!("The offsets file does not exists, reading the graph to build Elias-Fano");
let seq_graph = webgraph::graph::bvgraph::load_seq(&args.basename)?;
let seq_graph = seq_graph.map_codes_reader_builder(DynamicCodesReaderSkipperBuilder::from);
// otherwise directly read the graph
// progress bar
pr.start("Building EliasFano...");
pl.start("Building EliasFano...");
// read the graph and write the offsets
let mut iter = seq_graph.iter_degrees();
for (new_offset, _node_id, _degree) in iter.by_ref() {
// write where
efb.push(new_offset as _)?;
// decode the next nodes so we know where the next node_id starts
pr.light_update();
pl.light_update();
}
efb.push(iter.get_pos() as _)?;
}
pr.done();
pl.done();

let ef = efb.build();

let mut pr = ProgressLogger::default().display_memory();
pr.start("Building the Index over the ones in the high-bits...");
let mut pl = ProgressLogger::default();
pl.display_memory(true);
pl.start("Building the Index over the ones in the high-bits...");
let ef: webgraph::EF<_> = ef.convert_to().unwrap();
pr.done();
pl.done();

let mut pr = ProgressLogger::default().display_memory();
pr.start("Writing to disk...");
let mut pl = ProgressLogger::default();
pl.display_memory(true);
pl.start("Writing to disk...");
// serialize and dump the schema to disk
let schema = ef.serialize_with_schema(&mut ef_file)?;
std::fs::write(format!("{}.ef.schema", args.basename), schema.to_csv())?;

pr.done();
pl.done();
Ok(())
}
17 changes: 9 additions & 8 deletions src/bin/build_offsets.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
use anyhow::Result;
use clap::Parser;
use dsi_bitstream::prelude::*;
use dsi_progress_logger::ProgressLogger;
use dsi_progress_logger::*;
use std::io::BufWriter;
use webgraph::prelude::*;

Expand Down Expand Up @@ -37,10 +37,11 @@ pub fn main() -> Result<()> {
BufWriter::with_capacity(1 << 20, file),
));
// progress bar
let mut pr = ProgressLogger::default().display_memory();
pr.item_name = "offset";
pr.expected_updates = Some(seq_graph.num_nodes());
pr.start("Computing offsets...");
let mut pl = ProgressLogger::default();
pl.display_memory(true)
.item_name("offset")
.expected_updates(Some(seq_graph.num_nodes()));
pl.start("Computing offsets...");
// read the graph and write the offsets
let mut offset = 0;
let mut degs_iter = seq_graph.iter_degrees();
Expand All @@ -49,11 +50,11 @@ pub fn main() -> Result<()> {
writer.write_gamma((new_offset - offset) as _)?;
offset = new_offset;
// decode the next nodes so we know where the next node_id starts
pr.light_update();
pl.light_update();
}
// write the last offset, this is done to avoid decoding the last node
writer.write_gamma((degs_iter.get_pos() - offset) as _)?;
pr.light_update();
pr.done();
pl.light_update();
pl.done();
Ok(())
}
Loading

0 comments on commit e1792d7

Please sign in to comment.