Skip to content

Commit

Permalink
Add FST cache serialization/deserialization support (#160)
Browse files Browse the repository at this point in the history
* Add serialization for FST cache

* Add parsers for FST cache

* Some refactoring + tests

* Small fixes

* Implement PartialEq for FST caches & fix serialization/deserialization issue

* Make tests more exhaustive & fix hashmap cache serialization/deserialization issue

* Add State table serialization/deserialization support

* Adding new traits and implementing binary serialization/deserialization for state table dependent structs

* Refining the code

* Changes following review

* Add tests for StateTable serialization

* Changes following review

* Update changelog

* Remove unused import
  • Loading branch information
emricksinisonos authored Oct 22, 2021
1 parent 552671c commit 54083cc
Show file tree
Hide file tree
Showing 41 changed files with 1,457 additions and 156 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [unreleased]

## Added
- Binary serialization & deserialization support for FST caches.
- Binary serialization & deserialization support for Compose FST op state table.

## [0.8.0] - 2020-16-10

## Added
Expand Down
35 changes: 30 additions & 5 deletions rustfst/src/algorithms/compose/compose_fst.rs
Original file line number Diff line number Diff line change
@@ -1,16 +1,22 @@
use anyhow::Result;
use std::borrow::Borrow;
use std::fmt::Debug;
use std::path::Path;

use crate::algorithms::compose::compose_filters::{
ComposeFilter, ComposeFilterBuilder, SequenceComposeFilterBuilder,
};
use crate::algorithms::compose::matchers::{GenericMatcher, Matcher};
use crate::algorithms::compose::{ComposeFstOp, ComposeFstOpOptions, ComposeStateTuple};
use crate::algorithms::lazy::{FstCache, LazyFst, SimpleVecCache, StateTable};
use crate::algorithms::compose::{
ComposeFstOp, ComposeFstOpOptions, ComposeFstOpState, ComposeStateTuple,
};
use crate::algorithms::lazy::{
FstCache, LazyFst, SerializableCache, SerializableLazyFst, SimpleVecCache,
};
use crate::fst_properties::FstProperties;
use crate::fst_traits::{AllocableFst, CoreFst, Fst, FstIterator, MutableFst, StateIterator};
use crate::semirings::Semiring;
use crate::parsers::SerializeBinary;
use crate::semirings::{Semiring, SerializableSemiring};
use crate::{StateId, SymbolTable, TrsVec};
use std::sync::Arc;

Expand Down Expand Up @@ -101,7 +107,7 @@ where
M1,
M2,
CFB,
StateTable<
ComposeFstOpState<
ComposeStateTuple<
<CFB::CF as ComposeFilter<W, F1, F2, B1, B2, CFB::IM1, CFB::IM2>>::FS,
>,
Expand All @@ -126,7 +132,7 @@ where
M1,
M2,
CFB,
StateTable<
ComposeFstOpState<
ComposeStateTuple<
<CFB::CF as ComposeFilter<W, F1, F2, B1, B2, CFB::IM1, CFB::IM2>>::FS,
>,
Expand Down Expand Up @@ -191,6 +197,25 @@ where
}
}

/// `SerializableLazyFst` implementation for `ComposeFst`.
///
/// Only available when the weight type is a `SerializableSemiring`, the cache is a
/// `SerializableCache`, and the compose filter's state type `FS` implements
/// `SerializeBinary` (see the `where` clause).
impl<W, F1, F2, B1, B2, M1, M2, CFB, Cache> SerializableLazyFst
for ComposeFst<W, F1, F2, B1, B2, M1, M2, CFB, Cache>
where
W: SerializableSemiring,
F1: Fst<W>,
F2: Fst<W>,
B1: Borrow<F1> + Debug + Clone,
B2: Borrow<F2> + Debug + Clone,
Cache: FstCache<W> + SerializableCache,
M1: Matcher<W, F1, B1>,
M2: Matcher<W, F2, B2>,
CFB: ComposeFilterBuilder<W, F1, F2, B1, B2, M1, M2>,
<CFB::CF as ComposeFilter<W, F1, F2, B1, B2, CFB::IM1, CFB::IM2>>::FS: SerializeBinary,
{
/// Forwards `cache_dir` and `op_state_dir`, unchanged, to `write` on the wrapped
/// value `self.0` and returns its result.
///
/// Both paths share the single type parameter `P`, so callers must pass two values
/// of the same type.
// NOTE(review): presumably `cache_dir` receives the cache and `op_state_dir` the
// operation state; confirm against the `write` implementation of `self.0`.
fn write<P: AsRef<Path>>(&self, cache_dir: P, op_state_dir: P) -> Result<()> {
self.0.write(cache_dir, op_state_dir)
}
}

impl<W, F1, F2, B1, B2, M1, M2, CFB, Cache> CoreFst<W>
for ComposeFst<W, F1, F2, B1, B2, M1, M2, CFB, Cache>
where
Expand Down
83 changes: 73 additions & 10 deletions rustfst/src/algorithms/compose/compose_fst_op.rs
Original file line number Diff line number Diff line change
@@ -1,22 +1,63 @@
use std::borrow::Borrow;
use std::fmt::Debug;
use std::fs::{read, File};
use std::hash::Hash;
use std::io::BufWriter;
use std::path::Path;
use std::sync::Arc;

use anyhow::Result;
use anyhow::{Context, Result};

use crate::algorithms::compose::compose_filters::{ComposeFilter, ComposeFilterBuilder};
use crate::algorithms::compose::filter_states::FilterState;
use crate::algorithms::compose::lookahead_filters::lookahead_selector::Selector;
use crate::algorithms::compose::matchers::{IterItemMatcher, MatcherFlags};
use crate::algorithms::compose::matchers::{MatchType, Matcher, REQUIRE_PRIORITY};
use crate::algorithms::compose::{ComposeFstOpOptions, ComposeStateTuple};
use crate::algorithms::lazy::{FstOp, StateTable};
use crate::algorithms::lazy::{AccessibleOpState, FstOp, SerializableOpState, StateTable};
use crate::fst_properties::mutable_properties::compose_properties;
use crate::fst_properties::FstProperties;
use crate::fst_traits::Fst;
use crate::parsers::SerializeBinary;
use crate::semirings::Semiring;
use crate::{StateId, Tr, Trs, TrsVec, EPS_LABEL, NO_LABEL};

/// State owned by a compose operation.
///
/// Wraps the `StateTable` that the compose operation queries with `find_id`
/// (tuple to state id) and `find_tuple` (state id to tuple).
#[derive(Debug, Clone)]
pub struct ComposeFstOpState<T: Hash + Eq + Clone> {
    state_table: StateTable<T>,
}

impl<T: Hash + Eq + Clone> ComposeFstOpState<T> {
    /// Creates a state wrapping a table built with `StateTable::new()`.
    pub fn new() -> Self {
        ComposeFstOpState {
            state_table: StateTable::<T>::new(),
        }
    }
}

/// `Default` is equivalent to [`ComposeFstOpState::new`], so the type can be used
/// with `unwrap_or_default`, `..Default::default()` and similar generic code.
impl<T: Hash + Eq + Clone> Default for ComposeFstOpState<T> {
    fn default() -> Self {
        Self::new()
    }
}

impl<T: Hash + Eq + Clone + SerializeBinary> SerializableOpState for ComposeFstOpState<T> {
    /// Loads a `ComposeFstOpState` from a file in binary format.
    ///
    /// # Errors
    /// Fails if the file cannot be read, or if `StateTable::parse_binary` rejects
    /// its contents.
    fn read<P: AsRef<Path>>(path: P) -> Result<Self> {
        let data = read(path.as_ref())
            .with_context(|| format!("Can't open file : {:?}", path.as_ref()))?;

        // Parse StateTable. The first tuple element (the parser's remaining input)
        // is deliberately ignored.
        let (_, state_table) = StateTable::<T>::parse_binary(&data)
            .map_err(|e| format_err!("Error while parsing binary StateTable : {:?}", e))?;

        Ok(Self { state_table })
    }

    /// Writes a `ComposeFstOpState` to a file in binary format.
    ///
    /// # Errors
    /// Fails if the file cannot be created, if serializing the table fails, or if
    /// the buffered bytes cannot be flushed to the file.
    fn write<P: AsRef<Path>>(&self, path: P) -> Result<()> {
        let path = path.as_ref();
        let file = File::create(path)
            .with_context(|| format!("Can't create file : {:?}", path))?;
        let mut file = BufWriter::new(file);

        // Write StateTable
        self.state_table.write_binary(&mut file)?;

        // Flush explicitly: `BufWriter`'s `Drop` also flushes but discards any error,
        // which would report success for a truncated file.
        std::io::Write::flush(&mut file)
            .with_context(|| format!("Can't flush file : {:?}", path))?;
        Ok(())
    }
}

#[derive(Debug)]
pub struct ComposeFstOp<W, F1, F2, B1, B2, M1, M2, CFB>
where
Expand All @@ -30,7 +71,7 @@ where
CFB: ComposeFilterBuilder<W, F1, F2, B1, B2, M1, M2>,
{
compose_filter_builder: CFB,
state_table: StateTable<
compose_state: ComposeFstOpState<
ComposeStateTuple<<CFB::CF as ComposeFilter<W, F1, F2, B1, B2, CFB::IM1, CFB::IM2>>::FS>,
>,
match_type: MatchType,
Expand All @@ -53,7 +94,7 @@ where
fn clone(&self) -> Self {
Self {
compose_filter_builder: self.compose_filter_builder.clone(),
state_table: self.state_table.clone(),
compose_state: self.compose_state.clone(),
match_type: self.match_type.clone(),
properties: self.properties.clone(),
fst1: self.fst1.clone(),
Expand Down Expand Up @@ -89,7 +130,7 @@ where
M1,
M2,
CFB,
StateTable<
ComposeFstOpState<
ComposeStateTuple<
<CFB::CF as ComposeFilter<W, F1, F2, B1, B2, CFB::IM1, CFB::IM2>>::FS,
>,
Expand All @@ -111,7 +152,7 @@ where

Ok(Self {
compose_filter_builder,
state_table: opts.state_table.unwrap_or_else(StateTable::new),
compose_state: opts.op_state.unwrap_or_else(ComposeFstOpState::new),
match_type,
properties,
fst1,
Expand Down Expand Up @@ -233,7 +274,7 @@ where
arc1.ilabel,
arc2.olabel,
arc1.weight,
self.state_table.find_id(tuple),
self.compose_state.state_table.find_id(tuple),
))
}

Expand Down Expand Up @@ -306,6 +347,28 @@ where
}
}

/// `AccessibleOpState` implementation for `ComposeFstOp`.
///
/// Requires the compose filter's state type `FS` to implement `SerializeBinary`
/// (last bound of the `where` clause).
impl<W, F1, F2, B1, B2, M1, M2, CFB> AccessibleOpState
for ComposeFstOp<W, F1, F2, B1, B2, M1, M2, CFB>
where
W: Semiring,
F1: Fst<W>,
F2: Fst<W>,
B1: Borrow<F1> + Debug + Clone,
B2: Borrow<F2> + Debug + Clone,
M1: Matcher<W, F1, B1>,
M2: Matcher<W, F2, B2>,
CFB: ComposeFilterBuilder<W, F1, F2, B1, B2, M1, M2>,
<CFB::CF as ComposeFilter<W, F1, F2, B1, B2, CFB::IM1, CFB::IM2>>::FS: SerializeBinary,
{
/// The exposed state type: a `ComposeFstOpState` over `ComposeStateTuple`s whose
/// filter-state component is the compose filter's `FS` type.
type FstOpState = ComposeFstOpState<
ComposeStateTuple<<CFB::CF as ComposeFilter<W, F1, F2, B1, B2, CFB::IM1, CFB::IM2>>::FS>,
>;

/// Returns a shared reference to this operation's `compose_state` field.
fn get_op_state(&self) -> &Self::FstOpState {
&self.compose_state
}
}

impl<W, F1, F2, B1, B2, M1, M2, CFB> FstOp<W> for ComposeFstOp<W, F1, F2, B1, B2, M1, M2, CFB>
where
W: Semiring,
Expand All @@ -331,11 +394,11 @@ where
let s2 = s2.unwrap();
let fs = compose_filter.start();
let tuple = ComposeStateTuple { s1, s2, fs };
Ok(Some(self.state_table.find_id(tuple)))
Ok(Some(self.compose_state.state_table.find_id(tuple)))
}

fn compute_trs(&self, state: StateId) -> Result<TrsVec<W>> {
let tuple = self.state_table.find_tuple(state);
let tuple = self.compose_state.state_table.find_tuple(state);
let s1 = tuple.s1;
let s2 = tuple.s2;

Expand All @@ -350,7 +413,7 @@ where
}

fn compute_final_weight(&self, state: StateId) -> Result<Option<W>> {
let tuple = self.state_table.find_tuple(state);
let tuple = self.compose_state.state_table.find_tuple(state);

// Construct a new ComposeFilter each time to avoid mutating the internal state.
let mut compose_filter = self.compose_filter_builder.build()?;
Expand Down
16 changes: 8 additions & 8 deletions rustfst/src/algorithms/compose/compose_fst_op_options.rs
Original file line number Diff line number Diff line change
@@ -1,38 +1,38 @@
pub struct ComposeFstOpOptions<M1, M2, CFB, ST> {
pub struct ComposeFstOpOptions<M1, M2, CFB, OS> {
pub matcher1: Option<M1>,
pub matcher2: Option<M2>,
pub filter_builder: Option<CFB>,
pub state_table: Option<ST>,
pub op_state: Option<OS>,
}

impl<M1, M2, CFB, ST> Default for ComposeFstOpOptions<M1, M2, CFB, ST> {
impl<M1, M2, CFB, OS> Default for ComposeFstOpOptions<M1, M2, CFB, OS> {
fn default() -> Self {
Self {
matcher1: None,
matcher2: None,
filter_builder: None,
state_table: None,
op_state: None,
}
}
}

impl<M1, M2, CFB, ST> ComposeFstOpOptions<M1, M2, CFB, ST> {
impl<M1, M2, CFB, OS> ComposeFstOpOptions<M1, M2, CFB, OS> {
pub fn new<
IM1: Into<Option<M1>>,
IM2: Into<Option<M2>>,
ICFB: Into<Option<CFB>>,
IST: Into<Option<ST>>,
IST: Into<Option<OS>>,
>(
matcher1: IM1,
matcher2: IM2,
filter: ICFB,
state_table: IST,
op_state: IST,
) -> Self {
Self {
matcher1: matcher1.into(),
matcher2: matcher2.into(),
filter_builder: filter.into(),
state_table: state_table.into(),
op_state: op_state.into(),
}
}
}
33 changes: 32 additions & 1 deletion rustfst/src/algorithms/compose/compose_state_tuple.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,39 @@
use crate::algorithms::compose::filter_states::FilterState;
use crate::parsers::nom_utils::NomCustomError;
use crate::parsers::{parse_bin_u64, write_bin_u64, SerializeBinary};
use crate::StateId;

use anyhow::Result;
use nom::IResult;
use std::io::Write;

#[derive(Default, PartialEq, Eq, Clone, Hash, PartialOrd, Debug)]
pub struct ComposeStateTuple<FS> {
pub struct ComposeStateTuple<FS: FilterState> {
pub fs: FS,
pub s1: StateId,
pub s2: StateId,
}

impl<FS: FilterState + SerializeBinary> SerializeBinary for ComposeStateTuple<FS> {
    /// Parses a compose state tuple from a binary buffer.
    ///
    /// Fields are read in the order `fs`, `s1`, `s2`; `s1` and `s2` are each read
    /// with `parse_bin_u64` and then cast to `StateId`.
    fn parse_binary(i: &[u8]) -> IResult<&[u8], Self, NomCustomError<&[u8]>> {
        let (rest, fs) = FS::parse_binary(i)?;
        let (rest, raw_s1) = parse_bin_u64(rest)?;
        let (rest, raw_s2) = parse_bin_u64(rest)?;

        let tuple = Self {
            fs,
            s1: raw_s1 as StateId,
            s2: raw_s2 as StateId,
        };
        Ok((rest, tuple))
    }

    /// Writes a compose state tuple to a writable buffer.
    ///
    /// Fields are written in the order `fs`, `s1`, `s2`, mirroring `parse_binary`.
    fn write_binary<W: Write>(&self, writer: &mut W) -> Result<()> {
        let Self { fs, s1, s2 } = self;
        fs.write_binary(writer)?;
        write_bin_u64(writer, *s1 as u64)?;
        write_bin_u64(writer, *s2 as u64)?;
        Ok(())
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,11 @@ use std::hash::Hash;
use crate::{StateId, NO_STATE_ID};

use self::super::FilterState;
use crate::parsers::nom_utils::NomCustomError;
use crate::parsers::{parse_bin_u64, write_bin_u64, SerializeBinary};
use anyhow::Result;
use nom::IResult;
use std::io::Write;

/// Filter state that is a signed integral type.
#[derive(Debug, PartialEq, Clone, Eq, Hash)]
Expand All @@ -26,6 +31,22 @@ impl FilterState for IntegerFilterState {
}
}

impl SerializeBinary for IntegerFilterState {
    /// Parses an `IntegerFilterState` from a binary buffer: reads one value with
    /// `parse_bin_u64` and casts it to `StateId`.
    fn parse_binary(i: &[u8]) -> IResult<&[u8], Self, NomCustomError<&[u8]>> {
        let (rest, raw) = parse_bin_u64(i)?;
        let state = raw as StateId;
        Ok((rest, Self { state }))
    }

    /// Writes the wrapped `state` to `writer` as a `u64` via `write_bin_u64`.
    fn write_binary<W: Write>(&self, writer: &mut W) -> Result<()> {
        let raw = self.state as u64;
        write_bin_u64(writer, raw)?;
        Ok(())
    }
}

// pub type IntFilterState = IntegerFilterState<i32>;
// pub type ShortFilterState = IntegerFilterState<i16>;
// pub type CharFilterState = IntegerFilterState<i8>;
Loading

0 comments on commit 54083cc

Please sign in to comment.