Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Serialized library #11

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,12 @@
<scope>test</scope>
</dependency>

<dependency>
<groupId>org.apache.commons</groupId>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion: you can change scope of this dependency to test

<artifactId>commons-lang3</artifactId>
<version>3.3.2</version>
</dependency>

</dependencies>

<build>
Expand Down
18 changes: 17 additions & 1 deletion src/main/java/org/ahocorasick/interval/Interval.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
package org.ahocorasick.interval;

public class Interval implements Intervalable {
import java.io.IOException;
import java.io.Serializable;
import java.lang.reflect.Field;

public class Interval implements Intervalable, Serializable {

private int start;
private int end;
Expand Down Expand Up @@ -61,4 +65,16 @@ public String toString() {
return this.start + ":" + this.end;
}

/**
 * Writes this interval to {@code stream} as two ints: {@code start} first, then {@code end}.
 *
 * NOTE(review): Java serialization only invokes a {@code writeObject} hook that is declared
 * {@code private}. Being {@code protected}, this method is presumably reached only through
 * explicit calls (for example a subclass delegating to it) -- confirm that is intended.
 *
 * @param stream destination stream
 * @throws IOException if either value cannot be written
 */
protected void writeObject(java.io.ObjectOutputStream stream)
        throws IOException {
    final int first = this.start;
    final int last = this.end;
    stream.writeInt(first);
    stream.writeInt(last);
}

/**
 * Restores this interval from {@code stream}, reading {@code start} and then {@code end}
 * as ints, in the same order in which {@code writeObject} emits them.
 *
 * NOTE(review): Java serialization only invokes a {@code readObject} hook that is declared
 * {@code private}; as a {@code protected} method this is presumably only called explicitly
 * -- confirm. The reflection-related exceptions in the signature are not thrown by this
 * body itself.
 *
 * @param stream source stream
 * @throws IOException if either value cannot be read
 */
protected void readObject(java.io.ObjectInputStream stream)
        throws IOException, ClassNotFoundException, IllegalAccessException, NoSuchFieldException {
    start = stream.readInt();
    end = stream.readInt();
}

}
22 changes: 21 additions & 1 deletion src/main/java/org/ahocorasick/trie/Emit.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,11 @@
import org.ahocorasick.interval.Interval;
import org.ahocorasick.interval.Intervalable;

public class Emit extends Interval implements Intervalable {
import java.io.IOException;
import java.io.Serializable;
import java.lang.reflect.Field;

public class Emit extends Interval implements Intervalable, Serializable {

private final String keyword;

Expand All @@ -21,4 +25,20 @@ public String toString() {
return super.toString() + "=" + this.keyword;
}

/**
 * Writes the interval bounds via the superclass, then appends the keyword in modified
 * UTF-8 form.
 *
 * @param stream destination stream
 * @throws IOException if writing fails
 */
@Override
protected void writeObject(java.io.ObjectOutputStream stream)
        throws IOException {
    super.writeObject(stream);
    final String word = this.keyword;
    stream.writeUTF(word);
}

/**
 * Restores the interval bounds via the superclass, then reads the keyword and stores it in
 * the {@code final} field {@code keyword} through reflection.
 *
 * @param stream source stream
 * @throws IOException if reading fails
 * @throws NoSuchFieldException if the {@code keyword} field cannot be found
 * @throws IllegalAccessException if the {@code keyword} field cannot be written
 */
@Override
protected void readObject(java.io.ObjectInputStream stream)
        throws IOException, ClassNotFoundException, IllegalAccessException, NoSuchFieldException {
    // Look the field up on Emit.class rather than getClass(): getDeclaredField does not
    // search superclasses, so for an instance of an Emit subclass getClass() would fail
    // with NoSuchFieldException.
    Field keywordField = Emit.class.getDeclaredField("keyword");
    super.readObject(stream);
    keywordField.setAccessible(true);
    keywordField.set(this, stream.readUTF());
}

}
114 changes: 112 additions & 2 deletions src/main/java/org/ahocorasick/trie/State.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
package org.ahocorasick.trie;

import java.io.IOException;
import java.io.Serializable;
import java.lang.Character;
import java.lang.Integer;
import java.util.*;
import java.lang.reflect.Field;
import java.util.TreeMap;

/**
* <p>
Expand All @@ -23,7 +29,7 @@
*
* @author Robert Bor
*/
public class State {
public class State implements Serializable, Comparable<State> {

/** effectively the size of the keyword */
private final int depth;
Expand All @@ -43,6 +49,17 @@ public class State {
/** whenever this state is reached, it will emit the matched keywords for future reference */
private Set<String> emits = null;

/**
 * Used for serialization: maps each already-written state (by identity) to the reference
 * id it was assigned.
 */
public static IdentityHashMap<Object, Integer> objectToReference = new IdentityHashMap<Object, Integer>();
/**
 * Used for deserialization: maps a reference id to the state that was read for it.
 *
 * NOTE(review): IdentityHashMap compares keys with {@code ==}. Boxed {@code Integer} keys
 * are only guaranteed to be the same object for values in the small-integer cache
 * (-128..127), so lookups for larger ids will presumably miss -- consider a HashMap here.
 */
public static IdentityHashMap<Integer, Object> referenceToObject = new IdentityHashMap<Integer, Object>();
/** Last reference id handed out; the next state written receives {@code referenceCount + 1}. */
public static int referenceCount = 1;

/**
 * Empties both reference tables and restores the id counter to its initial value of 1.
 */
public static void reset () {
    objectToReference.clear();
    referenceToObject.clear();
    referenceCount = 1;
}

public State() {
this(0);
}
Expand Down Expand Up @@ -114,4 +131,97 @@ public Collection<Character> getTransitions() {
return this.success.keySet();
}

}
/**
 * Serializes this state.
 *
 * Layout: depth (int), number of transitions (int), then per transition the key character
 * followed by either a previously assigned reference id, or the marker 0, a newly assigned
 * id and the child state itself. The failure state and the emits collection come last.
 *
 * @param stream destination stream
 * @throws IOException if writing fails
 */
private void writeObject(java.io.ObjectOutputStream stream)
        throws IOException {
    stream.writeInt(depth);
    stream.writeInt(success.size());

    for (Map.Entry<Character, State> transition : success.entrySet()) {
        final State child = transition.getValue();
        stream.writeObject(transition.getKey());

        final Integer knownId = objectToReference.get(child);
        if (knownId != null) {
            // Child was written before: emit only its id.
            stream.writeInt(knownId);
            continue;
        }

        // First encounter: register the child under a fresh id before descending into it.
        final int freshId = ++referenceCount;
        objectToReference.put(child, freshId);
        stream.writeInt(0);
        stream.writeInt(freshId);
        stream.writeObject(child);
    }

    stream.writeObject(failure);
    stream.writeObject(emits);
}

/**
 * Restores this state from {@code stream}, mirroring the layout produced by
 * {@code writeObject}: depth, transition count, the transitions (each either a reference
 * id or the marker 0 followed by a new id and the child state), the failure state and the
 * emits.
 *
 * @param stream source stream
 * @throws IOException if reading fails
 * @throws ClassNotFoundException if a serialized class cannot be resolved
 * @throws NoSuchFieldException if {@code depth} or {@code rootState} cannot be found
 * @throws IllegalAccessException if one of those fields cannot be written
 */
private void readObject(java.io.ObjectInputStream stream)
        throws IOException, ClassNotFoundException, NoSuchFieldException, IllegalAccessException {

    // Use reflection to modify final fields. The lookup goes through State.class rather
    // than getClass(): getDeclaredField does not search superclasses, so it would throw
    // NoSuchFieldException for an instance of a State subclass.
    Field depthField = State.class.getDeclaredField("depth");
    depthField.setAccessible(true);
    depthField.set(this, stream.readInt());

    // A state of depth 0 is treated as the root and refers to itself.
    Field rootStateField = State.class.getDeclaredField("rootState");
    rootStateField.setAccessible(true);
    rootStateField.set(this, (depth == 0) ? this : null);

    int successSize = stream.readInt();
    success = new TreeMap<Character, State>();
    for (int i = 0; i < successSize; i++) {
        Character character = (Character) stream.readObject();
        int reference = stream.readInt();
        State treeState;
        if (reference == 0) {
            // Marker 0: a new id and the full child state follow.
            Integer referenceID = stream.readInt();
            treeState = (State) stream.readObject();
            referenceToObject.put(referenceID, treeState);
        } else {
            // NOTE(review): referenceToObject is identity-keyed; this lookup presumably
            // only succeeds for ids inside the Integer cache range -- confirm.
            treeState = (State) referenceToObject.get(reference);
        }
        success.put(character, treeState);
    }

    failure = (State) stream.readObject();

    // Unchecked only in the element type; the stream is expected to hold a TreeSet here.
    @SuppressWarnings("unchecked")
    TreeSet<String> restoredEmits = (TreeSet<String>) stream.readObject();
    emits = restoredEmits;
}

/**
 * Child states already visited by the comparison that is currently running. Marking a
 * child before descending into it keeps the recursion from revisiting it.
 * The map is emptied when the outermost {@link #compareTo(State)} call returns, so one
 * comparison cannot influence the next and compared states are not retained.
 */
private static IdentityHashMap<State, Integer> equalityReferenceMap = new IdentityHashMap<State, Integer>();

/** Number of {@code compareTo} calls currently on the stack; 0 means no comparison is running. */
private static int activeComparisons = 0;

/**
 * Compares two transition maps entry by entry.
 *
 * Entries are paired by iteration order, which assumes both maps iterate their keys in
 * the same order (e.g. both sorted) -- TODO confirm for all map implementations used.
 *
 * @param mine  transitions of this state
 * @param other transitions of the state being compared against
 * @return {@code true} if both maps have the same size, the same keys in the same order,
 *         and every not-yet-visited child of {@code mine} equals its counterpart
 */
private boolean gotoEquality(Map<Character,State> mine, Map<Character,State> other) {
    if (mine.size() != other.size()) return false;
    Iterator<Map.Entry<Character, State>> otherEntries = other.entrySet().iterator();
    for (Map.Entry<Character, State> e : mine.entrySet()) {
        Map.Entry<Character, State> otherE = otherEntries.next();

        if (!e.getKey().equals(otherE.getKey())) return false;
        if (equalityReferenceMap.get(e.getValue()) == null) {
            // Mark before recursing so the child is not compared a second time.
            equalityReferenceMap.put(e.getValue(), equalityReferenceMap.size() + 1);
            if (!e.getValue().equals(otherE.getValue())) return false;
        }
    }
    return true;
}

/**
 * Structural equality, defined as {@code compareTo(obj) == 0}.
 *
 * NOTE(review): no matching {@code hashCode} override is visible in this part of the
 * class -- confirm one exists before using states in hash-based collections.
 *
 * @param obj object to compare with
 * @return {@code true} if {@code obj} is a State that compares as 0 to this one
 */
@Override
public boolean equals(Object obj) {
    if (!(obj instanceof State))
        return false;
    return compareTo((State) obj) == 0;
}

/**
 * Returns 0 when both states have the same depth, the same transitions and -- where both
 * sides have them -- equal failure states and emits; returns 1 otherwise.
 *
 * NOTE(review): the result is never negative, so this does not define an ordering in the
 * sense of {@link Comparable}; it is only usable as an equality test.
 *
 * @param o state to compare with
 * @return 0 if considered equal, 1 otherwise
 */
@Override
public int compareTo(State o) {
    activeComparisons++;
    try {
        if (this.depth != o.depth)
            return 1;
        if ((this.depth == 0 && o.depth == 0) && (this.rootState != this || o.rootState != o))
            return 1;
        if (!gotoEquality(this.success, o.success))
            return 1;
        if (this.failure != null && o.failure != null && !this.failure.equals(o.failure))
            return 1;
        if (this.emits != null && o.emits != null && !this.emits.equals(o.emits))
            return 1;
        return 0;
    } finally {
        // Only the outermost call clears the visited set; nested calls still need it.
        if (--activeComparisons == 0) {
            equalityReferenceMap.clear();
        }
    }
}
}
63 changes: 51 additions & 12 deletions src/main/java/org/ahocorasick/trie/Trie.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,17 @@
import org.ahocorasick.interval.IntervalTree;
import org.ahocorasick.interval.Intervalable;

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Queue;
import java.io.IOException;
import java.io.Serializable;
import java.util.*;
import java.util.concurrent.LinkedBlockingDeque;

/**
*
* Based on the Aho-Corasick white paper, Bell technologies: ftp://163.13.200.222/assistant/bearhero/prog/%A8%E4%A5%A6/ac_bm.pdf
* @author Robert Bor
*/
public class Trie {
public class Trie implements Serializable, Comparable<Trie> {

private TrieConfig trieConfig;

Expand Down Expand Up @@ -87,6 +86,10 @@ private Token createMatch(Emit emit, String text) {

/**
 * Parses {@code text} without requesting whole-word filtering for this call; delegates to
 * the two-argument overload.
 *
 * @param text text to search
 * @return the emits produced by the two-argument overload
 */
@SuppressWarnings("unchecked")
public Collection<Emit> parseText(String text) {
    final Boolean wholeWordsOnly = Boolean.FALSE;
    return parseText(text, wholeWordsOnly);
}

public Collection<Emit> parseText(String text, Boolean onlyWholeWords) {
checkForConstructedFailureStates();

int position = 0;
Expand All @@ -101,7 +104,7 @@ public Collection<Emit> parseText(String text) {
position++;
}

if (trieConfig.isOnlyWholeWords()) {
if (trieConfig.isOnlyWholeWords() || onlyWholeWords) {
removePartialMatches(text, collectedEmits);
}

Expand Down Expand Up @@ -132,10 +135,13 @@ private void removePartialMatches(String searchText, List<Emit> collectedEmits)
}

private State getState(State currentState, Character character) {
State newCurrentState = currentState.nextState(character);
while (newCurrentState == null) {
currentState = currentState.failure();
State newCurrentState = null;
if (currentState != null) {
newCurrentState = currentState.nextState(character);
while (newCurrentState == null && currentState.failure() != null) {
currentState = currentState.failure();
newCurrentState = currentState.nextState(character);
}
}
return newCurrentState;
}
Expand All @@ -149,10 +155,13 @@ private void checkForConstructedFailureStates() {
private void constructFailureStates() {
Queue<State> queue = new LinkedBlockingDeque<State>();

int i = 0;
// First, set the fail state of all depth 1 states to the root state
for (State depthOneState : this.rootState.getStates()) {
depthOneState.setFailure(this.rootState);
queue.add(depthOneState);
if (depthOneState != null) {
depthOneState.setFailure(this.rootState);
queue.add(depthOneState);
}
}
this.failureStatesConstructed = true;

Expand All @@ -176,12 +185,42 @@ private void constructFailureStates() {
}

private void storeEmits(int position, State currentState, List<Emit> collectedEmits) {
Collection<String> emits = currentState.emit();
Collection<String> emits = (currentState == null) ? Collections.<String> emptyList() : currentState.emit();
if (emits != null && !emits.isEmpty()) {
for (String emit : emits) {
collectedEmits.add(new Emit(position-emit.length()+1, position, emit));
}
}
}

/**
 * Serializes the configuration followed by the root state.
 *
 * State serialization keeps its reference ids in static tables on {@link State}. They are
 * reset before writing so that ids left over from an earlier serialization cannot cause
 * states to be emitted as bare back-references, and reset again afterwards so the tables
 * do not keep every state of this trie reachable.
 *
 * @param stream destination stream
 * @throws IOException if writing fails
 */
private void writeObject(java.io.ObjectOutputStream stream)
        throws IOException {
    State.reset();
    try {
        stream.writeObject(trieConfig);
        stream.writeObject(rootState);
    } finally {
        State.reset();
    }
}

/**
 * Restores the configuration and the root state, then rebuilds the failure states.
 *
 * The static reference tables on {@link State} are reset before reading so stale entries
 * from an earlier deserialization cannot be resolved by accident, and reset afterwards so
 * they do not keep the restored states reachable.
 *
 * @param stream source stream
 * @throws IOException if reading fails
 * @throws ClassNotFoundException if a serialized class cannot be resolved
 */
private void readObject(java.io.ObjectInputStream stream)
        throws IOException, ClassNotFoundException {
    State.reset();
    try {
        trieConfig = (TrieConfig) stream.readObject();
        rootState = (State) stream.readObject();
    } finally {
        State.reset();
    }
    constructFailureStates();
}

/**
 * Two tries are equal when {@code compareTo} reports 0 for them.
 *
 * @param obj object to compare with
 * @return {@code true} if {@code obj} is a Trie that compares as 0 to this one
 */
@Override
public boolean equals(Object obj) {
    if (obj instanceof Trie) {
        final Trie that = (Trie) obj;
        return compareTo(that) == 0;
    }
    return false;
}

/**
 * Returns 0 when configuration, root state and the failure-states-constructed flag all
 * match, and 1 otherwise. The checks run in that order and stop at the first mismatch.
 *
 * NOTE(review): the result is never negative, so this serves as an equality test only,
 * not as an ordering.
 *
 * @param o trie to compare with
 * @return 0 if considered equal, 1 otherwise
 */
@Override
public int compareTo(Trie o) {
    final boolean identical = this.trieConfig.equals(o.trieConfig)
            && this.rootState.equals(o.rootState)
            && this.failureStatesConstructed == o.failureStatesConstructed;
    return identical ? 0 : 1;
}
}
36 changes: 35 additions & 1 deletion src/main/java/org/ahocorasick/trie/TrieConfig.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
package org.ahocorasick.trie;

public class TrieConfig {
import java.io.IOException;
import java.io.Serializable;

public class TrieConfig implements Serializable, Comparable<TrieConfig> {

private boolean allowOverlaps = true;

Expand Down Expand Up @@ -31,4 +34,35 @@ public boolean isCaseInsensitive() {
public void setCaseInsensitive(boolean caseInsensitive) {
this.caseInsensitive = caseInsensitive;
}

/**
 * Writes the three configuration flags as booleans, in the order
 * allowOverlaps, onlyWholeWords, caseInsensitive.
 *
 * @param stream destination stream
 * @throws IOException if writing fails
 */
private void writeObject(java.io.ObjectOutputStream stream)
        throws IOException {
    final boolean[] flags = { allowOverlaps, onlyWholeWords, caseInsensitive };
    for (boolean flag : flags) {
        stream.writeBoolean(flag);
    }
}

/**
 * Reads the three configuration flags back in the order in which they are written:
 * allowOverlaps, onlyWholeWords, caseInsensitive.
 *
 * @param stream source stream
 * @throws IOException if reading fails
 */
private void readObject(java.io.ObjectInputStream stream)
        throws IOException, ClassNotFoundException {
    allowOverlaps = stream.readBoolean();
    onlyWholeWords = stream.readBoolean();
    caseInsensitive = stream.readBoolean();
}

/**
 * Two configurations are equal when {@code compareTo} reports 0 for them.
 *
 * @param obj object to compare with
 * @return {@code true} if {@code obj} is a TrieConfig that compares as 0 to this one
 */
@Override
public boolean equals(Object obj) {
    if (obj instanceof TrieConfig) {
        final TrieConfig that = (TrieConfig) obj;
        return compareTo(that) == 0;
    }
    return false;
}

/**
 * Returns 0 when all three flags match and 1 otherwise.
 *
 * NOTE(review): the result is never negative, so this serves as an equality test only,
 * not as an ordering.
 *
 * @param o configuration to compare with
 * @return 0 if all flags are identical, 1 otherwise
 */
@Override
public int compareTo(TrieConfig o) {
    final boolean sameFlags = this.allowOverlaps == o.allowOverlaps
            && this.caseInsensitive == o.caseInsensitive
            && this.onlyWholeWords == o.onlyWholeWords;
    return sameFlags ? 0 : 1;
}
}
Loading