Skip to content
This repository has been archived by the owner on May 29, 2020. It is now read-only.

Commit

Permalink
#18 progress: changed analyzers to be functions so that processing ca…
Browse files Browse the repository at this point in the history
…n be done w/ or w/o Actors.
  • Loading branch information
jasonbaldridge committed Jul 21, 2013
1 parent 856c767 commit 878bbb9
Showing 1 changed file with 47 additions and 20 deletions.
67 changes: 47 additions & 20 deletions src/main/scala/chalk/slab/AnalysisEngine.scala
Original file line number Diff line number Diff line change
Expand Up @@ -6,41 +6,68 @@ import akka.util.Timeout
import scala.collection.mutable.ListBuffer
import scala.concurrent.duration._

trait AnalysisComponent[X,Y,-Z<:Y,+W<:Y] extends Actor with ActorLogging {

/**
* An analysis function that takes a Slab with declared annotation types in it and outputs
* a new Slab with additional annotations of a new type.
*
* It is an ordinary function from `Slab[C,B,I]` to `Slab[C,B,B with O]`, so it can be
* applied directly, without an actor.
*
* @tparam C Content type
* @tparam B Base annotation type
* @tparam I Input annotation type (contravariant, bounded above by B)
* @tparam O Output annotation type (covariant, bounded above by B)
*/
trait AnalysisFunction[C,B,-I<:B,+O<:B] extends (Slab[C,B,I] => Slab[C,B,B with O])

/**
* An actor that mixes in an AnalysisFunction, applies it to the Slab carried by each
* Process message, and sends the resulting Slab back to the sender.
*/
trait AnalysisComponent[C,B,-I<:B,+O<:B] extends Actor with ActorLogging with AnalysisFunction[C,B,I,O] {
import AnalysisComponent._

def process(slab: Slab[X,Y,Z]): Slab[X,Y,Y with W]

def receive = {
case Process(slab) =>
sender ! process(slab.asInstanceOf[Slab[X,Y,Z]])
case Process(slab) => sender ! apply(slab.asInstanceOf[Slab[C,B,I]])
}

}

/**
* Companion object, e.g. to hold messages that can be processed by an AnalysisComponent actor.
*/
object AnalysisComponent {
case class Process[X,Y,Z<:Y](slab: Slab[X,Y,Z])
case class Process[C,B,I<:B](slab: Slab[C,B,I])
}

class SentenceSegmenterActor[AnnotationTypes <: StringAnnotation]
extends AnalysisComponent[String, StringAnnotation, AnnotationTypes, Sentence] {

def process(slab: Slab[String, StringAnnotation, AnnotationTypes]) =
/**
* Regex-based sentence segmentation.
*
* Every match of the sentence pattern in the slab's content is added to the slab as a
* Sentence annotation built from the match's start and end offsets.
*/
trait SentenceSegmenter extends AnalysisFunction[String, StringAnnotation, StringAnnotation, Sentence] {
  def apply(slab: Slab[String, StringAnnotation, StringAnnotation]) = {
    // One or more chars that are neither whitespace nor . ! ?, then one or more chars
    // that are not . ! ?, then a single terminator (. ! or ?).
    val sentencePattern = "[^\\s.!?]+[^.!?]+[.!?]".r
    val sentences =
      for (m <- sentencePattern.findAllMatchIn(slab.content)) yield Sentence(m.start, m.end)
    slab ++ sentences
  }
}

class TokenizerActor[AnnotationTypes <: Sentence]
extends AnalysisComponent[String, StringAnnotation, AnnotationTypes, Token] {

def process(slab: Slab[String, StringAnnotation, AnnotationTypes]) =
/**
* An actor that uses SentenceSegmenter.
*
* The class adds no members of its own: it only combines the SentenceSegmenter function
* with the AnalysisComponent actor trait, instantiated for String content with
* StringAnnotation input and Sentence output.
*/
class SentenceSegmenterActor extends SentenceSegmenter
with AnalysisComponent[String,StringAnnotation,StringAnnotation,Sentence]

/**
* A simple regex tokenizer.
*
* For each Sentence annotation in the slab, every run of letters (`\p{L}`), punctuation
* (`\p{P}`) or numeric characters (`\p{N}`) found in the content of `sentence.in(slab)`
* becomes a Token. Match offsets are relative to that content, so each one is shifted by
* the sentence's begin offset before the Token is built.
*/
trait Tokenizer extends AnalysisFunction[String, StringAnnotation, Sentence, Token] {
  def apply(slab: Slab[String, StringAnnotation, Sentence]) = {
    // Compile the pattern once per call instead of once per sentence.
    val tokenPattern = "\\p{L}+|\\p{P}+|\\p{N}+".r
    val tokens = for {
      sentence <- slab.iterator[Sentence]
      m <- tokenPattern.findAllMatchIn(sentence.in(slab).content)
    } yield Token(sentence.begin + m.start, sentence.begin + m.end)
    slab ++ tokens
  }
}

/**
* An actor that uses Tokenizer.
*
* The class adds no members of its own: it only combines the AnalysisComponent actor trait
* (String content, Sentence input, Token output) with the Tokenizer function.
*/
class TokenizerActor extends AnalysisComponent[String, StringAnnotation, Sentence, Token] with Tokenizer

/**
* Example application doing actor based Slab processing.
*/
object AnalysisEngine {

import AnalysisComponent._
Expand All @@ -56,8 +83,8 @@ object AnalysisEngine {
implicit val timeout = Timeout(10 seconds)


val sentenceSegmenter = system.actorOf(Props[SentenceSegmenterActor[StringAnnotation]])
val tokenizer = system.actorOf(Props[TokenizerActor[Sentence]])
val sentenceSegmenter = system.actorOf(Props[SentenceSegmenterActor])
val tokenizer = system.actorOf(Props[TokenizerActor])

for {
slab1 <- (sentenceSegmenter ? Process(slab)).mapTo[Slab[String,StringAnnotation,Sentence]]
Expand Down

0 comments on commit 878bbb9

Please sign in to comment.