This repository has been archived by the owner on May 29, 2020. It is now read-only.

Commit

Merge pull request #21 from bethard/master
Renames StringAnnotation to Span, adds StringSlab alias
jasonbaldridge committed Jul 22, 2013
2 parents d356125 + d30fa19 commit b3d3421
Showing 4 changed files with 50 additions and 34 deletions.
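The change is mostly mechanical: the base annotation trait StringAnnotation becomes Span, and the Slab companion gains a StringSlab type alias so that Slab[String, Span, A] can be written as StringSlab[A] throughout the signatures below. As orientation, here is a minimal, self-contained sketch of that alias pattern; the simplified Slab and Sentence classes are illustrative stand-ins, not chalk's actual types.

    // Illustrative stand-ins only; chalk's real Slab also carries annotation
    // lookup, a HasBounds instance, and a ++ method that this toy omits.
    object AliasSketch {
      trait Span { def begin: Int; def end: Int }
      final case class Sentence(begin: Int, end: Int) extends Span

      // Content plus the annotations attached to it so far.
      final case class Slab[Content, Base, A <: Base](content: Content, annotations: Vector[A])

      // The alias collapses the three-parameter type into one parameter,
      // mirroring `type StringSlab[+AnnotationTypes <: Span] = Slab[String, Span, AnnotationTypes]`.
      type StringSlab[A <: Span] = Slab[String, Span, A]

      def main(args: Array[String]): Unit = {
        val full: Slab[String, Span, Sentence] =
          Slab[String, Span, Sentence]("Here is an example text.", Vector(Sentence(0, 24)))
        val short: StringSlab[Sentence] = full // the same type, spelled with the alias
        println(short.annotations.map(s => short.content.substring(s.begin, s.end)))
      }
    }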
20 changes: 11 additions & 9 deletions src/main/scala/chalk/slab/AnalysisEngine.scala
@@ -25,14 +25,14 @@ object AnalysisComponent {
case class Process[C,B,I<:B](slab: Slab[C,B,I])
}

-trait StringAnalysisComponent[I<:StringAnnotation,O<:StringAnnotation]
-  extends AnalysisComponent[String,StringAnnotation,I,O]
+trait StringAnalysisComponent[I<:Span,O<:Span]
+  extends AnalysisComponent[String,Span,I,O]

/**
* An actor that uses SentenceSegmenter.
*/
-class SentenceSegmenterActor extends SentenceSegmenter[StringAnnotation]
-  with StringAnalysisComponent[StringAnnotation,Sentence]
+class SentenceSegmenterActor extends SentenceSegmenter[Span]
+  with StringAnalysisComponent[Span,Sentence]

/**
* An actor that uses Tokenizer.
@@ -51,7 +51,8 @@ class AnalysisEngine extends Actor with ActorLogging {

import AnalysisComponent._
import AnalysisEngine._
-  import StringAnnotation._
+  import Span._
+  import Slab.StringSlab
implicit val ec = context.dispatcher
implicit val timeout = Timeout(10 seconds)

@@ -62,8 +63,8 @@ class AnalysisEngine extends Actor with ActorLogging {
case Process(slab) =>
log.info("Processing slab:\n " + slab.content)
(for {
-        slab1 <- (sentenceSegmenter ? Process(slab)).mapTo[Slab[String,StringAnnotation,Sentence]]
-        slab2 <- (tokenizer ? Process(slab1)).mapTo[Slab[String,StringAnnotation,Sentence with Token]]
+        slab1 <- (sentenceSegmenter ? Process(slab)).mapTo[StringSlab[Sentence]]
+        slab2 <- (tokenizer ? Process(slab1)).mapTo[StringSlab[Sentence with Token]]
} yield {
slab2
}) pipeTo sender
@@ -81,7 +82,8 @@ object AnalysisEngine {
case class ProcessCorpus(corpus: Iterator[String])

import AnalysisComponent._
-  import StringAnnotation._
+  import Span._
+  import Slab.StringSlab

val text1 = "Here is an example text. It has four sentences and it mentions Jimi Hendrix and Austin, Texas! In this third sentence, it also brings up Led Zeppelin and Radiohead, but does it ask a question? It also has a straggler sentence that doesn't end with punctuation"

@@ -99,7 +101,7 @@ object AnalysisEngine {
val corpus = Iterator(text1,text2,text3)

for {
-      slabs <- (engine ? ProcessCorpus(corpus)).mapTo[Iterator[Slab[String,StringAnnotation,Sentence with Token]]]
+      slabs <- (engine ? ProcessCorpus(corpus)).mapTo[Iterator[StringSlab[Sentence with Token]]]
slab <- slabs
} {
// Notice that the last sentence (lacking EOS char) is missing.
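The receive block above chains two actor asks in a for-comprehension, narrowing each untyped reply with mapTo before piping the final slab back to the sender. Below is a standalone sketch of that shape using plain scala.concurrent futures; askSegmenter and askTokenizer are hypothetical placeholders standing in for `sentenceSegmenter ? Process(...)` and `tokenizer ? Process(...)`.

    import scala.concurrent.{Await, Future}
    import scala.concurrent.duration._
    import scala.concurrent.ExecutionContext.Implicits.global

    object AskShapeSketch {
      // Placeholders for the actor asks; a real engine gets Future[Any] back from `actorRef ? msg`.
      def askSegmenter(text: String): Future[Any] =
        Future(Vector("Here is an example text.", "It has two sentences!"))
      def askTokenizer(sentences: Vector[String]): Future[Any] =
        Future(sentences.map(_.split("\\s+").toVector))

      def main(args: Array[String]): Unit = {
        val result = for {
          sentences <- askSegmenter("Here is an example text. It has two sentences!").mapTo[Vector[String]] // first stage, like the segmenter ask
          tokens    <- askTokenizer(sentences).mapTo[Vector[Vector[String]]]                                  // second stage, like the tokenizer ask
        } yield tokens
        println(Await.result(result, 10.seconds))
      }
    }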
22 changes: 12 additions & 10 deletions src/main/scala/chalk/slab/AnalysisFunction.scala
@@ -1,5 +1,7 @@
package chalk.slab

+import Slab.StringSlab

/**
* An analysis function that takes a Slab with declared annotation types in it and outputs
* a new Slab with additional annotations of a new type.
@@ -12,17 +14,17 @@ package chalk.slab
*/
trait AnalysisFunction[C,B,I<:B,O<:B] extends (Slab[C,B,I] => Slab[C,B,B with I with O])

-trait StringAnalysisFunction[I<:StringAnnotation,O<:StringAnnotation] extends (Slab[String,StringAnnotation,I] => Slab[String,StringAnnotation,StringAnnotation with I with O])
+trait StringAnalysisFunction[I<:Span,O<:Span] extends (StringSlab[I] => StringSlab[Span with I with O])

-object StringIdentityAnalyzer extends StringAnalysisFunction[StringAnnotation, StringAnnotation] {
-  def apply(slab: Slab[String, StringAnnotation, StringAnnotation]) = slab
+object StringIdentityAnalyzer extends StringAnalysisFunction[Span, Span] {
+  def apply(slab: StringSlab[Span]) = slab
}

/**
* A simple regex sentence segmenter.
*/
-trait SentenceSegmenter[I <: StringAnnotation] extends StringAnalysisFunction[I, Sentence] {
-  def apply(slab: Slab[String, StringAnnotation, I]) =
+trait SentenceSegmenter[I <: Span] extends StringAnalysisFunction[I, Sentence] {
+  def apply(slab: StringSlab[I]) =
// the [Sentence] is required because of https://issues.scala-lang.org/browse/SI-7647
slab.++[Sentence]("[^\\s.!?]+[^.!?]+[.!?]".r.findAllMatchIn(slab.content).map(m => Sentence(m.start, m.end)))
}
@@ -31,7 +33,7 @@ trait SentenceSegmenter[I <: StringAnnotation] extends StringAnalysisFunction[I,
* A simple regex tokenizer.
*/
trait Tokenizer[I <: Sentence] extends StringAnalysisFunction[I, Token] {
-  def apply(slab: Slab[String, StringAnnotation, I]) =
+  def apply(slab: StringSlab[I]) =
// the [Token] is required because of https://issues.scala-lang.org/browse/SI-7647
slab.++[Token](slab.iterator[Sentence].flatMap(sentence =>
"\\p{L}+|\\p{P}+|\\p{N}+".r.findAllMatchIn(sentence.in(slab).content).map(m =>
@@ -40,15 +42,15 @@ trait Tokenizer[I <: Sentence] extends StringAnalysisFunction[I, Token] {


object AnalysisPipeline {
-  import StringAnnotation._
+  import Span._

// added only to demonstrate necesssity of [I] parameter on analyzers
-  private[AnalysisPipeline] case class Document(val begin: Int, val end: Int) extends StringAnnotation
-  private[AnalysisPipeline] def documentAdder(slab: Slab[String, StringAnnotation, StringAnnotation]) =
+  private[AnalysisPipeline] case class Document(val begin: Int, val end: Int) extends Span
+  private[AnalysisPipeline] def documentAdder(slab: StringSlab[Span]) =
slab ++ Iterator(Document(0, slab.content.length))

def main (args: Array[String]) {
-    def sentenceSegmenter[I <: StringAnnotation] = new SentenceSegmenter[I]{}
+    def sentenceSegmenter[I <: Span] = new SentenceSegmenter[I]{}
def tokenizer[I <: Sentence] = new Tokenizer[I]{}
val pipeline = StringIdentityAnalyzer andThen documentAdder andThen sentenceSegmenter andThen tokenizer
val slab = pipeline(Slab(AnalysisEngine.text1))
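Each analyzer in this file is ultimately just a function from one slab type to another, which is why AnalysisPipeline.main can build the pipeline with plain andThen. Here is a toy version of that composition with ordinary Function1 values; the Doc record is a crude hypothetical stand-in for a slab (it drops the type-level tracking of annotation types), while the regexes are the ones from the diff.

    object ComposeSketch {
      // Crude stand-in for a slab: the text plus whatever annotations have accumulated.
      final case class Doc(content: String,
                           sentences: Vector[(Int, Int)] = Vector.empty,
                           tokens: Vector[(Int, Int)] = Vector.empty)

      private val sentenceRe = "[^\\s.!?]+[^.!?]+[.!?]".r
      private val tokenRe    = "\\p{L}+|\\p{P}+|\\p{N}+".r

      val segment: Doc => Doc = d =>
        d.copy(sentences = sentenceRe.findAllMatchIn(d.content).map(m => (m.start, m.end)).toVector)

      val tokenize: Doc => Doc = d =>
        d.copy(tokens = d.sentences.flatMap { case (b, e) =>
          tokenRe.findAllMatchIn(d.content.substring(b, e)).map(m => (b + m.start, b + m.end)).toVector
        })

      def main(args: Array[String]): Unit = {
        val pipeline = segment andThen tokenize // composition, as in the real pipeline
        val out = pipeline(Doc("Here is an example text. It has two sentences!"))
        println(out.tokens.map { case (b, e) => out.content.substring(b, e) })
      }
    }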
34 changes: 22 additions & 12 deletions src/main/scala/chalk/slab/Slab.scala
@@ -38,34 +38,44 @@ abstract class SlabAnnotationOps[ContentType, BaseAnnotationType, AnnotationType
// =========================
// Annotation infrastructure
// =========================
-trait StringAnnotation {
+trait Span {
val begin: Int
val end: Int
-  def in[AnnotationTypes <: StringAnnotation](slab: Slab[String, StringAnnotation, AnnotationTypes]) =
-    new SlabAnnotationOps(this, slab) {
-      def content = this.slab.content.substring(this.annotation.begin, this.annotation.end)
-    }
}

-object StringAnnotation {
-  implicit object StringAnnotationHasBounds extends Slab.HasBounds[StringAnnotation] {
-    def covers(annotation1: StringAnnotation, annotation2: StringAnnotation): Boolean =
+object Span {
+  implicit class SpanInStringSlab(val span: Span) extends AnyVal {
+    def in[AnnotationTypes <: Span](slab: Slab.StringSlab[AnnotationTypes]) =
+      new StringSpanAnnotationOps(this.span, slab)
+  }
+
+  class StringSpanAnnotationOps[AnnotationType >: AnnotationTypes <: Span: ClassTag, AnnotationTypes <: Span](
+    annotation: AnnotationType,
+    slab: Slab.StringSlab[AnnotationTypes])
+    extends SlabAnnotationOps[String, Span, AnnotationType, AnnotationTypes](annotation, slab) {
+    def content = this.slab.content.substring(this.annotation.begin, this.annotation.end)
+  }
+
+  implicit object StringAnnotationHasBounds extends Slab.HasBounds[Span] {
+    def covers(annotation1: Span, annotation2: Span): Boolean =
annotation1.begin <= annotation2.begin && annotation2.end <= annotation1.end
-    def follows(annotation1: StringAnnotation, annotation2: StringAnnotation): Boolean =
+    def follows(annotation1: Span, annotation2: Span): Boolean =
annotation2.end <= annotation1.begin
-    def precedes(annotation1: StringAnnotation, annotation2: StringAnnotation): Boolean =
+    def precedes(annotation1: Span, annotation2: Span): Boolean =
annotation1.end <= annotation2.begin
}
}

// ===========
// Annotations
// ===========
-case class Sentence(val begin: Int, val end: Int) extends StringAnnotation
-case class Token(val begin: Int, val end: Int) extends StringAnnotation
+case class Sentence(val begin: Int, val end: Int) extends Span
+case class Token(val begin: Int, val end: Int) extends Span


object Slab {
+  type StringSlab[+AnnotationTypes <: Span] = Slab[String, Span, AnnotationTypes]

def apply[ContentType, BaseAnnotationType: HasBounds](content: ContentType): Slab[ContentType, BaseAnnotationType, BaseAnnotationType] =
new HorribleInefficientSlab(content)

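The HasBounds instance above reduces to three interval predicates over begin/end offsets. Here they are copied into a standalone check so the definitions can be read at a glance; the S case class is just a throwaway stand-in for Span.

    object BoundsSketch {
      final case class S(begin: Int, end: Int)

      // Same bodies as StringAnnotationHasBounds above.
      def covers(a: S, b: S): Boolean   = a.begin <= b.begin && b.end <= a.end
      def follows(a: S, b: S): Boolean  = b.end <= a.begin // a starts at or after b's end
      def precedes(a: S, b: S): Boolean = a.end <= b.begin // a ends at or before b's start

      def main(args: Array[String]): Unit = {
        val sentence = S(0, 25)
        val token    = S(5, 9)
        val later    = S(30, 35)
        println(covers(sentence, token))   // true: the token lies inside the sentence
        println(precedes(sentence, later)) // true: the sentence ends before `later` begins
        println(follows(later, sentence))  // true: `later` starts after the sentence ends
      }
    }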
8 changes: 5 additions & 3 deletions src/test/scala/chalk/slab/SlabTest.scala
@@ -10,12 +10,14 @@ class SlabTest extends FunSuite {
// =========
// Analyzers
// =========
-  val stringBegin = (slab: Slab[String, StringAnnotation, StringAnnotation]) => slab
+  import Slab.StringSlab
+
+  val stringBegin = (slab: StringSlab[Span]) => slab

-  def sentenceSegmenter[AnnotationTypes <: StringAnnotation](slab: Slab[String, StringAnnotation, AnnotationTypes]) =
+  def sentenceSegmenter[AnnotationTypes <: Span](slab: StringSlab[AnnotationTypes]) =
slab ++ "[^\\s.!?]+[^.!?]+[.!?]".r.findAllMatchIn(slab.content).map(m => Sentence(m.start, m.end))

-  def tokenizer[AnnotationTypes <: Sentence](slab: Slab[String, StringAnnotation, AnnotationTypes]) =
+  def tokenizer[AnnotationTypes <: Sentence](slab: StringSlab[AnnotationTypes]) =
slab ++ slab.iterator[Sentence].flatMap(sentence =>
"\\p{L}+|\\p{P}+|\\p{N}+".r.findAllMatchIn(sentence.in(slab).content).map(m =>
Token(sentence.begin + m.start, sentence.begin + m.end)))
