reorganized index file

unfoldingWord · Aug 31, 2018 · bebaed0 · bebaed0
1 parent 8b1dc5d
commit bebaed0
Show file tree

Hide file tree

Showing 4 changed files with 176 additions and 153 deletions.
diff --git a/src/WordMap.ts b/src/WordMap.ts
@@ -0,0 +1,140 @@
+import Lexer, {Token} from "wordmap-lexer";
+import AlignmentOccurrences from "./algorithms/AlignmentOccurrences";
+import AlignmentPosition from "./algorithms/AlignmentPosition";
+import CharacterLength from "./algorithms/CharacterLength";
+import NgramFrequency from "./algorithms/NgramFrequency";
+import NgramLength from "./algorithms/NgramLength";
+import PhrasePlausibility from "./algorithms/PhrasePlausibility";
+import Uniqueness from "./algorithms/Uniqueness";
+import Engine from "./Engine";
+import Alignment from "./structures/Alignment";
+import Ngram from "./structures/Ngram";
+import Prediction from "./structures/Prediction";
+import Suggestion from "./structures/Suggestion";
+
+/**
+ * Multi-Lingual Word Alignment Prediction.
+ * Wraps an Engine that has a fixed set of algorithms registered on it.
+ */
+export default class WordMap {
+  private readonly engine: Engine;
+
+  /**
+   * Creates the engine and registers the algorithms listed below, in order.
+   * @param {object} opts - options passed unchanged to the Engine constructor
+   */
+  constructor(opts = {}) {
+    this.engine = new Engine(opts);
+    this.engine.registerAlgorithm(new NgramFrequency());
+    this.engine.registerAlgorithm(new AlignmentPosition());
+    this.engine.registerAlgorithm(new PhrasePlausibility());
+    this.engine.registerAlgorithm(new NgramLength());
+    this.engine.registerAlgorithm(new CharacterLength());
+    this.engine.registerAlgorithm(new AlignmentOccurrences());
+    this.engine.registerAlgorithm(new Uniqueness());
+  }
+
+  /**
+   * Adds an array of corpus.
+   * Each entry is a pair: the source text first, the target text second.
+   * @param {string[][]} corpus
+   */
+  public appendCorpus(corpus: string[][]) {
+    for (const pair of corpus) {
+      this.appendCorpusString(pair[0], pair[1]);
+    }
+  }
+
+  /**
+   * Add corpus to the MAP.
+   * These may be single sentences or multiple sentences delimited by new lines.
+   * @param {string} source
+   * @param {string} target
+   */
+  public appendCorpusString(source: string, target: string) {
+    const tokenizeLines = (text: string): Token[][] =>
+      text.split("\n").map((line) => Lexer.tokenize(line));
+
+    this.engine.addCorpus(tokenizeLines(source), tokenizeLines(target));
+  }
+
+  /**
+   * Appends saved alignments by handing them to the engine.
+   * @param {Alignment[]} alignments
+   */
+  public appendSavedAlignments(alignments: Alignment[]) {
+    this.engine.addSavedAlignments(alignments);
+  }
+
+  /**
+   * Appends some saved alignments.
+   * This may be multiple lines of text or a single line.
+   *
+   * @param {string} source - a string of source phrases separated by new lines
+   * @param {string} target - a string of target phrases separated by new lines
+   * @return {Alignment[]} an array of alignment objects (as a convenience)
+   * @throws {Error} if source and target have a different number of lines
+   */
+  public appendSavedAlignmentsString(source: string, target: string): Alignment[] {
+    const sourceLines = source.split("\n");
+    const targetLines = target.split("\n");
+    if (sourceLines.length !== targetLines.length) {
+      throw new Error("source and target lines must be the same length");
+    }
+    const alignments = sourceLines.map((sourceLine, i) => new Alignment(
+      new Ngram(Lexer.tokenize(sourceLine)),
+      new Ngram(Lexer.tokenize(targetLines[i]))
+    ));
+    this.appendSavedAlignments(alignments);
+    return alignments;
+  }
+
+  /**
+   * Predicts the word alignments between the sentences.
+   * @param {string} sourceSentence
+   * @param {string} targetSentence
+   * @param {number} maxSuggestions - defaults to 1
+   * @return {Suggestion[]}
+   */
+  public predict(sourceSentence: string, targetSentence: string, maxSuggestions: number = 1): Suggestion[] {
+    const sourceTokens = Lexer.tokenize(sourceSentence);
+    const targetTokens = Lexer.tokenize(targetSentence);
+
+    let predictions = this.engine.run(sourceTokens, targetTokens);
+    predictions = this.engine.score(predictions);
+    return Engine.suggest(predictions, maxSuggestions);
+  }
+
+  /**
+   * Predicts word alignments between the sentences.
+   * Only predictions matching a benchmark alignment key are used, each once.
+   *
+   * @param {string} sourceSentence
+   * @param {string} targetSentence
+   * @param {Alignment[]} benchmark - the alignments to filter by
+   * @param {number} maxSuggestions - defaults to 1
+   * @return {Suggestion[]}
+   */
+  public predictWithBenchmark(
+    sourceSentence: string,
+    targetSentence: string,
+    benchmark: Alignment[],
+    maxSuggestions: number = 1
+  ): Suggestion[] {
+    const sourceTokens = Lexer.tokenize(sourceSentence);
+    const targetTokens = Lexer.tokenize(targetSentence);
+
+    let predictions = this.engine.run(sourceTokens, targetTokens);
+    predictions = this.engine.score(predictions);
+
+    const validPredictions: Prediction[] = [];
+    for (const p of predictions) {
+      // "some" stops at the first match, so a benchmark that repeats a key
+      // cannot add the same prediction more than once.
+      if (benchmark.some((a) => a.key === p.alignment.key)) {
+        validPredictions.push(p);
+      }
+    }
+    return Engine.suggest(validPredictions, maxSuggestions);
+  }
+}
diff --git a/src/__tests__/map.ts b/src/__tests__/map.ts
@@ -1,8 +1,8 @@
 import * as fs from "fs-extra";
 import * as path from "path";
-import WordMap from "../index";
 import Alignment from "../structures/Alignment";
 import {makeMockAlignment} from "../util/testUtils";
+import WordMap from "../WordMap";
 
 describe("MAP", () => {
 
@@ -170,7 +170,8 @@ describe("MAP", () => {
       );
       const predictions = suggestions[0].getPredictions();
       expect(predictions).toHaveLength(6);
-      expect(predictions[4].key).not.toEqual("n:φιλάνδρους->n:love:their:own:husbands");
+      expect(predictions[4].key).not.toEqual(
+        "n:φιλάνδρους->n:love:their:own:husbands");
       expect(predictions[5].key).toEqual("n:φιλοτέκνους->n:and:children");
     });
 
@@ -185,7 +186,8 @@ describe("MAP", () => {
       );
       const predictions = suggestions[0].getPredictions();
       expect(predictions).toHaveLength(7);
-      expect(predictions[4].key).toEqual("n:φιλάνδρους->n:love:their:own:husbands");
+      expect(predictions[4].key).toEqual(
+        "n:φιλάνδρους->n:love:their:own:husbands");
       expect(predictions[6].key).toEqual("n:φιλοτέκνους->n:and:children");
     });
   });

diff --git a/src/__tests__/titus_map.ts b/src/__tests__/titus_map.ts
@@ -1,22 +1,25 @@
 import * as fs from "fs-extra";
 import * as path from "path";
-import WordMap from "../index";
 import Alignment from "../structures/Alignment";
 import {makeMockAlignment, scoreSuggestion} from "../util/testUtils";
+import WordMap from "../WordMap";
 
 describe("MAP predictions in Titus", () => {
   const greek = path.join(__dirname, "fixtures/corpus/tit/greek.txt");
   const english = path.join(__dirname, "fixtures/corpus/tit/english.txt");
   const map = new WordMap();
-  loadCorpus(map, greek,  english);
+  loadCorpus(map, greek, english);
 
   it("predicts the first verse", () => {
     const unalignedPair = [
       "Παῦλος, δοῦλος Θεοῦ, ἀπόστολος δὲ Ἰησοῦ Χριστοῦ, κατὰ πίστιν ἐκλεκτῶν Θεοῦ, καὶ ἐπίγνωσιν ἀληθείας, τῆς κατ’ εὐσέβειαν",
       "Paul a servant of God and an apostle of Jesus Christ for the faith of God s chosen people and the knowledge of the truth that agrees with godliness"
     ];
     const suggestions = map.predict(unalignedPair[0], unalignedPair[1], 2);
-    const chapterOneAlignmentPath = path.join(__dirname, "fixtures/corpus/tit/alignmentData/1.json");
+    const chapterOneAlignmentPath = path.join(
+      __dirname,
+      "fixtures/corpus/tit/alignmentData/1.json"
+    );
     scoreSuggestion(suggestions[0], getAlignments(chapterOneAlignmentPath, 1));
     console.log("suggestions\n", suggestions.map((s) => {
       return s.toString();
@@ -42,12 +45,24 @@ describe("MAP predictions in Titus", () => {
     benchmark.push(makeMockAlignment("ἀδελφοὺς", "brothers"));
     benchmark.push(makeMockAlignment("αὐτοῦ", "his"));
 
-    console.log("suggestions\n", map.predict(secondUnalignedPair[0], secondUnalignedPair[1], 2).map((s) => {
-      return s.toString();
-    }).join("\n"));
-    console.log("benchmarks\n", map.predictWithBenchmark(secondUnalignedPair[0], secondUnalignedPair[1], benchmark, 2).map((s) => {
-      return s.toString();
-    }).join("\n"));
+    console.log(
+      "suggestions\n",
+      map.predict(secondUnalignedPair[0], secondUnalignedPair[1], 2)
+        .map((s) => {
+          return s.toString();
+        })
+        .join("\n")
+    );
+    console.log(
+      "benchmarks\n",
+      map.predictWithBenchmark(secondUnalignedPair[0],
+        secondUnalignedPair[1],
+        benchmark,
+        2
+      ).map((s) => {
+        return s.toString();
+      }).join("\n")
+    );
   });
 });
 
@@ -73,6 +88,7 @@ function loadCorpus(map: WordMap, sourcePath: string, targetPath: string) {
  * @return {object}
  */
 function getAlignments(filePath: string, verse: number): object {
-  const verseAlignments = JSON.parse(fs.readFileSync(filePath).toString("utf-8"));
+  const verseAlignments = JSON.parse(fs.readFileSync(filePath)
+    .toString("utf-8"));
   return verseAlignments[verse.toString()].alignments;
 }
diff --git a/src/index.ts b/src/index.ts
@@ -1,140 +1,5 @@
-import AlignmentOccurrences from "./algorithms/AlignmentOccurrences";
-import AlignmentPosition from "./algorithms/AlignmentPosition";
-import CharacterLength from "./algorithms/CharacterLength";
-import NgramFrequency from "./algorithms/NgramFrequency";
-import NgramLength from "./algorithms/NgramLength";
-import PhrasePlausibility from "./algorithms/PhrasePlausibility";
-import Uniqueness from "./algorithms/Uniqueness";
-import Engine from "./Engine";
-import Lexer, {Token} from "wordmap-lexer";
-import Alignment from "./structures/Alignment";
-import Ngram from "./structures/Ngram";
-import Prediction from "./structures/Prediction";
-import Suggestion from "./structures/Suggestion";
-
-/**
- * Multi-Lingual Word Alignment Prediction
- */
-export default class WordMap {
-  private engine: Engine;
-
-  constructor(opts = {}) {
-
-    this.engine = new Engine(opts);
-    this.engine.registerAlgorithm(new NgramFrequency());
-    this.engine.registerAlgorithm(new AlignmentPosition());
-    this.engine.registerAlgorithm(new PhrasePlausibility());
-    this.engine.registerAlgorithm(new NgramLength());
-    this.engine.registerAlgorithm(new CharacterLength());
-    this.engine.registerAlgorithm(new AlignmentOccurrences());
-    this.engine.registerAlgorithm(new Uniqueness());
-  }
-
-  /**
-   * Adds an array of corpus
-   * @param {string[][]} corpus
-   */
-  public appendCorpus(corpus: string[][]) {
-    for (const pair of corpus) {
-      this.appendCorpusString(pair[0], pair[1]);
-    }
-  }
-
-  /**
-   * Add corpus to the MAP.
-   * These may be single sentences or multiple sentence delimited by new lines.
-   * @param {string} source
-   * @param {string} target
-   */
-  public appendCorpusString(source: string, target: string) {
-    const sourceSentences = source.split("\n");
-    const targetSentences = target.split("\n");
-    const sourceTokens: Token[][] = [];
-    const targetTokens: Token[][] = [];
-
-    for (const s of sourceSentences) {
-      sourceTokens.push(Lexer.tokenize(s));
-    }
-    for (const s of targetSentences) {
-      targetTokens.push(Lexer.tokenize(s));
-    }
-
-    this.engine.addCorpus(sourceTokens, targetTokens);
-  }
-
-  public appendSavedAlignments(alignments: Alignment[]) {
-    this.engine.addSavedAlignments(alignments);
-  }
-
-  /**
-   * Appends some saved alignments.
-   * This may be multiple lines of text or a single line.
-   *
-   * @param {string} source - a string of source phrases separated by new lines
-   * @param {string} target - a string of target phrases separated by new lines
-   * @return {Alignment[]} an array of alignment objects (as a convenience)
-   */
-  public appendSavedAlignmentsString(source: string, target: string): Alignment[] {
-    const alignments: Alignment[] = [];
-    const sourceLines = source.split("\n");
-    const targetLines = target.split("\n");
-    const sourceLinesLength = sourceLines.length;
-    if (sourceLinesLength !== targetLines.length) {
-      throw new Error("source and target lines must be the same length");
-    }
-    for (let i = 0; i < sourceLinesLength; i++) {
-      const sourceTokens = Lexer.tokenize(sourceLines[i]);
-      const targetTokens = Lexer.tokenize(targetLines[i]);
-      alignments.push(new Alignment(
-        new Ngram(sourceTokens),
-        new Ngram(targetTokens)
-      ));
-    }
-    this.appendSavedAlignments(alignments);
-    return alignments;
-  }
-
-  /**
-   * Predicts the word alignments between the sentences
-   * @param {string} sourceSentence
-   * @param {string} targetSentence
-   * @param {number} maxSuggestions
-   * @return {Suggestion[]}
-   */
-  public predict(sourceSentence: string, targetSentence: string, maxSuggestions: number = 1): Suggestion[] {
-    const sourceTokens = Lexer.tokenize(sourceSentence);
-    const targetTokens = Lexer.tokenize(targetSentence);
-
-    let predictions = this.engine.run(sourceTokens, targetTokens);
-    predictions = this.engine.score(predictions);
-    return Engine.suggest(predictions, maxSuggestions);
-  }
-
-  /**
-   * Predicts word alignments between the sentences.
-   * Returns an array of suggestions that match the benchmark.
-   *
-   * @param {string} sourceSentence
-   * @param {string} targetSentence
-   * @param {Suggestion} benchmark
-   * @param {number} maxSuggestions
-   * @return {Suggestion[]}
-   */
-  public predictWithBenchmark(sourceSentence: string, targetSentence: string, benchmark: Alignment[], maxSuggestions: number = 1): Suggestion[] {
-    const sourceTokens = Lexer.tokenize(sourceSentence);
-    const targetTokens = Lexer.tokenize(targetSentence);
-
-    let predictions = this.engine.run(sourceTokens, targetTokens);
-    predictions = this.engine.score(predictions);
-
-    const validPredictions: Prediction[] = [];
-    for (const p of predictions) {
-      for (const a of benchmark) {
-        if (a.key === p.alignment.key) {
-          validPredictions.push(p);
-        }
-      }
-    }
-    return Engine.suggest(validPredictions, maxSuggestions);
-  }
-}
+export {default} from "./WordMap";
+export {default as Alignment} from "./structures/Alignment";
+export {default as Ngram} from "./structures/Ngram";
+export {default as Prediction} from "./structures/Prediction";
+export {default as Suggestion} from "./structures/Suggestion";