From 51b8c0c07dbb76ad65698e47858ba9a87d67bdd4 Mon Sep 17 00:00:00 2001 From: Eugene Koontz Date: Sun, 1 Sep 2024 09:28:20 -0400 Subject: [PATCH] working on target-specific source language models for example: an English language model (source) which is specific to Spanish (target) --- resources/english/lexicon/pronouns.edn | 80 +------------------ resources/english/lexicon/rules/es.edn | 65 ++++++++++++++- resources/english/models/es.edn | 2 +- .../espa\303\261ol/lexicon/pronouns.edn" | 24 ++++-- "resources/espa\303\261ol/lexicon/rules.edn" | 13 +++ src/menard/english.cljc | 36 ++++----- src/menard/english/es.cljc | 29 +++++++ "src/menard/espa\303\261ol.cljc" | 2 +- src/menard/lexiconfn.cljc | 2 +- src/menard/model.cljc | 4 +- src/menard/translate.cljc | 8 +- src/menard/translate/es_en.cljc | 21 ++++- test/menard/test/translate/en_es.cljc | 2 +- 13 files changed, 172 insertions(+), 116 deletions(-) create mode 100644 src/menard/english/es.cljc diff --git a/resources/english/lexicon/pronouns.edn b/resources/english/lexicon/pronouns.edn index f8615ac93..5d20a6f75 100644 --- a/resources/english/lexicon/pronouns.edn +++ b/resources/english/lexicon/pronouns.edn @@ -171,84 +171,8 @@ :sem {:pred :what :ref {:human? true}} :wh-word true}] - "you" [{:agr {:person :2nd - :number :sing} - :case :nom - :sense 1 - :note [:informal :singular] - :sem {:pred :you - :ref {:context :informal}}} - - {:agr {:person :2nd - :gender :masc - :number :plur} - :sense 2 - :case :nom - :note [:informal :masculine :plural] - :sem {:pred :you - :ref {:context :informal}}} - - {:agr {:person :2nd - :gender :fem - :number :plur} - :case :nom - :sense 3 - :note [:informal :feminine :plural] - :sem {:pred :you - :ref {:context :informal} - :person-not :1st}} - - {:agr {:person :2nd - :number :sing} - :case :nom - :sense 4 - :note [:formal :singular] - :sem {:pred :you - :ref {:context :formal}}} - - {:agr {:person :2nd - :number :plur} - :case :nom - :sense 5 - :note [:formal :plural] - :sem {:pred :you - :ref {:context :formal}}} - - {:agr {:person :2nd - :number :sing} - :case :acc - :sense 6 - :note [:informal :singular] - :sem {:pred :you - :ref {:context :informal} - :person-not :1st}} - - {:agr {:person :2nd - :number :plur} - :case :acc - :sense 7 - :note [:informal :plural] - :sem {:pred :you - :ref {:context :informal} - :person-not :1st}} - - {:agr {:person :2nd - :number :sing} - :sense 8 - :case :acc - :note [:formal :singular] - :sem {:pred :you - :ref {:context :formal} - :person-not :1st}} - - {:agr {:person :2nd - :number :plur} - :sense 9 - :case :acc - :note [:formal :plural] - :sem {:pred :you - :ref {:context :formal} - :person-not :1st}}] + "you" [{:agr {:person :2nd} + :sem {:pred :you}}] "yourself" [{:case :acc :agr {:number :sing diff --git a/resources/english/lexicon/rules/es.edn b/resources/english/lexicon/rules/es.edn index 5c5017f9f..652143ff6 100644 --- a/resources/english/lexicon/rules/es.edn +++ b/resources/english/lexicon/rules/es.edn @@ -1,3 +1,64 @@ [{:rule :pronouns-2p - :if :top - :then [{}]}] + :if {:canonical "you"} + :then [{:agr {:number :sing} + :case :nom + :sense 1 + :note [:informal :singular] + :sem {:ref {:context :informal}}} + + {:agr {:gender :masc + :number :plur} + :sense 2 + :case :nom + :note [:informal :masculine :plural] + :sem {:ref {:context :informal}}} + + {:agr {:gender :fem + :number :plur} + :case :nom + :sense 3 + :note [:informal :feminine :plural] + :sem {:ref {:context :informal} + :person-not :1st}} + + {:agr {:number :sing} + :case :nom + :sense 4 + :note [:formal :singular] + :sem {:ref {:context :formal}}} + + {:agr {:number :plur} + :case :nom + :sense 5 + :note [:formal :plural] + :sem {:ref {:context :formal}}} + + {:agr {:number :sing} + :case :acc + :sense 6 + :note [:informal :singular] + :sem {:ref {:context :informal} + :person-not :1st}} + + {:agr {:number :plur} + :case :acc + :sense 7 + :note [:informal :plural] + :sem {:ref {:context :informal} + :person-not :1st}} + + {:agr {:number :sing} + :sense 8 + :case :acc + :note [:formal :singular] + :sem {:ref {:context :formal} + :person-not :1st}} + + {:agr {:person :2nd + :number :plur} + :sense 9 + :case :acc + :note [:formal :plural] + :sem {:ref {:context :formal} + :person-not :1st}}]}] + diff --git a/resources/english/models/es.edn b/resources/english/models/es.edn index a119d035a..ea4ddee7a 100644 --- a/resources/english/models/es.edn +++ b/resources/english/models/es.edn @@ -6,7 +6,7 @@ "verbs.edn"]} :grammar "english/grammar.edn" :lexicon {:path "english/lexicon" - :rules ["rules.edn" "es.edn"] + :rules ["rules.edn" "rules/es.edn"] :sources {"adjectives.edn" {:u {:cat :adjective}} "adverbs.edn" {:u {:cat :adverb}} "exclamations.edn" {:u {:cat :exclamation}} diff --git "a/resources/espa\303\261ol/lexicon/pronouns.edn" "b/resources/espa\303\261ol/lexicon/pronouns.edn" index b62e3a3bc..5b668c9de 100644 --- "a/resources/espa\303\261ol/lexicon/pronouns.edn" +++ "b/resources/espa\303\261ol/lexicon/pronouns.edn" @@ -147,13 +147,23 @@ ] - "te" [{:agr {:number :sing - :person :2nd - :formal? false} - :sem {:ref {:human? true} - :pred :you} - :reflexive? true}] - + "te" (let [agr (atom {:number :sing + :person :2nd + :formal? false}) + ref (atom {:human? true + :context :informal})] + [{:agr agr + :case :acc + :sem {:agr agr + :ref ref + :pred :you} + :reflexive? true} + {:case :acc + :agr agr + :sem {:agr agr + :ref ref + :pred :you} + :reflexive? false}]) "tú" [{:case :nom :agr {:number :sing diff --git "a/resources/espa\303\261ol/lexicon/rules.edn" "b/resources/espa\303\261ol/lexicon/rules.edn" index 98357999c..d2803e869 100644 --- "a/resources/espa\303\261ol/lexicon/rules.edn" +++ "b/resources/espa\303\261ol/lexicon/rules.edn" @@ -252,12 +252,24 @@ :reflexive? true} :2 []}}])} + {:rule :ver-rule + :if {:canonical "ver"} + :then [{:intransitive? true + :transitive? false} + (let [obj (atom :top)] + {:intransitive? false + :transitive? true + :sem {:obj obj} + :subcat {:2 {:reflexive? false + :sem obj}}})]} + {:rule :intrans-only :if {:cat :verb :intransitive? true :transitive? ::unspec} :then [{:transitive? false}]} + ;; move :ver-rule content into :trans-only: {:rule :trans-only :if {:cat :verb :intransitive? ::unspec @@ -307,6 +319,7 @@ :reflexive? false :sem subj} :2 {:cat :noun + :case :acc :sem obj} :3 []}}])} diff --git a/src/menard/english.cljc b/src/menard/english.cljc index d75b9817e..77e16012b 100644 --- a/src/menard/english.cljc +++ b/src/menard/english.cljc @@ -33,24 +33,19 @@ (declare sentence-punctuation) (defn morph - ([tree] - (cond - (map? (u/get-in tree [:syntax-tree])) - (-> (u/get-in tree [:syntax-tree]) - (s/morph (:morphology @complete/model)) - an) - - :else - (-> tree - (s/morph (:morphology @complete/model)) - an))) - - ([tree & {:keys [sentence-punctuation?]}] - (when sentence-punctuation? - (-> tree - morph - an - (sentence-punctuation (u/get-in tree [:sem :mood] :decl)))))) + [tree & [model sentence-punctuation?]] + (let [model (or model @complete/model) + sentence-punctuation (if sentence-punctuation? sentence-punctuation (fn [s] s))] + (cond + (map? (u/get-in tree [:syntax-tree])) + (-> (u/get-in tree [:syntax-tree]) + (s/morph (:morphology model)) + an) + + :else + (-> tree + (s/morph (:morphology model)) + an)))) #?(:clj (defn write-compiled-lexicon [] @@ -80,8 +75,9 @@ (log/warn (str "no entry from cat: " (u/get-in spec [:cat] ::none) " in lexeme-map: returning all lexemes.")) lexicon))))) -(defn syntax-tree [tree] - (s/syntax-tree tree (:morphology @complete/model))) +(defn syntax-tree [tree & [model]] + (let [model (or model @complete/model)] + (s/syntax-tree tree (:morphology model)))) (defn an "change 'a' to 'an' if the next word starts with a vowel; diff --git a/src/menard/english/es.cljc b/src/menard/english/es.cljc new file mode 100644 index 000000000..f609e4542 --- /dev/null +++ b/src/menard/english/es.cljc @@ -0,0 +1,29 @@ +(ns menard.english.es + (:require [dag_unify.core :as u] + [clojure.tools.logging :as log] + [menard.english :as en] + [menard.english.compile :refer [compile-lexicon]] + [menard.model :refer [create]])) + +(def model + (delay (create "english/models/es" + "complete" + compile-lexicon false {:include-derivation? false}))) + + +(defn analyze [surface] + (en/analyze surface @model)) + +(defn generate [spec] + (en/generate spec @model)) + +(defn morph [expression] + (en/morph expression @model false)) + +(defn parse [surface] + (en/parse surface @model)) + +(defn syntax-tree [tree] + (en/syntax-tree tree @model)) + + diff --git "a/src/menard/espa\303\261ol.cljc" "b/src/menard/espa\303\261ol.cljc" index 1cf86fb2c..1204e501f 100644 --- "a/src/menard/espa\303\261ol.cljc" +++ "b/src/menard/espa\303\261ol.cljc" @@ -53,7 +53,7 @@ (s/morph tree (:morphology @model)))) ;; for parsing diagnostics: -(def truncate? true) +(def truncate? false) ;; how to split up a string into tokens that can be analyzed: (def split-on #"[ ]+") diff --git a/src/menard/lexiconfn.cljc b/src/menard/lexiconfn.cljc index 7f578be93..ee819e444 100644 --- a/src/menard/lexiconfn.cljc +++ b/src/menard/lexiconfn.cljc @@ -276,7 +276,7 @@ #?(:clj (defn read-and-eval [rules-filename] - (log/debug (str "read-and-eval with rules-filename: " rules-filename)) + (log/info (str "read-and-eval with rules-filename: " rules-filename)) (-> rules-filename ((fn [filename] (if (re-find #"^file:///" filename) diff --git a/src/menard/model.cljc b/src/menard/model.cljc index ec036fa8b..9d6d42b8e 100644 --- a/src/menard/model.cljc +++ b/src/menard/model.cljc @@ -352,9 +352,11 @@ (let [model-spec (read-model-spec model-spec-filename) rules-files (if (string? (-> model-spec :lexicon :rules)) - [(-> model-spec :lexicon :rules)) + [(-> model-spec :lexicon :rules)] (-> model-spec :lexicon :rules)) lexical-rules-paths (map (fn [rule-file] + (log/info (str "OK!! READING RULE FILE: " + rule-file)) (str (-> model-spec :lexicon :path) "/" rule-file)) diff --git a/src/menard/translate.cljc b/src/menard/translate.cljc index e07a99a70..7be9ebe03 100644 --- a/src/menard/translate.cljc +++ b/src/menard/translate.cljc @@ -1,5 +1,6 @@ (ns menard.translate - (:require [menard.nederlands :as nl] + (:require [menard.english.complete :as en-complete] + [menard.nederlands :as nl] [menard.nederlands.complete :as nl-complete] [menard.english :as en] [menard.generate :as g] @@ -22,9 +23,12 @@ ;; expression without first parsing it. (def intermediate-parse? false) +(def en-model + ( + (defn en-generate [spec allow-backtracking?] (binding [g/allow-backtracking? allow-backtracking?] - (en/generate spec))) + (en/generate spec model))) (defn translate [source-expression] (when (:note source-expression) diff --git a/src/menard/translate/es_en.cljc b/src/menard/translate/es_en.cljc index c2244212e..2a094ff08 100644 --- a/src/menard/translate/es_en.cljc +++ b/src/menard/translate/es_en.cljc @@ -2,7 +2,7 @@ (:require [dag_unify.core :as u :refer [unify]] [dag_unify.serialization :refer [serialize]] [dag_unify.diagnostics :as diag] - [menard.english :as en] + [menard.english.es :as en] [menard.español :as es] [menard.generate :as g] [menard.lexiconfn :as l] @@ -10,14 +10,31 @@ #?(:cljs [menard.log :as log]))) (defn es-parse-to-en-spec [es-parse] - (log/debug (str "es-parse sem: " (l/pprint (u/get-in es-parse [:sem])))) + (log/info (str "es-parse sem: " (l/pprint (u/get-in es-parse [:sem])))) (unify {:sem {:mod []}} {:agr (-> es-parse (u/get-in [:agr])) + :reflexive? (-> es-parse (u/get-in [:reflexive?])) :sem (-> es-parse (u/get-in [:sem])) :cat (-> es-parse (u/get-in [:cat])) :subcat (-> es-parse (u/get-in [:subcat]))} {:sem {:iobj (-> es-parse (u/get-in [:sem :iobj] :none))}})) +(def es-generation-specs + [ + {:cat :verb + :rule "s" + :head {:rule "vp" + :head {:transitive? true}}} + {:cat :verb + :rule "s" + :subcat [] + :head {:rule "vp" + :head {:intransitive? true}}} + {:cat :verb + :rule "s" + :head {:intransitive? true} + :subcat []}]) + (defn es-to-en [es-input] (if es-input (log/debug (str "es-to-en: es-input: " es-input)) diff --git a/test/menard/test/translate/en_es.cljc b/test/menard/test/translate/en_es.cljc index f84c33406..9dbdf81a9 100644 --- a/test/menard/test/translate/en_es.cljc +++ b/test/menard/test/translate/en_es.cljc @@ -1,6 +1,6 @@ (ns menard.test.translate.en-es (:require [dag_unify.core :as u :refer [unify]] - [menard.english :as en] + [menard.english.es :as en] [menard.español :as es] [menard.lexiconfn :as l] [menard.translate.es-en :as translate]