Skip to content

Commit

Permalink
working on target-specific source language models
Browse files Browse the repository at this point in the history
For example: an English language model (source) that is specific to Spanish (target).
  • Loading branch information
ekoontz committed Sep 1, 2024
1 parent 7ae0ced commit 51b8c0c
Show file tree
Hide file tree
Showing 13 changed files with 172 additions and 116 deletions.
80 changes: 2 additions & 78 deletions resources/english/lexicon/pronouns.edn
Original file line number Diff line number Diff line change
Expand Up @@ -171,84 +171,8 @@
:sem {:pred :what
:ref {:human? true}}
:wh-word true}]
"you" [{:agr {:person :2nd
:number :sing}
:case :nom
:sense 1
:note [:informal :singular]
:sem {:pred :you
:ref {:context :informal}}}

{:agr {:person :2nd
:gender :masc
:number :plur}
:sense 2
:case :nom
:note [:informal :masculine :plural]
:sem {:pred :you
:ref {:context :informal}}}

{:agr {:person :2nd
:gender :fem
:number :plur}
:case :nom
:sense 3
:note [:informal :feminine :plural]
:sem {:pred :you
:ref {:context :informal}
:person-not :1st}}

{:agr {:person :2nd
:number :sing}
:case :nom
:sense 4
:note [:formal :singular]
:sem {:pred :you
:ref {:context :formal}}}

{:agr {:person :2nd
:number :plur}
:case :nom
:sense 5
:note [:formal :plural]
:sem {:pred :you
:ref {:context :formal}}}

{:agr {:person :2nd
:number :sing}
:case :acc
:sense 6
:note [:informal :singular]
:sem {:pred :you
:ref {:context :informal}
:person-not :1st}}

{:agr {:person :2nd
:number :plur}
:case :acc
:sense 7
:note [:informal :plural]
:sem {:pred :you
:ref {:context :informal}
:person-not :1st}}

{:agr {:person :2nd
:number :sing}
:sense 8
:case :acc
:note [:formal :singular]
:sem {:pred :you
:ref {:context :formal}
:person-not :1st}}

{:agr {:person :2nd
:number :plur}
:sense 9
:case :acc
:note [:formal :plural]
:sem {:pred :you
:ref {:context :formal}
:person-not :1st}}]
;; Base entry for "you": only second-person agreement and the :you predicate
;; are specified here.
;; NOTE(review): the number/case/formality senses presumably come from a
;; target-specific lexical rule (e.g. :pronouns-2p in rules/es.edn, which
;; matches {:canonical "you"}) -- confirm for each model that uses this lexicon.
"you" [{:agr {:person :2nd}
:sem {:pred :you}}]

"yourself" [{:case :acc
:agr {:number :sing
Expand Down
65 changes: 63 additions & 2 deletions resources/english/lexicon/rules/es.edn
Original file line number Diff line number Diff line change
@@ -1,3 +1,64 @@
[{:rule :pronouns-2p
:if :top
:then [{}]}]
:if {:canonical "you"}
:then [{:agr {:number :sing}
:case :nom
:sense 1
:note [:informal :singular]
:sem {:ref {:context :informal}}}

{:agr {:gender :masc
:number :plur}
:sense 2
:case :nom
:note [:informal :masculine :plural]
:sem {:ref {:context :informal}}}

{:agr {:gender :fem
:number :plur}
:case :nom
:sense 3
:note [:informal :feminine :plural]
:sem {:ref {:context :informal}
:person-not :1st}}

{:agr {:number :sing}
:case :nom
:sense 4
:note [:formal :singular]
:sem {:ref {:context :formal}}}

{:agr {:number :plur}
:case :nom
:sense 5
:note [:formal :plural]
:sem {:ref {:context :formal}}}

{:agr {:number :sing}
:case :acc
:sense 6
:note [:informal :singular]
:sem {:ref {:context :informal}
:person-not :1st}}

{:agr {:number :plur}
:case :acc
:sense 7
:note [:informal :plural]
:sem {:ref {:context :informal}
:person-not :1st}}

{:agr {:number :sing}
:sense 8
:case :acc
:note [:formal :singular]
:sem {:ref {:context :formal}
:person-not :1st}}

{:agr {:person :2nd
:number :plur}
:sense 9
:case :acc
:note [:formal :plural]
:sem {:ref {:context :formal}
:person-not :1st}}]}]

2 changes: 1 addition & 1 deletion resources/english/models/es.edn
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"verbs.edn"]}
:grammar "english/grammar.edn"
:lexicon {:path "english/lexicon"
:rules ["rules.edn" "es.edn"]
:rules ["rules.edn" "rules/es.edn"]
:sources {"adjectives.edn" {:u {:cat :adjective}}
"adverbs.edn" {:u {:cat :adverb}}
"exclamations.edn" {:u {:cat :exclamation}}
Expand Down
24 changes: 17 additions & 7 deletions resources/español/lexicon/pronouns.edn
Original file line number Diff line number Diff line change
Expand Up @@ -147,13 +147,23 @@

]

"te" [{:agr {:number :sing
:person :2nd
:formal? false}
:sem {:ref {:human? true}
:pred :you}
:reflexive? true}]

;; "te": two accusative senses that differ only in :reflexive? (true / false).
;; `agr` and `ref` are atoms created once and referenced from both senses:
;; the same `agr` atom appears at :agr and at [:sem :agr], and the same `ref`
;; atom appears at [:sem :ref] of each sense.
"te" (let [agr (atom {:number :sing
:person :2nd
:formal? false})
ref (atom {:human? true
:context :informal})]
;; sense 1: reflexive
[{:agr agr
:case :acc
:sem {:agr agr
:ref ref
:pred :you}
:reflexive? true}
;; sense 2: non-reflexive
{:case :acc
:agr agr
:sem {:agr agr
:ref ref
:pred :you}
:reflexive? false}])

"" [{:case :nom
:agr {:number :sing
Expand Down
13 changes: 13 additions & 0 deletions resources/español/lexicon/rules.edn
Original file line number Diff line number Diff line change
Expand Up @@ -252,12 +252,24 @@
:reflexive? true}
:2 []}}])}

;; :ver-rule applies to entries whose :canonical is "ver" and yields two
;; variants: an intransitive one and a transitive one.
{:rule :ver-rule
:if {:canonical "ver"}
;; variant 1: intransitive only
:then [{:intransitive? true
:transitive? false}
;; variant 2: transitive; the same `obj` atom is used for [:sem :obj] and
;; for the :sem of the second subcat element, which must be non-reflexive.
(let [obj (atom :top)]
{:intransitive? false
:transitive? true
:sem {:obj obj}
:subcat {:2 {:reflexive? false
:sem obj}}})]}

{:rule :intrans-only
:if {:cat :verb
:intransitive? true
:transitive? ::unspec}
:then [{:transitive? false}]}

;; TODO: move the :ver-rule content into the :trans-only rule below:
{:rule :trans-only
:if {:cat :verb
:intransitive? ::unspec
Expand Down Expand Up @@ -307,6 +319,7 @@
:reflexive? false
:sem subj}
:2 {:cat :noun
:case :acc
:sem obj}
:3 []}}])}

Expand Down
36 changes: 16 additions & 20 deletions src/menard/english.cljc
Original file line number Diff line number Diff line change
Expand Up @@ -33,24 +33,19 @@
(declare sentence-punctuation)

(defn morph
  "Return the surface form of `tree`, with the 'a' -> 'an' correction applied.

  Arguments:
  - tree: the expression to render. If it holds a map at [:syntax-tree], that
    sub-structure is what is passed to s/morph; otherwise `tree` itself is.
  - model (optional, positional): the model whose :morphology is used.
    Defaults to @complete/model when omitted or nil.
  - sentence-punctuation? (optional, positional): when truthy, the result is
    additionally passed through `sentence-punctuation` together with the
    tree's [:sem :mood] (defaulting to :decl).

  Previously the punctuation flag was accepted but never applied; it is now
  honored."
  [tree & [model sentence-punctuation?]]
  (let [model (or model @complete/model)
        inner (u/get-in tree [:syntax-tree])
        ;; morph the embedded syntax tree when there is one, else the tree itself:
        surface (-> (if (map? inner) inner tree)
                    (s/morph (:morphology model))
                    an)]
    (if sentence-punctuation?
      ;; the mood is read from the original tree, not from the embedded syntax tree:
      (sentence-punctuation surface (u/get-in tree [:sem :mood] :decl))
      surface)))

#?(:clj
(defn write-compiled-lexicon []
Expand Down Expand Up @@ -80,8 +75,9 @@
(log/warn (str "no entry from cat: " (u/get-in spec [:cat] ::none) " in lexeme-map: returning all lexemes."))
lexicon)))))

(defn syntax-tree [tree]
(s/syntax-tree tree (:morphology @complete/model)))
;; Delegates to s/syntax-tree, passing the :morphology of the given model.
;; `model` is optional: when it is omitted or nil, @complete/model is used.
(defn syntax-tree [tree & [model]]
(let [model (or model @complete/model)]
(s/syntax-tree tree (:morphology model))))

(defn an
"change 'a' to 'an' if the next word starts with a vowel;
Expand Down
29 changes: 29 additions & 0 deletions src/menard/english/es.cljc
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
(ns menard.english.es
(:require [dag_unify.core :as u]
[clojure.tools.logging :as log]
[menard.english :as en]
[menard.english.compile :refer [compile-lexicon]]
[menard.model :refer [create]]))

;; The model for this namespace, created from the "english/models/es" model
;; spec. Wrapped in `delay`, so `create` is not called until the first deref
;; (@model) and its result is reused afterwards.
(def model
(delay (create "english/models/es"
"complete"
compile-lexicon false {:include-derivation? false})))


;; Calls en/analyze on `surface`, passing this namespace's model.
(defn analyze [surface]
(en/analyze surface @model))

;; Calls en/generate on `spec`, passing this namespace's model.
(defn generate [spec]
(en/generate spec @model))

;; Calls en/morph on `expression`, passing this namespace's model.
;; The trailing `false` is en/morph's positional sentence-punctuation? flag.
(defn morph [expression]
(en/morph expression @model false))

;; Calls en/parse on `surface`, passing this namespace's model.
(defn parse [surface]
(en/parse surface @model))

;; Calls en/syntax-tree on `tree`, passing this namespace's model.
(defn syntax-tree [tree]
(en/syntax-tree tree @model))


2 changes: 1 addition & 1 deletion src/menard/español.cljc
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@
(s/morph tree (:morphology @model))))

;; for parsing diagnostics:
(def truncate? true)
(def truncate? false)

;; how to split up a string into tokens that can be analyzed:
(def split-on #"[ ]+")
Expand Down
2 changes: 1 addition & 1 deletion src/menard/lexiconfn.cljc
Original file line number Diff line number Diff line change
Expand Up @@ -276,7 +276,7 @@

#?(:clj
(defn read-and-eval [rules-filename]
(log/debug (str "read-and-eval with rules-filename: " rules-filename))
(log/info (str "read-and-eval with rules-filename: " rules-filename))
(-> rules-filename
((fn [filename]
(if (re-find #"^file:///" filename)
Expand Down
4 changes: 3 additions & 1 deletion src/menard/model.cljc
Original file line number Diff line number Diff line change
Expand Up @@ -352,9 +352,11 @@
(let [model-spec (read-model-spec model-spec-filename)
rules-files
(if (string? (-> model-spec :lexicon :rules))
[(-> model-spec :lexicon :rules))
[(-> model-spec :lexicon :rules)]
(-> model-spec :lexicon :rules))
lexical-rules-paths (map (fn [rule-file]
(log/info (str "OK!! READING RULE FILE: "
rule-file))
(str
(-> model-spec :lexicon :path) "/"
rule-file))
Expand Down
8 changes: 6 additions & 2 deletions src/menard/translate.cljc
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
(ns menard.translate
(:require [menard.nederlands :as nl]
(:require [menard.english.complete :as en-complete]
[menard.nederlands :as nl]
[menard.nederlands.complete :as nl-complete]
[menard.english :as en]
[menard.generate :as g]
Expand All @@ -22,9 +23,12 @@
;; expression without first parsing it.
(def intermediate-parse? false)

(def en-model
(

(defn en-generate [spec allow-backtracking?]
(binding [g/allow-backtracking? allow-backtracking?]
(en/generate spec)))
(en/generate spec model)))

(defn translate [source-expression]
(when (:note source-expression)
Expand Down
Loading

0 comments on commit 51b8c0c

Please sign in to comment.