From c387cdc8b02f25df9f42f471e6b65c1e6d297db8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andreas=20K=C3=A4llberg?= <anka.213@gmail.com>
Date: Wed, 4 May 2022 09:54:34 +0800
Subject: [PATCH 01/14] Add stylish haskell config

---
 .stylish-haskell.yaml | 380 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 380 insertions(+)
 create mode 100644 .stylish-haskell.yaml

diff --git a/.stylish-haskell.yaml b/.stylish-haskell.yaml
new file mode 100644
index 0000000..87205d1
--- /dev/null
+++ b/.stylish-haskell.yaml
@@ -0,0 +1,380 @@
+# stylish-haskell configuration file
+# ==================================
+
+# The stylish-haskell tool is mainly configured by specifying steps. These steps
+# are a list, so they have an order, and one specific step may appear more than
+# once (if needed). Each file is processed by these steps in the given order.
+steps:
+  # Convert some ASCII sequences to their Unicode equivalents. This is disabled
+  # by default.
+  # - unicode_syntax:
+  #     # In order to make this work, we also need to insert the UnicodeSyntax
+  #     # language pragma. If this flag is set to true, we insert it when it's
+  #     # not already present. You may want to disable it if you configure
+  #     # language extensions using some other method than pragmas. Default:
+  #     # true.
+  #     add_language_pragma: true
+
+  # Format module header
+  #
+  # Currently, this option is not configurable and will format all exports and
+  # module declarations to minimize diffs
+  #
+  # - module_header:
+  #     # How many spaces use for indentation in the module header.
+  #     indent: 4
+  #
+  #     # Should export lists be sorted?  Sorting is only performed within the
+  #     # export section, as delineated by Haddock comments.
+  #     sort: true
+  #
+  #     # See `separate_lists` for the `imports` step.
+  #     separate_lists: true
+  #
+  #     # When to break the "where".
+  #     # Possible values:
+  #     # - exports: only break when there is an explicit export list.
+  #     # - single: only break when the export list counts more than one export.
+  #     # - inline: only break when the export list is too long. This is
+  #     #   determined by the `columns` setting. Not applicable when the export
+  #     #   list contains comments as newlines will be required.
+  #     # - always: always break before the "where".
+  #     break_where: exports
+  #
+  #     # Where to put open bracket
+  #     # Possible values:
+  #     # - same_line: put open bracket on the same line as the module name, before the
+  #     #              comment of the module
+  #     # - next_line: put open bracket on the next line, after module comment
+  #     open_bracket: next_line
+
+  # Format record definitions. This is disabled by default.
+  #
+  # You can control the layout of record fields. The only rules that can't be configured
+  # are these:
+  #
+  # - "|" is always aligned with "="
+  # - "," in fields is always aligned with "{"
+  # - "}" is likewise always aligned with "{"
+  #
+  # - records:
+  #     # How to format equals sign between type constructor and data constructor.
+  #     # Possible values:
+  #     # - "same_line" -- leave "=" AND data constructor on the same line as the type constructor.
+  #     # - "indent N" -- insert a new line and N spaces from the beginning of the next line.
+  #     equals: "indent 2"
+  #
+  #     # How to format first field of each record constructor.
+  #     # Possible values:
+  #     # - "same_line" -- "{" and first field goes on the same line as the data constructor.
+  #     # - "indent N" -- insert a new line and N spaces from the beginning of the data constructor
+  #     first_field: "indent 2"
+  #
+  #     # How many spaces to insert between the column with "," and the beginning of the comment in the next line.
+  #     field_comment: 2
+  #
+  #     # How many spaces to insert before "deriving" clause. Deriving clauses are always on separate lines.
+  #     deriving: 2
+  #
+  #     # How many spaces to insert before "via" clause counted from indentation of deriving clause
+  #     # Possible values:
+  #     # - "same_line" -- "via" part goes on the same line as "deriving" keyword.
+  #     # - "indent N" -- insert a new line and N spaces from the beginning of "deriving" keyword.
+  #     via: "indent 2"
+  #
+  #     # Sort typeclass names in the "deriving" list alphabetically.
+  #     sort_deriving: true
+  #
+  #     # Whether or not to break enums onto several lines
+  #     #
+  #     # Default: false
+  #     break_enums: false
+  #
+  #     # Whether or not to break single constructor data types before `=` sign
+  #     #
+  #     # Default: true
+  #     break_single_constructors: true
+  #
+  #     # Whether or not to curry constraints on function.
+  #     #
+  #     # E.g: @allValues :: Enum a => Bounded a => Proxy a -> [a]@
+  #     #
+  #     # Instead of @allValues :: (Enum a, Bounded a) => Proxy a -> [a]@
+  #     #
+  #     # Default: false
+  #     curried_context: false
+
+  # Align the right hand side of some elements.  This is quite conservative
+  # and only applies to statements where each element occupies a single
+  # line.
+  # Possible values:
+  # - always - Always align statements.
+  # - adjacent - Align statements that are on adjacent lines in groups.
+  # - never - Never align statements.
+  # All default to always.
+  - simple_align:
+      cases: always
+      top_level_patterns: always
+      records: always
+      multi_way_if: always
+
+  # Import cleanup
+  - imports:
+      # There are different ways we can align names and lists.
+      #
+      # - global: Align the import names and import list throughout the entire
+      #   file.
+      #
+      # - file: Like global, but don't add padding when there are no qualified
+      #   imports in the file.
+      #
+      # - group: Only align the imports per group (a group is formed by adjacent
+      #   import lines).
+      #
+      # - none: Do not perform any alignment.
+      #
+      # Default: global.
+      align: none
+
+      # The following options affect only import list alignment.
+      #
+      # List align has following options:
+      #
+      # - after_alias: Import list is aligned with end of import including
+      #   'as' and 'hiding' keywords.
+      #
+      #   > import qualified Data.List      as List (concat, foldl, foldr, head,
+      #   >                                          init, last, length)
+      #
+      # - with_alias: Import list is aligned with start of alias or hiding.
+      #
+      #   > import qualified Data.List      as List (concat, foldl, foldr, head,
+      #   >                                 init, last, length)
+      #
+      # - with_module_name: Import list is aligned `list_padding` spaces after
+      #   the module name.
+      #
+      #   > import qualified Data.List      as List (concat, foldl, foldr, head,
+      #                          init, last, length)
+      #
+      #   This is mainly intended for use with `pad_module_names: false`.
+      #
+      #   > import qualified Data.List as List (concat, foldl, foldr, head,
+      #                          init, last, length, scanl, scanr, take, drop,
+      #                          sort, nub)
+      #
+      # - new_line: Import list starts always on new line.
+      #
+      #   > import qualified Data.List      as List
+      #   >     (concat, foldl, foldr, head, init, last, length)
+      #
+      # - repeat: Repeat the module name to align the import list.
+      #
+      #   > import qualified Data.List      as List (concat, foldl, foldr, head)
+      #   > import qualified Data.List      as List (init, last, length)
+      #
+      # Default: after_alias
+      list_align: after_alias
+
+      # Right-pad the module names to align imports in a group:
+      #
+      # - true: a little more readable
+      #
+      #   > import qualified Data.List       as List (concat, foldl, foldr,
+      #   >                                           init, last, length)
+      #   > import qualified Data.List.Extra as List (concat, foldl, foldr,
+      #   >                                           init, last, length)
+      #
+      # - false: diff-safe
+      #
+      #   > import qualified Data.List as List (concat, foldl, foldr, init,
+      #   >                                     last, length)
+      #   > import qualified Data.List.Extra as List (concat, foldl, foldr,
+      #   >                                           init, last, length)
+      #
+      # Default: true
+      pad_module_names: true
+
+      # Long list align style takes effect when import is too long. This is
+      # determined by 'columns' setting.
+      #
+      # - inline: This option will put as much specs on same line as possible.
+      #
+      # - new_line: Import list will start on new line.
+      #
+      # - new_line_multiline: Import list will start on new line when it's
+      #   short enough to fit to single line. Otherwise it'll be multiline.
+      #
+      # - multiline: One line per import list entry.
+      #   Type with constructor list acts like single import.
+      #
+      #   > import qualified Data.Map as M
+      #   >     ( empty
+      #   >     , singleton
+      #   >     , ...
+      #   >     , delete
+      #   >     )
+      #
+      # Default: inline
+      long_list_align: inline
+
+      # Align empty list (importing instances)
+      #
+      # Empty list align has following options
+      #
+      # - inherit: inherit list_align setting
+      #
+      # - right_after: () is right after the module name:
+      #
+      #   > import Vector.Instances ()
+      #
+      # Default: inherit
+      empty_list_align: inherit
+
+      # List padding determines indentation of import list on lines after import.
+      # This option affects 'long_list_align'.
+      #
+      # - <integer>: constant value
+      #
+      # - module_name: align under start of module name.
+      #   Useful for 'file' and 'group' align settings.
+      #
+      # Default: 4
+      list_padding: 4
+
+      # Separate lists option affects formatting of import list for type
+      # or class. The only difference is single space between type and list
+      # of constructors, selectors and class functions.
+      #
+      # - true: There is single space between Foldable type and list of it's
+      #   functions.
+      #
+      #   > import Data.Foldable (Foldable (fold, foldl, foldMap))
+      #
+      # - false: There is no space between Foldable type and list of it's
+      #   functions.
+      #
+      #   > import Data.Foldable (Foldable(fold, foldl, foldMap))
+      #
+      # Default: true
+      separate_lists: true
+
+      # Space surround option affects formatting of import lists on a single
+      # line. The only difference is single space after the initial
+      # parenthesis and a single space before the terminal parenthesis.
+      #
+      # - true: There is single space associated with the enclosing
+      #   parenthesis.
+      #
+      #   > import Data.Foo ( foo )
+      #
+      # - false: There is no space associated with the enclosing parenthesis
+      #
+      #   > import Data.Foo (foo)
+      #
+      # Default: false
+      space_surround: false
+
+      # Post qualify option moves any qualifies found in import declarations
+      # to the end of the declaration. This also adjust padding for any
+      # unqualified import declarations.
+      #
+      # - true: Qualified as <module name> is moved to the end of the
+      #   declaration.
+      #
+      #   > import Data.Bar
+      #   > import Data.Foo qualified as F
+      #
+      # - false: Qualified remains in the default location and unqualified
+      #   imports are padded to align with qualified imports.
+      #
+      #   > import           Data.Bar
+      #   > import qualified Data.Foo as F
+      #
+      # Default: false
+      post_qualify: false
+
+
+  # Language pragmas
+  - language_pragmas:
+      # We can generate different styles of language pragma lists.
+      #
+      # - vertical: Vertical-spaced language pragmas, one per line.
+      #
+      # - compact: A more compact style.
+      #
+      # - compact_line: Similar to compact, but wrap each line with
+      #   `{-# LANGUAGE #-}'.
+      #
+      # - vertical_compact: Similar to vertical, but use only one language pragma.
+      #
+      # Default: vertical.
+      style: vertical
+
+      # Align affects alignment of closing pragma brackets.
+      #
+      # - true: Brackets are aligned in same column.
+      #
+      # - false: Brackets are not aligned together. There is only one space
+      #   between actual import and closing bracket.
+      #
+      # Default: true
+      align: true
+
+      # stylish-haskell can detect redundancy of some language pragmas. If this
+      # is set to true, it will remove those redundant pragmas. Default: true.
+      remove_redundant: true
+
+      # Language prefix to be used for pragma declaration, this allows you to
+      # use other options non case-sensitive like "language" or "Language".
+      # If a non correct String is provided, it will default to: LANGUAGE.
+      language_prefix: LANGUAGE
+
+  # Replace tabs by spaces. This is disabled by default.
+  # - tabs:
+  #     # Number of spaces to use for each tab. Default: 8, as specified by the
+  #     # Haskell report.
+  #     spaces: 8
+
+  # Remove trailing whitespace
+  - trailing_whitespace: {}
+
+  # Squash multiple spaces between the left and right hand sides of some
+  # elements into single spaces. Basically, this undoes the effect of
+  # simple_align but is a bit less conservative.
+  # - squash: {}
+
+# A common setting is the number of columns (parts of) code will be wrapped
+# to. Different steps take this into account.
+#
+# Set this to null to disable all line wrapping.
+#
+# Default: 80.
+columns: 80
+
+# By default, line endings are converted according to the OS. You can override
+# preferred format here.
+#
+# - native: Native newline format. CRLF on Windows, LF on other OSes.
+#
+# - lf: Convert to LF ("\n").
+#
+# - crlf: Convert to CRLF ("\r\n").
+#
+# Default: native.
+newline: native
+
+# Sometimes, language extensions are specified in a cabal file or from the
+# command line instead of using language pragmas in the file. stylish-haskell
+# needs to be aware of these, so it can parse the file correctly.
+#
+# No language extensions are enabled by default.
+# language_extensions:
+  # - TemplateHaskell
+  # - QuasiQuotes
+
+# Attempt to find the cabal file in ancestors of the current directory, and
+# parse options (currently only language extensions) from that.
+#
+# Default: true
+cabal: true

From 006e030c1f1d2f094612a8887cfb9121bcaf6e98 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andreas=20K=C3=A4llberg?= <anka.213@gmail.com>
Date: Wed, 4 May 2022 09:07:57 +0800
Subject: [PATCH 02/14] Format code with stylish-haskell

---
 UD2GF.hs           | 46 +++++++++++++++++++++++-----------------------
 tests/TestSuite.hs | 10 +++++-----
 2 files changed, 28 insertions(+), 28 deletions(-)

diff --git a/UD2GF.hs b/UD2GF.hs
index eb600f1..0c71bab 100644
--- a/UD2GF.hs
+++ b/UD2GF.hs
@@ -1,24 +1,24 @@
 module UD2GF where
 
+import Backend
+import GFConcepts
 import RTree
-import UDConcepts
 import UDAnnotations
-import GFConcepts
+import UDConcepts
 import UDOptions
 import UDVisualization
-import Backend
 
 import PGF hiding (CncLabels)
 
-import qualified Data.Map as M
-import Data.List
 import Data.Char
+import Data.List
+import qualified Data.Map as M
 import Data.Maybe
-import Text.PrettyPrint (render, cat)
+import Text.PrettyPrint (cat, render)
 
-import Debug.Trace (trace)
 import Data.Function (on)
 import Data.Ord (comparing)
+import Debug.Trace (trace)
 
 ---------
 -- to debug
@@ -66,7 +66,7 @@ showUD2GF opts env sentence = do
   ifOpt opts "ud" $ prt sentence
 
   case errors sentence of
-    [] -> return ()
+    []   -> return ()
     errs -> ifOpt opts "err" $ unlines errs
 
   let udtree = udSentence2tree sentence
@@ -132,11 +132,11 @@ showUD2GF opts env sentence = do
 
 
 data UD2GFStat = UD2GFStat {
-  totalWords :: Int,
-  interpretedWords :: Int,
-  unknownWords :: Int,
-  totalSentences :: Int,
-  completeSentences :: Int,
+  totalWords           :: Int,
+  interpretedWords     :: Int,
+  unknownWords         :: Int,
+  totalSentences       :: Int,
+  completeSentences    :: Int,
   typecorrectSentences :: Int
   }
  deriving Show
@@ -172,7 +172,7 @@ data CheckResult = CheckResult {
 
 prCheckResult cr = unlines $
   case resultUnknowns cr of
-    [] -> []
+    []  -> []
     uks -> [unwords $ "unknown words:" : map showCId uks]
   ++
   [resultMessage cr]
@@ -187,7 +187,7 @@ checkAbsTreeResult env t = CheckResult {
  where
   pgf = pgfGrammar env
   (mt,msg) = case inferExpr pgf (abstree2expr t) of
-    Left tce -> (Nothing, render (ppTcError tce))
+    Left tce        -> (Nothing, render (ppTcError tce))
     Right (exp,typ) -> (Just exp, "type checking OK")
 
 
@@ -209,12 +209,12 @@ data DevNode = DevNode {
   deriving Show
 
 mapDevAbsTree :: (AbsTreeInfo -> AbsTreeInfo) -> DevNode -> DevNode
-mapDevAbsTree f dn = dn { devAbsTrees = map f (devAbsTrees dn) } 
+mapDevAbsTree f dn = dn { devAbsTrees = map f (devAbsTrees dn) }
 
 data AbsTreeInfo = AbsTreeInfo
   { atiAbsTree :: AbsTree
-  , atiCat :: Cat
-  , atiUDIds :: [UDId]
+  , atiCat     :: Cat
+  , atiUDIds   :: [UDId]
   }
   deriving (Show, Eq)
 
@@ -287,7 +287,7 @@ addBackups0 tr@(RTree dn trs) = case map collectBackup (tr:trs) of  -- backups f
 theAbsTreeInfo :: DevTree -> AbsTreeInfo
 theAbsTreeInfo dt = case devAbsTrees (root dt) of
   [t] -> t
-  _ -> error $ "no unique abstree in " ++ prDevNode 2 (root dt)
+  _   -> error $ "no unique abstree in " ++ prDevNode 2 (root dt)
 
 -- split trees showing just one GF tree in each DevTree
 splitDevTree :: DevTree -> [DevTree]
@@ -298,7 +298,7 @@ splitDevTree tr@(RTree dn trs) =
    case elem (devIndex d) usage of
     True -> case sortOn ((1000-) . sizeRTree . atiAbsTree) [dt | dt@AbsTreeInfo { atiAbsTree = t} <- devAbsTrees d, isSubRTree t ast] of
       t:_ -> RTree (d{devAbsTrees = [t]}) (map (chase t) ts)
-      _ -> error $ "wrong indexing in\n" ++ prLinesRTree (prDevNode 1) tr
+      _   -> error $ "wrong indexing in\n" ++ prLinesRTree (prDevNode 1) tr
     False -> head $ splitDevTree $ RTree (d{devNeedBackup = True}) ts ---- head
 
 prtStatus udids =  "[" ++ concat (intersperse "," (map prt udids)) ++ "]"
@@ -485,7 +485,7 @@ analyseWords env = mapRTree lemma2fun
                       all (\f -> M.notMember f (disabledFunctions (cncLabels env))) (allNodesRTree at)]
     Right c -> case elem (w,c) auxWords of
       True -> [(newWordTree w c, c)]
-      _ -> []
+      _    -> []
 
   auxWords = [(lemma,cat) | ((fun_,lemma),(cat,labels_)) <- M.assocs (lemmaLabels (cncLabels env))]
 
@@ -542,5 +542,5 @@ udtree2devtree = markClosest . initialize
 -- >>> select [1,2,3]
 -- [(1,[2,3]),(2,[1,3]),(3,[1,2])]
 select :: [a] -> [(a,[a])]
-select [] = []
-select (a : as) = (a,as) : [ (b,a:bs) | (b,bs) <-select as ]
\ No newline at end of file
+select []       = []
+select (a : as) = (a,as) : [ (b,a:bs) | (b,bs) <-select as ]
diff --git a/tests/TestSuite.hs b/tests/TestSuite.hs
index 8232791..50e95c8 100644
--- a/tests/TestSuite.hs
+++ b/tests/TestSuite.hs
@@ -1,10 +1,10 @@
 module Main where
 
+import PGF
 import Test.Hspec
 import UD2GF
-import PGF
 import UDAnnotations
-import UDConcepts (UDData(UDData))
+import UDConcepts (UDData (UDData))
 
 
 myUDEnv :: IO UDEnv
@@ -40,7 +40,7 @@ bestTrees env conll = map exprStr exprs
       exprs = getExprs [] env conll
       exprStr expr = case expr of
         (x : _xs) -> showExpr [] x
-        _ -> "bestTree: ud2gf failed"
+        _         -> "bestTree: ud2gf failed"
 
 
 bestTree :: UDEnv -> String -> String
@@ -49,10 +49,10 @@ bestTree env conll = exprStr
       exprs = getExprs [] env conll
       exprStr = case exprs of
         (x : _xs) : _xss -> showExpr [] x
-        _ -> "bestTree: ud2gf failed"
+        _                -> "bestTree: ud2gf failed"
 
 theCatSleepsAlready :: String
-theCatSleepsAlready = unlines 
+theCatSleepsAlready = unlines
   [ "1\tthe\tthe\tDET\tQuant\tFORM=0\t2\tdet\t_\tFUN=DefArt"
   , "2\tcat\tcat\tNOUN\tNN\tNumber=Sing\t3\tnsubj\t_\tFUN=cat_N"
   , "3\tsleeps\tsleep\tVERB\tVBZ\tMood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin\t0\troot\t_\tFUN=sleepVBZ"

From 5dc0d8231fc9c443c5bdf37cf3d0da6b93cbf85f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andreas=20K=C3=A4llberg?= <anka.213@gmail.com>
Date: Mon, 22 Nov 2021 13:40:29 +0800
Subject: [PATCH 03/14] ud2gf: Fall back to morphoanalysis if gf parsing fails

---
 UD2GF.hs | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/UD2GF.hs b/UD2GF.hs
index 0c71bab..75d75e0 100644
--- a/UD2GF.hs
+++ b/UD2GF.hs
@@ -458,6 +458,7 @@ combineTrees env =
 analyseWords :: UDEnv -> DevTree -> DevTree
 analyseWords env = mapRTree lemma2fun
  where
+  morpho = buildMorpho (pgfGrammar env) (actLanguage env)
   lemma2fun dn = dn {
     devAbsTrees = [AbsTreeInfo { atiAbsTree = t, atiCat = c, atiUDIds = [devIndex dn]} | (t,c) <- justWords],
     devStatus = [devIndex dn],
@@ -480,6 +481,7 @@ analyseWords env = mapRTree lemma2fun
   --- this can fail if c is discontinuous, or return false positives if w is a form of another word
   parseWord w ec = case ec of
     Left c -> case parse (pgfGrammar env) (actLanguage env) (mkType [] c []) w of
+      [] -> [(RTree name [], c) | (name, _) <- lookupMorpho morpho w]
       ts -> [(at,c) | t <- ts,
                       let at = expr2abstree t,
                       all (\f -> M.notMember f (disabledFunctions (cncLabels env))) (allNodesRTree at)]

From 0cce2d79eed909c7a0e66a1592960013734dd9f9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andreas=20K=C3=A4llberg?= <anka.213@gmail.com>
Date: Mon, 22 Nov 2021 14:25:30 +0800
Subject: [PATCH 04/14] Use correct category for morpho parses

---
 UD2GF.hs | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/UD2GF.hs b/UD2GF.hs
index 75d75e0..4d3b145 100644
--- a/UD2GF.hs
+++ b/UD2GF.hs
@@ -471,7 +471,7 @@ analyseWords env = mapRTree lemma2fun
 
   -- find all functions that are possible parses of the word in any appropriate category
   --- it is still possible that some other category is meant
-  getWordTrees w cs = case concatMap (parseWord w) cs of
+  getWordTrees w cs = case morphoFallback w $ concatMap (parseWord w) cs of
     [] -> case cs of
       [] -> (True,[(newWordTree w unknownCat, unknownCat)])
       _  -> (True,[(newWordTree w ec, ec) | c <- cs, let ec = either id id c])
@@ -481,7 +481,6 @@ analyseWords env = mapRTree lemma2fun
   --- this can fail if c is discontinuous, or return false positives if w is a form of another word
   parseWord w ec = case ec of
     Left c -> case parse (pgfGrammar env) (actLanguage env) (mkType [] c []) w of
-      [] -> [(RTree name [], c) | (name, _) <- lookupMorpho morpho w]
       ts -> [(at,c) | t <- ts,
                       let at = expr2abstree t,
                       all (\f -> M.notMember f (disabledFunctions (cncLabels env))) (allNodesRTree at)]
@@ -491,6 +490,18 @@ analyseWords env = mapRTree lemma2fun
 
   auxWords = [(lemma,cat) | ((fun_,lemma),(cat,labels_)) <- M.assocs (lemmaLabels (cncLabels env))]
 
+  -- Fall back to morphoanalysis if gf parse fails
+  -- TODO: We might want to use the morphoanalysis for all words, not just when parse fails
+  morphoFallback :: String -> [(RTree Lemma, CId)] -> [(RTree CId, CId)]
+  morphoFallback w [] =
+    [(RTree name [], c)
+    | (name, _) <- lookupMorpho morpho w
+    , let expr = mkApp name []
+    , Right (e,tp) <- pure $ inferExpr (pgfGrammar env) expr
+    , ([], c, []) <- pure $ unType tp
+    ]
+  morphoFallback _ xs = xs
+
 -- auxiliaries
 newWordTree w c = RTree (mkCId (w ++ "_" ++ showCId c)) [] ---
 isNewWordFun f = isInfixOf "__x__" (showCId f)

From f1dc8aaa1f0570c7f51a406b87bd4f0c5302a2a7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andreas=20K=C3=A4llberg?= <anka.213@gmail.com>
Date: Mon, 22 Nov 2021 14:26:05 +0800
Subject: [PATCH 05/14] Use functionType instead of inferType

---
 UD2GF.hs | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/UD2GF.hs b/UD2GF.hs
index 4d3b145..a656836 100644
--- a/UD2GF.hs
+++ b/UD2GF.hs
@@ -496,8 +496,7 @@ analyseWords env = mapRTree lemma2fun
   morphoFallback w [] =
     [(RTree name [], c)
     | (name, _) <- lookupMorpho morpho w
-    , let expr = mkApp name []
-    , Right (e,tp) <- pure $ inferExpr (pgfGrammar env) expr
+    , Just tp <- pure $ functionType (pgfGrammar env) name
     , ([], c, []) <- pure $ unType tp
     ]
   morphoFallback _ xs = xs

From 1bb37e695b872062ad2535ffa79b29614ce78adf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andreas=20K=C3=A4llberg?= <anka.213@gmail.com>
Date: Tue, 23 Nov 2021 10:49:11 +0800
Subject: [PATCH 06/14] Use GF string literals for unknown words

---
 GFConcepts.hs | 1 +
 UD2GF.hs      | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/GFConcepts.hs b/GFConcepts.hs
index 898362e..7446a9e 100644
--- a/GFConcepts.hs
+++ b/GFConcepts.hs
@@ -42,6 +42,7 @@ expr2abstree e = case unApp e of
   _ -> error ("ERROR: no constructor tree from " ++ showExpr [] e)
 
 abstree2expr :: AbsTree -> PGF.Expr
+abstree2expr tr@(RTree f []) | [(str,"")] <- reads (showCId f) = mkStr str
 abstree2expr tr@(RTree f ts) = mkApp f (map abstree2expr ts)
 
 postOrderRTree :: RTree a -> RTree (a,Int)
diff --git a/UD2GF.hs b/UD2GF.hs
index a656836..1bac6f2 100644
--- a/UD2GF.hs
+++ b/UD2GF.hs
@@ -502,7 +502,8 @@ analyseWords env = mapRTree lemma2fun
   morphoFallback _ xs = xs
 
 -- auxiliaries
-newWordTree w c = RTree (mkCId (w ++ "_" ++ showCId c)) [] ---
+-- newWordTree w c = RTree (mkCId (w ++ "_" ++ showCId c)) [] ---
+newWordTree w c = RTree (mkCId ("Str" ++ showCId c)) [RTree (mkCId (show w)) []] ---
 isNewWordFun f = isInfixOf "__x__" (showCId f)
 unknownCat = mkCId "Adv" --- treat unknown words as adverbs ---- TODO: from config
 quote s = "\"" ++ s ++ "\""

From 522b61129ea36005ebac2583bc3a6b84600ec1aa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andreas=20K=C3=A4llberg?= <anka.213@gmail.com>
Date: Tue, 23 Nov 2021 11:04:37 +0800
Subject: [PATCH 07/14] Don't wrap string literals in double quotes

This reduces the problem with showCId escaping everything
---
 GFConcepts.hs | 4 +++-
 UD2GF.hs      | 2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/GFConcepts.hs b/GFConcepts.hs
index 7446a9e..d75594e 100644
--- a/GFConcepts.hs
+++ b/GFConcepts.hs
@@ -39,10 +39,12 @@ pgf2functions pgf = [(fun,(val,[arg | (_,_,ty) <- hs, let (_,arg,_) = unType ty]
 expr2abstree :: PGF.Expr -> AbsTree
 expr2abstree e = case unApp e of
   Just (f,es) -> RTree f (map expr2abstree es)
+  -- _ | Just q <- unStr e -> RTree (mkCId "StrLit") [RTree (mkCId (show q)) []]
   _ -> error ("ERROR: no constructor tree from " ++ showExpr [] e)
 
 abstree2expr :: AbsTree -> PGF.Expr
-abstree2expr tr@(RTree f []) | [(str,"")] <- reads (showCId f) = mkStr str
+-- TODO: showCId escapes things more than I would like.
+abstree2expr tr@(RTree f []) | Just str <- stripPrefix "__strlit__" (showCId f) = mkStr str
 abstree2expr tr@(RTree f ts) = mkApp f (map abstree2expr ts)
 
 postOrderRTree :: RTree a -> RTree (a,Int)
diff --git a/UD2GF.hs b/UD2GF.hs
index 1bac6f2..7649595 100644
--- a/UD2GF.hs
+++ b/UD2GF.hs
@@ -503,7 +503,7 @@ analyseWords env = mapRTree lemma2fun
 
 -- auxiliaries
 -- newWordTree w c = RTree (mkCId (w ++ "_" ++ showCId c)) [] ---
-newWordTree w c = RTree (mkCId ("Str" ++ showCId c)) [RTree (mkCId (show w)) []] ---
+newWordTree w c = RTree (mkCId ("Str" ++ showCId c)) [RTree (mkCId ("__strlit__" ++ w)) []] ---
 isNewWordFun f = isInfixOf "__x__" (showCId f)
 unknownCat = mkCId "Adv" --- treat unknown words as adverbs ---- TODO: from config
 quote s = "\"" ++ s ++ "\""

From 97b02f807ee8c84ee610b70a64a22438e9b56a9c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andreas=20K=C3=A4llberg?= <anka.213@gmail.com>
Date: Tue, 23 Nov 2021 11:22:00 +0800
Subject: [PATCH 08/14] Don't treat strlits as unknown words

---
 UD2GF.hs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/UD2GF.hs b/UD2GF.hs
index 7649595..c60d6aa 100644
--- a/UD2GF.hs
+++ b/UD2GF.hs
@@ -181,7 +181,7 @@ prCheckResult cr = unlines $
 checkAbsTreeResult :: UDEnv -> AbsTree -> CheckResult
 checkAbsTreeResult env t = CheckResult {
   resultTree = mt,
-  resultUnknowns = [f | f <- allNodesRTree t, Nothing <- [functionType pgf f]],
+  resultUnknowns = [f | f <- allNodesRTree t, "__strlit__" `isPrefixOf` showCId f, Nothing <- [functionType pgf f]],
   resultMessage = msg
   }
  where

From ae5a40e784eab5ddc470b17125a6317c6bd81123 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andreas=20K=C3=A4llberg?= <anka.213@gmail.com>
Date: Tue, 23 Nov 2021 11:30:42 +0800
Subject: [PATCH 09/14] Refactor and fix bug

---
 GFConcepts.hs | 9 +++++++--
 UD2GF.hs      | 4 ++--
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/GFConcepts.hs b/GFConcepts.hs
index d75594e..c13a448 100644
--- a/GFConcepts.hs
+++ b/GFConcepts.hs
@@ -44,9 +44,14 @@ expr2abstree e = case unApp e of
 
 abstree2expr :: AbsTree -> PGF.Expr
 -- TODO: showCId escapes things more than I would like.
-abstree2expr tr@(RTree f []) | Just str <- stripPrefix "__strlit__" (showCId f) = mkStr str
+abstree2expr tr@(RTree f []) | Just str <- asStringLiteral f = mkStr str
 abstree2expr tr@(RTree f ts) = mkApp f (map abstree2expr ts)
 
+-- | Check if a CId is actually a string literal in disguise
+asStringLiteral :: CId -> Maybe String
+asStringLiteral f = stripPrefix stringLiteralPrefix (showCId f)
+stringLiteralPrefix = "__strlit__"
+
 postOrderRTree :: RTree a -> RTree (a,Int)
 postOrderRTree = post 0 where
 
@@ -54,7 +59,7 @@ postOrderRTree = post 0 where
   post n t = case t of
     RTree a ts -> case posts n ts of
       (nts,nn) -> RTree (a,nn) nts
-      
+
   posts :: Int -> [RTree a] -> ([RTree (a,Int)],Int)
   posts n ts = case ts of
     []   -> ([],n)
diff --git a/UD2GF.hs b/UD2GF.hs
index c60d6aa..9bb8ba3 100644
--- a/UD2GF.hs
+++ b/UD2GF.hs
@@ -181,7 +181,7 @@ prCheckResult cr = unlines $
 checkAbsTreeResult :: UDEnv -> AbsTree -> CheckResult
 checkAbsTreeResult env t = CheckResult {
   resultTree = mt,
-  resultUnknowns = [f | f <- allNodesRTree t, "__strlit__" `isPrefixOf` showCId f, Nothing <- [functionType pgf f]],
+  resultUnknowns = [f | f <- allNodesRTree t, Nothing <- [asStringLiteral f], Nothing <- [functionType pgf f]],
   resultMessage = msg
   }
  where
@@ -503,7 +503,7 @@ analyseWords env = mapRTree lemma2fun
 
 -- auxiliaries
 -- newWordTree w c = RTree (mkCId (w ++ "_" ++ showCId c)) [] ---
-newWordTree w c = RTree (mkCId ("Str" ++ showCId c)) [RTree (mkCId ("__strlit__" ++ w)) []] ---
+newWordTree w c = RTree (mkCId ("Str" ++ showCId c)) [RTree (mkCId (stringLiteralPrefix ++ w)) []] ---
 isNewWordFun f = isInfixOf "__x__" (showCId f)
 unknownCat = mkCId "Adv" --- treat unknown words as adverbs ---- TODO: from config
 quote s = "\"" ++ s ++ "\""

From 06392217ea3170f7ad127b560ca177d77407ec60 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andreas=20K=C3=A4llberg?= <anka.213@gmail.com>
Date: Wed, 1 Dec 2021 12:03:49 +0800
Subject: [PATCH 10/14] Unescape string literals that were put inside CIds

---
 GFConcepts.hs | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/GFConcepts.hs b/GFConcepts.hs
index c13a448..137a822 100644
--- a/GFConcepts.hs
+++ b/GFConcepts.hs
@@ -43,13 +43,24 @@ expr2abstree e = case unApp e of
   _ -> error ("ERROR: no constructor tree from " ++ showExpr [] e)
 
 abstree2expr :: AbsTree -> PGF.Expr
--- TODO: showCId escapes things more than I would like.
 abstree2expr tr@(RTree f []) | Just str <- asStringLiteral f = mkStr str
 abstree2expr tr@(RTree f ts) = mkApp f (map abstree2expr ts)
 
 -- | Check if a CId is actually a string literal in disguise
 asStringLiteral :: CId -> Maybe String
-asStringLiteral f = stripPrefix stringLiteralPrefix (showCId f)
+asStringLiteral f = stripPrefix stringLiteralPrefix $ unescape (showCId f)
+
+-- | showCId will escape non-valid literals, so we need to unescape them
+unescape :: String -> String
+unescape ('\'':str) = unescapeBackslashes str
+unescape str = str
+
+unescapeBackslashes :: String -> String
+unescapeBackslashes ('\\':x:xs) = x : unescapeBackslashes xs
+unescapeBackslashes ['\''] = ""
+unescapeBackslashes (x:xs) = x : unescapeBackslashes xs
+unescapeBackslashes "" = error "missing final endquote"
+
 stringLiteralPrefix = "__strlit__"
 
 postOrderRTree :: RTree a -> RTree (a,Int)

From 871014000a8b6f32e8181e3e9aaef5f45d4a76e5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andreas=20K=C3=A4llberg?= <anka.213@gmail.com>
Date: Wed, 1 Dec 2021 15:17:46 +0800
Subject: [PATCH 11/14] Use the word instead of lemma for morphoanalyze

---
 UD2GF.hs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/UD2GF.hs b/UD2GF.hs
index 9bb8ba3..4003616 100644
--- a/UD2GF.hs
+++ b/UD2GF.hs
@@ -465,13 +465,13 @@ analyseWords env = mapRTree lemma2fun
     devIsUnknown = isUnknown
     }
    where
-    (isUnknown,justWords) = getWordTrees (devLemma dn) (cats (devPOS dn))
+    (isUnknown,justWords) = getWordTrees (devWord dn) (devLemma dn) (cats (devPOS dn))
 
   cats pos = maybe [] (map (either (Left. fst) Right)) $ M.lookup pos (catsForPOS env)
 
   -- find all functions that are possible parses of the word in any appropriate category
   --- it is still possible that some other category is meant
-  getWordTrees w cs = case morphoFallback w $ concatMap (parseWord w) cs of
+  getWordTrees wf w cs = case morphoFallback wf $ concatMap (parseWord w) cs of
     [] -> case cs of
       [] -> (True,[(newWordTree w unknownCat, unknownCat)])
       _  -> (True,[(newWordTree w ec, ec) | c <- cs, let ec = either id id c])

From 4a820c618bcdb283028c06c91d0840369d2bd082 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andreas=20K=C3=A4llberg?= <anka.213@gmail.com>
Date: Wed, 1 Dec 2021 16:17:53 +0800
Subject: [PATCH 12/14] Only generate Str fallbacks if the functions exist in
 the grammar

---
 UD2GF.hs | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/UD2GF.hs b/UD2GF.hs
index 4003616..ea50fc1 100644
--- a/UD2GF.hs
+++ b/UD2GF.hs
@@ -474,10 +474,14 @@ analyseWords env = mapRTree lemma2fun
   getWordTrees wf w cs = case morphoFallback wf $ concatMap (parseWord w) cs of
     [] -> case cs of
       [] -> (True,[(newWordTree w unknownCat, unknownCat)])
-      _  -> (True,[(newWordTree w ec, ec) | c <- cs, let ec = either id id c])
+      _  -> (True,[(newWordTree w ec, ec) | c <- cs, let ec = either id id c, strFunExists ec])
 
     fs -> (False,fs)
 
+  strFunExists c | Just typ  <- functionType (pgfGrammar env) f = True
+                 | otherwise                                    = False
+      where f = mkCId ("Str" ++ showCId c)
+
   --- this can fail if c is discontinuous, or return false positives if w is a form of another word
   parseWord w ec = case ec of
     Left c -> case parse (pgfGrammar env) (actLanguage env) (mkType [] c []) w of

From 74185a1ec4199849b772c61d96e597432e557080 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andreas=20K=C3=A4llberg?= <anka.213@gmail.com>
Date: Thu, 2 Dec 2021 10:57:03 +0800
Subject: [PATCH 13/14] If no str functions exists, give all of them

This prevents a Prelude.head error form occurring
---
 UD2GF.hs | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/UD2GF.hs b/UD2GF.hs
index ea50fc1..6838af3 100644
--- a/UD2GF.hs
+++ b/UD2GF.hs
@@ -474,10 +474,17 @@ analyseWords env = mapRTree lemma2fun
   getWordTrees wf w cs = case morphoFallback wf $ concatMap (parseWord w) cs of
     [] -> case cs of
       [] -> (True,[(newWordTree w unknownCat, unknownCat)])
-      _  -> (True,[(newWordTree w ec, ec) | c <- cs, let ec = either id id c, strFunExists ec])
+      _  -> (True,[(newWordTree w ec, ec) | c <- cs, let ec = either id id c, strFunExists ec] 
+                   `ifEmpty` [(newWordTree w ec, ec) | c <- cs, let ec = either id id c])
 
     fs -> (False,fs)
 
+  -- | Return the first non-empty list
+  ifEmpty [] xs = xs
+  ifEmpty xs _  = xs
+  infixl 3 `ifEmpty`
+
+  -- Verify that a StrSomeCat function exists in grammar
   strFunExists c | Just typ  <- functionType (pgfGrammar env) f = True
                  | otherwise                                    = False
       where f = mkCId ("Str" ++ showCId c)

From d94a5fc26b1783198e0252302e1975c763c0497d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andreas=20K=C3=A4llberg?= <anka.213@gmail.com>
Date: Thu, 2 Dec 2021 11:00:26 +0800
Subject: [PATCH 14/14] If parse fails, try parsing the lowercase version

---
 UD2GF.hs | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/UD2GF.hs b/UD2GF.hs
index 6838af3..47f7398 100644
--- a/UD2GF.hs
+++ b/UD2GF.hs
@@ -471,7 +471,7 @@ analyseWords env = mapRTree lemma2fun
 
   -- find all functions that are possible parses of the word in any appropriate category
   --- it is still possible that some other category is meant
-  getWordTrees wf w cs = case morphoFallback wf $ concatMap (parseWord w) cs of
+  getWordTrees wf w cs = case concatMap (parseWord w) cs `ifEmpty` concatMap (parseWord (map toLower w)) cs `ifEmpty` morphoFallback wf of
     [] -> case cs of
       [] -> (True,[(newWordTree w unknownCat, unknownCat)])
       _  -> (True,[(newWordTree w ec, ec) | c <- cs, let ec = either id id c, strFunExists ec] 
@@ -482,7 +482,7 @@ analyseWords env = mapRTree lemma2fun
   -- | Return the first non-empty list
   ifEmpty [] xs = xs
   ifEmpty xs _  = xs
-  infixl 3 `ifEmpty`
+  infixr 3 `ifEmpty`
 
   -- Verify that a StrSomeCat function exists in grammar
   strFunExists c | Just typ  <- functionType (pgfGrammar env) f = True
@@ -503,14 +503,13 @@ analyseWords env = mapRTree lemma2fun
 
   -- Fall back to morphoanalysis if gf parse fails
   -- TODO: We might want to use the morphoanalysis for all words, not just when parse fails
-  morphoFallback :: String -> [(RTree Lemma, CId)] -> [(RTree CId, CId)]
-  morphoFallback w [] =
+  morphoFallback :: String -> [(RTree CId, CId)]
+  morphoFallback w =
     [(RTree name [], c)
     | (name, _) <- lookupMorpho morpho w
     , Just tp <- pure $ functionType (pgfGrammar env) name
     , ([], c, []) <- pure $ unType tp
     ]
-  morphoFallback _ xs = xs
 
 -- auxiliaries
 -- newWordTree w c = RTree (mkCId (w ++ "_" ++ showCId c)) [] ---