diff --git a/.stylish-haskell.yaml b/.stylish-haskell.yaml new file mode 100644 index 0000000..87205d1 --- /dev/null +++ b/.stylish-haskell.yaml @@ -0,0 +1,380 @@ +# stylish-haskell configuration file +# ================================== + +# The stylish-haskell tool is mainly configured by specifying steps. These steps +# are a list, so they have an order, and one specific step may appear more than +# once (if needed). Each file is processed by these steps in the given order. +steps: + # Convert some ASCII sequences to their Unicode equivalents. This is disabled + # by default. + # - unicode_syntax: + # # In order to make this work, we also need to insert the UnicodeSyntax + # # language pragma. If this flag is set to true, we insert it when it's + # # not already present. You may want to disable it if you configure + # # language extensions using some other method than pragmas. Default: + # # true. + # add_language_pragma: true + + # Format module header + # + # Currently, this option is not configurable and will format all exports and + # module declarations to minimize diffs + # + # - module_header: + # # How many spaces use for indentation in the module header. + # indent: 4 + # + # # Should export lists be sorted? Sorting is only performed within the + # # export section, as delineated by Haddock comments. + # sort: true + # + # # See `separate_lists` for the `imports` step. + # separate_lists: true + # + # # When to break the "where". + # # Possible values: + # # - exports: only break when there is an explicit export list. + # # - single: only break when the export list counts more than one export. + # # - inline: only break when the export list is too long. This is + # # determined by the `columns` setting. Not applicable when the export + # # list contains comments as newlines will be required. + # # - always: always break before the "where". + # break_where: exports + # + # # Where to put open bracket + # # Possible values: + # # - same_line: put open bracket on the same line as the module name, before the + # # comment of the module + # # - next_line: put open bracket on the next line, after module comment + # open_bracket: next_line + + # Format record definitions. This is disabled by default. + # + # You can control the layout of record fields. The only rules that can't be configured + # are these: + # + # - "|" is always aligned with "=" + # - "," in fields is always aligned with "{" + # - "}" is likewise always aligned with "{" + # + # - records: + # # How to format equals sign between type constructor and data constructor. + # # Possible values: + # # - "same_line" -- leave "=" AND data constructor on the same line as the type constructor. + # # - "indent N" -- insert a new line and N spaces from the beginning of the next line. + # equals: "indent 2" + # + # # How to format first field of each record constructor. + # # Possible values: + # # - "same_line" -- "{" and first field goes on the same line as the data constructor. + # # - "indent N" -- insert a new line and N spaces from the beginning of the data constructor + # first_field: "indent 2" + # + # # How many spaces to insert between the column with "," and the beginning of the comment in the next line. + # field_comment: 2 + # + # # How many spaces to insert before "deriving" clause. Deriving clauses are always on separate lines. + # deriving: 2 + # + # # How many spaces to insert before "via" clause counted from indentation of deriving clause + # # Possible values: + # # - "same_line" -- "via" part goes on the same line as "deriving" keyword. + # # - "indent N" -- insert a new line and N spaces from the beginning of "deriving" keyword. + # via: "indent 2" + # + # # Sort typeclass names in the "deriving" list alphabetically. + # sort_deriving: true + # + # # Whether or not to break enums onto several lines + # # + # # Default: false + # break_enums: false + # + # # Whether or not to break single constructor data types before `=` sign + # # + # # Default: true + # break_single_constructors: true + # + # # Whether or not to curry constraints on function. + # # + # # E.g: @allValues :: Enum a => Bounded a => Proxy a -> [a]@ + # # + # # Instead of @allValues :: (Enum a, Bounded a) => Proxy a -> [a]@ + # # + # # Default: false + # curried_context: false + + # Align the right hand side of some elements. This is quite conservative + # and only applies to statements where each element occupies a single + # line. + # Possible values: + # - always - Always align statements. + # - adjacent - Align statements that are on adjacent lines in groups. + # - never - Never align statements. + # All default to always. + - simple_align: + cases: always + top_level_patterns: always + records: always + multi_way_if: always + + # Import cleanup + - imports: + # There are different ways we can align names and lists. + # + # - global: Align the import names and import list throughout the entire + # file. + # + # - file: Like global, but don't add padding when there are no qualified + # imports in the file. + # + # - group: Only align the imports per group (a group is formed by adjacent + # import lines). + # + # - none: Do not perform any alignment. + # + # Default: global. + align: none + + # The following options affect only import list alignment. + # + # List align has following options: + # + # - after_alias: Import list is aligned with end of import including + # 'as' and 'hiding' keywords. + # + # > import qualified Data.List as List (concat, foldl, foldr, head, + # > init, last, length) + # + # - with_alias: Import list is aligned with start of alias or hiding. + # + # > import qualified Data.List as List (concat, foldl, foldr, head, + # > init, last, length) + # + # - with_module_name: Import list is aligned `list_padding` spaces after + # the module name. + # + # > import qualified Data.List as List (concat, foldl, foldr, head, + # init, last, length) + # + # This is mainly intended for use with `pad_module_names: false`. + # + # > import qualified Data.List as List (concat, foldl, foldr, head, + # init, last, length, scanl, scanr, take, drop, + # sort, nub) + # + # - new_line: Import list starts always on new line. + # + # > import qualified Data.List as List + # > (concat, foldl, foldr, head, init, last, length) + # + # - repeat: Repeat the module name to align the import list. + # + # > import qualified Data.List as List (concat, foldl, foldr, head) + # > import qualified Data.List as List (init, last, length) + # + # Default: after_alias + list_align: after_alias + + # Right-pad the module names to align imports in a group: + # + # - true: a little more readable + # + # > import qualified Data.List as List (concat, foldl, foldr, + # > init, last, length) + # > import qualified Data.List.Extra as List (concat, foldl, foldr, + # > init, last, length) + # + # - false: diff-safe + # + # > import qualified Data.List as List (concat, foldl, foldr, init, + # > last, length) + # > import qualified Data.List.Extra as List (concat, foldl, foldr, + # > init, last, length) + # + # Default: true + pad_module_names: true + + # Long list align style takes effect when import is too long. This is + # determined by 'columns' setting. + # + # - inline: This option will put as much specs on same line as possible. + # + # - new_line: Import list will start on new line. + # + # - new_line_multiline: Import list will start on new line when it's + # short enough to fit to single line. Otherwise it'll be multiline. + # + # - multiline: One line per import list entry. + # Type with constructor list acts like single import. + # + # > import qualified Data.Map as M + # > ( empty + # > , singleton + # > , ... + # > , delete + # > ) + # + # Default: inline + long_list_align: inline + + # Align empty list (importing instances) + # + # Empty list align has following options + # + # - inherit: inherit list_align setting + # + # - right_after: () is right after the module name: + # + # > import Vector.Instances () + # + # Default: inherit + empty_list_align: inherit + + # List padding determines indentation of import list on lines after import. + # This option affects 'long_list_align'. + # + # - : constant value + # + # - module_name: align under start of module name. + # Useful for 'file' and 'group' align settings. + # + # Default: 4 + list_padding: 4 + + # Separate lists option affects formatting of import list for type + # or class. The only difference is single space between type and list + # of constructors, selectors and class functions. + # + # - true: There is single space between Foldable type and list of it's + # functions. + # + # > import Data.Foldable (Foldable (fold, foldl, foldMap)) + # + # - false: There is no space between Foldable type and list of it's + # functions. + # + # > import Data.Foldable (Foldable(fold, foldl, foldMap)) + # + # Default: true + separate_lists: true + + # Space surround option affects formatting of import lists on a single + # line. The only difference is single space after the initial + # parenthesis and a single space before the terminal parenthesis. + # + # - true: There is single space associated with the enclosing + # parenthesis. + # + # > import Data.Foo ( foo ) + # + # - false: There is no space associated with the enclosing parenthesis + # + # > import Data.Foo (foo) + # + # Default: false + space_surround: false + + # Post qualify option moves any qualifies found in import declarations + # to the end of the declaration. This also adjust padding for any + # unqualified import declarations. + # + # - true: Qualified as is moved to the end of the + # declaration. + # + # > import Data.Bar + # > import Data.Foo qualified as F + # + # - false: Qualified remains in the default location and unqualified + # imports are padded to align with qualified imports. + # + # > import Data.Bar + # > import qualified Data.Foo as F + # + # Default: false + post_qualify: false + + + # Language pragmas + - language_pragmas: + # We can generate different styles of language pragma lists. + # + # - vertical: Vertical-spaced language pragmas, one per line. + # + # - compact: A more compact style. + # + # - compact_line: Similar to compact, but wrap each line with + # `{-# LANGUAGE #-}'. + # + # - vertical_compact: Similar to vertical, but use only one language pragma. + # + # Default: vertical. + style: vertical + + # Align affects alignment of closing pragma brackets. + # + # - true: Brackets are aligned in same column. + # + # - false: Brackets are not aligned together. There is only one space + # between actual import and closing bracket. + # + # Default: true + align: true + + # stylish-haskell can detect redundancy of some language pragmas. If this + # is set to true, it will remove those redundant pragmas. Default: true. + remove_redundant: true + + # Language prefix to be used for pragma declaration, this allows you to + # use other options non case-sensitive like "language" or "Language". + # If a non correct String is provided, it will default to: LANGUAGE. + language_prefix: LANGUAGE + + # Replace tabs by spaces. This is disabled by default. + # - tabs: + # # Number of spaces to use for each tab. Default: 8, as specified by the + # # Haskell report. + # spaces: 8 + + # Remove trailing whitespace + - trailing_whitespace: {} + + # Squash multiple spaces between the left and right hand sides of some + # elements into single spaces. Basically, this undoes the effect of + # simple_align but is a bit less conservative. + # - squash: {} + +# A common setting is the number of columns (parts of) code will be wrapped +# to. Different steps take this into account. +# +# Set this to null to disable all line wrapping. +# +# Default: 80. +columns: 80 + +# By default, line endings are converted according to the OS. You can override +# preferred format here. +# +# - native: Native newline format. CRLF on Windows, LF on other OSes. +# +# - lf: Convert to LF ("\n"). +# +# - crlf: Convert to CRLF ("\r\n"). +# +# Default: native. +newline: native + +# Sometimes, language extensions are specified in a cabal file or from the +# command line instead of using language pragmas in the file. stylish-haskell +# needs to be aware of these, so it can parse the file correctly. +# +# No language extensions are enabled by default. +# language_extensions: + # - TemplateHaskell + # - QuasiQuotes + +# Attempt to find the cabal file in ancestors of the current directory, and +# parse options (currently only language extensions) from that. +# +# Default: true +cabal: true diff --git a/GFConcepts.hs b/GFConcepts.hs index 898362e..137a822 100644 --- a/GFConcepts.hs +++ b/GFConcepts.hs @@ -39,11 +39,30 @@ pgf2functions pgf = [(fun,(val,[arg | (_,_,ty) <- hs, let (_,arg,_) = unType ty] expr2abstree :: PGF.Expr -> AbsTree expr2abstree e = case unApp e of Just (f,es) -> RTree f (map expr2abstree es) + -- _ | Just q <- unStr e -> RTree (mkCId "StrLit") [RTree (mkCId (show q)) []] _ -> error ("ERROR: no constructor tree from " ++ showExpr [] e) abstree2expr :: AbsTree -> PGF.Expr +abstree2expr tr@(RTree f []) | Just str <- asStringLiteral f = mkStr str abstree2expr tr@(RTree f ts) = mkApp f (map abstree2expr ts) +-- | Check if a CId is actually a string literal in disguise +asStringLiteral :: CId -> Maybe String +asStringLiteral f = stripPrefix stringLiteralPrefix $ unescape (showCId f) + +-- | showCId will escape non-valid literals, so we need to unescape them +unescape :: String -> String +unescape ('\'':str) = unescapeBackslashes str +unescape str = str + +unescapeBackslashes :: String -> String +unescapeBackslashes ('\\':x:xs) = x : unescapeBackslashes xs +unescapeBackslashes ['\''] = "" +unescapeBackslashes (x:xs) = x : unescapeBackslashes xs +unescapeBackslashes "" = error "missing final endquote" + +stringLiteralPrefix = "__strlit__" + postOrderRTree :: RTree a -> RTree (a,Int) postOrderRTree = post 0 where @@ -51,7 +70,7 @@ postOrderRTree = post 0 where post n t = case t of RTree a ts -> case posts n ts of (nts,nn) -> RTree (a,nn) nts - + posts :: Int -> [RTree a] -> ([RTree (a,Int)],Int) posts n ts = case ts of [] -> ([],n) diff --git a/UD2GF.hs b/UD2GF.hs index eb600f1..47f7398 100644 --- a/UD2GF.hs +++ b/UD2GF.hs @@ -1,24 +1,24 @@ module UD2GF where +import Backend +import GFConcepts import RTree -import UDConcepts import UDAnnotations -import GFConcepts +import UDConcepts import UDOptions import UDVisualization -import Backend import PGF hiding (CncLabels) -import qualified Data.Map as M -import Data.List import Data.Char +import Data.List +import qualified Data.Map as M import Data.Maybe -import Text.PrettyPrint (render, cat) +import Text.PrettyPrint (cat, render) -import Debug.Trace (trace) import Data.Function (on) import Data.Ord (comparing) +import Debug.Trace (trace) --------- -- to debug @@ -66,7 +66,7 @@ showUD2GF opts env sentence = do ifOpt opts "ud" $ prt sentence case errors sentence of - [] -> return () + [] -> return () errs -> ifOpt opts "err" $ unlines errs let udtree = udSentence2tree sentence @@ -132,11 +132,11 @@ showUD2GF opts env sentence = do data UD2GFStat = UD2GFStat { - totalWords :: Int, - interpretedWords :: Int, - unknownWords :: Int, - totalSentences :: Int, - completeSentences :: Int, + totalWords :: Int, + interpretedWords :: Int, + unknownWords :: Int, + totalSentences :: Int, + completeSentences :: Int, typecorrectSentences :: Int } deriving Show @@ -172,7 +172,7 @@ data CheckResult = CheckResult { prCheckResult cr = unlines $ case resultUnknowns cr of - [] -> [] + [] -> [] uks -> [unwords $ "unknown words:" : map showCId uks] ++ [resultMessage cr] @@ -181,13 +181,13 @@ prCheckResult cr = unlines $ checkAbsTreeResult :: UDEnv -> AbsTree -> CheckResult checkAbsTreeResult env t = CheckResult { resultTree = mt, - resultUnknowns = [f | f <- allNodesRTree t, Nothing <- [functionType pgf f]], + resultUnknowns = [f | f <- allNodesRTree t, Nothing <- [asStringLiteral f], Nothing <- [functionType pgf f]], resultMessage = msg } where pgf = pgfGrammar env (mt,msg) = case inferExpr pgf (abstree2expr t) of - Left tce -> (Nothing, render (ppTcError tce)) + Left tce -> (Nothing, render (ppTcError tce)) Right (exp,typ) -> (Just exp, "type checking OK") @@ -209,12 +209,12 @@ data DevNode = DevNode { deriving Show mapDevAbsTree :: (AbsTreeInfo -> AbsTreeInfo) -> DevNode -> DevNode -mapDevAbsTree f dn = dn { devAbsTrees = map f (devAbsTrees dn) } +mapDevAbsTree f dn = dn { devAbsTrees = map f (devAbsTrees dn) } data AbsTreeInfo = AbsTreeInfo { atiAbsTree :: AbsTree - , atiCat :: Cat - , atiUDIds :: [UDId] + , atiCat :: Cat + , atiUDIds :: [UDId] } deriving (Show, Eq) @@ -287,7 +287,7 @@ addBackups0 tr@(RTree dn trs) = case map collectBackup (tr:trs) of -- backups f theAbsTreeInfo :: DevTree -> AbsTreeInfo theAbsTreeInfo dt = case devAbsTrees (root dt) of [t] -> t - _ -> error $ "no unique abstree in " ++ prDevNode 2 (root dt) + _ -> error $ "no unique abstree in " ++ prDevNode 2 (root dt) -- split trees showing just one GF tree in each DevTree splitDevTree :: DevTree -> [DevTree] @@ -298,7 +298,7 @@ splitDevTree tr@(RTree dn trs) = case elem (devIndex d) usage of True -> case sortOn ((1000-) . sizeRTree . atiAbsTree) [dt | dt@AbsTreeInfo { atiAbsTree = t} <- devAbsTrees d, isSubRTree t ast] of t:_ -> RTree (d{devAbsTrees = [t]}) (map (chase t) ts) - _ -> error $ "wrong indexing in\n" ++ prLinesRTree (prDevNode 1) tr + _ -> error $ "wrong indexing in\n" ++ prLinesRTree (prDevNode 1) tr False -> head $ splitDevTree $ RTree (d{devNeedBackup = True}) ts ---- head prtStatus udids = "[" ++ concat (intersperse "," (map prt udids)) ++ "]" @@ -458,25 +458,37 @@ combineTrees env = analyseWords :: UDEnv -> DevTree -> DevTree analyseWords env = mapRTree lemma2fun where + morpho = buildMorpho (pgfGrammar env) (actLanguage env) lemma2fun dn = dn { devAbsTrees = [AbsTreeInfo { atiAbsTree = t, atiCat = c, atiUDIds = [devIndex dn]} | (t,c) <- justWords], devStatus = [devIndex dn], devIsUnknown = isUnknown } where - (isUnknown,justWords) = getWordTrees (devLemma dn) (cats (devPOS dn)) + (isUnknown,justWords) = getWordTrees (devWord dn) (devLemma dn) (cats (devPOS dn)) cats pos = maybe [] (map (either (Left. fst) Right)) $ M.lookup pos (catsForPOS env) -- find all functions that are possible parses of the word in any appropriate category --- it is still possible that some other category is meant - getWordTrees w cs = case concatMap (parseWord w) cs of + getWordTrees wf w cs = case concatMap (parseWord w) cs `ifEmpty` concatMap (parseWord (map toLower w)) cs `ifEmpty` morphoFallback wf of [] -> case cs of [] -> (True,[(newWordTree w unknownCat, unknownCat)]) - _ -> (True,[(newWordTree w ec, ec) | c <- cs, let ec = either id id c]) + _ -> (True,[(newWordTree w ec, ec) | c <- cs, let ec = either id id c, strFunExists ec] + `ifEmpty` [(newWordTree w ec, ec) | c <- cs, let ec = either id id c]) fs -> (False,fs) + -- | Return the first non-empty list + ifEmpty [] xs = xs + ifEmpty xs _ = xs + infixr 3 `ifEmpty` + + -- Verify that a StrSomeCat function exists in grammar + strFunExists c | Just typ <- functionType (pgfGrammar env) f = True + | otherwise = False + where f = mkCId ("Str" ++ showCId c) + --- this can fail if c is discontinuous, or return false positives if w is a form of another word parseWord w ec = case ec of Left c -> case parse (pgfGrammar env) (actLanguage env) (mkType [] c []) w of @@ -485,12 +497,23 @@ analyseWords env = mapRTree lemma2fun all (\f -> M.notMember f (disabledFunctions (cncLabels env))) (allNodesRTree at)] Right c -> case elem (w,c) auxWords of True -> [(newWordTree w c, c)] - _ -> [] + _ -> [] auxWords = [(lemma,cat) | ((fun_,lemma),(cat,labels_)) <- M.assocs (lemmaLabels (cncLabels env))] + -- Fall back to morphoanalysis if gf parse fails + -- TODO: We might want to use the morphoanalysis for all words, not just when parse fails + morphoFallback :: String -> [(RTree CId, CId)] + morphoFallback w = + [(RTree name [], c) + | (name, _) <- lookupMorpho morpho w + , Just tp <- pure $ functionType (pgfGrammar env) name + , ([], c, []) <- pure $ unType tp + ] + -- auxiliaries -newWordTree w c = RTree (mkCId (w ++ "_" ++ showCId c)) [] --- +-- newWordTree w c = RTree (mkCId (w ++ "_" ++ showCId c)) [] --- +newWordTree w c = RTree (mkCId ("Str" ++ showCId c)) [RTree (mkCId (stringLiteralPrefix ++ w)) []] --- isNewWordFun f = isInfixOf "__x__" (showCId f) unknownCat = mkCId "Adv" --- treat unknown words as adverbs ---- TODO: from config quote s = "\"" ++ s ++ "\"" @@ -542,5 +565,5 @@ udtree2devtree = markClosest . initialize -- >>> select [1,2,3] -- [(1,[2,3]),(2,[1,3]),(3,[1,2])] select :: [a] -> [(a,[a])] -select [] = [] -select (a : as) = (a,as) : [ (b,a:bs) | (b,bs) <-select as ] \ No newline at end of file +select [] = [] +select (a : as) = (a,as) : [ (b,a:bs) | (b,bs) <-select as ] diff --git a/tests/TestSuite.hs b/tests/TestSuite.hs index 8232791..50e95c8 100644 --- a/tests/TestSuite.hs +++ b/tests/TestSuite.hs @@ -1,10 +1,10 @@ module Main where +import PGF import Test.Hspec import UD2GF -import PGF import UDAnnotations -import UDConcepts (UDData(UDData)) +import UDConcepts (UDData (UDData)) myUDEnv :: IO UDEnv @@ -40,7 +40,7 @@ bestTrees env conll = map exprStr exprs exprs = getExprs [] env conll exprStr expr = case expr of (x : _xs) -> showExpr [] x - _ -> "bestTree: ud2gf failed" + _ -> "bestTree: ud2gf failed" bestTree :: UDEnv -> String -> String @@ -49,10 +49,10 @@ bestTree env conll = exprStr exprs = getExprs [] env conll exprStr = case exprs of (x : _xs) : _xss -> showExpr [] x - _ -> "bestTree: ud2gf failed" + _ -> "bestTree: ud2gf failed" theCatSleepsAlready :: String -theCatSleepsAlready = unlines +theCatSleepsAlready = unlines [ "1\tthe\tthe\tDET\tQuant\tFORM=0\t2\tdet\t_\tFUN=DefArt" , "2\tcat\tcat\tNOUN\tNN\tNumber=Sing\t3\tnsubj\t_\tFUN=cat_N" , "3\tsleeps\tsleep\tVERB\tVBZ\tMood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin\t0\troot\t_\tFUN=sleepVBZ"