haskell · knothed · Oct 2, 2020 · Oct 13, 2020 · Oct 13, 2020 · Oct 15, 2020
diff --git a/CHANGES b/CHANGES
@@ -1,3 +1,13 @@
+-----------------------------------------------------------------------------
+1.21.0
+
+        * Add two new backends:
+         * Continuation-based LALR(1) (recursive ascent)
+         * Continuation-based RAD(1) (recursive ascent-descent)
+        * RAD generally produces smaller compiled code using fewer states
+        * Continuation-based form introduces a speed advantage over
+          table-based form
+
 -----------------------------------------------------------------------------
 1.20.0
 

diff --git a/cabal.project.local b/cabal.project.local
diff --git a/happy.cabal b/happy.cabal
@@ -1,5 +1,5 @@
 name: happy
-version: 1.20.0
+version: 1.21.0
 license: BSD2
 license-file: LICENSE
 copyright: (c) Andy Gill, Simon Marlow
@@ -161,12 +161,14 @@ executable happy
   build-depends: base < 5,
                  array,
                  containers >= 0.4.2,
+                 dom-lt >= 0.2.2,
+                 text,
                  mtl >= 2.2.1
                      -- mtl-2.2.1 added Control.Monad.Except
 
   default-language: Haskell98
   default-extensions: CPP, MagicHash, FlexibleContexts
-  ghc-options: -Wall
+  ghc-options: -Wall -Wno-name-shadowing -Wno-missing-signatures -Wno-type-defaults -Wno-incomplete-patterns -Wno-unused-local-binds -Wno-unused-matches
   other-modules:
         Paths_happy
         AbsSyn
@@ -186,6 +188,11 @@ executable happy
         AttrGrammarParser
         ParamRules
         PrettyGrammar
+        RADCodeGen
+        RADCodeGen_LALR
+        RADStateGen
+        RADTools
+        Follow
 
 test-suite tests
   type: exitcode-stdio-1.0

diff --git a/src/Follow.hs b/src/Follow.hs
@@ -0,0 +1,73 @@
+module Follow where
+  import Grammar
+  import RADTools
+  import Control.Monad
+  import Control.Monad.ST
+  import Data.Array.ST
+  import GHC.Arr
+  import Data.List (findIndices, tails)
+  import NameSet (NameSet, empty, fromList, union, unions, delete, member, singleton)
+
+  -- | Compute the FOLLOW set of every nonterminal of the grammar.
+  -- The resulting array spans the first to the last entry of 'non_terminals'.
+  -- @first@ supplies the FIRST set of a symbol sequence; 'epsilonTok' in that set
+  -- decides whether FOLLOW of a rule's left-hand side is propagated (see applyRule).
+  followArray :: Grammar -> ([Name] -> NameSet) -> Array Name NameSet
+  followArray g first = runSTArray (do
+      table <- newArray ntRange empty
+      mapM_ (seedEOF table) (starts g)
+      saturate table
+      return table)
+    where
+      -- Index range of the table: first and last listed nonterminal.
+      ntRange :: (Name, Name)
+      ntRange = (head nts, last nts) where nts = non_terminals g
+
+      -- Seed the nonterminal in the second field of a 'starts' entry with EOF.
+      seedEOF :: STArray s Int NameSet -> (a, Int, b, c) -> ST s ()
+      seedEOF table (_, startNT, _, _) = writeArray table startNT (singleton (eof_term g))
+
+      -- Fixpoint loop: repeat a pass over all productions until no set changes.
+      saturate :: STArray s Int NameSet -> ST s ()
+      saturate table = do
+        before <- getElems table
+        mapM_ (applyRule table) (productions g)
+        after <- getElems table
+        unless (before == after) (saturate table)
+
+      -- For every suffix @sym : rest@ of the right-hand side whose head is a
+      -- nonterminal: FOLLOW(sym) gains FIRST(rest) without epsilon, and also
+      -- FOLLOW(lhs) when FIRST(rest) contains epsilon.
+      applyRule :: STArray s Int NameSet -> Production -> ST s ()
+      applyRule table (Production lhs rhs _ _) = forM_ (tails rhs) extend where
+        extend (sym : rest) | sym `elem` non_terminals g = do
+          let firstRest = first rest
+              viaFirst = delete epsilonTok firstRest
+          lhsFollow <- readArray table lhs
+          let viaLhs = if member epsilonTok firstRest then lhsFollow else empty
+          known <- readArray table sym
+          writeArray table sym (union known (union viaFirst viaLhs))
+        extend _ = return ()
+
+  -- | The lc-follow set of nonterminal @nt@, given the complete FOLLOW array.
+  -- Only occurrences of @nt@ at or after a rule's recognition point contribute:
+  -- FIRST of what follows, and if that contains 'epsilonTok' the ordinary FOLLOW
+  -- (not lc-follow) of the rule's left-hand side. Start symbols also get EOF.
+  -- Recognition points are paired with productions positionally (this assumes
+  -- one entry per production) so no per-rule list indexing is needed.
+  lcfollow :: XGrammar -> ([Name] -> NameSet) -> (Array Name NameSet) -> Name -> NameSet
+  lcfollow x@(XGrammar { g = g }) first follow nt
+    | member nt startNTs = union (singleton (eof_term g)) fromRules
+    | otherwise = fromRules
+    where
+      startNTs = fromList [ s | (_, s, _, _) <- starts g ]
+      fromRules = unions (zipWith fromRule (recognitionPoints x) (productions g))
+
+      -- Contribution of one production; empty when @nt@ is not on its right-hand side.
+      fromRule :: Int -> Production -> NameSet
+      fromRule recPoint (Production lhs rhs _ _) =
+        unions [ after i | i <- findIndices (== nt) rhs, i >= recPoint ]
+        where
+          after i | member epsilonTok rest = union (delete epsilonTok rest) (follow ! lhs)
+                  | otherwise = rest
+            where rest = first (drop (i + 1) rhs)
diff --git a/src/Grammar.lhs b/src/Grammar.lhs
@@ -36,13 +36,16 @@ Here is our mid-section datatype
 
 > data Production
 >       = Production Name [Name] (String,[Int]) Priority
+>    deriving (Eq
 
 #ifdef DEBUG
 
->       deriving Show
+>    , Show
 
 #endif
 
+>    )
+
 > data Grammar
 >       = Grammar {
 >               productions       :: [Production],

diff --git a/src/LALR.lhs b/src/LALR.lhs
@@ -8,7 +8,7 @@ Generation of LALR parsing tables.
 > module LALR
 >       (genActionTable, genGotoTable, genLR0items, precalcClosure0,
 >        propLookaheads, calcLookaheads, mergeLookaheadInfo, countConflicts,
->        Lr0Item(..), Lr1Item)
+>        Lr0Item(..), Lr1Item(..))
 >       where
 
 > import GenUtils
@@ -35,15 +35,7 @@ Generation of LALR parsing tables.
 This means rule $a$, with dot at $b$ (all starting at 0)
 
 > data Lr0Item = Lr0 {-#UNPACK#-}!Int {-#UNPACK#-}!Int                  -- (rule, dot)
->       deriving (Eq,Ord
-
-#ifdef DEBUG
-
->       ,Show
-
-#endif
-
->       )
+>       deriving (Eq,Ord,Show)
 
 > data Lr1Item = Lr1 {-#UNPACK#-}!Int {-#UNPACK#-}!Int NameSet  -- (rule, dot, lookahead)
 

diff --git a/src/Main.lhs b/src/Main.lhs
@@ -21,6 +21,9 @@ Path settings auto-generated by Cabal:
 > import ProduceCode (produceParser)
 > import ProduceGLRCode
 > import Info (genInfoFile)
+> import qualified RADCodeGen as RAD
+> import qualified RADCodeGen_LALR as LALR
+> import RADStateGen
 > import Target (Target(..))
 > import System.Console.GetOpt
 > import Control.Monad ( liftM )
@@ -30,6 +33,7 @@ Path settings auto-generated by Cabal:
 > import System.IO
 > import Data.Array( assocs, elems, (!) )
 > import Data.List( nub, isSuffixOf )
+> import Data.Maybe (fromMaybe)
 > import Data.Version ( showVersion )
 #if defined(mingw32_HOST_OS)
 > import Foreign.Marshal.Array
@@ -202,6 +206,84 @@ Add any special options or imports required by the parsing machinery.
 >                    )
 >       in
 
+-------------------------------------
+Branch off to continuation-based LALR parser production:
+
+>       getForallMatch cli >>= \forallMatch ->
+>       let showTypes = OptCB_ShowTypes `elem` cli || rank2Types   -- a --forall match also turns types on
+>           showComments = OptCB_ShowComments `elem` cli
+>           rank2Types = maybe False (const True) forallMatch     -- True exactly when forallMatch is a Just
+>           match = fromMaybe "" forallMatch                      -- empty string when no match was given
+>           rulesTupleBased = OptCB_RAD_TupleBased `elem` cli
+>       in
+
+>       if OptCB_LALR `elem` cli
+>       then
+>       let (isMonad, _, parserType, _, _) = monad g
+>       
+>           ptype = case (Grammar.lexer g, isMonad) of   -- parser flavour from lexer presence and isMonad
+>             (Nothing, False) -> LALR.Normal
+>             (Nothing, True) -> LALR.Monad
+>             (Just _, False) -> error "%lexer without %monad not supported in continuation-based LALR"
+>             (Just _, True) -> LALR.MonadLexer
+>         
+>           options = LALR.GenOptions {
+>             LALR.ptype = ptype,
+>             LALR.wrapperType = if parserType == "Parser" then "HappyP" else "Parser",
+>             LALR.errorTokenType = "ErrorToken",
+>             LALR.header = fromMaybe "" hd,
+>             LALR.footer = fromMaybe "" tl,
+>             LALR.showTypes = showTypes,
+>             LALR.comments = showComments,
+>             LALR.rank2Types = rank2Types,
+>             LALR.forallMatch = match
+>           }
+>           
+>           lalrStates = generateLALRStates g action goto items2
+>       in
+>       LALR.genCode options g lalrStates action goto >>=
+>       (if outfilename == "-" then putStr else writeFile outfilename)
+>       
+>       else
+
+Branch off to continuation-based RAD parser production:
+
+>       if OptCB_RAD `elem` cli || OptCB_RAD_TupleBased `elem` cli
+>       then
+>         
+>       let (isMonad, _, parserType, _, _) = monad g
+>
+>           optimize = OptCB_RAD_Optimizations `elem` cli
+>       
+>           ptype = case (Grammar.lexer g, isMonad) of
+>             (Nothing, False) -> RAD.Normal
+>             (Nothing, True) -> RAD.Monad
+>             (Just _, False) -> error "%lexer without %monad not supported in RAD"
+>             (Just _, True) -> RAD.MonadLexer
+>         
+>           options = RAD.GenOptions {
+>             RAD.ptype = ptype,
+>             RAD.wrapperType = if parserType == "Parser" then "HappyP" else "Parser",
+>             RAD.errorTokenType = "ErrorToken",
+>             RAD.header = fromMaybe "" hd,
+>             RAD.footer = fromMaybe "" tl,
+>             RAD.showTypes = showTypes,
+>             RAD.comments = showComments,
+>             RAD.rank2Types = rank2Types,
+>             RAD.rulesTupleBased = rulesTupleBased,
+>             RAD.forallMatch = match,
+>             RAD.optimize = optimize
+>           }
+>           
+>           lalrStates = generateLALRStates g action goto items2
+>       in
+>       createXGrammar g lalrStates >>= \x ->
+>       generateRADStates x lalrStates unused_rules >>= \radStates ->
+>       RAD.genCode options x radStates action goto >>=
+>       (if outfilename == "-" then putStr else writeFile outfilename)
+>       
+>       else
+
 
 %---------------------------------------
 Branch off to GLR parser production
@@ -403,6 +485,14 @@ The command line arguments.
 >               | OptGLR
 >               | OptGLR_Decode
 >               | OptGLR_Filter
+>
+>               | OptCB_RAD                          -- --cb-rad
+>               | OptCB_RAD_TupleBased               -- --cb-rad-tuple
+>               | OptCB_RAD_Optimizations            -- --optims
+>               | OptCB_LALR                         -- --cb-lalr
+>               | OptCB_ShowTypes                    -- --types
+>               | OptCB_ShowComments                 -- --comments
+>               | OptCB_ForallMatch (Maybe String)   -- --forall[=MATCH]
 >  deriving Eq
 
 > argInfo :: [OptDescr CLIFlags]
@@ -436,7 +526,21 @@ The command line arguments.
 >    Option ['?'] ["help"] (NoArg DumpHelp)
 >       "display this help and exit",
 >    Option ['V','v'] ["version"] (NoArg DumpVersion)   -- ToDo: -v is deprecated
->       "output version information and exit"
+>       "output version information and exit",
+>    Option [] ["cb-rad"] (NoArg OptCB_RAD)
+>       "create a continuation-based Recursive Ascent-Descent parser. Not compatible with most other options",
+>    Option [] ["cb-rad-tuple"] (NoArg OptCB_RAD_TupleBased)
+>       "same as cb-rad, but uses tuples instead of continuations inside rule functions",
+>    Option [] ["optims"] (NoArg OptCB_RAD_Optimizations)
+>       "add optimizations such as eta-expansions and explicit rule-inlining to a RAD parser",
+>    Option [] ["cb-lalr"] (NoArg OptCB_LALR)
+>       "create a continuation-based LALR parser. Not compatible with most other options",
+>    Option [] ["types"] (NoArg OptCB_ShowTypes)
+>       "show function types for continuation-based parsers",
+>    Option [] ["comments"] (NoArg OptCB_ShowComments)
+>       "show comments for continuation-based parsers",
+>    Option [] ["forall"] (OptArg OptCB_ForallMatch "MATCH")
+>       "a string which is used to detect and handle higher-rank function types"
 
 #ifdef DEBUG
 
@@ -572,6 +676,13 @@ Extract various command-line options.
 >               []         -> return Nothing
 >               f:fs       -> return (Just (map toLower (last (f:fs))))
 
+> getForallMatch :: [CLIFlags] -> IO (Maybe String)   -- argument of the single --forall flag, if any
+> getForallMatch cli
+>       = case [ arg | OptCB_ForallMatch arg <- cli ] of
+>               []       -> return Nothing       -- flag not given
+>               [match]  -> return match         -- NB: a bare --forall (no MATCH) also yields Nothing
+>               _        -> dieHappy "multiple --forall options\n"
+
 > getCoerce :: Target -> [CLIFlags] -> IO Bool
 > getCoerce _target cli
 >       = if OptUseCoercions `elem` cli