From 22b3dbb05c9a54fdaabd222728c3847cc20bd734 Mon Sep 17 00:00:00 2001 From: Sebastian Graf Date: Wed, 24 Jan 2024 19:15:08 +0100 Subject: [PATCH 01/10] Update .gitignore --- .gitignore | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 13cda316..0a5b1bf2 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,9 @@ dist-newstyle cabal-dev .cabal-sandbox cabal.sandbox.config +cabal.project.local .*.swp .*.swo -/.vscode/ \ No newline at end of file +/.vscode/ +packages/frontend/src/Happy/Frontend/Parser/Bootstrapped.hs +packages/frontend/src/Happy/Frontend/AttrGrammar/Parser.hs From 086b3b051e30d47f7a9203d452e0ed2f3b8a93e4 Mon Sep 17 00:00:00 2001 From: Sebastian Graf Date: Fri, 26 Jan 2024 10:40:15 +0100 Subject: [PATCH 02/10] -Wno-incomplete-uni-patterns God, this was annoying --- packages/backend-lalr/src/Happy/Backend/LALR/ProduceCode.lhs | 1 + packages/tabular/src/Happy/Tabular/LALR.lhs | 2 ++ 2 files changed, 3 insertions(+) diff --git a/packages/backend-lalr/src/Happy/Backend/LALR/ProduceCode.lhs b/packages/backend-lalr/src/Happy/Backend/LALR/ProduceCode.lhs index 68fb5288..33dc543e 100644 --- a/packages/backend-lalr/src/Happy/Backend/LALR/ProduceCode.lhs +++ b/packages/backend-lalr/src/Happy/Backend/LALR/ProduceCode.lhs @@ -4,6 +4,7 @@ The code generator. (c) 1993-2001 Andy Gill, Simon Marlow ----------------------------------------------------------------------------- +> {-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-} > module Happy.Backend.LALR.ProduceCode (produceParser) where > import Paths_happy_backend_lalr ( version ) diff --git a/packages/tabular/src/Happy/Tabular/LALR.lhs b/packages/tabular/src/Happy/Tabular/LALR.lhs index 8597a405..edd9f137 100644 --- a/packages/tabular/src/Happy/Tabular/LALR.lhs +++ b/packages/tabular/src/Happy/Tabular/LALR.lhs @@ -5,6 +5,8 @@ Generation of LALR parsing tables. (c) 1997-2001 Simon Marlow ----------------------------------------------------------------------------- +> {-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-} +> > module Happy.Tabular.LALR > (genActionTable, genGotoTable, genLR0items, precalcClosure0, > propLookaheads, calcLookaheads, mergeLookaheadInfo, countConflicts, From 96c394397ba91442a23400ca5ab20c02c608d3d1 Mon Sep 17 00:00:00 2001 From: Sebastian Graf Date: Sun, 21 Jan 2024 15:17:39 +0100 Subject: [PATCH 03/10] tmp --- packages/backend-lalr/data/HappyTemplate.hs | 49 ++++++-- .../src/Happy/Backend/LALR/ProduceCode.lhs | 111 ++++++++++-------- .../src/Happy/CodeGen/Common/Options.lhs | 8 +- .../frontend/src/Happy/Frontend/AbsSyn.lhs | 8 +- .../frontend/src/Happy/Frontend/Lexer.lhs | 5 +- .../frontend/src/Happy/Frontend/Mangler.lhs | 4 +- .../src/Happy/Frontend/Parser/Bootstrapped.ly | 8 +- tests/monaderror-explist.y | 6 +- 8 files changed, 129 insertions(+), 70 deletions(-) diff --git a/packages/backend-lalr/data/HappyTemplate.hs b/packages/backend-lalr/data/HappyTemplate.hs index e9125ea6..00ecbb3c 100644 --- a/packages/backend-lalr/data/HappyTemplate.hs +++ b/packages/backend-lalr/data/HappyTemplate.hs @@ -183,6 +183,11 @@ happyShift new_state ERROR_TOK tk st sts stk@(x `HappyStk` _) = let i = GET_ERROR_TOKEN(x) in -- trace "shifting the error token" $ DO_ACTION(new_state,i,tk,CONS(st,sts),stk) + -- TODO: When `i` would enter error recovery again, we should instead + -- discard input until the lookahead is acceptable. Perhaps this is + -- simplest to implement in CodeGen for productions using `error`; + -- there we know the context and can implement local shift+discard actions. + -- still need to remember parser-defined error site, though. happyShift new_state i tk st sts stk = happyNewToken new_state CONS(st,sts) (MK_TOKEN(tk)`HappyStk`stk) @@ -191,8 +196,14 @@ happyShift new_state i tk st sts stk = happySpecReduce_0 i fn ERROR_TOK tk st sts stk = happyFail [] ERROR_TOK tk st sts stk + -- SG: I'm very doubtful that passing [] ("no token expected here") + -- as the first arg to happyFail here and in the following calls is + -- correct. I'm not going to touch it for a lack of understanding + -- and concerns of of backward compatibility, but + -- `happyExpListPerState (IBOX(st) :: Prelude.Int)` + -- seems like a good candidate. happySpecReduce_0 nt fn j tk st@(HAPPYSTATE(action)) sts stk - = GOTO(action) nt j tk st CONS(st,sts) (fn `HappyStk` stk) + = happySeq fn (GOTO(action) nt j tk st CONS(st,sts) (fn `HappyStk` stk)) happySpecReduce_1 i fn ERROR_TOK tk st sts stk = happyFail [] ERROR_TOK tk st sts stk @@ -266,20 +277,35 @@ happyGoto action j tk st = action j j tk (HappyState action) #endif ----------------------------------------------------------------------------- --- Error recovery (ERROR_TOK is the error token) - --- parse error if we are in recovery and we fail again +-- Error recovery +-- +-- When there is no applicable action for the current lookahead token `tk`, +-- happy enters error recovery mode. It works in 2 phases: +-- +-- 1. Fixup: Try to see if there is an action for the error token (`errorTok`, +-- which is ERROR_TOK). If there is, do *not* emit an error and pretend +-- instead that an `errorTok` was inserted. +-- When there is no `errorTok` action, call `happyErro` and enter error +-- resumption mode. +-- 2. Error resumption mode: After `happyError` was called TODO: happyError is fatal. +-- Perhaps we should introduce a new `happyAddError`? +-- Current plan: New %resumptive declaration for specifying the two funs, +-- mutually exclusive with %error. +-- +-- This is what usually is associated with `error` +-- in `bison` or `menhir`. Since `error` is used for the Fixup mechanism (1) +-- above, we call the corresponding token `catch`. +-- In particular, `catch` will never *omit* calls to `happyFail`. + +-- parse error if we are in recovery and reached the end of the state stack happyFail explist ERROR_TOK tk old_st _ stk@(x `HappyStk` _) = let i = GET_ERROR_TOKEN(x) in -- trace "failing" $ - happyError_ explist i tk - -{- We don't need state discarding for our restricted implementation of - "error". In fact, it can cause some bogus parses, so I've disabled it - for now --SDM + happyError_ explist i tk noResumption_ +{- -- discard a state -happyFail ERROR_TOK tk old_st CONS(HAPPYSTATE(action),sts) +happyFail explist ERROR_TOK tk old_st CONS(HAPPYSTATE(action),sts) (saved_tok `HappyStk` _ `HappyStk` stk) = -- trace ("discarding state, depth " ++ show (length stk)) $ DO_ACTION(action,ERROR_TOK,tk,sts,(saved_tok`HappyStk`stk)) @@ -287,10 +313,11 @@ happyFail ERROR_TOK tk old_st CONS(HAPPYSTATE(action),sts) -- Enter error recovery: generate an error token, -- save the old token and carry on. +-- When a `happyShift` accepts, we will pop off the error +-- token to resume parsing with the current lookahead `i`. happyFail explist i tk HAPPYSTATE(action) sts stk = -- trace "entering error recovery" $ DO_ACTION(action,ERROR_TOK,tk,sts, MK_ERROR_TOKEN(i) `HappyStk` stk) - -- Internal happy errors: notHappyAtAll :: a diff --git a/packages/backend-lalr/src/Happy/Backend/LALR/ProduceCode.lhs b/packages/backend-lalr/src/Happy/Backend/LALR/ProduceCode.lhs index 33dc543e..59cdfb90 100644 --- a/packages/backend-lalr/src/Happy/Backend/LALR/ProduceCode.lhs +++ b/packages/backend-lalr/src/Happy/Backend/LALR/ProduceCode.lhs @@ -63,6 +63,7 @@ Produce the complete output file. > , token_type = token_type' > , error_handler = error_handler' > , error_sig = error_sig' +> , error_resumptive = error_resumptive' > }) > action goto lang_exts module_header module_trailer > target coerce ghc strict @@ -99,8 +100,9 @@ Produce the complete output file. > | otherwise = str "Prelude.Int" > > -- Parsing monad and its constraints -> pty = str monad_tycon -> pcont = str monad_context +> pty = str monad_tycon -- str "P" +> ptyAt a = brack' (pty . str " " . a) -- \(str "a") -> str "(P a)" +> pcont = str monad_context -- str "Read a", some constraint for "P" to be a monad > > -- If GHC is enabled, wrap the content in a CPP ifdef that includes the > -- content and tests whether the GHC version is >= 7.10.3 @@ -242,13 +244,13 @@ based parsers -- types aren't as important there). > . interleave' ",\n " > [ mkActionName i | (i,_action') <- zip [ 0 :: Int .. ] > (assocs action) ] -> . str " :: " . str monad_context . str " => " +> . str " :: " . pcont . str " => " > . intMaybeHash . str " -> " . happyReductionValue . str "\n\n" > . interleave' ",\n " > [ mkReduceFun i | > (i,_action) <- zip [ n_starts :: Int .. ] > (drop n_starts prods) ] -> . str " :: " . str monad_context . str " => " +> . str " :: " . pcont . str " => " > . happyReductionValue . str "\n\n" > | otherwise = id @@ -381,7 +383,7 @@ happyMonadReduce to get polymorphic recursion. Sigh. > . str " -> " . intMaybeHash > . str " -> Happy_IntList -> HappyStk " > . happyAbsSyn . str " -> " -> . pty . str " " . happyAbsSyn . str "\n" +> . ptyAt happyAbsSyn . str "\n" > | otherwise -> id in > filterTypeSig tysig . mkReduceFun i . str " = " > . str s . strspace . lt' . strspace . showInt adjusted_nt @@ -441,10 +443,10 @@ The token conversion function. > . str "let cont i = " . doAction . str " sts stk tks in\n\t" > . str "case tk of {\n\t" > . interleave ";\n\t" (map doToken token_rep) -> . str "_ -> happyError' ((tk:tks), [])\n\t" +> . str "_ -> happyError' (tk:tks) [] noResumption_\n\t" > . str "}\n\n" -> . str "happyError_ explist " . eofTok . str " tk tks = happyError' (tks, explist)\n" -> . str "happyError_ explist _ tk tks = happyError' ((tk:tks), explist)\n"; +> . str "happyError_ explist " . eofTok . str " tk resume tks = happyError' tks explist resume\n" +> . str "happyError_ explist _ tk resume tks = happyError' (tk:tks) explist resume\n"; > -- when the token is EOF, tk == _|_ (notHappyAtAll) > -- so we must not pass it to happyError' @@ -456,15 +458,15 @@ The token conversion function. > . str " -> Happy_GHC_Exts.Int#\n" > . str " -> " . token . str "\n" > . str " -> HappyState " . token . str " (t -> " -> . pty . str " a)\n" +> . ptyAt (str "a") . str ")\n" > . str " -> [HappyState " . token . str " (t -> " -> . pty . str " a)]\n" +> . ptyAt (str "a") . str ")]\n" > . str " -> t\n" -> . str " -> " . pty . str " a)\n" +> . str " -> " . ptyAt (str "a") . str ")\n" > . str " -> [HappyState " . token . str " (t -> " -> . pty . str " a)]\n" +> . ptyAt (str "a") . str ")]\n" > . str " -> t\n" -> . str " -> " . pty . str " a\n" +> . str " -> " . ptyAt (str "a") . str "\n" > _ -> id > . str "happyNewToken action sts stk\n\t= " > . str lexer'' @@ -476,10 +478,10 @@ The token conversion function. > . str (eof' ++ " -> ") > . eofAction "tk" . str ";\n\t" > . interleave ";\n\t" (map doToken token_rep) -> . str "_ -> happyError' (tk, [])\n\t" +> . str "_ -> happyError' tk [] noResumption_\n\t" > . str "})\n\n" -> . str "happyError_ explist " . eofTok . str " tk = happyError' (tk, explist)\n" -> . str "happyError_ explist _ tk = happyError' (tk, explist)\n"; +> . str "happyError_ explist " . eofTok . str " tk resume = happyError' tk explist resume\n" +> . str "happyError_ explist _ tk resume = happyError' tk explist resume\n"; > -- superfluous pattern match needed to force happyError_ to > -- have the correct type. > } @@ -703,7 +705,7 @@ action array indexed by (terminal * last_state) + state > . shows n_states . str ") ([" > . interleave' "," (map shows goto_offs) > . str "\n\t])\n\n" -> +> > . str "happyAdjustOffset :: Prelude.Int -> Prelude.Int\n" > . str "happyAdjustOffset = Prelude.id\n\n" > @@ -839,27 +841,30 @@ MonadStuff: > produceMonadStuff = -> str "happyThen :: " . pcont . str " => " . pty -> . str " a -> (a -> " . pty -> . str " b) -> " . pty . str " b\n" +> str "happyThen :: " . pcont . str " => " . ptyAt (str "a") +> . str " -> (a -> " . ptyAt (str "b") +> . str ") -> " . ptyAt (str "b") . str "\n" > . str "happyThen = " . brack monad_then . nl -> . str "happyReturn :: " . pcont . str " => a -> " . pty . str " a\n" +> . str "happyReturn :: " . pcont . str " => a -> " . ptyAt (str "a") . str "\n" > . str "happyReturn = " . brack monad_return . nl > . case lexer' of > Nothing -> > str "happyThen1 m k tks = (" . str monad_then > . str ") m (\\a -> k a tks)\n" -> . str "happyReturn1 :: " . pcont . str " => a -> b -> " . pty . str " a\n" +> . str "happyReturn1 :: " . pcont . str " => a -> b -> " . ptyAt (str "a") . str "\n" > . str "happyReturn1 = \\a tks -> " . brack monad_return > . str " a\n" -> . str "happyError' :: " . str monad_context . str " => ([" -> . token -> . str "], [Prelude.String]) -> " -> . str monad_tycon -> . str " a\n" -> . str "happyError' = " -> . str (if use_monad then "" else "HappyIdentity Prelude.. ") -> . errorHandler . str "\n" +> . str "happyError' :: " . pcont . str " => " +> . str "[" . token . str "] -> " +> . str "[Prelude.String] -> " +> . ptyAt (str "(Maybe a)") . str " -> " +> . ptyAt (str "a") +> . str "\n" +> . str "happyError' = " . errorHandler . str "\n" +> . str "noResumption_ :: " . pcont . str " => " +> . ptyAt (str "(Maybe a)") +> . str "\n" +> . str "noResumption_ = " . noResumption . str "\n" > _ -> > let > happyParseSig @@ -872,7 +877,7 @@ MonadStuff: > | target == TargetArrayBased = > str "happyNewToken :: " . pcont . str " => " . intMaybeHash > . str " -> Happy_IntList -> HappyStk " . happyAbsSyn -> . str " -> " . pty . str " " . happyAbsSyn . str"\n" +> . str " -> " . ptyAt happyAbsSyn . str"\n" > . str "\n" > | otherwise = id > doActionSig @@ -880,7 +885,7 @@ MonadStuff: > str "happyDoAction :: " . pcont . str " => " . intMaybeHash > . str " -> " . str token_type' . str " -> " . intMaybeHash > . str " -> Happy_IntList -> HappyStk " . happyAbsSyn -> . str " -> " . pty . str " " . happyAbsSyn . str "\n" +> . str " -> " . ptyAt happyAbsSyn . str "\n" > . str "\n" > | otherwise = id > reduceArrSig @@ -889,7 +894,7 @@ MonadStuff: > . str " => Happy_Data_Array.Array Prelude.Int (" . intMaybeHash > . str " -> " . str token_type' . str " -> " . intMaybeHash > . str " -> Happy_IntList -> HappyStk " . happyAbsSyn -> . str " -> " . pty . str " " . happyAbsSyn . str ")\n" +> . str " -> " . ptyAt happyAbsSyn . str ")\n" > . str "\n" > | otherwise = id in > filterTypeSig (happyParseSig . newTokenSig . doActionSig . reduceArrSig) @@ -897,15 +902,19 @@ MonadStuff: > . str " a -> (a -> " . pty > . str " b) -> " . pty . str " b\n" > . str "happyThen1 = happyThen\n" -> . str "happyReturn1 :: " . pcont . str " => a -> " . pty . str " a\n" +> . str "happyReturn1 :: " . pcont . str " => a -> " . ptyAt (str "a") . str "\n" > . str "happyReturn1 = happyReturn\n" -> . str "happyError' :: " . str monad_context . str " => (" -> . token . str ", [Prelude.String]) -> " -> . str monad_tycon -> . str " a\n" -> . str "happyError' tk = " -> . str (if use_monad then "" else "HappyIdentity ") -> . errorHandler . str " tk\n" +> . str "happyError' :: " . pcont . str " => " +> . token . str " -> " +> . str "[Prelude.String] -> " +> . ptyAt (str "(Maybe a)") . str " -> " +> . ptyAt (str "a") +> . str "\n" +> . str "happyError' = " . errorHandler . str "\n" +> . str "noResumption_ :: " . pcont . str " => " +> . ptyAt (str "(Maybe a)") +> . str "\n" +> . str "noResumption_ = " . noResumption . str "\n" An error handler specified with %error is passed the current token when used with %lexer, but happyError (the old way but kept for @@ -913,13 +922,19 @@ compatibility) is not passed the current token. Also, the %errorhandlertype directive determines the API of the provided function. > errorHandler = -> case error_handler' of -> Just h -> case error_sig' of -> ErrorHandlerTypeExpList -> str h -> ErrorHandlerTypeDefault -> str "(\\(tokens, _) -> " . str h . str " tokens)" -> Nothing -> case lexer' of -> Nothing -> str "(\\(tokens, _) -> happyError tokens)" -> Just _ -> str "(\\(tokens, explist) -> happyError)" +> str "(\\tokens explist resume -> " . +> (if use_monad then str "" +> else str "HappyIdentity Prelude.$ ") . +> str (case error_handler' of Just h -> h; Nothing -> "happyError") . str " " . +> str (case (error_handler', lexer') of (Nothing, Just _) -> "" +> _ -> "tokens ") . +> (case error_sig' of ErrorHandlerTypeExpList -> str "explist " +> ErrorHandlerTypeDefault -> str "") . +> (if error_resumptive' then str "resume " +> else str "") . +> str ")" +> noResumption = if use_monad then brack monad_return . str " Nothing" +> else str "HappyIdentity Nothing" > reduceArrElem n > = str "\t(" . shows n . str " , " diff --git a/packages/codegen-common/src/Happy/CodeGen/Common/Options.lhs b/packages/codegen-common/src/Happy/CodeGen/Common/Options.lhs index 66239991..ef925f91 100644 --- a/packages/codegen-common/src/Happy/CodeGen/Common/Options.lhs +++ b/packages/codegen-common/src/Happy/CodeGen/Common/Options.lhs @@ -21,5 +21,11 @@ The CommonOptions data type. > expect :: Maybe Int, > lexer :: Maybe (String,String), > error_handler :: Maybe String, -> error_sig :: ErrorHandlerType +> error_sig :: ErrorHandlerType, +> -- ^ ErrorHandlerTypExpList: error handler expects a +> -- `[String]` as first arg with the pretty-printed expected +> -- tokens +> error_resumptive :: Bool +> -- ^ `True` => The error handler expects a `resume` +> -- continuation as last argument. > } diff --git a/packages/frontend/src/Happy/Frontend/AbsSyn.lhs b/packages/frontend/src/Happy/Frontend/AbsSyn.lhs index 3b195a7f..6f0841b7 100644 --- a/packages/frontend/src/Happy/Frontend/AbsSyn.lhs +++ b/packages/frontend/src/Happy/Frontend/AbsSyn.lhs @@ -11,7 +11,7 @@ Here is the abstract syntax of the language we parse. > AbsSyn(..), Directive(..), > getTokenType, getTokenSpec, getParserNames, getLexer, > getImportedIdentity, getMonad, getError, -> getPrios, getPrioNames, getExpect, getErrorHandlerType, +> getPrios, getPrioNames, getExpect, getErrorHandlerType, getErrorResumptive, > getAttributes, getAttributetype, > Rule(..), Prod(..), Term(..), Prec(..) > ) where @@ -66,7 +66,6 @@ generate some error messages. > | TokenSpec [(a,String)] -- %token > | TokenName String (Maybe String) Bool -- %name/%partial (True <=> %partial) > | TokenLexer String String -- %lexer -> | TokenErrorHandlerType String -- %errorhandlertype > | TokenImportedIdentity -- %importedidentity > | TokenMonad String String String String -- %monad > | TokenNonassoc [String] -- %nonassoc @@ -74,6 +73,8 @@ generate some error messages. > | TokenLeft [String] -- %left > | TokenExpect Int -- %expect > | TokenError String -- %error +> | TokenErrorHandlerType String -- %errorhandlertype +> | TokenErrorResumptive -- %resumptive > | TokenAttributetype String -- %attributetype > | TokenAttribute String String -- %attribute > deriving Show @@ -151,6 +152,9 @@ generate some error messages. > [] -> ErrorHandlerTypeDefault > _ -> error "multiple errorhandlertype directives" +> getErrorResumptive :: [Directive t] -> Bool +> getErrorResumptive ds = not (null [ () | TokenErrorResumptive <- ds ]) + > getAttributes :: [Directive t] -> [(String, String)] > getAttributes ds > = [ (ident,typ) | (TokenAttribute ident typ) <- ds ] diff --git a/packages/frontend/src/Happy/Frontend/Lexer.lhs b/packages/frontend/src/Happy/Frontend/Lexer.lhs index 8bfe65bd..03806ee0 100644 --- a/packages/frontend/src/Happy/Frontend/Lexer.lhs +++ b/packages/frontend/src/Happy/Frontend/Lexer.lhs @@ -37,7 +37,6 @@ The lexer. > | TokSpecId_Token -- %token > | TokSpecId_Name -- %name > | TokSpecId_Partial -- %partial -> | TokSpecId_ErrorHandlerType -- %errorhandlertype > | TokSpecId_Lexer -- %lexer > | TokSpecId_ImportedIdentity -- %importedidentity > | TokSpecId_Monad -- %monad @@ -48,6 +47,8 @@ The lexer. > | TokSpecId_Shift -- %shift > | TokSpecId_Expect -- %expect > | TokSpecId_Error -- %error +> | TokSpecId_ErrorHandlerType -- %errorhandlertype +> | TokSpecId_ErrorResumptive -- %errorresumptive > | TokSpecId_Attributetype -- %attributetype > | TokSpecId_Attribute -- %attribute > | TokCodeQuote -- stuff inside { .. } @@ -131,6 +132,8 @@ followed by a special identifier. > cont (TokenKW TokSpecId_Expect) rest > 'e':'r':'r':'o':'r':'h':'a':'n':'d':'l':'e':'r':'t':'y':'p':'e':rest -> > cont (TokenKW TokSpecId_ErrorHandlerType) rest +> 'e':'r':'r':'o':'r':'r':'e':'s':'u':'m':'t':'i':'v':'e':rest -> +> cont (TokenKW TokSpecId_ErrorResumptive) rest > 'e':'r':'r':'o':'r':rest -> > cont (TokenKW TokSpecId_Error) rest > 'a':'t':'t':'r':'i':'b':'u':'t':'e':'t':'y':'p':'e':rest -> diff --git a/packages/frontend/src/Happy/Frontend/Mangler.lhs b/packages/frontend/src/Happy/Frontend/Mangler.lhs index 85f885d0..a2d908e7 100644 --- a/packages/frontend/src/Happy/Frontend/Mangler.lhs +++ b/packages/frontend/src/Happy/Frontend/Mangler.lhs @@ -69,6 +69,7 @@ This bit is a real mess, mainly because of the error message support. > starts' = case getParserNames dirs of > [] -> [TokenName "happyParse" Nothing False] > ns -> ns +> error_resumptive' = getErrorResumptive dirs > > start_strs = [ startName++'_':p | (TokenName p _ _) <- starts' ] @@ -87,7 +88,7 @@ Build up a mapping from name values to strings. > case lookupName str' of > [a] -> return a > [] -> do addErr ("unknown identifier '" ++ str' ++ "'") -> return errorTok +> return errorTok -- SG: What a confusing use of errorTok.. Use dummyTok? > (a:_) -> do addErr ("multiple use of '" ++ str' ++ "'") > return a @@ -249,6 +250,7 @@ Get the token specs in terms of Names. > lexer = getLexer dirs, > error_handler = getError dirs, > error_sig = getErrorHandlerType dirs, +> error_resumptive = error_resumptive', > token_type = getTokenType dirs, > expect = getExpect dirs > }) diff --git a/packages/frontend/src/Happy/Frontend/Parser/Bootstrapped.ly b/packages/frontend/src/Happy/Frontend/Parser/Bootstrapped.ly index b6494113..7f9d07fa 100644 --- a/packages/frontend/src/Happy/Frontend/Parser/Bootstrapped.ly +++ b/packages/frontend/src/Happy/Frontend/Parser/Bootstrapped.ly @@ -34,6 +34,7 @@ The parser. > spec_expect { TokenKW TokSpecId_Expect } > spec_error { TokenKW TokSpecId_Error } > spec_errorhandlertype { TokenKW TokSpecId_ErrorHandlerType } +> spec_errorresumptive { TokenKW TokSpecId_ErrorResumptive } > spec_attribute { TokenKW TokSpecId_Attribute } > spec_attributetype { TokenKW TokSpecId_Attributetype } > code { TokenInfo $$ TokCodeQuote } @@ -104,11 +105,11 @@ The parser. > | spec_shift { PrecShift } > | { PrecNone } -> tokInfos :: { [Directive String] } +> tokInfos :: { [Directive String] } > : tokInfos tokInfo { $2 : $1 } > | tokInfo { [$1] } -> tokInfo :: { Directive String } +> tokInfo :: { Directive String } > : spec_tokentype code { TokenType $2 } > | spec_token tokenSpecs { TokenSpec $2 } > | spec_name id optStart { TokenName $2 $3 False } @@ -124,7 +125,8 @@ The parser. > | spec_left ids { TokenLeft $2 } > | spec_expect int { TokenExpect $2 } > | spec_error code { TokenError $2 } -> | spec_errorhandlertype id { TokenErrorHandlerType $2 } +> | spec_errorhandlertype id { TokenErrorHandlerType $2 } +> | spec_errorresumptive { TokenErrorResumptive } > | spec_attributetype code { TokenAttributetype $2 } > | spec_attribute id code { TokenAttribute $2 $3 } diff --git a/tests/monaderror-explist.y b/tests/monaderror-explist.y index 558f28ee..3f027ef6 100644 --- a/tests/monaderror-explist.y +++ b/tests/monaderror-explist.y @@ -46,9 +46,9 @@ data Token | TokenTest deriving (Eq,Show) -handleErrorExpList :: ([Token], [String]) -> ParseM a -handleErrorExpList ([], _) = throwError $ ParseError Nothing -handleErrorExpList (ts, explist) = throwError $ ParseError $ Just $ (head ts, explist) +handleErrorExpList :: [Token] -> [String] -> ParseM a +handleErrorExpList [] _ = throwError $ ParseError Nothing +handleErrorExpList ts explist = throwError $ ParseError $ Just $ (head ts, explist) lexer :: String -> [Token] lexer [] = [] From be178d9abe3689220a88fa2c0bc4562fc6199eaf Mon Sep 17 00:00:00 2001 From: Sebastian Graf Date: Sun, 21 Jan 2024 18:59:20 +0100 Subject: [PATCH 04/10] Introduce catchTok --- .../src/Happy/Backend/GLR/ProduceCode.lhs | 2 ++ .../src/Happy/Backend/LALR/ProduceCode.lhs | 5 +++-- packages/frontend/src/Happy/Frontend/Mangler.lhs | 8 ++++++-- packages/grammar/src/Happy/Grammar.lhs | 13 ++++++++----- packages/tabular/src/Happy/Tabular.lhs | 2 +- packages/tabular/src/Happy/Tabular/First.lhs | 2 +- packages/tabular/src/Happy/Tabular/LALR.lhs | 2 +- 7 files changed, 22 insertions(+), 12 deletions(-) diff --git a/packages/backend-glr/src/Happy/Backend/GLR/ProduceCode.lhs b/packages/backend-glr/src/Happy/Backend/GLR/ProduceCode.lhs index 7d56d51b..5e077f5d 100644 --- a/packages/backend-glr/src/Happy/Backend/GLR/ProduceCode.lhs +++ b/packages/backend-glr/src/Happy/Backend/GLR/ProduceCode.lhs @@ -307,6 +307,8 @@ It also shares identical reduction values as CAFs > mkLine state (symInt,action) > | symInt == errorTok -- skip error productions > = "" -- NB see ProduceCode's handling of these +> | symInt == catchTok -- skip error productions +> = "" -- NB see ProduceCode's handling of these > | otherwise > = case action of > LR'Fail -> "" diff --git a/packages/backend-lalr/src/Happy/Backend/LALR/ProduceCode.lhs b/packages/backend-lalr/src/Happy/Backend/LALR/ProduceCode.lhs index 59cdfb90..465ef48c 100644 --- a/packages/backend-lalr/src/Happy/Backend/LALR/ProduceCode.lhs +++ b/packages/backend-lalr/src/Happy/Backend/LALR/ProduceCode.lhs @@ -1187,10 +1187,11 @@ See notes under "Action Tables" above for some subtleties in this function. > f (t, LR'Shift _ _ ) = [t - fst token_names_bound] > f (_, _) = [] > -> -- adjust terminals by -(fst_term+1), so they start at 1 (error is 0). +> -- adjust terminals by -(fst_term+2), so they start at 2 (error is 0, catch is 1). > -- (see ARRAY_NOTES) > adjust token | token == errorTok = 0 -> | otherwise = token - fst_term + 1 +> | token == catchTok = 1 +> | otherwise = token - fst_term + 2 > > mkActVals assocs' default_act = > [ (adjust token, actionVal act) diff --git a/packages/frontend/src/Happy/Frontend/Mangler.lhs b/packages/frontend/src/Happy/Frontend/Mangler.lhs index a2d908e7..cc67254d 100644 --- a/packages/frontend/src/Happy/Frontend/Mangler.lhs +++ b/packages/frontend/src/Happy/Frontend/Mangler.lhs @@ -76,13 +76,17 @@ This bit is a real mess, mainly because of the error message support. Build up a mapping from name values to strings. > name_env = (errorTok, errorName) : +> (catchTok, catchName) : > (dummyTok, dummyName) : > zip start_names start_strs ++ > zip nonterm_names nonterm_strs ++ > zip terminal_names terminal_strs > lookupName :: String -> [Name] -> lookupName n = [ t | (t,r) <- name_env, r == n ] +> lookupName n = [ t | (t,r) <- name_env, r == n +> , t /= catchTok || error_resumptive' ] +> -- hide catchName unless %errorresumptive is active +> -- issue93.y uses catch as a nonterminal, we should not steal it > mapToName str' = > case lookupName str' of @@ -230,7 +234,7 @@ Get the token specs in terms of Names. > lookupProdNo = (prod_array !), > lookupProdsOfName = lookup_prods, > token_specs = tokspec, -> terminals = errorTok : terminal_names, +> terminals = errorTok : catchTok : terminal_names, > non_terminals = start_names ++ nonterm_names, > -- INCLUDES the %start tokens > starts = zip4 parser_names start_names start_toks diff --git a/packages/grammar/src/Happy/Grammar.lhs b/packages/grammar/src/Happy/Grammar.lhs index d088b0b3..ef29ca53 100644 --- a/packages/grammar/src/Happy/Grammar.lhs +++ b/packages/grammar/src/Happy/Grammar.lhs @@ -11,7 +11,8 @@ The Grammar data type. > Priority(..), > Assoc(..), > -> errorName, errorTok, startName, dummyName, firstStartTok, dummyTok, +> errorName, errorTok, catchName, catchTok, +> startName, dummyName, firstStartTok, dummyTok, > eofName, epsilonTok, > > mapDollarDollar @@ -111,15 +112,17 @@ In normal and GHC-based parsers, these numbers are also used in the generated grammar itself, except that the error token is mapped to -1. For array-based parsers, see the note in Tabular/LALR.lhs. -> startName, eofName, errorName, dummyName :: String +> startName, eofName, errorName, catchName, dummyName :: String > startName = "%start" -- with a suffix, like %start_1, %start_2 etc. > eofName = "%eof" > errorName = "error" +> catchName = "catch" > dummyName = "%dummy" -- shouldn't occur in the grammar anywhere -> firstStartTok, dummyTok, errorTok, epsilonTok :: Name -> firstStartTok = 3 -> dummyTok = 2 +> firstStartTok, dummyTok, errorTok, catchTok, epsilonTok :: Name +> firstStartTok = 4 +> dummyTok = 3 +> catchTok = 2 > errorTok = 1 > epsilonTok = 0 diff --git a/packages/tabular/src/Happy/Tabular.lhs b/packages/tabular/src/Happy/Tabular.lhs index f86d1c78..860d76cc 100644 --- a/packages/tabular/src/Happy/Tabular.lhs +++ b/packages/tabular/src/Happy/Tabular.lhs @@ -64,7 +64,7 @@ Find unused rules and tokens > start_rules = [ 0 .. (length starts' - 1) ] > used_rules = start_rules ++ > nub [ r | (_,a) <- actions, r <- extract_reductions a ] -> used_tokens = errorTok : eof : +> used_tokens = errorTok : catchTok : eof : > nub [ t | (t,a) <- actions, is_shift a ] > n_prods = length productions' > unused_terminals = filter (`notElem` used_tokens) terms diff --git a/packages/tabular/src/Happy/Tabular/First.lhs b/packages/tabular/src/Happy/Tabular/First.lhs index 7c0da2ca..5d1c9306 100644 --- a/packages/tabular/src/Happy/Tabular/First.lhs +++ b/packages/tabular/src/Happy/Tabular/First.lhs @@ -49,7 +49,7 @@ This will never terminate. > getNext fst_term prodNo prodsOfName env = > [ (nm, next nm) | (nm,_) <- env ] > where -> fn t | t == errorTok || t >= fst_term = Set.singleton t +> fn t | t == errorTok || t == catchTok || t >= fst_term = Set.singleton t > fn x = maybe (error "attempted FIRST(e) :-(") id (lookup x env) > next :: Name -> NameSet diff --git a/packages/tabular/src/Happy/Tabular/LALR.lhs b/packages/tabular/src/Happy/Tabular/LALR.lhs index edd9f137..d286cf88 100644 --- a/packages/tabular/src/Happy/Tabular/LALR.lhs +++ b/packages/tabular/src/Happy/Tabular/LALR.lhs @@ -520,7 +520,7 @@ Generate the action table > possAction goto _set (Lr1 rule pos la) = > case findRule g rule pos of -> Just t | t >= fst_term || t == errorTok -> +> Just t | t >= fst_term || t == errorTok || t == catchTok -> > let f j = (t,LR'Shift j p) > p = maybe No id (lookup t prios) > in map f $ maybeToList (lookup t goto) From c5302677ba3ebfce021c774c06a239fe60f0ee05 Mon Sep 17 00:00:00 2001 From: Sebastian Graf Date: Wed, 24 Jan 2024 10:46:41 +0100 Subject: [PATCH 05/10] Implement error fixup and resume logic --- packages/backend-lalr/data/HappyTemplate.hs | 245 ++++++++++-------- .../src/Happy/Backend/LALR/ProduceCode.lhs | 163 ++++++------ .../frontend/src/Happy/Frontend/Lexer.lhs | 39 +-- .../src/Happy/Frontend/Parser/Oracle.hs | 9 +- tests/issue265.y | 80 ++++++ tests/monaderror-resume.y | 115 ++++++++ 6 files changed, 446 insertions(+), 205 deletions(-) create mode 100644 tests/issue265.y create mode 100644 tests/monaderror-resume.y diff --git a/packages/backend-lalr/data/HappyTemplate.hs b/packages/backend-lalr/data/HappyTemplate.hs index 00ecbb3c..f8dfa72b 100644 --- a/packages/backend-lalr/data/HappyTemplate.hs +++ b/packages/backend-lalr/data/HappyTemplate.hs @@ -46,16 +46,18 @@ data Happy_IntList = HappyCons FAST_INT Happy_IntList #if defined(HAPPY_ARRAY) # define ERROR_TOK ILIT(0) +# define CATCH_TOK ILIT(1) # define DO_ACTION(state,i,tk,sts,stk) happyDoAction i tk state sts (stk) # define HAPPYSTATE(i) (i) # define GOTO(action) happyGoto -# define IF_ARRAYS(x) (x) +# define IF_ARRAY(x) (x) #else # define ERROR_TOK ILIT(1) +# define CATCH_TOK ILIT(2) # define DO_ACTION(state,i,tk,sts,stk) state i i tk HAPPYSTATE(state) sts (stk) # define HAPPYSTATE(i) (HappyState (i)) # define GOTO(action) action -# define IF_ARRAYS(x) +# define IF_ARRAY(x) #endif #if defined(HAPPY_COERCE) @@ -97,39 +99,60 @@ happyParse start_state = happyNewToken start_state notHappyAtAll notHappyAtAll happyAccept ERROR_TOK tk st sts (_ `HappyStk` ans `HappyStk` _) = happyReturn1 ans happyAccept j tk st sts (HappyStk ans _) = - IF_GHC(happyTcHack j IF_ARRAYS(happyTcHack st)) (happyReturn1 ans) + IF_GHC(happyTcHack j IF_ARRAY(happyTcHack st)) (happyReturn1 ans) ----------------------------------------------------------------------------- -- Arrays only: do the next action #if defined(HAPPY_ARRAY) -happyDoAction i tk st - = DEBUG_TRACE("state: " ++ show IBOX(st) ++ - ",\ttoken: " ++ show IBOX(i) ++ - ",\taction: ") - case action of - ILIT(0) -> DEBUG_TRACE("fail.\n") - happyFail (happyExpListPerState (IBOX(st) :: Prelude.Int)) i tk st - ILIT(-1) -> DEBUG_TRACE("accept.\n") - happyAccept i tk st - n | LT(n,(ILIT(0) :: FAST_INT)) -> DEBUG_TRACE("reduce (rule " ++ show rule - ++ ")") - (happyReduceArr Happy_Data_Array.! rule) i tk st - where rule = IBOX(NEGATE(PLUS(n,(ILIT(1) :: FAST_INT)))) - n -> DEBUG_TRACE("shift, enter state " - ++ show IBOX(new_state) - ++ "\n") - happyShift new_state i tk st - where new_state = MINUS(n,(ILIT(1) :: FAST_INT)) - where off = happyAdjustOffset (indexShortOffAddr happyActOffsets st) - off_i = PLUS(off, i) - check = if GTE(off_i,(ILIT(0) :: FAST_INT)) - then EQ(indexShortOffAddr happyCheck off_i, i) - else Prelude.False - action - | check = indexShortOffAddr happyTable off_i - | Prelude.otherwise = indexShortOffAddr happyDefActions st +happyDoAction i tk st = + DEBUG_TRACE("state: " ++ show IBOX(st) ++ + ",\ttoken: " ++ show IBOX(i) ++ + ",\taction: ") + case happyDecodeAction (happyNextAction i st) of + HappyFail -> DEBUG_TRACE("failing.\n") + happyFail st i tk st + HappyAccept -> DEBUG_TRACE("accept.\n") + happyAccept i tk st + HappyReduce rule -> DEBUG_TRACE("reduce (rule " ++ show IBOX(rule) ++ ")") + (happyReduceArr Happy_Data_Array.! IBOX(rule)) i tk st + HappyShift new_state -> DEBUG_TRACE("shift, enter state " ++ show IBOX(new_state) ++ "\n") + happyShift new_state i tk st + +{-# INLINE happyNextAction #-} +happyNextAction i st = case happyIndexActionTable i st of + Just (IBOX(act)) -> act + Nothing -> indexShortOffAddr happyDefActions st + +{-# INLINE happyIndexActionTable #-} +happyIndexActionTable i st + | GTE(off,ILIT(0)), EQ(indexShortOffAddr happyCheck off, i) + = Prelude.Just (IBOX(indexShortOffAddr happyTable off)) + | otherwise + = Prelude.Nothing + where + off = PLUS(happyAdjustOffset (indexShortOffAddr happyActOffsets st), i) + +data HappyAction + = HappyFail + | HappyAccept + | HappyReduce FAST_INT -- rule number + | HappyShift FAST_INT -- new state + +{-# INLINE happyDecodeAction #-} +happyDecodeAction ILIT(0) = HappyFail +happyDecodeAction ILIT(-1) = HappyAccept +happyDecodeAction action + | LT(action,ILIT(0)) + = HappyReduce NEGATE(PLUS(action,ILIT(1))) + | otherwise + = HappyShift MINUS(action,ILIT(1)) + +{-# INLINE happyIndexGotoTable #-} +happyIndexGotoTable nt st = indexShortOffAddr happyTable off + where + off = PLUS(happyAdjustOffset (indexShortOffAddr happyGotoOffsets st), nt) #endif /* HAPPY_ARRAY */ @@ -181,74 +204,46 @@ newtype HappyState b c = HappyState happyShift new_state ERROR_TOK tk st sts stk@(x `HappyStk` _) = let i = GET_ERROR_TOKEN(x) in --- trace "shifting the error token" $ + DEBUG_TRACE("shifting the error token") DO_ACTION(new_state,i,tk,CONS(st,sts),stk) - -- TODO: When `i` would enter error recovery again, we should instead - -- discard input until the lookahead is acceptable. Perhaps this is - -- simplest to implement in CodeGen for productions using `error`; - -- there we know the context and can implement local shift+discard actions. - -- still need to remember parser-defined error site, though. - happyShift new_state i tk st sts stk = happyNewToken new_state CONS(st,sts) (MK_TOKEN(tk)`HappyStk`stk) -- happyReduce is specialised for the common cases. -happySpecReduce_0 i fn ERROR_TOK tk st sts stk - = happyFail [] ERROR_TOK tk st sts stk - -- SG: I'm very doubtful that passing [] ("no token expected here") - -- as the first arg to happyFail here and in the following calls is - -- correct. I'm not going to touch it for a lack of understanding - -- and concerns of of backward compatibility, but - -- `happyExpListPerState (IBOX(st) :: Prelude.Int)` - -- seems like a good candidate. happySpecReduce_0 nt fn j tk st@(HAPPYSTATE(action)) sts stk = happySeq fn (GOTO(action) nt j tk st CONS(st,sts) (fn `HappyStk` stk)) -happySpecReduce_1 i fn ERROR_TOK tk st sts stk - = happyFail [] ERROR_TOK tk st sts stk -happySpecReduce_1 nt fn j tk _ sts@(CONS(st@HAPPYSTATE(action),_)) (v1`HappyStk`stk') +happySpecReduce_1 nt fn j tk old_st sts@(CONS(st@HAPPYSTATE(action),_)) (v1`HappyStk`stk') = let r = fn v1 in - happySeq r (GOTO(action) nt j tk st sts (r `HappyStk` stk')) + IF_ARRAY(happyTcHack old_st) happySeq r (GOTO(action) nt j tk st sts (r `HappyStk` stk')) -happySpecReduce_2 i fn ERROR_TOK tk st sts stk - = happyFail [] ERROR_TOK tk st sts stk -happySpecReduce_2 nt fn j tk _ CONS(_,sts@(CONS(st@HAPPYSTATE(action),_))) (v1`HappyStk`v2`HappyStk`stk') +happySpecReduce_2 nt fn j tk old_st CONS(_,sts@(CONS(st@HAPPYSTATE(action),_))) (v1`HappyStk`v2`HappyStk`stk') = let r = fn v1 v2 in - happySeq r (GOTO(action) nt j tk st sts (r `HappyStk` stk')) + IF_ARRAY(happyTcHack old_st) happySeq r (GOTO(action) nt j tk st sts (r `HappyStk` stk')) -happySpecReduce_3 i fn ERROR_TOK tk st sts stk - = happyFail [] ERROR_TOK tk st sts stk -happySpecReduce_3 nt fn j tk _ CONS(_,CONS(_,sts@(CONS(st@HAPPYSTATE(action),_)))) (v1`HappyStk`v2`HappyStk`v3`HappyStk`stk') +happySpecReduce_3 nt fn j tk old_st CONS(_,CONS(_,sts@(CONS(st@HAPPYSTATE(action),_)))) (v1`HappyStk`v2`HappyStk`v3`HappyStk`stk') = let r = fn v1 v2 v3 in - happySeq r (GOTO(action) nt j tk st sts (r `HappyStk` stk')) + IF_ARRAY(happyTcHack old_st) happySeq r (GOTO(action) nt j tk st sts (r `HappyStk` stk')) -happyReduce k i fn ERROR_TOK tk st sts stk - = happyFail [] ERROR_TOK tk st sts stk -happyReduce k nt fn j tk st sts stk - = case happyDrop MINUS(k,(ILIT(1) :: FAST_INT)) sts of +happyReduce k nt fn j tk st sts stk = + case happyDrop k CONS(st,sts) of sts1@(CONS(st1@HAPPYSTATE(action),_)) -> let r = fn stk in -- it doesn't hurt to always seq here... happyDoSeq r (GOTO(action) nt j tk st1 sts1 r) -happyMonadReduce k nt fn ERROR_TOK tk st sts stk - = happyFail [] ERROR_TOK tk st sts stk happyMonadReduce k nt fn j tk st sts stk = case happyDrop k CONS(st,sts) of sts1@(CONS(st1@HAPPYSTATE(action),_)) -> let drop_stk = happyDropStk k stk in happyThen1 (fn stk tk) (\r -> GOTO(action) nt j tk st1 sts1 (r `HappyStk` drop_stk)) -happyMonad2Reduce k nt fn ERROR_TOK tk st sts stk - = happyFail [] ERROR_TOK tk st sts stk happyMonad2Reduce k nt fn j tk st sts stk = - case happyDrop k CONS(st,sts) of + j `happyTcHack` case happyDrop k CONS(st,sts) of sts1@(CONS(st1@HAPPYSTATE(action),_)) -> let drop_stk = happyDropStk k stk #if defined(HAPPY_ARRAY) - off = happyAdjustOffset (indexShortOffAddr happyGotoOffsets st1) - off_i = PLUS(off, nt) - new_state = indexShortOffAddr happyTable off_i + new_state = happyIndexGotoTable nt st1 #else _ = nt :: FAST_INT new_state = action @@ -268,10 +263,7 @@ happyDropStk n (x `HappyStk` xs) = happyDropStk MINUS(n,(ILIT(1)::FAST_INT)) xs #if defined(HAPPY_ARRAY) happyGoto nt j tk st = DEBUG_TRACE(", goto state " ++ show IBOX(new_state) ++ "\n") - happyDoAction j tk new_state - where off = happyAdjustOffset (indexShortOffAddr happyGotoOffsets st) - off_i = PLUS(off, nt) - new_state = indexShortOffAddr happyTable off_i + happyDoAction j tk new_state where new_state = (happyIndexGotoTable nt st) #else happyGoto action j tk st = action j j tk (HappyState action) #endif @@ -285,39 +277,84 @@ happyGoto action j tk st = action j j tk (HappyState action) -- 1. Fixup: Try to see if there is an action for the error token (`errorTok`, -- which is ERROR_TOK). If there is, do *not* emit an error and pretend -- instead that an `errorTok` was inserted. --- When there is no `errorTok` action, call `happyErro` and enter error --- resumption mode. --- 2. Error resumption mode: After `happyError` was called TODO: happyError is fatal. --- Perhaps we should introduce a new `happyAddError`? --- Current plan: New %resumptive declaration for specifying the two funs, --- mutually exclusive with %error. +-- When there is no `errorTok` action, call the error handler +-- (e.g., `happyError`) with the resumption continuation `happyResume`. +-- 2. Error resumption mode: If the error handler wants to resume parsing in +-- order to report multiple parse errors, it will call the resumption +-- continuation (of result type `P (Maybe a)`). +-- In the absence of the %resumptive declaration, this resumption will +-- always (do a bit of work, and) `return Nothing`. +-- In the presence of the %resumptive declaration, the grammar author +-- can use the special `catch` terminal to declare where parsing should +-- resume after an error. +-- E.g., if `stmt : expr ';' | catch ';'` then the resumption will -- --- This is what usually is associated with `error` --- in `bison` or `menhir`. Since `error` is used for the Fixup mechanism (1) --- above, we call the corresponding token `catch`. --- In particular, `catch` will never *omit* calls to `happyFail`. +-- (a) Pop off the state stack until it finds an item +-- `stmt -> . catch ';'`. +-- Then, it will push a `catchTok` onto the stack, perform a shift and +-- end up in item `stmt -> catch . ';'`. +-- (b) Discard tokens from the lexer until it finds ';'. +-- (In general, it will discard until the lookahead has a non-default +-- action in the matches a token that applies +-- in the situation `P -> α catch . β`, where β might empty.) +-- +-- The `catch` resumption mechanism (2) is what usually is associated with +-- `error` in `bison` or `menhir`. Since `error` is used for the Fixup mechanism +-- (1) above, we call the corresponding token `catch`. + +-- Enter error Fixup: generate an error token, +-- save the old token and carry on. +-- When a `happyShift` accepts, we will pop off the error +-- token to resume parsing with the current lookahead `i`. +happyTryFixup i tk HAPPYSTATE(action) sts stk = + DEBUG_TRACE("entering `error` fixup.\n") + DO_ACTION(action,ERROR_TOK,tk,sts, MK_ERROR_TOKEN(i) `HappyStk` stk) + -- NB: `happyShift` will simply pop the error token and carry on with + -- `tk`. Hence we don't change `tk` in the call here + +-- parse error if we are in fixup and fail again +happyFixupFailed state_num tk st sts (x `HappyStk` stk) = + let i = GET_ERROR_TOKEN(x) in + DEBUG_TRACE("`error` fixup failed.\n") +#if defined(HAPPY_ARRAY) + -- TODO: Walk the stack instead of looking only at the top state_num + happyError_ i tk (happyExpListPerState (IBOX(state_num))) (happyResume i tk st sts stk) +#else + happyError_ i tk (happyExpListPerState (IBOX(state_num))) (happyResume i tk st sts stk) +#endif + +happyFail state_num ERROR_TOK = happyFixupFailed state_num +happyFail _ i = happyTryFixup i + +#if defined(HAPPY_ARRAY) +happyResume i tk st sts stk = pop_items st sts stk + where + pop_items st sts stk + | HappyShift new_state <- happyDecodeAction (happyNextAction CATCH_TOK st) + = DEBUG_TRACE("shifting catch token " ++ show IBOX(st) ++ " -> " ++ show IBOX(new_state) ++ "\n") + discard_input_until_exp i tk new_state CONS(st,sts) (MK_ERROR_TOKEN(i) `HappyStk` stk) + | DEBUG_TRACE("can't shift catch in " ++ show IBOX(st) ++ ", ") True + , IBOX(n_starts) <- happy_n_starts, LT(st, n_starts) + = DEBUG_TRACE("because it is a start state. no resumption.\n") + happyReturn1 Nothing + | CONS(st1,sts1) <- sts, _ `HappyStk` stk1 <- stk + = DEBUG_TRACE("discarding.\n") + pop_items st1 sts1 stk1 +-- discard_input_until_exp :: Happy_GHC_Exts.Int# -> Token -> Happy_GHC_Exts.Int# -> _ + discard_input_until_exp i tk st sts stk + | HappyFail <- happyDecodeAction (happyNextAction i st) + = DEBUG_TRACE("discard token in state " ++ show IBOX(st) ++ ": " ++ show IBOX(i) ++ "\n") + happyLex (\_eof_tk -> happyReturn1 Nothing) + (\i tk -> discard_input_until_exp i tk st sts stk) -- not eof + | otherwise + = DEBUG_TRACE("found expected token in state " ++ show IBOX(st) ++ ": " ++ show IBOX(i) ++ "\n") + happyFmap1 (\a -> a `happySeq` Just a) + (DO_ACTION(st,i,tk,sts,stk)) +#else +happyResume (i :: FAST_INT) tk st sts stk = happyReturn1 Nothing +#endif + --- parse error if we are in recovery and reached the end of the state stack -happyFail explist ERROR_TOK tk old_st _ stk@(x `HappyStk` _) = - let i = GET_ERROR_TOKEN(x) in --- trace "failing" $ - happyError_ explist i tk noResumption_ - -{- --- discard a state -happyFail explist ERROR_TOK tk old_st CONS(HAPPYSTATE(action),sts) - (saved_tok `HappyStk` _ `HappyStk` stk) = --- trace ("discarding state, depth " ++ show (length stk)) $ - DO_ACTION(action,ERROR_TOK,tk,sts,(saved_tok`HappyStk`stk)) --} - --- Enter error recovery: generate an error token, --- save the old token and carry on. --- When a `happyShift` accepts, we will pop off the error --- token to resume parsing with the current lookahead `i`. -happyFail explist i tk HAPPYSTATE(action) sts stk = --- trace "entering error recovery" $ - DO_ACTION(action,ERROR_TOK,tk,sts, MK_ERROR_TOKEN(i) `HappyStk` stk) -- Internal happy errors: notHappyAtAll :: a @@ -330,6 +367,8 @@ notHappyAtAll = Prelude.error "Internal Happy error\n" happyTcHack :: Happy_GHC_Exts.Int# -> a -> a happyTcHack x y = y {-# INLINE happyTcHack #-} +#else +happyTcHack x y = y #endif ----------------------------------------------------------------------------- diff --git a/packages/backend-lalr/src/Happy/Backend/LALR/ProduceCode.lhs b/packages/backend-lalr/src/Happy/Backend/LALR/ProduceCode.lhs index 465ef48c..2bb47f07 100644 --- a/packages/backend-lalr/src/Happy/Backend/LALR/ProduceCode.lhs +++ b/packages/backend-lalr/src/Happy/Backend/LALR/ProduceCode.lhs @@ -25,6 +25,7 @@ The code generator. > import Data.Array.Unboxed ( UArray ) > import Data.Array.MArray ( MArray(..), freeze, readArray, writeArray ) > import Data.Array.IArray ( Array, IArray(..), (!), array, assocs, elems ) +> import Debug.Trace %----------------------------------------------------------------------------- Produce the complete output file. @@ -74,7 +75,7 @@ Produce the complete output file. > -- don't screw up any OPTIONS pragmas in the header. > . produceAbsSynDecl . nl > . produceTypes -> . produceExpListPerState +> . produceExpToksPerState > . produceActionTable target > . produceReductions > . produceTokenConverter . nl @@ -436,24 +437,31 @@ The token conversion function. > = case lexer' of { > > Nothing -> -> str "happyNewToken action sts stk [] =\n\t" -> . eofAction "notHappyAtAll" -> . str " []\n\n" -> . str "happyNewToken action sts stk (tk:tks) =\n\t" -> . str "let cont i = " . doAction . str " sts stk tks in\n\t" -> . str "case tk of {\n\t" +> str "happyTerminalToTok term = case term of {\n\t" > . interleave ";\n\t" (map doToken token_rep) -> . str "_ -> happyError' (tk:tks) [] noResumption_\n\t" -> . str "}\n\n" -> . str "happyError_ explist " . eofTok . str " tk resume tks = happyError' tks explist resume\n" -> . str "happyError_ explist _ tk resume tks = happyError' (tk:tks) explist resume\n"; +> . str "_ -> error \"Encountered a token that was not declared to happy\"\n\t}\n" +> . str "{-# NOINLINE happyTerminalToTok #-}\n" +> . str "\n" +> . str "happyEofTok = " . shows (tokIndex eof) . str "\n" +> . str "\n" +> . str "happyLex kend _kmore [] = kend notHappyAtAll []\n" +> . str "happyLex _kend kmore (tk:tks) = kmore (happyTerminalToTok tk) tk tks\n" +> . str "{-# INLINE happyLex #-}\n" +> . str "\n" +> . str "happyNewToken action sts stk = happyLex (\\tk -> " . eofAction . str ") (" +> . str "\\i tk -> " . doAction . str " sts stk)\n" +> . str "\n" +> . str "happyError_ " . eofTok . str " tk explist resume tks = happyError' tks explist resume\n" +> . str "happyError_ _ tk explist resume tks = happyError' (tk:tks) explist resume\n" > -- when the token is EOF, tk == _|_ (notHappyAtAll) > -- so we must not pass it to happyError' +> . str "\n"; > Just (lexer'',eof') -> > case (target, ghc) of > (TargetHaskell, True) -> -> str "happyNewToken :: " . pcont . str " => " +> str "happyTerminalToTok :: " . token . str " -> Happy_GHC_Exts.Int#\n" +> . str "happyNewToken :: " . pcont . str " => " > . str "(Happy_GHC_Exts.Int#\n" > . str " -> Happy_GHC_Exts.Int#\n" > . str " -> " . token . str "\n" @@ -468,32 +476,37 @@ The token conversion function. > . str " -> t\n" > . str " -> " . ptyAt (str "a") . str "\n" > _ -> id -> . str "happyNewToken action sts stk\n\t= " -> . str lexer'' -> . str "(\\tk -> " -> . str "\n\tlet cont i = " -> . doAction -> . str " sts stk in\n\t" -> . str "case tk of {\n\t" -> . str (eof' ++ " -> ") -> . eofAction "tk" . str ";\n\t" +> . str "happyTerminalToTok term = case term of {\n\t" +> . str eof' . str " -> " . eofTok . str ";\n\t" > . interleave ";\n\t" (map doToken token_rep) -> . str "_ -> happyError' tk [] noResumption_\n\t" -> . str "})\n\n" -> . str "happyError_ explist " . eofTok . str " tk resume = happyError' tk explist resume\n" -> . str "happyError_ explist _ tk resume = happyError' tk explist resume\n"; +> . str "_ -> error \"Encountered a token that was not declared to happy\"\n\t}\n" +> . str "{-# NOINLINE happyTerminalToTok #-}\n" +> . str "\n" +> . str "happyEofTok = " . shows (tokIndex eof) . str "\n" +> . str "\n" +> . str "happyLex kend kmore = " . str lexer'' . str " (\\tk -> case tk of {\n\t" +> . str eof' . str " -> kend tk;\n\t" +> . str "_ -> kmore (happyTerminalToTok tk) tk })\n" +> . str "{-# INLINE happyLex #-}\n" +> . str "\n" +> . str "happyNewToken action sts stk = happyLex (\\tk -> " . eofAction . str ") (" +> . str "\\i tk -> " . doAction . str " sts stk)\n" +> . str "\n" +> . str "happyError_ " . eofTok . str " = happyError'\n" +> . str "happyError_ _ = happyError'\n" > -- superfluous pattern match needed to force happyError_ to > -- have the correct type. +> . str "\n"; > } > where -> eofAction tk = +> eofAction = > (case target of > TargetArrayBased -> -> str "happyDoAction " . eofTok . strspace . str tk . str " action" +> str "happyDoAction " . eofTok . strspace . str " tk action" > _ -> str "action " . eofTok . strspace . eofTok -> . strspace . str tk . str " (HappyState action)") +> . str " tk (HappyState action)") > . str " sts stk" > eofTok = showInt (tokIndex eof) > @@ -501,10 +514,7 @@ The token conversion function. > TargetArrayBased -> str "happyDoAction i tk action" > _ -> str "action i i tk (HappyState action)" > -> doToken (i,tok) -> = str (removeDollarDollar tok) -> . str " -> cont " -> . showInt (tokIndex i) +> doToken (i,tok) = str (removeDollarDollar tok) . str " -> " . showInt (tokIndex i) Use a variable rather than '_' to replace '$$', so we can use it on the left hand side of '@'. @@ -527,8 +537,10 @@ the left hand side of '@'. > tokIndex > = case target of > TargetHaskell -> id -> TargetArrayBased -> \i -> i - n_nonterminals - n_starts - 2 +> TargetArrayBased -> arrayTokAdjustment > -- tokens adjusted to start at zero, see ARRAY_NOTES +> arrayTokAdjustment i = i - fst_term + 2 -- +2: errorTok, catchTok +> %----------------------------------------------------------------------------- Action Tables. @@ -587,24 +599,26 @@ machinery to discard states in the parser... > . str "happy_n_terms = " . shows n_terminals . str " :: Prelude.Int\n" > . str "happy_n_nonterms = " . shows n_nonterminals . str " :: Prelude.Int\n\n" > -> produceExpListPerState -> = produceExpListArray +> produceExpToksPerState +> = produceExpToksArray > . str "{-# NOINLINE happyExpListPerState #-}\n" > . str "happyExpListPerState st =\n" > . str " token_strs_expected\n" -> . str " where token_strs = " . str (show $ elems token_names') . str "\n" -> . str " bit_start = st Prelude.* " . str (show nr_tokens) . str "\n" -> . str " bit_end = (st Prelude.+ 1) Prelude.* " . str (show nr_tokens) . str "\n" -> . str " read_bit = readArrayBit happyExpList\n" +> . str " where token_strs = " . shows (elems token_names') . str "\n" +> . str " bit_start = st Prelude.* " . shows nr_tokens . str "\n" +> . str " bit_end = (st Prelude.+ 1) Prelude.* " . shows nr_tokens . str "\n" +> . str " read_bit = readArrayBit happyExpToks\n" > . str " bits = Prelude.map read_bit [bit_start..bit_end Prelude.- 1]\n" > . str " bits_indexed = Prelude.zip bits [0.." -> . str (show (nr_tokens - 1)) . str "]\n" +> . shows (nr_tokens - 1) . str "]\n" > . str " token_strs_expected = Prelude.concatMap f bits_indexed\n" > . str " f (Prelude.False, _) = []\n" > . str " f (Prelude.True, nr) = [token_strs Prelude.!! nr]\n" > . str "\n" > where (first_token, last_token) = bounds token_names' > nr_tokens = last_token - first_token + 1 +> -- NB: This number includes non-terminals as well as special +> -- terminals (`error`,`%dummy`,...). Hence the need to adjust. > > produceStateFunction goto' (state, acts) > = foldr (.) id (map produceActions assocs_acts) @@ -613,18 +627,11 @@ machinery to discard states in the parser... > . (if ghc > then str " x = happyTcHack x " > else str " _ = ") -> . mkAction default_act -> . (case default_act of -> LR'Fail -> callHappyExpListPerState -> LR'MustFail -> callHappyExpListPerState -> _ -> str "") +> . mkAction (showInt state) default_act > . str "\n\n" > > where gotos = goto' ! state > -> callHappyExpListPerState = str " (happyExpListPerState " -> . str (show state) . str ")" -> > produceActions (_, LR'Fail{-'-}) = id > produceActions (t, action'@(LR'Reduce{-'-} _ _)) > | action' == default_act = id @@ -634,11 +641,7 @@ machinery to discard states in the parser... > > producePossiblyFailingAction t action' > = actionFunction t -> . mkAction action' -> . (case action' of -> LR'Fail -> str " []" -> LR'MustFail -> str " []" -> _ -> str "") +> . mkAction (showInt state) action' > . str "\n" > > produceGotos (t, Goto i) @@ -727,15 +730,15 @@ action array indexed by (terminal * last_state) + state > . interleave' "," (map shows table) > . str "\n\t])\n\n" -> produceExpListArray +> produceExpToksArray > | ghc -> = str "happyExpList :: HappyAddr\n" -> . str "happyExpList = HappyA# \"" --" +> = str "happyExpToks :: HappyAddr\n" +> . str "happyExpToks = HappyA# \"" --" > . str (hexChars explist) > . str "\"#\n\n" --" > | otherwise -> = str "happyExpList :: Happy_Data_Array.Array Prelude.Int Prelude.Int\n" -> . str "happyExpList = Happy_Data_Array.listArray (0," +> = str "happyExpToks :: Happy_Data_Array.Array Prelude.Int Prelude.Int\n" +> . str "happyExpToks = Happy_Data_Array.listArray (0," > . shows table_size . str ") ([" > . interleave' "," (map shows explist) > . str "\n\t])\n\n" @@ -823,6 +826,7 @@ MonadStuff: happyThen :: () => HappyIdentity a -> (a -> HappyIdentity b) -> HappyIdentity b happyReturn :: () => a -> HappyIdentity a happyThen1 m k tks = happyThen m (\a -> k a tks) + happyFmap1 f m tks = happyThen (m tks) (\a -> happyReturn (f a)) happyReturn1 = \a tks -> happyReturn a - with %monad: @@ -830,6 +834,7 @@ MonadStuff: happyThen :: CONTEXT => P a -> (a -> P b) -> P b happyReturn :: CONTEXT => a -> P a happyThen1 m k tks = happyThen m (\a -> k a tks) + happyFmap1 f m tks = happyThen (m tks) (\a -> happyReturn (f a)) happyReturn1 = \a tks -> happyReturn a - with %monad & %lexer: @@ -838,6 +843,7 @@ MonadStuff: happyReturn :: CONTEXT => a -> P a happyThen1 = happyThen happyReturn1 = happyReturn + happyFmap1 f m = happyThen m (\a -> happyReturn (f a)) > produceMonadStuff = @@ -851,20 +857,18 @@ MonadStuff: > Nothing -> > str "happyThen1 m k tks = (" . str monad_then > . str ") m (\\a -> k a tks)\n" +> . str "happyFmap1 f m tks = happyThen (m tks) (\\a -> happyReturn (f a))\n" > . str "happyReturn1 :: " . pcont . str " => a -> b -> " . ptyAt (str "a") . str "\n" > . str "happyReturn1 = \\a tks -> " . brack monad_return > . str " a\n" > . str "happyError' :: " . pcont . str " => " > . str "[" . token . str "] -> " -> . str "[Prelude.String] -> " -> . ptyAt (str "(Maybe a)") . str " -> " +> . str "[Prelude.String] -> (" +> . str "[" . token . str "] -> " +> . ptyAt (str "(Maybe a)") . str ") -> " > . ptyAt (str "a") > . str "\n" > . str "happyError' = " . errorHandler . str "\n" -> . str "noResumption_ :: " . pcont . str " => " -> . ptyAt (str "(Maybe a)") -> . str "\n" -> . str "noResumption_ = " . noResumption . str "\n" > _ -> > let > happyParseSig @@ -902,6 +906,7 @@ MonadStuff: > . str " a -> (a -> " . pty > . str " b) -> " . pty . str " b\n" > . str "happyThen1 = happyThen\n" +> . str "happyFmap1 f m = happyThen m (\\a -> happyReturn (f a))\n" > . str "happyReturn1 :: " . pcont . str " => a -> " . ptyAt (str "a") . str "\n" > . str "happyReturn1 = happyReturn\n" > . str "happyError' :: " . pcont . str " => " @@ -911,10 +916,6 @@ MonadStuff: > . ptyAt (str "a") > . str "\n" > . str "happyError' = " . errorHandler . str "\n" -> . str "noResumption_ :: " . pcont . str " => " -> . ptyAt (str "(Maybe a)") -> . str "\n" -> . str "noResumption_ = " . noResumption . str "\n" An error handler specified with %error is passed the current token when used with %lexer, but happyError (the old way but kept for @@ -933,8 +934,6 @@ directive determines the API of the provided function. > (if error_resumptive' then str "resume " > else str "") . > str ")" -> noResumption = if use_monad then brack monad_return . str " Nothing" -> else str "HappyIdentity Nothing" > reduceArrElem n > = str "\t(" . shows n . str " , " @@ -1047,13 +1046,13 @@ vars used in this piece of code. > actionVal LR'Fail = 0 > actionVal LR'MustFail = 0 -> mkAction :: LRAction -> String -> String -> mkAction (LR'Shift i _) = str "happyShift " . mkActionName i -> mkAction LR'Accept = str "happyAccept" -> mkAction LR'Fail = str "happyFail" -> mkAction LR'MustFail = str "happyFail" -> mkAction (LR'Reduce i _) = str "happyReduce_" . shows i -> mkAction (LR'Multiple _ a) = mkAction a +> mkAction :: (String -> String) -> LRAction -> String -> String +> mkAction _ (LR'Shift i _) = str "happyShift " . mkActionName i +> mkAction _ LR'Accept = str "happyAccept" +> mkAction show_state LR'Fail = str "happyFail " . show_state +> mkAction show_state LR'MustFail = str "happyFail " . show_state +> mkAction _ (LR'Reduce i _) = str "happyReduce_" . shows i +> mkAction show_state (LR'Multiple _ a) = mkAction show_state a > mkActionName :: Int -> String -> String > mkActionName i = str "action_" . shows i @@ -1064,6 +1063,8 @@ See notes under "Action Tables" above for some subtleties in this function. > getDefault actions = > -- pick out the action for the error token, if any > case [ act | (e, act) <- actions, e == errorTok ] of +> _ -> LR'Fail +> > > -- use error reduction as the default action, if there is one. > act@(LR'Reduce _ _) : _ -> act @@ -1135,7 +1136,7 @@ See notes under "Action Tables" above for some subtleties in this function. > , [Int] -- happyTable > , [Int] -- happyDefAction > , [Int] -- happyCheck -> , [Int] -- happyExpList +> , [Int] -- happyExpToks > , Int -- happyMinOffset > ) > @@ -1173,9 +1174,9 @@ See notes under "Action Tables" above for some subtleties in this function. > length acts'', > acts'') > | (state, acts) <- assocs action, -> let (err:_dummy:vec) = assocs acts +> let (err:catch:_dummy:vec) = assocs acts > vec' = drop (n_starts+n_nonterminals) vec -> acts' = filter notFail (err:vec') +> acts' = filter notFail (err:catch:vec') > default_act = getDefault acts' > acts'' = mkActVals acts' default_act > ] @@ -1479,7 +1480,7 @@ larger than an expected minimum value. > checkedHexChars :: Int -> [Int] -> String > checkedHexChars minValue = concatMap hexChar' > where hexChar' i | checkHexChar minValue i = hexChar i -> | otherwise = error "grammar does not fit in 16-bit representation that is used with '--ghc'" +> | otherwise = error "grammar does not fit in 16-bit representation that is used with '--ghc' " ++ show i > checkHexChar :: Int -> Int -> Bool > checkHexChar minValue i = i <= 32767 || i - 65536 < minValue diff --git a/packages/frontend/src/Happy/Frontend/Lexer.lhs b/packages/frontend/src/Happy/Frontend/Lexer.lhs index 03806ee0..e3440b65 100644 --- a/packages/frontend/src/Happy/Frontend/Lexer.lhs +++ b/packages/frontend/src/Happy/Frontend/Lexer.lhs @@ -104,44 +104,47 @@ followed by a special identifier. > lexPercent :: (Token -> Pfunc a) -> [Char] -> Int -> ParseResult a > lexPercent cont s = case s of > '%':rest -> cont (TokenKW TokDoublePercent) rest -> 't':'o':'k':'e':'n':'t':'y':'p':'e':rest -> +> 't':'o':'k':'e':'n':'t':'y':'p':'e':rest | end_of_id rest -> > cont (TokenKW TokSpecId_TokenType) rest -> 't':'o':'k':'e':'n':rest -> +> 't':'o':'k':'e':'n':rest | end_of_id rest -> > cont (TokenKW TokSpecId_Token) rest -> 'n':'a':'m':'e':rest -> +> 'n':'a':'m':'e':rest | end_of_id rest -> > cont (TokenKW TokSpecId_Name) rest -> 'p':'a':'r':'t':'i':'a':'l':rest -> +> 'p':'a':'r':'t':'i':'a':'l':rest | end_of_id rest -> > cont (TokenKW TokSpecId_Partial) rest -> 'i':'m':'p':'o':'r':'t':'e':'d':'i':'d':'e':'n':'t':'i':'t':'y':rest -> +> 'i':'m':'p':'o':'r':'t':'e':'d':'i':'d':'e':'n':'t':'i':'t':'y':rest | end_of_id rest -> > cont (TokenKW TokSpecId_ImportedIdentity) rest -> 'm':'o':'n':'a':'d':rest -> +> 'm':'o':'n':'a':'d':rest | end_of_id rest -> > cont (TokenKW TokSpecId_Monad) rest -> 'l':'e':'x':'e':'r':rest -> +> 'l':'e':'x':'e':'r':rest | end_of_id rest -> > cont (TokenKW TokSpecId_Lexer) rest -> 'n':'o':'n':'a':'s':'s':'o':'c':rest -> +> 'n':'o':'n':'a':'s':'s':'o':'c':rest | end_of_id rest -> > cont (TokenKW TokSpecId_Nonassoc) rest -> 'l':'e':'f':'t':rest -> +> 'l':'e':'f':'t':rest | end_of_id rest -> > cont (TokenKW TokSpecId_Left) rest -> 'r':'i':'g':'h':'t':rest -> +> 'r':'i':'g':'h':'t':rest | end_of_id rest -> > cont (TokenKW TokSpecId_Right) rest -> 'p':'r':'e':'c':rest -> +> 'p':'r':'e':'c':rest | end_of_id rest -> > cont (TokenKW TokSpecId_Prec) rest -> 's':'h':'i':'f':'t':rest -> +> 's':'h':'i':'f':'t':rest | end_of_id rest -> > cont (TokenKW TokSpecId_Shift) rest -> 'e':'x':'p':'e':'c':'t':rest -> +> 'e':'x':'p':'e':'c':'t':rest | end_of_id rest -> > cont (TokenKW TokSpecId_Expect) rest -> 'e':'r':'r':'o':'r':'h':'a':'n':'d':'l':'e':'r':'t':'y':'p':'e':rest -> +> 'e':'r':'r':'o':'r':'h':'a':'n':'d':'l':'e':'r':'t':'y':'p':'e':rest | end_of_id rest -> > cont (TokenKW TokSpecId_ErrorHandlerType) rest -> 'e':'r':'r':'o':'r':'r':'e':'s':'u':'m':'t':'i':'v':'e':rest -> +> 'e':'r':'r':'o':'r':'r':'e':'s':'u':'m':'p':'t':'i':'v':'e':rest | end_of_id rest -> > cont (TokenKW TokSpecId_ErrorResumptive) rest -> 'e':'r':'r':'o':'r':rest -> +> 'e':'r':'r':'o':'r':rest | end_of_id rest -> > cont (TokenKW TokSpecId_Error) rest -> 'a':'t':'t':'r':'i':'b':'u':'t':'e':'t':'y':'p':'e':rest -> +> 'a':'t':'t':'r':'i':'b':'u':'t':'e':'t':'y':'p':'e':rest | end_of_id rest -> > cont (TokenKW TokSpecId_Attributetype) rest -> 'a':'t':'t':'r':'i':'b':'u':'t':'e':rest -> +> 'a':'t':'t':'r':'i':'b':'u':'t':'e':rest | end_of_id rest -> > cont (TokenKW TokSpecId_Attribute) rest > _ -> lexError ("unrecognised directive: %" ++ > takeWhile (not.isSpace) s) s +> where +> end_of_id (c:_) = not (isAlphaNum c) +> end_of_id [] = True > lexColon :: (Token -> Pfunc a) -> [Char] -> Int -> ParseResult a > lexColon cont (':':rest) = cont (TokenKW TokDoubleColon) rest diff --git a/packages/frontend/src/Happy/Frontend/Parser/Oracle.hs b/packages/frontend/src/Happy/Frontend/Parser/Oracle.hs index cdeebe8b..a0777300 100644 --- a/packages/frontend/src/Happy/Frontend/Parser/Oracle.hs +++ b/packages/frontend/src/Happy/Frontend/Parser/Oracle.hs @@ -86,12 +86,15 @@ optTokInfoP = withToken match where match (TokenKW TokSpecId_Expect) = Consume `andThenJust` pure TokenExpect <*> numP - match (TokenKW TokSpecId_Error) = - Consume `andThenJust` - pure TokenError <*> codeP match (TokenKW TokSpecId_ErrorHandlerType) = Consume `andThenJust` pure TokenErrorHandlerType <*> idtP + match (TokenKW TokSpecId_ErrorResumptive) = + Consume `andThenJust` + pure TokenErrorResumptive + match (TokenKW TokSpecId_Error) = + Consume `andThenJust` + pure TokenError <*> codeP match (TokenKW TokSpecId_Attributetype) = Consume `andThenJust` pure TokenAttributetype <*> codeP diff --git a/tests/issue265.y b/tests/issue265.y new file mode 100644 index 00000000..9cbfcc21 --- /dev/null +++ b/tests/issue265.y @@ -0,0 +1,80 @@ +{ +{-# LANGUAGE FunctionalDependencies #-} +{-# LANGUAGE FlexibleInstances #-} +-- For ancient GHC 7.0.4 +{-# LANGUAGE MultiParamTypeClasses #-} +module Main where + +import Control.Monad (when) +import Data.Char +import System.Exit +} + +%name parseStmts +%tokentype { Token } +%errorhandlertype explist +%error { handleError } + +%monad { ParseM } { (>>=) } { return } + +%token + '1' { TOne } + '+' { TPlus } + ';' { TSemi } + +%% + +Stmts : {- empty -} { [] } + | Stmt { [$1] } + | Stmts ';' Stmt { $1 ++ [$3] } + +Stmt : Exp { ExpStmt $1 } + +Exp : '1' { One } + | Exp '+' Exp %shift { Plus $1 $3 } + +{ +data Token = TOne | TPlus | TSemi + deriving (Eq,Show) + +type Stmts = [Stmt] +data Stmt = ExpStmt Exp + deriving (Eq, Show) +data Exp = One | Plus Exp Exp + deriving (Eq, Show) + +type ParseM = Either ParseError + +data ParseError + = ParseError [String] + deriving Eq +instance Show ParseError where + show (ParseError exp) = "Parse error. Expected: " ++ show exp + +recordParseError :: [String] -> ParseM a +recordParseError expected = Left (ParseError expected) + +handleError :: ([Token], [String]) -> ParseM a +handleError (ts, expected) = recordParseError expected + +lexer :: String -> [Token] +lexer [] = [] +lexer (c:cs) + | isSpace c = lexer cs + | c == '1' = TOne:(lexer cs) + | c == '+' = TPlus:(lexer cs) + | c == ';' = TSemi:(lexer cs) + | otherwise = error "lexer error" + +main :: IO () +main = do + test "11;1" $ \res -> res == Left (ParseError ["';'","'+'"]) + where + test inp p = do + putStrLn $ "testing " ++ inp + let tokens = lexer inp + let res = parseStmts tokens + when (not (p res)) $ do + print res + exitWith (ExitFailure 1) +} diff --git a/tests/monaderror-resume.y b/tests/monaderror-resume.y new file mode 100644 index 00000000..5a0dbd45 --- /dev/null +++ b/tests/monaderror-resume.y @@ -0,0 +1,115 @@ +{ +{-# LANGUAGE FunctionalDependencies #-} +{-# LANGUAGE FlexibleInstances #-} +-- For ancient GHC 7.0.4 +{-# LANGUAGE MultiParamTypeClasses #-} +module Main where + +import Control.Monad (when) +import Data.Char +import System.Exit +} + +%name parseStmts +%tokentype { Token } +%errorresumptive -- the entire point of this test +%errorhandlertype explist -- as in monaderror-explist.y +%error { handleError } + +%monad { ParseM } { (>>=) } { return } + +%token + '1' { TOne } + '+' { TPlus } + ';' { TSemi } + +%% + +Stmts : {- empty -} { [] } + | Stmt { [$1] } + | Stmts ';' Stmt { $1 ++ [$3] } + | catch ';' Stmt %shift { [$3] } -- Could insert error AST token here in place of $1 + | catch { [] } -- Catch-all at the end + +Stmt : Exp { ExpStmt $1 } + +Exp : '1' { One } + | Exp '+' Exp %shift { Plus $1 $3 } + +{ +data Token = TOne | TPlus | TSemi + deriving (Eq,Show) + +type Stmts = [Stmt] +data Stmt = ExpStmt Exp + deriving (Eq, Show) +data Exp = One | Plus Exp Exp + deriving (Eq, Show) + +----------- Validation monad +data Validate e a = V e (Maybe a) + deriving Functor +instance Monoid e => Applicative (Validate e) where + pure a = V mempty (Just a) + V e1 f <*> V e2 a = V (e1 <> e2) (f <*> a) +instance Monoid e => Monad (Validate e) where + V e Nothing >>= _ = V e Nothing -- fatal + V e1 (Just a) >>= k | V e2 b <- k a = V (e1 <> e2) b -- non-fatal + +abort :: Monoid e => Validate e a -- this would be mzero from MonadPlus +abort = V mempty Nothing + +recordError :: e -> Validate e () -- this would be tell from MonadWriter +recordError e = V e (Just ()) + +runValidate (V e mb_a) = (e, mb_a) +----------- + +type ParseM = Validate [ParseError] + +data ParseError + = ParseError [String] + deriving Eq +instance Show ParseError where + show (ParseError exp) = "Parse error. Expected: " ++ show exp + +recordParseError :: [String] -> ParseM () +recordParseError expected = recordError [ParseError expected] + +handleError :: [Token] -> [String] -> ([Token] -> ParseM (Maybe a)) -> ParseM a +handleError ts expected resume = do + recordParseError expected + mb_ast <- resume ts + case mb_ast of + Just ast -> return ast + Nothing -> abort -- abort after parsing with no AST when resumption is impossible + +lexer :: String -> [Token] +lexer [] = [] +lexer (c:cs) + | isSpace c = lexer cs + | c == '1' = TOne:(lexer cs) + | c == '+' = TPlus:(lexer cs) + | c == ';' = TSemi:(lexer cs) + | otherwise = error "lexer error" + +main :: IO () +main = do + test "1+1;1" $ \(_,mb_ast) -> mb_ast == Just [ExpStmt (One `Plus` One), ExpStmt One] + test "1++1;1" $ \(errs,_) -> errs == [ParseError ["'1'"]] + test "1++1;+" $ \(errs,_) -> errs == [ParseError ["'1'"], ParseError ["'1'"]] + test "11;1" $ \(errs,_) -> errs == [ParseError []] + -- urgh, `Exp -> '1' .` is purely a reduction action. + -- We must walk the stack to get better messages + test "11;++" $ \(errs,_) -> errs == [ParseError [], ParseError ["'1'"]] + -- urgh, `Exp -> '1' .` is purely a reduction action. + -- We must walk the stack to get better messages + where + test inp p = do + putStrLn $ "testing " ++ inp + let tokens = lexer inp + let res = runValidate $ parseStmts tokens + when (not (p res)) $ do + print res + exitWith (ExitFailure 1) +} From 8d4f7d678da7df43b05fb679141783e1de4dc829 Mon Sep 17 00:00:00 2001 From: Sebastian Graf Date: Fri, 26 Jan 2024 09:06:35 +0100 Subject: [PATCH 06/10] 32 bit table entries --- packages/backend-lalr/data/HappyTemplate.hs | 27 ++-- .../src/Happy/Backend/LALR/ProduceCode.lhs | 129 ++++++++---------- 2 files changed, 71 insertions(+), 85 deletions(-) diff --git a/packages/backend-lalr/data/HappyTemplate.hs b/packages/backend-lalr/data/HappyTemplate.hs index f8dfa72b..d4ced432 100644 --- a/packages/backend-lalr/data/HappyTemplate.hs +++ b/packages/backend-lalr/data/HappyTemplate.hs @@ -123,16 +123,16 @@ happyDoAction i tk st = {-# INLINE happyNextAction #-} happyNextAction i st = case happyIndexActionTable i st of Just (IBOX(act)) -> act - Nothing -> indexShortOffAddr happyDefActions st + Nothing -> indexOffAddr happyDefActions st {-# INLINE happyIndexActionTable #-} happyIndexActionTable i st - | GTE(off,ILIT(0)), EQ(indexShortOffAddr happyCheck off, i) - = Prelude.Just (IBOX(indexShortOffAddr happyTable off)) + | GTE(off,ILIT(0)), EQ(indexOffAddr happyCheck off, i) + = Prelude.Just (IBOX(indexOffAddr happyTable off)) | otherwise = Prelude.Nothing where - off = PLUS(happyAdjustOffset (indexShortOffAddr happyActOffsets st), i) + off = PLUS(indexOffAddr happyActOffsets st, i) data HappyAction = HappyFail @@ -150,22 +150,17 @@ happyDecodeAction action = HappyShift MINUS(action,ILIT(1)) {-# INLINE happyIndexGotoTable #-} -happyIndexGotoTable nt st = indexShortOffAddr happyTable off +happyIndexGotoTable nt st = indexOffAddr happyTable off where - off = PLUS(happyAdjustOffset (indexShortOffAddr happyGotoOffsets st), nt) + off = PLUS(indexOffAddr happyGotoOffsets st, nt) #endif /* HAPPY_ARRAY */ #ifdef HAPPY_GHC -indexShortOffAddr (HappyA# arr) off = - Happy_GHC_Exts.narrow16Int# i - where - i = Happy_GHC_Exts.word2Int# (Happy_GHC_Exts.or# (Happy_GHC_Exts.uncheckedShiftL# high 8#) low) - high = Happy_GHC_Exts.int2Word# (Happy_GHC_Exts.ord# (Happy_GHC_Exts.indexCharOffAddr# arr (off' Happy_GHC_Exts.+# 1#))) - low = Happy_GHC_Exts.int2Word# (Happy_GHC_Exts.ord# (Happy_GHC_Exts.indexCharOffAddr# arr off')) - off' = off Happy_GHC_Exts.*# 2# +indexOffAddr (HappyA# arr) off = + Happy_GHC_Exts.int32ToInt# (Happy_GHC_Exts.indexInt32OffAddr# arr off) #else -indexShortOffAddr arr off = arr Happy_Data_Array.! off +indexOffAddr arr off = arr Happy_Data_Array.! off #endif {-# INLINE happyLt #-} @@ -173,11 +168,11 @@ happyLt x y = LT(x,y) #ifdef HAPPY_GHC readArrayBit arr bit = - Bits.testBit IBOX(indexShortOffAddr arr ((unbox_int bit) `Happy_GHC_Exts.iShiftRA#` 4#)) (bit `Prelude.mod` 16) + Bits.testBit IBOX(indexOffAddr arr ((unbox_int bit) `Happy_GHC_Exts.iShiftRA#` 5#)) (bit `Prelude.mod` 32) where unbox_int (Happy_GHC_Exts.I# x) = x #else readArrayBit arr bit = - Bits.testBit IBOX(indexShortOffAddr arr (bit `Prelude.div` 16)) (bit `Prelude.mod` 16) + Bits.testBit IBOX(indexOffAddr arr (bit `Prelude.div` 16)) (bit `Prelude.mod` 16) #endif #ifdef HAPPY_GHC diff --git a/packages/backend-lalr/src/Happy/Backend/LALR/ProduceCode.lhs b/packages/backend-lalr/src/Happy/Backend/LALR/ProduceCode.lhs index 2bb47f07..ffdc40f0 100644 --- a/packages/backend-lalr/src/Happy/Backend/LALR/ProduceCode.lhs +++ b/packages/backend-lalr/src/Happy/Backend/LALR/ProduceCode.lhs @@ -20,12 +20,13 @@ The code generator. > import Control.Monad ( forM_ ) > import Control.Monad.ST ( ST, runST ) -> import Data.Bits ( setBit ) +> import Data.Word +> import Data.Int +> import Data.Bits > import Data.Array.ST ( STUArray ) > import Data.Array.Unboxed ( UArray ) > import Data.Array.MArray ( MArray(..), freeze, readArray, writeArray ) > import Data.Array.IArray ( Array, IArray(..), (!), array, assocs, elems ) -> import Debug.Trace %----------------------------------------------------------------------------- Produce the complete output file. @@ -548,10 +549,17 @@ Action Tables. Here we do a bit of trickery and replace the normal default action (failure) for each state with at least one reduction action. For each such state, we pick one reduction action to be the default action. -This should make the code smaller without affecting the speed. It -changes the sematics for errors, however; errors could be detected in -a different state now (but they'll still be detected at the same point -in the token stream). +This should make the code smaller without affecting the speed. +It changes the sematics for errors, however; errors could be detected in a +different state now (but they'll still be detected at the same point in the +token stream). + +SG: For a data point, in issue93 the happyTable triples in size when we always +pick failure as the default reduction. +Presumably that is because there are quite a few reduction states, in which the +only non-default transition is a reduction. +Our scheme above ensures that these states don't occupy space in the main +happyTable at all; they just get an entry in the happyDefActions. Further notes on default cases: @@ -664,36 +672,27 @@ action array indexed by (terminal * last_state) + state > | ghc > = str "happyActOffsets :: HappyAddr\n" > . str "happyActOffsets = HappyA# \"" --" -> . str (checkedHexChars min_off act_offs) +> . hexChars act_offs > . str "\"#\n\n" --" > > . str "happyGotoOffsets :: HappyAddr\n" > . str "happyGotoOffsets = HappyA# \"" --" -> . str (checkedHexChars min_off goto_offs) +> . hexChars goto_offs > . str "\"#\n\n" --" > -> . str "happyAdjustOffset :: Happy_GHC_Exts.Int# -> Happy_GHC_Exts.Int#\n" -> . str "happyAdjustOffset off = " -> . (if length table < 32768 -> then str "off" -> else str "if happyLt off (" . shows min_off . str "# :: Happy_GHC_Exts.Int#)" -> . str " then off Happy_GHC_Exts.+# 65536#" -> . str " else off") -> . str "\n\n" --" -> > . str "happyDefActions :: HappyAddr\n" > . str "happyDefActions = HappyA# \"" --" -> . str (hexChars defaults) +> . hexChars defaults > . str "\"#\n\n" --" > > . str "happyCheck :: HappyAddr\n" > . str "happyCheck = HappyA# \"" --" -> . str (hexChars check) +> . hexChars check > . str "\"#\n\n" --" > > . str "happyTable :: HappyAddr\n" > . str "happyTable = HappyA# \"" --" -> . str (hexChars table) +> . hexChars table > . str "\"#\n\n" --" > | otherwise @@ -709,9 +708,6 @@ action array indexed by (terminal * last_state) + state > . interleave' "," (map shows goto_offs) > . str "\n\t])\n\n" > -> . str "happyAdjustOffset :: Prelude.Int -> Prelude.Int\n" -> . str "happyAdjustOffset = Prelude.id\n\n" -> > . str "happyDefActions :: Happy_Data_Array.Array Prelude.Int Prelude.Int\n" > . str "happyDefActions = Happy_Data_Array.listArray (0," > . shows n_states . str ") ([" @@ -734,7 +730,7 @@ action array indexed by (terminal * last_state) + state > | ghc > = str "happyExpToks :: HappyAddr\n" > . str "happyExpToks = HappyA# \"" --" -> . str (hexChars explist) +> . hexChars explist > . str "\"#\n\n" --" > | otherwise > = str "happyExpToks :: Happy_Data_Array.Array Prelude.Int Prelude.Int\n" @@ -748,7 +744,7 @@ action array indexed by (terminal * last_state) + state > n_terminals = length terms > n_nonterminals = length nonterms - n_starts -- lose %starts > -> (act_offs,goto_offs,table,defaults,check,explist,min_off) +> (act_offs,goto_offs,table,defaults,check,explist) > = mkTables action goto first_nonterm' fst_term > n_terminals n_nonterminals n_starts (bounds token_names') > @@ -1063,8 +1059,6 @@ See notes under "Action Tables" above for some subtleties in this function. > getDefault actions = > -- pick out the action for the error token, if any > case [ act | (e, act) <- actions, e == errorTok ] of -> _ -> LR'Fail -> > > -- use error reduction as the default action, if there is one. > act@(LR'Reduce _ _) : _ -> act @@ -1075,9 +1069,9 @@ See notes under "Action Tables" above for some subtleties in this function. > (act : _) | act /= LR'Fail -> LR'Fail > > -- no error actions, pick a reduce to be the default. -> _ -> case reduces of -> [] -> LR'Fail -> (act:_) -> act -- pick the first one we see for now +> _ -> case reduces of +> [] -> LR'Fail +> (act:_) -> act -- pick the first one we see for now > > where reduces > = [ act | (_, act@(LR'Reduce _ _)) <- actions ] @@ -1101,7 +1095,6 @@ See notes under "Action Tables" above for some subtleties in this function. -- happyCheck -- Indicates whether we should use the default action for state - -- the table is laid out such that the action for a given state & token -- can be found by: -- @@ -1128,6 +1121,10 @@ See notes under "Action Tables" above for some subtleties in this function. -- try to fit the actions into the check table, using the ordering -- from above. +SG: If you want to know more about similar compression schemes, consult + Storing a Sparse Table (https://dl.acm.org/doi/10.1145/359168.359175) +One can think of the mapping @\(state,token) -> (offs ! state)+token@ as a hash +and @check@ as the way to detect "collisions" (i.e., default entries). > mkTables > :: ActionTable -> GotoTable -> Name -> Int -> Int -> Int -> Int -> (Int, Int) -> @@ -1137,7 +1134,6 @@ See notes under "Action Tables" above for some subtleties in this function. > , [Int] -- happyDefAction > , [Int] -- happyCheck > , [Int] -- happyExpToks -> , Int -- happyMinOffset > ) > > mkTables action goto first_nonterm' fst_term @@ -1150,11 +1146,10 @@ See notes under "Action Tables" above for some subtleties in this function. > , def_actions > , take max_off (elems check) > , elems explist -> , min_off > ) > where > -> (table,check,act_offs,goto_offs,explist,min_off,max_off) +> (table,check,act_offs,goto_offs,explist,max_off) > = runST (genTables (length actions) > max_token token_names_bound > sorted_actions explist_actions) @@ -1242,7 +1237,6 @@ See notes under "Action Tables" above for some subtleties in this function. > , UArray Int Int -- action offsets > , UArray Int Int -- goto offsets > , UArray Int Int -- expected tokens list -> , Int -- lowest offset in table > , Int -- highest offset in table > ) > @@ -1255,7 +1249,7 @@ See notes under "Action Tables" above for some subtleties in this function. > off_arr <- newArray (-max_token, mAX_TABLE_SIZE) 0 > exp_array <- newArray (0, (n_actions * n_token_names + 15) `div` 16) 0 > -> (min_off,max_off) <- genTables' table check act_offs goto_offs off_arr exp_array entries +> max_off <- genTables' table check act_offs goto_offs off_arr exp_array entries > explist max_token n_token_names > > table' <- freeze table @@ -1263,7 +1257,7 @@ See notes under "Action Tables" above for some subtleties in this function. > act_offs' <- freeze act_offs > goto_offs' <- freeze goto_offs > exp_array' <- freeze exp_array -> return (table',check',act_offs',goto_offs',exp_array',min_off,max_off+1) +> return (table',check',act_offs',goto_offs',exp_array',max_off+1) > where > n_states = n_actions - 1 @@ -1283,19 +1277,19 @@ See notes under "Action Tables" above for some subtleties in this function. > -> [(Int, [Int])] -- expected tokens lists > -> Int -- maximum token no. > -> Int -- number of token names -> -> ST s (Int,Int) -- lowest and highest offsets in table +> -> ST s Int -- highest offsets in table > > genTables' table check act_offs goto_offs off_arr exp_array entries > explist max_token n_token_names -> = fill_exp_array >> fit_all entries 0 0 1 +> = fill_exp_array >> fit_all entries 0 1 > where > -> fit_all [] min_off max_off _ = return (min_off, max_off) -> fit_all (s:ss) min_off max_off fst_zero = do -> (off, new_min_off, new_max_off, new_fst_zero) <- fit s min_off max_off fst_zero +> fit_all [] max_off _ = return max_off +> fit_all (s:ss) max_off fst_zero = do +> (off, new_max_off, new_fst_zero) <- fit s max_off fst_zero > ss' <- same_states s ss off > writeArray off_arr off 1 -> fit_all ss' new_min_off new_max_off new_fst_zero +> fit_all ss' new_max_off new_fst_zero > > fill_exp_array = > forM_ explist $ \(state, tokens) -> @@ -1321,19 +1315,16 @@ See notes under "Action Tables" above for some subtleties in this function. > -- fit a vector into the table. Return the offset of the vector, > -- the maximum offset used in the table, and the offset of the first > -- entry in the table (used to speed up the lookups a bit). -> fit (_,_,_,_,_,[]) min_off max_off fst_zero = return (0,min_off,max_off,fst_zero) +> fit (_,_,_,_,_,[]) max_off fst_zero = return (0,max_off,fst_zero) > > fit (act_or_goto, state_no, _deflt, _, _, state@((t,_):_)) -> min_off max_off fst_zero = do +> max_off fst_zero = do > -- start at offset 1 in the table: all the empty states > -- (states with just a default reduction) are mapped to > -- offset zero. > off <- findFreeOffset (-t+fst_zero) check off_arr state -> let new_min_off | furthest_left < min_off = furthest_left -> | otherwise = min_off -> new_max_off | furthest_right > max_off = furthest_right +> let new_max_off | furthest_right > max_off = furthest_right > | otherwise = max_off -> furthest_left = off > furthest_right = off + max_token > > -- trace ("fit: state " ++ show state_no ++ ", off " ++ show off ++ ", elems " ++ show state) $ do @@ -1341,7 +1332,7 @@ See notes under "Action Tables" above for some subtleties in this function. > writeArray (which_off act_or_goto) state_no off > addState off table check state > new_fst_zero <- findFstFreeSlot check fst_zero -> return (off, new_min_off, new_max_off, new_fst_zero) +> return (off, new_max_off, new_fst_zero) When looking for a free offset in the table, we use the 'check' table rather than the main table. The check table starts off with (-1) in @@ -1459,28 +1450,28 @@ slot is free or not. -- Convert an integer to a 16-bit number encoded in \xNN\xNN format suitable -- for placing in a string. -> hexChars :: [Int] -> String -> hexChars = concatMap hexChar +> hexChars :: [Int] -> String -> String +> hexChars is s = foldr (hexChar . toInt32) s is -> hexChar :: Int -> String -> hexChar i | i < 0 = hexChar (i + 65536) -> hexChar i = toHex (i `mod` 256) ++ toHex (i `div` 256) +The following definition of @hexChar@ is endian-ness preserving. +Should endianness differ between the architecture running happy and the one +running the compiled parser, the order of [0,1,2,3] must be reversed. -> toHex :: Int -> String -> toHex i = ['\\','x', hexDig (i `div` 16), hexDig (i `mod` 16)] +> hexChar :: Int32 -> String -> String +> hexChar i s = foldr (toHex . byte i) s [0,1,2,3] -> hexDig :: Int -> Char -> hexDig i | i <= 9 = chr (i + ord '0') -> | otherwise = chr (i - 10 + ord 'a') +> byte :: Int32 -> Int -> Word8 +> byte n i = fromIntegral (0xFF .&. unsafeShiftR n (i*8)) -This guards against integers that are so large as to (when converted using -'hexChar') wrap around the maximum value of 16-bit numbers and then end up -larger than an expected minimum value. +> toHex :: Word8 -> String -> String +> toHex i s = '\\':'x':hexDig (0xF .&. unsafeShiftR i 4):hexDig (0xF .&. i):s -> checkedHexChars :: Int -> [Int] -> String -> checkedHexChars minValue = concatMap hexChar' -> where hexChar' i | checkHexChar minValue i = hexChar i -> | otherwise = error "grammar does not fit in 16-bit representation that is used with '--ghc' " ++ show i +> hexDig :: Word8 -> Char +> hexDig i | i <= 9 = chr (fromIntegral i + ord '0') +> | otherwise = chr (fromIntegral i - 10 + ord 'a') -> checkHexChar :: Int -> Int -> Bool -> checkHexChar minValue i = i <= 32767 || i - 65536 < minValue +> toInt32 :: Int -> Int32 +> toInt32 i +> | i == fromIntegral i32 = i32 +> | otherwise = error ("offset was too large for Int32: " ++ show i) +> where i32 = fromIntegral i From 8e942585ad0a54518ddb24633db88003243538fa Mon Sep 17 00:00:00 2001 From: Sebastian Graf Date: Fri, 26 Jan 2024 13:55:55 +0100 Subject: [PATCH 07/10] Simulate reductions and overhaul expected token generation --- packages/backend-lalr/data/HappyTemplate.hs | 82 ++++++++++++++++--- .../backend-lalr/src/Happy/Backend/LALR.hs | 8 +- .../src/Happy/Backend/LALR/ProduceCode.lhs | 68 +++++++++------ tests/monaderror-resume.y | 21 +++-- 4 files changed, 130 insertions(+), 49 deletions(-) diff --git a/packages/backend-lalr/data/HappyTemplate.hs b/packages/backend-lalr/data/HappyTemplate.hs index d4ced432..ee6fa735 100644 --- a/packages/backend-lalr/data/HappyTemplate.hs +++ b/packages/backend-lalr/data/HappyTemplate.hs @@ -112,7 +112,7 @@ happyDoAction i tk st = ",\taction: ") case happyDecodeAction (happyNextAction i st) of HappyFail -> DEBUG_TRACE("failing.\n") - happyFail st i tk st + happyFail i tk st HappyAccept -> DEBUG_TRACE("accept.\n") happyAccept i tk st HappyReduce rule -> DEBUG_TRACE("reduce (rule " ++ show IBOX(rule) ++ ")") @@ -139,6 +139,7 @@ data HappyAction | HappyAccept | HappyReduce FAST_INT -- rule number | HappyShift FAST_INT -- new state + deriving Show {-# INLINE happyDecodeAction #-} happyDecodeAction ILIT(0) = HappyFail @@ -159,8 +160,21 @@ happyIndexGotoTable nt st = indexOffAddr happyTable off #ifdef HAPPY_GHC indexOffAddr (HappyA# arr) off = Happy_GHC_Exts.int32ToInt# (Happy_GHC_Exts.indexInt32OffAddr# arr off) + +#ifdef HAPPY_ARRAY +indexRuleArr arr r = (IBOX(nt), IBOX(len)) + where + IBOX(n_starts) = happy_n_starts + offs = TIMES(MINUS(r,n_starts),ILIT(2)) + nt = indexOffAddr arr offs + len = indexOffAddr arr PLUS(offs,ILIT(1)) +#endif #else indexOffAddr arr off = arr Happy_Data_Array.! off + +#ifdef HAPPY_ARRAY +indexRuleArr arr nt = arr Happy_Data_Array.! nt +#endif #endif {-# INLINE happyLt #-} @@ -308,18 +322,17 @@ happyTryFixup i tk HAPPYSTATE(action) sts stk = -- `tk`. Hence we don't change `tk` in the call here -- parse error if we are in fixup and fail again -happyFixupFailed state_num tk st sts (x `HappyStk` stk) = +happyFixupFailed tk st sts (x `HappyStk` stk) = let i = GET_ERROR_TOKEN(x) in DEBUG_TRACE("`error` fixup failed.\n") #if defined(HAPPY_ARRAY) - -- TODO: Walk the stack instead of looking only at the top state_num - happyError_ i tk (happyExpListPerState (IBOX(state_num))) (happyResume i tk st sts stk) + happyError_ i tk (map happyTokenToString (happyExpectedTokens st sts)) (happyResume i tk st sts stk) #else - happyError_ i tk (happyExpListPerState (IBOX(state_num))) (happyResume i tk st sts stk) + happyError_ i tk [] (happyReturn1 Nothing) #endif -happyFail state_num ERROR_TOK = happyFixupFailed state_num -happyFail _ i = happyTryFixup i +happyFail ERROR_TOK = happyFixupFailed +happyFail i = happyTryFixup i #if defined(HAPPY_ARRAY) happyResume i tk st sts stk = pop_items st sts stk @@ -335,9 +348,8 @@ happyResume i tk st sts stk = pop_items st sts stk | CONS(st1,sts1) <- sts, _ `HappyStk` stk1 <- stk = DEBUG_TRACE("discarding.\n") pop_items st1 sts1 stk1 --- discard_input_until_exp :: Happy_GHC_Exts.Int# -> Token -> Happy_GHC_Exts.Int# -> _ discard_input_until_exp i tk st sts stk - | HappyFail <- happyDecodeAction (happyNextAction i st) + | ultimately_fails i st sts = DEBUG_TRACE("discard token in state " ++ show IBOX(st) ++ ": " ++ show IBOX(i) ++ "\n") happyLex (\_eof_tk -> happyReturn1 Nothing) (\i tk -> discard_input_until_exp i tk st sts stk) -- not eof @@ -345,14 +357,62 @@ happyResume i tk st sts stk = pop_items st sts stk = DEBUG_TRACE("found expected token in state " ++ show IBOX(st) ++ ": " ++ show IBOX(i) ++ "\n") happyFmap1 (\a -> a `happySeq` Just a) (DO_ACTION(st,i,tk,sts,stk)) -#else -happyResume (i :: FAST_INT) tk st sts stk = happyReturn1 Nothing + ultimately_fails i st sts = + DEBUG_TRACE("trying token " ++ show IBOX(i) ++ " in state " ++ show IBOX(st) ++ ": ") + case happyDecodeAction (happyNextAction i st) of + HappyFail -> DEBUG_TRACE("fail.\n") True + HappyAccept -> DEBUG_TRACE("accept.\n") False + HappyShift _ -> DEBUG_TRACE("shift.\n") False + HappyReduce r -> case happySimulateReduce r st sts of + CONS(st1,sts1) -> ultimately_fails i st1 sts1 + +happySimulateReduce r st sts = + DEBUG_TRACE("simulate reduction of rule " ++ show IBOX(r) ++ ", ") + let (IBOX(nt), IBOX(len)) = indexRuleArr happyRuleArr r in + DEBUG_TRACE("nt " ++ show IBOX(nt) ++ ", len: " ++ show IBOX(len) ++ ", new_st ") + let sts1@CONS(st1,_) = happyDrop len CONS(st,sts) + new_st = happyIndexGotoTable nt st1 in + DEBUG_TRACE(show IBOX(new_st) ++ ".\n") + CONS(new_st, sts1) + +happyTokenToString i = happyTokenStrings Prelude.!! (i Prelude.- 2) +happyExpectedTokens st sts = + DEBUG_TRACE("constructing expected tokens.\n") + search_shifts st sts [] + where + search_shifts st sts shifts = foldr (add_action st sts) shifts (distinct_actions st) + add_action st sts (IBOX(i), IBOX(act)) shifts = + DEBUG_TRACE("found action in state " ++ show IBOX(st) ++ ", input " ++ show IBOX(i) ++ ", " ++ show (happyDecodeAction act) ++ "\n") + case happyDecodeAction act of + HappyFail -> shifts + HappyAccept -> shifts -- This would always be %eof or error... Not helpful + HappyShift _ -> Happy_Data_List.insert IBOX(i) shifts + HappyReduce r -> case happySimulateReduce r st sts of + CONS(st1,sts1) -> search_shifts st1 sts1 shifts + distinct_actions st + = ((-1), IBOX(indexOffAddr happyDefActions st)) + : [ (i, act) | i <- [begin_i..happy_n_terms], act <- get_act row_off i ] + where + row_off = indexOffAddr happyActOffsets st + begin_i = 2 -- +2: errorTok,catchTok + get_act off IBOX(i) + | let off_i = PLUS(off,i) + , GTE(off_i,ILIT(0)) + , EQ(indexOffAddr happyCheck off_i,i) + = [IBOX(indexOffAddr happyTable off_i)] + | otherwise + = [] + #endif -- Internal happy errors: +#if defined(HAPPY_GHC) +notHappyAtAll :: Happy_GHC_Stack.HasCallStack => a +#else notHappyAtAll :: a +#endif notHappyAtAll = Prelude.error "Internal Happy error\n" ----------------------------------------------------------------------------- diff --git a/packages/backend-lalr/src/Happy/Backend/LALR.hs b/packages/backend-lalr/src/Happy/Backend/LALR.hs index 61e82768..a919ac8b 100644 --- a/packages/backend-lalr/src/Happy/Backend/LALR.hs +++ b/packages/backend-lalr/src/Happy/Backend/LALR.hs @@ -19,16 +19,18 @@ magicFilter magicName = case magicName of in filter_output importsToInject :: Bool -> Bool -> String -importsToInject ghc debug = concat ["\n", import_array, import_bits, glaexts_import, debug_imports, applicative_imports] +importsToInject ghc debug = concat ["\n", import_array, import_list, import_bits, glaexts_import, debug_imports, applicative_imports] where - glaexts_import | ghc = import_glaexts + glaexts_import | ghc = import_glaexts ++ import_ghcstack | otherwise = "" debug_imports | debug = import_debug | otherwise = "" applicative_imports = import_applicative import_glaexts = "import qualified GHC.Exts as Happy_GHC_Exts\n" + import_ghcstack = "import qualified GHC.Stack as Happy_GHC_Stack\n" import_array = "import qualified Data.Array as Happy_Data_Array\n" + import_list = "import qualified Data.List as Happy_Data_List\n" import_bits = "import qualified Data.Bits as Bits\n" import_debug = "import qualified System.IO as Happy_System_IO\n" ++ "import qualified System.IO.Unsafe as Happy_System_IO_Unsafe\n" ++ @@ -49,4 +51,4 @@ defines debug array ghc coerce = unlines [ "#define " ++ d ++ " 1" | d <- vars_t , [ "HAPPY_ARRAY" | array ] , [ "HAPPY_GHC" | ghc ] , [ "HAPPY_COERCE" | coerce ] - ] \ No newline at end of file + ] diff --git a/packages/backend-lalr/src/Happy/Backend/LALR/ProduceCode.lhs b/packages/backend-lalr/src/Happy/Backend/LALR/ProduceCode.lhs index ffdc40f0..47db3b6e 100644 --- a/packages/backend-lalr/src/Happy/Backend/LALR/ProduceCode.lhs +++ b/packages/backend-lalr/src/Happy/Backend/LALR/ProduceCode.lhs @@ -604,24 +604,16 @@ machinery to discard states in the parser... > produceActionTable TargetArrayBased > = produceActionArray > . produceReduceArray +> . produceRuleArray > . str "happy_n_terms = " . shows n_terminals . str " :: Prelude.Int\n" > . str "happy_n_nonterms = " . shows n_nonterminals . str " :: Prelude.Int\n\n" +> . str "happy_n_starts = " . shows n_starts . str " :: Prelude.Int\n\n" > > produceExpToksPerState > = produceExpToksArray -> . str "{-# NOINLINE happyExpListPerState #-}\n" -> . str "happyExpListPerState st =\n" -> . str " token_strs_expected\n" -> . str " where token_strs = " . shows (elems token_names') . str "\n" -> . str " bit_start = st Prelude.* " . shows nr_tokens . str "\n" -> . str " bit_end = (st Prelude.+ 1) Prelude.* " . shows nr_tokens . str "\n" -> . str " read_bit = readArrayBit happyExpToks\n" -> . str " bits = Prelude.map read_bit [bit_start..bit_end Prelude.- 1]\n" -> . str " bits_indexed = Prelude.zip bits [0.." -> . shows (nr_tokens - 1) . str "]\n" -> . str " token_strs_expected = Prelude.concatMap f bits_indexed\n" -> . str " f (Prelude.False, _) = []\n" -> . str " f (Prelude.True, nr) = [token_strs Prelude.!! nr]\n" +> . str "{-# NOINLINE happyTokenStrings #-}\n" +> . str "happyTokenStrings = " . shows (drop (fst_term - 1) (elems token_names')) . str "\n" +> -- fst_term - 1: fst_term includes eofToken, but that is last in the list. > . str "\n" > where (first_token, last_token) = bounds token_names' > nr_tokens = last_token - first_token + 1 @@ -635,7 +627,7 @@ machinery to discard states in the parser... > . (if ghc > then str " x = happyTcHack x " > else str " _ = ") -> . mkAction (showInt state) default_act +> . mkAction default_act > . str "\n\n" > > where gotos = goto' ! state @@ -649,7 +641,7 @@ machinery to discard states in the parser... > > producePossiblyFailingAction t action' > = actionFunction t -> . mkAction (showInt state) action' +> . mkAction action' > . str "\n" > > produceGotos (t, Goto i) @@ -750,8 +742,8 @@ action array indexed by (terminal * last_state) + state > > table_size = length table - 1 > -> produceReduceArray -> = {- str "happyReduceArr :: Array Int a\n" -} +> produceReduceArray -- rule number to (non-terminal number, rule length) +> = {- str "happyReduceArr :: Happy_Data_Array.Array Prelude.Int (Prelude.Int,Prelude.Int)\n" -} > str "happyReduceArr = Happy_Data_Array.array (" > . shows (n_starts :: Int) -- omit the %start reductions > . str ", " @@ -759,6 +751,24 @@ action array indexed by (terminal * last_state) + state > . str ") [\n" > . interleave' ",\n" (map reduceArrElem [n_starts..n_rules]) > . str "\n\t]\n\n" +> +> produceRuleArray +> | ghc +> = str "happyRuleArr :: HappyAddr\n" +> . str "happyRuleArr = HappyA# \"" -- " +> . hexChars (concatMap (\(nt,len) -> [nt,len]) ruleArrElems) +> . str "\"#\n\n" --" +> | otherwise +> = str "happyRuleArr :: Happy_Data_Array.Array Int (Int,Int)\n" +> . str "happyRuleArr = Happy_Data_Array.array (" +> . shows (n_starts :: Int) -- omit the %start reductions +> . str ", " +> . shows n_rules +> . str ") [\n" +> . interleave' ",\n" (zipWith showRuleArrElem [n_starts..n_rules] ruleArrElems) +> . str "\n\t]\n\n" +> +> ruleArrElems = map (\(Production nt toks _code _prio) -> (nt-first_nonterm',length toks)) (drop n_starts prods) > n_rules = length prods - 1 :: Int @@ -922,9 +932,9 @@ directive determines the API of the provided function. > str "(\\tokens explist resume -> " . > (if use_monad then str "" > else str "HappyIdentity Prelude.$ ") . -> str (case error_handler' of Just h -> h; Nothing -> "happyError") . str " " . -> str (case (error_handler', lexer') of (Nothing, Just _) -> "" -> _ -> "tokens ") . +> (case error_handler' of Just h -> brack h . str " "; Nothing -> str "happyError ") . +> (case (error_handler', lexer') of (Nothing, Just _) -> id +> _ -> str "tokens ") . > (case error_sig' of ErrorHandlerTypeExpList -> str "explist " > ErrorHandlerTypeDefault -> str "") . > (if error_resumptive' then str "resume " @@ -934,6 +944,10 @@ directive determines the API of the provided function. > reduceArrElem n > = str "\t(" . shows n . str " , " > . str "happyReduce_" . shows n . char ')' +> +> showRuleArrElem r (nt, len) +> = str "\t(" . shows r . str " , (" +> . shows nt . str "," . shows len . str ") )" ----------------------------------------------------------------------------- -- Produce the parser entry and exit points @@ -1042,13 +1056,13 @@ vars used in this piece of code. > actionVal LR'Fail = 0 > actionVal LR'MustFail = 0 -> mkAction :: (String -> String) -> LRAction -> String -> String -> mkAction _ (LR'Shift i _) = str "happyShift " . mkActionName i -> mkAction _ LR'Accept = str "happyAccept" -> mkAction show_state LR'Fail = str "happyFail " . show_state -> mkAction show_state LR'MustFail = str "happyFail " . show_state -> mkAction _ (LR'Reduce i _) = str "happyReduce_" . shows i -> mkAction show_state (LR'Multiple _ a) = mkAction show_state a +> mkAction :: LRAction -> String -> String +> mkAction (LR'Shift i _) = str "happyShift " . mkActionName i +> mkAction LR'Accept = str "happyAccept" +> mkAction LR'Fail = str "happyFail" +> mkAction LR'MustFail = str "happyFail" +> mkAction (LR'Reduce i _) = str "happyReduce_" . shows i +> mkAction (LR'Multiple _ a) = mkAction a > mkActionName :: Int -> String -> String > mkActionName i = str "action_" . shows i diff --git a/tests/monaderror-resume.y b/tests/monaderror-resume.y index 5a0dbd45..578f7d73 100644 --- a/tests/monaderror-resume.y +++ b/tests/monaderror-resume.y @@ -10,7 +10,8 @@ import Data.Char import System.Exit } -%name parseStmts +%name parseStmts Stmts +%name parseExp Exp %tokentype { Token } %errorresumptive -- the entire point of this test %errorhandlertype explist -- as in monaderror-explist.y @@ -29,7 +30,6 @@ Stmts : {- empty -} { [] } | Stmt { [$1] } | Stmts ';' Stmt { $1 ++ [$3] } | catch ';' Stmt %shift { [$3] } -- Could insert error AST token here in place of $1 - | catch { [] } -- Catch-all at the end Stmt : Exp { ExpStmt $1 } @@ -98,12 +98,10 @@ main = do test "1+1;1" $ \(_,mb_ast) -> mb_ast == Just [ExpStmt (One `Plus` One), ExpStmt One] test "1++1;1" $ \(errs,_) -> errs == [ParseError ["'1'"]] test "1++1;+" $ \(errs,_) -> errs == [ParseError ["'1'"], ParseError ["'1'"]] - test "11;1" $ \(errs,_) -> errs == [ParseError []] - -- urgh, `Exp -> '1' .` is purely a reduction action. - -- We must walk the stack to get better messages - test "11;++" $ \(errs,_) -> errs == [ParseError [], ParseError ["'1'"]] - -- urgh, `Exp -> '1' .` is purely a reduction action. - -- We must walk the stack to get better messages + test "11;1" $ \(errs,_) -> errs == [ParseError ["';'"]] + test "11;++" $ \(errs,_) -> errs == [ParseError ["';'"], ParseError ["'1'"]] + test "11;1++" $ \(errs,_) -> errs == [ParseError ["';'"], ParseError ["'1'"]] + testExp "11" $ \(errs,_) -> errs == [ParseError ["'+'"]] where test inp p = do putStrLn $ "testing " ++ inp @@ -112,4 +110,11 @@ main = do when (not (p res)) $ do print res exitWith (ExitFailure 1) + testExp inp p = do + putStrLn $ "testing Exp " ++ inp + let tokens = lexer inp + let res = runValidate $ parseExp tokens + when (not (p res)) $ do + print res + exitWith (ExitFailure 1) } From 1db84460d70dfe9dc8bb6b2e59a5687ef3909265 Mon Sep 17 00:00:00 2001 From: Sebastian Graf Date: Sat, 27 Jan 2024 12:17:35 +0100 Subject: [PATCH 08/10] Better abort/report factoring --- happy.cabal | 2 +- packages/backend-lalr/data/HappyTemplate.hs | 13 ++- .../src/Happy/Backend/LALR/ProduceCode.lhs | 89 +++++++++++-------- .../src/Happy/CodeGen/Common/Options.lhs | 30 ++++--- packages/frontend/happy-frontend.cabal | 2 +- .../frontend/src/Happy/Frontend/AbsSyn.lhs | 37 +++----- .../frontend/src/Happy/Frontend/Lexer.lhs | 9 +- .../frontend/src/Happy/Frontend/Mangler.lhs | 8 +- .../src/Happy/Frontend/Parser/Bootstrapped.ly | 8 +- .../src/Happy/Frontend/Parser/Oracle.hs | 16 ++-- tests/monaderror-explist.y | 2 +- tests/monaderror-resume.y | 14 ++- 12 files changed, 117 insertions(+), 113 deletions(-) diff --git a/happy.cabal b/happy.cabal index a2904506..39f3bf36 100644 --- a/happy.cabal +++ b/happy.cabal @@ -151,7 +151,7 @@ executable happy happy-backend-glr == 2.0 default-language: Haskell98 - default-extensions: CPP, MagicHash, FlexibleContexts, NamedFieldPuns + default-extensions: CPP, MagicHash, FlexibleContexts, NamedFieldPuns, PatternGuards ghc-options: -Wall other-modules: Paths_happy diff --git a/packages/backend-lalr/data/HappyTemplate.hs b/packages/backend-lalr/data/HappyTemplate.hs index ee6fa735..f175779e 100644 --- a/packages/backend-lalr/data/HappyTemplate.hs +++ b/packages/backend-lalr/data/HappyTemplate.hs @@ -83,7 +83,7 @@ happyTrace string expr = Happy_System_IO_Unsafe.unsafePerformIO $ do #endif infixr 9 `HappyStk` -data HappyStk a = HappyStk a (HappyStk a) +data HappyStk a = HappyStk !a (HappyStk a) ----------------------------------------------------------------------------- -- starting the parse @@ -326,9 +326,9 @@ happyFixupFailed tk st sts (x `HappyStk` stk) = let i = GET_ERROR_TOKEN(x) in DEBUG_TRACE("`error` fixup failed.\n") #if defined(HAPPY_ARRAY) - happyError_ i tk (map happyTokenToString (happyExpectedTokens st sts)) (happyResume i tk st sts stk) + happyReport i tk (map happyTokenToString (happyExpectedTokens st sts)) (happyResume i tk st sts stk) #else - happyError_ i tk [] (happyReturn1 Nothing) + happyReport i tk [] happyAbort #endif happyFail ERROR_TOK = happyFixupFailed @@ -344,19 +344,18 @@ happyResume i tk st sts stk = pop_items st sts stk | DEBUG_TRACE("can't shift catch in " ++ show IBOX(st) ++ ", ") True , IBOX(n_starts) <- happy_n_starts, LT(st, n_starts) = DEBUG_TRACE("because it is a start state. no resumption.\n") - happyReturn1 Nothing + happyAbort | CONS(st1,sts1) <- sts, _ `HappyStk` stk1 <- stk = DEBUG_TRACE("discarding.\n") pop_items st1 sts1 stk1 discard_input_until_exp i tk st sts stk | ultimately_fails i st sts = DEBUG_TRACE("discard token in state " ++ show IBOX(st) ++ ": " ++ show IBOX(i) ++ "\n") - happyLex (\_eof_tk -> happyReturn1 Nothing) + happyLex (\_eof_tk -> happyAbort) (\i tk -> discard_input_until_exp i tk st sts stk) -- not eof | otherwise = DEBUG_TRACE("found expected token in state " ++ show IBOX(st) ++ ": " ++ show IBOX(i) ++ "\n") - happyFmap1 (\a -> a `happySeq` Just a) - (DO_ACTION(st,i,tk,sts,stk)) + DO_ACTION(st,i,tk,sts,stk) ultimately_fails i st sts = DEBUG_TRACE("trying token " ++ show IBOX(i) ++ " in state " ++ show IBOX(st) ++ ": ") case happyDecodeAction (happyNextAction i st) of diff --git a/packages/backend-lalr/src/Happy/Backend/LALR/ProduceCode.lhs b/packages/backend-lalr/src/Happy/Backend/LALR/ProduceCode.lhs index 47db3b6e..51206219 100644 --- a/packages/backend-lalr/src/Happy/Backend/LALR/ProduceCode.lhs +++ b/packages/backend-lalr/src/Happy/Backend/LALR/ProduceCode.lhs @@ -64,8 +64,7 @@ Produce the complete output file. > , monad = (use_monad,monad_context,monad_tycon,monad_then,monad_return) > , token_type = token_type' > , error_handler = error_handler' -> , error_sig = error_sig' -> , error_resumptive = error_resumptive' +> , error_expected = error_expected' > }) > action goto lang_exts module_header module_trailer > target coerce ghc strict @@ -452,10 +451,10 @@ The token conversion function. > . str "happyNewToken action sts stk = happyLex (\\tk -> " . eofAction . str ") (" > . str "\\i tk -> " . doAction . str " sts stk)\n" > . str "\n" -> . str "happyError_ " . eofTok . str " tk explist resume tks = happyError' tks explist resume\n" -> . str "happyError_ _ tk explist resume tks = happyError' (tk:tks) explist resume\n" +> . str "happyReport " . eofTok . str " tk explist resume tks = happyReport' tks explist resume\n" +> . str "happyReport _ tk explist resume tks = happyReport' (tk:tks) explist resume\n" > -- when the token is EOF, tk == _|_ (notHappyAtAll) -> -- so we must not pass it to happyError' +> -- so we must not pass it to happyReport' > . str "\n"; > Just (lexer'',eof') -> @@ -493,9 +492,9 @@ The token conversion function. > . str "happyNewToken action sts stk = happyLex (\\tk -> " . eofAction . str ") (" > . str "\\i tk -> " . doAction . str " sts stk)\n" > . str "\n" -> . str "happyError_ " . eofTok . str " = happyError'\n" -> . str "happyError_ _ = happyError'\n" -> -- superfluous pattern match needed to force happyError_ to +> . str "happyReport " . eofTok . str " = happyReport'\n" +> . str "happyReport _ = happyReport'\n" +> -- superfluous pattern match needed to force happyReport to > -- have the correct type. > . str "\n"; > } @@ -615,10 +614,6 @@ machinery to discard states in the parser... > . str "happyTokenStrings = " . shows (drop (fst_term - 1) (elems token_names')) . str "\n" > -- fst_term - 1: fst_term includes eofToken, but that is last in the list. > . str "\n" -> where (first_token, last_token) = bounds token_names' -> nr_tokens = last_token - first_token + 1 -> -- NB: This number includes non-terminals as well as special -> -- terminals (`error`,`%dummy`,...). Hence the need to adjust. > > produceStateFunction goto' (state, acts) > = foldr (.) id (map produceActions assocs_acts) @@ -742,9 +737,8 @@ action array indexed by (terminal * last_state) + state > > table_size = length table - 1 > -> produceReduceArray -- rule number to (non-terminal number, rule length) -> = {- str "happyReduceArr :: Happy_Data_Array.Array Prelude.Int (Prelude.Int,Prelude.Int)\n" -} -> str "happyReduceArr = Happy_Data_Array.array (" +> produceReduceArray +> = str "happyReduceArr = Happy_Data_Array.array (" > . shows (n_starts :: Int) -- omit the %start reductions > . str ", " > . shows n_rules @@ -752,7 +746,7 @@ action array indexed by (terminal * last_state) + state > . interleave' ",\n" (map reduceArrElem [n_starts..n_rules]) > . str "\n\t]\n\n" > -> produceRuleArray +> produceRuleArray -- rule number to (non-terminal number, rule length) > | ghc > = str "happyRuleArr :: HappyAddr\n" > . str "happyRuleArr = HappyA# \"" -- " @@ -867,14 +861,21 @@ MonadStuff: > . str "happyReturn1 :: " . pcont . str " => a -> b -> " . ptyAt (str "a") . str "\n" > . str "happyReturn1 = \\a tks -> " . brack monad_return > . str " a\n" -> . str "happyError' :: " . pcont . str " => " +> . str "happyReport' :: " . pcont . str " => " > . str "[" . token . str "] -> " > . str "[Prelude.String] -> (" > . str "[" . token . str "] -> " -> . ptyAt (str "(Maybe a)") . str ") -> " +> . ptyAt (str "a") . str ") -> " > . ptyAt (str "a") > . str "\n" -> . str "happyError' = " . errorHandler . str "\n" +> . str "happyReport' = " . callReportError . str "\n" +> . str "\n" +> . str "happyAbort :: " . pcont . str " => " +> . str "[" . token . str "] -> " +> . ptyAt (str "a") +> . str "\n" +> . str "happyAbort = " . str abort_handler . str "\n" +> . str "\n" > _ -> > let > happyParseSig @@ -915,31 +916,49 @@ MonadStuff: > . str "happyFmap1 f m = happyThen m (\\a -> happyReturn (f a))\n" > . str "happyReturn1 :: " . pcont . str " => a -> " . ptyAt (str "a") . str "\n" > . str "happyReturn1 = happyReturn\n" -> . str "happyError' :: " . pcont . str " => " +> . str "happyReport' :: " . pcont . str " => " > . token . str " -> " > . str "[Prelude.String] -> " -> . ptyAt (str "(Maybe a)") . str " -> " +> . ptyAt (str "a") . str " -> " > . ptyAt (str "a") > . str "\n" -> . str "happyError' = " . errorHandler . str "\n" +> . str "happyReport' = " . callReportError . str "\n" +> . str "\n" +> . str "happyAbort :: " . pcont . str " => " +> . ptyAt (str "a") +> . str "\n" +> . str "happyAbort = " . str abort_handler . str "\n" +> . str "\n" +The error handler takes up to three arguments. An error handler specified with %error is passed the current token -when used with %lexer, but happyError (the old way but kept for -compatibility) is not passed the current token. Also, the %errorhandlertype -directive determines the API of the provided function. - -> errorHandler = -> str "(\\tokens explist resume -> " . +when used with %lexer as the first argument, but happyError (the old way but kept for +compatibility) is not passed the current token. +Furthermore, the second argument is the list of expected tokens +in the presence of the %error.expected directive. +The last argument is the "resumption", a continuation that tries to find +an item on the stack taking a @catch@ terminal where parsing may resume, +in the presence of the two-argument form of the %error directive. + +> callReportError = -- this one wraps around report_error_handler to expose a unified interface +> str "(\\tokens expected resume -> " . > (if use_monad then str "" > else str "HappyIdentity Prelude.$ ") . -> (case error_handler' of Just h -> brack h . str " "; Nothing -> str "happyError ") . -> (case (error_handler', lexer') of (Nothing, Just _) -> id -> _ -> str "tokens ") . -> (case error_sig' of ErrorHandlerTypeExpList -> str "explist " -> ErrorHandlerTypeDefault -> str "") . -> (if error_resumptive' then str "resume " -> else str "") . +> report_error_handler . +> (case (error_handler', lexer') of (DefaultErrorHandler, Just _) -> id +> _ -> str " tokens") . +> (if error_expected' then str " expected" +> else id) . +> (case error_handler' of ResumptiveErrorHandler{} -> str " resume" +> _ -> id) . > str ")" +> report_error_handler = case error_handler' of +> DefaultErrorHandler -> str "happyError" +> CustomErrorHandler h -> brack h +> ResumptiveErrorHandler _abort report -> brack report +> abort_handler = case error_handler' of +> ResumptiveErrorHandler abort _report -> abort +> _ -> "error \"Called abort handler in non-resumptive parser\"" > reduceArrElem n > = str "\t(" . shows n . str " , " diff --git a/packages/codegen-common/src/Happy/CodeGen/Common/Options.lhs b/packages/codegen-common/src/Happy/CodeGen/Common/Options.lhs index ef925f91..42e19d2c 100644 --- a/packages/codegen-common/src/Happy/CodeGen/Common/Options.lhs +++ b/packages/codegen-common/src/Happy/CodeGen/Common/Options.lhs @@ -5,14 +5,20 @@ The CommonOptions data type. ----------------------------------------------------------------------------- > module Happy.CodeGen.Common.Options ( -> ErrorHandlerType(..), -> CommonOptions(..) +> ErrorHandlerInfo(..), CommonOptions(..) > ) where -> data ErrorHandlerType -> = ErrorHandlerTypeDefault -> | ErrorHandlerTypeExpList - +> data ErrorHandlerInfo +> = DefaultErrorHandler +> -- ^ Default handler `happyError`. +> | CustomErrorHandler String +> -- ^ Call this handler on error. +> | ResumptiveErrorHandler String {- abort -} String {- addMessage -} +> -- ^ `ResumptiveErrorHandler abort reportError`: +> -- Calls non-fatal `reportError ... resume` with resumption `resume` to +> -- get more errors, ultimately failing with `abort` when parse can't be +> -- resumed. +> > data CommonOptions > = CommonOptions { > token_type :: String, @@ -20,12 +26,8 @@ The CommonOptions data type. > monad :: (Bool,String,String,String,String), > expect :: Maybe Int, > lexer :: Maybe (String,String), -> error_handler :: Maybe String, -> error_sig :: ErrorHandlerType, -> -- ^ ErrorHandlerTypExpList: error handler expects a -> -- `[String]` as first arg with the pretty-printed expected -> -- tokens -> error_resumptive :: Bool -> -- ^ `True` => The error handler expects a `resume` -> -- continuation as last argument. +> error_handler :: ErrorHandlerInfo, +> error_expected :: Bool +> -- ^ Error handler expects a `[String]` as arg after current +> -- token carrying the pretty-printed expected tokens. > } diff --git a/packages/frontend/happy-frontend.cabal b/packages/frontend/happy-frontend.cabal index 517796ac..b42a0134 100644 --- a/packages/frontend/happy-frontend.cabal +++ b/packages/frontend/happy-frontend.cabal @@ -61,7 +61,7 @@ library happy-grammar == 2.0 default-language: Haskell98 - default-extensions: CPP, MagicHash, FlexibleContexts + default-extensions: CPP, MagicHash, FlexibleContexts, PatternGuards ghc-options: -Wall other-modules: Happy.Frontend.ParseMonad diff --git a/packages/frontend/src/Happy/Frontend/AbsSyn.lhs b/packages/frontend/src/Happy/Frontend/AbsSyn.lhs index 6f0841b7..c055922a 100644 --- a/packages/frontend/src/Happy/Frontend/AbsSyn.lhs +++ b/packages/frontend/src/Happy/Frontend/AbsSyn.lhs @@ -10,13 +10,13 @@ Here is the abstract syntax of the language we parse. > BookendedAbsSyn(..), > AbsSyn(..), Directive(..), > getTokenType, getTokenSpec, getParserNames, getLexer, -> getImportedIdentity, getMonad, getError, -> getPrios, getPrioNames, getExpect, getErrorHandlerType, getErrorResumptive, +> getImportedIdentity, getMonad, ErrorHandlerInfo(..), getError, +> getPrios, getPrioNames, getExpect, getErrorHandlerExpectedList, > getAttributes, getAttributetype, > Rule(..), Prod(..), Term(..), Prec(..) > ) where -> import Happy.CodeGen.Common.Options (ErrorHandlerType(..)) +> import Happy.CodeGen.Common.Options (ErrorHandlerInfo(..)) > data BookendedAbsSyn > = BookendedAbsSyn @@ -72,12 +72,11 @@ generate some error messages. > | TokenRight [String] -- %right > | TokenLeft [String] -- %left > | TokenExpect Int -- %expect -> | TokenError String -- %error -> | TokenErrorHandlerType String -- %errorhandlertype -> | TokenErrorResumptive -- %resumptive +> | TokenError String (Maybe String) -- %error +> | TokenErrorExpected -- %error.expected > | TokenAttributetype String -- %attributetype > | TokenAttribute String String -- %attribute -> deriving Show +> deriving (Eq, Show) > getTokenType :: [Directive t] -> String > getTokenType ds @@ -135,25 +134,17 @@ generate some error messages. > [] -> Nothing > _ -> error "multiple expect directives" -> getError :: [Directive t] -> Maybe String +> getError :: [Directive t] -> ErrorHandlerInfo > getError ds -> = case [ a | (TokenError a) <- ds ] of -> [t] -> Just t -> [] -> Nothing +> = case [ (a, mb_b) | (TokenError a mb_b) <- ds ] of +> [] -> DefaultErrorHandler +> [(a,Nothing)] -> CustomErrorHandler a +> [(abort,Just addMessage)] -> ResumptiveErrorHandler abort addMessage > _ -> error "multiple error directives" -> getErrorHandlerType :: [Directive t] -> ErrorHandlerType -> getErrorHandlerType ds -> = case [ a | (TokenErrorHandlerType a) <- ds ] of -> [t] -> case t of -> "explist" -> ErrorHandlerTypeExpList -> "default" -> ErrorHandlerTypeDefault -> _ -> error "unsupported %errorhandlertype value" -> [] -> ErrorHandlerTypeDefault -> _ -> error "multiple errorhandlertype directives" - -> getErrorResumptive :: [Directive t] -> Bool -> getErrorResumptive ds = not (null [ () | TokenErrorResumptive <- ds ]) +> getErrorHandlerExpectedList :: Eq t => [Directive t] -> Bool +> getErrorHandlerExpectedList ds +> = TokenErrorExpected `elem` ds > getAttributes :: [Directive t] -> [(String, String)] > getAttributes ds diff --git a/packages/frontend/src/Happy/Frontend/Lexer.lhs b/packages/frontend/src/Happy/Frontend/Lexer.lhs index e3440b65..d51060db 100644 --- a/packages/frontend/src/Happy/Frontend/Lexer.lhs +++ b/packages/frontend/src/Happy/Frontend/Lexer.lhs @@ -47,8 +47,7 @@ The lexer. > | TokSpecId_Shift -- %shift > | TokSpecId_Expect -- %expect > | TokSpecId_Error -- %error -> | TokSpecId_ErrorHandlerType -- %errorhandlertype -> | TokSpecId_ErrorResumptive -- %errorresumptive +> | TokSpecId_ErrorExpected -- %error.expected > | TokSpecId_Attributetype -- %attributetype > | TokSpecId_Attribute -- %attribute > | TokCodeQuote -- stuff inside { .. } @@ -130,10 +129,8 @@ followed by a special identifier. > cont (TokenKW TokSpecId_Shift) rest > 'e':'x':'p':'e':'c':'t':rest | end_of_id rest -> > cont (TokenKW TokSpecId_Expect) rest -> 'e':'r':'r':'o':'r':'h':'a':'n':'d':'l':'e':'r':'t':'y':'p':'e':rest | end_of_id rest -> -> cont (TokenKW TokSpecId_ErrorHandlerType) rest -> 'e':'r':'r':'o':'r':'r':'e':'s':'u':'m':'p':'t':'i':'v':'e':rest | end_of_id rest -> -> cont (TokenKW TokSpecId_ErrorResumptive) rest +> 'e':'r':'r':'o':'r':'.':'e':'x':'p':'e':'c':'t':'e':'d':rest | end_of_id rest -> +> cont (TokenKW TokSpecId_ErrorExpected) rest > 'e':'r':'r':'o':'r':rest | end_of_id rest -> > cont (TokenKW TokSpecId_Error) rest > 'a':'t':'t':'r':'i':'b':'u':'t':'e':'t':'y':'p':'e':rest | end_of_id rest -> diff --git a/packages/frontend/src/Happy/Frontend/Mangler.lhs b/packages/frontend/src/Happy/Frontend/Mangler.lhs index cc67254d..2372f1ef 100644 --- a/packages/frontend/src/Happy/Frontend/Mangler.lhs +++ b/packages/frontend/src/Happy/Frontend/Mangler.lhs @@ -69,7 +69,8 @@ This bit is a real mess, mainly because of the error message support. > starts' = case getParserNames dirs of > [] -> [TokenName "happyParse" Nothing False] > ns -> ns -> error_resumptive' = getErrorResumptive dirs +> error_resumptive | ResumptiveErrorHandler{} <- getError dirs = True +> | otherwise = False > > start_strs = [ startName++'_':p | (TokenName p _ _) <- starts' ] @@ -84,7 +85,7 @@ Build up a mapping from name values to strings. > lookupName :: String -> [Name] > lookupName n = [ t | (t,r) <- name_env, r == n -> , t /= catchTok || error_resumptive' ] +> , t /= catchTok || error_resumptive ] > -- hide catchName unless %errorresumptive is active > -- issue93.y uses catch as a nonterminal, we should not steal it @@ -253,8 +254,7 @@ Get the token specs in terms of Names. > monad = getMonad dirs, > lexer = getLexer dirs, > error_handler = getError dirs, -> error_sig = getErrorHandlerType dirs, -> error_resumptive = error_resumptive', +> error_expected = getErrorHandlerExpectedList dirs, > token_type = getTokenType dirs, > expect = getExpect dirs > }) diff --git a/packages/frontend/src/Happy/Frontend/Parser/Bootstrapped.ly b/packages/frontend/src/Happy/Frontend/Parser/Bootstrapped.ly index 7f9d07fa..2fc315e3 100644 --- a/packages/frontend/src/Happy/Frontend/Parser/Bootstrapped.ly +++ b/packages/frontend/src/Happy/Frontend/Parser/Bootstrapped.ly @@ -33,8 +33,7 @@ The parser. > spec_shift { TokenKW TokSpecId_Shift } > spec_expect { TokenKW TokSpecId_Expect } > spec_error { TokenKW TokSpecId_Error } -> spec_errorhandlertype { TokenKW TokSpecId_ErrorHandlerType } -> spec_errorresumptive { TokenKW TokSpecId_ErrorResumptive } +> spec_errorexpected { TokenKW TokSpecId_ErrorExpected } > spec_attribute { TokenKW TokSpecId_Attribute } > spec_attributetype { TokenKW TokSpecId_Attributetype } > code { TokenInfo $$ TokCodeQuote } @@ -124,9 +123,8 @@ The parser. > | spec_right ids { TokenRight $2 } > | spec_left ids { TokenLeft $2 } > | spec_expect int { TokenExpect $2 } -> | spec_error code { TokenError $2 } -> | spec_errorhandlertype id { TokenErrorHandlerType $2 } -> | spec_errorresumptive { TokenErrorResumptive } +> | spec_error code code { TokenError $2 $3 } +> | spec_errorexpected { TokenErrorExpected } > | spec_attributetype code { TokenAttributetype $2 } > | spec_attribute id code { TokenAttribute $2 $3 } diff --git a/packages/frontend/src/Happy/Frontend/Parser/Oracle.hs b/packages/frontend/src/Happy/Frontend/Parser/Oracle.hs index a0777300..ca6ede62 100644 --- a/packages/frontend/src/Happy/Frontend/Parser/Oracle.hs +++ b/packages/frontend/src/Happy/Frontend/Parser/Oracle.hs @@ -86,15 +86,17 @@ optTokInfoP = withToken match where match (TokenKW TokSpecId_Expect) = Consume `andThenJust` pure TokenExpect <*> numP - match (TokenKW TokSpecId_ErrorHandlerType) = + match (TokenKW TokSpecId_ErrorExpected) = Consume `andThenJust` - pure TokenErrorHandlerType <*> idtP - match (TokenKW TokSpecId_ErrorResumptive) = - Consume `andThenJust` - pure TokenErrorResumptive + pure TokenErrorExpected match (TokenKW TokSpecId_Error) = - Consume `andThenJust` - pure TokenError <*> codeP + Consume `andThenJust` do + codes <- manyP optCodeP + case codes of + [c1] -> return $ TokenError c1 Nothing + [c1, c2] -> return $ TokenError c1 (Just c2) + [] -> parseError "Expected a code block" + _ -> parseError "Too many code blocks" match (TokenKW TokSpecId_Attributetype) = Consume `andThenJust` pure TokenAttributetype <*> codeP diff --git a/tests/monaderror-explist.y b/tests/monaderror-explist.y index 3f027ef6..dd84d23c 100644 --- a/tests/monaderror-explist.y +++ b/tests/monaderror-explist.y @@ -14,8 +14,8 @@ import Data.List (isPrefixOf) %name parseFoo %tokentype { Token } -%errorhandlertype explist %error { handleErrorExpList } +%error.expected %monad { ParseM } { (>>=) } { return } diff --git a/tests/monaderror-resume.y b/tests/monaderror-resume.y index 578f7d73..29a0e079 100644 --- a/tests/monaderror-resume.y +++ b/tests/monaderror-resume.y @@ -13,9 +13,8 @@ import System.Exit %name parseStmts Stmts %name parseExp Exp %tokentype { Token } -%errorresumptive -- the entire point of this test -%errorhandlertype explist -- as in monaderror-explist.y -%error { handleError } +%error { \_ -> abort } { reportError } -- the entire point of this test +%error.expected -- as in monaderror-explist.y %monad { ParseM } { (>>=) } { return } @@ -76,13 +75,10 @@ instance Show ParseError where recordParseError :: [String] -> ParseM () recordParseError expected = recordError [ParseError expected] -handleError :: [Token] -> [String] -> ([Token] -> ParseM (Maybe a)) -> ParseM a -handleError ts expected resume = do +reportError :: [Token] -> [String] -> ([Token] -> ParseM a) -> ParseM a +reportError ts expected resume = do recordParseError expected - mb_ast <- resume ts - case mb_ast of - Just ast -> return ast - Nothing -> abort -- abort after parsing with no AST when resumption is impossible + resume ts lexer :: String -> [Token] lexer [] = [] From cb667c7430ffe862f236ac91d9131604f325da7b Mon Sep 17 00:00:00 2001 From: Sebastian Graf Date: Sat, 27 Jan 2024 13:19:30 +0100 Subject: [PATCH 09/10] Only report when not already trying to resume --- packages/backend-lalr/data/HappyTemplate.hs | 14 +++- .../src/Happy/Backend/LALR/ProduceCode.lhs | 80 +++++++------------ tests/monaderror-resume.y | 6 +- 3 files changed, 43 insertions(+), 57 deletions(-) diff --git a/packages/backend-lalr/data/HappyTemplate.hs b/packages/backend-lalr/data/HappyTemplate.hs index f175779e..f017515a 100644 --- a/packages/backend-lalr/data/HappyTemplate.hs +++ b/packages/backend-lalr/data/HappyTemplate.hs @@ -326,7 +326,19 @@ happyFixupFailed tk st sts (x `HappyStk` stk) = let i = GET_ERROR_TOKEN(x) in DEBUG_TRACE("`error` fixup failed.\n") #if defined(HAPPY_ARRAY) - happyReport i tk (map happyTokenToString (happyExpectedTokens st sts)) (happyResume i tk st sts stk) + let resume = happyResume i tk st sts stk + expected = map happyTokenToString (happyExpectedTokens st sts) in + if happyAlreadyInResumption st sts + then resume + else happyReport i tk expected resume + +happyAlreadyInResumption st sts + | IBOX(n_starts) <- happy_n_starts, LT(st, n_starts) + = False -- end of the stack + | IBOX(st) `elem` happyCatchStates + = True + | CONS(st1,sts1) <- sts + = happyAlreadyInResumption st1 sts1 #else happyReport i tk [] happyAbort #endif diff --git a/packages/backend-lalr/src/Happy/Backend/LALR/ProduceCode.lhs b/packages/backend-lalr/src/Happy/Backend/LALR/ProduceCode.lhs index 51206219..e790414e 100644 --- a/packages/backend-lalr/src/Happy/Backend/LALR/ProduceCode.lhs +++ b/packages/backend-lalr/src/Happy/Backend/LALR/ProduceCode.lhs @@ -16,9 +16,8 @@ The code generator. > import Data.Maybe ( isJust, isNothing, fromMaybe ) > import Data.Char ( ord, chr ) -> import Data.List ( sortBy ) +> import Data.List ( sortBy, nub ) -> import Control.Monad ( forM_ ) > import Control.Monad.ST ( ST, runST ) > import Data.Word > import Data.Int @@ -75,7 +74,7 @@ Produce the complete output file. > -- don't screw up any OPTIONS pragmas in the header. > . produceAbsSynDecl . nl > . produceTypes -> . produceExpToksPerState +> . produceTokToStringList > . produceActionTable target > . produceReductions > . produceTokenConverter . nl @@ -604,13 +603,13 @@ machinery to discard states in the parser... > = produceActionArray > . produceReduceArray > . produceRuleArray +> . produceCatchStates > . str "happy_n_terms = " . shows n_terminals . str " :: Prelude.Int\n" > . str "happy_n_nonterms = " . shows n_nonterminals . str " :: Prelude.Int\n\n" > . str "happy_n_starts = " . shows n_starts . str " :: Prelude.Int\n\n" > -> produceExpToksPerState -> = produceExpToksArray -> . str "{-# NOINLINE happyTokenStrings #-}\n" +> produceTokToStringList +> = str "{-# NOINLINE happyTokenStrings #-}\n" > . str "happyTokenStrings = " . shows (drop (fst_term - 1) (elems token_names')) . str "\n" > -- fst_term - 1: fst_term includes eofToken, but that is last in the list. > . str "\n" @@ -713,25 +712,12 @@ action array indexed by (terminal * last_state) + state > . interleave' "," (map shows table) > . str "\n\t])\n\n" -> produceExpToksArray -> | ghc -> = str "happyExpToks :: HappyAddr\n" -> . str "happyExpToks = HappyA# \"" --" -> . hexChars explist -> . str "\"#\n\n" --" -> | otherwise -> = str "happyExpToks :: Happy_Data_Array.Array Prelude.Int Prelude.Int\n" -> . str "happyExpToks = Happy_Data_Array.listArray (0," -> . shows table_size . str ") ([" -> . interleave' "," (map shows explist) -> . str "\n\t])\n\n" - > (_, last_state) = bounds action > n_states = last_state + 1 > n_terminals = length terms > n_nonterminals = length nonterms - n_starts -- lose %starts > -> (act_offs,goto_offs,table,defaults,check,explist) +> (act_offs,goto_offs,table,defaults,check,catch_states) > = mkTables action goto first_nonterm' fst_term > n_terminals n_nonterminals n_starts (bounds token_names') > @@ -763,8 +749,12 @@ action array indexed by (terminal * last_state) + state > . str "\n\t]\n\n" > > ruleArrElems = map (\(Production nt toks _code _prio) -> (nt-first_nonterm',length toks)) (drop n_starts prods) - +> > n_rules = length prods - 1 :: Int +> +> produceCatchStates +> = str "happyCatchStates :: [Int]\n" +> . str "happyCatchStates = " . shows catch_states . str "\n\n" > showInt i | ghc = shows i . showChar '#' > | otherwise = shows i @@ -1166,7 +1156,7 @@ and @check@ as the way to detect "collisions" (i.e., default entries). > , [Int] -- happyTable > , [Int] -- happyDefAction > , [Int] -- happyCheck -> , [Int] -- happyExpToks +> , [Int] -- happyCatchStates > ) > > mkTables action goto first_nonterm' fst_term @@ -1178,14 +1168,14 @@ and @check@ as the way to detect "collisions" (i.e., default entries). > , take max_off (elems table) > , def_actions > , take max_off (elems check) -> , elems explist +> , shifted_catch_states > ) > where > -> (table,check,act_offs,goto_offs,explist,max_off) +> (table,check,act_offs,goto_offs,max_off) > = runST (genTables (length actions) > max_token token_names_bound -> sorted_actions explist_actions) +> sorted_actions) > > -- the maximum token number used in the parser > max_token = max n_terminals (n_starts+n_nonterminals) - 1 @@ -1209,12 +1199,11 @@ and @check@ as the way to detect "collisions" (i.e., default entries). > acts'' = mkActVals acts' default_act > ] > -> explist_actions :: [(Int, [Int])] -> explist_actions = [ (state, concatMap f $ assocs acts) -> | (state, acts) <- assocs action ] -> where -> f (t, LR'Shift _ _ ) = [t - fst token_names_bound] -> f (_, _) = [] +> shifted_catch_states :: [Int] +> shifted_catch_states = -- collect the states in which we have just shifted a catchTok +> nub [ to_state | (_from_state, acts) <- assocs action +> , let (_err:catch:_) = assocs acts +> , (_tok, LR'Shift to_state _) <- return catch ] > > -- adjust terminals by -(fst_term+2), so they start at 2 (error is 0, catch is 1). > -- (see ARRAY_NOTES) @@ -1264,33 +1253,29 @@ and @check@ as the way to detect "collisions" (i.e., default entries). > -> Int -- maximum token no. > -> (Int, Int) -- token names bounds > -> [TableEntry] -- entries for the table -> -> [(Int, [Int])] -- expected tokens lists > -> ST s ( UArray Int Int -- table > , UArray Int Int -- check > , UArray Int Int -- action offsets > , UArray Int Int -- goto offsets -> , UArray Int Int -- expected tokens list > , Int -- highest offset in table > ) > -> genTables n_actions max_token token_names_bound entries explist = do +> genTables n_actions max_token token_names_bound entries = do > > table <- newArray (0, mAX_TABLE_SIZE) 0 > check <- newArray (0, mAX_TABLE_SIZE) (-1) > act_offs <- newArray (0, n_actions) 0 > goto_offs <- newArray (0, n_actions) 0 > off_arr <- newArray (-max_token, mAX_TABLE_SIZE) 0 -> exp_array <- newArray (0, (n_actions * n_token_names + 15) `div` 16) 0 > -> max_off <- genTables' table check act_offs goto_offs off_arr exp_array entries -> explist max_token n_token_names +> max_off <- genTables' table check act_offs goto_offs off_arr entries +> max_token n_token_names > > table' <- freeze table > check' <- freeze check > act_offs' <- freeze act_offs > goto_offs' <- freeze goto_offs -> exp_array' <- freeze exp_array -> return (table',check',act_offs',goto_offs',exp_array',max_off+1) +> return (table',check',act_offs',goto_offs',max_off+1) > where > n_states = n_actions - 1 @@ -1305,16 +1290,14 @@ and @check@ as the way to detect "collisions" (i.e., default entries). > -> STUArray s Int Int -- action offsets > -> STUArray s Int Int -- goto offsets > -> STUArray s Int Int -- offset array -> -> STUArray s Int Int -- expected token list > -> [TableEntry] -- entries for the table -> -> [(Int, [Int])] -- expected tokens lists > -> Int -- maximum token no. > -> Int -- number of token names > -> ST s Int -- highest offsets in table > -> genTables' table check act_offs goto_offs off_arr exp_array entries -> explist max_token n_token_names -> = fill_exp_array >> fit_all entries 0 1 +> genTables' table check act_offs goto_offs off_arr entries +> max_token n_token_names +> = fit_all entries 0 1 > where > > fit_all [] max_off _ = return max_off @@ -1324,15 +1307,6 @@ and @check@ as the way to detect "collisions" (i.e., default entries). > writeArray off_arr off 1 > fit_all ss' new_max_off new_fst_zero > -> fill_exp_array = -> forM_ explist $ \(state, tokens) -> -> forM_ tokens $ \token -> do -> let bit_nr = state * n_token_names + token -> let word_nr = bit_nr `div` 16 -> let word_offset = bit_nr `mod` 16 -> x <- readArray exp_array word_nr -> writeArray exp_array word_nr (setBit x word_offset) -> > -- try to merge identical states. We only try the next state(s) > -- in the list, but the list is kind-of sorted so we shouldn't > -- miss too many. diff --git a/tests/monaderror-resume.y b/tests/monaderror-resume.y index 29a0e079..46ebf9fc 100644 --- a/tests/monaderror-resume.y +++ b/tests/monaderror-resume.y @@ -93,10 +93,10 @@ main :: IO () main = do test "1+1;1" $ \(_,mb_ast) -> mb_ast == Just [ExpStmt (One `Plus` One), ExpStmt One] test "1++1;1" $ \(errs,_) -> errs == [ParseError ["'1'"]] - test "1++1;+" $ \(errs,_) -> errs == [ParseError ["'1'"], ParseError ["'1'"]] + test "1++1;1;+" $ \(errs,_) -> errs == [ParseError ["'1'"], ParseError ["'1'"]] test "11;1" $ \(errs,_) -> errs == [ParseError ["';'"]] - test "11;++" $ \(errs,_) -> errs == [ParseError ["';'"], ParseError ["'1'"]] - test "11;1++" $ \(errs,_) -> errs == [ParseError ["';'"], ParseError ["'1'"]] + test "11;1;++" $ \(errs,_) -> errs == [ParseError ["';'"], ParseError ["'1'"]] + test "11;1;1++" $ \(errs,_) -> errs == [ParseError ["';'"], ParseError ["'1'"]] testExp "11" $ \(errs,_) -> errs == [ParseError ["'+'"]] where test inp p = do From ece7d39203c551454c29d7da95ab3a7714bb2d77 Mon Sep 17 00:00:00 2001 From: Sebastian Graf Date: Sun, 4 Feb 2024 12:30:35 +0100 Subject: [PATCH 10/10] Fix bootstrapped parser --- packages/frontend/src/Happy/Frontend/Parser/Bootstrapped.ly | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/frontend/src/Happy/Frontend/Parser/Bootstrapped.ly b/packages/frontend/src/Happy/Frontend/Parser/Bootstrapped.ly index 2fc315e3..006a72e2 100644 --- a/packages/frontend/src/Happy/Frontend/Parser/Bootstrapped.ly +++ b/packages/frontend/src/Happy/Frontend/Parser/Bootstrapped.ly @@ -123,7 +123,7 @@ The parser. > | spec_right ids { TokenRight $2 } > | spec_left ids { TokenLeft $2 } > | spec_expect int { TokenExpect $2 } -> | spec_error code code { TokenError $2 $3 } +> | spec_error code optCode { TokenError $2 $3 } > | spec_errorexpected { TokenErrorExpected } > | spec_attributetype code { TokenAttributetype $2 } > | spec_attribute id code { TokenAttribute $2 $3 }