diff --git a/.gitignore b/.gitignore index 13cda316..0a5b1bf2 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,9 @@ dist-newstyle cabal-dev .cabal-sandbox cabal.sandbox.config +cabal.project.local .*.swp .*.swo -/.vscode/ \ No newline at end of file +/.vscode/ +packages/frontend/src/Happy/Frontend/Parser/Bootstrapped.hs +packages/frontend/src/Happy/Frontend/AttrGrammar/Parser.hs diff --git a/happy.cabal b/happy.cabal index a2904506..39f3bf36 100644 --- a/happy.cabal +++ b/happy.cabal @@ -151,7 +151,7 @@ executable happy happy-backend-glr == 2.0 default-language: Haskell98 - default-extensions: CPP, MagicHash, FlexibleContexts, NamedFieldPuns + default-extensions: CPP, MagicHash, FlexibleContexts, NamedFieldPuns, PatternGuards ghc-options: -Wall other-modules: Paths_happy diff --git a/packages/backend-glr/src/Happy/Backend/GLR/ProduceCode.lhs b/packages/backend-glr/src/Happy/Backend/GLR/ProduceCode.lhs index 7d56d51b..5e077f5d 100644 --- a/packages/backend-glr/src/Happy/Backend/GLR/ProduceCode.lhs +++ b/packages/backend-glr/src/Happy/Backend/GLR/ProduceCode.lhs @@ -307,6 +307,8 @@ It also shares identical reduction values as CAFs > mkLine state (symInt,action) > | symInt == errorTok -- skip error productions > = "" -- NB see ProduceCode's handling of these +> | symInt == catchTok -- skip error productions +> = "" -- NB see ProduceCode's handling of these > | otherwise > = case action of > LR'Fail -> "" diff --git a/packages/backend-lalr/data/HappyTemplate.hs b/packages/backend-lalr/data/HappyTemplate.hs index e9125ea6..f017515a 100644 --- a/packages/backend-lalr/data/HappyTemplate.hs +++ b/packages/backend-lalr/data/HappyTemplate.hs @@ -46,16 +46,18 @@ data Happy_IntList = HappyCons FAST_INT Happy_IntList #if defined(HAPPY_ARRAY) # define ERROR_TOK ILIT(0) +# define CATCH_TOK ILIT(1) # define DO_ACTION(state,i,tk,sts,stk) happyDoAction i tk state sts (stk) # define HAPPYSTATE(i) (i) # define GOTO(action) happyGoto -# define IF_ARRAYS(x) (x) +# define IF_ARRAY(x) (x) #else # define ERROR_TOK ILIT(1) +# define CATCH_TOK ILIT(2) # define DO_ACTION(state,i,tk,sts,stk) state i i tk HAPPYSTATE(state) sts (stk) # define HAPPYSTATE(i) (HappyState (i)) # define GOTO(action) action -# define IF_ARRAYS(x) +# define IF_ARRAY(x) #endif #if defined(HAPPY_COERCE) @@ -81,7 +83,7 @@ happyTrace string expr = Happy_System_IO_Unsafe.unsafePerformIO $ do #endif infixr 9 `HappyStk` -data HappyStk a = HappyStk a (HappyStk a) +data HappyStk a = HappyStk !a (HappyStk a) ----------------------------------------------------------------------------- -- starting the parse @@ -97,52 +99,82 @@ happyParse start_state = happyNewToken start_state notHappyAtAll notHappyAtAll happyAccept ERROR_TOK tk st sts (_ `HappyStk` ans `HappyStk` _) = happyReturn1 ans happyAccept j tk st sts (HappyStk ans _) = - IF_GHC(happyTcHack j IF_ARRAYS(happyTcHack st)) (happyReturn1 ans) + IF_GHC(happyTcHack j IF_ARRAY(happyTcHack st)) (happyReturn1 ans) ----------------------------------------------------------------------------- -- Arrays only: do the next action #if defined(HAPPY_ARRAY) -happyDoAction i tk st - = DEBUG_TRACE("state: " ++ show IBOX(st) ++ - ",\ttoken: " ++ show IBOX(i) ++ - ",\taction: ") - case action of - ILIT(0) -> DEBUG_TRACE("fail.\n") - happyFail (happyExpListPerState (IBOX(st) :: Prelude.Int)) i tk st - ILIT(-1) -> DEBUG_TRACE("accept.\n") - happyAccept i tk st - n | LT(n,(ILIT(0) :: FAST_INT)) -> DEBUG_TRACE("reduce (rule " ++ show rule - ++ ")") - (happyReduceArr Happy_Data_Array.! rule) i tk st - where rule = IBOX(NEGATE(PLUS(n,(ILIT(1) :: FAST_INT)))) - n -> DEBUG_TRACE("shift, enter state " - ++ show IBOX(new_state) - ++ "\n") - happyShift new_state i tk st - where new_state = MINUS(n,(ILIT(1) :: FAST_INT)) - where off = happyAdjustOffset (indexShortOffAddr happyActOffsets st) - off_i = PLUS(off, i) - check = if GTE(off_i,(ILIT(0) :: FAST_INT)) - then EQ(indexShortOffAddr happyCheck off_i, i) - else Prelude.False - action - | check = indexShortOffAddr happyTable off_i - | Prelude.otherwise = indexShortOffAddr happyDefActions st +happyDoAction i tk st = + DEBUG_TRACE("state: " ++ show IBOX(st) ++ + ",\ttoken: " ++ show IBOX(i) ++ + ",\taction: ") + case happyDecodeAction (happyNextAction i st) of + HappyFail -> DEBUG_TRACE("failing.\n") + happyFail i tk st + HappyAccept -> DEBUG_TRACE("accept.\n") + happyAccept i tk st + HappyReduce rule -> DEBUG_TRACE("reduce (rule " ++ show IBOX(rule) ++ ")") + (happyReduceArr Happy_Data_Array.! IBOX(rule)) i tk st + HappyShift new_state -> DEBUG_TRACE("shift, enter state " ++ show IBOX(new_state) ++ "\n") + happyShift new_state i tk st + +{-# INLINE happyNextAction #-} +happyNextAction i st = case happyIndexActionTable i st of + Just (IBOX(act)) -> act + Nothing -> indexOffAddr happyDefActions st + +{-# INLINE happyIndexActionTable #-} +happyIndexActionTable i st + | GTE(off,ILIT(0)), EQ(indexOffAddr happyCheck off, i) + = Prelude.Just (IBOX(indexOffAddr happyTable off)) + | otherwise + = Prelude.Nothing + where + off = PLUS(indexOffAddr happyActOffsets st, i) + +data HappyAction + = HappyFail + | HappyAccept + | HappyReduce FAST_INT -- rule number + | HappyShift FAST_INT -- new state + deriving Show + +{-# INLINE happyDecodeAction #-} +happyDecodeAction ILIT(0) = HappyFail +happyDecodeAction ILIT(-1) = HappyAccept +happyDecodeAction action + | LT(action,ILIT(0)) + = HappyReduce NEGATE(PLUS(action,ILIT(1))) + | otherwise + = HappyShift MINUS(action,ILIT(1)) + +{-# INLINE happyIndexGotoTable #-} +happyIndexGotoTable nt st = indexOffAddr happyTable off + where + off = PLUS(indexOffAddr happyGotoOffsets st, nt) #endif /* HAPPY_ARRAY */ #ifdef HAPPY_GHC -indexShortOffAddr (HappyA# arr) off = - Happy_GHC_Exts.narrow16Int# i +indexOffAddr (HappyA# arr) off = + Happy_GHC_Exts.int32ToInt# (Happy_GHC_Exts.indexInt32OffAddr# arr off) + +#ifdef HAPPY_ARRAY +indexRuleArr arr r = (IBOX(nt), IBOX(len)) where - i = Happy_GHC_Exts.word2Int# (Happy_GHC_Exts.or# (Happy_GHC_Exts.uncheckedShiftL# high 8#) low) - high = Happy_GHC_Exts.int2Word# (Happy_GHC_Exts.ord# (Happy_GHC_Exts.indexCharOffAddr# arr (off' Happy_GHC_Exts.+# 1#))) - low = Happy_GHC_Exts.int2Word# (Happy_GHC_Exts.ord# (Happy_GHC_Exts.indexCharOffAddr# arr off')) - off' = off Happy_GHC_Exts.*# 2# + IBOX(n_starts) = happy_n_starts + offs = TIMES(MINUS(r,n_starts),ILIT(2)) + nt = indexOffAddr arr offs + len = indexOffAddr arr PLUS(offs,ILIT(1)) +#endif #else -indexShortOffAddr arr off = arr Happy_Data_Array.! off +indexOffAddr arr off = arr Happy_Data_Array.! off + +#ifdef HAPPY_ARRAY +indexRuleArr arr nt = arr Happy_Data_Array.! nt +#endif #endif {-# INLINE happyLt #-} @@ -150,11 +182,11 @@ happyLt x y = LT(x,y) #ifdef HAPPY_GHC readArrayBit arr bit = - Bits.testBit IBOX(indexShortOffAddr arr ((unbox_int bit) `Happy_GHC_Exts.iShiftRA#` 4#)) (bit `Prelude.mod` 16) + Bits.testBit IBOX(indexOffAddr arr ((unbox_int bit) `Happy_GHC_Exts.iShiftRA#` 5#)) (bit `Prelude.mod` 32) where unbox_int (Happy_GHC_Exts.I# x) = x #else readArrayBit arr bit = - Bits.testBit IBOX(indexShortOffAddr arr (bit `Prelude.div` 16)) (bit `Prelude.mod` 16) + Bits.testBit IBOX(indexOffAddr arr (bit `Prelude.div` 16)) (bit `Prelude.mod` 16) #endif #ifdef HAPPY_GHC @@ -181,63 +213,46 @@ newtype HappyState b c = HappyState happyShift new_state ERROR_TOK tk st sts stk@(x `HappyStk` _) = let i = GET_ERROR_TOKEN(x) in --- trace "shifting the error token" $ + DEBUG_TRACE("shifting the error token") DO_ACTION(new_state,i,tk,CONS(st,sts),stk) - happyShift new_state i tk st sts stk = happyNewToken new_state CONS(st,sts) (MK_TOKEN(tk)`HappyStk`stk) -- happyReduce is specialised for the common cases. -happySpecReduce_0 i fn ERROR_TOK tk st sts stk - = happyFail [] ERROR_TOK tk st sts stk happySpecReduce_0 nt fn j tk st@(HAPPYSTATE(action)) sts stk - = GOTO(action) nt j tk st CONS(st,sts) (fn `HappyStk` stk) + = happySeq fn (GOTO(action) nt j tk st CONS(st,sts) (fn `HappyStk` stk)) -happySpecReduce_1 i fn ERROR_TOK tk st sts stk - = happyFail [] ERROR_TOK tk st sts stk -happySpecReduce_1 nt fn j tk _ sts@(CONS(st@HAPPYSTATE(action),_)) (v1`HappyStk`stk') +happySpecReduce_1 nt fn j tk old_st sts@(CONS(st@HAPPYSTATE(action),_)) (v1`HappyStk`stk') = let r = fn v1 in - happySeq r (GOTO(action) nt j tk st sts (r `HappyStk` stk')) + IF_ARRAY(happyTcHack old_st) happySeq r (GOTO(action) nt j tk st sts (r `HappyStk` stk')) -happySpecReduce_2 i fn ERROR_TOK tk st sts stk - = happyFail [] ERROR_TOK tk st sts stk -happySpecReduce_2 nt fn j tk _ CONS(_,sts@(CONS(st@HAPPYSTATE(action),_))) (v1`HappyStk`v2`HappyStk`stk') +happySpecReduce_2 nt fn j tk old_st CONS(_,sts@(CONS(st@HAPPYSTATE(action),_))) (v1`HappyStk`v2`HappyStk`stk') = let r = fn v1 v2 in - happySeq r (GOTO(action) nt j tk st sts (r `HappyStk` stk')) + IF_ARRAY(happyTcHack old_st) happySeq r (GOTO(action) nt j tk st sts (r `HappyStk` stk')) -happySpecReduce_3 i fn ERROR_TOK tk st sts stk - = happyFail [] ERROR_TOK tk st sts stk -happySpecReduce_3 nt fn j tk _ CONS(_,CONS(_,sts@(CONS(st@HAPPYSTATE(action),_)))) (v1`HappyStk`v2`HappyStk`v3`HappyStk`stk') +happySpecReduce_3 nt fn j tk old_st CONS(_,CONS(_,sts@(CONS(st@HAPPYSTATE(action),_)))) (v1`HappyStk`v2`HappyStk`v3`HappyStk`stk') = let r = fn v1 v2 v3 in - happySeq r (GOTO(action) nt j tk st sts (r `HappyStk` stk')) + IF_ARRAY(happyTcHack old_st) happySeq r (GOTO(action) nt j tk st sts (r `HappyStk` stk')) -happyReduce k i fn ERROR_TOK tk st sts stk - = happyFail [] ERROR_TOK tk st sts stk -happyReduce k nt fn j tk st sts stk - = case happyDrop MINUS(k,(ILIT(1) :: FAST_INT)) sts of +happyReduce k nt fn j tk st sts stk = + case happyDrop k CONS(st,sts) of sts1@(CONS(st1@HAPPYSTATE(action),_)) -> let r = fn stk in -- it doesn't hurt to always seq here... happyDoSeq r (GOTO(action) nt j tk st1 sts1 r) -happyMonadReduce k nt fn ERROR_TOK tk st sts stk - = happyFail [] ERROR_TOK tk st sts stk happyMonadReduce k nt fn j tk st sts stk = case happyDrop k CONS(st,sts) of sts1@(CONS(st1@HAPPYSTATE(action),_)) -> let drop_stk = happyDropStk k stk in happyThen1 (fn stk tk) (\r -> GOTO(action) nt j tk st1 sts1 (r `HappyStk` drop_stk)) -happyMonad2Reduce k nt fn ERROR_TOK tk st sts stk - = happyFail [] ERROR_TOK tk st sts stk happyMonad2Reduce k nt fn j tk st sts stk = - case happyDrop k CONS(st,sts) of + j `happyTcHack` case happyDrop k CONS(st,sts) of sts1@(CONS(st1@HAPPYSTATE(action),_)) -> let drop_stk = happyDropStk k stk #if defined(HAPPY_ARRAY) - off = happyAdjustOffset (indexShortOffAddr happyGotoOffsets st1) - off_i = PLUS(off, nt) - new_state = indexShortOffAddr happyTable off_i + new_state = happyIndexGotoTable nt st1 #else _ = nt :: FAST_INT new_state = action @@ -257,43 +272,158 @@ happyDropStk n (x `HappyStk` xs) = happyDropStk MINUS(n,(ILIT(1)::FAST_INT)) xs #if defined(HAPPY_ARRAY) happyGoto nt j tk st = DEBUG_TRACE(", goto state " ++ show IBOX(new_state) ++ "\n") - happyDoAction j tk new_state - where off = happyAdjustOffset (indexShortOffAddr happyGotoOffsets st) - off_i = PLUS(off, nt) - new_state = indexShortOffAddr happyTable off_i + happyDoAction j tk new_state where new_state = (happyIndexGotoTable nt st) #else happyGoto action j tk st = action j j tk (HappyState action) #endif ----------------------------------------------------------------------------- --- Error recovery (ERROR_TOK is the error token) +-- Error recovery +-- +-- When there is no applicable action for the current lookahead token `tk`, +-- happy enters error recovery mode. It works in 2 phases: +-- +-- 1. Fixup: Try to see if there is an action for the error token (`errorTok`, +-- which is ERROR_TOK). If there is, do *not* emit an error and pretend +-- instead that an `errorTok` was inserted. +-- When there is no `errorTok` action, call the error handler +-- (e.g., `happyError`) with the resumption continuation `happyResume`. +-- 2. Error resumption mode: If the error handler wants to resume parsing in +-- order to report multiple parse errors, it will call the resumption +-- continuation (of result type `P (Maybe a)`). +-- In the absence of the %resumptive declaration, this resumption will +-- always (do a bit of work, and) `return Nothing`. +-- In the presence of the %resumptive declaration, the grammar author +-- can use the special `catch` terminal to declare where parsing should +-- resume after an error. +-- E.g., if `stmt : expr ';' | catch ';'` then the resumption will +-- +-- (a) Pop off the state stack until it finds an item +-- `stmt -> . catch ';'`. +-- Then, it will push a `catchTok` onto the stack, perform a shift and +-- end up in item `stmt -> catch . ';'`. +-- (b) Discard tokens from the lexer until it finds ';'. +-- (In general, it will discard until the lookahead has a non-default +-- action in the matches a token that applies +-- in the situation `P -> α catch . β`, where β might empty.) +-- +-- The `catch` resumption mechanism (2) is what usually is associated with +-- `error` in `bison` or `menhir`. Since `error` is used for the Fixup mechanism +-- (1) above, we call the corresponding token `catch`. + +-- Enter error Fixup: generate an error token, +-- save the old token and carry on. +-- When a `happyShift` accepts, we will pop off the error +-- token to resume parsing with the current lookahead `i`. +happyTryFixup i tk HAPPYSTATE(action) sts stk = + DEBUG_TRACE("entering `error` fixup.\n") + DO_ACTION(action,ERROR_TOK,tk,sts, MK_ERROR_TOKEN(i) `HappyStk` stk) + -- NB: `happyShift` will simply pop the error token and carry on with + -- `tk`. Hence we don't change `tk` in the call here + +-- parse error if we are in fixup and fail again +happyFixupFailed tk st sts (x `HappyStk` stk) = + let i = GET_ERROR_TOKEN(x) in + DEBUG_TRACE("`error` fixup failed.\n") +#if defined(HAPPY_ARRAY) + let resume = happyResume i tk st sts stk + expected = map happyTokenToString (happyExpectedTokens st sts) in + if happyAlreadyInResumption st sts + then resume + else happyReport i tk expected resume + +happyAlreadyInResumption st sts + | IBOX(n_starts) <- happy_n_starts, LT(st, n_starts) + = False -- end of the stack + | IBOX(st) `elem` happyCatchStates + = True + | CONS(st1,sts1) <- sts + = happyAlreadyInResumption st1 sts1 +#else + happyReport i tk [] happyAbort +#endif + +happyFail ERROR_TOK = happyFixupFailed +happyFail i = happyTryFixup i + +#if defined(HAPPY_ARRAY) +happyResume i tk st sts stk = pop_items st sts stk + where + pop_items st sts stk + | HappyShift new_state <- happyDecodeAction (happyNextAction CATCH_TOK st) + = DEBUG_TRACE("shifting catch token " ++ show IBOX(st) ++ " -> " ++ show IBOX(new_state) ++ "\n") + discard_input_until_exp i tk new_state CONS(st,sts) (MK_ERROR_TOKEN(i) `HappyStk` stk) + | DEBUG_TRACE("can't shift catch in " ++ show IBOX(st) ++ ", ") True + , IBOX(n_starts) <- happy_n_starts, LT(st, n_starts) + = DEBUG_TRACE("because it is a start state. no resumption.\n") + happyAbort + | CONS(st1,sts1) <- sts, _ `HappyStk` stk1 <- stk + = DEBUG_TRACE("discarding.\n") + pop_items st1 sts1 stk1 + discard_input_until_exp i tk st sts stk + | ultimately_fails i st sts + = DEBUG_TRACE("discard token in state " ++ show IBOX(st) ++ ": " ++ show IBOX(i) ++ "\n") + happyLex (\_eof_tk -> happyAbort) + (\i tk -> discard_input_until_exp i tk st sts stk) -- not eof + | otherwise + = DEBUG_TRACE("found expected token in state " ++ show IBOX(st) ++ ": " ++ show IBOX(i) ++ "\n") + DO_ACTION(st,i,tk,sts,stk) + ultimately_fails i st sts = + DEBUG_TRACE("trying token " ++ show IBOX(i) ++ " in state " ++ show IBOX(st) ++ ": ") + case happyDecodeAction (happyNextAction i st) of + HappyFail -> DEBUG_TRACE("fail.\n") True + HappyAccept -> DEBUG_TRACE("accept.\n") False + HappyShift _ -> DEBUG_TRACE("shift.\n") False + HappyReduce r -> case happySimulateReduce r st sts of + CONS(st1,sts1) -> ultimately_fails i st1 sts1 + +happySimulateReduce r st sts = + DEBUG_TRACE("simulate reduction of rule " ++ show IBOX(r) ++ ", ") + let (IBOX(nt), IBOX(len)) = indexRuleArr happyRuleArr r in + DEBUG_TRACE("nt " ++ show IBOX(nt) ++ ", len: " ++ show IBOX(len) ++ ", new_st ") + let sts1@CONS(st1,_) = happyDrop len CONS(st,sts) + new_st = happyIndexGotoTable nt st1 in + DEBUG_TRACE(show IBOX(new_st) ++ ".\n") + CONS(new_st, sts1) + +happyTokenToString i = happyTokenStrings Prelude.!! (i Prelude.- 2) +happyExpectedTokens st sts = + DEBUG_TRACE("constructing expected tokens.\n") + search_shifts st sts [] + where + search_shifts st sts shifts = foldr (add_action st sts) shifts (distinct_actions st) + add_action st sts (IBOX(i), IBOX(act)) shifts = + DEBUG_TRACE("found action in state " ++ show IBOX(st) ++ ", input " ++ show IBOX(i) ++ ", " ++ show (happyDecodeAction act) ++ "\n") + case happyDecodeAction act of + HappyFail -> shifts + HappyAccept -> shifts -- This would always be %eof or error... Not helpful + HappyShift _ -> Happy_Data_List.insert IBOX(i) shifts + HappyReduce r -> case happySimulateReduce r st sts of + CONS(st1,sts1) -> search_shifts st1 sts1 shifts + distinct_actions st + = ((-1), IBOX(indexOffAddr happyDefActions st)) + : [ (i, act) | i <- [begin_i..happy_n_terms], act <- get_act row_off i ] + where + row_off = indexOffAddr happyActOffsets st + begin_i = 2 -- +2: errorTok,catchTok + get_act off IBOX(i) + | let off_i = PLUS(off,i) + , GTE(off_i,ILIT(0)) + , EQ(indexOffAddr happyCheck off_i,i) + = [IBOX(indexOffAddr happyTable off_i)] + | otherwise + = [] + +#endif --- parse error if we are in recovery and we fail again -happyFail explist ERROR_TOK tk old_st _ stk@(x `HappyStk` _) = - let i = GET_ERROR_TOKEN(x) in --- trace "failing" $ - happyError_ explist i tk - -{- We don't need state discarding for our restricted implementation of - "error". In fact, it can cause some bogus parses, so I've disabled it - for now --SDM - --- discard a state -happyFail ERROR_TOK tk old_st CONS(HAPPYSTATE(action),sts) - (saved_tok `HappyStk` _ `HappyStk` stk) = --- trace ("discarding state, depth " ++ show (length stk)) $ - DO_ACTION(action,ERROR_TOK,tk,sts,(saved_tok`HappyStk`stk)) --} - --- Enter error recovery: generate an error token, --- save the old token and carry on. -happyFail explist i tk HAPPYSTATE(action) sts stk = --- trace "entering error recovery" $ - DO_ACTION(action,ERROR_TOK,tk,sts, MK_ERROR_TOKEN(i) `HappyStk` stk) -- Internal happy errors: +#if defined(HAPPY_GHC) +notHappyAtAll :: Happy_GHC_Stack.HasCallStack => a +#else notHappyAtAll :: a +#endif notHappyAtAll = Prelude.error "Internal Happy error\n" ----------------------------------------------------------------------------- @@ -303,6 +433,8 @@ notHappyAtAll = Prelude.error "Internal Happy error\n" happyTcHack :: Happy_GHC_Exts.Int# -> a -> a happyTcHack x y = y {-# INLINE happyTcHack #-} +#else +happyTcHack x y = y #endif ----------------------------------------------------------------------------- diff --git a/packages/backend-lalr/src/Happy/Backend/LALR.hs b/packages/backend-lalr/src/Happy/Backend/LALR.hs index 61e82768..a919ac8b 100644 --- a/packages/backend-lalr/src/Happy/Backend/LALR.hs +++ b/packages/backend-lalr/src/Happy/Backend/LALR.hs @@ -19,16 +19,18 @@ magicFilter magicName = case magicName of in filter_output importsToInject :: Bool -> Bool -> String -importsToInject ghc debug = concat ["\n", import_array, import_bits, glaexts_import, debug_imports, applicative_imports] +importsToInject ghc debug = concat ["\n", import_array, import_list, import_bits, glaexts_import, debug_imports, applicative_imports] where - glaexts_import | ghc = import_glaexts + glaexts_import | ghc = import_glaexts ++ import_ghcstack | otherwise = "" debug_imports | debug = import_debug | otherwise = "" applicative_imports = import_applicative import_glaexts = "import qualified GHC.Exts as Happy_GHC_Exts\n" + import_ghcstack = "import qualified GHC.Stack as Happy_GHC_Stack\n" import_array = "import qualified Data.Array as Happy_Data_Array\n" + import_list = "import qualified Data.List as Happy_Data_List\n" import_bits = "import qualified Data.Bits as Bits\n" import_debug = "import qualified System.IO as Happy_System_IO\n" ++ "import qualified System.IO.Unsafe as Happy_System_IO_Unsafe\n" ++ @@ -49,4 +51,4 @@ defines debug array ghc coerce = unlines [ "#define " ++ d ++ " 1" | d <- vars_t , [ "HAPPY_ARRAY" | array ] , [ "HAPPY_GHC" | ghc ] , [ "HAPPY_COERCE" | coerce ] - ] \ No newline at end of file + ] diff --git a/packages/backend-lalr/src/Happy/Backend/LALR/ProduceCode.lhs b/packages/backend-lalr/src/Happy/Backend/LALR/ProduceCode.lhs index 68fb5288..e790414e 100644 --- a/packages/backend-lalr/src/Happy/Backend/LALR/ProduceCode.lhs +++ b/packages/backend-lalr/src/Happy/Backend/LALR/ProduceCode.lhs @@ -4,6 +4,7 @@ The code generator. (c) 1993-2001 Andy Gill, Simon Marlow ----------------------------------------------------------------------------- +> {-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-} > module Happy.Backend.LALR.ProduceCode (produceParser) where > import Paths_happy_backend_lalr ( version ) @@ -15,11 +16,12 @@ The code generator. > import Data.Maybe ( isJust, isNothing, fromMaybe ) > import Data.Char ( ord, chr ) -> import Data.List ( sortBy ) +> import Data.List ( sortBy, nub ) -> import Control.Monad ( forM_ ) > import Control.Monad.ST ( ST, runST ) -> import Data.Bits ( setBit ) +> import Data.Word +> import Data.Int +> import Data.Bits > import Data.Array.ST ( STUArray ) > import Data.Array.Unboxed ( UArray ) > import Data.Array.MArray ( MArray(..), freeze, readArray, writeArray ) @@ -61,7 +63,7 @@ Produce the complete output file. > , monad = (use_monad,monad_context,monad_tycon,monad_then,monad_return) > , token_type = token_type' > , error_handler = error_handler' -> , error_sig = error_sig' +> , error_expected = error_expected' > }) > action goto lang_exts module_header module_trailer > target coerce ghc strict @@ -72,7 +74,7 @@ Produce the complete output file. > -- don't screw up any OPTIONS pragmas in the header. > . produceAbsSynDecl . nl > . produceTypes -> . produceExpListPerState +> . produceTokToStringList > . produceActionTable target > . produceReductions > . produceTokenConverter . nl @@ -98,8 +100,9 @@ Produce the complete output file. > | otherwise = str "Prelude.Int" > > -- Parsing monad and its constraints -> pty = str monad_tycon -> pcont = str monad_context +> pty = str monad_tycon -- str "P" +> ptyAt a = brack' (pty . str " " . a) -- \(str "a") -> str "(P a)" +> pcont = str monad_context -- str "Read a", some constraint for "P" to be a monad > > -- If GHC is enabled, wrap the content in a CPP ifdef that includes the > -- content and tests whether the GHC version is >= 7.10.3 @@ -241,13 +244,13 @@ based parsers -- types aren't as important there). > . interleave' ",\n " > [ mkActionName i | (i,_action') <- zip [ 0 :: Int .. ] > (assocs action) ] -> . str " :: " . str monad_context . str " => " +> . str " :: " . pcont . str " => " > . intMaybeHash . str " -> " . happyReductionValue . str "\n\n" > . interleave' ",\n " > [ mkReduceFun i | > (i,_action) <- zip [ n_starts :: Int .. ] > (drop n_starts prods) ] -> . str " :: " . str monad_context . str " => " +> . str " :: " . pcont . str " => " > . happyReductionValue . str "\n\n" > | otherwise = id @@ -380,7 +383,7 @@ happyMonadReduce to get polymorphic recursion. Sigh. > . str " -> " . intMaybeHash > . str " -> Happy_IntList -> HappyStk " > . happyAbsSyn . str " -> " -> . pty . str " " . happyAbsSyn . str "\n" +> . ptyAt happyAbsSyn . str "\n" > | otherwise -> id in > filterTypeSig tysig . mkReduceFun i . str " = " > . str s . strspace . lt' . strspace . showInt adjusted_nt @@ -433,64 +436,76 @@ The token conversion function. > = case lexer' of { > > Nothing -> -> str "happyNewToken action sts stk [] =\n\t" -> . eofAction "notHappyAtAll" -> . str " []\n\n" -> . str "happyNewToken action sts stk (tk:tks) =\n\t" -> . str "let cont i = " . doAction . str " sts stk tks in\n\t" -> . str "case tk of {\n\t" +> str "happyTerminalToTok term = case term of {\n\t" > . interleave ";\n\t" (map doToken token_rep) -> . str "_ -> happyError' ((tk:tks), [])\n\t" -> . str "}\n\n" -> . str "happyError_ explist " . eofTok . str " tk tks = happyError' (tks, explist)\n" -> . str "happyError_ explist _ tk tks = happyError' ((tk:tks), explist)\n"; +> . str "_ -> error \"Encountered a token that was not declared to happy\"\n\t}\n" +> . str "{-# NOINLINE happyTerminalToTok #-}\n" +> . str "\n" +> . str "happyEofTok = " . shows (tokIndex eof) . str "\n" +> . str "\n" +> . str "happyLex kend _kmore [] = kend notHappyAtAll []\n" +> . str "happyLex _kend kmore (tk:tks) = kmore (happyTerminalToTok tk) tk tks\n" +> . str "{-# INLINE happyLex #-}\n" +> . str "\n" +> . str "happyNewToken action sts stk = happyLex (\\tk -> " . eofAction . str ") (" +> . str "\\i tk -> " . doAction . str " sts stk)\n" +> . str "\n" +> . str "happyReport " . eofTok . str " tk explist resume tks = happyReport' tks explist resume\n" +> . str "happyReport _ tk explist resume tks = happyReport' (tk:tks) explist resume\n" > -- when the token is EOF, tk == _|_ (notHappyAtAll) -> -- so we must not pass it to happyError' +> -- so we must not pass it to happyReport' +> . str "\n"; > Just (lexer'',eof') -> > case (target, ghc) of > (TargetHaskell, True) -> -> str "happyNewToken :: " . pcont . str " => " +> str "happyTerminalToTok :: " . token . str " -> Happy_GHC_Exts.Int#\n" +> . str "happyNewToken :: " . pcont . str " => " > . str "(Happy_GHC_Exts.Int#\n" > . str " -> Happy_GHC_Exts.Int#\n" > . str " -> " . token . str "\n" > . str " -> HappyState " . token . str " (t -> " -> . pty . str " a)\n" +> . ptyAt (str "a") . str ")\n" > . str " -> [HappyState " . token . str " (t -> " -> . pty . str " a)]\n" +> . ptyAt (str "a") . str ")]\n" > . str " -> t\n" -> . str " -> " . pty . str " a)\n" +> . str " -> " . ptyAt (str "a") . str ")\n" > . str " -> [HappyState " . token . str " (t -> " -> . pty . str " a)]\n" +> . ptyAt (str "a") . str ")]\n" > . str " -> t\n" -> . str " -> " . pty . str " a\n" +> . str " -> " . ptyAt (str "a") . str "\n" > _ -> id -> . str "happyNewToken action sts stk\n\t= " -> . str lexer'' -> . str "(\\tk -> " -> . str "\n\tlet cont i = " -> . doAction -> . str " sts stk in\n\t" -> . str "case tk of {\n\t" -> . str (eof' ++ " -> ") -> . eofAction "tk" . str ";\n\t" +> . str "happyTerminalToTok term = case term of {\n\t" +> . str eof' . str " -> " . eofTok . str ";\n\t" > . interleave ";\n\t" (map doToken token_rep) -> . str "_ -> happyError' (tk, [])\n\t" -> . str "})\n\n" -> . str "happyError_ explist " . eofTok . str " tk = happyError' (tk, explist)\n" -> . str "happyError_ explist _ tk = happyError' (tk, explist)\n"; -> -- superfluous pattern match needed to force happyError_ to +> . str "_ -> error \"Encountered a token that was not declared to happy\"\n\t}\n" +> . str "{-# NOINLINE happyTerminalToTok #-}\n" +> . str "\n" +> . str "happyEofTok = " . shows (tokIndex eof) . str "\n" +> . str "\n" +> . str "happyLex kend kmore = " . str lexer'' . str " (\\tk -> case tk of {\n\t" +> . str eof' . str " -> kend tk;\n\t" +> . str "_ -> kmore (happyTerminalToTok tk) tk })\n" +> . str "{-# INLINE happyLex #-}\n" +> . str "\n" +> . str "happyNewToken action sts stk = happyLex (\\tk -> " . eofAction . str ") (" +> . str "\\i tk -> " . doAction . str " sts stk)\n" +> . str "\n" +> . str "happyReport " . eofTok . str " = happyReport'\n" +> . str "happyReport _ = happyReport'\n" +> -- superfluous pattern match needed to force happyReport to > -- have the correct type. +> . str "\n"; > } > where -> eofAction tk = +> eofAction = > (case target of > TargetArrayBased -> -> str "happyDoAction " . eofTok . strspace . str tk . str " action" +> str "happyDoAction " . eofTok . strspace . str " tk action" > _ -> str "action " . eofTok . strspace . eofTok -> . strspace . str tk . str " (HappyState action)") +> . str " tk (HappyState action)") > . str " sts stk" > eofTok = showInt (tokIndex eof) > @@ -498,10 +513,7 @@ The token conversion function. > TargetArrayBased -> str "happyDoAction i tk action" > _ -> str "action i i tk (HappyState action)" > -> doToken (i,tok) -> = str (removeDollarDollar tok) -> . str " -> cont " -> . showInt (tokIndex i) +> doToken (i,tok) = str (removeDollarDollar tok) . str " -> " . showInt (tokIndex i) Use a variable rather than '_' to replace '$$', so we can use it on the left hand side of '@'. @@ -524,8 +536,10 @@ the left hand side of '@'. > tokIndex > = case target of > TargetHaskell -> id -> TargetArrayBased -> \i -> i - n_nonterminals - n_starts - 2 +> TargetArrayBased -> arrayTokAdjustment > -- tokens adjusted to start at zero, see ARRAY_NOTES +> arrayTokAdjustment i = i - fst_term + 2 -- +2: errorTok, catchTok +> %----------------------------------------------------------------------------- Action Tables. @@ -533,10 +547,17 @@ Action Tables. Here we do a bit of trickery and replace the normal default action (failure) for each state with at least one reduction action. For each such state, we pick one reduction action to be the default action. -This should make the code smaller without affecting the speed. It -changes the sematics for errors, however; errors could be detected in -a different state now (but they'll still be detected at the same point -in the token stream). +This should make the code smaller without affecting the speed. +It changes the sematics for errors, however; errors could be detected in a +different state now (but they'll still be detected at the same point in the +token stream). + +SG: For a data point, in issue93 the happyTable triples in size when we always +pick failure as the default reduction. +Presumably that is because there are quite a few reduction states, in which the +only non-default transition is a reduction. +Our scheme above ensures that these states don't occupy space in the main +happyTable at all; they just get an entry in the happyDefActions. Further notes on default cases: @@ -581,27 +602,17 @@ machinery to discard states in the parser... > produceActionTable TargetArrayBased > = produceActionArray > . produceReduceArray +> . produceRuleArray +> . produceCatchStates > . str "happy_n_terms = " . shows n_terminals . str " :: Prelude.Int\n" > . str "happy_n_nonterms = " . shows n_nonterminals . str " :: Prelude.Int\n\n" +> . str "happy_n_starts = " . shows n_starts . str " :: Prelude.Int\n\n" > -> produceExpListPerState -> = produceExpListArray -> . str "{-# NOINLINE happyExpListPerState #-}\n" -> . str "happyExpListPerState st =\n" -> . str " token_strs_expected\n" -> . str " where token_strs = " . str (show $ elems token_names') . str "\n" -> . str " bit_start = st Prelude.* " . str (show nr_tokens) . str "\n" -> . str " bit_end = (st Prelude.+ 1) Prelude.* " . str (show nr_tokens) . str "\n" -> . str " read_bit = readArrayBit happyExpList\n" -> . str " bits = Prelude.map read_bit [bit_start..bit_end Prelude.- 1]\n" -> . str " bits_indexed = Prelude.zip bits [0.." -> . str (show (nr_tokens - 1)) . str "]\n" -> . str " token_strs_expected = Prelude.concatMap f bits_indexed\n" -> . str " f (Prelude.False, _) = []\n" -> . str " f (Prelude.True, nr) = [token_strs Prelude.!! nr]\n" +> produceTokToStringList +> = str "{-# NOINLINE happyTokenStrings #-}\n" +> . str "happyTokenStrings = " . shows (drop (fst_term - 1) (elems token_names')) . str "\n" +> -- fst_term - 1: fst_term includes eofToken, but that is last in the list. > . str "\n" -> where (first_token, last_token) = bounds token_names' -> nr_tokens = last_token - first_token + 1 > > produceStateFunction goto' (state, acts) > = foldr (.) id (map produceActions assocs_acts) @@ -611,17 +622,10 @@ machinery to discard states in the parser... > then str " x = happyTcHack x " > else str " _ = ") > . mkAction default_act -> . (case default_act of -> LR'Fail -> callHappyExpListPerState -> LR'MustFail -> callHappyExpListPerState -> _ -> str "") > . str "\n\n" > > where gotos = goto' ! state > -> callHappyExpListPerState = str " (happyExpListPerState " -> . str (show state) . str ")" -> > produceActions (_, LR'Fail{-'-}) = id > produceActions (t, action'@(LR'Reduce{-'-} _ _)) > | action' == default_act = id @@ -632,10 +636,6 @@ machinery to discard states in the parser... > producePossiblyFailingAction t action' > = actionFunction t > . mkAction action' -> . (case action' of -> LR'Fail -> str " []" -> LR'MustFail -> str " []" -> _ -> str "") > . str "\n" > > produceGotos (t, Goto i) @@ -658,36 +658,27 @@ action array indexed by (terminal * last_state) + state > | ghc > = str "happyActOffsets :: HappyAddr\n" > . str "happyActOffsets = HappyA# \"" --" -> . str (checkedHexChars min_off act_offs) +> . hexChars act_offs > . str "\"#\n\n" --" > > . str "happyGotoOffsets :: HappyAddr\n" > . str "happyGotoOffsets = HappyA# \"" --" -> . str (checkedHexChars min_off goto_offs) +> . hexChars goto_offs > . str "\"#\n\n" --" > -> . str "happyAdjustOffset :: Happy_GHC_Exts.Int# -> Happy_GHC_Exts.Int#\n" -> . str "happyAdjustOffset off = " -> . (if length table < 32768 -> then str "off" -> else str "if happyLt off (" . shows min_off . str "# :: Happy_GHC_Exts.Int#)" -> . str " then off Happy_GHC_Exts.+# 65536#" -> . str " else off") -> . str "\n\n" --" -> > . str "happyDefActions :: HappyAddr\n" > . str "happyDefActions = HappyA# \"" --" -> . str (hexChars defaults) +> . hexChars defaults > . str "\"#\n\n" --" > > . str "happyCheck :: HappyAddr\n" > . str "happyCheck = HappyA# \"" --" -> . str (hexChars check) +> . hexChars check > . str "\"#\n\n" --" > > . str "happyTable :: HappyAddr\n" > . str "happyTable = HappyA# \"" --" -> . str (hexChars table) +> . hexChars table > . str "\"#\n\n" --" > | otherwise @@ -702,9 +693,6 @@ action array indexed by (terminal * last_state) + state > . shows n_states . str ") ([" > . interleave' "," (map shows goto_offs) > . str "\n\t])\n\n" -> -> . str "happyAdjustOffset :: Prelude.Int -> Prelude.Int\n" -> . str "happyAdjustOffset = Prelude.id\n\n" > > . str "happyDefActions :: Happy_Data_Array.Array Prelude.Int Prelude.Int\n" > . str "happyDefActions = Happy_Data_Array.listArray (0," @@ -724,41 +712,49 @@ action array indexed by (terminal * last_state) + state > . interleave' "," (map shows table) > . str "\n\t])\n\n" -> produceExpListArray -> | ghc -> = str "happyExpList :: HappyAddr\n" -> . str "happyExpList = HappyA# \"" --" -> . str (hexChars explist) -> . str "\"#\n\n" --" -> | otherwise -> = str "happyExpList :: Happy_Data_Array.Array Prelude.Int Prelude.Int\n" -> . str "happyExpList = Happy_Data_Array.listArray (0," -> . shows table_size . str ") ([" -> . interleave' "," (map shows explist) -> . str "\n\t])\n\n" - > (_, last_state) = bounds action > n_states = last_state + 1 > n_terminals = length terms > n_nonterminals = length nonterms - n_starts -- lose %starts > -> (act_offs,goto_offs,table,defaults,check,explist,min_off) +> (act_offs,goto_offs,table,defaults,check,catch_states) > = mkTables action goto first_nonterm' fst_term > n_terminals n_nonterminals n_starts (bounds token_names') > > table_size = length table - 1 > > produceReduceArray -> = {- str "happyReduceArr :: Array Int a\n" -} -> str "happyReduceArr = Happy_Data_Array.array (" +> = str "happyReduceArr = Happy_Data_Array.array (" > . shows (n_starts :: Int) -- omit the %start reductions > . str ", " > . shows n_rules > . str ") [\n" > . interleave' ",\n" (map reduceArrElem [n_starts..n_rules]) > . str "\n\t]\n\n" - +> +> produceRuleArray -- rule number to (non-terminal number, rule length) +> | ghc +> = str "happyRuleArr :: HappyAddr\n" +> . str "happyRuleArr = HappyA# \"" -- " +> . hexChars (concatMap (\(nt,len) -> [nt,len]) ruleArrElems) +> . str "\"#\n\n" --" +> | otherwise +> = str "happyRuleArr :: Happy_Data_Array.Array Int (Int,Int)\n" +> . str "happyRuleArr = Happy_Data_Array.array (" +> . shows (n_starts :: Int) -- omit the %start reductions +> . str ", " +> . shows n_rules +> . str ") [\n" +> . interleave' ",\n" (zipWith showRuleArrElem [n_starts..n_rules] ruleArrElems) +> . str "\n\t]\n\n" +> +> ruleArrElems = map (\(Production nt toks _code _prio) -> (nt-first_nonterm',length toks)) (drop n_starts prods) +> > n_rules = length prods - 1 :: Int +> +> produceCatchStates +> = str "happyCatchStates :: [Int]\n" +> . str "happyCatchStates = " . shows catch_states . str "\n\n" > showInt i | ghc = shows i . showChar '#' > | otherwise = shows i @@ -820,6 +816,7 @@ MonadStuff: happyThen :: () => HappyIdentity a -> (a -> HappyIdentity b) -> HappyIdentity b happyReturn :: () => a -> HappyIdentity a happyThen1 m k tks = happyThen m (\a -> k a tks) + happyFmap1 f m tks = happyThen (m tks) (\a -> happyReturn (f a)) happyReturn1 = \a tks -> happyReturn a - with %monad: @@ -827,6 +824,7 @@ MonadStuff: happyThen :: CONTEXT => P a -> (a -> P b) -> P b happyReturn :: CONTEXT => a -> P a happyThen1 m k tks = happyThen m (\a -> k a tks) + happyFmap1 f m tks = happyThen (m tks) (\a -> happyReturn (f a)) happyReturn1 = \a tks -> happyReturn a - with %monad & %lexer: @@ -835,30 +833,39 @@ MonadStuff: happyReturn :: CONTEXT => a -> P a happyThen1 = happyThen happyReturn1 = happyReturn + happyFmap1 f m = happyThen m (\a -> happyReturn (f a)) > produceMonadStuff = -> str "happyThen :: " . pcont . str " => " . pty -> . str " a -> (a -> " . pty -> . str " b) -> " . pty . str " b\n" +> str "happyThen :: " . pcont . str " => " . ptyAt (str "a") +> . str " -> (a -> " . ptyAt (str "b") +> . str ") -> " . ptyAt (str "b") . str "\n" > . str "happyThen = " . brack monad_then . nl -> . str "happyReturn :: " . pcont . str " => a -> " . pty . str " a\n" +> . str "happyReturn :: " . pcont . str " => a -> " . ptyAt (str "a") . str "\n" > . str "happyReturn = " . brack monad_return . nl > . case lexer' of > Nothing -> > str "happyThen1 m k tks = (" . str monad_then > . str ") m (\\a -> k a tks)\n" -> . str "happyReturn1 :: " . pcont . str " => a -> b -> " . pty . str " a\n" +> . str "happyFmap1 f m tks = happyThen (m tks) (\\a -> happyReturn (f a))\n" +> . str "happyReturn1 :: " . pcont . str " => a -> b -> " . ptyAt (str "a") . str "\n" > . str "happyReturn1 = \\a tks -> " . brack monad_return > . str " a\n" -> . str "happyError' :: " . str monad_context . str " => ([" -> . token -> . str "], [Prelude.String]) -> " -> . str monad_tycon -> . str " a\n" -> . str "happyError' = " -> . str (if use_monad then "" else "HappyIdentity Prelude.. ") -> . errorHandler . str "\n" +> . str "happyReport' :: " . pcont . str " => " +> . str "[" . token . str "] -> " +> . str "[Prelude.String] -> (" +> . str "[" . token . str "] -> " +> . ptyAt (str "a") . str ") -> " +> . ptyAt (str "a") +> . str "\n" +> . str "happyReport' = " . callReportError . str "\n" +> . str "\n" +> . str "happyAbort :: " . pcont . str " => " +> . str "[" . token . str "] -> " +> . ptyAt (str "a") +> . str "\n" +> . str "happyAbort = " . str abort_handler . str "\n" +> . str "\n" > _ -> > let > happyParseSig @@ -871,7 +878,7 @@ MonadStuff: > | target == TargetArrayBased = > str "happyNewToken :: " . pcont . str " => " . intMaybeHash > . str " -> Happy_IntList -> HappyStk " . happyAbsSyn -> . str " -> " . pty . str " " . happyAbsSyn . str"\n" +> . str " -> " . ptyAt happyAbsSyn . str"\n" > . str "\n" > | otherwise = id > doActionSig @@ -879,7 +886,7 @@ MonadStuff: > str "happyDoAction :: " . pcont . str " => " . intMaybeHash > . str " -> " . str token_type' . str " -> " . intMaybeHash > . str " -> Happy_IntList -> HappyStk " . happyAbsSyn -> . str " -> " . pty . str " " . happyAbsSyn . str "\n" +> . str " -> " . ptyAt happyAbsSyn . str "\n" > . str "\n" > | otherwise = id > reduceArrSig @@ -888,7 +895,7 @@ MonadStuff: > . str " => Happy_Data_Array.Array Prelude.Int (" . intMaybeHash > . str " -> " . str token_type' . str " -> " . intMaybeHash > . str " -> Happy_IntList -> HappyStk " . happyAbsSyn -> . str " -> " . pty . str " " . happyAbsSyn . str ")\n" +> . str " -> " . ptyAt happyAbsSyn . str ")\n" > . str "\n" > | otherwise = id in > filterTypeSig (happyParseSig . newTokenSig . doActionSig . reduceArrSig) @@ -896,33 +903,60 @@ MonadStuff: > . str " a -> (a -> " . pty > . str " b) -> " . pty . str " b\n" > . str "happyThen1 = happyThen\n" -> . str "happyReturn1 :: " . pcont . str " => a -> " . pty . str " a\n" +> . str "happyFmap1 f m = happyThen m (\\a -> happyReturn (f a))\n" +> . str "happyReturn1 :: " . pcont . str " => a -> " . ptyAt (str "a") . str "\n" > . str "happyReturn1 = happyReturn\n" -> . str "happyError' :: " . str monad_context . str " => (" -> . token . str ", [Prelude.String]) -> " -> . str monad_tycon -> . str " a\n" -> . str "happyError' tk = " -> . str (if use_monad then "" else "HappyIdentity ") -> . errorHandler . str " tk\n" - +> . str "happyReport' :: " . pcont . str " => " +> . token . str " -> " +> . str "[Prelude.String] -> " +> . ptyAt (str "a") . str " -> " +> . ptyAt (str "a") +> . str "\n" +> . str "happyReport' = " . callReportError . str "\n" +> . str "\n" +> . str "happyAbort :: " . pcont . str " => " +> . ptyAt (str "a") +> . str "\n" +> . str "happyAbort = " . str abort_handler . str "\n" +> . str "\n" + +The error handler takes up to three arguments. An error handler specified with %error is passed the current token -when used with %lexer, but happyError (the old way but kept for -compatibility) is not passed the current token. Also, the %errorhandlertype -directive determines the API of the provided function. - -> errorHandler = -> case error_handler' of -> Just h -> case error_sig' of -> ErrorHandlerTypeExpList -> str h -> ErrorHandlerTypeDefault -> str "(\\(tokens, _) -> " . str h . str " tokens)" -> Nothing -> case lexer' of -> Nothing -> str "(\\(tokens, _) -> happyError tokens)" -> Just _ -> str "(\\(tokens, explist) -> happyError)" +when used with %lexer as the first argument, but happyError (the old way but kept for +compatibility) is not passed the current token. +Furthermore, the second argument is the list of expected tokens +in the presence of the %error.expected directive. +The last argument is the "resumption", a continuation that tries to find +an item on the stack taking a @catch@ terminal where parsing may resume, +in the presence of the two-argument form of the %error directive. + +> callReportError = -- this one wraps around report_error_handler to expose a unified interface +> str "(\\tokens expected resume -> " . +> (if use_monad then str "" +> else str "HappyIdentity Prelude.$ ") . +> report_error_handler . +> (case (error_handler', lexer') of (DefaultErrorHandler, Just _) -> id +> _ -> str " tokens") . +> (if error_expected' then str " expected" +> else id) . +> (case error_handler' of ResumptiveErrorHandler{} -> str " resume" +> _ -> id) . +> str ")" +> report_error_handler = case error_handler' of +> DefaultErrorHandler -> str "happyError" +> CustomErrorHandler h -> brack h +> ResumptiveErrorHandler _abort report -> brack report +> abort_handler = case error_handler' of +> ResumptiveErrorHandler abort _report -> abort +> _ -> "error \"Called abort handler in non-resumptive parser\"" > reduceArrElem n > = str "\t(" . shows n . str " , " > . str "happyReduce_" . shows n . char ')' +> +> showRuleArrElem r (nt, len) +> = str "\t(" . shows r . str " , (" +> . shows nt . str "," . shows len . str ") )" ----------------------------------------------------------------------------- -- Produce the parser entry and exit points @@ -1058,9 +1092,9 @@ See notes under "Action Tables" above for some subtleties in this function. > (act : _) | act /= LR'Fail -> LR'Fail > > -- no error actions, pick a reduce to be the default. -> _ -> case reduces of -> [] -> LR'Fail -> (act:_) -> act -- pick the first one we see for now +> _ -> case reduces of +> [] -> LR'Fail +> (act:_) -> act -- pick the first one we see for now > > where reduces > = [ act | (_, act@(LR'Reduce _ _)) <- actions ] @@ -1084,7 +1118,6 @@ See notes under "Action Tables" above for some subtleties in this function. -- happyCheck -- Indicates whether we should use the default action for state - -- the table is laid out such that the action for a given state & token -- can be found by: -- @@ -1111,6 +1144,10 @@ See notes under "Action Tables" above for some subtleties in this function. -- try to fit the actions into the check table, using the ordering -- from above. +SG: If you want to know more about similar compression schemes, consult + Storing a Sparse Table (https://dl.acm.org/doi/10.1145/359168.359175) +One can think of the mapping @\(state,token) -> (offs ! state)+token@ as a hash +and @check@ as the way to detect "collisions" (i.e., default entries). > mkTables > :: ActionTable -> GotoTable -> Name -> Int -> Int -> Int -> Int -> (Int, Int) -> @@ -1119,8 +1156,7 @@ See notes under "Action Tables" above for some subtleties in this function. > , [Int] -- happyTable > , [Int] -- happyDefAction > , [Int] -- happyCheck -> , [Int] -- happyExpList -> , Int -- happyMinOffset +> , [Int] -- happyCatchStates > ) > > mkTables action goto first_nonterm' fst_term @@ -1132,15 +1168,14 @@ See notes under "Action Tables" above for some subtleties in this function. > , take max_off (elems table) > , def_actions > , take max_off (elems check) -> , elems explist -> , min_off +> , shifted_catch_states > ) > where > -> (table,check,act_offs,goto_offs,explist,min_off,max_off) +> (table,check,act_offs,goto_offs,max_off) > = runST (genTables (length actions) > max_token token_names_bound -> sorted_actions explist_actions) +> sorted_actions) > > -- the maximum token number used in the parser > max_token = max n_terminals (n_starts+n_nonterminals) - 1 @@ -1157,24 +1192,24 @@ See notes under "Action Tables" above for some subtleties in this function. > length acts'', > acts'') > | (state, acts) <- assocs action, -> let (err:_dummy:vec) = assocs acts +> let (err:catch:_dummy:vec) = assocs acts > vec' = drop (n_starts+n_nonterminals) vec -> acts' = filter notFail (err:vec') +> acts' = filter notFail (err:catch:vec') > default_act = getDefault acts' > acts'' = mkActVals acts' default_act > ] > -> explist_actions :: [(Int, [Int])] -> explist_actions = [ (state, concatMap f $ assocs acts) -> | (state, acts) <- assocs action ] -> where -> f (t, LR'Shift _ _ ) = [t - fst token_names_bound] -> f (_, _) = [] +> shifted_catch_states :: [Int] +> shifted_catch_states = -- collect the states in which we have just shifted a catchTok +> nub [ to_state | (_from_state, acts) <- assocs action +> , let (_err:catch:_) = assocs acts +> , (_tok, LR'Shift to_state _) <- return catch ] > -> -- adjust terminals by -(fst_term+1), so they start at 1 (error is 0). +> -- adjust terminals by -(fst_term+2), so they start at 2 (error is 0, catch is 1). > -- (see ARRAY_NOTES) > adjust token | token == errorTok = 0 -> | otherwise = token - fst_term + 1 +> | token == catchTok = 1 +> | otherwise = token - fst_term + 2 > > mkActVals assocs' default_act = > [ (adjust token, actionVal act) @@ -1218,34 +1253,29 @@ See notes under "Action Tables" above for some subtleties in this function. > -> Int -- maximum token no. > -> (Int, Int) -- token names bounds > -> [TableEntry] -- entries for the table -> -> [(Int, [Int])] -- expected tokens lists > -> ST s ( UArray Int Int -- table > , UArray Int Int -- check > , UArray Int Int -- action offsets > , UArray Int Int -- goto offsets -> , UArray Int Int -- expected tokens list -> , Int -- lowest offset in table > , Int -- highest offset in table > ) > -> genTables n_actions max_token token_names_bound entries explist = do +> genTables n_actions max_token token_names_bound entries = do > > table <- newArray (0, mAX_TABLE_SIZE) 0 > check <- newArray (0, mAX_TABLE_SIZE) (-1) > act_offs <- newArray (0, n_actions) 0 > goto_offs <- newArray (0, n_actions) 0 > off_arr <- newArray (-max_token, mAX_TABLE_SIZE) 0 -> exp_array <- newArray (0, (n_actions * n_token_names + 15) `div` 16) 0 > -> (min_off,max_off) <- genTables' table check act_offs goto_offs off_arr exp_array entries -> explist max_token n_token_names +> max_off <- genTables' table check act_offs goto_offs off_arr entries +> max_token n_token_names > > table' <- freeze table > check' <- freeze check > act_offs' <- freeze act_offs > goto_offs' <- freeze goto_offs -> exp_array' <- freeze exp_array -> return (table',check',act_offs',goto_offs',exp_array',min_off,max_off+1) +> return (table',check',act_offs',goto_offs',max_off+1) > where > n_states = n_actions - 1 @@ -1260,33 +1290,22 @@ See notes under "Action Tables" above for some subtleties in this function. > -> STUArray s Int Int -- action offsets > -> STUArray s Int Int -- goto offsets > -> STUArray s Int Int -- offset array -> -> STUArray s Int Int -- expected token list > -> [TableEntry] -- entries for the table -> -> [(Int, [Int])] -- expected tokens lists > -> Int -- maximum token no. > -> Int -- number of token names -> -> ST s (Int,Int) -- lowest and highest offsets in table +> -> ST s Int -- highest offsets in table > -> genTables' table check act_offs goto_offs off_arr exp_array entries -> explist max_token n_token_names -> = fill_exp_array >> fit_all entries 0 0 1 +> genTables' table check act_offs goto_offs off_arr entries +> max_token n_token_names +> = fit_all entries 0 1 > where > -> fit_all [] min_off max_off _ = return (min_off, max_off) -> fit_all (s:ss) min_off max_off fst_zero = do -> (off, new_min_off, new_max_off, new_fst_zero) <- fit s min_off max_off fst_zero +> fit_all [] max_off _ = return max_off +> fit_all (s:ss) max_off fst_zero = do +> (off, new_max_off, new_fst_zero) <- fit s max_off fst_zero > ss' <- same_states s ss off > writeArray off_arr off 1 -> fit_all ss' new_min_off new_max_off new_fst_zero -> -> fill_exp_array = -> forM_ explist $ \(state, tokens) -> -> forM_ tokens $ \token -> do -> let bit_nr = state * n_token_names + token -> let word_nr = bit_nr `div` 16 -> let word_offset = bit_nr `mod` 16 -> x <- readArray exp_array word_nr -> writeArray exp_array word_nr (setBit x word_offset) +> fit_all ss' new_max_off new_fst_zero > > -- try to merge identical states. We only try the next state(s) > -- in the list, but the list is kind-of sorted so we shouldn't @@ -1303,19 +1322,16 @@ See notes under "Action Tables" above for some subtleties in this function. > -- fit a vector into the table. Return the offset of the vector, > -- the maximum offset used in the table, and the offset of the first > -- entry in the table (used to speed up the lookups a bit). -> fit (_,_,_,_,_,[]) min_off max_off fst_zero = return (0,min_off,max_off,fst_zero) +> fit (_,_,_,_,_,[]) max_off fst_zero = return (0,max_off,fst_zero) > > fit (act_or_goto, state_no, _deflt, _, _, state@((t,_):_)) -> min_off max_off fst_zero = do +> max_off fst_zero = do > -- start at offset 1 in the table: all the empty states > -- (states with just a default reduction) are mapped to > -- offset zero. > off <- findFreeOffset (-t+fst_zero) check off_arr state -> let new_min_off | furthest_left < min_off = furthest_left -> | otherwise = min_off -> new_max_off | furthest_right > max_off = furthest_right +> let new_max_off | furthest_right > max_off = furthest_right > | otherwise = max_off -> furthest_left = off > furthest_right = off + max_token > > -- trace ("fit: state " ++ show state_no ++ ", off " ++ show off ++ ", elems " ++ show state) $ do @@ -1323,7 +1339,7 @@ See notes under "Action Tables" above for some subtleties in this function. > writeArray (which_off act_or_goto) state_no off > addState off table check state > new_fst_zero <- findFstFreeSlot check fst_zero -> return (off, new_min_off, new_max_off, new_fst_zero) +> return (off, new_max_off, new_fst_zero) When looking for a free offset in the table, we use the 'check' table rather than the main table. The check table starts off with (-1) in @@ -1441,28 +1457,28 @@ slot is free or not. -- Convert an integer to a 16-bit number encoded in \xNN\xNN format suitable -- for placing in a string. -> hexChars :: [Int] -> String -> hexChars = concatMap hexChar +> hexChars :: [Int] -> String -> String +> hexChars is s = foldr (hexChar . toInt32) s is -> hexChar :: Int -> String -> hexChar i | i < 0 = hexChar (i + 65536) -> hexChar i = toHex (i `mod` 256) ++ toHex (i `div` 256) +The following definition of @hexChar@ is endian-ness preserving. +Should endianness differ between the architecture running happy and the one +running the compiled parser, the order of [0,1,2,3] must be reversed. -> toHex :: Int -> String -> toHex i = ['\\','x', hexDig (i `div` 16), hexDig (i `mod` 16)] +> hexChar :: Int32 -> String -> String +> hexChar i s = foldr (toHex . byte i) s [0,1,2,3] -> hexDig :: Int -> Char -> hexDig i | i <= 9 = chr (i + ord '0') -> | otherwise = chr (i - 10 + ord 'a') +> byte :: Int32 -> Int -> Word8 +> byte n i = fromIntegral (0xFF .&. unsafeShiftR n (i*8)) -This guards against integers that are so large as to (when converted using -'hexChar') wrap around the maximum value of 16-bit numbers and then end up -larger than an expected minimum value. +> toHex :: Word8 -> String -> String +> toHex i s = '\\':'x':hexDig (0xF .&. unsafeShiftR i 4):hexDig (0xF .&. i):s -> checkedHexChars :: Int -> [Int] -> String -> checkedHexChars minValue = concatMap hexChar' -> where hexChar' i | checkHexChar minValue i = hexChar i -> | otherwise = error "grammar does not fit in 16-bit representation that is used with '--ghc'" +> hexDig :: Word8 -> Char +> hexDig i | i <= 9 = chr (fromIntegral i + ord '0') +> | otherwise = chr (fromIntegral i - 10 + ord 'a') -> checkHexChar :: Int -> Int -> Bool -> checkHexChar minValue i = i <= 32767 || i - 65536 < minValue +> toInt32 :: Int -> Int32 +> toInt32 i +> | i == fromIntegral i32 = i32 +> | otherwise = error ("offset was too large for Int32: " ++ show i) +> where i32 = fromIntegral i diff --git a/packages/codegen-common/src/Happy/CodeGen/Common/Options.lhs b/packages/codegen-common/src/Happy/CodeGen/Common/Options.lhs index 66239991..42e19d2c 100644 --- a/packages/codegen-common/src/Happy/CodeGen/Common/Options.lhs +++ b/packages/codegen-common/src/Happy/CodeGen/Common/Options.lhs @@ -5,14 +5,20 @@ The CommonOptions data type. ----------------------------------------------------------------------------- > module Happy.CodeGen.Common.Options ( -> ErrorHandlerType(..), -> CommonOptions(..) +> ErrorHandlerInfo(..), CommonOptions(..) > ) where -> data ErrorHandlerType -> = ErrorHandlerTypeDefault -> | ErrorHandlerTypeExpList - +> data ErrorHandlerInfo +> = DefaultErrorHandler +> -- ^ Default handler `happyError`. +> | CustomErrorHandler String +> -- ^ Call this handler on error. +> | ResumptiveErrorHandler String {- abort -} String {- addMessage -} +> -- ^ `ResumptiveErrorHandler abort reportError`: +> -- Calls non-fatal `reportError ... resume` with resumption `resume` to +> -- get more errors, ultimately failing with `abort` when parse can't be +> -- resumed. +> > data CommonOptions > = CommonOptions { > token_type :: String, @@ -20,6 +26,8 @@ The CommonOptions data type. > monad :: (Bool,String,String,String,String), > expect :: Maybe Int, > lexer :: Maybe (String,String), -> error_handler :: Maybe String, -> error_sig :: ErrorHandlerType +> error_handler :: ErrorHandlerInfo, +> error_expected :: Bool +> -- ^ Error handler expects a `[String]` as arg after current +> -- token carrying the pretty-printed expected tokens. > } diff --git a/packages/frontend/happy-frontend.cabal b/packages/frontend/happy-frontend.cabal index 517796ac..b42a0134 100644 --- a/packages/frontend/happy-frontend.cabal +++ b/packages/frontend/happy-frontend.cabal @@ -61,7 +61,7 @@ library happy-grammar == 2.0 default-language: Haskell98 - default-extensions: CPP, MagicHash, FlexibleContexts + default-extensions: CPP, MagicHash, FlexibleContexts, PatternGuards ghc-options: -Wall other-modules: Happy.Frontend.ParseMonad diff --git a/packages/frontend/src/Happy/Frontend/AbsSyn.lhs b/packages/frontend/src/Happy/Frontend/AbsSyn.lhs index 3b195a7f..c055922a 100644 --- a/packages/frontend/src/Happy/Frontend/AbsSyn.lhs +++ b/packages/frontend/src/Happy/Frontend/AbsSyn.lhs @@ -10,13 +10,13 @@ Here is the abstract syntax of the language we parse. > BookendedAbsSyn(..), > AbsSyn(..), Directive(..), > getTokenType, getTokenSpec, getParserNames, getLexer, -> getImportedIdentity, getMonad, getError, -> getPrios, getPrioNames, getExpect, getErrorHandlerType, +> getImportedIdentity, getMonad, ErrorHandlerInfo(..), getError, +> getPrios, getPrioNames, getExpect, getErrorHandlerExpectedList, > getAttributes, getAttributetype, > Rule(..), Prod(..), Term(..), Prec(..) > ) where -> import Happy.CodeGen.Common.Options (ErrorHandlerType(..)) +> import Happy.CodeGen.Common.Options (ErrorHandlerInfo(..)) > data BookendedAbsSyn > = BookendedAbsSyn @@ -66,17 +66,17 @@ generate some error messages. > | TokenSpec [(a,String)] -- %token > | TokenName String (Maybe String) Bool -- %name/%partial (True <=> %partial) > | TokenLexer String String -- %lexer -> | TokenErrorHandlerType String -- %errorhandlertype > | TokenImportedIdentity -- %importedidentity > | TokenMonad String String String String -- %monad > | TokenNonassoc [String] -- %nonassoc > | TokenRight [String] -- %right > | TokenLeft [String] -- %left > | TokenExpect Int -- %expect -> | TokenError String -- %error +> | TokenError String (Maybe String) -- %error +> | TokenErrorExpected -- %error.expected > | TokenAttributetype String -- %attributetype > | TokenAttribute String String -- %attribute -> deriving Show +> deriving (Eq, Show) > getTokenType :: [Directive t] -> String > getTokenType ds @@ -134,22 +134,17 @@ generate some error messages. > [] -> Nothing > _ -> error "multiple expect directives" -> getError :: [Directive t] -> Maybe String +> getError :: [Directive t] -> ErrorHandlerInfo > getError ds -> = case [ a | (TokenError a) <- ds ] of -> [t] -> Just t -> [] -> Nothing +> = case [ (a, mb_b) | (TokenError a mb_b) <- ds ] of +> [] -> DefaultErrorHandler +> [(a,Nothing)] -> CustomErrorHandler a +> [(abort,Just addMessage)] -> ResumptiveErrorHandler abort addMessage > _ -> error "multiple error directives" -> getErrorHandlerType :: [Directive t] -> ErrorHandlerType -> getErrorHandlerType ds -> = case [ a | (TokenErrorHandlerType a) <- ds ] of -> [t] -> case t of -> "explist" -> ErrorHandlerTypeExpList -> "default" -> ErrorHandlerTypeDefault -> _ -> error "unsupported %errorhandlertype value" -> [] -> ErrorHandlerTypeDefault -> _ -> error "multiple errorhandlertype directives" +> getErrorHandlerExpectedList :: Eq t => [Directive t] -> Bool +> getErrorHandlerExpectedList ds +> = TokenErrorExpected `elem` ds > getAttributes :: [Directive t] -> [(String, String)] > getAttributes ds diff --git a/packages/frontend/src/Happy/Frontend/Lexer.lhs b/packages/frontend/src/Happy/Frontend/Lexer.lhs index 8bfe65bd..d51060db 100644 --- a/packages/frontend/src/Happy/Frontend/Lexer.lhs +++ b/packages/frontend/src/Happy/Frontend/Lexer.lhs @@ -37,7 +37,6 @@ The lexer. > | TokSpecId_Token -- %token > | TokSpecId_Name -- %name > | TokSpecId_Partial -- %partial -> | TokSpecId_ErrorHandlerType -- %errorhandlertype > | TokSpecId_Lexer -- %lexer > | TokSpecId_ImportedIdentity -- %importedidentity > | TokSpecId_Monad -- %monad @@ -48,6 +47,7 @@ The lexer. > | TokSpecId_Shift -- %shift > | TokSpecId_Expect -- %expect > | TokSpecId_Error -- %error +> | TokSpecId_ErrorExpected -- %error.expected > | TokSpecId_Attributetype -- %attributetype > | TokSpecId_Attribute -- %attribute > | TokCodeQuote -- stuff inside { .. } @@ -103,42 +103,45 @@ followed by a special identifier. > lexPercent :: (Token -> Pfunc a) -> [Char] -> Int -> ParseResult a > lexPercent cont s = case s of > '%':rest -> cont (TokenKW TokDoublePercent) rest -> 't':'o':'k':'e':'n':'t':'y':'p':'e':rest -> +> 't':'o':'k':'e':'n':'t':'y':'p':'e':rest | end_of_id rest -> > cont (TokenKW TokSpecId_TokenType) rest -> 't':'o':'k':'e':'n':rest -> +> 't':'o':'k':'e':'n':rest | end_of_id rest -> > cont (TokenKW TokSpecId_Token) rest -> 'n':'a':'m':'e':rest -> +> 'n':'a':'m':'e':rest | end_of_id rest -> > cont (TokenKW TokSpecId_Name) rest -> 'p':'a':'r':'t':'i':'a':'l':rest -> +> 'p':'a':'r':'t':'i':'a':'l':rest | end_of_id rest -> > cont (TokenKW TokSpecId_Partial) rest -> 'i':'m':'p':'o':'r':'t':'e':'d':'i':'d':'e':'n':'t':'i':'t':'y':rest -> +> 'i':'m':'p':'o':'r':'t':'e':'d':'i':'d':'e':'n':'t':'i':'t':'y':rest | end_of_id rest -> > cont (TokenKW TokSpecId_ImportedIdentity) rest -> 'm':'o':'n':'a':'d':rest -> +> 'm':'o':'n':'a':'d':rest | end_of_id rest -> > cont (TokenKW TokSpecId_Monad) rest -> 'l':'e':'x':'e':'r':rest -> +> 'l':'e':'x':'e':'r':rest | end_of_id rest -> > cont (TokenKW TokSpecId_Lexer) rest -> 'n':'o':'n':'a':'s':'s':'o':'c':rest -> +> 'n':'o':'n':'a':'s':'s':'o':'c':rest | end_of_id rest -> > cont (TokenKW TokSpecId_Nonassoc) rest -> 'l':'e':'f':'t':rest -> +> 'l':'e':'f':'t':rest | end_of_id rest -> > cont (TokenKW TokSpecId_Left) rest -> 'r':'i':'g':'h':'t':rest -> +> 'r':'i':'g':'h':'t':rest | end_of_id rest -> > cont (TokenKW TokSpecId_Right) rest -> 'p':'r':'e':'c':rest -> +> 'p':'r':'e':'c':rest | end_of_id rest -> > cont (TokenKW TokSpecId_Prec) rest -> 's':'h':'i':'f':'t':rest -> +> 's':'h':'i':'f':'t':rest | end_of_id rest -> > cont (TokenKW TokSpecId_Shift) rest -> 'e':'x':'p':'e':'c':'t':rest -> +> 'e':'x':'p':'e':'c':'t':rest | end_of_id rest -> > cont (TokenKW TokSpecId_Expect) rest -> 'e':'r':'r':'o':'r':'h':'a':'n':'d':'l':'e':'r':'t':'y':'p':'e':rest -> -> cont (TokenKW TokSpecId_ErrorHandlerType) rest -> 'e':'r':'r':'o':'r':rest -> +> 'e':'r':'r':'o':'r':'.':'e':'x':'p':'e':'c':'t':'e':'d':rest | end_of_id rest -> +> cont (TokenKW TokSpecId_ErrorExpected) rest +> 'e':'r':'r':'o':'r':rest | end_of_id rest -> > cont (TokenKW TokSpecId_Error) rest -> 'a':'t':'t':'r':'i':'b':'u':'t':'e':'t':'y':'p':'e':rest -> +> 'a':'t':'t':'r':'i':'b':'u':'t':'e':'t':'y':'p':'e':rest | end_of_id rest -> > cont (TokenKW TokSpecId_Attributetype) rest -> 'a':'t':'t':'r':'i':'b':'u':'t':'e':rest -> +> 'a':'t':'t':'r':'i':'b':'u':'t':'e':rest | end_of_id rest -> > cont (TokenKW TokSpecId_Attribute) rest > _ -> lexError ("unrecognised directive: %" ++ > takeWhile (not.isSpace) s) s +> where +> end_of_id (c:_) = not (isAlphaNum c) +> end_of_id [] = True > lexColon :: (Token -> Pfunc a) -> [Char] -> Int -> ParseResult a > lexColon cont (':':rest) = cont (TokenKW TokDoubleColon) rest diff --git a/packages/frontend/src/Happy/Frontend/Mangler.lhs b/packages/frontend/src/Happy/Frontend/Mangler.lhs index 85f885d0..2372f1ef 100644 --- a/packages/frontend/src/Happy/Frontend/Mangler.lhs +++ b/packages/frontend/src/Happy/Frontend/Mangler.lhs @@ -69,25 +69,31 @@ This bit is a real mess, mainly because of the error message support. > starts' = case getParserNames dirs of > [] -> [TokenName "happyParse" Nothing False] > ns -> ns +> error_resumptive | ResumptiveErrorHandler{} <- getError dirs = True +> | otherwise = False > > start_strs = [ startName++'_':p | (TokenName p _ _) <- starts' ] Build up a mapping from name values to strings. > name_env = (errorTok, errorName) : +> (catchTok, catchName) : > (dummyTok, dummyName) : > zip start_names start_strs ++ > zip nonterm_names nonterm_strs ++ > zip terminal_names terminal_strs > lookupName :: String -> [Name] -> lookupName n = [ t | (t,r) <- name_env, r == n ] +> lookupName n = [ t | (t,r) <- name_env, r == n +> , t /= catchTok || error_resumptive ] +> -- hide catchName unless %errorresumptive is active +> -- issue93.y uses catch as a nonterminal, we should not steal it > mapToName str' = > case lookupName str' of > [a] -> return a > [] -> do addErr ("unknown identifier '" ++ str' ++ "'") -> return errorTok +> return errorTok -- SG: What a confusing use of errorTok.. Use dummyTok? > (a:_) -> do addErr ("multiple use of '" ++ str' ++ "'") > return a @@ -229,7 +235,7 @@ Get the token specs in terms of Names. > lookupProdNo = (prod_array !), > lookupProdsOfName = lookup_prods, > token_specs = tokspec, -> terminals = errorTok : terminal_names, +> terminals = errorTok : catchTok : terminal_names, > non_terminals = start_names ++ nonterm_names, > -- INCLUDES the %start tokens > starts = zip4 parser_names start_names start_toks @@ -248,7 +254,7 @@ Get the token specs in terms of Names. > monad = getMonad dirs, > lexer = getLexer dirs, > error_handler = getError dirs, -> error_sig = getErrorHandlerType dirs, +> error_expected = getErrorHandlerExpectedList dirs, > token_type = getTokenType dirs, > expect = getExpect dirs > }) diff --git a/packages/frontend/src/Happy/Frontend/Parser/Bootstrapped.ly b/packages/frontend/src/Happy/Frontend/Parser/Bootstrapped.ly index b6494113..006a72e2 100644 --- a/packages/frontend/src/Happy/Frontend/Parser/Bootstrapped.ly +++ b/packages/frontend/src/Happy/Frontend/Parser/Bootstrapped.ly @@ -33,7 +33,7 @@ The parser. > spec_shift { TokenKW TokSpecId_Shift } > spec_expect { TokenKW TokSpecId_Expect } > spec_error { TokenKW TokSpecId_Error } -> spec_errorhandlertype { TokenKW TokSpecId_ErrorHandlerType } +> spec_errorexpected { TokenKW TokSpecId_ErrorExpected } > spec_attribute { TokenKW TokSpecId_Attribute } > spec_attributetype { TokenKW TokSpecId_Attributetype } > code { TokenInfo $$ TokCodeQuote } @@ -104,11 +104,11 @@ The parser. > | spec_shift { PrecShift } > | { PrecNone } -> tokInfos :: { [Directive String] } +> tokInfos :: { [Directive String] } > : tokInfos tokInfo { $2 : $1 } > | tokInfo { [$1] } -> tokInfo :: { Directive String } +> tokInfo :: { Directive String } > : spec_tokentype code { TokenType $2 } > | spec_token tokenSpecs { TokenSpec $2 } > | spec_name id optStart { TokenName $2 $3 False } @@ -123,8 +123,8 @@ The parser. > | spec_right ids { TokenRight $2 } > | spec_left ids { TokenLeft $2 } > | spec_expect int { TokenExpect $2 } -> | spec_error code { TokenError $2 } -> | spec_errorhandlertype id { TokenErrorHandlerType $2 } +> | spec_error code optCode { TokenError $2 $3 } +> | spec_errorexpected { TokenErrorExpected } > | spec_attributetype code { TokenAttributetype $2 } > | spec_attribute id code { TokenAttribute $2 $3 } diff --git a/packages/frontend/src/Happy/Frontend/Parser/Oracle.hs b/packages/frontend/src/Happy/Frontend/Parser/Oracle.hs index cdeebe8b..ca6ede62 100644 --- a/packages/frontend/src/Happy/Frontend/Parser/Oracle.hs +++ b/packages/frontend/src/Happy/Frontend/Parser/Oracle.hs @@ -86,12 +86,17 @@ optTokInfoP = withToken match where match (TokenKW TokSpecId_Expect) = Consume `andThenJust` pure TokenExpect <*> numP - match (TokenKW TokSpecId_Error) = - Consume `andThenJust` - pure TokenError <*> codeP - match (TokenKW TokSpecId_ErrorHandlerType) = + match (TokenKW TokSpecId_ErrorExpected) = Consume `andThenJust` - pure TokenErrorHandlerType <*> idtP + pure TokenErrorExpected + match (TokenKW TokSpecId_Error) = + Consume `andThenJust` do + codes <- manyP optCodeP + case codes of + [c1] -> return $ TokenError c1 Nothing + [c1, c2] -> return $ TokenError c1 (Just c2) + [] -> parseError "Expected a code block" + _ -> parseError "Too many code blocks" match (TokenKW TokSpecId_Attributetype) = Consume `andThenJust` pure TokenAttributetype <*> codeP diff --git a/packages/grammar/src/Happy/Grammar.lhs b/packages/grammar/src/Happy/Grammar.lhs index d088b0b3..ef29ca53 100644 --- a/packages/grammar/src/Happy/Grammar.lhs +++ b/packages/grammar/src/Happy/Grammar.lhs @@ -11,7 +11,8 @@ The Grammar data type. > Priority(..), > Assoc(..), > -> errorName, errorTok, startName, dummyName, firstStartTok, dummyTok, +> errorName, errorTok, catchName, catchTok, +> startName, dummyName, firstStartTok, dummyTok, > eofName, epsilonTok, > > mapDollarDollar @@ -111,15 +112,17 @@ In normal and GHC-based parsers, these numbers are also used in the generated grammar itself, except that the error token is mapped to -1. For array-based parsers, see the note in Tabular/LALR.lhs. -> startName, eofName, errorName, dummyName :: String +> startName, eofName, errorName, catchName, dummyName :: String > startName = "%start" -- with a suffix, like %start_1, %start_2 etc. > eofName = "%eof" > errorName = "error" +> catchName = "catch" > dummyName = "%dummy" -- shouldn't occur in the grammar anywhere -> firstStartTok, dummyTok, errorTok, epsilonTok :: Name -> firstStartTok = 3 -> dummyTok = 2 +> firstStartTok, dummyTok, errorTok, catchTok, epsilonTok :: Name +> firstStartTok = 4 +> dummyTok = 3 +> catchTok = 2 > errorTok = 1 > epsilonTok = 0 diff --git a/packages/tabular/src/Happy/Tabular.lhs b/packages/tabular/src/Happy/Tabular.lhs index f86d1c78..860d76cc 100644 --- a/packages/tabular/src/Happy/Tabular.lhs +++ b/packages/tabular/src/Happy/Tabular.lhs @@ -64,7 +64,7 @@ Find unused rules and tokens > start_rules = [ 0 .. (length starts' - 1) ] > used_rules = start_rules ++ > nub [ r | (_,a) <- actions, r <- extract_reductions a ] -> used_tokens = errorTok : eof : +> used_tokens = errorTok : catchTok : eof : > nub [ t | (t,a) <- actions, is_shift a ] > n_prods = length productions' > unused_terminals = filter (`notElem` used_tokens) terms diff --git a/packages/tabular/src/Happy/Tabular/First.lhs b/packages/tabular/src/Happy/Tabular/First.lhs index 7c0da2ca..5d1c9306 100644 --- a/packages/tabular/src/Happy/Tabular/First.lhs +++ b/packages/tabular/src/Happy/Tabular/First.lhs @@ -49,7 +49,7 @@ This will never terminate. > getNext fst_term prodNo prodsOfName env = > [ (nm, next nm) | (nm,_) <- env ] > where -> fn t | t == errorTok || t >= fst_term = Set.singleton t +> fn t | t == errorTok || t == catchTok || t >= fst_term = Set.singleton t > fn x = maybe (error "attempted FIRST(e) :-(") id (lookup x env) > next :: Name -> NameSet diff --git a/packages/tabular/src/Happy/Tabular/LALR.lhs b/packages/tabular/src/Happy/Tabular/LALR.lhs index 8597a405..d286cf88 100644 --- a/packages/tabular/src/Happy/Tabular/LALR.lhs +++ b/packages/tabular/src/Happy/Tabular/LALR.lhs @@ -5,6 +5,8 @@ Generation of LALR parsing tables. (c) 1997-2001 Simon Marlow ----------------------------------------------------------------------------- +> {-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-} +> > module Happy.Tabular.LALR > (genActionTable, genGotoTable, genLR0items, precalcClosure0, > propLookaheads, calcLookaheads, mergeLookaheadInfo, countConflicts, @@ -518,7 +520,7 @@ Generate the action table > possAction goto _set (Lr1 rule pos la) = > case findRule g rule pos of -> Just t | t >= fst_term || t == errorTok -> +> Just t | t >= fst_term || t == errorTok || t == catchTok -> > let f j = (t,LR'Shift j p) > p = maybe No id (lookup t prios) > in map f $ maybeToList (lookup t goto) diff --git a/tests/issue265.y b/tests/issue265.y new file mode 100644 index 00000000..9cbfcc21 --- /dev/null +++ b/tests/issue265.y @@ -0,0 +1,80 @@ +{ +{-# LANGUAGE FunctionalDependencies #-} +{-# LANGUAGE FlexibleInstances #-} +-- For ancient GHC 7.0.4 +{-# LANGUAGE MultiParamTypeClasses #-} +module Main where + +import Control.Monad (when) +import Data.Char +import System.Exit +} + +%name parseStmts +%tokentype { Token } +%errorhandlertype explist +%error { handleError } + +%monad { ParseM } { (>>=) } { return } + +%token + '1' { TOne } + '+' { TPlus } + ';' { TSemi } + +%% + +Stmts : {- empty -} { [] } + | Stmt { [$1] } + | Stmts ';' Stmt { $1 ++ [$3] } + +Stmt : Exp { ExpStmt $1 } + +Exp : '1' { One } + | Exp '+' Exp %shift { Plus $1 $3 } + +{ +data Token = TOne | TPlus | TSemi + deriving (Eq,Show) + +type Stmts = [Stmt] +data Stmt = ExpStmt Exp + deriving (Eq, Show) +data Exp = One | Plus Exp Exp + deriving (Eq, Show) + +type ParseM = Either ParseError + +data ParseError + = ParseError [String] + deriving Eq +instance Show ParseError where + show (ParseError exp) = "Parse error. Expected: " ++ show exp + +recordParseError :: [String] -> ParseM a +recordParseError expected = Left (ParseError expected) + +handleError :: ([Token], [String]) -> ParseM a +handleError (ts, expected) = recordParseError expected + +lexer :: String -> [Token] +lexer [] = [] +lexer (c:cs) + | isSpace c = lexer cs + | c == '1' = TOne:(lexer cs) + | c == '+' = TPlus:(lexer cs) + | c == ';' = TSemi:(lexer cs) + | otherwise = error "lexer error" + +main :: IO () +main = do + test "11;1" $ \res -> res == Left (ParseError ["';'","'+'"]) + where + test inp p = do + putStrLn $ "testing " ++ inp + let tokens = lexer inp + let res = parseStmts tokens + when (not (p res)) $ do + print res + exitWith (ExitFailure 1) +} diff --git a/tests/monaderror-explist.y b/tests/monaderror-explist.y index 558f28ee..dd84d23c 100644 --- a/tests/monaderror-explist.y +++ b/tests/monaderror-explist.y @@ -14,8 +14,8 @@ import Data.List (isPrefixOf) %name parseFoo %tokentype { Token } -%errorhandlertype explist %error { handleErrorExpList } +%error.expected %monad { ParseM } { (>>=) } { return } @@ -46,9 +46,9 @@ data Token | TokenTest deriving (Eq,Show) -handleErrorExpList :: ([Token], [String]) -> ParseM a -handleErrorExpList ([], _) = throwError $ ParseError Nothing -handleErrorExpList (ts, explist) = throwError $ ParseError $ Just $ (head ts, explist) +handleErrorExpList :: [Token] -> [String] -> ParseM a +handleErrorExpList [] _ = throwError $ ParseError Nothing +handleErrorExpList ts explist = throwError $ ParseError $ Just $ (head ts, explist) lexer :: String -> [Token] lexer [] = [] diff --git a/tests/monaderror-resume.y b/tests/monaderror-resume.y new file mode 100644 index 00000000..46ebf9fc --- /dev/null +++ b/tests/monaderror-resume.y @@ -0,0 +1,116 @@ +{ +{-# LANGUAGE FunctionalDependencies #-} +{-# LANGUAGE FlexibleInstances #-} +-- For ancient GHC 7.0.4 +{-# LANGUAGE MultiParamTypeClasses #-} +module Main where + +import Control.Monad (when) +import Data.Char +import System.Exit +} + +%name parseStmts Stmts +%name parseExp Exp +%tokentype { Token } +%error { \_ -> abort } { reportError } -- the entire point of this test +%error.expected -- as in monaderror-explist.y + +%monad { ParseM } { (>>=) } { return } + +%token + '1' { TOne } + '+' { TPlus } + ';' { TSemi } + +%% + +Stmts : {- empty -} { [] } + | Stmt { [$1] } + | Stmts ';' Stmt { $1 ++ [$3] } + | catch ';' Stmt %shift { [$3] } -- Could insert error AST token here in place of $1 + +Stmt : Exp { ExpStmt $1 } + +Exp : '1' { One } + | Exp '+' Exp %shift { Plus $1 $3 } + +{ +data Token = TOne | TPlus | TSemi + deriving (Eq,Show) + +type Stmts = [Stmt] +data Stmt = ExpStmt Exp + deriving (Eq, Show) +data Exp = One | Plus Exp Exp + deriving (Eq, Show) + +----------- Validation monad +data Validate e a = V e (Maybe a) + deriving Functor +instance Monoid e => Applicative (Validate e) where + pure a = V mempty (Just a) + V e1 f <*> V e2 a = V (e1 <> e2) (f <*> a) +instance Monoid e => Monad (Validate e) where + V e Nothing >>= _ = V e Nothing -- fatal + V e1 (Just a) >>= k | V e2 b <- k a = V (e1 <> e2) b -- non-fatal + +abort :: Monoid e => Validate e a -- this would be mzero from MonadPlus +abort = V mempty Nothing + +recordError :: e -> Validate e () -- this would be tell from MonadWriter +recordError e = V e (Just ()) + +runValidate (V e mb_a) = (e, mb_a) +----------- + +type ParseM = Validate [ParseError] + +data ParseError + = ParseError [String] + deriving Eq +instance Show ParseError where + show (ParseError exp) = "Parse error. Expected: " ++ show exp + +recordParseError :: [String] -> ParseM () +recordParseError expected = recordError [ParseError expected] + +reportError :: [Token] -> [String] -> ([Token] -> ParseM a) -> ParseM a +reportError ts expected resume = do + recordParseError expected + resume ts + +lexer :: String -> [Token] +lexer [] = [] +lexer (c:cs) + | isSpace c = lexer cs + | c == '1' = TOne:(lexer cs) + | c == '+' = TPlus:(lexer cs) + | c == ';' = TSemi:(lexer cs) + | otherwise = error "lexer error" + +main :: IO () +main = do + test "1+1;1" $ \(_,mb_ast) -> mb_ast == Just [ExpStmt (One `Plus` One), ExpStmt One] + test "1++1;1" $ \(errs,_) -> errs == [ParseError ["'1'"]] + test "1++1;1;+" $ \(errs,_) -> errs == [ParseError ["'1'"], ParseError ["'1'"]] + test "11;1" $ \(errs,_) -> errs == [ParseError ["';'"]] + test "11;1;++" $ \(errs,_) -> errs == [ParseError ["';'"], ParseError ["'1'"]] + test "11;1;1++" $ \(errs,_) -> errs == [ParseError ["';'"], ParseError ["'1'"]] + testExp "11" $ \(errs,_) -> errs == [ParseError ["'+'"]] + where + test inp p = do + putStrLn $ "testing " ++ inp + let tokens = lexer inp + let res = runValidate $ parseStmts tokens + when (not (p res)) $ do + print res + exitWith (ExitFailure 1) + testExp inp p = do + putStrLn $ "testing Exp " ++ inp + let tokens = lexer inp + let res = runValidate $ parseExp tokens + when (not (p res)) $ do + print res + exitWith (ExitFailure 1) +}