From 17d3c937e9d469a5bb3b0e90b77c394eb8a4ba3b Mon Sep 17 00:00:00 2001 From: John Ericson Date: Fri, 20 Sep 2024 11:44:23 -0400 Subject: [PATCH] Improve the abstract syntax for tokens Instead of deferring the handling of `$$` to the backends, properly parse the `$$` syntax up front, and store the result in the AST. Note that the GLR backend was improperly substituting the `$$` twice. Now that we have better types, this was surfaced as type error, and then removed. Fixes #295 --- .../src/Happy/Backend/GLR/ProduceCode.lhs | 24 +++++----- .../src/Happy/Backend/LALR/ProduceCode.lhs | 15 +++---- lib/frontend/boot-src/Parser.ly | 6 +-- lib/frontend/src/Happy/Frontend/AbsSyn.lhs | 38 ++++++++++++++-- lib/frontend/src/Happy/Frontend/Parser.hs | 10 ++--- lib/grammar/src/Happy/Grammar.lhs | 45 +++++++------------ .../src/Happy/Grammar/ExpressionWithHole.hs | 13 ++++++ lib/happy-lib.cabal | 5 ++- lib/tabular/src/Happy/Tabular/Info.lhs | 6 ++- 9 files changed, 98 insertions(+), 64 deletions(-) create mode 100644 lib/grammar/src/Happy/Grammar/ExpressionWithHole.hs diff --git a/lib/backend-glr/src/Happy/Backend/GLR/ProduceCode.lhs b/lib/backend-glr/src/Happy/Backend/GLR/ProduceCode.lhs index e319d47e..a4738f0d 100644 --- a/lib/backend-glr/src/Happy/Backend/GLR/ProduceCode.lhs +++ b/lib/backend-glr/src/Happy/Backend/GLR/ProduceCode.lhs @@ -19,6 +19,7 @@ This module is designed as an extension to the Haskell parser generator Happy. > import Paths_happy_lib ( version ) > import Happy.Grammar +> import Happy.Grammar.ExpressionWithHole ( substExpressionWithHole ) > import Happy.Tabular.LALR > import Data.Array ( Array, (!), array, assocs ) > import Data.Char ( isSpace, isAlphaNum ) @@ -273,9 +274,9 @@ that will be used for them in the GLR parser. > | (i,tok) <- token_specs g ] -- Tokens (terminals) > ++ [(eof_term g,"HappyEOF")] -- EOF symbol (internal terminal) > where -> mkMatch tok = case mapDollarDollar tok of -> Nothing -> tok -> Just fn -> fn "_" +> mkMatch tok = case tok of +> TokenFixed t -> t +> TokenWithValue e -> substExpressionWithHole e "_" > toGSym :: [(Int, String)] -> Int -> String > toGSym gsMap i @@ -314,10 +315,7 @@ It also shares identical reduction values as CAFs > where > startLine > = unwords [ name , show_st exts state, "(" , getTok , ") =" ] -> getTok = let tok = toGSym gsMap symInt -> in case mapDollarDollar tok of -> Nothing -> tok -> Just f -> f "_" +> getTok = toGSym gsMap symInt > mkAct act > = case act of > LR'Shift newSt _ -> "Shift " ++ show newSt ++ " []" @@ -421,7 +419,7 @@ Creating a type for storing semantic rules use in later stages. > type SemInfo -> = [(String, String, [Int], [((Int,Int), ([(Int,String)],String), [Int])])] +> = [(String, String, [Int], [((Int, Int), ([(Int, TokenSpec)], String), [Int])])] > mkGSemType :: Options -> Grammar String -> Pragmas -> (ShowS, SemInfo) > mkGSemType (TreeDecode,_,_) g pragmas @@ -566,17 +564,17 @@ Creates the appropriate semantic values. > nodes UseFiltering = "(" ++ foldr (\l -> mkHappyVar (l+1) . showChar ':') "[])" mask > ] -> mk_lambda :: [(Int, String)] -> Int -> String -> String +> mk_lambda :: [(Int, TokenSpec)] -> Int -> String -> String > mk_lambda pats v > = (\s -> "\\" ++ s ++ " -> ") . mk_binder id pats v -> mk_binder :: (String -> String) -> [(Int, String)] -> Int -> String -> String +> mk_binder :: (String -> String) -> [(Int, TokenSpec)] -> Int -> String -> String > mk_binder wrap pats v > = case lookup v pats of > Nothing -> mkHappyVar v -> Just p -> case mapDollarDollar p of -> Nothing -> wrap . mkHappyVar v . showChar '@' . brack p -> Just fn -> wrap . brack' (fn . mkHappyVar v) +> Just p -> case p of +> TokenFixed p' -> wrap . mkHappyVar v . showChar '@' . brack p' +> TokenWithValue e -> wrap . brack' (substExpressionWithHole e . mkHappyVar v) --- diff --git a/lib/backend-lalr/src/Happy/Backend/LALR/ProduceCode.lhs b/lib/backend-lalr/src/Happy/Backend/LALR/ProduceCode.lhs index 74abc0a6..ed7095e3 100644 --- a/lib/backend-lalr/src/Happy/Backend/LALR/ProduceCode.lhs +++ b/lib/backend-lalr/src/Happy/Backend/LALR/ProduceCode.lhs @@ -9,6 +9,7 @@ The code generator. > import Paths_happy_lib ( version ) > import Data.Version ( showVersion ) > import Happy.Grammar +> import Happy.Grammar.ExpressionWithHole ( substExpressionWithHole ) > import Happy.Tabular.LALR > import Data.Maybe ( isNothing, fromMaybe ) @@ -417,19 +418,17 @@ The token conversion function. Use a variable rather than '_' to replace '$$', so we can use it on the left hand side of '@'. -> removeDollarDollar xs = case mapDollarDollar xs of -> Nothing -> xs -> Just fn -> fn "happy_dollar_dollar" +> removeDollarDollar tok = case tok of +> TokenFixed t -> t +> TokenWithValue e -> substExpressionWithHole e "happy_dollar_dollar" > mkHappyTerminalVar :: Int -> Int -> String -> String > mkHappyTerminalVar i t = -> case tok_str_fn of +> case lookup t token_rep of > Nothing -> pat -> Just fn -> brack (fn (pat [])) +> Just (TokenFixed _) -> pat +> Just (TokenWithValue e) -> brack $ substExpressionWithHole e $ pat [] > where -> tok_str_fn = case lookup t token_rep of -> Nothing -> Nothing -> Just str' -> mapDollarDollar str' > pat = mkHappyVar i > tokIndex i = i - n_nonterminals - n_starts - 2 diff --git a/lib/frontend/boot-src/Parser.ly b/lib/frontend/boot-src/Parser.ly index 72019e67..b193ae52 100644 --- a/lib/frontend/boot-src/Parser.ly +++ b/lib/frontend/boot-src/Parser.ly @@ -132,12 +132,12 @@ The parser. > : id { Just $1 } > | {- nothing -} { Nothing } -> tokenSpecs :: { [(String,String)] } +> tokenSpecs :: { [(String, TokenSpec)] } > : tokenSpec tokenSpecs { $1:$2 } > | tokenSpec { [$1] } -> tokenSpec :: { (String,String) } -> : id code { ($1,$2) } +> tokenSpec :: { (String, TokenSpec) } +> : id code { ($1, parseTokenSpec $2) } > ids :: { [String] } > : id ids { $1 : $2 } diff --git a/lib/frontend/src/Happy/Frontend/AbsSyn.lhs b/lib/frontend/src/Happy/Frontend/AbsSyn.lhs index 77e98e04..a730ac48 100644 --- a/lib/frontend/src/Happy/Frontend/AbsSyn.lhs +++ b/lib/frontend/src/Happy/Frontend/AbsSyn.lhs @@ -13,10 +13,18 @@ Here is the abstract syntax of the language we parse. > getImportedIdentity, getMonad, getError, > getPrios, getPrioNames, getExpect, getErrorHandlerType, > getAttributes, getAttributetype, getAttributeGrammarExtras, -> Rule(..), Prod(..), Term(..), Prec(..) +> parseTokenSpec, +> Rule(..), Prod(..), Term(..), Prec(..), +> TokenSpec(..) -- reexport > ) where -> import Happy.Grammar (ErrorHandlerType(..), AttributeGrammarExtras(..)) +> import Data.Char (isAlphaNum) +> import Happy.Grammar +> ( ErrorHandlerType(..) +> , TokenSpec(..) +> , AttributeGrammarExtras(..) +> ) +> import Happy.Grammar.ExpressionWithHole > data BookendedAbsSyn > = BookendedAbsSyn @@ -63,7 +71,7 @@ generate some error messages. > > data Directive a > = TokenType String -- %tokentype -> | TokenSpec [(a,String)] -- %token +> | TokenSpec [(a, TokenSpec)] -- %token > | TokenName String (Maybe String) Bool -- %name/%partial (True <=> %partial) > | TokenLexer String String -- %lexer > | TokenErrorHandlerType String -- %errorhandlertype @@ -109,7 +117,7 @@ generate some error messages. > [] -> (False,"()","HappyIdentity","Prelude.>>=","Prelude.return") > _ -> error "multiple monad directives" -> getTokenSpec :: [Directive t] -> [(t, String)] +> getTokenSpec :: [Directive t] -> [(t, TokenSpec)] > getTokenSpec ds = concat [ t | (TokenSpec t) <- ds ] > getPrios :: [Directive t] -> [Directive t] @@ -170,3 +178,25 @@ generate some error messages. > attributetype = at > } > (_ : _, Nothing) -> error "attributes found without attribute type directive" + +> -- | Parse a token spec. +> -- +> -- The first occurence of '$$' indicates an expression in which the '$$' +> -- will be substituted for the actual lexed token. '$$' in string or char +> -- literals ('".."' and '\'.\'') however does not count. +> parseTokenSpec :: String -> TokenSpec +> parseTokenSpec code0 = go code0 "" +> where go code acc = +> case code of +> [] -> TokenFixed code0 +> +> '"' :r -> case reads code :: [(String,String)] of +> [] -> go r ('"':acc) +> (s,r'):_ -> go r' (reverse (show s) ++ acc) +> a:'\'' :r | isAlphaNum a -> go r ('\'':a:acc) +> '\'' :r -> case reads code :: [(Char,String)] of +> [] -> go r ('\'':acc) +> (c,r'):_ -> go r' (reverse (show c) ++ acc) +> '\\':'$':r -> go r ('$':acc) +> '$':'$':r -> TokenWithValue $ ExpressionWithHole (reverse acc) r +> c:r -> go r (c:acc) diff --git a/lib/frontend/src/Happy/Frontend/Parser.hs b/lib/frontend/src/Happy/Frontend/Parser.hs index 81fa5a16..6c19582d 100644 --- a/lib/frontend/src/Happy/Frontend/Parser.hs +++ b/lib/frontend/src/Happy/Frontend/Parser.hs @@ -135,15 +135,15 @@ happyIn19 x = Happy_GHC_Exts.unsafeCoerce# (HappyWrap19 x) happyOut19 :: (HappyAbsSyn ) -> HappyWrap19 happyOut19 x = Happy_GHC_Exts.unsafeCoerce# x {-# INLINE happyOut19 #-} -newtype HappyWrap20 = HappyWrap20 ([(String,String)]) -happyIn20 :: ([(String,String)]) -> (HappyAbsSyn ) +newtype HappyWrap20 = HappyWrap20 ([(String, TokenSpec)]) +happyIn20 :: ([(String, TokenSpec)]) -> (HappyAbsSyn ) happyIn20 x = Happy_GHC_Exts.unsafeCoerce# (HappyWrap20 x) {-# INLINE happyIn20 #-} happyOut20 :: (HappyAbsSyn ) -> HappyWrap20 happyOut20 x = Happy_GHC_Exts.unsafeCoerce# x {-# INLINE happyOut20 #-} -newtype HappyWrap21 = HappyWrap21 ((String,String)) -happyIn21 :: ((String,String)) -> (HappyAbsSyn ) +newtype HappyWrap21 = HappyWrap21 ((String, TokenSpec)) +happyIn21 :: ((String, TokenSpec)) -> (HappyAbsSyn ) happyIn21 x = Happy_GHC_Exts.unsafeCoerce# (HappyWrap21 x) {-# INLINE happyIn21 #-} happyOut21 :: (HappyAbsSyn ) -> HappyWrap21 @@ -768,7 +768,7 @@ happyReduction_51 happy_x_2 = case happyOutTok happy_x_1 of { (TokenInfo happy_var_1 TokId) -> case happyOutTok happy_x_2 of { (TokenInfo happy_var_2 TokCodeQuote) -> happyIn21 - ((happy_var_1,happy_var_2) + ((happy_var_1, parseTokenSpec happy_var_2) )}} happyReduce_52 :: () => Happy_GHC_Exts.Int# -> Token -> Happy_GHC_Exts.Int# -> Happy_IntList -> HappyStk (HappyAbsSyn ) -> P (HappyAbsSyn ) diff --git a/lib/grammar/src/Happy/Grammar.lhs b/lib/grammar/src/Happy/Grammar.lhs index b3d670cb..64b86d37 100644 --- a/lib/grammar/src/Happy/Grammar.lhs +++ b/lib/grammar/src/Happy/Grammar.lhs @@ -8,7 +8,9 @@ The Grammar data type. > module Happy.Grammar ( > Name, > -> Production(..), Grammar(..), +> Production(..), +> TokenSpec(..), +> Grammar(..), > AttributeGrammarExtras(..), > Priority(..), > Assoc(..), @@ -16,24 +18,33 @@ The Grammar data type. > > errorName, errorTok, startName, dummyName, firstStartTok, dummyTok, > eofName, epsilonTok, -> -> mapDollarDollar > ) where > import Data.Array -> import Data.Char (isAlphaNum) +> import Happy.Grammar.ExpressionWithHole (ExpressionWithHole) + > type Name = Int > data Production eliminator > = Production Name [Name] (eliminator,[Int]) Priority > deriving Show +> data TokenSpec +> +> -- | The token is just a fixed value +> = TokenFixed String +> +> -- | The token is an expression involving the value of the lexed token. +> | TokenWithValue ExpressionWithHole +> +> deriving (Eq, Show) + > data Grammar eliminator > = Grammar { > productions :: [Production eliminator], > lookupProdNo :: Int -> Production eliminator, > lookupProdsOfName :: Name -> [Int], -> token_specs :: [(Name,String)], +> token_specs :: [(Name, TokenSpec)], > terminals :: [Name], > non_terminals :: [Name], > starts :: [(String,Name,Name,Bool)], @@ -145,27 +156,3 @@ For array-based parsers, see the note in Tabular/LALR.lhs. > dummyTok = 2 > errorTok = 1 > epsilonTok = 0 - ------------------------------------------------------------------------------ -Replace $$ with an arbitrary string, being careful to avoid ".." and '.'. - -> mapDollarDollar :: String -> Maybe (String -> String) -> mapDollarDollar = fmap (\(l, r) repr -> l ++ repr ++ r) . mapDollarDollar' -> - -> mapDollarDollar' :: String -> Maybe (String, String) -> mapDollarDollar' code0 = go code0 "" -> where go code acc = -> case code of -> [] -> Nothing -> -> '"' :r -> case reads code :: [(String,String)] of -> [] -> go r ('"':acc) -> (s,r'):_ -> go r' (reverse (show s) ++ acc) -> a:'\'' :r | isAlphaNum a -> go r ('\'':a:acc) -> '\'' :r -> case reads code :: [(Char,String)] of -> [] -> go r ('\'':acc) -> (c,r'):_ -> go r' (reverse (show c) ++ acc) -> '\\':'$':r -> go r ('$':acc) -> '$':'$':r -> Just (reverse acc, r) -> c:r -> go r (c:acc) diff --git a/lib/grammar/src/Happy/Grammar/ExpressionWithHole.hs b/lib/grammar/src/Happy/Grammar/ExpressionWithHole.hs new file mode 100644 index 00000000..016574f5 --- /dev/null +++ b/lib/grammar/src/Happy/Grammar/ExpressionWithHole.hs @@ -0,0 +1,13 @@ +module Happy.Grammar.ExpressionWithHole where + +-- | The overall expression is +-- 'tokLeft ++ substitutedForHole ++ tokRight'. +data ExpressionWithHole + = ExpressionWithHole { + exprLeft :: String, + exprRight :: String + } + deriving (Eq, Show) + +substExpressionWithHole :: ExpressionWithHole -> String -> String +substExpressionWithHole (ExpressionWithHole l r) = \repr -> l ++ repr ++ r diff --git a/lib/happy-lib.cabal b/lib/happy-lib.cabal index f2da0cb5..cfccd29a 100644 --- a/lib/happy-lib.cabal +++ b/lib/happy-lib.cabal @@ -75,7 +75,9 @@ common common-stanza library grammar import: common-stanza hs-source-dirs: grammar/src - exposed-modules: Happy.Grammar + exposed-modules: + Happy.Grammar + Happy.Grammar.ExpressionWithHole build-depends: base < 5, array library frontend @@ -129,6 +131,7 @@ library backend-glr library import: common-stanza reexported-modules: Happy.Grammar, + Happy.Grammar.ExpressionWithHole, Happy.Frontend, Happy.Frontend.AbsSyn, Happy.Frontend.Mangler, diff --git a/lib/tabular/src/Happy/Tabular/Info.lhs b/lib/tabular/src/Happy/Tabular/Info.lhs index fedb9fbf..1343facc 100644 --- a/lib/tabular/src/Happy/Tabular/Info.lhs +++ b/lib/tabular/src/Happy/Tabular/Info.lhs @@ -9,6 +9,7 @@ Generating info files. > import Data.Set ( Set ) > import qualified Data.Set as Set hiding ( Set ) > import Happy.Grammar +> import Happy.Grammar.ExpressionWithHole ( substExpressionWithHole ) > import Happy.Tabular.LALR ( Lr0Item(..), LRAction(..), Goto(..), GotoTable, ActionTable ) > import Data.Array @@ -182,7 +183,10 @@ Produce a file of parser information, useful for debugging the parser. > showTerminal (t,s) > = str "\t" > . showJName 15 t -> . str "{ " . str s . str " }" +> . str "{ " . showToken s . str " }" + +> showToken (TokenFixed s) = str s +> showToken (TokenWithValue e) = str $ substExpressionWithHole e "$$" > showNonTerminals > = banner "Non-terminals"