Skip to content

Commit

Permalink
Improve the abstract syntax for tokens
Browse files Browse the repository at this point in the history
Instead of deferring the handling of `$$` to the backends, properly
parse the `$$` syntax up front, and store the result in the AST.

Note that the GLR backend was improperly substituting the `$$` twice.
Now that we have better types, this was surfaced as type error, and then
removed.
  • Loading branch information
Ericson2314 committed Sep 20, 2024
1 parent 65eebe2 commit 4ac4d5d
Show file tree
Hide file tree
Showing 7 changed files with 89 additions and 62 deletions.
23 changes: 10 additions & 13 deletions lib/backend-glr/src/Happy/Backend/GLR/ProduceCode.lhs
Original file line number Diff line number Diff line change
Expand Up @@ -273,9 +273,9 @@ that will be used for them in the GLR parser.
> | (i,tok) <- token_specs g ] -- Tokens (terminals)
> ++ [(eof_term g,"HappyEOF")] -- EOF symbol (internal terminal)
> where
> mkMatch tok = case mapDollarDollar tok of
> Nothing -> tok
> Just fn -> fn "_"
> mkMatch tok = case tok of
> TokenFixed t -> t
> TokenWithValue e -> substExpressionWithHole e "_"

> toGSym :: [(Int, String)] -> Int -> String
> toGSym gsMap i
Expand Down Expand Up @@ -314,10 +314,7 @@ It also shares identical reduction values as CAFs
> where
> startLine
> = unwords [ name , show_st exts state, "(" , getTok , ") =" ]
> getTok = let tok = toGSym gsMap symInt
> in case mapDollarDollar tok of
> Nothing -> tok
> Just f -> f "_"
> getTok = toGSym gsMap symInt
> mkAct act
> = case act of
> LR'Shift newSt _ -> "Shift " ++ show newSt ++ " []"
Expand Down Expand Up @@ -421,7 +418,7 @@ Creating a type for storing semantic rules
use in later stages.

> type SemInfo
> = [(String, String, [Int], [((Int,Int), ([(Int,String)],String), [Int])])]
> = [(String, String, [Int], [((Int, Int), ([(Int, TokenSpec)], String), [Int])])]

> mkGSemType :: Options -> Grammar String -> Pragmas -> (ShowS, SemInfo)
> mkGSemType (TreeDecode,_,_) g pragmas
Expand Down Expand Up @@ -566,17 +563,17 @@ Creates the appropriate semantic values.
> nodes UseFiltering = "(" ++ foldr (\l -> mkHappyVar (l+1) . showChar ':') "[])" mask
> ]

> mk_lambda :: [(Int, String)] -> Int -> String -> String
> mk_lambda :: [(Int, TokenSpec)] -> Int -> String -> String
> mk_lambda pats v
> = (\s -> "\\" ++ s ++ " -> ") . mk_binder id pats v

> mk_binder :: (String -> String) -> [(Int, String)] -> Int -> String -> String
> mk_binder :: (String -> String) -> [(Int, TokenSpec)] -> Int -> String -> String
> mk_binder wrap pats v
> = case lookup v pats of
> Nothing -> mkHappyVar v
> Just p -> case mapDollarDollar p of
> Nothing -> wrap . mkHappyVar v . showChar '@' . brack p
> Just fn -> wrap . brack' (fn . mkHappyVar v)
> Just p -> case p of
> TokenFixed p' -> wrap . mkHappyVar v . showChar '@' . brack p'
> TokenWithValue e -> wrap . brack' (substExpressionWithHole e . mkHappyVar v)


---
Expand Down
14 changes: 6 additions & 8 deletions lib/backend-lalr/src/Happy/Backend/LALR/ProduceCode.lhs
Original file line number Diff line number Diff line change
Expand Up @@ -417,19 +417,17 @@ The token conversion function.
Use a variable rather than '_' to replace '$$', so we can use it on
the left hand side of '@'.

> removeDollarDollar xs = case mapDollarDollar xs of
> Nothing -> xs
> Just fn -> fn "happy_dollar_dollar"
> removeDollarDollar tok = case tok of
> TokenFixed t -> t
> TokenWithValue e -> substExpressionWithHole e "happy_dollar_dollar"

> mkHappyTerminalVar :: Int -> Int -> String -> String
> mkHappyTerminalVar i t =
> case tok_str_fn of
> case lookup t token_rep of
> Nothing -> pat
> Just fn -> brack (fn (pat []))
> Just (TokenFixed _) -> pat
> Just (TokenWithValue e) -> brack $ substExpressionWithHole e $ pat []
> where
> tok_str_fn = case lookup t token_rep of
> Nothing -> Nothing
> Just str' -> mapDollarDollar str'
> pat = mkHappyVar i

> tokIndex i = i - n_nonterminals - n_starts - 2
Expand Down
6 changes: 3 additions & 3 deletions lib/frontend/boot-src/Parser.ly
Original file line number Diff line number Diff line change
Expand Up @@ -132,12 +132,12 @@ The parser.
> : id { Just $1 }
> | {- nothing -} { Nothing }

> tokenSpecs :: { [(String,String)] }
> tokenSpecs :: { [(String, TokenSpec)] }
> : tokenSpec tokenSpecs { $1:$2 }
> | tokenSpec { [$1] }

> tokenSpec :: { (String,String) }
> : id code { ($1,$2) }
> tokenSpec :: { (String, TokenSpec) }
> : id code { ($1, parseTokenSpec $2) }

> ids :: { [String] }
> : id ids { $1 : $2 }
Expand Down
38 changes: 34 additions & 4 deletions lib/frontend/src/Happy/Frontend/AbsSyn.lhs
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,18 @@ Here is the abstract syntax of the language we parse.
> getImportedIdentity, getMonad, getError,
> getPrios, getPrioNames, getExpect, getErrorHandlerType,
> getAttributes, getAttributetype, getAttributeGrammarExtras,
> Rule(..), Prod(..), Term(..), Prec(..)
> parseTokenSpec,
> Rule(..), Prod(..), Term(..), Prec(..),
> TokenSpec(..) -- reexport
> ) where

> import Happy.Grammar (ErrorHandlerType(..), AttributeGrammarExtras(..))
> import Data.Char (isAlphaNum)
> import Happy.Grammar
> ( ErrorHandlerType(..)
> , TokenSpec(..)
> , ExpressionWithHole(..)
> , AttributeGrammarExtras(..)
> )

> data BookendedAbsSyn
> = BookendedAbsSyn
Expand Down Expand Up @@ -63,7 +71,7 @@ generate some error messages.
>
> data Directive a
> = TokenType String -- %tokentype
> | TokenSpec [(a,String)] -- %token
> | TokenSpec [(a, TokenSpec)] -- %token
> | TokenName String (Maybe String) Bool -- %name/%partial (True <=> %partial)
> | TokenLexer String String -- %lexer
> | TokenErrorHandlerType String -- %errorhandlertype
Expand Down Expand Up @@ -109,7 +117,7 @@ generate some error messages.
> [] -> (False,"()","HappyIdentity","Prelude.>>=","Prelude.return")
> _ -> error "multiple monad directives"

> getTokenSpec :: [Directive t] -> [(t, String)]
> getTokenSpec :: [Directive t] -> [(t, TokenSpec)]
> getTokenSpec ds = concat [ t | (TokenSpec t) <- ds ]

> getPrios :: [Directive t] -> [Directive t]
Expand Down Expand Up @@ -170,3 +178,25 @@ generate some error messages.
> attributetype = at
> }
> (_ : _, Nothing) -> error "attributes found without attribute type directive"

> -- | Parse a token spec.
> --
> -- The first occurence of '$$' indicates an expression in which the '$$'
> -- will be substituted for the actual lexed token. '$$' in string or char
> -- literals ('".."' and '\'.\'') however does not count.
> parseTokenSpec :: String -> TokenSpec
> parseTokenSpec code0 = go code0 ""
> where go code acc =
> case code of
> [] -> TokenFixed code0
>
> '"' :r -> case reads code :: [(String,String)] of
> [] -> go r ('"':acc)
> (s,r'):_ -> go r' (reverse (show s) ++ acc)
> a:'\'' :r | isAlphaNum a -> go r ('\'':a:acc)
> '\'' :r -> case reads code :: [(Char,String)] of
> [] -> go r ('\'':acc)
> (c,r'):_ -> go r' (reverse (show c) ++ acc)
> '\\':'$':r -> go r ('$':acc)
> '$':'$':r -> TokenWithValue $ ExpressionWithHole (reverse acc) r
> c:r -> go r (c:acc)
10 changes: 5 additions & 5 deletions lib/frontend/src/Happy/Frontend/Parser.hs
Original file line number Diff line number Diff line change
Expand Up @@ -135,15 +135,15 @@ happyIn19 x = Happy_GHC_Exts.unsafeCoerce# (HappyWrap19 x)
happyOut19 :: (HappyAbsSyn ) -> HappyWrap19
happyOut19 x = Happy_GHC_Exts.unsafeCoerce# x
{-# INLINE happyOut19 #-}
newtype HappyWrap20 = HappyWrap20 ([(String,String)])
happyIn20 :: ([(String,String)]) -> (HappyAbsSyn )
newtype HappyWrap20 = HappyWrap20 ([(String, TokenSpec)])
happyIn20 :: ([(String, TokenSpec)]) -> (HappyAbsSyn )
happyIn20 x = Happy_GHC_Exts.unsafeCoerce# (HappyWrap20 x)
{-# INLINE happyIn20 #-}
happyOut20 :: (HappyAbsSyn ) -> HappyWrap20
happyOut20 x = Happy_GHC_Exts.unsafeCoerce# x
{-# INLINE happyOut20 #-}
newtype HappyWrap21 = HappyWrap21 ((String,String))
happyIn21 :: ((String,String)) -> (HappyAbsSyn )
newtype HappyWrap21 = HappyWrap21 ((String, TokenSpec))
happyIn21 :: ((String, TokenSpec)) -> (HappyAbsSyn )
happyIn21 x = Happy_GHC_Exts.unsafeCoerce# (HappyWrap21 x)
{-# INLINE happyIn21 #-}
happyOut21 :: (HappyAbsSyn ) -> HappyWrap21
Expand Down Expand Up @@ -768,7 +768,7 @@ happyReduction_51 happy_x_2
= case happyOutTok happy_x_1 of { (TokenInfo happy_var_1 TokId) ->
case happyOutTok happy_x_2 of { (TokenInfo happy_var_2 TokCodeQuote) ->
happyIn21
((happy_var_1,happy_var_2)
((happy_var_1, parseTokenSpec happy_var_2)
)}}

happyReduce_52 :: () => Happy_GHC_Exts.Int# -> Token -> Happy_GHC_Exts.Int# -> Happy_IntList -> HappyStk (HappyAbsSyn ) -> P (HappyAbsSyn )
Expand Down
55 changes: 27 additions & 28 deletions lib/grammar/src/Happy/Grammar.lhs
Original file line number Diff line number Diff line change
Expand Up @@ -8,32 +8,52 @@ The Grammar data type.
> module Happy.Grammar (
> Name,
>
> Production(..), Grammar(..),
> Production(..),
> TokenSpec(..),
> Grammar(..),
> AttributeGrammarExtras(..),
> Priority(..),
> Assoc(..),
> Pragmas(..), ErrorHandlerType(..),
>
> ExpressionWithHole(..),
> substExpressionWithHole,
> errorName, errorTok, startName, dummyName, firstStartTok, dummyTok,
> eofName, epsilonTok,
>
> mapDollarDollar
> ) where

> import Data.Array
> import Data.Char (isAlphaNum)
> type Name = Int

> data Production eliminator
> = Production Name [Name] (eliminator,[Int]) Priority
> deriving Show

> data TokenSpec
>
> -- | The token is just a fixed value
> = TokenFixed String
>
> -- | The token is an expression involving the value of the lexed token.
> | TokenWithValue ExpressionWithHole
>
> deriving (Eq, Show)

> -- | The overall expression is
> -- 'tokLeft ++ substitutedForHole ++ tokRight'.
> data ExpressionWithHole
> = ExpressionWithHole {
> exprLeft :: String,
> exprRight :: String
> }
> deriving (Eq, Show)

> data Grammar eliminator
> = Grammar {
> productions :: [Production eliminator],
> lookupProdNo :: Int -> Production eliminator,
> lookupProdsOfName :: Name -> [Int],
> token_specs :: [(Name,String)],
> token_specs :: [(Name, TokenSpec)],
> terminals :: [Name],
> non_terminals :: [Name],
> starts :: [(String,Name,Name,Bool)],
Expand Down Expand Up @@ -146,26 +166,5 @@ For array-based parsers, see the note in Tabular/LALR.lhs.
> errorTok = 1
> epsilonTok = 0

-----------------------------------------------------------------------------
Replace $$ with an arbitrary string, being careful to avoid ".." and '.'.
> mapDollarDollar :: String -> Maybe (String -> String)
> mapDollarDollar = fmap (\(l, r) repr -> l ++ repr ++ r) . mapDollarDollar'
>
> mapDollarDollar' :: String -> Maybe (String, String)
> mapDollarDollar' code0 = go code0 ""
> where go code acc =
> case code of
> [] -> Nothing
>
> '"' :r -> case reads code :: [(String,String)] of
> [] -> go r ('"':acc)
> (s,r'):_ -> go r' (reverse (show s) ++ acc)
> a:'\'' :r | isAlphaNum a -> go r ('\'':a:acc)
> '\'' :r -> case reads code :: [(Char,String)] of
> [] -> go r ('\'':acc)
> (c,r'):_ -> go r' (reverse (show c) ++ acc)
> '\\':'$':r -> go r ('$':acc)
> '$':'$':r -> Just (reverse acc, r)
> c:r -> go r (c:acc)
> substExpressionWithHole :: ExpressionWithHole -> String -> String
> substExpressionWithHole (ExpressionWithHole l r) = \repr -> l ++ repr ++ r
5 changes: 4 additions & 1 deletion lib/tabular/src/Happy/Tabular/Info.lhs
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,10 @@ Produce a file of parser information, useful for debugging the parser.
> showTerminal (t,s)
> = str "\t"
> . showJName 15 t
> . str "{ " . str s . str " }"
> . str "{ " . showToken s . str " }"

> showToken (TokenFixed s) = str s
> showToken (TokenWithValue e) = str $ substExpressionWithHole e "$$"

> showNonTerminals
> = banner "Non-terminals"
Expand Down

0 comments on commit 4ac4d5d

Please sign in to comment.